diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,254990 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 36420, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0, + "eval_loss": 0.6326802372932434, + "eval_runtime": 177.5382, + "eval_samples_per_second": 135.177, + "eval_steps_per_second": 16.898, + "step": 0 + }, + { + "epoch": 2.7457440966501922e-05, + "grad_norm": 2.2561402320861816, + "learning_rate": 0.0, + "loss": 0.8802, + "step": 1 + }, + { + "epoch": 5.4914881933003843e-05, + "grad_norm": 2.342005968093872, + "learning_rate": 2.0000000000000002e-07, + "loss": 0.9029, + "step": 2 + }, + { + "epoch": 8.237232289950576e-05, + "grad_norm": 2.3655002117156982, + "learning_rate": 4.0000000000000003e-07, + "loss": 0.8864, + "step": 3 + }, + { + "epoch": 0.00010982976386600769, + "grad_norm": 4.076173305511475, + "learning_rate": 6.000000000000001e-07, + "loss": 0.9276, + "step": 4 + }, + { + "epoch": 0.0001372872048325096, + "grad_norm": 2.35604190826416, + "learning_rate": 8.000000000000001e-07, + "loss": 0.898, + "step": 5 + }, + { + "epoch": 0.00016474464579901152, + "grad_norm": 2.186289072036743, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.8972, + "step": 6 + }, + { + "epoch": 0.00019220208676551346, + "grad_norm": 1.9669835567474365, + "learning_rate": 1.2000000000000002e-06, + "loss": 0.8868, + "step": 7 + }, + { + "epoch": 0.00021965952773201537, + "grad_norm": 1.6937439441680908, + "learning_rate": 1.4000000000000001e-06, + "loss": 0.914, + "step": 8 + }, + { + "epoch": 0.0002471169686985173, + "grad_norm": 2.1082160472869873, + "learning_rate": 1.6000000000000001e-06, + "loss": 0.8594, + "step": 9 + }, + { + "epoch": 0.0002745744096650192, + "grad_norm": 1.773193359375, + "learning_rate": 1.8000000000000001e-06, + "loss": 0.8371, + "step": 10 + }, + { + "epoch": 0.00030203185063152114, + "grad_norm": 1.8144782781600952, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.8764, + "step": 11 + }, + { + "epoch": 0.00032948929159802305, + "grad_norm": 1.2651466131210327, + "learning_rate": 2.2e-06, + "loss": 0.8279, + "step": 12 + }, + { + "epoch": 0.000356946732564525, + "grad_norm": 1.157173752784729, + "learning_rate": 2.4000000000000003e-06, + "loss": 0.7459, + "step": 13 + }, + { + "epoch": 0.0003844041735310269, + "grad_norm": 1.2744603157043457, + "learning_rate": 2.6e-06, + "loss": 0.8008, + "step": 14 + }, + { + "epoch": 0.00041186161449752884, + "grad_norm": 1.1443653106689453, + "learning_rate": 2.8000000000000003e-06, + "loss": 0.7943, + "step": 15 + }, + { + "epoch": 0.00043931905546403075, + "grad_norm": 1.0647474527359009, + "learning_rate": 3e-06, + "loss": 0.8079, + "step": 16 + }, + { + "epoch": 0.00046677649643053266, + "grad_norm": 0.7448004484176636, + "learning_rate": 3.2000000000000003e-06, + "loss": 0.7663, + "step": 17 + }, + { + "epoch": 0.0004942339373970346, + "grad_norm": 0.8476930856704712, + "learning_rate": 3.4000000000000005e-06, + "loss": 0.7532, + "step": 18 + }, + { + "epoch": 0.0005216913783635365, + "grad_norm": 0.6624916195869446, + "learning_rate": 3.6000000000000003e-06, + "loss": 0.7995, + "step": 19 + }, + { + "epoch": 0.0005491488193300384, + "grad_norm": 0.7960836887359619, + "learning_rate": 3.8000000000000005e-06, + "loss": 0.7677, + "step": 20 + }, + { + "epoch": 0.0005766062602965404, + "grad_norm": 0.7930276989936829, + "learning_rate": 4.000000000000001e-06, + "loss": 0.7259, + "step": 21 + }, + { + "epoch": 0.0006040637012630423, + "grad_norm": 0.90882807970047, + "learning_rate": 4.2000000000000004e-06, + "loss": 0.7541, + "step": 22 + }, + { + "epoch": 0.0006315211422295442, + "grad_norm": 0.8168877363204956, + "learning_rate": 4.4e-06, + "loss": 0.7322, + "step": 23 + }, + { + "epoch": 0.0006589785831960461, + "grad_norm": 0.703757643699646, + "learning_rate": 4.600000000000001e-06, + "loss": 0.6697, + "step": 24 + }, + { + "epoch": 0.0006864360241625481, + "grad_norm": 0.6717430949211121, + "learning_rate": 4.800000000000001e-06, + "loss": 0.6747, + "step": 25 + }, + { + "epoch": 0.00071389346512905, + "grad_norm": 0.642096221446991, + "learning_rate": 5e-06, + "loss": 0.7663, + "step": 26 + }, + { + "epoch": 0.0007413509060955519, + "grad_norm": 0.6495366096496582, + "learning_rate": 5.2e-06, + "loss": 0.6048, + "step": 27 + }, + { + "epoch": 0.0007688083470620538, + "grad_norm": 0.6014453172683716, + "learning_rate": 5.400000000000001e-06, + "loss": 0.7347, + "step": 28 + }, + { + "epoch": 0.0007962657880285557, + "grad_norm": 0.5283642411231995, + "learning_rate": 5.600000000000001e-06, + "loss": 0.6906, + "step": 29 + }, + { + "epoch": 0.0008237232289950577, + "grad_norm": 0.4862114489078522, + "learning_rate": 5.8e-06, + "loss": 0.7212, + "step": 30 + }, + { + "epoch": 0.0008511806699615595, + "grad_norm": 0.5186327695846558, + "learning_rate": 6e-06, + "loss": 0.6614, + "step": 31 + }, + { + "epoch": 0.0008786381109280615, + "grad_norm": 0.5351436138153076, + "learning_rate": 6.200000000000001e-06, + "loss": 0.7566, + "step": 32 + }, + { + "epoch": 0.0009060955518945635, + "grad_norm": 0.5533176064491272, + "learning_rate": 6.4000000000000006e-06, + "loss": 0.6553, + "step": 33 + }, + { + "epoch": 0.0009335529928610653, + "grad_norm": 0.5029674768447876, + "learning_rate": 6.600000000000001e-06, + "loss": 0.7282, + "step": 34 + }, + { + "epoch": 0.0009610104338275673, + "grad_norm": 0.44217249751091003, + "learning_rate": 6.800000000000001e-06, + "loss": 0.619, + "step": 35 + }, + { + "epoch": 0.0009884678747940692, + "grad_norm": 0.43247437477111816, + "learning_rate": 7e-06, + "loss": 0.6208, + "step": 36 + }, + { + "epoch": 0.001015925315760571, + "grad_norm": 0.8083419799804688, + "learning_rate": 7.2000000000000005e-06, + "loss": 0.6588, + "step": 37 + }, + { + "epoch": 0.001043382756727073, + "grad_norm": 0.49433842301368713, + "learning_rate": 7.4e-06, + "loss": 0.6537, + "step": 38 + }, + { + "epoch": 0.001070840197693575, + "grad_norm": 0.5236935019493103, + "learning_rate": 7.600000000000001e-06, + "loss": 0.6074, + "step": 39 + }, + { + "epoch": 0.001098297638660077, + "grad_norm": 0.5073657035827637, + "learning_rate": 7.800000000000002e-06, + "loss": 0.7987, + "step": 40 + }, + { + "epoch": 0.0011257550796265788, + "grad_norm": 0.4493747651576996, + "learning_rate": 8.000000000000001e-06, + "loss": 0.6389, + "step": 41 + }, + { + "epoch": 0.0011532125205930808, + "grad_norm": 0.5368253588676453, + "learning_rate": 8.2e-06, + "loss": 0.6919, + "step": 42 + }, + { + "epoch": 0.0011806699615595827, + "grad_norm": 0.5811830759048462, + "learning_rate": 8.400000000000001e-06, + "loss": 0.7515, + "step": 43 + }, + { + "epoch": 0.0012081274025260845, + "grad_norm": 0.43591269850730896, + "learning_rate": 8.6e-06, + "loss": 0.6522, + "step": 44 + }, + { + "epoch": 0.0012355848434925864, + "grad_norm": 0.4478546380996704, + "learning_rate": 8.8e-06, + "loss": 0.7013, + "step": 45 + }, + { + "epoch": 0.0012630422844590885, + "grad_norm": 0.44441911578178406, + "learning_rate": 9e-06, + "loss": 0.6555, + "step": 46 + }, + { + "epoch": 0.0012904997254255903, + "grad_norm": 0.4050477147102356, + "learning_rate": 9.200000000000002e-06, + "loss": 0.5679, + "step": 47 + }, + { + "epoch": 0.0013179571663920922, + "grad_norm": 0.4047469198703766, + "learning_rate": 9.4e-06, + "loss": 0.7032, + "step": 48 + }, + { + "epoch": 0.0013454146073585943, + "grad_norm": 0.44018757343292236, + "learning_rate": 9.600000000000001e-06, + "loss": 0.657, + "step": 49 + }, + { + "epoch": 0.0013728720483250961, + "grad_norm": 0.4194895923137665, + "learning_rate": 9.800000000000001e-06, + "loss": 0.61, + "step": 50 + }, + { + "epoch": 0.001400329489291598, + "grad_norm": 0.3997894525527954, + "learning_rate": 1e-05, + "loss": 0.5872, + "step": 51 + }, + { + "epoch": 0.0014277869302581, + "grad_norm": 0.4468090534210205, + "learning_rate": 1.02e-05, + "loss": 0.498, + "step": 52 + }, + { + "epoch": 0.001455244371224602, + "grad_norm": 0.48776981234550476, + "learning_rate": 1.04e-05, + "loss": 0.588, + "step": 53 + }, + { + "epoch": 0.0014827018121911038, + "grad_norm": 0.42795422673225403, + "learning_rate": 1.0600000000000002e-05, + "loss": 0.6725, + "step": 54 + }, + { + "epoch": 0.0015101592531576056, + "grad_norm": 0.4143153727054596, + "learning_rate": 1.0800000000000002e-05, + "loss": 0.6205, + "step": 55 + }, + { + "epoch": 0.0015376166941241077, + "grad_norm": 0.4018900692462921, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.5532, + "step": 56 + }, + { + "epoch": 0.0015650741350906096, + "grad_norm": 0.4289415180683136, + "learning_rate": 1.1200000000000001e-05, + "loss": 0.5733, + "step": 57 + }, + { + "epoch": 0.0015925315760571114, + "grad_norm": 0.4622398018836975, + "learning_rate": 1.14e-05, + "loss": 0.6955, + "step": 58 + }, + { + "epoch": 0.0016199890170236135, + "grad_norm": 0.492209255695343, + "learning_rate": 1.16e-05, + "loss": 0.5962, + "step": 59 + }, + { + "epoch": 0.0016474464579901153, + "grad_norm": 0.38489100337028503, + "learning_rate": 1.18e-05, + "loss": 0.5443, + "step": 60 + }, + { + "epoch": 0.0016749038989566172, + "grad_norm": 0.42261120676994324, + "learning_rate": 1.2e-05, + "loss": 0.6293, + "step": 61 + }, + { + "epoch": 0.001702361339923119, + "grad_norm": 0.545713484287262, + "learning_rate": 1.22e-05, + "loss": 0.642, + "step": 62 + }, + { + "epoch": 0.0017298187808896211, + "grad_norm": 0.46870431303977966, + "learning_rate": 1.2400000000000002e-05, + "loss": 0.6618, + "step": 63 + }, + { + "epoch": 0.001757276221856123, + "grad_norm": 0.4857075810432434, + "learning_rate": 1.2600000000000001e-05, + "loss": 0.5794, + "step": 64 + }, + { + "epoch": 0.0017847336628226248, + "grad_norm": 0.4711804687976837, + "learning_rate": 1.2800000000000001e-05, + "loss": 0.6093, + "step": 65 + }, + { + "epoch": 0.001812191103789127, + "grad_norm": 0.45472607016563416, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.614, + "step": 66 + }, + { + "epoch": 0.0018396485447556288, + "grad_norm": 0.5441303849220276, + "learning_rate": 1.3200000000000002e-05, + "loss": 0.6774, + "step": 67 + }, + { + "epoch": 0.0018671059857221306, + "grad_norm": 0.4511236548423767, + "learning_rate": 1.3400000000000002e-05, + "loss": 0.6283, + "step": 68 + }, + { + "epoch": 0.0018945634266886327, + "grad_norm": 0.441902756690979, + "learning_rate": 1.3600000000000002e-05, + "loss": 0.6967, + "step": 69 + }, + { + "epoch": 0.0019220208676551346, + "grad_norm": 0.44374457001686096, + "learning_rate": 1.38e-05, + "loss": 0.5963, + "step": 70 + }, + { + "epoch": 0.0019494783086216364, + "grad_norm": 0.4764389097690582, + "learning_rate": 1.4e-05, + "loss": 0.7037, + "step": 71 + }, + { + "epoch": 0.0019769357495881385, + "grad_norm": 0.44006603956222534, + "learning_rate": 1.4200000000000001e-05, + "loss": 0.6305, + "step": 72 + }, + { + "epoch": 0.0020043931905546404, + "grad_norm": 0.4069029986858368, + "learning_rate": 1.4400000000000001e-05, + "loss": 0.6288, + "step": 73 + }, + { + "epoch": 0.002031850631521142, + "grad_norm": 0.3871413469314575, + "learning_rate": 1.46e-05, + "loss": 0.5926, + "step": 74 + }, + { + "epoch": 0.002059308072487644, + "grad_norm": 0.4089374542236328, + "learning_rate": 1.48e-05, + "loss": 0.6765, + "step": 75 + }, + { + "epoch": 0.002086765513454146, + "grad_norm": 0.41028884053230286, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.6027, + "step": 76 + }, + { + "epoch": 0.002114222954420648, + "grad_norm": 0.4265989065170288, + "learning_rate": 1.5200000000000002e-05, + "loss": 0.5517, + "step": 77 + }, + { + "epoch": 0.00214168039538715, + "grad_norm": 0.48942798376083374, + "learning_rate": 1.54e-05, + "loss": 0.6191, + "step": 78 + }, + { + "epoch": 0.002169137836353652, + "grad_norm": 0.5473577976226807, + "learning_rate": 1.5600000000000003e-05, + "loss": 0.7313, + "step": 79 + }, + { + "epoch": 0.002196595277320154, + "grad_norm": 0.5348865389823914, + "learning_rate": 1.58e-05, + "loss": 0.6606, + "step": 80 + }, + { + "epoch": 0.0022240527182866556, + "grad_norm": 0.4247622489929199, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.6122, + "step": 81 + }, + { + "epoch": 0.0022515101592531575, + "grad_norm": 0.41674304008483887, + "learning_rate": 1.62e-05, + "loss": 0.5894, + "step": 82 + }, + { + "epoch": 0.0022789676002196594, + "grad_norm": 0.49677029252052307, + "learning_rate": 1.64e-05, + "loss": 0.6122, + "step": 83 + }, + { + "epoch": 0.0023064250411861617, + "grad_norm": 0.4433738887310028, + "learning_rate": 1.66e-05, + "loss": 0.6413, + "step": 84 + }, + { + "epoch": 0.0023338824821526635, + "grad_norm": 0.48676344752311707, + "learning_rate": 1.6800000000000002e-05, + "loss": 0.6909, + "step": 85 + }, + { + "epoch": 0.0023613399231191654, + "grad_norm": 0.4397391974925995, + "learning_rate": 1.7e-05, + "loss": 0.563, + "step": 86 + }, + { + "epoch": 0.0023887973640856672, + "grad_norm": 0.46282798051834106, + "learning_rate": 1.72e-05, + "loss": 0.7357, + "step": 87 + }, + { + "epoch": 0.002416254805052169, + "grad_norm": 0.45930078625679016, + "learning_rate": 1.7400000000000003e-05, + "loss": 0.62, + "step": 88 + }, + { + "epoch": 0.002443712246018671, + "grad_norm": 0.48151397705078125, + "learning_rate": 1.76e-05, + "loss": 0.6979, + "step": 89 + }, + { + "epoch": 0.002471169686985173, + "grad_norm": 0.4736911654472351, + "learning_rate": 1.7800000000000002e-05, + "loss": 0.6824, + "step": 90 + }, + { + "epoch": 0.002498627127951675, + "grad_norm": 0.45994314551353455, + "learning_rate": 1.8e-05, + "loss": 0.6294, + "step": 91 + }, + { + "epoch": 0.002526084568918177, + "grad_norm": 0.5020555257797241, + "learning_rate": 1.8200000000000002e-05, + "loss": 0.5782, + "step": 92 + }, + { + "epoch": 0.002553542009884679, + "grad_norm": 0.5591815114021301, + "learning_rate": 1.8400000000000003e-05, + "loss": 0.6595, + "step": 93 + }, + { + "epoch": 0.0025809994508511807, + "grad_norm": 0.44648024439811707, + "learning_rate": 1.86e-05, + "loss": 0.5676, + "step": 94 + }, + { + "epoch": 0.0026084568918176825, + "grad_norm": 0.445890873670578, + "learning_rate": 1.88e-05, + "loss": 0.7295, + "step": 95 + }, + { + "epoch": 0.0026359143327841844, + "grad_norm": 0.4448269009590149, + "learning_rate": 1.9e-05, + "loss": 0.5728, + "step": 96 + }, + { + "epoch": 0.0026633717737506862, + "grad_norm": 0.45189368724823, + "learning_rate": 1.9200000000000003e-05, + "loss": 0.6471, + "step": 97 + }, + { + "epoch": 0.0026908292147171885, + "grad_norm": 0.4151735007762909, + "learning_rate": 1.94e-05, + "loss": 0.5957, + "step": 98 + }, + { + "epoch": 0.0027182866556836904, + "grad_norm": 0.41519683599472046, + "learning_rate": 1.9600000000000002e-05, + "loss": 0.5478, + "step": 99 + }, + { + "epoch": 0.0027457440966501922, + "grad_norm": 0.43746230006217957, + "learning_rate": 1.98e-05, + "loss": 0.5593, + "step": 100 + }, + { + "epoch": 0.002773201537616694, + "grad_norm": 0.4645647406578064, + "learning_rate": 2e-05, + "loss": 0.6259, + "step": 101 + }, + { + "epoch": 0.002800658978583196, + "grad_norm": 1.1229034662246704, + "learning_rate": 1.9999999990673403e-05, + "loss": 0.5534, + "step": 102 + }, + { + "epoch": 0.002828116419549698, + "grad_norm": 0.4796474575996399, + "learning_rate": 1.9999999962693618e-05, + "loss": 0.6227, + "step": 103 + }, + { + "epoch": 0.0028555738605162, + "grad_norm": 0.41566044092178345, + "learning_rate": 1.999999991606064e-05, + "loss": 0.5946, + "step": 104 + }, + { + "epoch": 0.002883031301482702, + "grad_norm": 0.4952285587787628, + "learning_rate": 1.9999999850774467e-05, + "loss": 0.6729, + "step": 105 + }, + { + "epoch": 0.002910488742449204, + "grad_norm": 0.4553510844707489, + "learning_rate": 1.9999999766835108e-05, + "loss": 0.62, + "step": 106 + }, + { + "epoch": 0.0029379461834157057, + "grad_norm": 0.4013444781303406, + "learning_rate": 1.999999966424255e-05, + "loss": 0.5991, + "step": 107 + }, + { + "epoch": 0.0029654036243822075, + "grad_norm": 0.4763377010822296, + "learning_rate": 1.9999999542996812e-05, + "loss": 0.6157, + "step": 108 + }, + { + "epoch": 0.0029928610653487094, + "grad_norm": 0.4436170160770416, + "learning_rate": 1.9999999403097873e-05, + "loss": 0.5386, + "step": 109 + }, + { + "epoch": 0.0030203185063152112, + "grad_norm": 0.4334041178226471, + "learning_rate": 1.9999999244545753e-05, + "loss": 0.6781, + "step": 110 + }, + { + "epoch": 0.0030477759472817135, + "grad_norm": 0.5235326290130615, + "learning_rate": 1.9999999067340433e-05, + "loss": 0.6014, + "step": 111 + }, + { + "epoch": 0.0030752333882482154, + "grad_norm": 0.5019425749778748, + "learning_rate": 1.999999887148193e-05, + "loss": 0.5865, + "step": 112 + }, + { + "epoch": 0.0031026908292147173, + "grad_norm": 0.4108220636844635, + "learning_rate": 1.9999998656970236e-05, + "loss": 0.5724, + "step": 113 + }, + { + "epoch": 0.003130148270181219, + "grad_norm": 0.46410611271858215, + "learning_rate": 1.9999998423805352e-05, + "loss": 0.6435, + "step": 114 + }, + { + "epoch": 0.003157605711147721, + "grad_norm": 0.418171226978302, + "learning_rate": 1.9999998171987278e-05, + "loss": 0.6485, + "step": 115 + }, + { + "epoch": 0.003185063152114223, + "grad_norm": 0.4836839437484741, + "learning_rate": 1.9999997901516014e-05, + "loss": 0.6753, + "step": 116 + }, + { + "epoch": 0.0032125205930807247, + "grad_norm": 0.41291505098342896, + "learning_rate": 1.9999997612391567e-05, + "loss": 0.5849, + "step": 117 + }, + { + "epoch": 0.003239978034047227, + "grad_norm": 0.4702100157737732, + "learning_rate": 1.9999997304613932e-05, + "loss": 0.7081, + "step": 118 + }, + { + "epoch": 0.003267435475013729, + "grad_norm": 0.4679124057292938, + "learning_rate": 1.9999996978183108e-05, + "loss": 0.6667, + "step": 119 + }, + { + "epoch": 0.0032948929159802307, + "grad_norm": 0.42561307549476624, + "learning_rate": 1.99999966330991e-05, + "loss": 0.608, + "step": 120 + }, + { + "epoch": 0.0033223503569467325, + "grad_norm": 0.4386301636695862, + "learning_rate": 1.999999626936191e-05, + "loss": 0.5862, + "step": 121 + }, + { + "epoch": 0.0033498077979132344, + "grad_norm": 0.4232906997203827, + "learning_rate": 1.9999995886971528e-05, + "loss": 0.573, + "step": 122 + }, + { + "epoch": 0.0033772652388797363, + "grad_norm": 0.45866522192955017, + "learning_rate": 1.9999995485927966e-05, + "loss": 0.689, + "step": 123 + }, + { + "epoch": 0.003404722679846238, + "grad_norm": 0.46257665753364563, + "learning_rate": 1.9999995066231222e-05, + "loss": 0.6425, + "step": 124 + }, + { + "epoch": 0.0034321801208127404, + "grad_norm": 0.4337705373764038, + "learning_rate": 1.999999462788129e-05, + "loss": 0.5342, + "step": 125 + }, + { + "epoch": 0.0034596375617792423, + "grad_norm": 0.409372478723526, + "learning_rate": 1.9999994170878182e-05, + "loss": 0.5385, + "step": 126 + }, + { + "epoch": 0.003487095002745744, + "grad_norm": 0.43916985392570496, + "learning_rate": 1.9999993695221894e-05, + "loss": 0.5918, + "step": 127 + }, + { + "epoch": 0.003514552443712246, + "grad_norm": 0.4186578691005707, + "learning_rate": 1.9999993200912423e-05, + "loss": 0.6443, + "step": 128 + }, + { + "epoch": 0.003542009884678748, + "grad_norm": 0.512024998664856, + "learning_rate": 1.9999992687949775e-05, + "loss": 0.498, + "step": 129 + }, + { + "epoch": 0.0035694673256452497, + "grad_norm": 0.7444612979888916, + "learning_rate": 1.9999992156333947e-05, + "loss": 0.6449, + "step": 130 + }, + { + "epoch": 0.003596924766611752, + "grad_norm": 0.48492512106895447, + "learning_rate": 1.9999991606064942e-05, + "loss": 0.6568, + "step": 131 + }, + { + "epoch": 0.003624382207578254, + "grad_norm": 0.44983240962028503, + "learning_rate": 1.999999103714276e-05, + "loss": 0.5998, + "step": 132 + }, + { + "epoch": 0.0036518396485447557, + "grad_norm": 0.4248271584510803, + "learning_rate": 1.9999990449567407e-05, + "loss": 0.6453, + "step": 133 + }, + { + "epoch": 0.0036792970895112576, + "grad_norm": 0.4380721151828766, + "learning_rate": 1.9999989843338875e-05, + "loss": 0.6809, + "step": 134 + }, + { + "epoch": 0.0037067545304777594, + "grad_norm": 0.4258834719657898, + "learning_rate": 1.9999989218457175e-05, + "loss": 0.6324, + "step": 135 + }, + { + "epoch": 0.0037342119714442613, + "grad_norm": 0.4458576738834381, + "learning_rate": 1.99999885749223e-05, + "loss": 0.6717, + "step": 136 + }, + { + "epoch": 0.003761669412410763, + "grad_norm": 0.4505946636199951, + "learning_rate": 1.9999987912734257e-05, + "loss": 0.6018, + "step": 137 + }, + { + "epoch": 0.0037891268533772654, + "grad_norm": 0.4281530976295471, + "learning_rate": 1.9999987231893046e-05, + "loss": 0.6672, + "step": 138 + }, + { + "epoch": 0.0038165842943437673, + "grad_norm": 0.5011027455329895, + "learning_rate": 1.9999986532398666e-05, + "loss": 0.6524, + "step": 139 + }, + { + "epoch": 0.003844041735310269, + "grad_norm": 0.4920332133769989, + "learning_rate": 1.999998581425112e-05, + "loss": 0.5761, + "step": 140 + }, + { + "epoch": 0.003871499176276771, + "grad_norm": 0.49742692708969116, + "learning_rate": 1.9999985077450406e-05, + "loss": 0.5844, + "step": 141 + }, + { + "epoch": 0.003898956617243273, + "grad_norm": 0.4630191922187805, + "learning_rate": 1.9999984321996534e-05, + "loss": 0.683, + "step": 142 + }, + { + "epoch": 0.003926414058209775, + "grad_norm": 0.47620368003845215, + "learning_rate": 1.999998354788949e-05, + "loss": 0.6161, + "step": 143 + }, + { + "epoch": 0.003953871499176277, + "grad_norm": 0.4642435312271118, + "learning_rate": 1.9999982755129292e-05, + "loss": 0.6554, + "step": 144 + }, + { + "epoch": 0.003981328940142779, + "grad_norm": 0.4387148916721344, + "learning_rate": 1.9999981943715934e-05, + "loss": 0.6853, + "step": 145 + }, + { + "epoch": 0.004008786381109281, + "grad_norm": 0.4572583734989166, + "learning_rate": 1.999998111364942e-05, + "loss": 0.6093, + "step": 146 + }, + { + "epoch": 0.004036243822075783, + "grad_norm": 0.4165478050708771, + "learning_rate": 1.9999980264929744e-05, + "loss": 0.6117, + "step": 147 + }, + { + "epoch": 0.004063701263042284, + "grad_norm": 0.4298742711544037, + "learning_rate": 1.9999979397556917e-05, + "loss": 0.6015, + "step": 148 + }, + { + "epoch": 0.004091158704008786, + "grad_norm": 0.4505866765975952, + "learning_rate": 1.9999978511530937e-05, + "loss": 0.558, + "step": 149 + }, + { + "epoch": 0.004118616144975288, + "grad_norm": 0.4468693137168884, + "learning_rate": 1.9999977606851804e-05, + "loss": 0.6812, + "step": 150 + }, + { + "epoch": 0.00414607358594179, + "grad_norm": 0.42061498761177063, + "learning_rate": 1.999997668351952e-05, + "loss": 0.583, + "step": 151 + }, + { + "epoch": 0.004173531026908292, + "grad_norm": 0.4557202160358429, + "learning_rate": 1.999997574153409e-05, + "loss": 0.6236, + "step": 152 + }, + { + "epoch": 0.004200988467874794, + "grad_norm": 0.5489321947097778, + "learning_rate": 1.9999974780895514e-05, + "loss": 0.7379, + "step": 153 + }, + { + "epoch": 0.004228445908841296, + "grad_norm": 0.45926955342292786, + "learning_rate": 1.9999973801603793e-05, + "loss": 0.6898, + "step": 154 + }, + { + "epoch": 0.004255903349807798, + "grad_norm": 0.478107750415802, + "learning_rate": 1.999997280365893e-05, + "loss": 0.6662, + "step": 155 + }, + { + "epoch": 0.0042833607907743, + "grad_norm": 0.43631061911582947, + "learning_rate": 1.9999971787060923e-05, + "loss": 0.5895, + "step": 156 + }, + { + "epoch": 0.004310818231740802, + "grad_norm": 0.4447135031223297, + "learning_rate": 1.999997075180978e-05, + "loss": 0.657, + "step": 157 + }, + { + "epoch": 0.004338275672707304, + "grad_norm": 0.41299593448638916, + "learning_rate": 1.9999969697905496e-05, + "loss": 0.5642, + "step": 158 + }, + { + "epoch": 0.004365733113673806, + "grad_norm": 0.4609125554561615, + "learning_rate": 1.999996862534808e-05, + "loss": 0.6649, + "step": 159 + }, + { + "epoch": 0.004393190554640308, + "grad_norm": 0.4190620183944702, + "learning_rate": 1.999996753413753e-05, + "loss": 0.6188, + "step": 160 + }, + { + "epoch": 0.0044206479956068094, + "grad_norm": 0.44755828380584717, + "learning_rate": 1.9999966424273852e-05, + "loss": 0.6463, + "step": 161 + }, + { + "epoch": 0.004448105436573311, + "grad_norm": 0.5037378668785095, + "learning_rate": 1.9999965295757043e-05, + "loss": 0.6094, + "step": 162 + }, + { + "epoch": 0.004475562877539813, + "grad_norm": 0.422136127948761, + "learning_rate": 1.9999964148587106e-05, + "loss": 0.6634, + "step": 163 + }, + { + "epoch": 0.004503020318506315, + "grad_norm": 0.3965300917625427, + "learning_rate": 1.9999962982764045e-05, + "loss": 0.6735, + "step": 164 + }, + { + "epoch": 0.004530477759472817, + "grad_norm": 0.38264939188957214, + "learning_rate": 1.9999961798287863e-05, + "loss": 0.5633, + "step": 165 + }, + { + "epoch": 0.004557935200439319, + "grad_norm": 0.4847031831741333, + "learning_rate": 1.9999960595158558e-05, + "loss": 0.5131, + "step": 166 + }, + { + "epoch": 0.004585392641405821, + "grad_norm": 0.4248157739639282, + "learning_rate": 1.9999959373376134e-05, + "loss": 0.5826, + "step": 167 + }, + { + "epoch": 0.004612850082372323, + "grad_norm": 0.40812015533447266, + "learning_rate": 1.9999958132940595e-05, + "loss": 0.5653, + "step": 168 + }, + { + "epoch": 0.004640307523338825, + "grad_norm": 0.5641244649887085, + "learning_rate": 1.9999956873851943e-05, + "loss": 0.6459, + "step": 169 + }, + { + "epoch": 0.004667764964305327, + "grad_norm": 0.4661983847618103, + "learning_rate": 1.9999955596110182e-05, + "loss": 0.5959, + "step": 170 + }, + { + "epoch": 0.004695222405271829, + "grad_norm": 0.4596157371997833, + "learning_rate": 1.9999954299715313e-05, + "loss": 0.6741, + "step": 171 + }, + { + "epoch": 0.004722679846238331, + "grad_norm": 0.6327513456344604, + "learning_rate": 1.999995298466733e-05, + "loss": 0.5499, + "step": 172 + }, + { + "epoch": 0.004750137287204833, + "grad_norm": 0.4812397062778473, + "learning_rate": 1.999995165096625e-05, + "loss": 0.6152, + "step": 173 + }, + { + "epoch": 0.0047775947281713345, + "grad_norm": 0.4263966977596283, + "learning_rate": 1.999995029861207e-05, + "loss": 0.619, + "step": 174 + }, + { + "epoch": 0.004805052169137836, + "grad_norm": 0.4864668846130371, + "learning_rate": 1.9999948927604787e-05, + "loss": 0.6177, + "step": 175 + }, + { + "epoch": 0.004832509610104338, + "grad_norm": 0.43964195251464844, + "learning_rate": 1.9999947537944408e-05, + "loss": 0.6115, + "step": 176 + }, + { + "epoch": 0.00485996705107084, + "grad_norm": 0.45105692744255066, + "learning_rate": 1.9999946129630932e-05, + "loss": 0.6257, + "step": 177 + }, + { + "epoch": 0.004887424492037342, + "grad_norm": 0.39107412099838257, + "learning_rate": 1.9999944702664372e-05, + "loss": 0.6347, + "step": 178 + }, + { + "epoch": 0.004914881933003844, + "grad_norm": 0.3795750141143799, + "learning_rate": 1.9999943257044717e-05, + "loss": 0.5037, + "step": 179 + }, + { + "epoch": 0.004942339373970346, + "grad_norm": 0.39178532361984253, + "learning_rate": 1.9999941792771983e-05, + "loss": 0.5581, + "step": 180 + }, + { + "epoch": 0.0049697968149368475, + "grad_norm": 0.4771156907081604, + "learning_rate": 1.999994030984616e-05, + "loss": 0.6105, + "step": 181 + }, + { + "epoch": 0.00499725425590335, + "grad_norm": 0.4509790539741516, + "learning_rate": 1.999993880826726e-05, + "loss": 0.6598, + "step": 182 + }, + { + "epoch": 0.005024711696869852, + "grad_norm": 0.4423171579837799, + "learning_rate": 1.9999937288035278e-05, + "loss": 0.6374, + "step": 183 + }, + { + "epoch": 0.005052169137836354, + "grad_norm": 0.5727972984313965, + "learning_rate": 1.9999935749150227e-05, + "loss": 0.589, + "step": 184 + }, + { + "epoch": 0.005079626578802856, + "grad_norm": 0.4115532338619232, + "learning_rate": 1.99999341916121e-05, + "loss": 0.5308, + "step": 185 + }, + { + "epoch": 0.005107084019769358, + "grad_norm": 0.7021656036376953, + "learning_rate": 1.9999932615420908e-05, + "loss": 0.6746, + "step": 186 + }, + { + "epoch": 0.0051345414607358595, + "grad_norm": 3.469609260559082, + "learning_rate": 1.9999931020576646e-05, + "loss": 0.6083, + "step": 187 + }, + { + "epoch": 0.005161998901702361, + "grad_norm": 0.44507789611816406, + "learning_rate": 1.9999929407079322e-05, + "loss": 0.7131, + "step": 188 + }, + { + "epoch": 0.005189456342668863, + "grad_norm": 0.41362589597702026, + "learning_rate": 1.999992777492894e-05, + "loss": 0.5915, + "step": 189 + }, + { + "epoch": 0.005216913783635365, + "grad_norm": 0.42812520265579224, + "learning_rate": 1.99999261241255e-05, + "loss": 0.6013, + "step": 190 + }, + { + "epoch": 0.005244371224601867, + "grad_norm": 0.47485044598579407, + "learning_rate": 1.9999924454669008e-05, + "loss": 0.6892, + "step": 191 + }, + { + "epoch": 0.005271828665568369, + "grad_norm": 0.42368319630622864, + "learning_rate": 1.9999922766559466e-05, + "loss": 0.6236, + "step": 192 + }, + { + "epoch": 0.005299286106534871, + "grad_norm": 0.4164941608905792, + "learning_rate": 1.9999921059796872e-05, + "loss": 0.5604, + "step": 193 + }, + { + "epoch": 0.0053267435475013725, + "grad_norm": 0.3503522276878357, + "learning_rate": 1.9999919334381235e-05, + "loss": 0.5007, + "step": 194 + }, + { + "epoch": 0.005354200988467875, + "grad_norm": 0.4173789918422699, + "learning_rate": 1.999991759031256e-05, + "loss": 0.6528, + "step": 195 + }, + { + "epoch": 0.005381658429434377, + "grad_norm": 0.3919907808303833, + "learning_rate": 1.9999915827590843e-05, + "loss": 0.588, + "step": 196 + }, + { + "epoch": 0.005409115870400879, + "grad_norm": 0.4164595901966095, + "learning_rate": 1.9999914046216093e-05, + "loss": 0.6306, + "step": 197 + }, + { + "epoch": 0.005436573311367381, + "grad_norm": 0.4904977083206177, + "learning_rate": 1.9999912246188314e-05, + "loss": 0.5763, + "step": 198 + }, + { + "epoch": 0.005464030752333883, + "grad_norm": 0.37329134345054626, + "learning_rate": 1.9999910427507502e-05, + "loss": 0.5329, + "step": 199 + }, + { + "epoch": 0.0054914881933003845, + "grad_norm": 0.4144654870033264, + "learning_rate": 1.9999908590173672e-05, + "loss": 0.6314, + "step": 200 + }, + { + "epoch": 0.005518945634266886, + "grad_norm": 0.3920156955718994, + "learning_rate": 1.9999906734186815e-05, + "loss": 0.5881, + "step": 201 + }, + { + "epoch": 0.005546403075233388, + "grad_norm": 0.4405362606048584, + "learning_rate": 1.9999904859546943e-05, + "loss": 0.6078, + "step": 202 + }, + { + "epoch": 0.00557386051619989, + "grad_norm": 0.3813554644584656, + "learning_rate": 1.9999902966254058e-05, + "loss": 0.6288, + "step": 203 + }, + { + "epoch": 0.005601317957166392, + "grad_norm": 0.4086970388889313, + "learning_rate": 1.999990105430816e-05, + "loss": 0.5928, + "step": 204 + }, + { + "epoch": 0.005628775398132894, + "grad_norm": 0.41574662923812866, + "learning_rate": 1.9999899123709255e-05, + "loss": 0.5722, + "step": 205 + }, + { + "epoch": 0.005656232839099396, + "grad_norm": 0.40006983280181885, + "learning_rate": 1.999989717445735e-05, + "loss": 0.6232, + "step": 206 + }, + { + "epoch": 0.0056836902800658975, + "grad_norm": 0.48231008648872375, + "learning_rate": 1.9999895206552438e-05, + "loss": 0.6282, + "step": 207 + }, + { + "epoch": 0.0057111477210324, + "grad_norm": 0.3890703618526459, + "learning_rate": 1.9999893219994533e-05, + "loss": 0.6671, + "step": 208 + }, + { + "epoch": 0.005738605161998902, + "grad_norm": 0.4452843964099884, + "learning_rate": 1.999989121478364e-05, + "loss": 0.6513, + "step": 209 + }, + { + "epoch": 0.005766062602965404, + "grad_norm": 0.381786584854126, + "learning_rate": 1.9999889190919754e-05, + "loss": 0.5881, + "step": 210 + }, + { + "epoch": 0.005793520043931906, + "grad_norm": 0.3894210159778595, + "learning_rate": 1.9999887148402882e-05, + "loss": 0.6084, + "step": 211 + }, + { + "epoch": 0.005820977484898408, + "grad_norm": 0.3597693145275116, + "learning_rate": 1.9999885087233034e-05, + "loss": 0.5627, + "step": 212 + }, + { + "epoch": 0.0058484349258649095, + "grad_norm": 0.39207249879837036, + "learning_rate": 1.9999883007410205e-05, + "loss": 0.5817, + "step": 213 + }, + { + "epoch": 0.005875892366831411, + "grad_norm": 0.45632675290107727, + "learning_rate": 1.9999880908934403e-05, + "loss": 0.6772, + "step": 214 + }, + { + "epoch": 0.005903349807797913, + "grad_norm": 0.4458891749382019, + "learning_rate": 1.999987879180563e-05, + "loss": 0.67, + "step": 215 + }, + { + "epoch": 0.005930807248764415, + "grad_norm": 0.46367698907852173, + "learning_rate": 1.9999876656023893e-05, + "loss": 0.5532, + "step": 216 + }, + { + "epoch": 0.005958264689730917, + "grad_norm": 0.4678700566291809, + "learning_rate": 1.9999874501589195e-05, + "loss": 0.6394, + "step": 217 + }, + { + "epoch": 0.005985722130697419, + "grad_norm": 0.40423232316970825, + "learning_rate": 1.999987232850154e-05, + "loss": 0.5715, + "step": 218 + }, + { + "epoch": 0.006013179571663921, + "grad_norm": 0.5475576519966125, + "learning_rate": 1.9999870136760933e-05, + "loss": 0.5485, + "step": 219 + }, + { + "epoch": 0.0060406370126304225, + "grad_norm": 0.4014328718185425, + "learning_rate": 1.9999867926367372e-05, + "loss": 0.6441, + "step": 220 + }, + { + "epoch": 0.006068094453596924, + "grad_norm": 0.3991987109184265, + "learning_rate": 1.999986569732087e-05, + "loss": 0.5957, + "step": 221 + }, + { + "epoch": 0.006095551894563427, + "grad_norm": 0.43559056520462036, + "learning_rate": 1.9999863449621423e-05, + "loss": 0.66, + "step": 222 + }, + { + "epoch": 0.006123009335529929, + "grad_norm": 0.46711626648902893, + "learning_rate": 1.9999861183269044e-05, + "loss": 0.6736, + "step": 223 + }, + { + "epoch": 0.006150466776496431, + "grad_norm": 0.4080478549003601, + "learning_rate": 1.999985889826373e-05, + "loss": 0.59, + "step": 224 + }, + { + "epoch": 0.006177924217462933, + "grad_norm": 0.400508314371109, + "learning_rate": 1.9999856594605485e-05, + "loss": 0.6484, + "step": 225 + }, + { + "epoch": 0.0062053816584294345, + "grad_norm": 0.38320261240005493, + "learning_rate": 1.999985427229432e-05, + "loss": 0.5548, + "step": 226 + }, + { + "epoch": 0.006232839099395936, + "grad_norm": 0.4422648549079895, + "learning_rate": 1.999985193133023e-05, + "loss": 0.6269, + "step": 227 + }, + { + "epoch": 0.006260296540362438, + "grad_norm": 0.3704020082950592, + "learning_rate": 1.9999849571713228e-05, + "loss": 0.6068, + "step": 228 + }, + { + "epoch": 0.00628775398132894, + "grad_norm": 0.4695093631744385, + "learning_rate": 1.9999847193443314e-05, + "loss": 0.697, + "step": 229 + }, + { + "epoch": 0.006315211422295442, + "grad_norm": 0.441283255815506, + "learning_rate": 1.9999844796520495e-05, + "loss": 0.6467, + "step": 230 + }, + { + "epoch": 0.006342668863261944, + "grad_norm": 0.3810816705226898, + "learning_rate": 1.9999842380944773e-05, + "loss": 0.6547, + "step": 231 + }, + { + "epoch": 0.006370126304228446, + "grad_norm": 0.4478892683982849, + "learning_rate": 1.9999839946716153e-05, + "loss": 0.6145, + "step": 232 + }, + { + "epoch": 0.0063975837451949475, + "grad_norm": 0.3766327202320099, + "learning_rate": 1.9999837493834637e-05, + "loss": 0.6683, + "step": 233 + }, + { + "epoch": 0.006425041186161449, + "grad_norm": 0.40446531772613525, + "learning_rate": 1.9999835022300236e-05, + "loss": 0.563, + "step": 234 + }, + { + "epoch": 0.006452498627127952, + "grad_norm": 0.5221717357635498, + "learning_rate": 1.999983253211295e-05, + "loss": 0.5626, + "step": 235 + }, + { + "epoch": 0.006479956068094454, + "grad_norm": 0.4647086262702942, + "learning_rate": 1.999983002327279e-05, + "loss": 0.6272, + "step": 236 + }, + { + "epoch": 0.006507413509060956, + "grad_norm": 0.36568981409072876, + "learning_rate": 1.999982749577975e-05, + "loss": 0.5632, + "step": 237 + }, + { + "epoch": 0.006534870950027458, + "grad_norm": 0.41321995854377747, + "learning_rate": 1.9999824949633838e-05, + "loss": 0.5992, + "step": 238 + }, + { + "epoch": 0.0065623283909939595, + "grad_norm": 0.3914000988006592, + "learning_rate": 1.9999822384835065e-05, + "loss": 0.6265, + "step": 239 + }, + { + "epoch": 0.006589785831960461, + "grad_norm": 0.39914536476135254, + "learning_rate": 1.999981980138343e-05, + "loss": 0.6101, + "step": 240 + }, + { + "epoch": 0.006617243272926963, + "grad_norm": 0.416861355304718, + "learning_rate": 1.9999817199278942e-05, + "loss": 0.5741, + "step": 241 + }, + { + "epoch": 0.006644700713893465, + "grad_norm": 0.39164242148399353, + "learning_rate": 1.99998145785216e-05, + "loss": 0.5893, + "step": 242 + }, + { + "epoch": 0.006672158154859967, + "grad_norm": 0.5115380883216858, + "learning_rate": 1.9999811939111417e-05, + "loss": 0.6448, + "step": 243 + }, + { + "epoch": 0.006699615595826469, + "grad_norm": 0.4411737322807312, + "learning_rate": 1.9999809281048387e-05, + "loss": 0.5703, + "step": 244 + }, + { + "epoch": 0.006727073036792971, + "grad_norm": 0.576815128326416, + "learning_rate": 1.9999806604332527e-05, + "loss": 0.63, + "step": 245 + }, + { + "epoch": 0.0067545304777594725, + "grad_norm": 0.38987287878990173, + "learning_rate": 1.999980390896383e-05, + "loss": 0.5591, + "step": 246 + }, + { + "epoch": 0.006781987918725974, + "grad_norm": 0.42383235692977905, + "learning_rate": 1.999980119494231e-05, + "loss": 0.6196, + "step": 247 + }, + { + "epoch": 0.006809445359692476, + "grad_norm": 0.4189009964466095, + "learning_rate": 1.999979846226797e-05, + "loss": 0.6287, + "step": 248 + }, + { + "epoch": 0.006836902800658979, + "grad_norm": 0.44138169288635254, + "learning_rate": 1.999979571094081e-05, + "loss": 0.6352, + "step": 249 + }, + { + "epoch": 0.006864360241625481, + "grad_norm": 0.4521174728870392, + "learning_rate": 1.9999792940960847e-05, + "loss": 0.5807, + "step": 250 + }, + { + "epoch": 0.006891817682591983, + "grad_norm": 0.3877543807029724, + "learning_rate": 1.9999790152328075e-05, + "loss": 0.5806, + "step": 251 + }, + { + "epoch": 0.0069192751235584845, + "grad_norm": 0.42152369022369385, + "learning_rate": 1.9999787345042502e-05, + "loss": 0.6312, + "step": 252 + }, + { + "epoch": 0.006946732564524986, + "grad_norm": 0.4098742604255676, + "learning_rate": 1.9999784519104136e-05, + "loss": 0.5933, + "step": 253 + }, + { + "epoch": 0.006974190005491488, + "grad_norm": 0.3835645318031311, + "learning_rate": 1.999978167451298e-05, + "loss": 0.626, + "step": 254 + }, + { + "epoch": 0.00700164744645799, + "grad_norm": 0.38852691650390625, + "learning_rate": 1.9999778811269035e-05, + "loss": 0.644, + "step": 255 + }, + { + "epoch": 0.007029104887424492, + "grad_norm": 0.6437236666679382, + "learning_rate": 1.9999775929372315e-05, + "loss": 0.6188, + "step": 256 + }, + { + "epoch": 0.007056562328390994, + "grad_norm": 0.4205680191516876, + "learning_rate": 1.999977302882282e-05, + "loss": 0.5978, + "step": 257 + }, + { + "epoch": 0.007084019769357496, + "grad_norm": 0.41911906003952026, + "learning_rate": 1.9999770109620557e-05, + "loss": 0.6382, + "step": 258 + }, + { + "epoch": 0.0071114772103239975, + "grad_norm": 0.4383474290370941, + "learning_rate": 1.9999767171765533e-05, + "loss": 0.7556, + "step": 259 + }, + { + "epoch": 0.007138934651290499, + "grad_norm": 0.4251858592033386, + "learning_rate": 1.999976421525775e-05, + "loss": 0.6369, + "step": 260 + }, + { + "epoch": 0.007166392092257001, + "grad_norm": 1.1386762857437134, + "learning_rate": 1.9999761240097216e-05, + "loss": 0.6859, + "step": 261 + }, + { + "epoch": 0.007193849533223504, + "grad_norm": 0.3800390660762787, + "learning_rate": 1.9999758246283936e-05, + "loss": 0.5921, + "step": 262 + }, + { + "epoch": 0.007221306974190006, + "grad_norm": 0.43522900342941284, + "learning_rate": 1.9999755233817914e-05, + "loss": 0.5619, + "step": 263 + }, + { + "epoch": 0.007248764415156508, + "grad_norm": 0.33871057629585266, + "learning_rate": 1.999975220269916e-05, + "loss": 0.576, + "step": 264 + }, + { + "epoch": 0.0072762218561230095, + "grad_norm": 0.398775190114975, + "learning_rate": 1.9999749152927674e-05, + "loss": 0.6651, + "step": 265 + }, + { + "epoch": 0.007303679297089511, + "grad_norm": 0.403473824262619, + "learning_rate": 1.9999746084503463e-05, + "loss": 0.6252, + "step": 266 + }, + { + "epoch": 0.007331136738056013, + "grad_norm": 0.5099785327911377, + "learning_rate": 1.9999742997426533e-05, + "loss": 0.6448, + "step": 267 + }, + { + "epoch": 0.007358594179022515, + "grad_norm": 0.45460227131843567, + "learning_rate": 1.9999739891696897e-05, + "loss": 0.6201, + "step": 268 + }, + { + "epoch": 0.007386051619989017, + "grad_norm": 0.4064958393573761, + "learning_rate": 1.999973676731455e-05, + "loss": 0.5237, + "step": 269 + }, + { + "epoch": 0.007413509060955519, + "grad_norm": 0.38697826862335205, + "learning_rate": 1.9999733624279502e-05, + "loss": 0.6057, + "step": 270 + }, + { + "epoch": 0.007440966501922021, + "grad_norm": 0.3963126838207245, + "learning_rate": 1.999973046259176e-05, + "loss": 0.5588, + "step": 271 + }, + { + "epoch": 0.0074684239428885225, + "grad_norm": 0.3703741431236267, + "learning_rate": 1.9999727282251332e-05, + "loss": 0.5522, + "step": 272 + }, + { + "epoch": 0.007495881383855024, + "grad_norm": 0.4151833951473236, + "learning_rate": 1.999972408325822e-05, + "loss": 0.6224, + "step": 273 + }, + { + "epoch": 0.007523338824821526, + "grad_norm": 0.4059694707393646, + "learning_rate": 1.9999720865612428e-05, + "loss": 0.6942, + "step": 274 + }, + { + "epoch": 0.007550796265788029, + "grad_norm": 0.39608219265937805, + "learning_rate": 1.9999717629313968e-05, + "loss": 0.6273, + "step": 275 + }, + { + "epoch": 0.007578253706754531, + "grad_norm": 0.4666145145893097, + "learning_rate": 1.999971437436284e-05, + "loss": 0.768, + "step": 276 + }, + { + "epoch": 0.007605711147721033, + "grad_norm": 0.48801395297050476, + "learning_rate": 1.9999711100759053e-05, + "loss": 0.5688, + "step": 277 + }, + { + "epoch": 0.0076331685886875346, + "grad_norm": 0.46614870429039, + "learning_rate": 1.9999707808502618e-05, + "loss": 0.6076, + "step": 278 + }, + { + "epoch": 0.007660626029654036, + "grad_norm": 0.4650278687477112, + "learning_rate": 1.9999704497593532e-05, + "loss": 0.7125, + "step": 279 + }, + { + "epoch": 0.007688083470620538, + "grad_norm": 0.3995112180709839, + "learning_rate": 1.9999701168031808e-05, + "loss": 0.572, + "step": 280 + }, + { + "epoch": 0.00771554091158704, + "grad_norm": 0.4158041775226593, + "learning_rate": 1.999969781981745e-05, + "loss": 0.5786, + "step": 281 + }, + { + "epoch": 0.007742998352553542, + "grad_norm": 0.42611992359161377, + "learning_rate": 1.9999694452950463e-05, + "loss": 0.6018, + "step": 282 + }, + { + "epoch": 0.007770455793520044, + "grad_norm": 0.38140007853507996, + "learning_rate": 1.9999691067430853e-05, + "loss": 0.5825, + "step": 283 + }, + { + "epoch": 0.007797913234486546, + "grad_norm": 0.44541677832603455, + "learning_rate": 1.9999687663258628e-05, + "loss": 0.6411, + "step": 284 + }, + { + "epoch": 0.007825370675453048, + "grad_norm": 0.40978023409843445, + "learning_rate": 1.9999684240433796e-05, + "loss": 0.5177, + "step": 285 + }, + { + "epoch": 0.00785282811641955, + "grad_norm": 0.36902597546577454, + "learning_rate": 1.999968079895636e-05, + "loss": 0.5387, + "step": 286 + }, + { + "epoch": 0.007880285557386051, + "grad_norm": 0.5806494951248169, + "learning_rate": 1.999967733882633e-05, + "loss": 0.5714, + "step": 287 + }, + { + "epoch": 0.007907742998352554, + "grad_norm": 0.47856956720352173, + "learning_rate": 1.9999673860043706e-05, + "loss": 0.5658, + "step": 288 + }, + { + "epoch": 0.007935200439319055, + "grad_norm": 0.40733814239501953, + "learning_rate": 1.99996703626085e-05, + "loss": 0.6782, + "step": 289 + }, + { + "epoch": 0.007962657880285558, + "grad_norm": 0.42939478158950806, + "learning_rate": 1.999966684652072e-05, + "loss": 0.6381, + "step": 290 + }, + { + "epoch": 0.007990115321252059, + "grad_norm": 0.4574210047721863, + "learning_rate": 1.999966331178037e-05, + "loss": 0.5677, + "step": 291 + }, + { + "epoch": 0.008017572762218561, + "grad_norm": 0.4332565367221832, + "learning_rate": 1.999965975838745e-05, + "loss": 0.6322, + "step": 292 + }, + { + "epoch": 0.008045030203185062, + "grad_norm": 0.3806329369544983, + "learning_rate": 1.999965618634198e-05, + "loss": 0.6067, + "step": 293 + }, + { + "epoch": 0.008072487644151565, + "grad_norm": 0.42823880910873413, + "learning_rate": 1.999965259564396e-05, + "loss": 0.6378, + "step": 294 + }, + { + "epoch": 0.008099945085118066, + "grad_norm": 0.42657193541526794, + "learning_rate": 1.999964898629339e-05, + "loss": 0.6237, + "step": 295 + }, + { + "epoch": 0.008127402526084569, + "grad_norm": 0.4090295732021332, + "learning_rate": 1.999964535829029e-05, + "loss": 0.5975, + "step": 296 + }, + { + "epoch": 0.008154859967051072, + "grad_norm": 0.4147412180900574, + "learning_rate": 1.9999641711634656e-05, + "loss": 0.6473, + "step": 297 + }, + { + "epoch": 0.008182317408017573, + "grad_norm": 0.5293698906898499, + "learning_rate": 1.9999638046326497e-05, + "loss": 0.6546, + "step": 298 + }, + { + "epoch": 0.008209774848984075, + "grad_norm": 0.4355682134628296, + "learning_rate": 1.9999634362365825e-05, + "loss": 0.6017, + "step": 299 + }, + { + "epoch": 0.008237232289950576, + "grad_norm": 0.36183473467826843, + "learning_rate": 1.999963065975264e-05, + "loss": 0.7503, + "step": 300 + }, + { + "epoch": 0.008264689730917079, + "grad_norm": 0.39521846175193787, + "learning_rate": 1.9999626938486954e-05, + "loss": 0.6294, + "step": 301 + }, + { + "epoch": 0.00829214717188358, + "grad_norm": 0.37467971444129944, + "learning_rate": 1.9999623198568775e-05, + "loss": 0.5676, + "step": 302 + }, + { + "epoch": 0.008319604612850083, + "grad_norm": 0.3913743197917938, + "learning_rate": 1.9999619439998104e-05, + "loss": 0.6654, + "step": 303 + }, + { + "epoch": 0.008347062053816584, + "grad_norm": 0.36361223459243774, + "learning_rate": 1.9999615662774955e-05, + "loss": 0.6202, + "step": 304 + }, + { + "epoch": 0.008374519494783086, + "grad_norm": 0.35502925515174866, + "learning_rate": 1.999961186689933e-05, + "loss": 0.5438, + "step": 305 + }, + { + "epoch": 0.008401976935749587, + "grad_norm": 0.41663822531700134, + "learning_rate": 1.9999608052371233e-05, + "loss": 0.6184, + "step": 306 + }, + { + "epoch": 0.00842943437671609, + "grad_norm": 0.40139421820640564, + "learning_rate": 1.9999604219190678e-05, + "loss": 0.6051, + "step": 307 + }, + { + "epoch": 0.008456891817682591, + "grad_norm": 0.3863654136657715, + "learning_rate": 1.999960036735767e-05, + "loss": 0.5994, + "step": 308 + }, + { + "epoch": 0.008484349258649094, + "grad_norm": 0.42359796166419983, + "learning_rate": 1.9999596496872217e-05, + "loss": 0.5385, + "step": 309 + }, + { + "epoch": 0.008511806699615597, + "grad_norm": 0.372025728225708, + "learning_rate": 1.9999592607734324e-05, + "loss": 0.5407, + "step": 310 + }, + { + "epoch": 0.008539264140582098, + "grad_norm": 0.34968510270118713, + "learning_rate": 1.9999588699943997e-05, + "loss": 0.5633, + "step": 311 + }, + { + "epoch": 0.0085667215815486, + "grad_norm": 0.43541666865348816, + "learning_rate": 1.999958477350125e-05, + "loss": 0.6466, + "step": 312 + }, + { + "epoch": 0.008594179022515101, + "grad_norm": 0.3847510516643524, + "learning_rate": 1.9999580828406082e-05, + "loss": 0.6506, + "step": 313 + }, + { + "epoch": 0.008621636463481604, + "grad_norm": 0.38849589228630066, + "learning_rate": 1.999957686465851e-05, + "loss": 0.5717, + "step": 314 + }, + { + "epoch": 0.008649093904448105, + "grad_norm": 0.389700710773468, + "learning_rate": 1.9999572882258528e-05, + "loss": 0.6487, + "step": 315 + }, + { + "epoch": 0.008676551345414608, + "grad_norm": 0.5243780016899109, + "learning_rate": 1.9999568881206156e-05, + "loss": 0.5612, + "step": 316 + }, + { + "epoch": 0.008704008786381109, + "grad_norm": 0.5033589601516724, + "learning_rate": 1.9999564861501394e-05, + "loss": 0.6318, + "step": 317 + }, + { + "epoch": 0.008731466227347611, + "grad_norm": 0.39670807123184204, + "learning_rate": 1.999956082314425e-05, + "loss": 0.5521, + "step": 318 + }, + { + "epoch": 0.008758923668314112, + "grad_norm": 0.39557626843452454, + "learning_rate": 1.999955676613474e-05, + "loss": 0.626, + "step": 319 + }, + { + "epoch": 0.008786381109280615, + "grad_norm": 0.3835965096950531, + "learning_rate": 1.9999552690472862e-05, + "loss": 0.5775, + "step": 320 + }, + { + "epoch": 0.008813838550247116, + "grad_norm": 0.3820382058620453, + "learning_rate": 1.9999548596158626e-05, + "loss": 0.6041, + "step": 321 + }, + { + "epoch": 0.008841295991213619, + "grad_norm": 0.36794647574424744, + "learning_rate": 1.9999544483192043e-05, + "loss": 0.6059, + "step": 322 + }, + { + "epoch": 0.008868753432180122, + "grad_norm": 0.39219069480895996, + "learning_rate": 1.9999540351573114e-05, + "loss": 0.6153, + "step": 323 + }, + { + "epoch": 0.008896210873146623, + "grad_norm": 0.39058953523635864, + "learning_rate": 1.999953620130185e-05, + "loss": 0.5835, + "step": 324 + }, + { + "epoch": 0.008923668314113125, + "grad_norm": 0.37050989270210266, + "learning_rate": 1.9999532032378266e-05, + "loss": 0.5639, + "step": 325 + }, + { + "epoch": 0.008951125755079626, + "grad_norm": 0.37111926078796387, + "learning_rate": 1.999952784480236e-05, + "loss": 0.5823, + "step": 326 + }, + { + "epoch": 0.008978583196046129, + "grad_norm": 0.4303293824195862, + "learning_rate": 1.9999523638574144e-05, + "loss": 0.5708, + "step": 327 + }, + { + "epoch": 0.00900604063701263, + "grad_norm": 0.36852791905403137, + "learning_rate": 1.999951941369362e-05, + "loss": 0.6135, + "step": 328 + }, + { + "epoch": 0.009033498077979133, + "grad_norm": 0.799198567867279, + "learning_rate": 1.9999515170160806e-05, + "loss": 0.52, + "step": 329 + }, + { + "epoch": 0.009060955518945634, + "grad_norm": 0.39080047607421875, + "learning_rate": 1.9999510907975702e-05, + "loss": 0.5637, + "step": 330 + }, + { + "epoch": 0.009088412959912136, + "grad_norm": 0.40158334374427795, + "learning_rate": 1.9999506627138322e-05, + "loss": 0.634, + "step": 331 + }, + { + "epoch": 0.009115870400878637, + "grad_norm": 0.35106492042541504, + "learning_rate": 1.9999502327648667e-05, + "loss": 0.5729, + "step": 332 + }, + { + "epoch": 0.00914332784184514, + "grad_norm": 0.39517080783843994, + "learning_rate": 1.9999498009506754e-05, + "loss": 0.526, + "step": 333 + }, + { + "epoch": 0.009170785282811641, + "grad_norm": 0.3985733985900879, + "learning_rate": 1.999949367271258e-05, + "loss": 0.497, + "step": 334 + }, + { + "epoch": 0.009198242723778144, + "grad_norm": 0.4160799980163574, + "learning_rate": 1.9999489317266162e-05, + "loss": 0.6127, + "step": 335 + }, + { + "epoch": 0.009225700164744647, + "grad_norm": 0.40741023421287537, + "learning_rate": 1.99994849431675e-05, + "loss": 0.6519, + "step": 336 + }, + { + "epoch": 0.009253157605711148, + "grad_norm": 0.40619391202926636, + "learning_rate": 1.9999480550416615e-05, + "loss": 0.5528, + "step": 337 + }, + { + "epoch": 0.00928061504667765, + "grad_norm": 0.4305838644504547, + "learning_rate": 1.99994761390135e-05, + "loss": 0.58, + "step": 338 + }, + { + "epoch": 0.009308072487644151, + "grad_norm": 0.4111979305744171, + "learning_rate": 1.9999471708958176e-05, + "loss": 0.5665, + "step": 339 + }, + { + "epoch": 0.009335529928610654, + "grad_norm": 0.43987253308296204, + "learning_rate": 1.9999467260250643e-05, + "loss": 0.6567, + "step": 340 + }, + { + "epoch": 0.009362987369577155, + "grad_norm": 0.3774475157260895, + "learning_rate": 1.9999462792890913e-05, + "loss": 0.5573, + "step": 341 + }, + { + "epoch": 0.009390444810543658, + "grad_norm": 0.3702661693096161, + "learning_rate": 1.9999458306878992e-05, + "loss": 0.6083, + "step": 342 + }, + { + "epoch": 0.009417902251510159, + "grad_norm": 0.4131929874420166, + "learning_rate": 1.999945380221489e-05, + "loss": 0.6789, + "step": 343 + }, + { + "epoch": 0.009445359692476661, + "grad_norm": 0.4229578673839569, + "learning_rate": 1.9999449278898616e-05, + "loss": 0.5318, + "step": 344 + }, + { + "epoch": 0.009472817133443162, + "grad_norm": 0.38716715574264526, + "learning_rate": 1.999944473693018e-05, + "loss": 0.6216, + "step": 345 + }, + { + "epoch": 0.009500274574409665, + "grad_norm": 0.4167757034301758, + "learning_rate": 1.9999440176309586e-05, + "loss": 0.6254, + "step": 346 + }, + { + "epoch": 0.009527732015376166, + "grad_norm": 0.38367530703544617, + "learning_rate": 1.9999435597036844e-05, + "loss": 0.5595, + "step": 347 + }, + { + "epoch": 0.009555189456342669, + "grad_norm": 0.4259548783302307, + "learning_rate": 1.9999430999111962e-05, + "loss": 0.5517, + "step": 348 + }, + { + "epoch": 0.009582646897309172, + "grad_norm": 0.45225411653518677, + "learning_rate": 1.9999426382534954e-05, + "loss": 0.6619, + "step": 349 + }, + { + "epoch": 0.009610104338275673, + "grad_norm": 0.3605313301086426, + "learning_rate": 1.999942174730582e-05, + "loss": 0.5538, + "step": 350 + }, + { + "epoch": 0.009637561779242175, + "grad_norm": 0.4614499807357788, + "learning_rate": 1.9999417093424576e-05, + "loss": 0.7169, + "step": 351 + }, + { + "epoch": 0.009665019220208676, + "grad_norm": 0.35982346534729004, + "learning_rate": 1.9999412420891227e-05, + "loss": 0.6093, + "step": 352 + }, + { + "epoch": 0.009692476661175179, + "grad_norm": 0.5104308724403381, + "learning_rate": 1.9999407729705778e-05, + "loss": 0.4833, + "step": 353 + }, + { + "epoch": 0.00971993410214168, + "grad_norm": 0.3800354599952698, + "learning_rate": 1.9999403019868247e-05, + "loss": 0.5606, + "step": 354 + }, + { + "epoch": 0.009747391543108183, + "grad_norm": 0.4136630594730377, + "learning_rate": 1.9999398291378637e-05, + "loss": 0.5894, + "step": 355 + }, + { + "epoch": 0.009774848984074684, + "grad_norm": 0.44301921129226685, + "learning_rate": 1.999939354423696e-05, + "loss": 0.7001, + "step": 356 + }, + { + "epoch": 0.009802306425041186, + "grad_norm": 0.7333250045776367, + "learning_rate": 1.9999388778443222e-05, + "loss": 0.6461, + "step": 357 + }, + { + "epoch": 0.009829763866007687, + "grad_norm": 0.37262406945228577, + "learning_rate": 1.999938399399743e-05, + "loss": 0.5953, + "step": 358 + }, + { + "epoch": 0.00985722130697419, + "grad_norm": 0.3611081540584564, + "learning_rate": 1.9999379190899596e-05, + "loss": 0.546, + "step": 359 + }, + { + "epoch": 0.009884678747940691, + "grad_norm": 0.36833998560905457, + "learning_rate": 1.9999374369149727e-05, + "loss": 0.5865, + "step": 360 + }, + { + "epoch": 0.009912136188907194, + "grad_norm": 0.4237898290157318, + "learning_rate": 1.9999369528747838e-05, + "loss": 0.4831, + "step": 361 + }, + { + "epoch": 0.009939593629873695, + "grad_norm": 0.43199780583381653, + "learning_rate": 1.9999364669693927e-05, + "loss": 0.5832, + "step": 362 + }, + { + "epoch": 0.009967051070840198, + "grad_norm": 0.38108187913894653, + "learning_rate": 1.9999359791988015e-05, + "loss": 0.6026, + "step": 363 + }, + { + "epoch": 0.0099945085118067, + "grad_norm": 0.38783499598503113, + "learning_rate": 1.9999354895630102e-05, + "loss": 0.6187, + "step": 364 + }, + { + "epoch": 0.010021965952773201, + "grad_norm": 0.3971196711063385, + "learning_rate": 1.99993499806202e-05, + "loss": 0.6038, + "step": 365 + }, + { + "epoch": 0.010049423393739704, + "grad_norm": 0.38506606221199036, + "learning_rate": 1.9999345046958324e-05, + "loss": 0.5458, + "step": 366 + }, + { + "epoch": 0.010076880834706205, + "grad_norm": 0.36890357732772827, + "learning_rate": 1.9999340094644473e-05, + "loss": 0.6481, + "step": 367 + }, + { + "epoch": 0.010104338275672708, + "grad_norm": 0.3652532398700714, + "learning_rate": 1.9999335123678665e-05, + "loss": 0.5744, + "step": 368 + }, + { + "epoch": 0.010131795716639209, + "grad_norm": 0.3595273792743683, + "learning_rate": 1.9999330134060903e-05, + "loss": 0.5397, + "step": 369 + }, + { + "epoch": 0.010159253157605712, + "grad_norm": 0.37918394804000854, + "learning_rate": 1.99993251257912e-05, + "loss": 0.5599, + "step": 370 + }, + { + "epoch": 0.010186710598572212, + "grad_norm": 0.5408847332000732, + "learning_rate": 1.9999320098869563e-05, + "loss": 0.7187, + "step": 371 + }, + { + "epoch": 0.010214168039538715, + "grad_norm": 0.41097304224967957, + "learning_rate": 1.9999315053296002e-05, + "loss": 0.6566, + "step": 372 + }, + { + "epoch": 0.010241625480505216, + "grad_norm": 0.39629796147346497, + "learning_rate": 1.999930998907053e-05, + "loss": 0.5802, + "step": 373 + }, + { + "epoch": 0.010269082921471719, + "grad_norm": 0.37485307455062866, + "learning_rate": 1.999930490619315e-05, + "loss": 0.5374, + "step": 374 + }, + { + "epoch": 0.01029654036243822, + "grad_norm": 0.4000895321369171, + "learning_rate": 1.999929980466388e-05, + "loss": 0.5872, + "step": 375 + }, + { + "epoch": 0.010323997803404723, + "grad_norm": 0.3993488848209381, + "learning_rate": 1.999929468448272e-05, + "loss": 0.5805, + "step": 376 + }, + { + "epoch": 0.010351455244371225, + "grad_norm": 0.3904055058956146, + "learning_rate": 1.9999289545649687e-05, + "loss": 0.6936, + "step": 377 + }, + { + "epoch": 0.010378912685337726, + "grad_norm": 0.4211486577987671, + "learning_rate": 1.9999284388164784e-05, + "loss": 0.6477, + "step": 378 + }, + { + "epoch": 0.010406370126304229, + "grad_norm": 0.39395758509635925, + "learning_rate": 1.9999279212028026e-05, + "loss": 0.6097, + "step": 379 + }, + { + "epoch": 0.01043382756727073, + "grad_norm": 0.3844999074935913, + "learning_rate": 1.9999274017239423e-05, + "loss": 0.5465, + "step": 380 + }, + { + "epoch": 0.010461285008237233, + "grad_norm": 0.4007297158241272, + "learning_rate": 1.999926880379898e-05, + "loss": 0.5982, + "step": 381 + }, + { + "epoch": 0.010488742449203734, + "grad_norm": 0.5679020285606384, + "learning_rate": 1.999926357170671e-05, + "loss": 0.7846, + "step": 382 + }, + { + "epoch": 0.010516199890170237, + "grad_norm": 0.4418479800224304, + "learning_rate": 1.9999258320962622e-05, + "loss": 0.57, + "step": 383 + }, + { + "epoch": 0.010543657331136738, + "grad_norm": 0.418607234954834, + "learning_rate": 1.9999253051566727e-05, + "loss": 0.6835, + "step": 384 + }, + { + "epoch": 0.01057111477210324, + "grad_norm": 0.4055803716182709, + "learning_rate": 1.999924776351903e-05, + "loss": 0.6286, + "step": 385 + }, + { + "epoch": 0.010598572213069741, + "grad_norm": 0.3648841977119446, + "learning_rate": 1.999924245681955e-05, + "loss": 0.5695, + "step": 386 + }, + { + "epoch": 0.010626029654036244, + "grad_norm": 0.44627705216407776, + "learning_rate": 1.9999237131468286e-05, + "loss": 0.5807, + "step": 387 + }, + { + "epoch": 0.010653487095002745, + "grad_norm": 0.3643602430820465, + "learning_rate": 1.9999231787465255e-05, + "loss": 0.5563, + "step": 388 + }, + { + "epoch": 0.010680944535969248, + "grad_norm": 0.37555938959121704, + "learning_rate": 1.999922642481047e-05, + "loss": 0.6015, + "step": 389 + }, + { + "epoch": 0.01070840197693575, + "grad_norm": 0.49338334798812866, + "learning_rate": 1.999922104350393e-05, + "loss": 0.6029, + "step": 390 + }, + { + "epoch": 0.010735859417902251, + "grad_norm": 0.4592312276363373, + "learning_rate": 1.9999215643545656e-05, + "loss": 0.6884, + "step": 391 + }, + { + "epoch": 0.010763316858868754, + "grad_norm": 0.3789234459400177, + "learning_rate": 1.999921022493565e-05, + "loss": 0.5956, + "step": 392 + }, + { + "epoch": 0.010790774299835255, + "grad_norm": 0.4043075144290924, + "learning_rate": 1.9999204787673926e-05, + "loss": 0.6012, + "step": 393 + }, + { + "epoch": 0.010818231740801758, + "grad_norm": 0.4544488191604614, + "learning_rate": 1.9999199331760495e-05, + "loss": 0.5301, + "step": 394 + }, + { + "epoch": 0.010845689181768259, + "grad_norm": 0.3869175314903259, + "learning_rate": 1.9999193857195368e-05, + "loss": 0.5991, + "step": 395 + }, + { + "epoch": 0.010873146622734762, + "grad_norm": 0.48198366165161133, + "learning_rate": 1.999918836397855e-05, + "loss": 0.6078, + "step": 396 + }, + { + "epoch": 0.010900604063701263, + "grad_norm": 0.4633484482765198, + "learning_rate": 1.9999182852110053e-05, + "loss": 0.7243, + "step": 397 + }, + { + "epoch": 0.010928061504667765, + "grad_norm": 0.39428600668907166, + "learning_rate": 1.9999177321589892e-05, + "loss": 0.6114, + "step": 398 + }, + { + "epoch": 0.010955518945634266, + "grad_norm": 0.3761778175830841, + "learning_rate": 1.999917177241807e-05, + "loss": 0.5816, + "step": 399 + }, + { + "epoch": 0.010982976386600769, + "grad_norm": 1.1282670497894287, + "learning_rate": 1.9999166204594605e-05, + "loss": 0.5831, + "step": 400 + }, + { + "epoch": 0.01101043382756727, + "grad_norm": 0.4686826765537262, + "learning_rate": 1.9999160618119502e-05, + "loss": 0.5724, + "step": 401 + }, + { + "epoch": 0.011037891268533773, + "grad_norm": 0.39842167496681213, + "learning_rate": 1.9999155012992774e-05, + "loss": 0.5948, + "step": 402 + }, + { + "epoch": 0.011065348709500275, + "grad_norm": 0.421371728181839, + "learning_rate": 1.9999149389214428e-05, + "loss": 0.6233, + "step": 403 + }, + { + "epoch": 0.011092806150466776, + "grad_norm": 0.4203774929046631, + "learning_rate": 1.999914374678448e-05, + "loss": 0.6396, + "step": 404 + }, + { + "epoch": 0.011120263591433279, + "grad_norm": 0.4540972411632538, + "learning_rate": 1.999913808570294e-05, + "loss": 0.6167, + "step": 405 + }, + { + "epoch": 0.01114772103239978, + "grad_norm": 0.4429083466529846, + "learning_rate": 1.999913240596981e-05, + "loss": 0.6048, + "step": 406 + }, + { + "epoch": 0.011175178473366283, + "grad_norm": 0.3499901294708252, + "learning_rate": 1.9999126707585108e-05, + "loss": 0.5277, + "step": 407 + }, + { + "epoch": 0.011202635914332784, + "grad_norm": 0.38898661732673645, + "learning_rate": 1.9999120990548847e-05, + "loss": 0.5162, + "step": 408 + }, + { + "epoch": 0.011230093355299287, + "grad_norm": 0.37125349044799805, + "learning_rate": 1.999911525486103e-05, + "loss": 0.5905, + "step": 409 + }, + { + "epoch": 0.011257550796265788, + "grad_norm": 0.38218599557876587, + "learning_rate": 1.9999109500521673e-05, + "loss": 0.6646, + "step": 410 + }, + { + "epoch": 0.01128500823723229, + "grad_norm": 0.3971567451953888, + "learning_rate": 1.9999103727530786e-05, + "loss": 0.6434, + "step": 411 + }, + { + "epoch": 0.011312465678198791, + "grad_norm": 0.4186413288116455, + "learning_rate": 1.9999097935888382e-05, + "loss": 0.6382, + "step": 412 + }, + { + "epoch": 0.011339923119165294, + "grad_norm": 0.36706238985061646, + "learning_rate": 1.9999092125594465e-05, + "loss": 0.5521, + "step": 413 + }, + { + "epoch": 0.011367380560131795, + "grad_norm": 0.4050791561603546, + "learning_rate": 1.999908629664905e-05, + "loss": 0.6328, + "step": 414 + }, + { + "epoch": 0.011394838001098298, + "grad_norm": 0.38581904768943787, + "learning_rate": 1.9999080449052147e-05, + "loss": 0.6358, + "step": 415 + }, + { + "epoch": 0.0114222954420648, + "grad_norm": 0.4145922362804413, + "learning_rate": 1.999907458280377e-05, + "loss": 0.6181, + "step": 416 + }, + { + "epoch": 0.011449752883031301, + "grad_norm": 0.35262760519981384, + "learning_rate": 1.999906869790393e-05, + "loss": 0.5745, + "step": 417 + }, + { + "epoch": 0.011477210323997804, + "grad_norm": 0.49101522564888, + "learning_rate": 1.999906279435263e-05, + "loss": 0.5586, + "step": 418 + }, + { + "epoch": 0.011504667764964305, + "grad_norm": 0.41676822304725647, + "learning_rate": 1.999905687214989e-05, + "loss": 0.6504, + "step": 419 + }, + { + "epoch": 0.011532125205930808, + "grad_norm": 0.3991329073905945, + "learning_rate": 1.9999050931295713e-05, + "loss": 0.5266, + "step": 420 + }, + { + "epoch": 0.011559582646897309, + "grad_norm": 0.6659475564956665, + "learning_rate": 1.999904497179012e-05, + "loss": 0.5735, + "step": 421 + }, + { + "epoch": 0.011587040087863812, + "grad_norm": 0.34640198945999146, + "learning_rate": 1.9999038993633114e-05, + "loss": 0.586, + "step": 422 + }, + { + "epoch": 0.011614497528830313, + "grad_norm": 0.36985453963279724, + "learning_rate": 1.9999032996824714e-05, + "loss": 0.4776, + "step": 423 + }, + { + "epoch": 0.011641954969796815, + "grad_norm": 0.4077282249927521, + "learning_rate": 1.999902698136492e-05, + "loss": 0.514, + "step": 424 + }, + { + "epoch": 0.011669412410763316, + "grad_norm": 0.41774219274520874, + "learning_rate": 1.999902094725375e-05, + "loss": 0.6577, + "step": 425 + }, + { + "epoch": 0.011696869851729819, + "grad_norm": 0.4046265482902527, + "learning_rate": 1.999901489449122e-05, + "loss": 0.499, + "step": 426 + }, + { + "epoch": 0.01172432729269632, + "grad_norm": 0.4133409261703491, + "learning_rate": 1.999900882307733e-05, + "loss": 0.6334, + "step": 427 + }, + { + "epoch": 0.011751784733662823, + "grad_norm": 0.39341819286346436, + "learning_rate": 1.99990027330121e-05, + "loss": 0.5969, + "step": 428 + }, + { + "epoch": 0.011779242174629324, + "grad_norm": 0.4167121946811676, + "learning_rate": 1.9998996624295536e-05, + "loss": 0.6754, + "step": 429 + }, + { + "epoch": 0.011806699615595826, + "grad_norm": 0.3867819309234619, + "learning_rate": 1.9998990496927652e-05, + "loss": 0.606, + "step": 430 + }, + { + "epoch": 0.01183415705656233, + "grad_norm": 0.3635198771953583, + "learning_rate": 1.9998984350908463e-05, + "loss": 0.5184, + "step": 431 + }, + { + "epoch": 0.01186161449752883, + "grad_norm": 0.4112887382507324, + "learning_rate": 1.9998978186237977e-05, + "loss": 0.6396, + "step": 432 + }, + { + "epoch": 0.011889071938495333, + "grad_norm": 0.42449551820755005, + "learning_rate": 1.9998972002916202e-05, + "loss": 0.5869, + "step": 433 + }, + { + "epoch": 0.011916529379461834, + "grad_norm": 0.39251822233200073, + "learning_rate": 1.9998965800943154e-05, + "loss": 0.5808, + "step": 434 + }, + { + "epoch": 0.011943986820428337, + "grad_norm": 0.42648008465766907, + "learning_rate": 1.9998959580318844e-05, + "loss": 0.5732, + "step": 435 + }, + { + "epoch": 0.011971444261394838, + "grad_norm": 0.4095357060432434, + "learning_rate": 1.9998953341043282e-05, + "loss": 0.5974, + "step": 436 + }, + { + "epoch": 0.01199890170236134, + "grad_norm": 0.4035501480102539, + "learning_rate": 1.999894708311648e-05, + "loss": 0.635, + "step": 437 + }, + { + "epoch": 0.012026359143327841, + "grad_norm": 0.3491573631763458, + "learning_rate": 1.9998940806538452e-05, + "loss": 0.532, + "step": 438 + }, + { + "epoch": 0.012053816584294344, + "grad_norm": 0.34197482466697693, + "learning_rate": 1.999893451130921e-05, + "loss": 0.5335, + "step": 439 + }, + { + "epoch": 0.012081274025260845, + "grad_norm": 0.39727604389190674, + "learning_rate": 1.9998928197428763e-05, + "loss": 0.6076, + "step": 440 + }, + { + "epoch": 0.012108731466227348, + "grad_norm": 0.36327365040779114, + "learning_rate": 1.9998921864897123e-05, + "loss": 0.5861, + "step": 441 + }, + { + "epoch": 0.012136188907193849, + "grad_norm": 0.46729639172554016, + "learning_rate": 1.9998915513714302e-05, + "loss": 0.6098, + "step": 442 + }, + { + "epoch": 0.012163646348160351, + "grad_norm": 0.43233320116996765, + "learning_rate": 1.999890914388031e-05, + "loss": 0.5463, + "step": 443 + }, + { + "epoch": 0.012191103789126854, + "grad_norm": 0.4136062264442444, + "learning_rate": 1.9998902755395165e-05, + "loss": 0.6441, + "step": 444 + }, + { + "epoch": 0.012218561230093355, + "grad_norm": 0.3878582715988159, + "learning_rate": 1.9998896348258875e-05, + "loss": 0.6353, + "step": 445 + }, + { + "epoch": 0.012246018671059858, + "grad_norm": 0.4148087501525879, + "learning_rate": 1.999888992247145e-05, + "loss": 0.5967, + "step": 446 + }, + { + "epoch": 0.012273476112026359, + "grad_norm": 0.40921667218208313, + "learning_rate": 1.9998883478032906e-05, + "loss": 0.6296, + "step": 447 + }, + { + "epoch": 0.012300933552992862, + "grad_norm": 0.37157028913497925, + "learning_rate": 1.9998877014943253e-05, + "loss": 0.5351, + "step": 448 + }, + { + "epoch": 0.012328390993959363, + "grad_norm": 0.3901151418685913, + "learning_rate": 1.9998870533202497e-05, + "loss": 0.646, + "step": 449 + }, + { + "epoch": 0.012355848434925865, + "grad_norm": 0.42170265316963196, + "learning_rate": 1.9998864032810662e-05, + "loss": 0.6252, + "step": 450 + }, + { + "epoch": 0.012383305875892366, + "grad_norm": 0.4750824272632599, + "learning_rate": 1.9998857513767754e-05, + "loss": 0.5375, + "step": 451 + }, + { + "epoch": 0.012410763316858869, + "grad_norm": 0.41642192006111145, + "learning_rate": 1.9998850976073788e-05, + "loss": 0.5099, + "step": 452 + }, + { + "epoch": 0.01243822075782537, + "grad_norm": 0.4203537702560425, + "learning_rate": 1.999884441972877e-05, + "loss": 0.5477, + "step": 453 + }, + { + "epoch": 0.012465678198791873, + "grad_norm": 0.3524506688117981, + "learning_rate": 1.9998837844732714e-05, + "loss": 0.5242, + "step": 454 + }, + { + "epoch": 0.012493135639758374, + "grad_norm": 0.46098700165748596, + "learning_rate": 1.9998831251085638e-05, + "loss": 0.6856, + "step": 455 + }, + { + "epoch": 0.012520593080724876, + "grad_norm": 0.37626954913139343, + "learning_rate": 1.9998824638787547e-05, + "loss": 0.5859, + "step": 456 + }, + { + "epoch": 0.01254805052169138, + "grad_norm": 0.7488641142845154, + "learning_rate": 1.9998818007838458e-05, + "loss": 0.6959, + "step": 457 + }, + { + "epoch": 0.01257550796265788, + "grad_norm": 0.4118775427341461, + "learning_rate": 1.9998811358238384e-05, + "loss": 0.5697, + "step": 458 + }, + { + "epoch": 0.012602965403624383, + "grad_norm": 0.4069029688835144, + "learning_rate": 1.9998804689987333e-05, + "loss": 0.607, + "step": 459 + }, + { + "epoch": 0.012630422844590884, + "grad_norm": 0.4377652406692505, + "learning_rate": 1.9998798003085325e-05, + "loss": 0.5939, + "step": 460 + }, + { + "epoch": 0.012657880285557387, + "grad_norm": 0.37648123502731323, + "learning_rate": 1.9998791297532362e-05, + "loss": 0.6229, + "step": 461 + }, + { + "epoch": 0.012685337726523888, + "grad_norm": 0.3883246183395386, + "learning_rate": 1.9998784573328466e-05, + "loss": 0.6296, + "step": 462 + }, + { + "epoch": 0.01271279516749039, + "grad_norm": 0.3723233640193939, + "learning_rate": 1.9998777830473642e-05, + "loss": 0.5839, + "step": 463 + }, + { + "epoch": 0.012740252608456891, + "grad_norm": 0.5099871754646301, + "learning_rate": 1.999877106896791e-05, + "loss": 0.5836, + "step": 464 + }, + { + "epoch": 0.012767710049423394, + "grad_norm": 0.3458348214626312, + "learning_rate": 1.9998764288811273e-05, + "loss": 0.5643, + "step": 465 + }, + { + "epoch": 0.012795167490389895, + "grad_norm": 0.4301275610923767, + "learning_rate": 1.9998757490003754e-05, + "loss": 0.6464, + "step": 466 + }, + { + "epoch": 0.012822624931356398, + "grad_norm": 0.37440094351768494, + "learning_rate": 1.999875067254536e-05, + "loss": 0.6508, + "step": 467 + }, + { + "epoch": 0.012850082372322899, + "grad_norm": 0.4051024913787842, + "learning_rate": 1.9998743836436104e-05, + "loss": 0.5983, + "step": 468 + }, + { + "epoch": 0.012877539813289401, + "grad_norm": 0.38167673349380493, + "learning_rate": 1.9998736981676e-05, + "loss": 0.5775, + "step": 469 + }, + { + "epoch": 0.012904997254255904, + "grad_norm": 0.4419257342815399, + "learning_rate": 1.9998730108265062e-05, + "loss": 0.6152, + "step": 470 + }, + { + "epoch": 0.012932454695222405, + "grad_norm": 0.37587323784828186, + "learning_rate": 1.9998723216203298e-05, + "loss": 0.5778, + "step": 471 + }, + { + "epoch": 0.012959912136188908, + "grad_norm": 0.3446073532104492, + "learning_rate": 1.9998716305490722e-05, + "loss": 0.5312, + "step": 472 + }, + { + "epoch": 0.012987369577155409, + "grad_norm": 0.5147837996482849, + "learning_rate": 1.9998709376127355e-05, + "loss": 0.6409, + "step": 473 + }, + { + "epoch": 0.013014827018121912, + "grad_norm": 0.42770513892173767, + "learning_rate": 1.99987024281132e-05, + "loss": 0.6254, + "step": 474 + }, + { + "epoch": 0.013042284459088413, + "grad_norm": 0.39681941270828247, + "learning_rate": 1.9998695461448278e-05, + "loss": 0.5535, + "step": 475 + }, + { + "epoch": 0.013069741900054915, + "grad_norm": 0.44756025075912476, + "learning_rate": 1.9998688476132594e-05, + "loss": 0.6263, + "step": 476 + }, + { + "epoch": 0.013097199341021416, + "grad_norm": 0.41858989000320435, + "learning_rate": 1.9998681472166163e-05, + "loss": 0.615, + "step": 477 + }, + { + "epoch": 0.013124656781987919, + "grad_norm": 0.35515040159225464, + "learning_rate": 1.9998674449549002e-05, + "loss": 0.5665, + "step": 478 + }, + { + "epoch": 0.01315211422295442, + "grad_norm": 0.4042092263698578, + "learning_rate": 1.9998667408281124e-05, + "loss": 0.5971, + "step": 479 + }, + { + "epoch": 0.013179571663920923, + "grad_norm": 0.3441367745399475, + "learning_rate": 1.9998660348362536e-05, + "loss": 0.5473, + "step": 480 + }, + { + "epoch": 0.013207029104887424, + "grad_norm": 0.392115980386734, + "learning_rate": 1.999865326979326e-05, + "loss": 0.6438, + "step": 481 + }, + { + "epoch": 0.013234486545853926, + "grad_norm": 0.40628859400749207, + "learning_rate": 1.99986461725733e-05, + "loss": 0.5723, + "step": 482 + }, + { + "epoch": 0.01326194398682043, + "grad_norm": 0.3773040175437927, + "learning_rate": 1.9998639056702678e-05, + "loss": 0.5263, + "step": 483 + }, + { + "epoch": 0.01328940142778693, + "grad_norm": 0.3874393403530121, + "learning_rate": 1.99986319221814e-05, + "loss": 0.5682, + "step": 484 + }, + { + "epoch": 0.013316858868753433, + "grad_norm": 0.4044968783855438, + "learning_rate": 1.9998624769009484e-05, + "loss": 0.5692, + "step": 485 + }, + { + "epoch": 0.013344316309719934, + "grad_norm": 0.3392695486545563, + "learning_rate": 1.999861759718694e-05, + "loss": 0.5705, + "step": 486 + }, + { + "epoch": 0.013371773750686437, + "grad_norm": 0.37451910972595215, + "learning_rate": 1.9998610406713782e-05, + "loss": 0.611, + "step": 487 + }, + { + "epoch": 0.013399231191652938, + "grad_norm": 0.35444578528404236, + "learning_rate": 1.9998603197590025e-05, + "loss": 0.5331, + "step": 488 + }, + { + "epoch": 0.01342668863261944, + "grad_norm": 0.46706724166870117, + "learning_rate": 1.9998595969815684e-05, + "loss": 0.6238, + "step": 489 + }, + { + "epoch": 0.013454146073585941, + "grad_norm": 0.44799354672431946, + "learning_rate": 1.9998588723390768e-05, + "loss": 0.6324, + "step": 490 + }, + { + "epoch": 0.013481603514552444, + "grad_norm": 0.3829394578933716, + "learning_rate": 1.9998581458315294e-05, + "loss": 0.5986, + "step": 491 + }, + { + "epoch": 0.013509060955518945, + "grad_norm": 0.3845359683036804, + "learning_rate": 1.9998574174589277e-05, + "loss": 0.5767, + "step": 492 + }, + { + "epoch": 0.013536518396485448, + "grad_norm": 0.49297624826431274, + "learning_rate": 1.9998566872212725e-05, + "loss": 0.6295, + "step": 493 + }, + { + "epoch": 0.013563975837451949, + "grad_norm": 0.36868372559547424, + "learning_rate": 1.9998559551185653e-05, + "loss": 0.5755, + "step": 494 + }, + { + "epoch": 0.013591433278418451, + "grad_norm": 0.3506320118904114, + "learning_rate": 1.9998552211508078e-05, + "loss": 0.5656, + "step": 495 + }, + { + "epoch": 0.013618890719384952, + "grad_norm": 0.38464581966400146, + "learning_rate": 1.9998544853180013e-05, + "loss": 0.6289, + "step": 496 + }, + { + "epoch": 0.013646348160351455, + "grad_norm": 0.3753840923309326, + "learning_rate": 1.999853747620147e-05, + "loss": 0.6043, + "step": 497 + }, + { + "epoch": 0.013673805601317958, + "grad_norm": 0.3963780999183655, + "learning_rate": 1.9998530080572465e-05, + "loss": 0.5504, + "step": 498 + }, + { + "epoch": 0.013701263042284459, + "grad_norm": 0.3831250071525574, + "learning_rate": 1.9998522666293008e-05, + "loss": 0.6155, + "step": 499 + }, + { + "epoch": 0.013728720483250962, + "grad_norm": 0.3952217996120453, + "learning_rate": 1.9998515233363113e-05, + "loss": 0.6493, + "step": 500 + }, + { + "epoch": 0.013756177924217463, + "grad_norm": 0.377018541097641, + "learning_rate": 1.9998507781782802e-05, + "loss": 0.5851, + "step": 501 + }, + { + "epoch": 0.013783635365183965, + "grad_norm": 0.36269983649253845, + "learning_rate": 1.999850031155208e-05, + "loss": 0.6443, + "step": 502 + }, + { + "epoch": 0.013811092806150466, + "grad_norm": 0.39316627383232117, + "learning_rate": 1.999849282267096e-05, + "loss": 0.5832, + "step": 503 + }, + { + "epoch": 0.013838550247116969, + "grad_norm": 0.36537376046180725, + "learning_rate": 1.9998485315139466e-05, + "loss": 0.5017, + "step": 504 + }, + { + "epoch": 0.01386600768808347, + "grad_norm": 0.39261430501937866, + "learning_rate": 1.99984777889576e-05, + "loss": 0.5864, + "step": 505 + }, + { + "epoch": 0.013893465129049973, + "grad_norm": 0.3777439594268799, + "learning_rate": 1.9998470244125385e-05, + "loss": 0.62, + "step": 506 + }, + { + "epoch": 0.013920922570016474, + "grad_norm": 0.3682347536087036, + "learning_rate": 1.9998462680642833e-05, + "loss": 0.5041, + "step": 507 + }, + { + "epoch": 0.013948380010982976, + "grad_norm": 0.40784814953804016, + "learning_rate": 1.999845509850995e-05, + "loss": 0.5333, + "step": 508 + }, + { + "epoch": 0.013975837451949477, + "grad_norm": 0.4051037132740021, + "learning_rate": 1.9998447497726765e-05, + "loss": 0.6828, + "step": 509 + }, + { + "epoch": 0.01400329489291598, + "grad_norm": 0.36823195219039917, + "learning_rate": 1.9998439878293282e-05, + "loss": 0.5581, + "step": 510 + }, + { + "epoch": 0.014030752333882483, + "grad_norm": 0.3990444242954254, + "learning_rate": 1.9998432240209517e-05, + "loss": 0.5556, + "step": 511 + }, + { + "epoch": 0.014058209774848984, + "grad_norm": 0.4431035816669464, + "learning_rate": 1.9998424583475485e-05, + "loss": 0.5684, + "step": 512 + }, + { + "epoch": 0.014085667215815487, + "grad_norm": 0.4089876711368561, + "learning_rate": 1.9998416908091197e-05, + "loss": 0.617, + "step": 513 + }, + { + "epoch": 0.014113124656781988, + "grad_norm": 0.3681512176990509, + "learning_rate": 1.9998409214056677e-05, + "loss": 0.5576, + "step": 514 + }, + { + "epoch": 0.01414058209774849, + "grad_norm": 0.5452068448066711, + "learning_rate": 1.9998401501371928e-05, + "loss": 0.5729, + "step": 515 + }, + { + "epoch": 0.014168039538714991, + "grad_norm": 0.3845811188220978, + "learning_rate": 1.999839377003697e-05, + "loss": 0.5954, + "step": 516 + }, + { + "epoch": 0.014195496979681494, + "grad_norm": 0.369366854429245, + "learning_rate": 1.9998386020051814e-05, + "loss": 0.5675, + "step": 517 + }, + { + "epoch": 0.014222954420647995, + "grad_norm": 0.5670362114906311, + "learning_rate": 1.999837825141648e-05, + "loss": 0.5844, + "step": 518 + }, + { + "epoch": 0.014250411861614498, + "grad_norm": 0.40332505106925964, + "learning_rate": 1.999837046413098e-05, + "loss": 0.5621, + "step": 519 + }, + { + "epoch": 0.014277869302580999, + "grad_norm": 0.3800332546234131, + "learning_rate": 1.9998362658195328e-05, + "loss": 0.6491, + "step": 520 + }, + { + "epoch": 0.014305326743547502, + "grad_norm": 0.4471629858016968, + "learning_rate": 1.9998354833609537e-05, + "loss": 0.5931, + "step": 521 + }, + { + "epoch": 0.014332784184514002, + "grad_norm": 0.3648340702056885, + "learning_rate": 1.9998346990373623e-05, + "loss": 0.6339, + "step": 522 + }, + { + "epoch": 0.014360241625480505, + "grad_norm": 0.44361770153045654, + "learning_rate": 1.99983391284876e-05, + "loss": 0.5688, + "step": 523 + }, + { + "epoch": 0.014387699066447008, + "grad_norm": 0.37211835384368896, + "learning_rate": 1.9998331247951486e-05, + "loss": 0.5305, + "step": 524 + }, + { + "epoch": 0.014415156507413509, + "grad_norm": 0.401266872882843, + "learning_rate": 1.9998323348765292e-05, + "loss": 0.5714, + "step": 525 + }, + { + "epoch": 0.014442613948380012, + "grad_norm": 0.3878715932369232, + "learning_rate": 1.9998315430929034e-05, + "loss": 0.569, + "step": 526 + }, + { + "epoch": 0.014470071389346513, + "grad_norm": 0.3805721402168274, + "learning_rate": 1.9998307494442727e-05, + "loss": 0.5861, + "step": 527 + }, + { + "epoch": 0.014497528830313015, + "grad_norm": 0.4498625099658966, + "learning_rate": 1.9998299539306384e-05, + "loss": 0.5927, + "step": 528 + }, + { + "epoch": 0.014524986271279516, + "grad_norm": 0.38131728768348694, + "learning_rate": 1.9998291565520023e-05, + "loss": 0.6031, + "step": 529 + }, + { + "epoch": 0.014552443712246019, + "grad_norm": 0.38801324367523193, + "learning_rate": 1.9998283573083656e-05, + "loss": 0.6366, + "step": 530 + }, + { + "epoch": 0.01457990115321252, + "grad_norm": 0.402130663394928, + "learning_rate": 1.9998275561997297e-05, + "loss": 0.6269, + "step": 531 + }, + { + "epoch": 0.014607358594179023, + "grad_norm": 0.3892827332019806, + "learning_rate": 1.9998267532260967e-05, + "loss": 0.6156, + "step": 532 + }, + { + "epoch": 0.014634816035145524, + "grad_norm": 0.4167672395706177, + "learning_rate": 1.9998259483874675e-05, + "loss": 0.5741, + "step": 533 + }, + { + "epoch": 0.014662273476112027, + "grad_norm": 0.3267308473587036, + "learning_rate": 1.999825141683844e-05, + "loss": 0.5749, + "step": 534 + }, + { + "epoch": 0.014689730917078528, + "grad_norm": 0.3839011490345001, + "learning_rate": 1.999824333115227e-05, + "loss": 0.5772, + "step": 535 + }, + { + "epoch": 0.01471718835804503, + "grad_norm": 0.3850117027759552, + "learning_rate": 1.999823522681619e-05, + "loss": 0.5756, + "step": 536 + }, + { + "epoch": 0.014744645799011533, + "grad_norm": 0.4164671301841736, + "learning_rate": 1.9998227103830208e-05, + "loss": 0.6503, + "step": 537 + }, + { + "epoch": 0.014772103239978034, + "grad_norm": 0.41122978925704956, + "learning_rate": 1.999821896219434e-05, + "loss": 0.6301, + "step": 538 + }, + { + "epoch": 0.014799560680944537, + "grad_norm": 0.46953505277633667, + "learning_rate": 1.9998210801908607e-05, + "loss": 0.5889, + "step": 539 + }, + { + "epoch": 0.014827018121911038, + "grad_norm": 0.46712812781333923, + "learning_rate": 1.9998202622973014e-05, + "loss": 0.6586, + "step": 540 + }, + { + "epoch": 0.01485447556287754, + "grad_norm": 0.46527013182640076, + "learning_rate": 1.9998194425387588e-05, + "loss": 0.5824, + "step": 541 + }, + { + "epoch": 0.014881933003844041, + "grad_norm": 0.366098016500473, + "learning_rate": 1.9998186209152336e-05, + "loss": 0.6099, + "step": 542 + }, + { + "epoch": 0.014909390444810544, + "grad_norm": 0.3561984896659851, + "learning_rate": 1.9998177974267275e-05, + "loss": 0.6368, + "step": 543 + }, + { + "epoch": 0.014936847885777045, + "grad_norm": 0.4592185914516449, + "learning_rate": 1.999816972073242e-05, + "loss": 0.5208, + "step": 544 + }, + { + "epoch": 0.014964305326743548, + "grad_norm": 0.37104126811027527, + "learning_rate": 1.999816144854779e-05, + "loss": 0.5502, + "step": 545 + }, + { + "epoch": 0.014991762767710049, + "grad_norm": 0.3912109136581421, + "learning_rate": 1.9998153157713397e-05, + "loss": 0.7315, + "step": 546 + }, + { + "epoch": 0.015019220208676552, + "grad_norm": 0.5116318464279175, + "learning_rate": 1.9998144848229257e-05, + "loss": 0.6158, + "step": 547 + }, + { + "epoch": 0.015046677649643053, + "grad_norm": 0.35095739364624023, + "learning_rate": 1.9998136520095386e-05, + "loss": 0.5825, + "step": 548 + }, + { + "epoch": 0.015074135090609555, + "grad_norm": 0.4187716543674469, + "learning_rate": 1.99981281733118e-05, + "loss": 0.5321, + "step": 549 + }, + { + "epoch": 0.015101592531576058, + "grad_norm": 0.3867867588996887, + "learning_rate": 1.9998119807878513e-05, + "loss": 0.5678, + "step": 550 + }, + { + "epoch": 0.015129049972542559, + "grad_norm": 0.39295196533203125, + "learning_rate": 1.999811142379554e-05, + "loss": 0.5342, + "step": 551 + }, + { + "epoch": 0.015156507413509062, + "grad_norm": 0.35886532068252563, + "learning_rate": 1.99981030210629e-05, + "loss": 0.6229, + "step": 552 + }, + { + "epoch": 0.015183964854475563, + "grad_norm": 0.3691362738609314, + "learning_rate": 1.9998094599680605e-05, + "loss": 0.6431, + "step": 553 + }, + { + "epoch": 0.015211422295442065, + "grad_norm": 0.3991229832172394, + "learning_rate": 1.9998086159648678e-05, + "loss": 0.6209, + "step": 554 + }, + { + "epoch": 0.015238879736408566, + "grad_norm": 0.392595499753952, + "learning_rate": 1.9998077700967124e-05, + "loss": 0.6696, + "step": 555 + }, + { + "epoch": 0.015266337177375069, + "grad_norm": 0.42975157499313354, + "learning_rate": 1.999806922363597e-05, + "loss": 0.5418, + "step": 556 + }, + { + "epoch": 0.01529379461834157, + "grad_norm": 0.39465758204460144, + "learning_rate": 1.999806072765522e-05, + "loss": 0.6012, + "step": 557 + }, + { + "epoch": 0.015321252059308073, + "grad_norm": 0.47726041078567505, + "learning_rate": 1.9998052213024893e-05, + "loss": 0.5806, + "step": 558 + }, + { + "epoch": 0.015348709500274574, + "grad_norm": 0.978887140750885, + "learning_rate": 1.9998043679745012e-05, + "loss": 0.6535, + "step": 559 + }, + { + "epoch": 0.015376166941241077, + "grad_norm": 0.3782835006713867, + "learning_rate": 1.999803512781559e-05, + "loss": 0.5831, + "step": 560 + }, + { + "epoch": 0.015403624382207578, + "grad_norm": 0.3892028033733368, + "learning_rate": 1.999802655723664e-05, + "loss": 0.5753, + "step": 561 + }, + { + "epoch": 0.01543108182317408, + "grad_norm": 0.39572474360466003, + "learning_rate": 1.999801796800818e-05, + "loss": 0.6076, + "step": 562 + }, + { + "epoch": 0.015458539264140581, + "grad_norm": 0.46114587783813477, + "learning_rate": 1.9998009360130225e-05, + "loss": 0.6362, + "step": 563 + }, + { + "epoch": 0.015485996705107084, + "grad_norm": 0.3802315592765808, + "learning_rate": 1.9998000733602788e-05, + "loss": 0.5781, + "step": 564 + }, + { + "epoch": 0.015513454146073587, + "grad_norm": 0.3613618016242981, + "learning_rate": 1.9997992088425895e-05, + "loss": 0.5668, + "step": 565 + }, + { + "epoch": 0.015540911587040088, + "grad_norm": 0.3899209201335907, + "learning_rate": 1.9997983424599552e-05, + "loss": 0.6582, + "step": 566 + }, + { + "epoch": 0.01556836902800659, + "grad_norm": 0.3839051127433777, + "learning_rate": 1.999797474212378e-05, + "loss": 0.5908, + "step": 567 + }, + { + "epoch": 0.015595826468973091, + "grad_norm": 0.5837864279747009, + "learning_rate": 1.9997966040998595e-05, + "loss": 0.5791, + "step": 568 + }, + { + "epoch": 0.015623283909939594, + "grad_norm": 0.39719292521476746, + "learning_rate": 1.9997957321224007e-05, + "loss": 0.6178, + "step": 569 + }, + { + "epoch": 0.015650741350906095, + "grad_norm": 0.3915398120880127, + "learning_rate": 1.9997948582800045e-05, + "loss": 0.5295, + "step": 570 + }, + { + "epoch": 0.015678198791872596, + "grad_norm": 0.3873436748981476, + "learning_rate": 1.9997939825726715e-05, + "loss": 0.5852, + "step": 571 + }, + { + "epoch": 0.0157056562328391, + "grad_norm": 0.3592807948589325, + "learning_rate": 1.9997931050004036e-05, + "loss": 0.4739, + "step": 572 + }, + { + "epoch": 0.0157331136738056, + "grad_norm": 0.4087948799133301, + "learning_rate": 1.9997922255632023e-05, + "loss": 0.6564, + "step": 573 + }, + { + "epoch": 0.015760571114772103, + "grad_norm": 0.4732983708381653, + "learning_rate": 1.9997913442610696e-05, + "loss": 0.6002, + "step": 574 + }, + { + "epoch": 0.015788028555738604, + "grad_norm": 0.3809700906276703, + "learning_rate": 1.9997904610940072e-05, + "loss": 0.6194, + "step": 575 + }, + { + "epoch": 0.015815485996705108, + "grad_norm": 0.45309966802597046, + "learning_rate": 1.9997895760620157e-05, + "loss": 0.6071, + "step": 576 + }, + { + "epoch": 0.01584294343767161, + "grad_norm": 0.35042840242385864, + "learning_rate": 1.9997886891650982e-05, + "loss": 0.6006, + "step": 577 + }, + { + "epoch": 0.01587040087863811, + "grad_norm": 0.33388376235961914, + "learning_rate": 1.9997878004032555e-05, + "loss": 0.6194, + "step": 578 + }, + { + "epoch": 0.015897858319604614, + "grad_norm": 0.3959198594093323, + "learning_rate": 1.9997869097764897e-05, + "loss": 0.5916, + "step": 579 + }, + { + "epoch": 0.015925315760571115, + "grad_norm": 0.4101382791996002, + "learning_rate": 1.999786017284802e-05, + "loss": 0.5702, + "step": 580 + }, + { + "epoch": 0.015952773201537616, + "grad_norm": 0.4070645868778229, + "learning_rate": 1.9997851229281942e-05, + "loss": 0.5718, + "step": 581 + }, + { + "epoch": 0.015980230642504117, + "grad_norm": 0.37170371413230896, + "learning_rate": 1.999784226706668e-05, + "loss": 0.6563, + "step": 582 + }, + { + "epoch": 0.016007688083470622, + "grad_norm": 0.43527600169181824, + "learning_rate": 1.9997833286202255e-05, + "loss": 0.603, + "step": 583 + }, + { + "epoch": 0.016035145524437123, + "grad_norm": 0.35357949137687683, + "learning_rate": 1.9997824286688678e-05, + "loss": 0.5785, + "step": 584 + }, + { + "epoch": 0.016062602965403624, + "grad_norm": 0.4094401001930237, + "learning_rate": 1.999781526852597e-05, + "loss": 0.6416, + "step": 585 + }, + { + "epoch": 0.016090060406370125, + "grad_norm": 0.4384291172027588, + "learning_rate": 1.999780623171414e-05, + "loss": 0.642, + "step": 586 + }, + { + "epoch": 0.01611751784733663, + "grad_norm": 0.34347274899482727, + "learning_rate": 1.9997797176253213e-05, + "loss": 0.516, + "step": 587 + }, + { + "epoch": 0.01614497528830313, + "grad_norm": 0.4126909673213959, + "learning_rate": 1.9997788102143206e-05, + "loss": 0.6108, + "step": 588 + }, + { + "epoch": 0.01617243272926963, + "grad_norm": 0.37418729066848755, + "learning_rate": 1.999777900938413e-05, + "loss": 0.5346, + "step": 589 + }, + { + "epoch": 0.016199890170236132, + "grad_norm": 0.36670830845832825, + "learning_rate": 1.9997769897976008e-05, + "loss": 0.5593, + "step": 590 + }, + { + "epoch": 0.016227347611202637, + "grad_norm": 0.41985636949539185, + "learning_rate": 1.999776076791885e-05, + "loss": 0.58, + "step": 591 + }, + { + "epoch": 0.016254805052169138, + "grad_norm": 0.42642778158187866, + "learning_rate": 1.999775161921268e-05, + "loss": 0.6964, + "step": 592 + }, + { + "epoch": 0.01628226249313564, + "grad_norm": 0.46405890583992004, + "learning_rate": 1.9997742451857513e-05, + "loss": 0.6636, + "step": 593 + }, + { + "epoch": 0.016309719934102143, + "grad_norm": 0.38017377257347107, + "learning_rate": 1.9997733265853366e-05, + "loss": 0.515, + "step": 594 + }, + { + "epoch": 0.016337177375068644, + "grad_norm": 0.44501325488090515, + "learning_rate": 1.9997724061200254e-05, + "loss": 0.6398, + "step": 595 + }, + { + "epoch": 0.016364634816035145, + "grad_norm": 0.36285772919654846, + "learning_rate": 1.9997714837898193e-05, + "loss": 0.5774, + "step": 596 + }, + { + "epoch": 0.016392092257001646, + "grad_norm": 0.40975141525268555, + "learning_rate": 1.9997705595947208e-05, + "loss": 0.5681, + "step": 597 + }, + { + "epoch": 0.01641954969796815, + "grad_norm": 0.43118762969970703, + "learning_rate": 1.999769633534731e-05, + "loss": 0.5944, + "step": 598 + }, + { + "epoch": 0.01644700713893465, + "grad_norm": 0.39138084650039673, + "learning_rate": 1.9997687056098515e-05, + "loss": 0.6193, + "step": 599 + }, + { + "epoch": 0.016474464579901153, + "grad_norm": 0.36784979701042175, + "learning_rate": 1.9997677758200845e-05, + "loss": 0.6735, + "step": 600 + }, + { + "epoch": 0.016501922020867654, + "grad_norm": 0.36294370889663696, + "learning_rate": 1.999766844165431e-05, + "loss": 0.6779, + "step": 601 + }, + { + "epoch": 0.016529379461834158, + "grad_norm": 0.4124161899089813, + "learning_rate": 1.999765910645894e-05, + "loss": 0.7035, + "step": 602 + }, + { + "epoch": 0.01655683690280066, + "grad_norm": 0.40208274126052856, + "learning_rate": 1.9997649752614744e-05, + "loss": 0.6185, + "step": 603 + }, + { + "epoch": 0.01658429434376716, + "grad_norm": 0.4000442326068878, + "learning_rate": 1.9997640380121735e-05, + "loss": 0.5312, + "step": 604 + }, + { + "epoch": 0.016611751784733664, + "grad_norm": 0.36491647362709045, + "learning_rate": 1.999763098897994e-05, + "loss": 0.6631, + "step": 605 + }, + { + "epoch": 0.016639209225700165, + "grad_norm": 0.36979371309280396, + "learning_rate": 1.999762157918937e-05, + "loss": 0.6259, + "step": 606 + }, + { + "epoch": 0.016666666666666666, + "grad_norm": 0.3710172176361084, + "learning_rate": 1.9997612150750044e-05, + "loss": 0.6069, + "step": 607 + }, + { + "epoch": 0.016694124107633167, + "grad_norm": 0.37757453322410583, + "learning_rate": 1.9997602703661984e-05, + "loss": 0.5597, + "step": 608 + }, + { + "epoch": 0.016721581548599672, + "grad_norm": 0.42393893003463745, + "learning_rate": 1.9997593237925203e-05, + "loss": 0.6359, + "step": 609 + }, + { + "epoch": 0.016749038989566173, + "grad_norm": 0.40653982758522034, + "learning_rate": 1.999758375353972e-05, + "loss": 0.5559, + "step": 610 + }, + { + "epoch": 0.016776496430532674, + "grad_norm": 0.4024507403373718, + "learning_rate": 1.999757425050555e-05, + "loss": 0.5642, + "step": 611 + }, + { + "epoch": 0.016803953871499175, + "grad_norm": 0.3754747211933136, + "learning_rate": 1.9997564728822718e-05, + "loss": 0.6193, + "step": 612 + }, + { + "epoch": 0.01683141131246568, + "grad_norm": 0.46400701999664307, + "learning_rate": 1.9997555188491234e-05, + "loss": 0.6474, + "step": 613 + }, + { + "epoch": 0.01685886875343218, + "grad_norm": 0.39160293340682983, + "learning_rate": 1.9997545629511118e-05, + "loss": 0.5649, + "step": 614 + }, + { + "epoch": 0.01688632619439868, + "grad_norm": 0.37168920040130615, + "learning_rate": 1.999753605188239e-05, + "loss": 0.5518, + "step": 615 + }, + { + "epoch": 0.016913783635365182, + "grad_norm": 0.38634005188941956, + "learning_rate": 1.9997526455605064e-05, + "loss": 0.6012, + "step": 616 + }, + { + "epoch": 0.016941241076331687, + "grad_norm": 0.35299113392829895, + "learning_rate": 1.999751684067916e-05, + "loss": 0.6017, + "step": 617 + }, + { + "epoch": 0.016968698517298188, + "grad_norm": 0.3434194028377533, + "learning_rate": 1.9997507207104697e-05, + "loss": 0.6039, + "step": 618 + }, + { + "epoch": 0.01699615595826469, + "grad_norm": 0.3671810030937195, + "learning_rate": 1.9997497554881697e-05, + "loss": 0.4956, + "step": 619 + }, + { + "epoch": 0.017023613399231193, + "grad_norm": 0.3855080306529999, + "learning_rate": 1.9997487884010168e-05, + "loss": 0.511, + "step": 620 + }, + { + "epoch": 0.017051070840197694, + "grad_norm": 0.3590529263019562, + "learning_rate": 1.9997478194490135e-05, + "loss": 0.5682, + "step": 621 + }, + { + "epoch": 0.017078528281164195, + "grad_norm": 0.3853760063648224, + "learning_rate": 1.9997468486321614e-05, + "loss": 0.5669, + "step": 622 + }, + { + "epoch": 0.017105985722130696, + "grad_norm": 0.38928598165512085, + "learning_rate": 1.9997458759504623e-05, + "loss": 0.6273, + "step": 623 + }, + { + "epoch": 0.0171334431630972, + "grad_norm": 0.3647083342075348, + "learning_rate": 1.999744901403918e-05, + "loss": 0.6242, + "step": 624 + }, + { + "epoch": 0.0171609006040637, + "grad_norm": 0.40483883023262024, + "learning_rate": 1.9997439249925307e-05, + "loss": 0.5475, + "step": 625 + }, + { + "epoch": 0.017188358045030203, + "grad_norm": 0.328152060508728, + "learning_rate": 1.999742946716302e-05, + "loss": 0.492, + "step": 626 + }, + { + "epoch": 0.017215815485996704, + "grad_norm": 0.3460335433483124, + "learning_rate": 1.9997419665752333e-05, + "loss": 0.5432, + "step": 627 + }, + { + "epoch": 0.017243272926963208, + "grad_norm": 0.4378203749656677, + "learning_rate": 1.9997409845693264e-05, + "loss": 0.6527, + "step": 628 + }, + { + "epoch": 0.01727073036792971, + "grad_norm": 0.7637513875961304, + "learning_rate": 1.9997400006985843e-05, + "loss": 0.527, + "step": 629 + }, + { + "epoch": 0.01729818780889621, + "grad_norm": 0.42557206749916077, + "learning_rate": 1.9997390149630073e-05, + "loss": 0.6952, + "step": 630 + }, + { + "epoch": 0.017325645249862714, + "grad_norm": 0.3923731744289398, + "learning_rate": 1.9997380273625982e-05, + "loss": 0.601, + "step": 631 + }, + { + "epoch": 0.017353102690829215, + "grad_norm": 0.436728298664093, + "learning_rate": 1.9997370378973587e-05, + "loss": 0.6006, + "step": 632 + }, + { + "epoch": 0.017380560131795716, + "grad_norm": 0.37395837903022766, + "learning_rate": 1.9997360465672904e-05, + "loss": 0.6416, + "step": 633 + }, + { + "epoch": 0.017408017572762217, + "grad_norm": 0.3573594391345978, + "learning_rate": 1.9997350533723953e-05, + "loss": 0.5262, + "step": 634 + }, + { + "epoch": 0.017435475013728722, + "grad_norm": 0.39430806040763855, + "learning_rate": 1.9997340583126753e-05, + "loss": 0.5805, + "step": 635 + }, + { + "epoch": 0.017462932454695223, + "grad_norm": 0.441062867641449, + "learning_rate": 1.9997330613881322e-05, + "loss": 0.5672, + "step": 636 + }, + { + "epoch": 0.017490389895661724, + "grad_norm": 0.4491402506828308, + "learning_rate": 1.999732062598768e-05, + "loss": 0.7201, + "step": 637 + }, + { + "epoch": 0.017517847336628225, + "grad_norm": 0.37788498401641846, + "learning_rate": 1.9997310619445843e-05, + "loss": 0.5856, + "step": 638 + }, + { + "epoch": 0.01754530477759473, + "grad_norm": 0.3511843681335449, + "learning_rate": 1.999730059425583e-05, + "loss": 0.5611, + "step": 639 + }, + { + "epoch": 0.01757276221856123, + "grad_norm": 0.3528120219707489, + "learning_rate": 1.9997290550417664e-05, + "loss": 0.5425, + "step": 640 + }, + { + "epoch": 0.01760021965952773, + "grad_norm": 0.40895235538482666, + "learning_rate": 1.9997280487931355e-05, + "loss": 0.5981, + "step": 641 + }, + { + "epoch": 0.017627677100494232, + "grad_norm": 0.5240883827209473, + "learning_rate": 1.999727040679693e-05, + "loss": 0.6397, + "step": 642 + }, + { + "epoch": 0.017655134541460737, + "grad_norm": 0.3656834661960602, + "learning_rate": 1.9997260307014406e-05, + "loss": 0.6139, + "step": 643 + }, + { + "epoch": 0.017682591982427238, + "grad_norm": 0.3596108555793762, + "learning_rate": 1.99972501885838e-05, + "loss": 0.5773, + "step": 644 + }, + { + "epoch": 0.01771004942339374, + "grad_norm": 0.3943532109260559, + "learning_rate": 1.999724005150513e-05, + "loss": 0.6087, + "step": 645 + }, + { + "epoch": 0.017737506864360243, + "grad_norm": 0.36053335666656494, + "learning_rate": 1.999722989577842e-05, + "loss": 0.5835, + "step": 646 + }, + { + "epoch": 0.017764964305326744, + "grad_norm": 0.42509549856185913, + "learning_rate": 1.9997219721403684e-05, + "loss": 0.6688, + "step": 647 + }, + { + "epoch": 0.017792421746293245, + "grad_norm": 0.3831471800804138, + "learning_rate": 1.999720952838094e-05, + "loss": 0.556, + "step": 648 + }, + { + "epoch": 0.017819879187259746, + "grad_norm": 0.7185770273208618, + "learning_rate": 1.9997199316710213e-05, + "loss": 0.6329, + "step": 649 + }, + { + "epoch": 0.01784733662822625, + "grad_norm": 0.3929024934768677, + "learning_rate": 1.9997189086391518e-05, + "loss": 0.7131, + "step": 650 + }, + { + "epoch": 0.01787479406919275, + "grad_norm": 0.4883844554424286, + "learning_rate": 1.999717883742487e-05, + "loss": 0.5116, + "step": 651 + }, + { + "epoch": 0.017902251510159253, + "grad_norm": 0.41014277935028076, + "learning_rate": 1.99971685698103e-05, + "loss": 0.6076, + "step": 652 + }, + { + "epoch": 0.017929708951125754, + "grad_norm": 0.4055403470993042, + "learning_rate": 1.9997158283547814e-05, + "loss": 0.5961, + "step": 653 + }, + { + "epoch": 0.017957166392092258, + "grad_norm": 0.384277880191803, + "learning_rate": 1.9997147978637443e-05, + "loss": 0.5638, + "step": 654 + }, + { + "epoch": 0.01798462383305876, + "grad_norm": 0.37508463859558105, + "learning_rate": 1.9997137655079194e-05, + "loss": 0.5973, + "step": 655 + }, + { + "epoch": 0.01801208127402526, + "grad_norm": 0.38920071721076965, + "learning_rate": 1.9997127312873096e-05, + "loss": 0.6345, + "step": 656 + }, + { + "epoch": 0.01803953871499176, + "grad_norm": 0.38789263367652893, + "learning_rate": 1.9997116952019163e-05, + "loss": 0.6496, + "step": 657 + }, + { + "epoch": 0.018066996155958265, + "grad_norm": 0.4327630400657654, + "learning_rate": 1.999710657251742e-05, + "loss": 0.5824, + "step": 658 + }, + { + "epoch": 0.018094453596924766, + "grad_norm": 0.39532190561294556, + "learning_rate": 1.999709617436788e-05, + "loss": 0.6401, + "step": 659 + }, + { + "epoch": 0.018121911037891267, + "grad_norm": 0.3969400227069855, + "learning_rate": 1.9997085757570565e-05, + "loss": 0.5944, + "step": 660 + }, + { + "epoch": 0.018149368478857772, + "grad_norm": 0.5550959706306458, + "learning_rate": 1.9997075322125492e-05, + "loss": 0.6153, + "step": 661 + }, + { + "epoch": 0.018176825919824273, + "grad_norm": 0.4067176282405853, + "learning_rate": 1.9997064868032687e-05, + "loss": 0.6429, + "step": 662 + }, + { + "epoch": 0.018204283360790774, + "grad_norm": 0.35939744114875793, + "learning_rate": 1.9997054395292163e-05, + "loss": 0.5708, + "step": 663 + }, + { + "epoch": 0.018231740801757275, + "grad_norm": 0.39175185561180115, + "learning_rate": 1.999704390390394e-05, + "loss": 0.5345, + "step": 664 + }, + { + "epoch": 0.01825919824272378, + "grad_norm": 0.4091104567050934, + "learning_rate": 1.9997033393868042e-05, + "loss": 0.6652, + "step": 665 + }, + { + "epoch": 0.01828665568369028, + "grad_norm": 0.4726792275905609, + "learning_rate": 1.9997022865184486e-05, + "loss": 0.533, + "step": 666 + }, + { + "epoch": 0.01831411312465678, + "grad_norm": 0.3521459996700287, + "learning_rate": 1.9997012317853292e-05, + "loss": 0.5429, + "step": 667 + }, + { + "epoch": 0.018341570565623282, + "grad_norm": 0.3617788553237915, + "learning_rate": 1.9997001751874475e-05, + "loss": 0.528, + "step": 668 + }, + { + "epoch": 0.018369028006589787, + "grad_norm": 0.3685790002346039, + "learning_rate": 1.9996991167248065e-05, + "loss": 0.6191, + "step": 669 + }, + { + "epoch": 0.018396485447556288, + "grad_norm": 0.3416842818260193, + "learning_rate": 1.9996980563974073e-05, + "loss": 0.5215, + "step": 670 + }, + { + "epoch": 0.01842394288852279, + "grad_norm": 0.3642665445804596, + "learning_rate": 1.999696994205252e-05, + "loss": 0.6163, + "step": 671 + }, + { + "epoch": 0.018451400329489293, + "grad_norm": 0.3567480444908142, + "learning_rate": 1.999695930148343e-05, + "loss": 0.5503, + "step": 672 + }, + { + "epoch": 0.018478857770455794, + "grad_norm": 0.35434848070144653, + "learning_rate": 1.999694864226682e-05, + "loss": 0.5697, + "step": 673 + }, + { + "epoch": 0.018506315211422295, + "grad_norm": 0.36857515573501587, + "learning_rate": 1.999693796440271e-05, + "loss": 0.6676, + "step": 674 + }, + { + "epoch": 0.018533772652388796, + "grad_norm": 0.4114159643650055, + "learning_rate": 1.9996927267891123e-05, + "loss": 0.6253, + "step": 675 + }, + { + "epoch": 0.0185612300933553, + "grad_norm": 0.42688414454460144, + "learning_rate": 1.9996916552732073e-05, + "loss": 0.6039, + "step": 676 + }, + { + "epoch": 0.0185886875343218, + "grad_norm": 0.4053291976451874, + "learning_rate": 1.999690581892558e-05, + "loss": 0.6493, + "step": 677 + }, + { + "epoch": 0.018616144975288303, + "grad_norm": 0.37900951504707336, + "learning_rate": 1.999689506647167e-05, + "loss": 0.6184, + "step": 678 + }, + { + "epoch": 0.018643602416254804, + "grad_norm": 0.38820114731788635, + "learning_rate": 1.999688429537036e-05, + "loss": 0.5318, + "step": 679 + }, + { + "epoch": 0.018671059857221308, + "grad_norm": 0.41871899366378784, + "learning_rate": 1.9996873505621672e-05, + "loss": 0.6151, + "step": 680 + }, + { + "epoch": 0.01869851729818781, + "grad_norm": 0.42370879650115967, + "learning_rate": 1.9996862697225624e-05, + "loss": 0.6047, + "step": 681 + }, + { + "epoch": 0.01872597473915431, + "grad_norm": 0.4112662076950073, + "learning_rate": 1.9996851870182233e-05, + "loss": 0.6336, + "step": 682 + }, + { + "epoch": 0.01875343218012081, + "grad_norm": 0.3670344054698944, + "learning_rate": 1.999684102449152e-05, + "loss": 0.622, + "step": 683 + }, + { + "epoch": 0.018780889621087316, + "grad_norm": 0.4268692433834076, + "learning_rate": 1.9996830160153517e-05, + "loss": 0.5497, + "step": 684 + }, + { + "epoch": 0.018808347062053817, + "grad_norm": 0.3788997530937195, + "learning_rate": 1.999681927716823e-05, + "loss": 0.5358, + "step": 685 + }, + { + "epoch": 0.018835804503020318, + "grad_norm": 0.37218910455703735, + "learning_rate": 1.9996808375535686e-05, + "loss": 0.4709, + "step": 686 + }, + { + "epoch": 0.018863261943986822, + "grad_norm": 0.42026597261428833, + "learning_rate": 1.99967974552559e-05, + "loss": 0.7038, + "step": 687 + }, + { + "epoch": 0.018890719384953323, + "grad_norm": 0.40262511372566223, + "learning_rate": 1.99967865163289e-05, + "loss": 0.5092, + "step": 688 + }, + { + "epoch": 0.018918176825919824, + "grad_norm": 0.3546545207500458, + "learning_rate": 1.99967755587547e-05, + "loss": 0.6381, + "step": 689 + }, + { + "epoch": 0.018945634266886325, + "grad_norm": 0.3929683268070221, + "learning_rate": 1.9996764582533326e-05, + "loss": 0.567, + "step": 690 + }, + { + "epoch": 0.01897309170785283, + "grad_norm": 0.4001508355140686, + "learning_rate": 1.9996753587664794e-05, + "loss": 0.5886, + "step": 691 + }, + { + "epoch": 0.01900054914881933, + "grad_norm": 0.35207152366638184, + "learning_rate": 1.9996742574149123e-05, + "loss": 0.5739, + "step": 692 + }, + { + "epoch": 0.01902800658978583, + "grad_norm": 0.36567723751068115, + "learning_rate": 1.999673154198634e-05, + "loss": 0.6034, + "step": 693 + }, + { + "epoch": 0.019055464030752332, + "grad_norm": 0.40501561760902405, + "learning_rate": 1.9996720491176462e-05, + "loss": 0.5993, + "step": 694 + }, + { + "epoch": 0.019082921471718837, + "grad_norm": 0.3868468403816223, + "learning_rate": 1.999670942171951e-05, + "loss": 0.579, + "step": 695 + }, + { + "epoch": 0.019110378912685338, + "grad_norm": 0.3892608880996704, + "learning_rate": 1.9996698333615503e-05, + "loss": 0.6181, + "step": 696 + }, + { + "epoch": 0.01913783635365184, + "grad_norm": 0.4524693787097931, + "learning_rate": 1.9996687226864462e-05, + "loss": 0.532, + "step": 697 + }, + { + "epoch": 0.019165293794618343, + "grad_norm": 0.37429699301719666, + "learning_rate": 1.999667610146641e-05, + "loss": 0.6278, + "step": 698 + }, + { + "epoch": 0.019192751235584844, + "grad_norm": 0.3698277175426483, + "learning_rate": 1.9996664957421367e-05, + "loss": 0.5945, + "step": 699 + }, + { + "epoch": 0.019220208676551345, + "grad_norm": 0.441307932138443, + "learning_rate": 1.9996653794729353e-05, + "loss": 0.5737, + "step": 700 + }, + { + "epoch": 0.019247666117517846, + "grad_norm": 0.3855707049369812, + "learning_rate": 1.999664261339039e-05, + "loss": 0.5946, + "step": 701 + }, + { + "epoch": 0.01927512355848435, + "grad_norm": 0.38945701718330383, + "learning_rate": 1.99966314134045e-05, + "loss": 0.5966, + "step": 702 + }, + { + "epoch": 0.01930258099945085, + "grad_norm": 0.379617840051651, + "learning_rate": 1.9996620194771698e-05, + "loss": 0.5929, + "step": 703 + }, + { + "epoch": 0.019330038440417353, + "grad_norm": 0.3983360528945923, + "learning_rate": 1.9996608957492006e-05, + "loss": 0.577, + "step": 704 + }, + { + "epoch": 0.019357495881383854, + "grad_norm": 0.34047651290893555, + "learning_rate": 1.9996597701565453e-05, + "loss": 0.5414, + "step": 705 + }, + { + "epoch": 0.019384953322350358, + "grad_norm": 0.4075978994369507, + "learning_rate": 1.9996586426992052e-05, + "loss": 0.5607, + "step": 706 + }, + { + "epoch": 0.01941241076331686, + "grad_norm": 0.3390105068683624, + "learning_rate": 1.9996575133771828e-05, + "loss": 0.5567, + "step": 707 + }, + { + "epoch": 0.01943986820428336, + "grad_norm": 0.4025273025035858, + "learning_rate": 1.9996563821904803e-05, + "loss": 0.6369, + "step": 708 + }, + { + "epoch": 0.01946732564524986, + "grad_norm": 0.42383697628974915, + "learning_rate": 1.9996552491390992e-05, + "loss": 0.6831, + "step": 709 + }, + { + "epoch": 0.019494783086216366, + "grad_norm": 0.4264463484287262, + "learning_rate": 1.9996541142230425e-05, + "loss": 0.6113, + "step": 710 + }, + { + "epoch": 0.019522240527182867, + "grad_norm": 0.41362059116363525, + "learning_rate": 1.999652977442311e-05, + "loss": 0.6939, + "step": 711 + }, + { + "epoch": 0.019549697968149368, + "grad_norm": 0.3867991864681244, + "learning_rate": 1.9996518387969083e-05, + "loss": 0.5496, + "step": 712 + }, + { + "epoch": 0.019577155409115872, + "grad_norm": 0.3825870454311371, + "learning_rate": 1.999650698286836e-05, + "loss": 0.6467, + "step": 713 + }, + { + "epoch": 0.019604612850082373, + "grad_norm": 0.4010181725025177, + "learning_rate": 1.9996495559120957e-05, + "loss": 0.6138, + "step": 714 + }, + { + "epoch": 0.019632070291048874, + "grad_norm": 0.3686906695365906, + "learning_rate": 1.99964841167269e-05, + "loss": 0.5615, + "step": 715 + }, + { + "epoch": 0.019659527732015375, + "grad_norm": 0.4058796167373657, + "learning_rate": 1.999647265568621e-05, + "loss": 0.6292, + "step": 716 + }, + { + "epoch": 0.01968698517298188, + "grad_norm": 0.37050995230674744, + "learning_rate": 1.9996461175998905e-05, + "loss": 0.6338, + "step": 717 + }, + { + "epoch": 0.01971444261394838, + "grad_norm": 0.34270793199539185, + "learning_rate": 1.9996449677665014e-05, + "loss": 0.4983, + "step": 718 + }, + { + "epoch": 0.01974190005491488, + "grad_norm": 0.35190996527671814, + "learning_rate": 1.9996438160684553e-05, + "loss": 0.502, + "step": 719 + }, + { + "epoch": 0.019769357495881382, + "grad_norm": 0.39960989356040955, + "learning_rate": 1.9996426625057547e-05, + "loss": 0.5888, + "step": 720 + }, + { + "epoch": 0.019796814936847887, + "grad_norm": 0.39513394236564636, + "learning_rate": 1.9996415070784007e-05, + "loss": 0.6204, + "step": 721 + }, + { + "epoch": 0.019824272377814388, + "grad_norm": 0.35115572810173035, + "learning_rate": 1.9996403497863966e-05, + "loss": 0.599, + "step": 722 + }, + { + "epoch": 0.01985172981878089, + "grad_norm": 0.3970882296562195, + "learning_rate": 1.9996391906297446e-05, + "loss": 0.5573, + "step": 723 + }, + { + "epoch": 0.01987918725974739, + "grad_norm": 0.44875118136405945, + "learning_rate": 1.9996380296084462e-05, + "loss": 0.764, + "step": 724 + }, + { + "epoch": 0.019906644700713894, + "grad_norm": 0.3716571629047394, + "learning_rate": 1.9996368667225037e-05, + "loss": 0.5655, + "step": 725 + }, + { + "epoch": 0.019934102141680395, + "grad_norm": 0.4899410307407379, + "learning_rate": 1.9996357019719194e-05, + "loss": 0.5899, + "step": 726 + }, + { + "epoch": 0.019961559582646896, + "grad_norm": 0.47499653697013855, + "learning_rate": 1.9996345353566955e-05, + "loss": 0.5837, + "step": 727 + }, + { + "epoch": 0.0199890170236134, + "grad_norm": 0.4007241427898407, + "learning_rate": 1.9996333668768342e-05, + "loss": 0.5501, + "step": 728 + }, + { + "epoch": 0.0200164744645799, + "grad_norm": 0.42610257863998413, + "learning_rate": 1.9996321965323376e-05, + "loss": 0.5657, + "step": 729 + }, + { + "epoch": 0.020043931905546403, + "grad_norm": 0.35504621267318726, + "learning_rate": 1.999631024323208e-05, + "loss": 0.6636, + "step": 730 + }, + { + "epoch": 0.020071389346512904, + "grad_norm": 0.5344265699386597, + "learning_rate": 1.9996298502494474e-05, + "loss": 0.5473, + "step": 731 + }, + { + "epoch": 0.020098846787479408, + "grad_norm": 0.369143545627594, + "learning_rate": 1.9996286743110582e-05, + "loss": 0.588, + "step": 732 + }, + { + "epoch": 0.02012630422844591, + "grad_norm": 0.41597768664360046, + "learning_rate": 1.999627496508042e-05, + "loss": 0.5127, + "step": 733 + }, + { + "epoch": 0.02015376166941241, + "grad_norm": 0.37029096484184265, + "learning_rate": 1.9996263168404023e-05, + "loss": 0.5197, + "step": 734 + }, + { + "epoch": 0.02018121911037891, + "grad_norm": 0.3724589943885803, + "learning_rate": 1.99962513530814e-05, + "loss": 0.5545, + "step": 735 + }, + { + "epoch": 0.020208676551345416, + "grad_norm": 0.40416592359542847, + "learning_rate": 1.9996239519112578e-05, + "loss": 0.6692, + "step": 736 + }, + { + "epoch": 0.020236133992311917, + "grad_norm": 0.378384530544281, + "learning_rate": 1.9996227666497575e-05, + "loss": 0.5671, + "step": 737 + }, + { + "epoch": 0.020263591433278418, + "grad_norm": 0.39161446690559387, + "learning_rate": 1.9996215795236423e-05, + "loss": 0.5885, + "step": 738 + }, + { + "epoch": 0.020291048874244922, + "grad_norm": 0.36782926321029663, + "learning_rate": 1.9996203905329138e-05, + "loss": 0.5308, + "step": 739 + }, + { + "epoch": 0.020318506315211423, + "grad_norm": 0.4427297115325928, + "learning_rate": 1.9996191996775737e-05, + "loss": 0.5512, + "step": 740 + }, + { + "epoch": 0.020345963756177924, + "grad_norm": 0.39539363980293274, + "learning_rate": 1.999618006957625e-05, + "loss": 0.5917, + "step": 741 + }, + { + "epoch": 0.020373421197144425, + "grad_norm": 0.39683249592781067, + "learning_rate": 1.99961681237307e-05, + "loss": 0.6572, + "step": 742 + }, + { + "epoch": 0.02040087863811093, + "grad_norm": 0.3967967629432678, + "learning_rate": 1.9996156159239104e-05, + "loss": 0.6313, + "step": 743 + }, + { + "epoch": 0.02042833607907743, + "grad_norm": 0.3610052168369293, + "learning_rate": 1.9996144176101487e-05, + "loss": 0.6616, + "step": 744 + }, + { + "epoch": 0.02045579352004393, + "grad_norm": 0.3304113447666168, + "learning_rate": 1.9996132174317867e-05, + "loss": 0.5033, + "step": 745 + }, + { + "epoch": 0.020483250961010432, + "grad_norm": 0.368051141500473, + "learning_rate": 1.9996120153888273e-05, + "loss": 0.5672, + "step": 746 + }, + { + "epoch": 0.020510708401976937, + "grad_norm": 0.3525749146938324, + "learning_rate": 1.9996108114812726e-05, + "loss": 0.5228, + "step": 747 + }, + { + "epoch": 0.020538165842943438, + "grad_norm": 0.35352861881256104, + "learning_rate": 1.9996096057091246e-05, + "loss": 0.5042, + "step": 748 + }, + { + "epoch": 0.02056562328390994, + "grad_norm": 0.34667524695396423, + "learning_rate": 1.9996083980723854e-05, + "loss": 0.5905, + "step": 749 + }, + { + "epoch": 0.02059308072487644, + "grad_norm": 0.3735881745815277, + "learning_rate": 1.9996071885710576e-05, + "loss": 0.5851, + "step": 750 + }, + { + "epoch": 0.020620538165842944, + "grad_norm": 0.37982043623924255, + "learning_rate": 1.9996059772051434e-05, + "loss": 0.5889, + "step": 751 + }, + { + "epoch": 0.020647995606809445, + "grad_norm": 0.4002512991428375, + "learning_rate": 1.9996047639746453e-05, + "loss": 0.644, + "step": 752 + }, + { + "epoch": 0.020675453047775946, + "grad_norm": 0.3585599660873413, + "learning_rate": 1.999603548879565e-05, + "loss": 0.6271, + "step": 753 + }, + { + "epoch": 0.02070291048874245, + "grad_norm": 0.36989226937294006, + "learning_rate": 1.999602331919905e-05, + "loss": 0.6115, + "step": 754 + }, + { + "epoch": 0.02073036792970895, + "grad_norm": 0.3567410707473755, + "learning_rate": 1.9996011130956677e-05, + "loss": 0.5691, + "step": 755 + }, + { + "epoch": 0.020757825370675453, + "grad_norm": 0.43828344345092773, + "learning_rate": 1.999599892406855e-05, + "loss": 0.5916, + "step": 756 + }, + { + "epoch": 0.020785282811641954, + "grad_norm": 0.4001257121562958, + "learning_rate": 1.99959866985347e-05, + "loss": 0.5844, + "step": 757 + }, + { + "epoch": 0.020812740252608458, + "grad_norm": 0.37951651215553284, + "learning_rate": 1.9995974454355143e-05, + "loss": 0.6155, + "step": 758 + }, + { + "epoch": 0.02084019769357496, + "grad_norm": 0.45075759291648865, + "learning_rate": 1.99959621915299e-05, + "loss": 0.5821, + "step": 759 + }, + { + "epoch": 0.02086765513454146, + "grad_norm": 0.35843023657798767, + "learning_rate": 1.9995949910059002e-05, + "loss": 0.6045, + "step": 760 + }, + { + "epoch": 0.02089511257550796, + "grad_norm": 0.352909117937088, + "learning_rate": 1.9995937609942463e-05, + "loss": 0.5682, + "step": 761 + }, + { + "epoch": 0.020922570016474466, + "grad_norm": 0.39389362931251526, + "learning_rate": 1.9995925291180313e-05, + "loss": 0.6055, + "step": 762 + }, + { + "epoch": 0.020950027457440967, + "grad_norm": 0.39120009541511536, + "learning_rate": 1.999591295377257e-05, + "loss": 0.5527, + "step": 763 + }, + { + "epoch": 0.020977484898407468, + "grad_norm": 0.387358695268631, + "learning_rate": 1.9995900597719258e-05, + "loss": 0.6594, + "step": 764 + }, + { + "epoch": 0.021004942339373972, + "grad_norm": 0.36977407336235046, + "learning_rate": 1.9995888223020404e-05, + "loss": 0.5769, + "step": 765 + }, + { + "epoch": 0.021032399780340473, + "grad_norm": 0.36938515305519104, + "learning_rate": 1.9995875829676027e-05, + "loss": 0.5652, + "step": 766 + }, + { + "epoch": 0.021059857221306974, + "grad_norm": 0.4045946002006531, + "learning_rate": 1.999586341768615e-05, + "loss": 0.5797, + "step": 767 + }, + { + "epoch": 0.021087314662273475, + "grad_norm": 0.3764019012451172, + "learning_rate": 1.9995850987050796e-05, + "loss": 0.5147, + "step": 768 + }, + { + "epoch": 0.02111477210323998, + "grad_norm": 0.35261985659599304, + "learning_rate": 1.9995838537769993e-05, + "loss": 0.6225, + "step": 769 + }, + { + "epoch": 0.02114222954420648, + "grad_norm": 0.3653716444969177, + "learning_rate": 1.999582606984376e-05, + "loss": 0.6397, + "step": 770 + }, + { + "epoch": 0.02116968698517298, + "grad_norm": 0.36696428060531616, + "learning_rate": 1.999581358327212e-05, + "loss": 0.5254, + "step": 771 + }, + { + "epoch": 0.021197144426139482, + "grad_norm": 0.4574185013771057, + "learning_rate": 1.9995801078055098e-05, + "loss": 0.6072, + "step": 772 + }, + { + "epoch": 0.021224601867105987, + "grad_norm": 0.3803882598876953, + "learning_rate": 1.999578855419272e-05, + "loss": 0.6184, + "step": 773 + }, + { + "epoch": 0.021252059308072488, + "grad_norm": 0.38566452264785767, + "learning_rate": 1.9995776011685003e-05, + "loss": 0.6319, + "step": 774 + }, + { + "epoch": 0.02127951674903899, + "grad_norm": 0.39810946583747864, + "learning_rate": 1.999576345053197e-05, + "loss": 0.4869, + "step": 775 + }, + { + "epoch": 0.02130697419000549, + "grad_norm": 0.35804641246795654, + "learning_rate": 1.9995750870733653e-05, + "loss": 0.5991, + "step": 776 + }, + { + "epoch": 0.021334431630971994, + "grad_norm": 0.34871742129325867, + "learning_rate": 1.999573827229007e-05, + "loss": 0.546, + "step": 777 + }, + { + "epoch": 0.021361889071938495, + "grad_norm": 0.42481130361557007, + "learning_rate": 1.9995725655201243e-05, + "loss": 0.5686, + "step": 778 + }, + { + "epoch": 0.021389346512904996, + "grad_norm": 0.4556298851966858, + "learning_rate": 1.9995713019467197e-05, + "loss": 0.624, + "step": 779 + }, + { + "epoch": 0.0214168039538715, + "grad_norm": 0.3918381631374359, + "learning_rate": 1.9995700365087957e-05, + "loss": 0.5028, + "step": 780 + }, + { + "epoch": 0.021444261394838002, + "grad_norm": 0.37947458028793335, + "learning_rate": 1.9995687692063547e-05, + "loss": 0.5161, + "step": 781 + }, + { + "epoch": 0.021471718835804503, + "grad_norm": 0.4046136736869812, + "learning_rate": 1.999567500039399e-05, + "loss": 0.5377, + "step": 782 + }, + { + "epoch": 0.021499176276771004, + "grad_norm": 0.3638302683830261, + "learning_rate": 1.9995662290079303e-05, + "loss": 0.591, + "step": 783 + }, + { + "epoch": 0.021526633717737508, + "grad_norm": 0.3677133321762085, + "learning_rate": 1.999564956111952e-05, + "loss": 0.5021, + "step": 784 + }, + { + "epoch": 0.02155409115870401, + "grad_norm": 0.44340354204177856, + "learning_rate": 1.9995636813514658e-05, + "loss": 0.6616, + "step": 785 + }, + { + "epoch": 0.02158154859967051, + "grad_norm": 0.3773060739040375, + "learning_rate": 1.9995624047264745e-05, + "loss": 0.5845, + "step": 786 + }, + { + "epoch": 0.02160900604063701, + "grad_norm": 0.3579763174057007, + "learning_rate": 1.9995611262369803e-05, + "loss": 0.5782, + "step": 787 + }, + { + "epoch": 0.021636463481603516, + "grad_norm": 0.4122910797595978, + "learning_rate": 1.9995598458829853e-05, + "loss": 0.567, + "step": 788 + }, + { + "epoch": 0.021663920922570017, + "grad_norm": 0.32842203974723816, + "learning_rate": 1.9995585636644926e-05, + "loss": 0.5306, + "step": 789 + }, + { + "epoch": 0.021691378363536518, + "grad_norm": 0.34808245301246643, + "learning_rate": 1.999557279581504e-05, + "loss": 0.5881, + "step": 790 + }, + { + "epoch": 0.02171883580450302, + "grad_norm": 0.3741764426231384, + "learning_rate": 1.999555993634022e-05, + "loss": 0.5252, + "step": 791 + }, + { + "epoch": 0.021746293245469523, + "grad_norm": 0.36945030093193054, + "learning_rate": 1.9995547058220488e-05, + "loss": 0.5682, + "step": 792 + }, + { + "epoch": 0.021773750686436024, + "grad_norm": 0.3753634989261627, + "learning_rate": 1.9995534161455876e-05, + "loss": 0.6068, + "step": 793 + }, + { + "epoch": 0.021801208127402525, + "grad_norm": 0.41255176067352295, + "learning_rate": 1.9995521246046398e-05, + "loss": 0.5363, + "step": 794 + }, + { + "epoch": 0.02182866556836903, + "grad_norm": 0.3646013140678406, + "learning_rate": 1.9995508311992084e-05, + "loss": 0.574, + "step": 795 + }, + { + "epoch": 0.02185612300933553, + "grad_norm": 0.4076300263404846, + "learning_rate": 1.9995495359292957e-05, + "loss": 0.5974, + "step": 796 + }, + { + "epoch": 0.02188358045030203, + "grad_norm": 0.3962678611278534, + "learning_rate": 1.999548238794904e-05, + "loss": 0.6868, + "step": 797 + }, + { + "epoch": 0.021911037891268532, + "grad_norm": 0.39624378085136414, + "learning_rate": 1.9995469397960362e-05, + "loss": 0.604, + "step": 798 + }, + { + "epoch": 0.021938495332235037, + "grad_norm": 0.38660988211631775, + "learning_rate": 1.999545638932694e-05, + "loss": 0.6033, + "step": 799 + }, + { + "epoch": 0.021965952773201538, + "grad_norm": 0.42505213618278503, + "learning_rate": 1.9995443362048803e-05, + "loss": 0.6325, + "step": 800 + }, + { + "epoch": 0.02199341021416804, + "grad_norm": 0.3458612859249115, + "learning_rate": 1.999543031612597e-05, + "loss": 0.5341, + "step": 801 + }, + { + "epoch": 0.02202086765513454, + "grad_norm": 0.4429183602333069, + "learning_rate": 1.9995417251558473e-05, + "loss": 0.6424, + "step": 802 + }, + { + "epoch": 0.022048325096101044, + "grad_norm": 0.4475831091403961, + "learning_rate": 1.9995404168346334e-05, + "loss": 0.6302, + "step": 803 + }, + { + "epoch": 0.022075782537067545, + "grad_norm": 0.3606654107570648, + "learning_rate": 1.9995391066489573e-05, + "loss": 0.5564, + "step": 804 + }, + { + "epoch": 0.022103239978034046, + "grad_norm": 0.34195756912231445, + "learning_rate": 1.999537794598822e-05, + "loss": 0.6014, + "step": 805 + }, + { + "epoch": 0.02213069741900055, + "grad_norm": 0.39407142996788025, + "learning_rate": 1.9995364806842294e-05, + "loss": 0.5935, + "step": 806 + }, + { + "epoch": 0.022158154859967052, + "grad_norm": 0.3707405924797058, + "learning_rate": 1.9995351649051824e-05, + "loss": 0.6438, + "step": 807 + }, + { + "epoch": 0.022185612300933553, + "grad_norm": 0.4073667824268341, + "learning_rate": 1.9995338472616834e-05, + "loss": 0.5443, + "step": 808 + }, + { + "epoch": 0.022213069741900054, + "grad_norm": 0.40615957975387573, + "learning_rate": 1.9995325277537348e-05, + "loss": 0.5466, + "step": 809 + }, + { + "epoch": 0.022240527182866558, + "grad_norm": 0.4002559781074524, + "learning_rate": 1.9995312063813386e-05, + "loss": 0.5455, + "step": 810 + }, + { + "epoch": 0.02226798462383306, + "grad_norm": 0.3798239827156067, + "learning_rate": 1.9995298831444982e-05, + "loss": 0.5668, + "step": 811 + }, + { + "epoch": 0.02229544206479956, + "grad_norm": 0.3723065257072449, + "learning_rate": 1.9995285580432153e-05, + "loss": 0.5868, + "step": 812 + }, + { + "epoch": 0.02232289950576606, + "grad_norm": 0.3871300220489502, + "learning_rate": 1.9995272310774926e-05, + "loss": 0.6292, + "step": 813 + }, + { + "epoch": 0.022350356946732566, + "grad_norm": 0.38495635986328125, + "learning_rate": 1.999525902247333e-05, + "loss": 0.6666, + "step": 814 + }, + { + "epoch": 0.022377814387699067, + "grad_norm": 0.3858591914176941, + "learning_rate": 1.999524571552738e-05, + "loss": 0.5645, + "step": 815 + }, + { + "epoch": 0.022405271828665568, + "grad_norm": 0.3730102479457855, + "learning_rate": 1.9995232389937108e-05, + "loss": 0.5775, + "step": 816 + }, + { + "epoch": 0.02243272926963207, + "grad_norm": 0.3483995795249939, + "learning_rate": 1.999521904570254e-05, + "loss": 0.5571, + "step": 817 + }, + { + "epoch": 0.022460186710598573, + "grad_norm": 0.34828364849090576, + "learning_rate": 1.9995205682823696e-05, + "loss": 0.6157, + "step": 818 + }, + { + "epoch": 0.022487644151565074, + "grad_norm": 0.4147208333015442, + "learning_rate": 1.9995192301300606e-05, + "loss": 0.5925, + "step": 819 + }, + { + "epoch": 0.022515101592531575, + "grad_norm": 0.36973336338996887, + "learning_rate": 1.9995178901133288e-05, + "loss": 0.585, + "step": 820 + }, + { + "epoch": 0.02254255903349808, + "grad_norm": 0.35538971424102783, + "learning_rate": 1.9995165482321775e-05, + "loss": 0.4958, + "step": 821 + }, + { + "epoch": 0.02257001647446458, + "grad_norm": 0.3477325439453125, + "learning_rate": 1.999515204486609e-05, + "loss": 0.548, + "step": 822 + }, + { + "epoch": 0.02259747391543108, + "grad_norm": 0.38357576727867126, + "learning_rate": 1.999513858876625e-05, + "loss": 0.5381, + "step": 823 + }, + { + "epoch": 0.022624931356397582, + "grad_norm": 0.37185072898864746, + "learning_rate": 1.9995125114022293e-05, + "loss": 0.625, + "step": 824 + }, + { + "epoch": 0.022652388797364087, + "grad_norm": 0.38245031237602234, + "learning_rate": 1.999511162063423e-05, + "loss": 0.6348, + "step": 825 + }, + { + "epoch": 0.022679846238330588, + "grad_norm": 0.41776806116104126, + "learning_rate": 1.99950981086021e-05, + "loss": 0.5791, + "step": 826 + }, + { + "epoch": 0.02270730367929709, + "grad_norm": 0.3829968571662903, + "learning_rate": 1.9995084577925922e-05, + "loss": 0.5873, + "step": 827 + }, + { + "epoch": 0.02273476112026359, + "grad_norm": 0.3826722502708435, + "learning_rate": 1.999507102860572e-05, + "loss": 0.6369, + "step": 828 + }, + { + "epoch": 0.022762218561230094, + "grad_norm": 0.38322916626930237, + "learning_rate": 1.999505746064152e-05, + "loss": 0.6467, + "step": 829 + }, + { + "epoch": 0.022789676002196595, + "grad_norm": 0.3978785276412964, + "learning_rate": 1.9995043874033347e-05, + "loss": 0.6491, + "step": 830 + }, + { + "epoch": 0.022817133443163096, + "grad_norm": 0.48552563786506653, + "learning_rate": 1.999503026878123e-05, + "loss": 0.562, + "step": 831 + }, + { + "epoch": 0.0228445908841296, + "grad_norm": 0.3718736469745636, + "learning_rate": 1.9995016644885185e-05, + "loss": 0.5921, + "step": 832 + }, + { + "epoch": 0.022872048325096102, + "grad_norm": 0.36356237530708313, + "learning_rate": 1.999500300234525e-05, + "loss": 0.5916, + "step": 833 + }, + { + "epoch": 0.022899505766062603, + "grad_norm": 0.40481147170066833, + "learning_rate": 1.9994989341161443e-05, + "loss": 0.5249, + "step": 834 + }, + { + "epoch": 0.022926963207029104, + "grad_norm": 0.40765437483787537, + "learning_rate": 1.999497566133379e-05, + "loss": 0.6033, + "step": 835 + }, + { + "epoch": 0.022954420647995608, + "grad_norm": 0.3418324887752533, + "learning_rate": 1.999496196286232e-05, + "loss": 0.5186, + "step": 836 + }, + { + "epoch": 0.02298187808896211, + "grad_norm": 0.3712979853153229, + "learning_rate": 1.999494824574705e-05, + "loss": 0.4847, + "step": 837 + }, + { + "epoch": 0.02300933552992861, + "grad_norm": 0.3652055561542511, + "learning_rate": 1.9994934509988018e-05, + "loss": 0.511, + "step": 838 + }, + { + "epoch": 0.02303679297089511, + "grad_norm": 0.3624974489212036, + "learning_rate": 1.999492075558524e-05, + "loss": 0.5184, + "step": 839 + }, + { + "epoch": 0.023064250411861616, + "grad_norm": 0.3694970905780792, + "learning_rate": 1.9994906982538744e-05, + "loss": 0.6439, + "step": 840 + }, + { + "epoch": 0.023091707852828117, + "grad_norm": 0.412337064743042, + "learning_rate": 1.9994893190848556e-05, + "loss": 0.5892, + "step": 841 + }, + { + "epoch": 0.023119165293794618, + "grad_norm": 0.38939744234085083, + "learning_rate": 1.9994879380514708e-05, + "loss": 0.5727, + "step": 842 + }, + { + "epoch": 0.02314662273476112, + "grad_norm": 0.3723776340484619, + "learning_rate": 1.9994865551537214e-05, + "loss": 0.5406, + "step": 843 + }, + { + "epoch": 0.023174080175727623, + "grad_norm": 0.4175414741039276, + "learning_rate": 1.9994851703916106e-05, + "loss": 0.5651, + "step": 844 + }, + { + "epoch": 0.023201537616694124, + "grad_norm": 0.3855356276035309, + "learning_rate": 1.9994837837651412e-05, + "loss": 0.6533, + "step": 845 + }, + { + "epoch": 0.023228995057660625, + "grad_norm": 0.4428286850452423, + "learning_rate": 1.9994823952743157e-05, + "loss": 0.487, + "step": 846 + }, + { + "epoch": 0.02325645249862713, + "grad_norm": 0.39513304829597473, + "learning_rate": 1.999481004919136e-05, + "loss": 0.607, + "step": 847 + }, + { + "epoch": 0.02328390993959363, + "grad_norm": 0.5215888619422913, + "learning_rate": 1.9994796126996056e-05, + "loss": 0.5645, + "step": 848 + }, + { + "epoch": 0.02331136738056013, + "grad_norm": 0.40944522619247437, + "learning_rate": 1.9994782186157267e-05, + "loss": 0.531, + "step": 849 + }, + { + "epoch": 0.023338824821526633, + "grad_norm": 0.35032230615615845, + "learning_rate": 1.999476822667502e-05, + "loss": 0.5174, + "step": 850 + }, + { + "epoch": 0.023366282262493137, + "grad_norm": 0.4147513508796692, + "learning_rate": 1.9994754248549338e-05, + "loss": 0.6151, + "step": 851 + }, + { + "epoch": 0.023393739703459638, + "grad_norm": 0.389904648065567, + "learning_rate": 1.999474025178025e-05, + "loss": 0.5613, + "step": 852 + }, + { + "epoch": 0.02342119714442614, + "grad_norm": 0.3529571294784546, + "learning_rate": 1.9994726236367784e-05, + "loss": 0.5358, + "step": 853 + }, + { + "epoch": 0.02344865458539264, + "grad_norm": 0.36591753363609314, + "learning_rate": 1.999471220231196e-05, + "loss": 0.4879, + "step": 854 + }, + { + "epoch": 0.023476112026359144, + "grad_norm": 0.4022122025489807, + "learning_rate": 1.9994698149612806e-05, + "loss": 0.6666, + "step": 855 + }, + { + "epoch": 0.023503569467325645, + "grad_norm": 0.4660525321960449, + "learning_rate": 1.9994684078270357e-05, + "loss": 0.5235, + "step": 856 + }, + { + "epoch": 0.023531026908292146, + "grad_norm": 0.4081370234489441, + "learning_rate": 1.9994669988284628e-05, + "loss": 0.5513, + "step": 857 + }, + { + "epoch": 0.023558484349258647, + "grad_norm": 0.35713183879852295, + "learning_rate": 1.9994655879655653e-05, + "loss": 0.5263, + "step": 858 + }, + { + "epoch": 0.023585941790225152, + "grad_norm": 0.34501883387565613, + "learning_rate": 1.999464175238345e-05, + "loss": 0.4776, + "step": 859 + }, + { + "epoch": 0.023613399231191653, + "grad_norm": 0.3845241963863373, + "learning_rate": 1.9994627606468052e-05, + "loss": 0.5708, + "step": 860 + }, + { + "epoch": 0.023640856672158154, + "grad_norm": 0.3557480275630951, + "learning_rate": 1.999461344190948e-05, + "loss": 0.5578, + "step": 861 + }, + { + "epoch": 0.02366831411312466, + "grad_norm": 0.38580864667892456, + "learning_rate": 1.999459925870777e-05, + "loss": 0.602, + "step": 862 + }, + { + "epoch": 0.02369577155409116, + "grad_norm": 0.4131057560443878, + "learning_rate": 1.999458505686294e-05, + "loss": 0.5707, + "step": 863 + }, + { + "epoch": 0.02372322899505766, + "grad_norm": 0.35431694984436035, + "learning_rate": 1.999457083637502e-05, + "loss": 0.6412, + "step": 864 + }, + { + "epoch": 0.02375068643602416, + "grad_norm": 0.40537914633750916, + "learning_rate": 1.9994556597244035e-05, + "loss": 0.6022, + "step": 865 + }, + { + "epoch": 0.023778143876990666, + "grad_norm": 0.3799135386943817, + "learning_rate": 1.999454233947001e-05, + "loss": 0.6091, + "step": 866 + }, + { + "epoch": 0.023805601317957167, + "grad_norm": 0.4244007170200348, + "learning_rate": 1.9994528063052976e-05, + "loss": 0.5542, + "step": 867 + }, + { + "epoch": 0.023833058758923668, + "grad_norm": 0.5341901779174805, + "learning_rate": 1.9994513767992957e-05, + "loss": 0.5382, + "step": 868 + }, + { + "epoch": 0.02386051619989017, + "grad_norm": 0.3517626225948334, + "learning_rate": 1.999449945428998e-05, + "loss": 0.6505, + "step": 869 + }, + { + "epoch": 0.023887973640856673, + "grad_norm": 0.4160444438457489, + "learning_rate": 1.9994485121944073e-05, + "loss": 0.645, + "step": 870 + }, + { + "epoch": 0.023915431081823174, + "grad_norm": 0.37870872020721436, + "learning_rate": 1.9994470770955258e-05, + "loss": 0.6245, + "step": 871 + }, + { + "epoch": 0.023942888522789675, + "grad_norm": 0.37536153197288513, + "learning_rate": 1.9994456401323566e-05, + "loss": 0.5695, + "step": 872 + }, + { + "epoch": 0.02397034596375618, + "grad_norm": 0.37391453981399536, + "learning_rate": 1.9994442013049025e-05, + "loss": 0.5937, + "step": 873 + }, + { + "epoch": 0.02399780340472268, + "grad_norm": 0.4423391819000244, + "learning_rate": 1.9994427606131658e-05, + "loss": 0.7197, + "step": 874 + }, + { + "epoch": 0.02402526084568918, + "grad_norm": 0.3652022182941437, + "learning_rate": 1.9994413180571495e-05, + "loss": 0.6957, + "step": 875 + }, + { + "epoch": 0.024052718286655683, + "grad_norm": 0.35951510071754456, + "learning_rate": 1.999439873636856e-05, + "loss": 0.5976, + "step": 876 + }, + { + "epoch": 0.024080175727622187, + "grad_norm": 0.3824165165424347, + "learning_rate": 1.9994384273522886e-05, + "loss": 0.5504, + "step": 877 + }, + { + "epoch": 0.024107633168588688, + "grad_norm": 0.3506315052509308, + "learning_rate": 1.999436979203449e-05, + "loss": 0.5528, + "step": 878 + }, + { + "epoch": 0.02413509060955519, + "grad_norm": 0.37292277812957764, + "learning_rate": 1.9994355291903406e-05, + "loss": 0.5909, + "step": 879 + }, + { + "epoch": 0.02416254805052169, + "grad_norm": 0.40433812141418457, + "learning_rate": 1.999434077312966e-05, + "loss": 0.5879, + "step": 880 + }, + { + "epoch": 0.024190005491488194, + "grad_norm": 0.38165444135665894, + "learning_rate": 1.9994326235713278e-05, + "loss": 0.539, + "step": 881 + }, + { + "epoch": 0.024217462932454695, + "grad_norm": 0.3862334191799164, + "learning_rate": 1.999431167965429e-05, + "loss": 0.6716, + "step": 882 + }, + { + "epoch": 0.024244920373421196, + "grad_norm": 0.5726919174194336, + "learning_rate": 1.999429710495272e-05, + "loss": 0.5252, + "step": 883 + }, + { + "epoch": 0.024272377814387697, + "grad_norm": 0.4128020107746124, + "learning_rate": 1.9994282511608595e-05, + "loss": 0.5836, + "step": 884 + }, + { + "epoch": 0.024299835255354202, + "grad_norm": 0.6838932037353516, + "learning_rate": 1.9994267899621942e-05, + "loss": 0.6209, + "step": 885 + }, + { + "epoch": 0.024327292696320703, + "grad_norm": 0.4296434819698334, + "learning_rate": 1.999425326899279e-05, + "loss": 0.5294, + "step": 886 + }, + { + "epoch": 0.024354750137287204, + "grad_norm": 0.38976433873176575, + "learning_rate": 1.9994238619721166e-05, + "loss": 0.6288, + "step": 887 + }, + { + "epoch": 0.02438220757825371, + "grad_norm": 0.39353659749031067, + "learning_rate": 1.99942239518071e-05, + "loss": 0.6612, + "step": 888 + }, + { + "epoch": 0.02440966501922021, + "grad_norm": 0.44789043068885803, + "learning_rate": 1.9994209265250614e-05, + "loss": 0.6279, + "step": 889 + }, + { + "epoch": 0.02443712246018671, + "grad_norm": 0.3838953673839569, + "learning_rate": 1.999419456005174e-05, + "loss": 0.5842, + "step": 890 + }, + { + "epoch": 0.02446457990115321, + "grad_norm": 0.34852832555770874, + "learning_rate": 1.99941798362105e-05, + "loss": 0.4732, + "step": 891 + }, + { + "epoch": 0.024492037342119716, + "grad_norm": 0.39160799980163574, + "learning_rate": 1.9994165093726926e-05, + "loss": 0.5991, + "step": 892 + }, + { + "epoch": 0.024519494783086217, + "grad_norm": 0.37991052865982056, + "learning_rate": 1.9994150332601045e-05, + "loss": 0.6007, + "step": 893 + }, + { + "epoch": 0.024546952224052718, + "grad_norm": 0.34441566467285156, + "learning_rate": 1.9994135552832887e-05, + "loss": 0.5211, + "step": 894 + }, + { + "epoch": 0.02457440966501922, + "grad_norm": 0.3584813177585602, + "learning_rate": 1.999412075442247e-05, + "loss": 0.5809, + "step": 895 + }, + { + "epoch": 0.024601867105985723, + "grad_norm": 0.36873099207878113, + "learning_rate": 1.9994105937369834e-05, + "loss": 0.5957, + "step": 896 + }, + { + "epoch": 0.024629324546952224, + "grad_norm": 0.35527503490448, + "learning_rate": 1.9994091101674997e-05, + "loss": 0.5939, + "step": 897 + }, + { + "epoch": 0.024656781987918725, + "grad_norm": 0.4367346465587616, + "learning_rate": 1.999407624733799e-05, + "loss": 0.6446, + "step": 898 + }, + { + "epoch": 0.02468423942888523, + "grad_norm": 0.3605690598487854, + "learning_rate": 1.9994061374358848e-05, + "loss": 0.5214, + "step": 899 + }, + { + "epoch": 0.02471169686985173, + "grad_norm": 0.34355461597442627, + "learning_rate": 1.9994046482737586e-05, + "loss": 0.5625, + "step": 900 + }, + { + "epoch": 0.02473915431081823, + "grad_norm": 0.3743392527103424, + "learning_rate": 1.9994031572474238e-05, + "loss": 0.6179, + "step": 901 + }, + { + "epoch": 0.024766611751784733, + "grad_norm": 0.3856125771999359, + "learning_rate": 1.999401664356883e-05, + "loss": 0.6375, + "step": 902 + }, + { + "epoch": 0.024794069192751237, + "grad_norm": 0.40766143798828125, + "learning_rate": 1.999400169602139e-05, + "loss": 0.6175, + "step": 903 + }, + { + "epoch": 0.024821526633717738, + "grad_norm": 0.3600632846355438, + "learning_rate": 1.9993986729831953e-05, + "loss": 0.5247, + "step": 904 + }, + { + "epoch": 0.02484898407468424, + "grad_norm": 0.40059682726860046, + "learning_rate": 1.9993971745000537e-05, + "loss": 0.6002, + "step": 905 + }, + { + "epoch": 0.02487644151565074, + "grad_norm": 0.34220048785209656, + "learning_rate": 1.9993956741527176e-05, + "loss": 0.4656, + "step": 906 + }, + { + "epoch": 0.024903898956617244, + "grad_norm": 0.36982086300849915, + "learning_rate": 1.9993941719411893e-05, + "loss": 0.5617, + "step": 907 + }, + { + "epoch": 0.024931356397583745, + "grad_norm": 0.31426599621772766, + "learning_rate": 1.9993926678654723e-05, + "loss": 0.5512, + "step": 908 + }, + { + "epoch": 0.024958813838550246, + "grad_norm": 0.360524982213974, + "learning_rate": 1.9993911619255686e-05, + "loss": 0.5855, + "step": 909 + }, + { + "epoch": 0.024986271279516747, + "grad_norm": 0.3784053325653076, + "learning_rate": 1.9993896541214813e-05, + "loss": 0.5451, + "step": 910 + }, + { + "epoch": 0.025013728720483252, + "grad_norm": 0.3463555872440338, + "learning_rate": 1.999388144453214e-05, + "loss": 0.5203, + "step": 911 + }, + { + "epoch": 0.025041186161449753, + "grad_norm": 0.3917367458343506, + "learning_rate": 1.9993866329207682e-05, + "loss": 0.5777, + "step": 912 + }, + { + "epoch": 0.025068643602416254, + "grad_norm": 0.39325085282325745, + "learning_rate": 1.9993851195241478e-05, + "loss": 0.6316, + "step": 913 + }, + { + "epoch": 0.02509610104338276, + "grad_norm": 0.3751402497291565, + "learning_rate": 1.999383604263355e-05, + "loss": 0.525, + "step": 914 + }, + { + "epoch": 0.02512355848434926, + "grad_norm": 0.40176859498023987, + "learning_rate": 1.9993820871383928e-05, + "loss": 0.5949, + "step": 915 + }, + { + "epoch": 0.02515101592531576, + "grad_norm": 0.34659355878829956, + "learning_rate": 1.999380568149264e-05, + "loss": 0.588, + "step": 916 + }, + { + "epoch": 0.02517847336628226, + "grad_norm": 0.3828376531600952, + "learning_rate": 1.9993790472959715e-05, + "loss": 0.6039, + "step": 917 + }, + { + "epoch": 0.025205930807248766, + "grad_norm": 0.3541743755340576, + "learning_rate": 1.9993775245785183e-05, + "loss": 0.4975, + "step": 918 + }, + { + "epoch": 0.025233388248215267, + "grad_norm": 0.38034483790397644, + "learning_rate": 1.999375999996907e-05, + "loss": 0.596, + "step": 919 + }, + { + "epoch": 0.025260845689181768, + "grad_norm": 0.40771231055259705, + "learning_rate": 1.9993744735511402e-05, + "loss": 0.6564, + "step": 920 + }, + { + "epoch": 0.02528830313014827, + "grad_norm": 0.381778359413147, + "learning_rate": 1.9993729452412214e-05, + "loss": 0.624, + "step": 921 + }, + { + "epoch": 0.025315760571114773, + "grad_norm": 0.37894031405448914, + "learning_rate": 1.9993714150671528e-05, + "loss": 0.6037, + "step": 922 + }, + { + "epoch": 0.025343218012081274, + "grad_norm": 0.42819708585739136, + "learning_rate": 1.999369883028938e-05, + "loss": 0.5761, + "step": 923 + }, + { + "epoch": 0.025370675453047775, + "grad_norm": 0.3666745722293854, + "learning_rate": 1.999368349126579e-05, + "loss": 0.5828, + "step": 924 + }, + { + "epoch": 0.025398132894014276, + "grad_norm": 0.38612958788871765, + "learning_rate": 1.9993668133600793e-05, + "loss": 0.6499, + "step": 925 + }, + { + "epoch": 0.02542559033498078, + "grad_norm": 0.4302365183830261, + "learning_rate": 1.9993652757294414e-05, + "loss": 0.5088, + "step": 926 + }, + { + "epoch": 0.02545304777594728, + "grad_norm": 0.35726818442344666, + "learning_rate": 1.999363736234668e-05, + "loss": 0.5408, + "step": 927 + }, + { + "epoch": 0.025480505216913783, + "grad_norm": 0.39305344223976135, + "learning_rate": 1.9993621948757625e-05, + "loss": 0.584, + "step": 928 + }, + { + "epoch": 0.025507962657880287, + "grad_norm": 0.36686354875564575, + "learning_rate": 1.999360651652728e-05, + "loss": 0.6421, + "step": 929 + }, + { + "epoch": 0.025535420098846788, + "grad_norm": 0.3874843418598175, + "learning_rate": 1.9993591065655665e-05, + "loss": 0.6635, + "step": 930 + }, + { + "epoch": 0.02556287753981329, + "grad_norm": 0.36083051562309265, + "learning_rate": 1.999357559614281e-05, + "loss": 0.5635, + "step": 931 + }, + { + "epoch": 0.02559033498077979, + "grad_norm": 0.37970593571662903, + "learning_rate": 1.999356010798875e-05, + "loss": 0.5698, + "step": 932 + }, + { + "epoch": 0.025617792421746294, + "grad_norm": 0.4769110083580017, + "learning_rate": 1.9993544601193514e-05, + "loss": 0.5988, + "step": 933 + }, + { + "epoch": 0.025645249862712795, + "grad_norm": 0.3667714297771454, + "learning_rate": 1.9993529075757126e-05, + "loss": 0.5569, + "step": 934 + }, + { + "epoch": 0.025672707303679296, + "grad_norm": 0.41776660084724426, + "learning_rate": 1.9993513531679616e-05, + "loss": 0.551, + "step": 935 + }, + { + "epoch": 0.025700164744645797, + "grad_norm": 0.3929305970668793, + "learning_rate": 1.9993497968961012e-05, + "loss": 0.5556, + "step": 936 + }, + { + "epoch": 0.025727622185612302, + "grad_norm": 0.3798709809780121, + "learning_rate": 1.9993482387601347e-05, + "loss": 0.6442, + "step": 937 + }, + { + "epoch": 0.025755079626578803, + "grad_norm": 0.37590518593788147, + "learning_rate": 1.9993466787600645e-05, + "loss": 0.6371, + "step": 938 + }, + { + "epoch": 0.025782537067545304, + "grad_norm": 0.3935871422290802, + "learning_rate": 1.999345116895894e-05, + "loss": 0.6423, + "step": 939 + }, + { + "epoch": 0.02580999450851181, + "grad_norm": 0.41789913177490234, + "learning_rate": 1.999343553167626e-05, + "loss": 0.5489, + "step": 940 + }, + { + "epoch": 0.02583745194947831, + "grad_norm": 0.3577101230621338, + "learning_rate": 1.9993419875752632e-05, + "loss": 0.485, + "step": 941 + }, + { + "epoch": 0.02586490939044481, + "grad_norm": 0.3502335846424103, + "learning_rate": 1.9993404201188084e-05, + "loss": 0.5739, + "step": 942 + }, + { + "epoch": 0.02589236683141131, + "grad_norm": 0.381287544965744, + "learning_rate": 1.999338850798265e-05, + "loss": 0.5111, + "step": 943 + }, + { + "epoch": 0.025919824272377816, + "grad_norm": 0.36284980177879333, + "learning_rate": 1.9993372796136358e-05, + "loss": 0.5962, + "step": 944 + }, + { + "epoch": 0.025947281713344317, + "grad_norm": 0.38347357511520386, + "learning_rate": 1.9993357065649237e-05, + "loss": 0.5895, + "step": 945 + }, + { + "epoch": 0.025974739154310818, + "grad_norm": 0.36783695220947266, + "learning_rate": 1.9993341316521314e-05, + "loss": 0.6196, + "step": 946 + }, + { + "epoch": 0.02600219659527732, + "grad_norm": 0.39926159381866455, + "learning_rate": 1.9993325548752618e-05, + "loss": 0.5397, + "step": 947 + }, + { + "epoch": 0.026029654036243823, + "grad_norm": 0.41217175126075745, + "learning_rate": 1.9993309762343187e-05, + "loss": 0.5607, + "step": 948 + }, + { + "epoch": 0.026057111477210324, + "grad_norm": 0.4106501340866089, + "learning_rate": 1.9993293957293037e-05, + "loss": 0.6317, + "step": 949 + }, + { + "epoch": 0.026084568918176825, + "grad_norm": 0.3664214611053467, + "learning_rate": 1.9993278133602202e-05, + "loss": 0.6514, + "step": 950 + }, + { + "epoch": 0.026112026359143326, + "grad_norm": 0.5807508826255798, + "learning_rate": 1.999326229127072e-05, + "loss": 0.6002, + "step": 951 + }, + { + "epoch": 0.02613948380010983, + "grad_norm": 0.43355438113212585, + "learning_rate": 1.999324643029861e-05, + "loss": 0.592, + "step": 952 + }, + { + "epoch": 0.02616694124107633, + "grad_norm": 0.37691181898117065, + "learning_rate": 1.999323055068591e-05, + "loss": 0.6003, + "step": 953 + }, + { + "epoch": 0.026194398682042833, + "grad_norm": 0.40786439180374146, + "learning_rate": 1.9993214652432645e-05, + "loss": 0.4571, + "step": 954 + }, + { + "epoch": 0.026221856123009337, + "grad_norm": 0.49035903811454773, + "learning_rate": 1.9993198735538842e-05, + "loss": 0.5913, + "step": 955 + }, + { + "epoch": 0.026249313563975838, + "grad_norm": 0.38124987483024597, + "learning_rate": 1.9993182800004537e-05, + "loss": 0.5974, + "step": 956 + }, + { + "epoch": 0.02627677100494234, + "grad_norm": 0.3930895924568176, + "learning_rate": 1.9993166845829758e-05, + "loss": 0.5837, + "step": 957 + }, + { + "epoch": 0.02630422844590884, + "grad_norm": 0.36349180340766907, + "learning_rate": 1.999315087301453e-05, + "loss": 0.6227, + "step": 958 + }, + { + "epoch": 0.026331685886875345, + "grad_norm": 0.3604130148887634, + "learning_rate": 1.9993134881558887e-05, + "loss": 0.5494, + "step": 959 + }, + { + "epoch": 0.026359143327841845, + "grad_norm": 0.3484971225261688, + "learning_rate": 1.9993118871462857e-05, + "loss": 0.4991, + "step": 960 + }, + { + "epoch": 0.026386600768808346, + "grad_norm": 0.3819892108440399, + "learning_rate": 1.999310284272647e-05, + "loss": 0.5932, + "step": 961 + }, + { + "epoch": 0.026414058209774847, + "grad_norm": 0.34435275197029114, + "learning_rate": 1.999308679534976e-05, + "loss": 0.5761, + "step": 962 + }, + { + "epoch": 0.026441515650741352, + "grad_norm": 0.363552063703537, + "learning_rate": 1.9993070729332757e-05, + "loss": 0.547, + "step": 963 + }, + { + "epoch": 0.026468973091707853, + "grad_norm": 0.4074888825416565, + "learning_rate": 1.999305464467548e-05, + "loss": 0.5508, + "step": 964 + }, + { + "epoch": 0.026496430532674354, + "grad_norm": 0.43979841470718384, + "learning_rate": 1.9993038541377968e-05, + "loss": 0.5906, + "step": 965 + }, + { + "epoch": 0.02652388797364086, + "grad_norm": 0.35189196467399597, + "learning_rate": 1.9993022419440253e-05, + "loss": 0.5592, + "step": 966 + }, + { + "epoch": 0.02655134541460736, + "grad_norm": 0.3797662854194641, + "learning_rate": 1.9993006278862358e-05, + "loss": 0.6101, + "step": 967 + }, + { + "epoch": 0.02657880285557386, + "grad_norm": 0.6282324194908142, + "learning_rate": 1.999299011964432e-05, + "loss": 0.5364, + "step": 968 + }, + { + "epoch": 0.02660626029654036, + "grad_norm": 0.38134366273880005, + "learning_rate": 1.9992973941786165e-05, + "loss": 0.645, + "step": 969 + }, + { + "epoch": 0.026633717737506866, + "grad_norm": 0.354566752910614, + "learning_rate": 1.9992957745287928e-05, + "loss": 0.5745, + "step": 970 + }, + { + "epoch": 0.026661175178473367, + "grad_norm": 0.35665130615234375, + "learning_rate": 1.999294153014963e-05, + "loss": 0.5761, + "step": 971 + }, + { + "epoch": 0.026688632619439868, + "grad_norm": 0.3991217017173767, + "learning_rate": 1.9992925296371307e-05, + "loss": 0.582, + "step": 972 + }, + { + "epoch": 0.02671609006040637, + "grad_norm": 0.4167400300502777, + "learning_rate": 1.999290904395299e-05, + "loss": 0.5691, + "step": 973 + }, + { + "epoch": 0.026743547501372873, + "grad_norm": 0.3843333423137665, + "learning_rate": 1.9992892772894713e-05, + "loss": 0.6058, + "step": 974 + }, + { + "epoch": 0.026771004942339374, + "grad_norm": 0.3385389745235443, + "learning_rate": 1.9992876483196495e-05, + "loss": 0.501, + "step": 975 + }, + { + "epoch": 0.026798462383305875, + "grad_norm": 0.35857513546943665, + "learning_rate": 1.9992860174858375e-05, + "loss": 0.5757, + "step": 976 + }, + { + "epoch": 0.026825919824272376, + "grad_norm": 0.3490881025791168, + "learning_rate": 1.9992843847880384e-05, + "loss": 0.5859, + "step": 977 + }, + { + "epoch": 0.02685337726523888, + "grad_norm": 0.3448120951652527, + "learning_rate": 1.9992827502262545e-05, + "loss": 0.6027, + "step": 978 + }, + { + "epoch": 0.02688083470620538, + "grad_norm": 0.34573960304260254, + "learning_rate": 1.9992811138004896e-05, + "loss": 0.5305, + "step": 979 + }, + { + "epoch": 0.026908292147171883, + "grad_norm": 0.3538866639137268, + "learning_rate": 1.9992794755107466e-05, + "loss": 0.5119, + "step": 980 + }, + { + "epoch": 0.026935749588138387, + "grad_norm": 0.32505765557289124, + "learning_rate": 1.9992778353570284e-05, + "loss": 0.6215, + "step": 981 + }, + { + "epoch": 0.026963207029104888, + "grad_norm": 0.4069283902645111, + "learning_rate": 1.999276193339338e-05, + "loss": 0.6201, + "step": 982 + }, + { + "epoch": 0.02699066447007139, + "grad_norm": 0.3572087287902832, + "learning_rate": 1.9992745494576787e-05, + "loss": 0.5313, + "step": 983 + }, + { + "epoch": 0.02701812191103789, + "grad_norm": 0.3803759217262268, + "learning_rate": 1.9992729037120533e-05, + "loss": 0.5444, + "step": 984 + }, + { + "epoch": 0.027045579352004395, + "grad_norm": 0.3695618510246277, + "learning_rate": 1.999271256102465e-05, + "loss": 0.5511, + "step": 985 + }, + { + "epoch": 0.027073036792970896, + "grad_norm": 0.34669381380081177, + "learning_rate": 1.999269606628917e-05, + "loss": 0.6556, + "step": 986 + }, + { + "epoch": 0.027100494233937397, + "grad_norm": 0.375227689743042, + "learning_rate": 1.999267955291412e-05, + "loss": 0.6045, + "step": 987 + }, + { + "epoch": 0.027127951674903897, + "grad_norm": 0.3376089334487915, + "learning_rate": 1.9992663020899536e-05, + "loss": 0.5405, + "step": 988 + }, + { + "epoch": 0.027155409115870402, + "grad_norm": 0.33699193596839905, + "learning_rate": 1.9992646470245446e-05, + "loss": 0.4245, + "step": 989 + }, + { + "epoch": 0.027182866556836903, + "grad_norm": 0.43719902634620667, + "learning_rate": 1.999262990095188e-05, + "loss": 0.5687, + "step": 990 + }, + { + "epoch": 0.027210323997803404, + "grad_norm": 0.4110218584537506, + "learning_rate": 1.9992613313018872e-05, + "loss": 0.6315, + "step": 991 + }, + { + "epoch": 0.027237781438769905, + "grad_norm": 0.3578897714614868, + "learning_rate": 1.999259670644645e-05, + "loss": 0.5946, + "step": 992 + }, + { + "epoch": 0.02726523887973641, + "grad_norm": 0.3775756061077118, + "learning_rate": 1.999258008123464e-05, + "loss": 0.6004, + "step": 993 + }, + { + "epoch": 0.02729269632070291, + "grad_norm": 0.39287376403808594, + "learning_rate": 1.9992563437383485e-05, + "loss": 0.5453, + "step": 994 + }, + { + "epoch": 0.02732015376166941, + "grad_norm": 0.33550596237182617, + "learning_rate": 1.9992546774893013e-05, + "loss": 0.4885, + "step": 995 + }, + { + "epoch": 0.027347611202635916, + "grad_norm": 0.3771209716796875, + "learning_rate": 1.9992530093763248e-05, + "loss": 0.5756, + "step": 996 + }, + { + "epoch": 0.027375068643602417, + "grad_norm": 0.39248737692832947, + "learning_rate": 1.9992513393994222e-05, + "loss": 0.5959, + "step": 997 + }, + { + "epoch": 0.027402526084568918, + "grad_norm": 0.35922709107398987, + "learning_rate": 1.9992496675585972e-05, + "loss": 0.6094, + "step": 998 + }, + { + "epoch": 0.02742998352553542, + "grad_norm": 0.34583619236946106, + "learning_rate": 1.9992479938538528e-05, + "loss": 0.5425, + "step": 999 + }, + { + "epoch": 0.027457440966501923, + "grad_norm": 0.3636701703071594, + "learning_rate": 1.999246318285192e-05, + "loss": 0.6242, + "step": 1000 + }, + { + "epoch": 0.027484898407468424, + "grad_norm": 0.8384482860565186, + "learning_rate": 1.9992446408526177e-05, + "loss": 0.5727, + "step": 1001 + }, + { + "epoch": 0.027512355848434925, + "grad_norm": 0.33956918120384216, + "learning_rate": 1.9992429615561334e-05, + "loss": 0.5909, + "step": 1002 + }, + { + "epoch": 0.027539813289401426, + "grad_norm": 0.4058050811290741, + "learning_rate": 1.9992412803957417e-05, + "loss": 0.6054, + "step": 1003 + }, + { + "epoch": 0.02756727073036793, + "grad_norm": 0.36798644065856934, + "learning_rate": 1.9992395973714465e-05, + "loss": 0.5416, + "step": 1004 + }, + { + "epoch": 0.02759472817133443, + "grad_norm": 0.3728574514389038, + "learning_rate": 1.99923791248325e-05, + "loss": 0.5384, + "step": 1005 + }, + { + "epoch": 0.027622185612300933, + "grad_norm": 0.45467543601989746, + "learning_rate": 1.9992362257311565e-05, + "loss": 0.5715, + "step": 1006 + }, + { + "epoch": 0.027649643053267437, + "grad_norm": 0.3525679409503937, + "learning_rate": 1.999234537115168e-05, + "loss": 0.512, + "step": 1007 + }, + { + "epoch": 0.027677100494233938, + "grad_norm": 0.3841477334499359, + "learning_rate": 1.9992328466352883e-05, + "loss": 0.5587, + "step": 1008 + }, + { + "epoch": 0.02770455793520044, + "grad_norm": 0.35613855719566345, + "learning_rate": 1.9992311542915205e-05, + "loss": 0.5624, + "step": 1009 + }, + { + "epoch": 0.02773201537616694, + "grad_norm": 0.39970070123672485, + "learning_rate": 1.999229460083868e-05, + "loss": 0.6877, + "step": 1010 + }, + { + "epoch": 0.027759472817133445, + "grad_norm": 0.37386220693588257, + "learning_rate": 1.9992277640123336e-05, + "loss": 0.4992, + "step": 1011 + }, + { + "epoch": 0.027786930258099946, + "grad_norm": 0.37003666162490845, + "learning_rate": 1.9992260660769198e-05, + "loss": 0.6107, + "step": 1012 + }, + { + "epoch": 0.027814387699066447, + "grad_norm": 0.3546055853366852, + "learning_rate": 1.999224366277631e-05, + "loss": 0.4786, + "step": 1013 + }, + { + "epoch": 0.027841845140032948, + "grad_norm": 0.38332098722457886, + "learning_rate": 1.99922266461447e-05, + "loss": 0.5946, + "step": 1014 + }, + { + "epoch": 0.027869302580999452, + "grad_norm": 0.35668495297431946, + "learning_rate": 1.9992209610874394e-05, + "loss": 0.5754, + "step": 1015 + }, + { + "epoch": 0.027896760021965953, + "grad_norm": 0.3704359233379364, + "learning_rate": 1.999219255696543e-05, + "loss": 0.6031, + "step": 1016 + }, + { + "epoch": 0.027924217462932454, + "grad_norm": 0.40456390380859375, + "learning_rate": 1.9992175484417837e-05, + "loss": 0.6537, + "step": 1017 + }, + { + "epoch": 0.027951674903898955, + "grad_norm": 0.3857343792915344, + "learning_rate": 1.9992158393231647e-05, + "loss": 0.5281, + "step": 1018 + }, + { + "epoch": 0.02797913234486546, + "grad_norm": 0.41663751006126404, + "learning_rate": 1.9992141283406897e-05, + "loss": 0.6063, + "step": 1019 + }, + { + "epoch": 0.02800658978583196, + "grad_norm": 0.3914123475551605, + "learning_rate": 1.9992124154943607e-05, + "loss": 0.6007, + "step": 1020 + }, + { + "epoch": 0.02803404722679846, + "grad_norm": 0.3512546122074127, + "learning_rate": 1.999210700784182e-05, + "loss": 0.627, + "step": 1021 + }, + { + "epoch": 0.028061504667764966, + "grad_norm": 0.4507715702056885, + "learning_rate": 1.9992089842101567e-05, + "loss": 0.6909, + "step": 1022 + }, + { + "epoch": 0.028088962108731467, + "grad_norm": 0.37227052450180054, + "learning_rate": 1.9992072657722877e-05, + "loss": 0.5917, + "step": 1023 + }, + { + "epoch": 0.028116419549697968, + "grad_norm": 0.3790050446987152, + "learning_rate": 1.999205545470578e-05, + "loss": 0.582, + "step": 1024 + }, + { + "epoch": 0.02814387699066447, + "grad_norm": 0.4073377847671509, + "learning_rate": 1.999203823305031e-05, + "loss": 0.5731, + "step": 1025 + }, + { + "epoch": 0.028171334431630973, + "grad_norm": 0.44958582520484924, + "learning_rate": 1.99920209927565e-05, + "loss": 0.6524, + "step": 1026 + }, + { + "epoch": 0.028198791872597474, + "grad_norm": 0.37123727798461914, + "learning_rate": 1.999200373382438e-05, + "loss": 0.543, + "step": 1027 + }, + { + "epoch": 0.028226249313563975, + "grad_norm": 0.35310420393943787, + "learning_rate": 1.9991986456253988e-05, + "loss": 0.5625, + "step": 1028 + }, + { + "epoch": 0.028253706754530476, + "grad_norm": 0.3420066237449646, + "learning_rate": 1.999196916004535e-05, + "loss": 0.5135, + "step": 1029 + }, + { + "epoch": 0.02828116419549698, + "grad_norm": 0.40854907035827637, + "learning_rate": 1.99919518451985e-05, + "loss": 0.553, + "step": 1030 + }, + { + "epoch": 0.02830862163646348, + "grad_norm": 0.3704095184803009, + "learning_rate": 1.999193451171347e-05, + "loss": 0.5172, + "step": 1031 + }, + { + "epoch": 0.028336079077429983, + "grad_norm": 0.33348026871681213, + "learning_rate": 1.9991917159590298e-05, + "loss": 0.4688, + "step": 1032 + }, + { + "epoch": 0.028363536518396487, + "grad_norm": 0.4510387182235718, + "learning_rate": 1.9991899788829005e-05, + "loss": 0.6268, + "step": 1033 + }, + { + "epoch": 0.028390993959362988, + "grad_norm": 0.3433258533477783, + "learning_rate": 1.9991882399429636e-05, + "loss": 0.5433, + "step": 1034 + }, + { + "epoch": 0.02841845140032949, + "grad_norm": 0.393830806016922, + "learning_rate": 1.9991864991392212e-05, + "loss": 0.5921, + "step": 1035 + }, + { + "epoch": 0.02844590884129599, + "grad_norm": 0.3533097505569458, + "learning_rate": 1.9991847564716777e-05, + "loss": 0.6632, + "step": 1036 + }, + { + "epoch": 0.028473366282262495, + "grad_norm": 0.3673514127731323, + "learning_rate": 1.999183011940335e-05, + "loss": 0.5775, + "step": 1037 + }, + { + "epoch": 0.028500823723228996, + "grad_norm": 0.3725419342517853, + "learning_rate": 1.9991812655451974e-05, + "loss": 0.61, + "step": 1038 + }, + { + "epoch": 0.028528281164195497, + "grad_norm": 0.375331848859787, + "learning_rate": 1.999179517286268e-05, + "loss": 0.5481, + "step": 1039 + }, + { + "epoch": 0.028555738605161998, + "grad_norm": 0.5290784239768982, + "learning_rate": 1.9991777671635498e-05, + "loss": 0.5292, + "step": 1040 + }, + { + "epoch": 0.028583196046128502, + "grad_norm": 0.35392361879348755, + "learning_rate": 1.999176015177046e-05, + "loss": 0.5858, + "step": 1041 + }, + { + "epoch": 0.028610653487095003, + "grad_norm": 0.3944548964500427, + "learning_rate": 1.99917426132676e-05, + "loss": 0.5517, + "step": 1042 + }, + { + "epoch": 0.028638110928061504, + "grad_norm": 0.3355981409549713, + "learning_rate": 1.9991725056126953e-05, + "loss": 0.5331, + "step": 1043 + }, + { + "epoch": 0.028665568369028005, + "grad_norm": 1.168067216873169, + "learning_rate": 1.999170748034855e-05, + "loss": 0.6595, + "step": 1044 + }, + { + "epoch": 0.02869302580999451, + "grad_norm": 0.3683520257472992, + "learning_rate": 1.999168988593242e-05, + "loss": 0.5855, + "step": 1045 + }, + { + "epoch": 0.02872048325096101, + "grad_norm": 0.4193560779094696, + "learning_rate": 1.9991672272878603e-05, + "loss": 0.5877, + "step": 1046 + }, + { + "epoch": 0.02874794069192751, + "grad_norm": 0.4037628471851349, + "learning_rate": 1.9991654641187127e-05, + "loss": 0.5953, + "step": 1047 + }, + { + "epoch": 0.028775398132894016, + "grad_norm": 0.36947256326675415, + "learning_rate": 1.999163699085803e-05, + "loss": 0.513, + "step": 1048 + }, + { + "epoch": 0.028802855573860517, + "grad_norm": 0.47230595350265503, + "learning_rate": 1.9991619321891336e-05, + "loss": 0.5131, + "step": 1049 + }, + { + "epoch": 0.028830313014827018, + "grad_norm": 0.38674768805503845, + "learning_rate": 1.9991601634287083e-05, + "loss": 0.5004, + "step": 1050 + }, + { + "epoch": 0.02885777045579352, + "grad_norm": 0.3782259225845337, + "learning_rate": 1.9991583928045306e-05, + "loss": 0.5883, + "step": 1051 + }, + { + "epoch": 0.028885227896760023, + "grad_norm": 0.37078866362571716, + "learning_rate": 1.9991566203166037e-05, + "loss": 0.5863, + "step": 1052 + }, + { + "epoch": 0.028912685337726524, + "grad_norm": 0.3927291929721832, + "learning_rate": 1.9991548459649305e-05, + "loss": 0.5285, + "step": 1053 + }, + { + "epoch": 0.028940142778693025, + "grad_norm": 0.36980345845222473, + "learning_rate": 1.999153069749515e-05, + "loss": 0.5759, + "step": 1054 + }, + { + "epoch": 0.028967600219659526, + "grad_norm": 0.3808493912220001, + "learning_rate": 1.99915129167036e-05, + "loss": 0.5403, + "step": 1055 + }, + { + "epoch": 0.02899505766062603, + "grad_norm": 0.42090263962745667, + "learning_rate": 1.9991495117274686e-05, + "loss": 0.6497, + "step": 1056 + }, + { + "epoch": 0.02902251510159253, + "grad_norm": 0.38748452067375183, + "learning_rate": 1.9991477299208446e-05, + "loss": 0.6084, + "step": 1057 + }, + { + "epoch": 0.029049972542559033, + "grad_norm": 0.39346593618392944, + "learning_rate": 1.9991459462504913e-05, + "loss": 0.6373, + "step": 1058 + }, + { + "epoch": 0.029077429983525534, + "grad_norm": 0.3686029613018036, + "learning_rate": 1.999144160716412e-05, + "loss": 0.5658, + "step": 1059 + }, + { + "epoch": 0.029104887424492038, + "grad_norm": 0.365413099527359, + "learning_rate": 1.99914237331861e-05, + "loss": 0.5347, + "step": 1060 + }, + { + "epoch": 0.02913234486545854, + "grad_norm": 0.34688809514045715, + "learning_rate": 1.9991405840570886e-05, + "loss": 0.5749, + "step": 1061 + }, + { + "epoch": 0.02915980230642504, + "grad_norm": 0.3781895041465759, + "learning_rate": 1.999138792931851e-05, + "loss": 0.5949, + "step": 1062 + }, + { + "epoch": 0.029187259747391545, + "grad_norm": 0.34147363901138306, + "learning_rate": 1.9991369999429006e-05, + "loss": 0.5444, + "step": 1063 + }, + { + "epoch": 0.029214717188358046, + "grad_norm": 0.3793453872203827, + "learning_rate": 1.999135205090241e-05, + "loss": 0.6387, + "step": 1064 + }, + { + "epoch": 0.029242174629324547, + "grad_norm": 0.42361509799957275, + "learning_rate": 1.999133408373875e-05, + "loss": 0.6936, + "step": 1065 + }, + { + "epoch": 0.029269632070291048, + "grad_norm": 0.3378000855445862, + "learning_rate": 1.9991316097938066e-05, + "loss": 0.5407, + "step": 1066 + }, + { + "epoch": 0.029297089511257552, + "grad_norm": 0.41527846455574036, + "learning_rate": 1.999129809350039e-05, + "loss": 0.6462, + "step": 1067 + }, + { + "epoch": 0.029324546952224053, + "grad_norm": 0.35297030210494995, + "learning_rate": 1.999128007042575e-05, + "loss": 0.6128, + "step": 1068 + }, + { + "epoch": 0.029352004393190554, + "grad_norm": 0.3846484124660492, + "learning_rate": 1.9991262028714185e-05, + "loss": 0.5488, + "step": 1069 + }, + { + "epoch": 0.029379461834157055, + "grad_norm": 0.35821110010147095, + "learning_rate": 1.999124396836573e-05, + "loss": 0.4963, + "step": 1070 + }, + { + "epoch": 0.02940691927512356, + "grad_norm": 0.4034867286682129, + "learning_rate": 1.9991225889380415e-05, + "loss": 0.5325, + "step": 1071 + }, + { + "epoch": 0.02943437671609006, + "grad_norm": 0.4116135537624359, + "learning_rate": 1.9991207791758276e-05, + "loss": 0.5411, + "step": 1072 + }, + { + "epoch": 0.02946183415705656, + "grad_norm": 0.3718913793563843, + "learning_rate": 1.9991189675499342e-05, + "loss": 0.6349, + "step": 1073 + }, + { + "epoch": 0.029489291598023066, + "grad_norm": 0.36586418747901917, + "learning_rate": 1.9991171540603654e-05, + "loss": 0.5424, + "step": 1074 + }, + { + "epoch": 0.029516749038989567, + "grad_norm": 0.3628186881542206, + "learning_rate": 1.9991153387071242e-05, + "loss": 0.5816, + "step": 1075 + }, + { + "epoch": 0.029544206479956068, + "grad_norm": 0.3632669746875763, + "learning_rate": 1.9991135214902137e-05, + "loss": 0.5272, + "step": 1076 + }, + { + "epoch": 0.02957166392092257, + "grad_norm": 0.36830535531044006, + "learning_rate": 1.999111702409638e-05, + "loss": 0.5201, + "step": 1077 + }, + { + "epoch": 0.029599121361889073, + "grad_norm": 0.41280773282051086, + "learning_rate": 1.9991098814654e-05, + "loss": 0.4716, + "step": 1078 + }, + { + "epoch": 0.029626578802855574, + "grad_norm": 0.38160622119903564, + "learning_rate": 1.9991080586575027e-05, + "loss": 0.6599, + "step": 1079 + }, + { + "epoch": 0.029654036243822075, + "grad_norm": 0.33139970898628235, + "learning_rate": 1.9991062339859507e-05, + "loss": 0.4846, + "step": 1080 + }, + { + "epoch": 0.029681493684788576, + "grad_norm": 0.36915552616119385, + "learning_rate": 1.9991044074507463e-05, + "loss": 0.543, + "step": 1081 + }, + { + "epoch": 0.02970895112575508, + "grad_norm": 0.42304274439811707, + "learning_rate": 1.9991025790518933e-05, + "loss": 0.7284, + "step": 1082 + }, + { + "epoch": 0.029736408566721582, + "grad_norm": 0.3540177047252655, + "learning_rate": 1.999100748789395e-05, + "loss": 0.611, + "step": 1083 + }, + { + "epoch": 0.029763866007688083, + "grad_norm": 0.3768143951892853, + "learning_rate": 1.9990989166632552e-05, + "loss": 0.5706, + "step": 1084 + }, + { + "epoch": 0.029791323448654584, + "grad_norm": 0.33821359276771545, + "learning_rate": 1.9990970826734772e-05, + "loss": 0.6365, + "step": 1085 + }, + { + "epoch": 0.029818780889621088, + "grad_norm": 0.4141998291015625, + "learning_rate": 1.9990952468200637e-05, + "loss": 0.5755, + "step": 1086 + }, + { + "epoch": 0.02984623833058759, + "grad_norm": 0.4378417730331421, + "learning_rate": 1.9990934091030192e-05, + "loss": 0.5097, + "step": 1087 + }, + { + "epoch": 0.02987369577155409, + "grad_norm": 0.3431118130683899, + "learning_rate": 1.9990915695223464e-05, + "loss": 0.4902, + "step": 1088 + }, + { + "epoch": 0.029901153212520595, + "grad_norm": 0.3951015770435333, + "learning_rate": 1.999089728078049e-05, + "loss": 0.6611, + "step": 1089 + }, + { + "epoch": 0.029928610653487096, + "grad_norm": 0.3511399030685425, + "learning_rate": 1.9990878847701306e-05, + "loss": 0.5494, + "step": 1090 + }, + { + "epoch": 0.029956068094453597, + "grad_norm": 0.4076765477657318, + "learning_rate": 1.999086039598594e-05, + "loss": 0.6112, + "step": 1091 + }, + { + "epoch": 0.029983525535420098, + "grad_norm": 0.3971415162086487, + "learning_rate": 1.9990841925634434e-05, + "loss": 0.5042, + "step": 1092 + }, + { + "epoch": 0.030010982976386602, + "grad_norm": 0.40476614236831665, + "learning_rate": 1.9990823436646817e-05, + "loss": 0.5755, + "step": 1093 + }, + { + "epoch": 0.030038440417353103, + "grad_norm": 0.3497704267501831, + "learning_rate": 1.9990804929023127e-05, + "loss": 0.487, + "step": 1094 + }, + { + "epoch": 0.030065897858319604, + "grad_norm": 0.3888041079044342, + "learning_rate": 1.9990786402763397e-05, + "loss": 0.5982, + "step": 1095 + }, + { + "epoch": 0.030093355299286105, + "grad_norm": 0.38021576404571533, + "learning_rate": 1.9990767857867662e-05, + "loss": 0.5539, + "step": 1096 + }, + { + "epoch": 0.03012081274025261, + "grad_norm": 0.36477571725845337, + "learning_rate": 1.9990749294335955e-05, + "loss": 0.5914, + "step": 1097 + }, + { + "epoch": 0.03014827018121911, + "grad_norm": 0.3786374628543854, + "learning_rate": 1.999073071216831e-05, + "loss": 0.5545, + "step": 1098 + }, + { + "epoch": 0.03017572762218561, + "grad_norm": 0.36763229966163635, + "learning_rate": 1.9990712111364766e-05, + "loss": 0.5119, + "step": 1099 + }, + { + "epoch": 0.030203185063152116, + "grad_norm": 0.3615340292453766, + "learning_rate": 1.9990693491925354e-05, + "loss": 0.5437, + "step": 1100 + }, + { + "epoch": 0.030230642504118617, + "grad_norm": 0.39813360571861267, + "learning_rate": 1.9990674853850113e-05, + "loss": 0.5932, + "step": 1101 + }, + { + "epoch": 0.030258099945085118, + "grad_norm": 0.45678070187568665, + "learning_rate": 1.9990656197139073e-05, + "loss": 0.6147, + "step": 1102 + }, + { + "epoch": 0.03028555738605162, + "grad_norm": 0.4664384722709656, + "learning_rate": 1.999063752179227e-05, + "loss": 0.5372, + "step": 1103 + }, + { + "epoch": 0.030313014827018123, + "grad_norm": 0.37013372778892517, + "learning_rate": 1.9990618827809742e-05, + "loss": 0.5952, + "step": 1104 + }, + { + "epoch": 0.030340472267984624, + "grad_norm": 0.36184975504875183, + "learning_rate": 1.9990600115191515e-05, + "loss": 0.5812, + "step": 1105 + }, + { + "epoch": 0.030367929708951125, + "grad_norm": 0.40883713960647583, + "learning_rate": 1.9990581383937635e-05, + "loss": 0.6318, + "step": 1106 + }, + { + "epoch": 0.030395387149917626, + "grad_norm": 0.35589897632598877, + "learning_rate": 1.9990562634048133e-05, + "loss": 0.52, + "step": 1107 + }, + { + "epoch": 0.03042284459088413, + "grad_norm": 0.6323639154434204, + "learning_rate": 1.9990543865523038e-05, + "loss": 0.5457, + "step": 1108 + }, + { + "epoch": 0.030450302031850632, + "grad_norm": 0.38986527919769287, + "learning_rate": 1.99905250783624e-05, + "loss": 0.5215, + "step": 1109 + }, + { + "epoch": 0.030477759472817133, + "grad_norm": 0.38410794734954834, + "learning_rate": 1.999050627256623e-05, + "loss": 0.5393, + "step": 1110 + }, + { + "epoch": 0.030505216913783634, + "grad_norm": 0.36383768916130066, + "learning_rate": 1.9990487448134587e-05, + "loss": 0.6437, + "step": 1111 + }, + { + "epoch": 0.030532674354750138, + "grad_norm": 0.3929080665111542, + "learning_rate": 1.9990468605067493e-05, + "loss": 0.6736, + "step": 1112 + }, + { + "epoch": 0.03056013179571664, + "grad_norm": 0.3574185371398926, + "learning_rate": 1.999044974336499e-05, + "loss": 0.5548, + "step": 1113 + }, + { + "epoch": 0.03058758923668314, + "grad_norm": 0.39505741000175476, + "learning_rate": 1.9990430863027107e-05, + "loss": 0.6055, + "step": 1114 + }, + { + "epoch": 0.030615046677649645, + "grad_norm": 0.4200162887573242, + "learning_rate": 1.999041196405388e-05, + "loss": 0.6549, + "step": 1115 + }, + { + "epoch": 0.030642504118616146, + "grad_norm": 0.401910662651062, + "learning_rate": 1.999039304644535e-05, + "loss": 0.5333, + "step": 1116 + }, + { + "epoch": 0.030669961559582647, + "grad_norm": 0.36436882615089417, + "learning_rate": 1.9990374110201544e-05, + "loss": 0.5921, + "step": 1117 + }, + { + "epoch": 0.030697419000549148, + "grad_norm": 0.40377339720726013, + "learning_rate": 1.9990355155322506e-05, + "loss": 0.6137, + "step": 1118 + }, + { + "epoch": 0.030724876441515652, + "grad_norm": 0.38865959644317627, + "learning_rate": 1.9990336181808266e-05, + "loss": 0.5947, + "step": 1119 + }, + { + "epoch": 0.030752333882482153, + "grad_norm": 0.3577164113521576, + "learning_rate": 1.999031718965886e-05, + "loss": 0.5676, + "step": 1120 + }, + { + "epoch": 0.030779791323448654, + "grad_norm": 0.3906039893627167, + "learning_rate": 1.9990298178874322e-05, + "loss": 0.5914, + "step": 1121 + }, + { + "epoch": 0.030807248764415155, + "grad_norm": 0.41052085161209106, + "learning_rate": 1.999027914945469e-05, + "loss": 0.5646, + "step": 1122 + }, + { + "epoch": 0.03083470620538166, + "grad_norm": 0.34488677978515625, + "learning_rate": 1.9990260101400003e-05, + "loss": 0.5343, + "step": 1123 + }, + { + "epoch": 0.03086216364634816, + "grad_norm": 0.3855760991573334, + "learning_rate": 1.9990241034710288e-05, + "loss": 0.6124, + "step": 1124 + }, + { + "epoch": 0.03088962108731466, + "grad_norm": 0.4068845510482788, + "learning_rate": 1.9990221949385588e-05, + "loss": 0.6484, + "step": 1125 + }, + { + "epoch": 0.030917078528281162, + "grad_norm": 0.3437855839729309, + "learning_rate": 1.9990202845425935e-05, + "loss": 0.4952, + "step": 1126 + }, + { + "epoch": 0.030944535969247667, + "grad_norm": 0.5546271204948425, + "learning_rate": 1.999018372283136e-05, + "loss": 0.626, + "step": 1127 + }, + { + "epoch": 0.030971993410214168, + "grad_norm": 0.3687724173069, + "learning_rate": 1.999016458160191e-05, + "loss": 0.6101, + "step": 1128 + }, + { + "epoch": 0.03099945085118067, + "grad_norm": 0.42107245326042175, + "learning_rate": 1.9990145421737613e-05, + "loss": 0.5464, + "step": 1129 + }, + { + "epoch": 0.031026908292147173, + "grad_norm": 0.40520480275154114, + "learning_rate": 1.9990126243238506e-05, + "loss": 0.5827, + "step": 1130 + }, + { + "epoch": 0.031054365733113674, + "grad_norm": 0.4114346504211426, + "learning_rate": 1.9990107046104623e-05, + "loss": 0.5725, + "step": 1131 + }, + { + "epoch": 0.031081823174080175, + "grad_norm": 0.33798307180404663, + "learning_rate": 1.9990087830336005e-05, + "loss": 0.5956, + "step": 1132 + }, + { + "epoch": 0.031109280615046676, + "grad_norm": 0.3678004741668701, + "learning_rate": 1.9990068595932682e-05, + "loss": 0.5856, + "step": 1133 + }, + { + "epoch": 0.03113673805601318, + "grad_norm": 0.4111005961894989, + "learning_rate": 1.9990049342894696e-05, + "loss": 0.5893, + "step": 1134 + }, + { + "epoch": 0.031164195496979682, + "grad_norm": 0.33263298869132996, + "learning_rate": 1.9990030071222076e-05, + "loss": 0.4931, + "step": 1135 + }, + { + "epoch": 0.031191652937946183, + "grad_norm": 0.38465067744255066, + "learning_rate": 1.9990010780914864e-05, + "loss": 0.6097, + "step": 1136 + }, + { + "epoch": 0.031219110378912684, + "grad_norm": 0.4152178168296814, + "learning_rate": 1.9989991471973093e-05, + "loss": 0.5101, + "step": 1137 + }, + { + "epoch": 0.031246567819879188, + "grad_norm": 0.34545642137527466, + "learning_rate": 1.99899721443968e-05, + "loss": 0.5723, + "step": 1138 + }, + { + "epoch": 0.03127402526084569, + "grad_norm": 0.32596585154533386, + "learning_rate": 1.9989952798186016e-05, + "loss": 0.5128, + "step": 1139 + }, + { + "epoch": 0.03130148270181219, + "grad_norm": 0.3676753342151642, + "learning_rate": 1.9989933433340785e-05, + "loss": 0.6046, + "step": 1140 + }, + { + "epoch": 0.03132894014277869, + "grad_norm": 0.38751524686813354, + "learning_rate": 1.9989914049861145e-05, + "loss": 0.5558, + "step": 1141 + }, + { + "epoch": 0.03135639758374519, + "grad_norm": 0.3586612045764923, + "learning_rate": 1.9989894647747118e-05, + "loss": 0.4406, + "step": 1142 + }, + { + "epoch": 0.0313838550247117, + "grad_norm": 0.4083050489425659, + "learning_rate": 1.9989875226998753e-05, + "loss": 0.5747, + "step": 1143 + }, + { + "epoch": 0.0314113124656782, + "grad_norm": 0.4099906086921692, + "learning_rate": 1.9989855787616083e-05, + "loss": 0.6319, + "step": 1144 + }, + { + "epoch": 0.0314387699066447, + "grad_norm": 0.42153939604759216, + "learning_rate": 1.9989836329599142e-05, + "loss": 0.5808, + "step": 1145 + }, + { + "epoch": 0.0314662273476112, + "grad_norm": 0.366277813911438, + "learning_rate": 1.9989816852947968e-05, + "loss": 0.53, + "step": 1146 + }, + { + "epoch": 0.031493684788577704, + "grad_norm": 0.3842029273509979, + "learning_rate": 1.9989797357662598e-05, + "loss": 0.6357, + "step": 1147 + }, + { + "epoch": 0.031521142229544205, + "grad_norm": 0.37428995966911316, + "learning_rate": 1.9989777843743068e-05, + "loss": 0.5063, + "step": 1148 + }, + { + "epoch": 0.031548599670510706, + "grad_norm": 0.3466823399066925, + "learning_rate": 1.9989758311189413e-05, + "loss": 0.5349, + "step": 1149 + }, + { + "epoch": 0.03157605711147721, + "grad_norm": 0.355495810508728, + "learning_rate": 1.998973876000167e-05, + "loss": 0.5047, + "step": 1150 + }, + { + "epoch": 0.031603514552443715, + "grad_norm": 0.3858616054058075, + "learning_rate": 1.9989719190179877e-05, + "loss": 0.5748, + "step": 1151 + }, + { + "epoch": 0.031630971993410216, + "grad_norm": 0.38830262422561646, + "learning_rate": 1.9989699601724067e-05, + "loss": 0.5983, + "step": 1152 + }, + { + "epoch": 0.03165842943437672, + "grad_norm": 0.3450514078140259, + "learning_rate": 1.9989679994634285e-05, + "loss": 0.6177, + "step": 1153 + }, + { + "epoch": 0.03168588687534322, + "grad_norm": 0.3923671841621399, + "learning_rate": 1.9989660368910557e-05, + "loss": 0.5356, + "step": 1154 + }, + { + "epoch": 0.03171334431630972, + "grad_norm": 0.3242569863796234, + "learning_rate": 1.998964072455292e-05, + "loss": 0.4951, + "step": 1155 + }, + { + "epoch": 0.03174080175727622, + "grad_norm": 0.38521701097488403, + "learning_rate": 1.9989621061561423e-05, + "loss": 0.5055, + "step": 1156 + }, + { + "epoch": 0.03176825919824272, + "grad_norm": 0.3671351969242096, + "learning_rate": 1.998960137993609e-05, + "loss": 0.4833, + "step": 1157 + }, + { + "epoch": 0.03179571663920923, + "grad_norm": 0.34628114104270935, + "learning_rate": 1.998958167967696e-05, + "loss": 0.539, + "step": 1158 + }, + { + "epoch": 0.03182317408017573, + "grad_norm": 0.32310134172439575, + "learning_rate": 1.9989561960784077e-05, + "loss": 0.432, + "step": 1159 + }, + { + "epoch": 0.03185063152114223, + "grad_norm": 0.3981914222240448, + "learning_rate": 1.998954222325747e-05, + "loss": 0.6005, + "step": 1160 + }, + { + "epoch": 0.03187808896210873, + "grad_norm": 0.5005843639373779, + "learning_rate": 1.998952246709718e-05, + "loss": 0.5962, + "step": 1161 + }, + { + "epoch": 0.03190554640307523, + "grad_norm": 0.4504685401916504, + "learning_rate": 1.9989502692303238e-05, + "loss": 0.5613, + "step": 1162 + }, + { + "epoch": 0.031933003844041734, + "grad_norm": 0.3916843831539154, + "learning_rate": 1.9989482898875687e-05, + "loss": 0.6205, + "step": 1163 + }, + { + "epoch": 0.031960461285008235, + "grad_norm": 0.35856735706329346, + "learning_rate": 1.9989463086814566e-05, + "loss": 0.5617, + "step": 1164 + }, + { + "epoch": 0.031987918725974736, + "grad_norm": 0.3771199584007263, + "learning_rate": 1.9989443256119903e-05, + "loss": 0.6294, + "step": 1165 + }, + { + "epoch": 0.032015376166941244, + "grad_norm": 0.35168740153312683, + "learning_rate": 1.9989423406791742e-05, + "loss": 0.5323, + "step": 1166 + }, + { + "epoch": 0.032042833607907745, + "grad_norm": 0.4223398268222809, + "learning_rate": 1.998940353883012e-05, + "loss": 0.6888, + "step": 1167 + }, + { + "epoch": 0.032070291048874246, + "grad_norm": 0.39900821447372437, + "learning_rate": 1.998938365223507e-05, + "loss": 0.6069, + "step": 1168 + }, + { + "epoch": 0.03209774848984075, + "grad_norm": 0.35758841037750244, + "learning_rate": 1.998936374700663e-05, + "loss": 0.5155, + "step": 1169 + }, + { + "epoch": 0.03212520593080725, + "grad_norm": 0.3440866470336914, + "learning_rate": 1.9989343823144842e-05, + "loss": 0.566, + "step": 1170 + }, + { + "epoch": 0.03215266337177375, + "grad_norm": 0.35869526863098145, + "learning_rate": 1.9989323880649738e-05, + "loss": 0.5594, + "step": 1171 + }, + { + "epoch": 0.03218012081274025, + "grad_norm": 0.39065420627593994, + "learning_rate": 1.9989303919521356e-05, + "loss": 0.5445, + "step": 1172 + }, + { + "epoch": 0.03220757825370676, + "grad_norm": 0.3631516993045807, + "learning_rate": 1.9989283939759737e-05, + "loss": 0.5016, + "step": 1173 + }, + { + "epoch": 0.03223503569467326, + "grad_norm": 0.3453170657157898, + "learning_rate": 1.998926394136491e-05, + "loss": 0.5364, + "step": 1174 + }, + { + "epoch": 0.03226249313563976, + "grad_norm": 0.366641640663147, + "learning_rate": 1.998924392433692e-05, + "loss": 0.5915, + "step": 1175 + }, + { + "epoch": 0.03228995057660626, + "grad_norm": 0.3558062016963959, + "learning_rate": 1.9989223888675805e-05, + "loss": 0.5534, + "step": 1176 + }, + { + "epoch": 0.03231740801757276, + "grad_norm": 0.3809729218482971, + "learning_rate": 1.9989203834381595e-05, + "loss": 0.6103, + "step": 1177 + }, + { + "epoch": 0.03234486545853926, + "grad_norm": 0.3748064339160919, + "learning_rate": 1.998918376145433e-05, + "loss": 0.5624, + "step": 1178 + }, + { + "epoch": 0.032372322899505764, + "grad_norm": 0.3522144556045532, + "learning_rate": 1.9989163669894055e-05, + "loss": 0.5899, + "step": 1179 + }, + { + "epoch": 0.032399780340472265, + "grad_norm": 0.38437071442604065, + "learning_rate": 1.99891435597008e-05, + "loss": 0.5974, + "step": 1180 + }, + { + "epoch": 0.03242723778143877, + "grad_norm": 0.4200361669063568, + "learning_rate": 1.9989123430874602e-05, + "loss": 0.6342, + "step": 1181 + }, + { + "epoch": 0.03245469522240527, + "grad_norm": 0.40029168128967285, + "learning_rate": 1.99891032834155e-05, + "loss": 0.6315, + "step": 1182 + }, + { + "epoch": 0.032482152663371774, + "grad_norm": 0.4482196271419525, + "learning_rate": 1.9989083117323535e-05, + "loss": 0.6351, + "step": 1183 + }, + { + "epoch": 0.032509610104338275, + "grad_norm": 0.3813280463218689, + "learning_rate": 1.998906293259874e-05, + "loss": 0.6116, + "step": 1184 + }, + { + "epoch": 0.032537067545304776, + "grad_norm": 0.4204625189304352, + "learning_rate": 1.9989042729241154e-05, + "loss": 0.4899, + "step": 1185 + }, + { + "epoch": 0.03256452498627128, + "grad_norm": 0.40458056330680847, + "learning_rate": 1.9989022507250815e-05, + "loss": 0.519, + "step": 1186 + }, + { + "epoch": 0.03259198242723778, + "grad_norm": 0.3631324768066406, + "learning_rate": 1.9989002266627764e-05, + "loss": 0.6147, + "step": 1187 + }, + { + "epoch": 0.032619439868204286, + "grad_norm": 0.37989094853401184, + "learning_rate": 1.9988982007372035e-05, + "loss": 0.5939, + "step": 1188 + }, + { + "epoch": 0.03264689730917079, + "grad_norm": 0.3791643977165222, + "learning_rate": 1.998896172948366e-05, + "loss": 0.6607, + "step": 1189 + }, + { + "epoch": 0.03267435475013729, + "grad_norm": 0.34027811884880066, + "learning_rate": 1.9988941432962693e-05, + "loss": 0.5404, + "step": 1190 + }, + { + "epoch": 0.03270181219110379, + "grad_norm": 0.3630107045173645, + "learning_rate": 1.9988921117809156e-05, + "loss": 0.533, + "step": 1191 + }, + { + "epoch": 0.03272926963207029, + "grad_norm": 0.4039234220981598, + "learning_rate": 1.9988900784023096e-05, + "loss": 0.5887, + "step": 1192 + }, + { + "epoch": 0.03275672707303679, + "grad_norm": 0.40917861461639404, + "learning_rate": 1.9988880431604544e-05, + "loss": 0.6248, + "step": 1193 + }, + { + "epoch": 0.03278418451400329, + "grad_norm": 0.36764654517173767, + "learning_rate": 1.9988860060553547e-05, + "loss": 0.5172, + "step": 1194 + }, + { + "epoch": 0.0328116419549698, + "grad_norm": 0.3944789469242096, + "learning_rate": 1.9988839670870136e-05, + "loss": 0.6235, + "step": 1195 + }, + { + "epoch": 0.0328390993959363, + "grad_norm": 0.364243745803833, + "learning_rate": 1.9988819262554348e-05, + "loss": 0.5731, + "step": 1196 + }, + { + "epoch": 0.0328665568369028, + "grad_norm": 0.3523035943508148, + "learning_rate": 1.9988798835606227e-05, + "loss": 0.524, + "step": 1197 + }, + { + "epoch": 0.0328940142778693, + "grad_norm": 0.32980474829673767, + "learning_rate": 1.9988778390025808e-05, + "loss": 0.5547, + "step": 1198 + }, + { + "epoch": 0.032921471718835804, + "grad_norm": 0.369876503944397, + "learning_rate": 1.9988757925813127e-05, + "loss": 0.5899, + "step": 1199 + }, + { + "epoch": 0.032948929159802305, + "grad_norm": 0.35442253947257996, + "learning_rate": 1.9988737442968228e-05, + "loss": 0.5681, + "step": 1200 + }, + { + "epoch": 0.032976386600768806, + "grad_norm": 0.37036654353141785, + "learning_rate": 1.9988716941491142e-05, + "loss": 0.5953, + "step": 1201 + }, + { + "epoch": 0.03300384404173531, + "grad_norm": 0.40857645869255066, + "learning_rate": 1.9988696421381913e-05, + "loss": 0.5665, + "step": 1202 + }, + { + "epoch": 0.033031301482701815, + "grad_norm": 0.38524600863456726, + "learning_rate": 1.9988675882640578e-05, + "loss": 0.5646, + "step": 1203 + }, + { + "epoch": 0.033058758923668316, + "grad_norm": 1.2550793886184692, + "learning_rate": 1.9988655325267175e-05, + "loss": 0.6566, + "step": 1204 + }, + { + "epoch": 0.03308621636463482, + "grad_norm": 0.38972964882850647, + "learning_rate": 1.9988634749261737e-05, + "loss": 0.6225, + "step": 1205 + }, + { + "epoch": 0.03311367380560132, + "grad_norm": 0.35768184065818787, + "learning_rate": 1.998861415462431e-05, + "loss": 0.4799, + "step": 1206 + }, + { + "epoch": 0.03314113124656782, + "grad_norm": 0.3882080316543579, + "learning_rate": 1.998859354135493e-05, + "loss": 0.6168, + "step": 1207 + }, + { + "epoch": 0.03316858868753432, + "grad_norm": 0.34855368733406067, + "learning_rate": 1.9988572909453635e-05, + "loss": 0.5341, + "step": 1208 + }, + { + "epoch": 0.03319604612850082, + "grad_norm": 0.3344106674194336, + "learning_rate": 1.9988552258920465e-05, + "loss": 0.5258, + "step": 1209 + }, + { + "epoch": 0.03322350356946733, + "grad_norm": 0.44499003887176514, + "learning_rate": 1.9988531589755453e-05, + "loss": 0.5114, + "step": 1210 + }, + { + "epoch": 0.03325096101043383, + "grad_norm": 0.3726302683353424, + "learning_rate": 1.9988510901958647e-05, + "loss": 0.586, + "step": 1211 + }, + { + "epoch": 0.03327841845140033, + "grad_norm": 0.37287113070487976, + "learning_rate": 1.9988490195530078e-05, + "loss": 0.6161, + "step": 1212 + }, + { + "epoch": 0.03330587589236683, + "grad_norm": 0.35752734541893005, + "learning_rate": 1.9988469470469785e-05, + "loss": 0.5638, + "step": 1213 + }, + { + "epoch": 0.03333333333333333, + "grad_norm": 0.3563767373561859, + "learning_rate": 1.998844872677781e-05, + "loss": 0.5995, + "step": 1214 + }, + { + "epoch": 0.033360790774299834, + "grad_norm": 0.39681869745254517, + "learning_rate": 1.9988427964454188e-05, + "loss": 0.6785, + "step": 1215 + }, + { + "epoch": 0.033388248215266335, + "grad_norm": 0.3929237127304077, + "learning_rate": 1.998840718349896e-05, + "loss": 0.544, + "step": 1216 + }, + { + "epoch": 0.033415705656232836, + "grad_norm": 0.3811745047569275, + "learning_rate": 1.998838638391217e-05, + "loss": 0.5654, + "step": 1217 + }, + { + "epoch": 0.033443163097199344, + "grad_norm": 0.39695554971694946, + "learning_rate": 1.9988365565693848e-05, + "loss": 0.5741, + "step": 1218 + }, + { + "epoch": 0.033470620538165845, + "grad_norm": 0.36237284541130066, + "learning_rate": 1.9988344728844035e-05, + "loss": 0.5865, + "step": 1219 + }, + { + "epoch": 0.033498077979132346, + "grad_norm": 0.41349294781684875, + "learning_rate": 1.9988323873362773e-05, + "loss": 0.5854, + "step": 1220 + }, + { + "epoch": 0.03352553542009885, + "grad_norm": 0.3986033499240875, + "learning_rate": 1.99883029992501e-05, + "loss": 0.66, + "step": 1221 + }, + { + "epoch": 0.03355299286106535, + "grad_norm": 0.49217504262924194, + "learning_rate": 1.9988282106506052e-05, + "loss": 0.4488, + "step": 1222 + }, + { + "epoch": 0.03358045030203185, + "grad_norm": 0.3621750771999359, + "learning_rate": 1.9988261195130668e-05, + "loss": 0.4523, + "step": 1223 + }, + { + "epoch": 0.03360790774299835, + "grad_norm": 0.36717796325683594, + "learning_rate": 1.998824026512399e-05, + "loss": 0.6954, + "step": 1224 + }, + { + "epoch": 0.03363536518396486, + "grad_norm": 0.3753148019313812, + "learning_rate": 1.998821931648606e-05, + "loss": 0.5744, + "step": 1225 + }, + { + "epoch": 0.03366282262493136, + "grad_norm": 0.34411847591400146, + "learning_rate": 1.9988198349216913e-05, + "loss": 0.4994, + "step": 1226 + }, + { + "epoch": 0.03369028006589786, + "grad_norm": 0.39986324310302734, + "learning_rate": 1.998817736331658e-05, + "loss": 0.5856, + "step": 1227 + }, + { + "epoch": 0.03371773750686436, + "grad_norm": 0.40316906571388245, + "learning_rate": 1.9988156358785117e-05, + "loss": 0.5488, + "step": 1228 + }, + { + "epoch": 0.03374519494783086, + "grad_norm": 0.3437943756580353, + "learning_rate": 1.9988135335622553e-05, + "loss": 0.5097, + "step": 1229 + }, + { + "epoch": 0.03377265238879736, + "grad_norm": 0.4427289664745331, + "learning_rate": 1.9988114293828927e-05, + "loss": 0.6977, + "step": 1230 + }, + { + "epoch": 0.033800109829763864, + "grad_norm": 0.3682600259780884, + "learning_rate": 1.998809323340428e-05, + "loss": 0.5585, + "step": 1231 + }, + { + "epoch": 0.033827567270730365, + "grad_norm": 0.4105755388736725, + "learning_rate": 1.9988072154348654e-05, + "loss": 0.5968, + "step": 1232 + }, + { + "epoch": 0.03385502471169687, + "grad_norm": 0.35161149501800537, + "learning_rate": 1.998805105666208e-05, + "loss": 0.4807, + "step": 1233 + }, + { + "epoch": 0.033882482152663373, + "grad_norm": 0.36217236518859863, + "learning_rate": 1.9988029940344606e-05, + "loss": 0.6411, + "step": 1234 + }, + { + "epoch": 0.033909939593629874, + "grad_norm": 0.374201238155365, + "learning_rate": 1.9988008805396268e-05, + "loss": 0.5644, + "step": 1235 + }, + { + "epoch": 0.033937397034596375, + "grad_norm": 0.4078959822654724, + "learning_rate": 1.9987987651817108e-05, + "loss": 0.6384, + "step": 1236 + }, + { + "epoch": 0.033964854475562876, + "grad_norm": 0.3493024408817291, + "learning_rate": 1.9987966479607164e-05, + "loss": 0.508, + "step": 1237 + }, + { + "epoch": 0.03399231191652938, + "grad_norm": 0.34062570333480835, + "learning_rate": 1.998794528876647e-05, + "loss": 0.5053, + "step": 1238 + }, + { + "epoch": 0.03401976935749588, + "grad_norm": 0.38363513350486755, + "learning_rate": 1.9987924079295072e-05, + "loss": 0.5733, + "step": 1239 + }, + { + "epoch": 0.034047226798462386, + "grad_norm": 0.37566471099853516, + "learning_rate": 1.9987902851193008e-05, + "loss": 0.586, + "step": 1240 + }, + { + "epoch": 0.03407468423942889, + "grad_norm": 0.4337230920791626, + "learning_rate": 1.998788160446032e-05, + "loss": 0.5801, + "step": 1241 + }, + { + "epoch": 0.03410214168039539, + "grad_norm": 0.3737572431564331, + "learning_rate": 1.9987860339097043e-05, + "loss": 0.6166, + "step": 1242 + }, + { + "epoch": 0.03412959912136189, + "grad_norm": 0.3531170189380646, + "learning_rate": 1.998783905510322e-05, + "loss": 0.6043, + "step": 1243 + }, + { + "epoch": 0.03415705656232839, + "grad_norm": 0.4008699059486389, + "learning_rate": 1.9987817752478885e-05, + "loss": 0.663, + "step": 1244 + }, + { + "epoch": 0.03418451400329489, + "grad_norm": 0.3955473005771637, + "learning_rate": 1.9987796431224088e-05, + "loss": 0.6157, + "step": 1245 + }, + { + "epoch": 0.03421197144426139, + "grad_norm": 0.3951888084411621, + "learning_rate": 1.998777509133886e-05, + "loss": 0.5827, + "step": 1246 + }, + { + "epoch": 0.03423942888522789, + "grad_norm": 0.3929520845413208, + "learning_rate": 1.998775373282324e-05, + "loss": 0.6527, + "step": 1247 + }, + { + "epoch": 0.0342668863261944, + "grad_norm": 0.4121510684490204, + "learning_rate": 1.9987732355677275e-05, + "loss": 0.6603, + "step": 1248 + }, + { + "epoch": 0.0342943437671609, + "grad_norm": 0.35039830207824707, + "learning_rate": 1.9987710959901005e-05, + "loss": 0.6157, + "step": 1249 + }, + { + "epoch": 0.0343218012081274, + "grad_norm": 0.375547856092453, + "learning_rate": 1.9987689545494465e-05, + "loss": 0.5631, + "step": 1250 + }, + { + "epoch": 0.034349258649093904, + "grad_norm": 0.36310890316963196, + "learning_rate": 1.9987668112457692e-05, + "loss": 0.5549, + "step": 1251 + }, + { + "epoch": 0.034376716090060405, + "grad_norm": 0.42688703536987305, + "learning_rate": 1.9987646660790735e-05, + "loss": 0.6195, + "step": 1252 + }, + { + "epoch": 0.034404173531026906, + "grad_norm": 0.3575834035873413, + "learning_rate": 1.9987625190493624e-05, + "loss": 0.5894, + "step": 1253 + }, + { + "epoch": 0.03443163097199341, + "grad_norm": 0.342949241399765, + "learning_rate": 1.998760370156641e-05, + "loss": 0.5877, + "step": 1254 + }, + { + "epoch": 0.034459088412959915, + "grad_norm": 0.3697826862335205, + "learning_rate": 1.9987582194009125e-05, + "loss": 0.5591, + "step": 1255 + }, + { + "epoch": 0.034486545853926416, + "grad_norm": 0.3995330333709717, + "learning_rate": 1.998756066782181e-05, + "loss": 0.6008, + "step": 1256 + }, + { + "epoch": 0.03451400329489292, + "grad_norm": 0.6094611287117004, + "learning_rate": 1.9987539123004506e-05, + "loss": 0.6265, + "step": 1257 + }, + { + "epoch": 0.03454146073585942, + "grad_norm": 0.34113338589668274, + "learning_rate": 1.9987517559557257e-05, + "loss": 0.5155, + "step": 1258 + }, + { + "epoch": 0.03456891817682592, + "grad_norm": 0.363423228263855, + "learning_rate": 1.99874959774801e-05, + "loss": 0.5066, + "step": 1259 + }, + { + "epoch": 0.03459637561779242, + "grad_norm": 0.3834076225757599, + "learning_rate": 1.9987474376773072e-05, + "loss": 0.6517, + "step": 1260 + }, + { + "epoch": 0.03462383305875892, + "grad_norm": 0.36704230308532715, + "learning_rate": 1.998745275743622e-05, + "loss": 0.531, + "step": 1261 + }, + { + "epoch": 0.03465129049972543, + "grad_norm": 0.35406729578971863, + "learning_rate": 1.998743111946958e-05, + "loss": 0.5549, + "step": 1262 + }, + { + "epoch": 0.03467874794069193, + "grad_norm": 0.3688981533050537, + "learning_rate": 1.9987409462873194e-05, + "loss": 0.4874, + "step": 1263 + }, + { + "epoch": 0.03470620538165843, + "grad_norm": 0.4087311923503876, + "learning_rate": 1.99873877876471e-05, + "loss": 0.5976, + "step": 1264 + }, + { + "epoch": 0.03473366282262493, + "grad_norm": 0.3871917426586151, + "learning_rate": 1.9987366093791346e-05, + "loss": 0.5881, + "step": 1265 + }, + { + "epoch": 0.03476112026359143, + "grad_norm": 0.40225958824157715, + "learning_rate": 1.998734438130596e-05, + "loss": 0.5821, + "step": 1266 + }, + { + "epoch": 0.034788577704557934, + "grad_norm": 0.35959210991859436, + "learning_rate": 1.998732265019099e-05, + "loss": 0.624, + "step": 1267 + }, + { + "epoch": 0.034816035145524435, + "grad_norm": 0.34677913784980774, + "learning_rate": 1.9987300900446476e-05, + "loss": 0.5069, + "step": 1268 + }, + { + "epoch": 0.034843492586490936, + "grad_norm": 0.34094998240470886, + "learning_rate": 1.998727913207246e-05, + "loss": 0.5799, + "step": 1269 + }, + { + "epoch": 0.034870950027457444, + "grad_norm": 0.3932151794433594, + "learning_rate": 1.998725734506898e-05, + "loss": 0.675, + "step": 1270 + }, + { + "epoch": 0.034898407468423945, + "grad_norm": 0.358003705739975, + "learning_rate": 1.9987235539436076e-05, + "loss": 0.5519, + "step": 1271 + }, + { + "epoch": 0.034925864909390446, + "grad_norm": 0.3688274025917053, + "learning_rate": 1.9987213715173793e-05, + "loss": 0.5101, + "step": 1272 + }, + { + "epoch": 0.03495332235035695, + "grad_norm": 0.4790780246257782, + "learning_rate": 1.998719187228217e-05, + "loss": 0.5313, + "step": 1273 + }, + { + "epoch": 0.03498077979132345, + "grad_norm": 0.5002806186676025, + "learning_rate": 1.998717001076124e-05, + "loss": 0.6158, + "step": 1274 + }, + { + "epoch": 0.03500823723228995, + "grad_norm": 0.37346526980400085, + "learning_rate": 1.9987148130611056e-05, + "loss": 0.5024, + "step": 1275 + }, + { + "epoch": 0.03503569467325645, + "grad_norm": 0.3586314022541046, + "learning_rate": 1.9987126231831652e-05, + "loss": 0.6421, + "step": 1276 + }, + { + "epoch": 0.03506315211422296, + "grad_norm": 0.3577103316783905, + "learning_rate": 1.998710431442307e-05, + "loss": 0.6036, + "step": 1277 + }, + { + "epoch": 0.03509060955518946, + "grad_norm": 0.3347107470035553, + "learning_rate": 1.998708237838535e-05, + "loss": 0.4965, + "step": 1278 + }, + { + "epoch": 0.03511806699615596, + "grad_norm": 0.45199474692344666, + "learning_rate": 1.9987060423718533e-05, + "loss": 0.5844, + "step": 1279 + }, + { + "epoch": 0.03514552443712246, + "grad_norm": 0.39720168709754944, + "learning_rate": 1.9987038450422666e-05, + "loss": 0.6527, + "step": 1280 + }, + { + "epoch": 0.03517298187808896, + "grad_norm": 0.4401092231273651, + "learning_rate": 1.998701645849778e-05, + "loss": 0.5747, + "step": 1281 + }, + { + "epoch": 0.03520043931905546, + "grad_norm": 0.3654850423336029, + "learning_rate": 1.998699444794392e-05, + "loss": 0.6415, + "step": 1282 + }, + { + "epoch": 0.035227896760021964, + "grad_norm": 0.4403817355632782, + "learning_rate": 1.998697241876113e-05, + "loss": 0.6674, + "step": 1283 + }, + { + "epoch": 0.035255354200988465, + "grad_norm": 0.35067018866539, + "learning_rate": 1.9986950370949446e-05, + "loss": 0.5749, + "step": 1284 + }, + { + "epoch": 0.03528281164195497, + "grad_norm": 0.4076237678527832, + "learning_rate": 1.998692830450892e-05, + "loss": 0.6251, + "step": 1285 + }, + { + "epoch": 0.035310269082921474, + "grad_norm": 0.3908742368221283, + "learning_rate": 1.9986906219439575e-05, + "loss": 0.6396, + "step": 1286 + }, + { + "epoch": 0.035337726523887975, + "grad_norm": 0.3406464457511902, + "learning_rate": 1.9986884115741468e-05, + "loss": 0.4885, + "step": 1287 + }, + { + "epoch": 0.035365183964854476, + "grad_norm": 0.3983194828033447, + "learning_rate": 1.9986861993414637e-05, + "loss": 0.5469, + "step": 1288 + }, + { + "epoch": 0.035392641405820977, + "grad_norm": 0.3528475761413574, + "learning_rate": 1.9986839852459118e-05, + "loss": 0.5619, + "step": 1289 + }, + { + "epoch": 0.03542009884678748, + "grad_norm": 0.4188925325870514, + "learning_rate": 1.9986817692874956e-05, + "loss": 0.6016, + "step": 1290 + }, + { + "epoch": 0.03544755628775398, + "grad_norm": 0.33856260776519775, + "learning_rate": 1.998679551466219e-05, + "loss": 0.5218, + "step": 1291 + }, + { + "epoch": 0.035475013728720486, + "grad_norm": 0.39022311568260193, + "learning_rate": 1.9986773317820866e-05, + "loss": 0.5955, + "step": 1292 + }, + { + "epoch": 0.03550247116968699, + "grad_norm": 0.38043728470802307, + "learning_rate": 1.998675110235102e-05, + "loss": 0.6192, + "step": 1293 + }, + { + "epoch": 0.03552992861065349, + "grad_norm": 0.34716635942459106, + "learning_rate": 1.9986728868252694e-05, + "loss": 0.5419, + "step": 1294 + }, + { + "epoch": 0.03555738605161999, + "grad_norm": 0.390146404504776, + "learning_rate": 1.9986706615525933e-05, + "loss": 0.641, + "step": 1295 + }, + { + "epoch": 0.03558484349258649, + "grad_norm": 0.39541980624198914, + "learning_rate": 1.998668434417078e-05, + "loss": 0.6573, + "step": 1296 + }, + { + "epoch": 0.03561230093355299, + "grad_norm": 0.3927261233329773, + "learning_rate": 1.9986662054187268e-05, + "loss": 0.5834, + "step": 1297 + }, + { + "epoch": 0.03563975837451949, + "grad_norm": 0.44010382890701294, + "learning_rate": 1.9986639745575445e-05, + "loss": 0.6526, + "step": 1298 + }, + { + "epoch": 0.03566721581548599, + "grad_norm": 0.3553536534309387, + "learning_rate": 1.9986617418335355e-05, + "loss": 0.6426, + "step": 1299 + }, + { + "epoch": 0.0356946732564525, + "grad_norm": 0.34526264667510986, + "learning_rate": 1.9986595072467035e-05, + "loss": 0.6102, + "step": 1300 + }, + { + "epoch": 0.035722130697419, + "grad_norm": 0.3611888587474823, + "learning_rate": 1.9986572707970527e-05, + "loss": 0.6482, + "step": 1301 + }, + { + "epoch": 0.0357495881383855, + "grad_norm": 0.3979525566101074, + "learning_rate": 1.998655032484587e-05, + "loss": 0.5745, + "step": 1302 + }, + { + "epoch": 0.035777045579352004, + "grad_norm": 0.34665295481681824, + "learning_rate": 1.9986527923093114e-05, + "loss": 0.615, + "step": 1303 + }, + { + "epoch": 0.035804503020318505, + "grad_norm": 0.3473665714263916, + "learning_rate": 1.9986505502712295e-05, + "loss": 0.5446, + "step": 1304 + }, + { + "epoch": 0.035831960461285006, + "grad_norm": 0.46632325649261475, + "learning_rate": 1.9986483063703453e-05, + "loss": 0.5934, + "step": 1305 + }, + { + "epoch": 0.03585941790225151, + "grad_norm": 0.3431428372859955, + "learning_rate": 1.998646060606664e-05, + "loss": 0.4752, + "step": 1306 + }, + { + "epoch": 0.035886875343218015, + "grad_norm": 0.3739601969718933, + "learning_rate": 1.9986438129801886e-05, + "loss": 0.6084, + "step": 1307 + }, + { + "epoch": 0.035914332784184516, + "grad_norm": 0.3794978857040405, + "learning_rate": 1.9986415634909236e-05, + "loss": 0.518, + "step": 1308 + }, + { + "epoch": 0.03594179022515102, + "grad_norm": 0.3524473011493683, + "learning_rate": 1.9986393121388735e-05, + "loss": 0.5502, + "step": 1309 + }, + { + "epoch": 0.03596924766611752, + "grad_norm": 0.3723267614841461, + "learning_rate": 1.9986370589240425e-05, + "loss": 0.6309, + "step": 1310 + }, + { + "epoch": 0.03599670510708402, + "grad_norm": 0.41200733184814453, + "learning_rate": 1.9986348038464346e-05, + "loss": 0.6095, + "step": 1311 + }, + { + "epoch": 0.03602416254805052, + "grad_norm": 0.3638576865196228, + "learning_rate": 1.9986325469060538e-05, + "loss": 0.6397, + "step": 1312 + }, + { + "epoch": 0.03605161998901702, + "grad_norm": 0.4164491295814514, + "learning_rate": 1.9986302881029046e-05, + "loss": 0.6418, + "step": 1313 + }, + { + "epoch": 0.03607907742998352, + "grad_norm": 0.3762272298336029, + "learning_rate": 1.9986280274369917e-05, + "loss": 0.6263, + "step": 1314 + }, + { + "epoch": 0.03610653487095003, + "grad_norm": 0.3653043210506439, + "learning_rate": 1.998625764908318e-05, + "loss": 0.567, + "step": 1315 + }, + { + "epoch": 0.03613399231191653, + "grad_norm": 0.4219542145729065, + "learning_rate": 1.9986235005168894e-05, + "loss": 0.5943, + "step": 1316 + }, + { + "epoch": 0.03616144975288303, + "grad_norm": 0.41777148842811584, + "learning_rate": 1.998621234262709e-05, + "loss": 0.5578, + "step": 1317 + }, + { + "epoch": 0.03618890719384953, + "grad_norm": 0.3672006130218506, + "learning_rate": 1.998618966145781e-05, + "loss": 0.5172, + "step": 1318 + }, + { + "epoch": 0.036216364634816034, + "grad_norm": 0.6433749198913574, + "learning_rate": 1.9986166961661098e-05, + "loss": 0.5534, + "step": 1319 + }, + { + "epoch": 0.036243822075782535, + "grad_norm": 0.3567993640899658, + "learning_rate": 1.9986144243237002e-05, + "loss": 0.6146, + "step": 1320 + }, + { + "epoch": 0.036271279516749036, + "grad_norm": 0.3429429829120636, + "learning_rate": 1.9986121506185557e-05, + "loss": 0.5727, + "step": 1321 + }, + { + "epoch": 0.036298736957715544, + "grad_norm": 0.3574250638484955, + "learning_rate": 1.9986098750506807e-05, + "loss": 0.5927, + "step": 1322 + }, + { + "epoch": 0.036326194398682045, + "grad_norm": 0.36117053031921387, + "learning_rate": 1.99860759762008e-05, + "loss": 0.5578, + "step": 1323 + }, + { + "epoch": 0.036353651839648546, + "grad_norm": 0.4171261787414551, + "learning_rate": 1.9986053183267572e-05, + "loss": 0.6365, + "step": 1324 + }, + { + "epoch": 0.03638110928061505, + "grad_norm": 0.3504883050918579, + "learning_rate": 1.9986030371707165e-05, + "loss": 0.6009, + "step": 1325 + }, + { + "epoch": 0.03640856672158155, + "grad_norm": 0.3804073929786682, + "learning_rate": 1.9986007541519626e-05, + "loss": 0.5007, + "step": 1326 + }, + { + "epoch": 0.03643602416254805, + "grad_norm": 0.35574838519096375, + "learning_rate": 1.9985984692704995e-05, + "loss": 0.5779, + "step": 1327 + }, + { + "epoch": 0.03646348160351455, + "grad_norm": 0.4317134916782379, + "learning_rate": 1.9985961825263314e-05, + "loss": 0.5392, + "step": 1328 + }, + { + "epoch": 0.03649093904448106, + "grad_norm": 0.31361091136932373, + "learning_rate": 1.998593893919463e-05, + "loss": 0.5096, + "step": 1329 + }, + { + "epoch": 0.03651839648544756, + "grad_norm": 0.39062777161598206, + "learning_rate": 1.9985916034498983e-05, + "loss": 0.6709, + "step": 1330 + }, + { + "epoch": 0.03654585392641406, + "grad_norm": 0.38672131299972534, + "learning_rate": 1.9985893111176414e-05, + "loss": 0.5767, + "step": 1331 + }, + { + "epoch": 0.03657331136738056, + "grad_norm": 0.3764428198337555, + "learning_rate": 1.998587016922697e-05, + "loss": 0.4977, + "step": 1332 + }, + { + "epoch": 0.03660076880834706, + "grad_norm": 0.33594101667404175, + "learning_rate": 1.998584720865069e-05, + "loss": 0.5536, + "step": 1333 + }, + { + "epoch": 0.03662822624931356, + "grad_norm": 0.37867122888565063, + "learning_rate": 1.9985824229447615e-05, + "loss": 0.5461, + "step": 1334 + }, + { + "epoch": 0.036655683690280064, + "grad_norm": 0.4636308550834656, + "learning_rate": 1.9985801231617792e-05, + "loss": 0.5721, + "step": 1335 + }, + { + "epoch": 0.036683141131246565, + "grad_norm": 0.45492222905158997, + "learning_rate": 1.9985778215161264e-05, + "loss": 0.573, + "step": 1336 + }, + { + "epoch": 0.03671059857221307, + "grad_norm": 0.3679135739803314, + "learning_rate": 1.998575518007807e-05, + "loss": 0.5252, + "step": 1337 + }, + { + "epoch": 0.036738056013179574, + "grad_norm": 0.3396340012550354, + "learning_rate": 1.9985732126368258e-05, + "loss": 0.5996, + "step": 1338 + }, + { + "epoch": 0.036765513454146075, + "grad_norm": 0.35321441292762756, + "learning_rate": 1.9985709054031868e-05, + "loss": 0.5552, + "step": 1339 + }, + { + "epoch": 0.036792970895112576, + "grad_norm": 0.37329745292663574, + "learning_rate": 1.9985685963068942e-05, + "loss": 0.6165, + "step": 1340 + }, + { + "epoch": 0.03682042833607908, + "grad_norm": 0.35924991965293884, + "learning_rate": 1.9985662853479525e-05, + "loss": 0.4732, + "step": 1341 + }, + { + "epoch": 0.03684788577704558, + "grad_norm": 0.3975987136363983, + "learning_rate": 1.998563972526366e-05, + "loss": 0.6623, + "step": 1342 + }, + { + "epoch": 0.03687534321801208, + "grad_norm": 0.3536795675754547, + "learning_rate": 1.9985616578421392e-05, + "loss": 0.678, + "step": 1343 + }, + { + "epoch": 0.036902800658978586, + "grad_norm": 0.41098275780677795, + "learning_rate": 1.998559341295276e-05, + "loss": 0.5952, + "step": 1344 + }, + { + "epoch": 0.03693025809994509, + "grad_norm": 0.34572386741638184, + "learning_rate": 1.9985570228857808e-05, + "loss": 0.5169, + "step": 1345 + }, + { + "epoch": 0.03695771554091159, + "grad_norm": 0.45085200667381287, + "learning_rate": 1.9985547026136586e-05, + "loss": 0.5976, + "step": 1346 + }, + { + "epoch": 0.03698517298187809, + "grad_norm": 0.3493817448616028, + "learning_rate": 1.9985523804789127e-05, + "loss": 0.6205, + "step": 1347 + }, + { + "epoch": 0.03701263042284459, + "grad_norm": 0.3858240246772766, + "learning_rate": 1.9985500564815478e-05, + "loss": 0.5269, + "step": 1348 + }, + { + "epoch": 0.03704008786381109, + "grad_norm": 0.32471320033073425, + "learning_rate": 1.9985477306215686e-05, + "loss": 0.5139, + "step": 1349 + }, + { + "epoch": 0.03706754530477759, + "grad_norm": 0.3415811359882355, + "learning_rate": 1.9985454028989792e-05, + "loss": 0.5502, + "step": 1350 + }, + { + "epoch": 0.03709500274574409, + "grad_norm": 0.37088167667388916, + "learning_rate": 1.998543073313784e-05, + "loss": 0.5629, + "step": 1351 + }, + { + "epoch": 0.0371224601867106, + "grad_norm": 0.35518884658813477, + "learning_rate": 1.9985407418659867e-05, + "loss": 0.5587, + "step": 1352 + }, + { + "epoch": 0.0371499176276771, + "grad_norm": 0.41024041175842285, + "learning_rate": 1.9985384085555928e-05, + "loss": 0.5694, + "step": 1353 + }, + { + "epoch": 0.0371773750686436, + "grad_norm": 0.3652222752571106, + "learning_rate": 1.998536073382606e-05, + "loss": 0.5428, + "step": 1354 + }, + { + "epoch": 0.037204832509610104, + "grad_norm": 0.3796199858188629, + "learning_rate": 1.9985337363470306e-05, + "loss": 0.4688, + "step": 1355 + }, + { + "epoch": 0.037232289950576605, + "grad_norm": 0.35591110587120056, + "learning_rate": 1.998531397448871e-05, + "loss": 0.5115, + "step": 1356 + }, + { + "epoch": 0.037259747391543106, + "grad_norm": 0.3940508961677551, + "learning_rate": 1.9985290566881316e-05, + "loss": 0.5252, + "step": 1357 + }, + { + "epoch": 0.03728720483250961, + "grad_norm": 0.4166024923324585, + "learning_rate": 1.9985267140648174e-05, + "loss": 0.5449, + "step": 1358 + }, + { + "epoch": 0.037314662273476115, + "grad_norm": 0.3663184642791748, + "learning_rate": 1.9985243695789316e-05, + "loss": 0.5512, + "step": 1359 + }, + { + "epoch": 0.037342119714442616, + "grad_norm": 0.3589610457420349, + "learning_rate": 1.9985220232304794e-05, + "loss": 0.569, + "step": 1360 + }, + { + "epoch": 0.03736957715540912, + "grad_norm": 0.3511161804199219, + "learning_rate": 1.9985196750194647e-05, + "loss": 0.505, + "step": 1361 + }, + { + "epoch": 0.03739703459637562, + "grad_norm": 0.36088645458221436, + "learning_rate": 1.9985173249458924e-05, + "loss": 0.6174, + "step": 1362 + }, + { + "epoch": 0.03742449203734212, + "grad_norm": 0.4510972797870636, + "learning_rate": 1.9985149730097664e-05, + "loss": 0.6422, + "step": 1363 + }, + { + "epoch": 0.03745194947830862, + "grad_norm": 0.36744189262390137, + "learning_rate": 1.9985126192110912e-05, + "loss": 0.5352, + "step": 1364 + }, + { + "epoch": 0.03747940691927512, + "grad_norm": 0.3571881055831909, + "learning_rate": 1.9985102635498715e-05, + "loss": 0.5262, + "step": 1365 + }, + { + "epoch": 0.03750686436024162, + "grad_norm": 0.37280380725860596, + "learning_rate": 1.9985079060261118e-05, + "loss": 0.5597, + "step": 1366 + }, + { + "epoch": 0.03753432180120813, + "grad_norm": 0.3582775294780731, + "learning_rate": 1.9985055466398157e-05, + "loss": 0.608, + "step": 1367 + }, + { + "epoch": 0.03756177924217463, + "grad_norm": 0.4250354468822479, + "learning_rate": 1.998503185390988e-05, + "loss": 0.6902, + "step": 1368 + }, + { + "epoch": 0.03758923668314113, + "grad_norm": 0.426248162984848, + "learning_rate": 1.9985008222796334e-05, + "loss": 0.6321, + "step": 1369 + }, + { + "epoch": 0.03761669412410763, + "grad_norm": 0.40823084115982056, + "learning_rate": 1.998498457305756e-05, + "loss": 0.5644, + "step": 1370 + }, + { + "epoch": 0.037644151565074134, + "grad_norm": 0.35153359174728394, + "learning_rate": 1.9984960904693604e-05, + "loss": 0.5109, + "step": 1371 + }, + { + "epoch": 0.037671609006040635, + "grad_norm": 0.36713558435440063, + "learning_rate": 1.9984937217704507e-05, + "loss": 0.5587, + "step": 1372 + }, + { + "epoch": 0.037699066447007136, + "grad_norm": 0.34770628809928894, + "learning_rate": 1.9984913512090317e-05, + "loss": 0.4984, + "step": 1373 + }, + { + "epoch": 0.037726523887973644, + "grad_norm": 0.38485515117645264, + "learning_rate": 1.9984889787851073e-05, + "loss": 0.5534, + "step": 1374 + }, + { + "epoch": 0.037753981328940145, + "grad_norm": 0.4647962749004364, + "learning_rate": 1.9984866044986827e-05, + "loss": 0.5523, + "step": 1375 + }, + { + "epoch": 0.037781438769906646, + "grad_norm": 0.5933514833450317, + "learning_rate": 1.9984842283497618e-05, + "loss": 0.5667, + "step": 1376 + }, + { + "epoch": 0.03780889621087315, + "grad_norm": 0.3392966389656067, + "learning_rate": 1.9984818503383493e-05, + "loss": 0.5417, + "step": 1377 + }, + { + "epoch": 0.03783635365183965, + "grad_norm": 0.350118488073349, + "learning_rate": 1.998479470464449e-05, + "loss": 0.5642, + "step": 1378 + }, + { + "epoch": 0.03786381109280615, + "grad_norm": 0.3626915514469147, + "learning_rate": 1.9984770887280663e-05, + "loss": 0.6028, + "step": 1379 + }, + { + "epoch": 0.03789126853377265, + "grad_norm": 0.4075717628002167, + "learning_rate": 1.9984747051292047e-05, + "loss": 0.6407, + "step": 1380 + }, + { + "epoch": 0.03791872597473915, + "grad_norm": 0.37196436524391174, + "learning_rate": 1.9984723196678694e-05, + "loss": 0.5713, + "step": 1381 + }, + { + "epoch": 0.03794618341570566, + "grad_norm": 0.33494457602500916, + "learning_rate": 1.9984699323440645e-05, + "loss": 0.55, + "step": 1382 + }, + { + "epoch": 0.03797364085667216, + "grad_norm": 0.3736448585987091, + "learning_rate": 1.9984675431577944e-05, + "loss": 0.4671, + "step": 1383 + }, + { + "epoch": 0.03800109829763866, + "grad_norm": 0.3956364095211029, + "learning_rate": 1.9984651521090635e-05, + "loss": 0.6346, + "step": 1384 + }, + { + "epoch": 0.03802855573860516, + "grad_norm": 0.41002312302589417, + "learning_rate": 1.998462759197877e-05, + "loss": 0.578, + "step": 1385 + }, + { + "epoch": 0.03805601317957166, + "grad_norm": 0.33643418550491333, + "learning_rate": 1.9984603644242384e-05, + "loss": 0.5922, + "step": 1386 + }, + { + "epoch": 0.038083470620538164, + "grad_norm": 0.38798728585243225, + "learning_rate": 1.9984579677881526e-05, + "loss": 0.6263, + "step": 1387 + }, + { + "epoch": 0.038110928061504665, + "grad_norm": 0.36448708176612854, + "learning_rate": 1.998455569289624e-05, + "loss": 0.5796, + "step": 1388 + }, + { + "epoch": 0.03813838550247117, + "grad_norm": 0.3806496262550354, + "learning_rate": 1.998453168928657e-05, + "loss": 0.5845, + "step": 1389 + }, + { + "epoch": 0.038165842943437674, + "grad_norm": 0.45116952061653137, + "learning_rate": 1.9984507667052565e-05, + "loss": 0.5427, + "step": 1390 + }, + { + "epoch": 0.038193300384404175, + "grad_norm": 0.37620508670806885, + "learning_rate": 1.998448362619426e-05, + "loss": 0.5341, + "step": 1391 + }, + { + "epoch": 0.038220757825370676, + "grad_norm": 0.4355982840061188, + "learning_rate": 1.9984459566711713e-05, + "loss": 0.6131, + "step": 1392 + }, + { + "epoch": 0.03824821526633718, + "grad_norm": 0.40971219539642334, + "learning_rate": 1.998443548860496e-05, + "loss": 0.6067, + "step": 1393 + }, + { + "epoch": 0.03827567270730368, + "grad_norm": 0.9071754217147827, + "learning_rate": 1.9984411391874048e-05, + "loss": 0.5623, + "step": 1394 + }, + { + "epoch": 0.03830313014827018, + "grad_norm": 0.36216816306114197, + "learning_rate": 1.998438727651902e-05, + "loss": 0.5762, + "step": 1395 + }, + { + "epoch": 0.038330587589236687, + "grad_norm": 0.38394609093666077, + "learning_rate": 1.9984363142539928e-05, + "loss": 0.5427, + "step": 1396 + }, + { + "epoch": 0.03835804503020319, + "grad_norm": 0.37858355045318604, + "learning_rate": 1.998433898993681e-05, + "loss": 0.5558, + "step": 1397 + }, + { + "epoch": 0.03838550247116969, + "grad_norm": 0.3510121703147888, + "learning_rate": 1.998431481870971e-05, + "loss": 0.4667, + "step": 1398 + }, + { + "epoch": 0.03841295991213619, + "grad_norm": 0.3656271696090698, + "learning_rate": 1.998429062885868e-05, + "loss": 0.5903, + "step": 1399 + }, + { + "epoch": 0.03844041735310269, + "grad_norm": 0.3702053129673004, + "learning_rate": 1.9984266420383758e-05, + "loss": 0.5269, + "step": 1400 + }, + { + "epoch": 0.03846787479406919, + "grad_norm": 0.3782820999622345, + "learning_rate": 1.9984242193284996e-05, + "loss": 0.6312, + "step": 1401 + }, + { + "epoch": 0.03849533223503569, + "grad_norm": 0.342664510011673, + "learning_rate": 1.9984217947562433e-05, + "loss": 0.6699, + "step": 1402 + }, + { + "epoch": 0.03852278967600219, + "grad_norm": 0.3822386860847473, + "learning_rate": 1.9984193683216116e-05, + "loss": 0.5265, + "step": 1403 + }, + { + "epoch": 0.0385502471169687, + "grad_norm": 0.33213821053504944, + "learning_rate": 1.9984169400246096e-05, + "loss": 0.5421, + "step": 1404 + }, + { + "epoch": 0.0385777045579352, + "grad_norm": 0.39297229051589966, + "learning_rate": 1.9984145098652413e-05, + "loss": 0.5874, + "step": 1405 + }, + { + "epoch": 0.0386051619989017, + "grad_norm": 0.38491353392601013, + "learning_rate": 1.998412077843511e-05, + "loss": 0.575, + "step": 1406 + }, + { + "epoch": 0.038632619439868204, + "grad_norm": 0.3379661738872528, + "learning_rate": 1.9984096439594233e-05, + "loss": 0.5119, + "step": 1407 + }, + { + "epoch": 0.038660076880834705, + "grad_norm": 0.37386050820350647, + "learning_rate": 1.9984072082129833e-05, + "loss": 0.564, + "step": 1408 + }, + { + "epoch": 0.038687534321801206, + "grad_norm": 0.3413480520248413, + "learning_rate": 1.998404770604195e-05, + "loss": 0.553, + "step": 1409 + }, + { + "epoch": 0.03871499176276771, + "grad_norm": 0.3351229429244995, + "learning_rate": 1.9984023311330636e-05, + "loss": 0.6059, + "step": 1410 + }, + { + "epoch": 0.038742449203734215, + "grad_norm": 0.4778900444507599, + "learning_rate": 1.998399889799593e-05, + "loss": 0.5741, + "step": 1411 + }, + { + "epoch": 0.038769906644700716, + "grad_norm": 0.3922230303287506, + "learning_rate": 1.9983974466037876e-05, + "loss": 0.5676, + "step": 1412 + }, + { + "epoch": 0.03879736408566722, + "grad_norm": 0.39225998520851135, + "learning_rate": 1.9983950015456525e-05, + "loss": 0.5706, + "step": 1413 + }, + { + "epoch": 0.03882482152663372, + "grad_norm": 0.35159313678741455, + "learning_rate": 1.9983925546251922e-05, + "loss": 0.4788, + "step": 1414 + }, + { + "epoch": 0.03885227896760022, + "grad_norm": 0.7194771766662598, + "learning_rate": 1.998390105842411e-05, + "loss": 0.5789, + "step": 1415 + }, + { + "epoch": 0.03887973640856672, + "grad_norm": 0.39846062660217285, + "learning_rate": 1.9983876551973138e-05, + "loss": 0.5799, + "step": 1416 + }, + { + "epoch": 0.03890719384953322, + "grad_norm": 0.40458840131759644, + "learning_rate": 1.9983852026899048e-05, + "loss": 0.619, + "step": 1417 + }, + { + "epoch": 0.03893465129049972, + "grad_norm": 0.36823511123657227, + "learning_rate": 1.998382748320189e-05, + "loss": 0.5731, + "step": 1418 + }, + { + "epoch": 0.03896210873146623, + "grad_norm": 0.36196476221084595, + "learning_rate": 1.9983802920881703e-05, + "loss": 0.5065, + "step": 1419 + }, + { + "epoch": 0.03898956617243273, + "grad_norm": 0.36814600229263306, + "learning_rate": 1.998377833993854e-05, + "loss": 0.6741, + "step": 1420 + }, + { + "epoch": 0.03901702361339923, + "grad_norm": 0.42277851700782776, + "learning_rate": 1.9983753740372442e-05, + "loss": 0.564, + "step": 1421 + }, + { + "epoch": 0.03904448105436573, + "grad_norm": 0.3448120951652527, + "learning_rate": 1.9983729122183458e-05, + "loss": 0.5764, + "step": 1422 + }, + { + "epoch": 0.039071938495332234, + "grad_norm": 0.33831626176834106, + "learning_rate": 1.9983704485371635e-05, + "loss": 0.5491, + "step": 1423 + }, + { + "epoch": 0.039099395936298735, + "grad_norm": 0.39261531829833984, + "learning_rate": 1.9983679829937013e-05, + "loss": 0.6168, + "step": 1424 + }, + { + "epoch": 0.039126853377265236, + "grad_norm": 0.37276944518089294, + "learning_rate": 1.9983655155879643e-05, + "loss": 0.5465, + "step": 1425 + }, + { + "epoch": 0.039154310818231744, + "grad_norm": 0.39492225646972656, + "learning_rate": 1.9983630463199572e-05, + "loss": 0.6269, + "step": 1426 + }, + { + "epoch": 0.039181768259198245, + "grad_norm": 0.35296735167503357, + "learning_rate": 1.998360575189684e-05, + "loss": 0.4761, + "step": 1427 + }, + { + "epoch": 0.039209225700164746, + "grad_norm": 0.402790367603302, + "learning_rate": 1.99835810219715e-05, + "loss": 0.633, + "step": 1428 + }, + { + "epoch": 0.03923668314113125, + "grad_norm": 0.3851383924484253, + "learning_rate": 1.9983556273423593e-05, + "loss": 0.5423, + "step": 1429 + }, + { + "epoch": 0.03926414058209775, + "grad_norm": 0.36005282402038574, + "learning_rate": 1.9983531506253165e-05, + "loss": 0.5059, + "step": 1430 + }, + { + "epoch": 0.03929159802306425, + "grad_norm": 0.35508689284324646, + "learning_rate": 1.9983506720460268e-05, + "loss": 0.5506, + "step": 1431 + }, + { + "epoch": 0.03931905546403075, + "grad_norm": 0.3937515914440155, + "learning_rate": 1.9983481916044947e-05, + "loss": 0.6406, + "step": 1432 + }, + { + "epoch": 0.03934651290499725, + "grad_norm": 0.35777997970581055, + "learning_rate": 1.998345709300724e-05, + "loss": 0.6165, + "step": 1433 + }, + { + "epoch": 0.03937397034596376, + "grad_norm": 0.42319774627685547, + "learning_rate": 1.9983432251347205e-05, + "loss": 0.5883, + "step": 1434 + }, + { + "epoch": 0.03940142778693026, + "grad_norm": 0.37995222210884094, + "learning_rate": 1.9983407391064878e-05, + "loss": 0.5542, + "step": 1435 + }, + { + "epoch": 0.03942888522789676, + "grad_norm": 0.392892062664032, + "learning_rate": 1.998338251216031e-05, + "loss": 0.6081, + "step": 1436 + }, + { + "epoch": 0.03945634266886326, + "grad_norm": 0.4020610451698303, + "learning_rate": 1.9983357614633546e-05, + "loss": 0.5607, + "step": 1437 + }, + { + "epoch": 0.03948380010982976, + "grad_norm": 0.4491632580757141, + "learning_rate": 1.998333269848464e-05, + "loss": 0.5524, + "step": 1438 + }, + { + "epoch": 0.039511257550796264, + "grad_norm": 0.31460535526275635, + "learning_rate": 1.9983307763713628e-05, + "loss": 0.5417, + "step": 1439 + }, + { + "epoch": 0.039538714991762765, + "grad_norm": 0.3834148049354553, + "learning_rate": 1.998328281032056e-05, + "loss": 0.5629, + "step": 1440 + }, + { + "epoch": 0.03956617243272927, + "grad_norm": 0.4132012128829956, + "learning_rate": 1.9983257838305487e-05, + "loss": 0.6206, + "step": 1441 + }, + { + "epoch": 0.039593629873695774, + "grad_norm": 0.389197438955307, + "learning_rate": 1.9983232847668445e-05, + "loss": 0.5619, + "step": 1442 + }, + { + "epoch": 0.039621087314662275, + "grad_norm": 0.3745328187942505, + "learning_rate": 1.9983207838409492e-05, + "loss": 0.5434, + "step": 1443 + }, + { + "epoch": 0.039648544755628776, + "grad_norm": 0.4033775329589844, + "learning_rate": 1.9983182810528673e-05, + "loss": 0.5154, + "step": 1444 + }, + { + "epoch": 0.03967600219659528, + "grad_norm": 0.452808678150177, + "learning_rate": 1.9983157764026027e-05, + "loss": 0.5891, + "step": 1445 + }, + { + "epoch": 0.03970345963756178, + "grad_norm": 0.399745911359787, + "learning_rate": 1.9983132698901606e-05, + "loss": 0.6336, + "step": 1446 + }, + { + "epoch": 0.03973091707852828, + "grad_norm": 0.40538227558135986, + "learning_rate": 1.998310761515546e-05, + "loss": 0.6422, + "step": 1447 + }, + { + "epoch": 0.03975837451949478, + "grad_norm": 0.36251363158226013, + "learning_rate": 1.9983082512787627e-05, + "loss": 0.5449, + "step": 1448 + }, + { + "epoch": 0.03978583196046129, + "grad_norm": 0.32668158411979675, + "learning_rate": 1.9983057391798164e-05, + "loss": 0.4774, + "step": 1449 + }, + { + "epoch": 0.03981328940142779, + "grad_norm": 0.3558714687824249, + "learning_rate": 1.998303225218711e-05, + "loss": 0.5731, + "step": 1450 + }, + { + "epoch": 0.03984074684239429, + "grad_norm": 0.35155555605888367, + "learning_rate": 1.9983007093954515e-05, + "loss": 0.582, + "step": 1451 + }, + { + "epoch": 0.03986820428336079, + "grad_norm": 0.46668800711631775, + "learning_rate": 1.9982981917100424e-05, + "loss": 0.5368, + "step": 1452 + }, + { + "epoch": 0.03989566172432729, + "grad_norm": 0.365637868642807, + "learning_rate": 1.9982956721624886e-05, + "loss": 0.4701, + "step": 1453 + }, + { + "epoch": 0.03992311916529379, + "grad_norm": 0.3650030791759491, + "learning_rate": 1.998293150752795e-05, + "loss": 0.5268, + "step": 1454 + }, + { + "epoch": 0.039950576606260293, + "grad_norm": 0.33889004588127136, + "learning_rate": 1.998290627480966e-05, + "loss": 0.5089, + "step": 1455 + }, + { + "epoch": 0.0399780340472268, + "grad_norm": 0.501200258731842, + "learning_rate": 1.998288102347006e-05, + "loss": 0.5543, + "step": 1456 + }, + { + "epoch": 0.0400054914881933, + "grad_norm": 0.3795984089374542, + "learning_rate": 1.9982855753509205e-05, + "loss": 0.5571, + "step": 1457 + }, + { + "epoch": 0.0400329489291598, + "grad_norm": 0.3601495921611786, + "learning_rate": 1.9982830464927135e-05, + "loss": 0.5648, + "step": 1458 + }, + { + "epoch": 0.040060406370126304, + "grad_norm": 0.37256380915641785, + "learning_rate": 1.9982805157723903e-05, + "loss": 0.5725, + "step": 1459 + }, + { + "epoch": 0.040087863811092805, + "grad_norm": 0.4040667414665222, + "learning_rate": 1.998277983189955e-05, + "loss": 0.5779, + "step": 1460 + }, + { + "epoch": 0.040115321252059306, + "grad_norm": 0.4205073416233063, + "learning_rate": 1.9982754487454124e-05, + "loss": 0.5996, + "step": 1461 + }, + { + "epoch": 0.04014277869302581, + "grad_norm": 0.3432098925113678, + "learning_rate": 1.998272912438768e-05, + "loss": 0.4901, + "step": 1462 + }, + { + "epoch": 0.040170236133992315, + "grad_norm": 0.44499748945236206, + "learning_rate": 1.998270374270026e-05, + "loss": 0.6179, + "step": 1463 + }, + { + "epoch": 0.040197693574958816, + "grad_norm": 0.4408318102359772, + "learning_rate": 1.998267834239191e-05, + "loss": 0.6108, + "step": 1464 + }, + { + "epoch": 0.04022515101592532, + "grad_norm": 0.3823871314525604, + "learning_rate": 1.9982652923462678e-05, + "loss": 0.6101, + "step": 1465 + }, + { + "epoch": 0.04025260845689182, + "grad_norm": 0.47863680124282837, + "learning_rate": 1.9982627485912615e-05, + "loss": 0.6014, + "step": 1466 + }, + { + "epoch": 0.04028006589785832, + "grad_norm": 0.3763941824436188, + "learning_rate": 1.9982602029741762e-05, + "loss": 0.5441, + "step": 1467 + }, + { + "epoch": 0.04030752333882482, + "grad_norm": 0.41168203949928284, + "learning_rate": 1.998257655495017e-05, + "loss": 0.6304, + "step": 1468 + }, + { + "epoch": 0.04033498077979132, + "grad_norm": 0.5156595706939697, + "learning_rate": 1.9982551061537892e-05, + "loss": 0.6214, + "step": 1469 + }, + { + "epoch": 0.04036243822075782, + "grad_norm": 0.36858677864074707, + "learning_rate": 1.9982525549504966e-05, + "loss": 0.5463, + "step": 1470 + }, + { + "epoch": 0.04038989566172433, + "grad_norm": 0.35927948355674744, + "learning_rate": 1.9982500018851444e-05, + "loss": 0.6117, + "step": 1471 + }, + { + "epoch": 0.04041735310269083, + "grad_norm": 0.3643840253353119, + "learning_rate": 1.9982474469577373e-05, + "loss": 0.4597, + "step": 1472 + }, + { + "epoch": 0.04044481054365733, + "grad_norm": 0.40708377957344055, + "learning_rate": 1.9982448901682803e-05, + "loss": 0.5554, + "step": 1473 + }, + { + "epoch": 0.04047226798462383, + "grad_norm": 0.3784312307834625, + "learning_rate": 1.998242331516778e-05, + "loss": 0.6176, + "step": 1474 + }, + { + "epoch": 0.040499725425590334, + "grad_norm": 0.5704900026321411, + "learning_rate": 1.998239771003235e-05, + "loss": 0.5954, + "step": 1475 + }, + { + "epoch": 0.040527182866556835, + "grad_norm": 0.39144372940063477, + "learning_rate": 1.9982372086276564e-05, + "loss": 0.5635, + "step": 1476 + }, + { + "epoch": 0.040554640307523336, + "grad_norm": 0.3544127941131592, + "learning_rate": 1.9982346443900463e-05, + "loss": 0.5578, + "step": 1477 + }, + { + "epoch": 0.040582097748489844, + "grad_norm": 0.36313650012016296, + "learning_rate": 1.9982320782904105e-05, + "loss": 0.4876, + "step": 1478 + }, + { + "epoch": 0.040609555189456345, + "grad_norm": 0.39067843556404114, + "learning_rate": 1.998229510328753e-05, + "loss": 0.5741, + "step": 1479 + }, + { + "epoch": 0.040637012630422846, + "grad_norm": 0.35563233494758606, + "learning_rate": 1.998226940505079e-05, + "loss": 0.5783, + "step": 1480 + }, + { + "epoch": 0.04066447007138935, + "grad_norm": 0.37560442090034485, + "learning_rate": 1.9982243688193935e-05, + "loss": 0.5279, + "step": 1481 + }, + { + "epoch": 0.04069192751235585, + "grad_norm": 0.35053735971450806, + "learning_rate": 1.9982217952717004e-05, + "loss": 0.5484, + "step": 1482 + }, + { + "epoch": 0.04071938495332235, + "grad_norm": 0.37565770745277405, + "learning_rate": 1.9982192198620052e-05, + "loss": 0.5872, + "step": 1483 + }, + { + "epoch": 0.04074684239428885, + "grad_norm": 0.4358329474925995, + "learning_rate": 1.9982166425903128e-05, + "loss": 0.4688, + "step": 1484 + }, + { + "epoch": 0.04077429983525535, + "grad_norm": 0.39445623755455017, + "learning_rate": 1.9982140634566275e-05, + "loss": 0.5895, + "step": 1485 + }, + { + "epoch": 0.04080175727622186, + "grad_norm": 0.39675506949424744, + "learning_rate": 1.9982114824609544e-05, + "loss": 0.5763, + "step": 1486 + }, + { + "epoch": 0.04082921471718836, + "grad_norm": 0.3610275685787201, + "learning_rate": 1.9982088996032982e-05, + "loss": 0.5862, + "step": 1487 + }, + { + "epoch": 0.04085667215815486, + "grad_norm": 0.44647669792175293, + "learning_rate": 1.998206314883664e-05, + "loss": 0.5574, + "step": 1488 + }, + { + "epoch": 0.04088412959912136, + "grad_norm": 0.36456337571144104, + "learning_rate": 1.9982037283020566e-05, + "loss": 0.584, + "step": 1489 + }, + { + "epoch": 0.04091158704008786, + "grad_norm": 0.3951393961906433, + "learning_rate": 1.9982011398584804e-05, + "loss": 0.5615, + "step": 1490 + }, + { + "epoch": 0.040939044481054364, + "grad_norm": 0.3587546944618225, + "learning_rate": 1.9981985495529407e-05, + "loss": 0.5145, + "step": 1491 + }, + { + "epoch": 0.040966501922020865, + "grad_norm": 0.36884433031082153, + "learning_rate": 1.9981959573854417e-05, + "loss": 0.5559, + "step": 1492 + }, + { + "epoch": 0.04099395936298737, + "grad_norm": 0.3929798901081085, + "learning_rate": 1.9981933633559892e-05, + "loss": 0.5849, + "step": 1493 + }, + { + "epoch": 0.041021416803953874, + "grad_norm": 0.40386855602264404, + "learning_rate": 1.998190767464587e-05, + "loss": 0.5762, + "step": 1494 + }, + { + "epoch": 0.041048874244920375, + "grad_norm": 0.377131849527359, + "learning_rate": 1.9981881697112406e-05, + "loss": 0.6043, + "step": 1495 + }, + { + "epoch": 0.041076331685886876, + "grad_norm": 0.40541398525238037, + "learning_rate": 1.998185570095955e-05, + "loss": 0.609, + "step": 1496 + }, + { + "epoch": 0.04110378912685338, + "grad_norm": 0.3475559651851654, + "learning_rate": 1.9981829686187342e-05, + "loss": 0.6455, + "step": 1497 + }, + { + "epoch": 0.04113124656781988, + "grad_norm": 0.38848477602005005, + "learning_rate": 1.9981803652795838e-05, + "loss": 0.6266, + "step": 1498 + }, + { + "epoch": 0.04115870400878638, + "grad_norm": 0.38136783242225647, + "learning_rate": 1.9981777600785083e-05, + "loss": 0.6295, + "step": 1499 + }, + { + "epoch": 0.04118616144975288, + "grad_norm": 0.448990136384964, + "learning_rate": 1.998175153015513e-05, + "loss": 0.7197, + "step": 1500 + }, + { + "epoch": 0.04121361889071939, + "grad_norm": 0.3774144947528839, + "learning_rate": 1.9981725440906023e-05, + "loss": 0.565, + "step": 1501 + }, + { + "epoch": 0.04124107633168589, + "grad_norm": 0.3946588635444641, + "learning_rate": 1.9981699333037813e-05, + "loss": 0.5605, + "step": 1502 + }, + { + "epoch": 0.04126853377265239, + "grad_norm": 0.35287272930145264, + "learning_rate": 1.9981673206550548e-05, + "loss": 0.5308, + "step": 1503 + }, + { + "epoch": 0.04129599121361889, + "grad_norm": 0.36341220140457153, + "learning_rate": 1.9981647061444275e-05, + "loss": 0.5189, + "step": 1504 + }, + { + "epoch": 0.04132344865458539, + "grad_norm": 0.4458792507648468, + "learning_rate": 1.9981620897719046e-05, + "loss": 0.5548, + "step": 1505 + }, + { + "epoch": 0.04135090609555189, + "grad_norm": 0.35031858086586, + "learning_rate": 1.998159471537491e-05, + "loss": 0.5912, + "step": 1506 + }, + { + "epoch": 0.041378363536518394, + "grad_norm": 0.37241825461387634, + "learning_rate": 1.998156851441191e-05, + "loss": 0.5701, + "step": 1507 + }, + { + "epoch": 0.0414058209774849, + "grad_norm": 0.34442421793937683, + "learning_rate": 1.9981542294830102e-05, + "loss": 0.5853, + "step": 1508 + }, + { + "epoch": 0.0414332784184514, + "grad_norm": 0.3662140667438507, + "learning_rate": 1.9981516056629528e-05, + "loss": 0.5103, + "step": 1509 + }, + { + "epoch": 0.0414607358594179, + "grad_norm": 0.3637961745262146, + "learning_rate": 1.9981489799810245e-05, + "loss": 0.5085, + "step": 1510 + }, + { + "epoch": 0.041488193300384404, + "grad_norm": 0.4506815969944, + "learning_rate": 1.9981463524372294e-05, + "loss": 0.5686, + "step": 1511 + }, + { + "epoch": 0.041515650741350905, + "grad_norm": 0.3992023169994354, + "learning_rate": 1.9981437230315732e-05, + "loss": 0.5688, + "step": 1512 + }, + { + "epoch": 0.041543108182317406, + "grad_norm": 0.4627644121646881, + "learning_rate": 1.9981410917640603e-05, + "loss": 0.5711, + "step": 1513 + }, + { + "epoch": 0.04157056562328391, + "grad_norm": 0.4091407358646393, + "learning_rate": 1.9981384586346958e-05, + "loss": 0.6807, + "step": 1514 + }, + { + "epoch": 0.04159802306425041, + "grad_norm": 0.36994150280952454, + "learning_rate": 1.998135823643484e-05, + "loss": 0.6075, + "step": 1515 + }, + { + "epoch": 0.041625480505216916, + "grad_norm": 0.4173072576522827, + "learning_rate": 1.998133186790431e-05, + "loss": 0.6394, + "step": 1516 + }, + { + "epoch": 0.04165293794618342, + "grad_norm": 0.41299736499786377, + "learning_rate": 1.9981305480755404e-05, + "loss": 0.6187, + "step": 1517 + }, + { + "epoch": 0.04168039538714992, + "grad_norm": 0.35795843601226807, + "learning_rate": 1.998127907498818e-05, + "loss": 0.6062, + "step": 1518 + }, + { + "epoch": 0.04170785282811642, + "grad_norm": 0.3957805931568146, + "learning_rate": 1.9981252650602686e-05, + "loss": 0.5766, + "step": 1519 + }, + { + "epoch": 0.04173531026908292, + "grad_norm": 0.38808673620224, + "learning_rate": 1.998122620759897e-05, + "loss": 0.5527, + "step": 1520 + }, + { + "epoch": 0.04176276771004942, + "grad_norm": 0.3621934652328491, + "learning_rate": 1.998119974597708e-05, + "loss": 0.4586, + "step": 1521 + }, + { + "epoch": 0.04179022515101592, + "grad_norm": 0.402288556098938, + "learning_rate": 1.9981173265737065e-05, + "loss": 0.6552, + "step": 1522 + }, + { + "epoch": 0.04181768259198243, + "grad_norm": 0.41721636056900024, + "learning_rate": 1.9981146766878982e-05, + "loss": 0.6612, + "step": 1523 + }, + { + "epoch": 0.04184514003294893, + "grad_norm": 0.6894505620002747, + "learning_rate": 1.9981120249402874e-05, + "loss": 0.5079, + "step": 1524 + }, + { + "epoch": 0.04187259747391543, + "grad_norm": 0.3658975064754486, + "learning_rate": 1.9981093713308785e-05, + "loss": 0.5527, + "step": 1525 + }, + { + "epoch": 0.04190005491488193, + "grad_norm": 0.3439442813396454, + "learning_rate": 1.9981067158596772e-05, + "loss": 0.4958, + "step": 1526 + }, + { + "epoch": 0.041927512355848434, + "grad_norm": 0.4003651440143585, + "learning_rate": 1.9981040585266888e-05, + "loss": 0.5667, + "step": 1527 + }, + { + "epoch": 0.041954969796814935, + "grad_norm": 0.35332822799682617, + "learning_rate": 1.9981013993319176e-05, + "loss": 0.5368, + "step": 1528 + }, + { + "epoch": 0.041982427237781436, + "grad_norm": 0.4027746617794037, + "learning_rate": 1.9980987382753686e-05, + "loss": 0.6459, + "step": 1529 + }, + { + "epoch": 0.042009884678747944, + "grad_norm": 0.3974721431732178, + "learning_rate": 1.998096075357047e-05, + "loss": 0.6098, + "step": 1530 + }, + { + "epoch": 0.042037342119714445, + "grad_norm": 0.3503240942955017, + "learning_rate": 1.9980934105769577e-05, + "loss": 0.4317, + "step": 1531 + }, + { + "epoch": 0.042064799560680946, + "grad_norm": 0.35331302881240845, + "learning_rate": 1.9980907439351053e-05, + "loss": 0.6204, + "step": 1532 + }, + { + "epoch": 0.04209225700164745, + "grad_norm": 0.3674430549144745, + "learning_rate": 1.9980880754314955e-05, + "loss": 0.6133, + "step": 1533 + }, + { + "epoch": 0.04211971444261395, + "grad_norm": 0.38715028762817383, + "learning_rate": 1.9980854050661326e-05, + "loss": 0.5739, + "step": 1534 + }, + { + "epoch": 0.04214717188358045, + "grad_norm": 0.4007578492164612, + "learning_rate": 1.9980827328390222e-05, + "loss": 0.577, + "step": 1535 + }, + { + "epoch": 0.04217462932454695, + "grad_norm": 0.37029266357421875, + "learning_rate": 1.9980800587501686e-05, + "loss": 0.7246, + "step": 1536 + }, + { + "epoch": 0.04220208676551345, + "grad_norm": 0.40150314569473267, + "learning_rate": 1.9980773827995773e-05, + "loss": 0.6511, + "step": 1537 + }, + { + "epoch": 0.04222954420647996, + "grad_norm": 0.370368629693985, + "learning_rate": 1.998074704987253e-05, + "loss": 0.581, + "step": 1538 + }, + { + "epoch": 0.04225700164744646, + "grad_norm": 0.3493688106536865, + "learning_rate": 1.998072025313201e-05, + "loss": 0.5749, + "step": 1539 + }, + { + "epoch": 0.04228445908841296, + "grad_norm": 0.35625311732292175, + "learning_rate": 1.9980693437774265e-05, + "loss": 0.4805, + "step": 1540 + }, + { + "epoch": 0.04231191652937946, + "grad_norm": 0.39882004261016846, + "learning_rate": 1.9980666603799336e-05, + "loss": 0.6301, + "step": 1541 + }, + { + "epoch": 0.04233937397034596, + "grad_norm": 0.3345077931880951, + "learning_rate": 1.998063975120728e-05, + "loss": 0.4452, + "step": 1542 + }, + { + "epoch": 0.042366831411312464, + "grad_norm": 0.3945041000843048, + "learning_rate": 1.9980612879998144e-05, + "loss": 0.5425, + "step": 1543 + }, + { + "epoch": 0.042394288852278965, + "grad_norm": 0.3532535433769226, + "learning_rate": 1.998058599017198e-05, + "loss": 0.569, + "step": 1544 + }, + { + "epoch": 0.04242174629324547, + "grad_norm": 0.37374332547187805, + "learning_rate": 1.9980559081728838e-05, + "loss": 0.5216, + "step": 1545 + }, + { + "epoch": 0.042449203734211974, + "grad_norm": 0.35263243317604065, + "learning_rate": 1.998053215466877e-05, + "loss": 0.5482, + "step": 1546 + }, + { + "epoch": 0.042476661175178475, + "grad_norm": 0.35178399085998535, + "learning_rate": 1.998050520899182e-05, + "loss": 0.5585, + "step": 1547 + }, + { + "epoch": 0.042504118616144976, + "grad_norm": 0.47210147976875305, + "learning_rate": 1.9980478244698046e-05, + "loss": 0.5626, + "step": 1548 + }, + { + "epoch": 0.04253157605711148, + "grad_norm": 0.3967672288417816, + "learning_rate": 1.998045126178749e-05, + "loss": 0.5033, + "step": 1549 + }, + { + "epoch": 0.04255903349807798, + "grad_norm": 0.36692968010902405, + "learning_rate": 1.9980424260260212e-05, + "loss": 0.6623, + "step": 1550 + }, + { + "epoch": 0.04258649093904448, + "grad_norm": 0.3726944923400879, + "learning_rate": 1.9980397240116257e-05, + "loss": 0.5681, + "step": 1551 + }, + { + "epoch": 0.04261394838001098, + "grad_norm": 0.45657438039779663, + "learning_rate": 1.9980370201355673e-05, + "loss": 0.5627, + "step": 1552 + }, + { + "epoch": 0.04264140582097749, + "grad_norm": 0.4005999267101288, + "learning_rate": 1.9980343143978515e-05, + "loss": 0.5323, + "step": 1553 + }, + { + "epoch": 0.04266886326194399, + "grad_norm": 0.3591587543487549, + "learning_rate": 1.9980316067984832e-05, + "loss": 0.5962, + "step": 1554 + }, + { + "epoch": 0.04269632070291049, + "grad_norm": 0.36159202456474304, + "learning_rate": 1.9980288973374674e-05, + "loss": 0.5563, + "step": 1555 + }, + { + "epoch": 0.04272377814387699, + "grad_norm": 0.48796606063842773, + "learning_rate": 1.998026186014809e-05, + "loss": 0.5487, + "step": 1556 + }, + { + "epoch": 0.04275123558484349, + "grad_norm": 0.3308041989803314, + "learning_rate": 1.9980234728305134e-05, + "loss": 0.5664, + "step": 1557 + }, + { + "epoch": 0.04277869302580999, + "grad_norm": 0.3321518301963806, + "learning_rate": 1.9980207577845854e-05, + "loss": 0.4775, + "step": 1558 + }, + { + "epoch": 0.042806150466776494, + "grad_norm": 0.4029240608215332, + "learning_rate": 1.99801804087703e-05, + "loss": 0.5318, + "step": 1559 + }, + { + "epoch": 0.042833607907743, + "grad_norm": 0.3778943419456482, + "learning_rate": 1.9980153221078527e-05, + "loss": 0.5926, + "step": 1560 + }, + { + "epoch": 0.0428610653487095, + "grad_norm": 0.4445226490497589, + "learning_rate": 1.998012601477058e-05, + "loss": 0.5553, + "step": 1561 + }, + { + "epoch": 0.042888522789676004, + "grad_norm": 0.3678400218486786, + "learning_rate": 1.9980098789846517e-05, + "loss": 0.5408, + "step": 1562 + }, + { + "epoch": 0.042915980230642505, + "grad_norm": 1.170819640159607, + "learning_rate": 1.998007154630638e-05, + "loss": 0.6125, + "step": 1563 + }, + { + "epoch": 0.042943437671609005, + "grad_norm": 0.3932032883167267, + "learning_rate": 1.998004428415023e-05, + "loss": 0.546, + "step": 1564 + }, + { + "epoch": 0.042970895112575506, + "grad_norm": 0.3799107074737549, + "learning_rate": 1.9980017003378108e-05, + "loss": 0.5912, + "step": 1565 + }, + { + "epoch": 0.04299835255354201, + "grad_norm": 0.3581136465072632, + "learning_rate": 1.997998970399007e-05, + "loss": 0.6049, + "step": 1566 + }, + { + "epoch": 0.04302580999450851, + "grad_norm": 0.3978174328804016, + "learning_rate": 1.9979962385986166e-05, + "loss": 0.5927, + "step": 1567 + }, + { + "epoch": 0.043053267435475016, + "grad_norm": 0.355208158493042, + "learning_rate": 1.9979935049366446e-05, + "loss": 0.5792, + "step": 1568 + }, + { + "epoch": 0.04308072487644152, + "grad_norm": 0.6855316758155823, + "learning_rate": 1.997990769413096e-05, + "loss": 0.5728, + "step": 1569 + }, + { + "epoch": 0.04310818231740802, + "grad_norm": 0.32909727096557617, + "learning_rate": 1.9979880320279766e-05, + "loss": 0.4973, + "step": 1570 + }, + { + "epoch": 0.04313563975837452, + "grad_norm": 0.3891008794307709, + "learning_rate": 1.9979852927812908e-05, + "loss": 0.5509, + "step": 1571 + }, + { + "epoch": 0.04316309719934102, + "grad_norm": 0.36977487802505493, + "learning_rate": 1.9979825516730437e-05, + "loss": 0.6191, + "step": 1572 + }, + { + "epoch": 0.04319055464030752, + "grad_norm": 0.4252360165119171, + "learning_rate": 1.997979808703241e-05, + "loss": 0.5767, + "step": 1573 + }, + { + "epoch": 0.04321801208127402, + "grad_norm": 0.3528222143650055, + "learning_rate": 1.997977063871887e-05, + "loss": 0.5655, + "step": 1574 + }, + { + "epoch": 0.04324546952224053, + "grad_norm": 0.35072705149650574, + "learning_rate": 1.9979743171789875e-05, + "loss": 0.4999, + "step": 1575 + }, + { + "epoch": 0.04327292696320703, + "grad_norm": 0.3999965190887451, + "learning_rate": 1.9979715686245473e-05, + "loss": 0.6472, + "step": 1576 + }, + { + "epoch": 0.04330038440417353, + "grad_norm": 0.4214687943458557, + "learning_rate": 1.9979688182085716e-05, + "loss": 0.5411, + "step": 1577 + }, + { + "epoch": 0.04332784184514003, + "grad_norm": 0.35870251059532166, + "learning_rate": 1.9979660659310658e-05, + "loss": 0.5393, + "step": 1578 + }, + { + "epoch": 0.043355299286106534, + "grad_norm": 0.3755820393562317, + "learning_rate": 1.9979633117920343e-05, + "loss": 0.5455, + "step": 1579 + }, + { + "epoch": 0.043382756727073035, + "grad_norm": 0.40620195865631104, + "learning_rate": 1.997960555791483e-05, + "loss": 0.6688, + "step": 1580 + }, + { + "epoch": 0.043410214168039536, + "grad_norm": 0.6106656193733215, + "learning_rate": 1.997957797929417e-05, + "loss": 0.5521, + "step": 1581 + }, + { + "epoch": 0.04343767160900604, + "grad_norm": 0.3323543071746826, + "learning_rate": 1.997955038205841e-05, + "loss": 0.5447, + "step": 1582 + }, + { + "epoch": 0.043465129049972545, + "grad_norm": 0.35335952043533325, + "learning_rate": 1.99795227662076e-05, + "loss": 0.4805, + "step": 1583 + }, + { + "epoch": 0.043492586490939046, + "grad_norm": 0.38506680727005005, + "learning_rate": 1.99794951317418e-05, + "loss": 0.6261, + "step": 1584 + }, + { + "epoch": 0.04352004393190555, + "grad_norm": 0.36565372347831726, + "learning_rate": 1.9979467478661053e-05, + "loss": 0.5383, + "step": 1585 + }, + { + "epoch": 0.04354750137287205, + "grad_norm": 0.3636868894100189, + "learning_rate": 1.9979439806965416e-05, + "loss": 0.5559, + "step": 1586 + }, + { + "epoch": 0.04357495881383855, + "grad_norm": 0.36844295263290405, + "learning_rate": 1.9979412116654936e-05, + "loss": 0.5711, + "step": 1587 + }, + { + "epoch": 0.04360241625480505, + "grad_norm": 0.39611735939979553, + "learning_rate": 1.997938440772967e-05, + "loss": 0.568, + "step": 1588 + }, + { + "epoch": 0.04362987369577155, + "grad_norm": 0.4090544581413269, + "learning_rate": 1.9979356680189666e-05, + "loss": 0.6429, + "step": 1589 + }, + { + "epoch": 0.04365733113673806, + "grad_norm": 0.3816700279712677, + "learning_rate": 1.9979328934034978e-05, + "loss": 0.5315, + "step": 1590 + }, + { + "epoch": 0.04368478857770456, + "grad_norm": 0.3633674681186676, + "learning_rate": 1.9979301169265656e-05, + "loss": 0.5525, + "step": 1591 + }, + { + "epoch": 0.04371224601867106, + "grad_norm": 0.367925763130188, + "learning_rate": 1.9979273385881753e-05, + "loss": 0.5735, + "step": 1592 + }, + { + "epoch": 0.04373970345963756, + "grad_norm": 0.4352996349334717, + "learning_rate": 1.997924558388332e-05, + "loss": 0.609, + "step": 1593 + }, + { + "epoch": 0.04376716090060406, + "grad_norm": 0.3500840961933136, + "learning_rate": 1.9979217763270408e-05, + "loss": 0.546, + "step": 1594 + }, + { + "epoch": 0.043794618341570564, + "grad_norm": 0.4347485601902008, + "learning_rate": 1.997918992404307e-05, + "loss": 0.6044, + "step": 1595 + }, + { + "epoch": 0.043822075782537065, + "grad_norm": 0.3432178497314453, + "learning_rate": 1.9979162066201357e-05, + "loss": 0.5072, + "step": 1596 + }, + { + "epoch": 0.04384953322350357, + "grad_norm": 0.41670942306518555, + "learning_rate": 1.997913418974532e-05, + "loss": 0.6063, + "step": 1597 + }, + { + "epoch": 0.043876990664470074, + "grad_norm": 0.35011470317840576, + "learning_rate": 1.9979106294675017e-05, + "loss": 0.492, + "step": 1598 + }, + { + "epoch": 0.043904448105436575, + "grad_norm": 0.4062155783176422, + "learning_rate": 1.9979078380990493e-05, + "loss": 0.6162, + "step": 1599 + }, + { + "epoch": 0.043931905546403076, + "grad_norm": 0.351869136095047, + "learning_rate": 1.9979050448691804e-05, + "loss": 0.6121, + "step": 1600 + }, + { + "epoch": 0.04395936298736958, + "grad_norm": 0.3907490074634552, + "learning_rate": 1.9979022497779003e-05, + "loss": 0.6211, + "step": 1601 + }, + { + "epoch": 0.04398682042833608, + "grad_norm": 0.4329334795475006, + "learning_rate": 1.9978994528252135e-05, + "loss": 0.6195, + "step": 1602 + }, + { + "epoch": 0.04401427786930258, + "grad_norm": 0.4141519069671631, + "learning_rate": 1.9978966540111264e-05, + "loss": 0.582, + "step": 1603 + }, + { + "epoch": 0.04404173531026908, + "grad_norm": 0.357405424118042, + "learning_rate": 1.997893853335643e-05, + "loss": 0.5466, + "step": 1604 + }, + { + "epoch": 0.04406919275123559, + "grad_norm": 0.35625141859054565, + "learning_rate": 1.997891050798769e-05, + "loss": 0.4719, + "step": 1605 + }, + { + "epoch": 0.04409665019220209, + "grad_norm": 0.37754884362220764, + "learning_rate": 1.99788824640051e-05, + "loss": 0.576, + "step": 1606 + }, + { + "epoch": 0.04412410763316859, + "grad_norm": 0.3458520472049713, + "learning_rate": 1.997885440140871e-05, + "loss": 0.5297, + "step": 1607 + }, + { + "epoch": 0.04415156507413509, + "grad_norm": 0.37040504813194275, + "learning_rate": 1.9978826320198573e-05, + "loss": 0.5742, + "step": 1608 + }, + { + "epoch": 0.04417902251510159, + "grad_norm": 0.42641469836235046, + "learning_rate": 1.9978798220374734e-05, + "loss": 0.6425, + "step": 1609 + }, + { + "epoch": 0.04420647995606809, + "grad_norm": 0.3348250091075897, + "learning_rate": 1.9978770101937255e-05, + "loss": 0.5658, + "step": 1610 + }, + { + "epoch": 0.044233937397034594, + "grad_norm": 0.37275129556655884, + "learning_rate": 1.9978741964886185e-05, + "loss": 0.6063, + "step": 1611 + }, + { + "epoch": 0.0442613948380011, + "grad_norm": 0.45644208788871765, + "learning_rate": 1.9978713809221577e-05, + "loss": 0.647, + "step": 1612 + }, + { + "epoch": 0.0442888522789676, + "grad_norm": 0.3386860191822052, + "learning_rate": 1.997868563494348e-05, + "loss": 0.5787, + "step": 1613 + }, + { + "epoch": 0.044316309719934104, + "grad_norm": 0.9339507222175598, + "learning_rate": 1.9978657442051955e-05, + "loss": 0.5359, + "step": 1614 + }, + { + "epoch": 0.044343767160900605, + "grad_norm": 0.4120176434516907, + "learning_rate": 1.9978629230547045e-05, + "loss": 0.529, + "step": 1615 + }, + { + "epoch": 0.044371224601867106, + "grad_norm": 0.39538586139678955, + "learning_rate": 1.9978601000428807e-05, + "loss": 0.5648, + "step": 1616 + }, + { + "epoch": 0.04439868204283361, + "grad_norm": 0.3849935233592987, + "learning_rate": 1.9978572751697294e-05, + "loss": 0.5375, + "step": 1617 + }, + { + "epoch": 0.04442613948380011, + "grad_norm": 0.39220142364501953, + "learning_rate": 1.9978544484352557e-05, + "loss": 0.6646, + "step": 1618 + }, + { + "epoch": 0.04445359692476661, + "grad_norm": 0.3432864546775818, + "learning_rate": 1.997851619839465e-05, + "loss": 0.5156, + "step": 1619 + }, + { + "epoch": 0.044481054365733116, + "grad_norm": 0.353623628616333, + "learning_rate": 1.9978487893823626e-05, + "loss": 0.5652, + "step": 1620 + }, + { + "epoch": 0.04450851180669962, + "grad_norm": 0.37998852133750916, + "learning_rate": 1.997845957063954e-05, + "loss": 0.5714, + "step": 1621 + }, + { + "epoch": 0.04453596924766612, + "grad_norm": 0.3396022617816925, + "learning_rate": 1.9978431228842437e-05, + "loss": 0.5608, + "step": 1622 + }, + { + "epoch": 0.04456342668863262, + "grad_norm": 0.3922903537750244, + "learning_rate": 1.9978402868432378e-05, + "loss": 0.5809, + "step": 1623 + }, + { + "epoch": 0.04459088412959912, + "grad_norm": 0.3459933400154114, + "learning_rate": 1.997837448940941e-05, + "loss": 0.4906, + "step": 1624 + }, + { + "epoch": 0.04461834157056562, + "grad_norm": 0.3655529022216797, + "learning_rate": 1.9978346091773595e-05, + "loss": 0.5151, + "step": 1625 + }, + { + "epoch": 0.04464579901153212, + "grad_norm": 0.43729856610298157, + "learning_rate": 1.9978317675524975e-05, + "loss": 0.5493, + "step": 1626 + }, + { + "epoch": 0.04467325645249863, + "grad_norm": 0.3376643657684326, + "learning_rate": 1.997828924066361e-05, + "loss": 0.5337, + "step": 1627 + }, + { + "epoch": 0.04470071389346513, + "grad_norm": 0.3751026391983032, + "learning_rate": 1.997826078718955e-05, + "loss": 0.5673, + "step": 1628 + }, + { + "epoch": 0.04472817133443163, + "grad_norm": 0.34166353940963745, + "learning_rate": 1.997823231510285e-05, + "loss": 0.584, + "step": 1629 + }, + { + "epoch": 0.04475562877539813, + "grad_norm": 0.40826261043548584, + "learning_rate": 1.997820382440356e-05, + "loss": 0.6144, + "step": 1630 + }, + { + "epoch": 0.044783086216364634, + "grad_norm": 0.4053293466567993, + "learning_rate": 1.9978175315091733e-05, + "loss": 0.6462, + "step": 1631 + }, + { + "epoch": 0.044810543657331135, + "grad_norm": 0.46763524413108826, + "learning_rate": 1.9978146787167427e-05, + "loss": 0.5399, + "step": 1632 + }, + { + "epoch": 0.044838001098297636, + "grad_norm": 0.3321897089481354, + "learning_rate": 1.9978118240630693e-05, + "loss": 0.5876, + "step": 1633 + }, + { + "epoch": 0.04486545853926414, + "grad_norm": 0.33027157187461853, + "learning_rate": 1.9978089675481583e-05, + "loss": 0.513, + "step": 1634 + }, + { + "epoch": 0.044892915980230645, + "grad_norm": 0.39297324419021606, + "learning_rate": 1.9978061091720154e-05, + "loss": 0.5493, + "step": 1635 + }, + { + "epoch": 0.044920373421197146, + "grad_norm": 0.3836534023284912, + "learning_rate": 1.9978032489346453e-05, + "loss": 0.5183, + "step": 1636 + }, + { + "epoch": 0.04494783086216365, + "grad_norm": 0.35657554864883423, + "learning_rate": 1.9978003868360538e-05, + "loss": 0.5489, + "step": 1637 + }, + { + "epoch": 0.04497528830313015, + "grad_norm": 0.36772316694259644, + "learning_rate": 1.9977975228762463e-05, + "loss": 0.5597, + "step": 1638 + }, + { + "epoch": 0.04500274574409665, + "grad_norm": 0.3156841993331909, + "learning_rate": 1.9977946570552276e-05, + "loss": 0.485, + "step": 1639 + }, + { + "epoch": 0.04503020318506315, + "grad_norm": 0.351209431886673, + "learning_rate": 1.9977917893730037e-05, + "loss": 0.4685, + "step": 1640 + }, + { + "epoch": 0.04505766062602965, + "grad_norm": 0.37185484170913696, + "learning_rate": 1.9977889198295794e-05, + "loss": 0.5357, + "step": 1641 + }, + { + "epoch": 0.04508511806699616, + "grad_norm": 0.4787532091140747, + "learning_rate": 1.9977860484249605e-05, + "loss": 0.5116, + "step": 1642 + }, + { + "epoch": 0.04511257550796266, + "grad_norm": 0.43139660358428955, + "learning_rate": 1.9977831751591523e-05, + "loss": 0.5733, + "step": 1643 + }, + { + "epoch": 0.04514003294892916, + "grad_norm": 0.3786834478378296, + "learning_rate": 1.99778030003216e-05, + "loss": 0.6324, + "step": 1644 + }, + { + "epoch": 0.04516749038989566, + "grad_norm": 0.33062466979026794, + "learning_rate": 1.9977774230439887e-05, + "loss": 0.4149, + "step": 1645 + }, + { + "epoch": 0.04519494783086216, + "grad_norm": 0.38944971561431885, + "learning_rate": 1.997774544194644e-05, + "loss": 0.6769, + "step": 1646 + }, + { + "epoch": 0.045222405271828664, + "grad_norm": 0.35034096240997314, + "learning_rate": 1.9977716634841315e-05, + "loss": 0.61, + "step": 1647 + }, + { + "epoch": 0.045249862712795165, + "grad_norm": 0.3655945062637329, + "learning_rate": 1.9977687809124565e-05, + "loss": 0.606, + "step": 1648 + }, + { + "epoch": 0.045277320153761666, + "grad_norm": 0.3509422838687897, + "learning_rate": 1.997765896479624e-05, + "loss": 0.4848, + "step": 1649 + }, + { + "epoch": 0.045304777594728174, + "grad_norm": 0.3329755961894989, + "learning_rate": 1.9977630101856402e-05, + "loss": 0.5234, + "step": 1650 + }, + { + "epoch": 0.045332235035694675, + "grad_norm": 0.3257162868976593, + "learning_rate": 1.9977601220305093e-05, + "loss": 0.5142, + "step": 1651 + }, + { + "epoch": 0.045359692476661176, + "grad_norm": 1.3141242265701294, + "learning_rate": 1.9977572320142374e-05, + "loss": 0.5788, + "step": 1652 + }, + { + "epoch": 0.04538714991762768, + "grad_norm": 0.3501489758491516, + "learning_rate": 1.99775434013683e-05, + "loss": 0.5493, + "step": 1653 + }, + { + "epoch": 0.04541460735859418, + "grad_norm": 0.3833545744419098, + "learning_rate": 1.9977514463982922e-05, + "loss": 0.6259, + "step": 1654 + }, + { + "epoch": 0.04544206479956068, + "grad_norm": 0.340162992477417, + "learning_rate": 1.9977485507986298e-05, + "loss": 0.5203, + "step": 1655 + }, + { + "epoch": 0.04546952224052718, + "grad_norm": 0.41600340604782104, + "learning_rate": 1.9977456533378476e-05, + "loss": 0.6536, + "step": 1656 + }, + { + "epoch": 0.04549697968149369, + "grad_norm": 0.4003397524356842, + "learning_rate": 1.9977427540159514e-05, + "loss": 0.5865, + "step": 1657 + }, + { + "epoch": 0.04552443712246019, + "grad_norm": 0.39493659138679504, + "learning_rate": 1.997739852832947e-05, + "loss": 0.538, + "step": 1658 + }, + { + "epoch": 0.04555189456342669, + "grad_norm": 0.3891777992248535, + "learning_rate": 1.9977369497888387e-05, + "loss": 0.607, + "step": 1659 + }, + { + "epoch": 0.04557935200439319, + "grad_norm": 0.42392173409461975, + "learning_rate": 1.9977340448836327e-05, + "loss": 0.548, + "step": 1660 + }, + { + "epoch": 0.04560680944535969, + "grad_norm": 0.3889339864253998, + "learning_rate": 1.997731138117334e-05, + "loss": 0.4738, + "step": 1661 + }, + { + "epoch": 0.04563426688632619, + "grad_norm": 0.5017538070678711, + "learning_rate": 1.9977282294899488e-05, + "loss": 0.5853, + "step": 1662 + }, + { + "epoch": 0.045661724327292694, + "grad_norm": 0.3721320629119873, + "learning_rate": 1.9977253190014817e-05, + "loss": 0.5246, + "step": 1663 + }, + { + "epoch": 0.0456891817682592, + "grad_norm": 0.39119404554367065, + "learning_rate": 1.9977224066519386e-05, + "loss": 0.5307, + "step": 1664 + }, + { + "epoch": 0.0457166392092257, + "grad_norm": 0.38130030035972595, + "learning_rate": 1.997719492441325e-05, + "loss": 0.5867, + "step": 1665 + }, + { + "epoch": 0.045744096650192204, + "grad_norm": 0.3553202748298645, + "learning_rate": 1.9977165763696455e-05, + "loss": 0.5511, + "step": 1666 + }, + { + "epoch": 0.045771554091158705, + "grad_norm": 0.48479706048965454, + "learning_rate": 1.9977136584369064e-05, + "loss": 0.5789, + "step": 1667 + }, + { + "epoch": 0.045799011532125206, + "grad_norm": 0.3920189142227173, + "learning_rate": 1.9977107386431133e-05, + "loss": 0.56, + "step": 1668 + }, + { + "epoch": 0.04582646897309171, + "grad_norm": 0.3380624055862427, + "learning_rate": 1.997707816988271e-05, + "loss": 0.4645, + "step": 1669 + }, + { + "epoch": 0.04585392641405821, + "grad_norm": 0.3563838601112366, + "learning_rate": 1.997704893472385e-05, + "loss": 0.5059, + "step": 1670 + }, + { + "epoch": 0.04588138385502471, + "grad_norm": 0.3170499801635742, + "learning_rate": 1.9977019680954612e-05, + "loss": 0.4485, + "step": 1671 + }, + { + "epoch": 0.045908841295991216, + "grad_norm": 0.38907405734062195, + "learning_rate": 1.9976990408575044e-05, + "loss": 0.5798, + "step": 1672 + }, + { + "epoch": 0.04593629873695772, + "grad_norm": 0.35074278712272644, + "learning_rate": 1.9976961117585207e-05, + "loss": 0.501, + "step": 1673 + }, + { + "epoch": 0.04596375617792422, + "grad_norm": 0.31192779541015625, + "learning_rate": 1.9976931807985155e-05, + "loss": 0.4577, + "step": 1674 + }, + { + "epoch": 0.04599121361889072, + "grad_norm": 0.3812955319881439, + "learning_rate": 1.997690247977494e-05, + "loss": 0.5114, + "step": 1675 + }, + { + "epoch": 0.04601867105985722, + "grad_norm": 0.4271005094051361, + "learning_rate": 1.9976873132954616e-05, + "loss": 0.6572, + "step": 1676 + }, + { + "epoch": 0.04604612850082372, + "grad_norm": 0.3593882918357849, + "learning_rate": 1.997684376752424e-05, + "loss": 0.6548, + "step": 1677 + }, + { + "epoch": 0.04607358594179022, + "grad_norm": 0.3889383375644684, + "learning_rate": 1.9976814383483867e-05, + "loss": 0.5211, + "step": 1678 + }, + { + "epoch": 0.04610104338275673, + "grad_norm": 0.37002500891685486, + "learning_rate": 1.997678498083355e-05, + "loss": 0.612, + "step": 1679 + }, + { + "epoch": 0.04612850082372323, + "grad_norm": 0.3793419301509857, + "learning_rate": 1.9976755559573345e-05, + "loss": 0.525, + "step": 1680 + }, + { + "epoch": 0.04615595826468973, + "grad_norm": 0.3515072464942932, + "learning_rate": 1.997672611970331e-05, + "loss": 0.4385, + "step": 1681 + }, + { + "epoch": 0.04618341570565623, + "grad_norm": 0.38817518949508667, + "learning_rate": 1.997669666122349e-05, + "loss": 0.6007, + "step": 1682 + }, + { + "epoch": 0.046210873146622734, + "grad_norm": 0.4044186770915985, + "learning_rate": 1.997666718413395e-05, + "loss": 0.6078, + "step": 1683 + }, + { + "epoch": 0.046238330587589235, + "grad_norm": 0.45998576283454895, + "learning_rate": 1.997663768843474e-05, + "loss": 0.5569, + "step": 1684 + }, + { + "epoch": 0.046265788028555736, + "grad_norm": 0.4107573628425598, + "learning_rate": 1.9976608174125918e-05, + "loss": 0.4504, + "step": 1685 + }, + { + "epoch": 0.04629324546952224, + "grad_norm": 0.404364675283432, + "learning_rate": 1.9976578641207537e-05, + "loss": 0.5889, + "step": 1686 + }, + { + "epoch": 0.046320702910488745, + "grad_norm": 0.3890608847141266, + "learning_rate": 1.9976549089679652e-05, + "loss": 0.5216, + "step": 1687 + }, + { + "epoch": 0.046348160351455246, + "grad_norm": 0.4371418058872223, + "learning_rate": 1.9976519519542325e-05, + "loss": 0.5445, + "step": 1688 + }, + { + "epoch": 0.04637561779242175, + "grad_norm": 0.40834856033325195, + "learning_rate": 1.9976489930795596e-05, + "loss": 0.5831, + "step": 1689 + }, + { + "epoch": 0.04640307523338825, + "grad_norm": 0.4211842715740204, + "learning_rate": 1.9976460323439536e-05, + "loss": 0.6272, + "step": 1690 + }, + { + "epoch": 0.04643053267435475, + "grad_norm": 0.3223666250705719, + "learning_rate": 1.997643069747419e-05, + "loss": 0.5498, + "step": 1691 + }, + { + "epoch": 0.04645799011532125, + "grad_norm": 0.36402952671051025, + "learning_rate": 1.9976401052899617e-05, + "loss": 0.5191, + "step": 1692 + }, + { + "epoch": 0.04648544755628775, + "grad_norm": 0.7898536324501038, + "learning_rate": 1.9976371389715873e-05, + "loss": 0.579, + "step": 1693 + }, + { + "epoch": 0.04651290499725426, + "grad_norm": 0.4162236154079437, + "learning_rate": 1.997634170792301e-05, + "loss": 0.5199, + "step": 1694 + }, + { + "epoch": 0.04654036243822076, + "grad_norm": 0.36921828985214233, + "learning_rate": 1.9976312007521087e-05, + "loss": 0.5826, + "step": 1695 + }, + { + "epoch": 0.04656781987918726, + "grad_norm": 0.3712034821510315, + "learning_rate": 1.9976282288510157e-05, + "loss": 0.5095, + "step": 1696 + }, + { + "epoch": 0.04659527732015376, + "grad_norm": 0.49292856454849243, + "learning_rate": 1.9976252550890282e-05, + "loss": 0.5593, + "step": 1697 + }, + { + "epoch": 0.04662273476112026, + "grad_norm": 0.39175304770469666, + "learning_rate": 1.9976222794661504e-05, + "loss": 0.6076, + "step": 1698 + }, + { + "epoch": 0.046650192202086764, + "grad_norm": 0.34162437915802, + "learning_rate": 1.997619301982389e-05, + "loss": 0.5701, + "step": 1699 + }, + { + "epoch": 0.046677649643053265, + "grad_norm": 0.4131351411342621, + "learning_rate": 1.9976163226377493e-05, + "loss": 0.5755, + "step": 1700 + }, + { + "epoch": 0.046705107084019766, + "grad_norm": 0.37309128046035767, + "learning_rate": 1.997613341432237e-05, + "loss": 0.5804, + "step": 1701 + }, + { + "epoch": 0.046732564524986274, + "grad_norm": 0.3355151414871216, + "learning_rate": 1.9976103583658567e-05, + "loss": 0.5347, + "step": 1702 + }, + { + "epoch": 0.046760021965952775, + "grad_norm": 0.3353118300437927, + "learning_rate": 1.997607373438615e-05, + "loss": 0.5515, + "step": 1703 + }, + { + "epoch": 0.046787479406919276, + "grad_norm": 0.3552364408969879, + "learning_rate": 1.997604386650517e-05, + "loss": 0.5306, + "step": 1704 + }, + { + "epoch": 0.04681493684788578, + "grad_norm": 0.354777991771698, + "learning_rate": 1.9976013980015686e-05, + "loss": 0.6186, + "step": 1705 + }, + { + "epoch": 0.04684239428885228, + "grad_norm": 0.3374803066253662, + "learning_rate": 1.9975984074917753e-05, + "loss": 0.489, + "step": 1706 + }, + { + "epoch": 0.04686985172981878, + "grad_norm": 0.3448299169540405, + "learning_rate": 1.9975954151211425e-05, + "loss": 0.5755, + "step": 1707 + }, + { + "epoch": 0.04689730917078528, + "grad_norm": 0.3431239724159241, + "learning_rate": 1.9975924208896758e-05, + "loss": 0.6134, + "step": 1708 + }, + { + "epoch": 0.04692476661175179, + "grad_norm": 0.42584022879600525, + "learning_rate": 1.997589424797381e-05, + "loss": 0.6296, + "step": 1709 + }, + { + "epoch": 0.04695222405271829, + "grad_norm": 0.43279021978378296, + "learning_rate": 1.997586426844263e-05, + "loss": 0.4994, + "step": 1710 + }, + { + "epoch": 0.04697968149368479, + "grad_norm": 0.376655638217926, + "learning_rate": 1.9975834270303286e-05, + "loss": 0.5828, + "step": 1711 + }, + { + "epoch": 0.04700713893465129, + "grad_norm": 0.42796212434768677, + "learning_rate": 1.9975804253555827e-05, + "loss": 0.6233, + "step": 1712 + }, + { + "epoch": 0.04703459637561779, + "grad_norm": 0.3703799843788147, + "learning_rate": 1.9975774218200307e-05, + "loss": 0.6113, + "step": 1713 + }, + { + "epoch": 0.04706205381658429, + "grad_norm": 0.3934277594089508, + "learning_rate": 1.997574416423678e-05, + "loss": 0.6374, + "step": 1714 + }, + { + "epoch": 0.047089511257550794, + "grad_norm": 0.35334357619285583, + "learning_rate": 1.9975714091665313e-05, + "loss": 0.5808, + "step": 1715 + }, + { + "epoch": 0.047116968698517295, + "grad_norm": 0.35406729578971863, + "learning_rate": 1.9975684000485952e-05, + "loss": 0.5039, + "step": 1716 + }, + { + "epoch": 0.0471444261394838, + "grad_norm": 0.36556097865104675, + "learning_rate": 1.997565389069876e-05, + "loss": 0.5895, + "step": 1717 + }, + { + "epoch": 0.047171883580450304, + "grad_norm": 0.44138839840888977, + "learning_rate": 1.9975623762303783e-05, + "loss": 0.568, + "step": 1718 + }, + { + "epoch": 0.047199341021416805, + "grad_norm": 0.3740490972995758, + "learning_rate": 1.9975593615301087e-05, + "loss": 0.5476, + "step": 1719 + }, + { + "epoch": 0.047226798462383306, + "grad_norm": 0.4442768096923828, + "learning_rate": 1.9975563449690725e-05, + "loss": 0.6089, + "step": 1720 + }, + { + "epoch": 0.04725425590334981, + "grad_norm": 0.3277495801448822, + "learning_rate": 1.9975533265472756e-05, + "loss": 0.5346, + "step": 1721 + }, + { + "epoch": 0.04728171334431631, + "grad_norm": 0.3677663207054138, + "learning_rate": 1.997550306264723e-05, + "loss": 0.6225, + "step": 1722 + }, + { + "epoch": 0.04730917078528281, + "grad_norm": 0.3433849513530731, + "learning_rate": 1.9975472841214206e-05, + "loss": 0.5522, + "step": 1723 + }, + { + "epoch": 0.04733662822624932, + "grad_norm": 0.3698031008243561, + "learning_rate": 1.9975442601173747e-05, + "loss": 0.5432, + "step": 1724 + }, + { + "epoch": 0.04736408566721582, + "grad_norm": 0.3758487105369568, + "learning_rate": 1.99754123425259e-05, + "loss": 0.5289, + "step": 1725 + }, + { + "epoch": 0.04739154310818232, + "grad_norm": 0.3385457396507263, + "learning_rate": 1.9975382065270725e-05, + "loss": 0.5202, + "step": 1726 + }, + { + "epoch": 0.04741900054914882, + "grad_norm": 0.37068232893943787, + "learning_rate": 1.997535176940828e-05, + "loss": 0.6018, + "step": 1727 + }, + { + "epoch": 0.04744645799011532, + "grad_norm": 0.4369012415409088, + "learning_rate": 1.997532145493862e-05, + "loss": 0.6161, + "step": 1728 + }, + { + "epoch": 0.04747391543108182, + "grad_norm": 0.38823720812797546, + "learning_rate": 1.99752911218618e-05, + "loss": 0.5609, + "step": 1729 + }, + { + "epoch": 0.04750137287204832, + "grad_norm": 0.40428680181503296, + "learning_rate": 1.997526077017788e-05, + "loss": 0.6734, + "step": 1730 + }, + { + "epoch": 0.04752883031301483, + "grad_norm": 0.3778148889541626, + "learning_rate": 1.9975230399886914e-05, + "loss": 0.6295, + "step": 1731 + }, + { + "epoch": 0.04755628775398133, + "grad_norm": 0.5080970525741577, + "learning_rate": 1.997520001098896e-05, + "loss": 0.6168, + "step": 1732 + }, + { + "epoch": 0.04758374519494783, + "grad_norm": 0.4745655059814453, + "learning_rate": 1.9975169603484073e-05, + "loss": 0.5916, + "step": 1733 + }, + { + "epoch": 0.04761120263591433, + "grad_norm": 0.34783831238746643, + "learning_rate": 1.9975139177372312e-05, + "loss": 0.53, + "step": 1734 + }, + { + "epoch": 0.047638660076880834, + "grad_norm": 0.3691735565662384, + "learning_rate": 1.9975108732653738e-05, + "loss": 0.5076, + "step": 1735 + }, + { + "epoch": 0.047666117517847335, + "grad_norm": 0.3799746334552765, + "learning_rate": 1.9975078269328394e-05, + "loss": 0.5494, + "step": 1736 + }, + { + "epoch": 0.047693574958813836, + "grad_norm": 0.38361912965774536, + "learning_rate": 1.997504778739635e-05, + "loss": 0.6442, + "step": 1737 + }, + { + "epoch": 0.04772103239978034, + "grad_norm": 0.4452016055583954, + "learning_rate": 1.997501728685766e-05, + "loss": 0.5577, + "step": 1738 + }, + { + "epoch": 0.047748489840746845, + "grad_norm": 1.1470799446105957, + "learning_rate": 1.9974986767712373e-05, + "loss": 0.5293, + "step": 1739 + }, + { + "epoch": 0.047775947281713346, + "grad_norm": 0.35418373346328735, + "learning_rate": 1.9974956229960555e-05, + "loss": 0.5092, + "step": 1740 + }, + { + "epoch": 0.04780340472267985, + "grad_norm": 0.35853996872901917, + "learning_rate": 1.9974925673602263e-05, + "loss": 0.5795, + "step": 1741 + }, + { + "epoch": 0.04783086216364635, + "grad_norm": 0.33686500787734985, + "learning_rate": 1.9974895098637546e-05, + "loss": 0.5617, + "step": 1742 + }, + { + "epoch": 0.04785831960461285, + "grad_norm": 0.40688657760620117, + "learning_rate": 1.997486450506647e-05, + "loss": 0.5466, + "step": 1743 + }, + { + "epoch": 0.04788577704557935, + "grad_norm": 0.3369748294353485, + "learning_rate": 1.9974833892889086e-05, + "loss": 0.4981, + "step": 1744 + }, + { + "epoch": 0.04791323448654585, + "grad_norm": 0.34575265645980835, + "learning_rate": 1.9974803262105454e-05, + "loss": 0.6352, + "step": 1745 + }, + { + "epoch": 0.04794069192751236, + "grad_norm": 0.3886786699295044, + "learning_rate": 1.997477261271563e-05, + "loss": 0.5706, + "step": 1746 + }, + { + "epoch": 0.04796814936847886, + "grad_norm": 0.38180017471313477, + "learning_rate": 1.997474194471967e-05, + "loss": 0.5854, + "step": 1747 + }, + { + "epoch": 0.04799560680944536, + "grad_norm": 0.39295628666877747, + "learning_rate": 1.9974711258117635e-05, + "loss": 0.5254, + "step": 1748 + }, + { + "epoch": 0.04802306425041186, + "grad_norm": 0.40700751543045044, + "learning_rate": 1.9974680552909578e-05, + "loss": 0.5521, + "step": 1749 + }, + { + "epoch": 0.04805052169137836, + "grad_norm": 0.3621041774749756, + "learning_rate": 1.9974649829095562e-05, + "loss": 0.5475, + "step": 1750 + }, + { + "epoch": 0.048077979132344864, + "grad_norm": 0.33428534865379333, + "learning_rate": 1.9974619086675633e-05, + "loss": 0.5676, + "step": 1751 + }, + { + "epoch": 0.048105436573311365, + "grad_norm": 0.4122284948825836, + "learning_rate": 1.9974588325649864e-05, + "loss": 0.622, + "step": 1752 + }, + { + "epoch": 0.048132894014277866, + "grad_norm": 0.3641304075717926, + "learning_rate": 1.99745575460183e-05, + "loss": 0.5141, + "step": 1753 + }, + { + "epoch": 0.048160351455244374, + "grad_norm": 0.3531661331653595, + "learning_rate": 1.9974526747781006e-05, + "loss": 0.6285, + "step": 1754 + }, + { + "epoch": 0.048187808896210875, + "grad_norm": 0.40635645389556885, + "learning_rate": 1.997449593093803e-05, + "loss": 0.52, + "step": 1755 + }, + { + "epoch": 0.048215266337177376, + "grad_norm": 0.39193737506866455, + "learning_rate": 1.997446509548944e-05, + "loss": 0.5684, + "step": 1756 + }, + { + "epoch": 0.04824272377814388, + "grad_norm": 0.3323000967502594, + "learning_rate": 1.997443424143529e-05, + "loss": 0.5578, + "step": 1757 + }, + { + "epoch": 0.04827018121911038, + "grad_norm": 0.4137215316295624, + "learning_rate": 1.9974403368775636e-05, + "loss": 0.6293, + "step": 1758 + }, + { + "epoch": 0.04829763866007688, + "grad_norm": 0.6278602480888367, + "learning_rate": 1.9974372477510534e-05, + "loss": 0.6141, + "step": 1759 + }, + { + "epoch": 0.04832509610104338, + "grad_norm": 0.347959965467453, + "learning_rate": 1.9974341567640046e-05, + "loss": 0.586, + "step": 1760 + }, + { + "epoch": 0.04835255354200989, + "grad_norm": 0.36387717723846436, + "learning_rate": 1.997431063916423e-05, + "loss": 0.5306, + "step": 1761 + }, + { + "epoch": 0.04838001098297639, + "grad_norm": 0.3765870928764343, + "learning_rate": 1.9974279692083135e-05, + "loss": 0.5817, + "step": 1762 + }, + { + "epoch": 0.04840746842394289, + "grad_norm": 0.3361295759677887, + "learning_rate": 1.9974248726396828e-05, + "loss": 0.5671, + "step": 1763 + }, + { + "epoch": 0.04843492586490939, + "grad_norm": 0.4213378131389618, + "learning_rate": 1.9974217742105364e-05, + "loss": 0.6365, + "step": 1764 + }, + { + "epoch": 0.04846238330587589, + "grad_norm": 0.3795631229877472, + "learning_rate": 1.9974186739208798e-05, + "loss": 0.5625, + "step": 1765 + }, + { + "epoch": 0.04848984074684239, + "grad_norm": 0.3485829532146454, + "learning_rate": 1.9974155717707194e-05, + "loss": 0.5763, + "step": 1766 + }, + { + "epoch": 0.048517298187808894, + "grad_norm": 0.3717228174209595, + "learning_rate": 1.9974124677600603e-05, + "loss": 0.6544, + "step": 1767 + }, + { + "epoch": 0.048544755628775395, + "grad_norm": 0.40393179655075073, + "learning_rate": 1.997409361888909e-05, + "loss": 0.5798, + "step": 1768 + }, + { + "epoch": 0.0485722130697419, + "grad_norm": 0.43523648381233215, + "learning_rate": 1.997406254157271e-05, + "loss": 0.6642, + "step": 1769 + }, + { + "epoch": 0.048599670510708404, + "grad_norm": 0.3997601568698883, + "learning_rate": 1.9974031445651515e-05, + "loss": 0.5706, + "step": 1770 + }, + { + "epoch": 0.048627127951674905, + "grad_norm": 0.35494181513786316, + "learning_rate": 1.9974000331125568e-05, + "loss": 0.5046, + "step": 1771 + }, + { + "epoch": 0.048654585392641406, + "grad_norm": 0.3958304822444916, + "learning_rate": 1.9973969197994928e-05, + "loss": 0.6218, + "step": 1772 + }, + { + "epoch": 0.04868204283360791, + "grad_norm": 0.3997637927532196, + "learning_rate": 1.9973938046259653e-05, + "loss": 0.5732, + "step": 1773 + }, + { + "epoch": 0.04870950027457441, + "grad_norm": 0.3628101050853729, + "learning_rate": 1.99739068759198e-05, + "loss": 0.491, + "step": 1774 + }, + { + "epoch": 0.04873695771554091, + "grad_norm": 0.395282506942749, + "learning_rate": 1.9973875686975427e-05, + "loss": 0.508, + "step": 1775 + }, + { + "epoch": 0.04876441515650742, + "grad_norm": 0.3738830089569092, + "learning_rate": 1.9973844479426593e-05, + "loss": 0.5556, + "step": 1776 + }, + { + "epoch": 0.04879187259747392, + "grad_norm": 0.37502390146255493, + "learning_rate": 1.997381325327336e-05, + "loss": 0.5722, + "step": 1777 + }, + { + "epoch": 0.04881933003844042, + "grad_norm": 0.40069982409477234, + "learning_rate": 1.9973782008515777e-05, + "loss": 0.6521, + "step": 1778 + }, + { + "epoch": 0.04884678747940692, + "grad_norm": 0.393072247505188, + "learning_rate": 1.997375074515391e-05, + "loss": 0.5564, + "step": 1779 + }, + { + "epoch": 0.04887424492037342, + "grad_norm": 0.3785655200481415, + "learning_rate": 1.997371946318781e-05, + "loss": 0.6237, + "step": 1780 + }, + { + "epoch": 0.04890170236133992, + "grad_norm": 0.35999783873558044, + "learning_rate": 1.9973688162617545e-05, + "loss": 0.5459, + "step": 1781 + }, + { + "epoch": 0.04892915980230642, + "grad_norm": 0.4492824375629425, + "learning_rate": 1.9973656843443162e-05, + "loss": 0.503, + "step": 1782 + }, + { + "epoch": 0.048956617243272924, + "grad_norm": 0.39231082797050476, + "learning_rate": 1.9973625505664734e-05, + "loss": 0.544, + "step": 1783 + }, + { + "epoch": 0.04898407468423943, + "grad_norm": 0.3255685865879059, + "learning_rate": 1.9973594149282305e-05, + "loss": 0.4917, + "step": 1784 + }, + { + "epoch": 0.04901153212520593, + "grad_norm": 0.43804335594177246, + "learning_rate": 1.9973562774295942e-05, + "loss": 0.6478, + "step": 1785 + }, + { + "epoch": 0.04903898956617243, + "grad_norm": 0.39409971237182617, + "learning_rate": 1.9973531380705703e-05, + "loss": 0.6134, + "step": 1786 + }, + { + "epoch": 0.049066447007138934, + "grad_norm": 0.3804078996181488, + "learning_rate": 1.9973499968511643e-05, + "loss": 0.5475, + "step": 1787 + }, + { + "epoch": 0.049093904448105435, + "grad_norm": 0.34638506174087524, + "learning_rate": 1.9973468537713822e-05, + "loss": 0.6199, + "step": 1788 + }, + { + "epoch": 0.049121361889071936, + "grad_norm": 0.39052829146385193, + "learning_rate": 1.9973437088312298e-05, + "loss": 0.6045, + "step": 1789 + }, + { + "epoch": 0.04914881933003844, + "grad_norm": 0.36124342679977417, + "learning_rate": 1.997340562030713e-05, + "loss": 0.5874, + "step": 1790 + }, + { + "epoch": 0.049176276771004945, + "grad_norm": 0.4390476644039154, + "learning_rate": 1.997337413369838e-05, + "loss": 0.5432, + "step": 1791 + }, + { + "epoch": 0.049203734211971446, + "grad_norm": 0.34413209557533264, + "learning_rate": 1.9973342628486106e-05, + "loss": 0.5558, + "step": 1792 + }, + { + "epoch": 0.04923119165293795, + "grad_norm": 0.39440158009529114, + "learning_rate": 1.9973311104670363e-05, + "loss": 0.5713, + "step": 1793 + }, + { + "epoch": 0.04925864909390445, + "grad_norm": 0.36803197860717773, + "learning_rate": 1.9973279562251207e-05, + "loss": 0.5472, + "step": 1794 + }, + { + "epoch": 0.04928610653487095, + "grad_norm": 0.3541286289691925, + "learning_rate": 1.9973248001228708e-05, + "loss": 0.5591, + "step": 1795 + }, + { + "epoch": 0.04931356397583745, + "grad_norm": 0.4497540295124054, + "learning_rate": 1.9973216421602915e-05, + "loss": 0.5405, + "step": 1796 + }, + { + "epoch": 0.04934102141680395, + "grad_norm": 0.33534368872642517, + "learning_rate": 1.997318482337389e-05, + "loss": 0.5277, + "step": 1797 + }, + { + "epoch": 0.04936847885777046, + "grad_norm": 0.38229721784591675, + "learning_rate": 1.9973153206541695e-05, + "loss": 0.5436, + "step": 1798 + }, + { + "epoch": 0.04939593629873696, + "grad_norm": 0.39799225330352783, + "learning_rate": 1.9973121571106385e-05, + "loss": 0.5563, + "step": 1799 + }, + { + "epoch": 0.04942339373970346, + "grad_norm": 0.40024998784065247, + "learning_rate": 1.997308991706802e-05, + "loss": 0.5555, + "step": 1800 + }, + { + "epoch": 0.04945085118066996, + "grad_norm": 0.5081374645233154, + "learning_rate": 1.9973058244426663e-05, + "loss": 0.5557, + "step": 1801 + }, + { + "epoch": 0.04947830862163646, + "grad_norm": 0.3543853461742401, + "learning_rate": 1.9973026553182362e-05, + "loss": 0.5063, + "step": 1802 + }, + { + "epoch": 0.049505766062602964, + "grad_norm": 0.4867360591888428, + "learning_rate": 1.9972994843335188e-05, + "loss": 0.65, + "step": 1803 + }, + { + "epoch": 0.049533223503569465, + "grad_norm": 0.36622342467308044, + "learning_rate": 1.9972963114885195e-05, + "loss": 0.6245, + "step": 1804 + }, + { + "epoch": 0.049560680944535966, + "grad_norm": 0.37493157386779785, + "learning_rate": 1.997293136783244e-05, + "loss": 0.5471, + "step": 1805 + }, + { + "epoch": 0.049588138385502474, + "grad_norm": 0.37974101305007935, + "learning_rate": 1.997289960217699e-05, + "loss": 0.5557, + "step": 1806 + }, + { + "epoch": 0.049615595826468975, + "grad_norm": 0.401750773191452, + "learning_rate": 1.9972867817918896e-05, + "loss": 0.5763, + "step": 1807 + }, + { + "epoch": 0.049643053267435476, + "grad_norm": 0.3102671802043915, + "learning_rate": 1.997283601505822e-05, + "loss": 0.4984, + "step": 1808 + }, + { + "epoch": 0.04967051070840198, + "grad_norm": 1.6420553922653198, + "learning_rate": 1.9972804193595022e-05, + "loss": 0.55, + "step": 1809 + }, + { + "epoch": 0.04969796814936848, + "grad_norm": 0.3835165500640869, + "learning_rate": 1.9972772353529363e-05, + "loss": 0.5268, + "step": 1810 + }, + { + "epoch": 0.04972542559033498, + "grad_norm": 0.33358123898506165, + "learning_rate": 1.99727404948613e-05, + "loss": 0.4984, + "step": 1811 + }, + { + "epoch": 0.04975288303130148, + "grad_norm": 0.35960647463798523, + "learning_rate": 1.9972708617590893e-05, + "loss": 0.5046, + "step": 1812 + }, + { + "epoch": 0.04978034047226799, + "grad_norm": 0.33748507499694824, + "learning_rate": 1.9972676721718204e-05, + "loss": 0.495, + "step": 1813 + }, + { + "epoch": 0.04980779791323449, + "grad_norm": 0.38767698407173157, + "learning_rate": 1.9972644807243286e-05, + "loss": 0.5667, + "step": 1814 + }, + { + "epoch": 0.04983525535420099, + "grad_norm": 0.35820385813713074, + "learning_rate": 1.9972612874166203e-05, + "loss": 0.6194, + "step": 1815 + }, + { + "epoch": 0.04986271279516749, + "grad_norm": 0.30981922149658203, + "learning_rate": 1.9972580922487016e-05, + "loss": 0.4729, + "step": 1816 + }, + { + "epoch": 0.04989017023613399, + "grad_norm": 0.37196460366249084, + "learning_rate": 1.9972548952205783e-05, + "loss": 0.5691, + "step": 1817 + }, + { + "epoch": 0.04991762767710049, + "grad_norm": 0.4547102451324463, + "learning_rate": 1.9972516963322564e-05, + "loss": 0.6663, + "step": 1818 + }, + { + "epoch": 0.049945085118066994, + "grad_norm": 0.39466118812561035, + "learning_rate": 1.9972484955837414e-05, + "loss": 0.5019, + "step": 1819 + }, + { + "epoch": 0.049972542559033495, + "grad_norm": 0.3360445201396942, + "learning_rate": 1.9972452929750398e-05, + "loss": 0.5675, + "step": 1820 + }, + { + "epoch": 0.05, + "grad_norm": 0.49518972635269165, + "learning_rate": 1.9972420885061576e-05, + "loss": 0.575, + "step": 1821 + }, + { + "epoch": 0.050027457440966504, + "grad_norm": 0.3713243007659912, + "learning_rate": 1.997238882177101e-05, + "loss": 0.5736, + "step": 1822 + }, + { + "epoch": 0.050054914881933005, + "grad_norm": 0.3652655780315399, + "learning_rate": 1.9972356739878748e-05, + "loss": 0.4706, + "step": 1823 + }, + { + "epoch": 0.050082372322899506, + "grad_norm": 0.36481449007987976, + "learning_rate": 1.997232463938486e-05, + "loss": 0.6004, + "step": 1824 + }, + { + "epoch": 0.05010982976386601, + "grad_norm": 0.39170143008232117, + "learning_rate": 1.9972292520289405e-05, + "loss": 0.5661, + "step": 1825 + }, + { + "epoch": 0.05013728720483251, + "grad_norm": 0.4235903024673462, + "learning_rate": 1.9972260382592442e-05, + "loss": 0.5977, + "step": 1826 + }, + { + "epoch": 0.05016474464579901, + "grad_norm": 0.36186689138412476, + "learning_rate": 1.9972228226294032e-05, + "loss": 0.5678, + "step": 1827 + }, + { + "epoch": 0.05019220208676552, + "grad_norm": 0.4754292964935303, + "learning_rate": 1.997219605139423e-05, + "loss": 0.6312, + "step": 1828 + }, + { + "epoch": 0.05021965952773202, + "grad_norm": 0.4184439480304718, + "learning_rate": 1.9972163857893103e-05, + "loss": 0.5948, + "step": 1829 + }, + { + "epoch": 0.05024711696869852, + "grad_norm": 0.3711458742618561, + "learning_rate": 1.9972131645790705e-05, + "loss": 0.5484, + "step": 1830 + }, + { + "epoch": 0.05027457440966502, + "grad_norm": 0.39244458079338074, + "learning_rate": 1.9972099415087102e-05, + "loss": 0.5983, + "step": 1831 + }, + { + "epoch": 0.05030203185063152, + "grad_norm": 0.38231831789016724, + "learning_rate": 1.997206716578235e-05, + "loss": 0.6482, + "step": 1832 + }, + { + "epoch": 0.05032948929159802, + "grad_norm": 0.379594624042511, + "learning_rate": 1.9972034897876507e-05, + "loss": 0.5437, + "step": 1833 + }, + { + "epoch": 0.05035694673256452, + "grad_norm": 0.3779386878013611, + "learning_rate": 1.9972002611369638e-05, + "loss": 0.6174, + "step": 1834 + }, + { + "epoch": 0.050384404173531024, + "grad_norm": 0.3438434898853302, + "learning_rate": 1.9971970306261804e-05, + "loss": 0.6091, + "step": 1835 + }, + { + "epoch": 0.05041186161449753, + "grad_norm": 0.3756929337978363, + "learning_rate": 1.9971937982553058e-05, + "loss": 0.5689, + "step": 1836 + }, + { + "epoch": 0.05043931905546403, + "grad_norm": 0.3578527271747589, + "learning_rate": 1.9971905640243468e-05, + "loss": 0.596, + "step": 1837 + }, + { + "epoch": 0.050466776496430533, + "grad_norm": 0.38765448331832886, + "learning_rate": 1.997187327933309e-05, + "loss": 0.6344, + "step": 1838 + }, + { + "epoch": 0.050494233937397034, + "grad_norm": 0.3439255356788635, + "learning_rate": 1.9971840899821986e-05, + "loss": 0.5319, + "step": 1839 + }, + { + "epoch": 0.050521691378363535, + "grad_norm": 0.3683120012283325, + "learning_rate": 1.9971808501710218e-05, + "loss": 0.4952, + "step": 1840 + }, + { + "epoch": 0.050549148819330036, + "grad_norm": 0.35682010650634766, + "learning_rate": 1.9971776084997844e-05, + "loss": 0.5197, + "step": 1841 + }, + { + "epoch": 0.05057660626029654, + "grad_norm": 0.4267752468585968, + "learning_rate": 1.997174364968492e-05, + "loss": 0.6842, + "step": 1842 + }, + { + "epoch": 0.050604063701263045, + "grad_norm": 0.3641623854637146, + "learning_rate": 1.9971711195771517e-05, + "loss": 0.5864, + "step": 1843 + }, + { + "epoch": 0.050631521142229546, + "grad_norm": 0.38410714268684387, + "learning_rate": 1.9971678723257687e-05, + "loss": 0.5848, + "step": 1844 + }, + { + "epoch": 0.05065897858319605, + "grad_norm": 0.4092848598957062, + "learning_rate": 1.9971646232143497e-05, + "loss": 0.6289, + "step": 1845 + }, + { + "epoch": 0.05068643602416255, + "grad_norm": 0.3922099769115448, + "learning_rate": 1.9971613722429003e-05, + "loss": 0.5204, + "step": 1846 + }, + { + "epoch": 0.05071389346512905, + "grad_norm": 0.348134845495224, + "learning_rate": 1.9971581194114264e-05, + "loss": 0.5128, + "step": 1847 + }, + { + "epoch": 0.05074135090609555, + "grad_norm": 0.35261133313179016, + "learning_rate": 1.9971548647199347e-05, + "loss": 0.5666, + "step": 1848 + }, + { + "epoch": 0.05076880834706205, + "grad_norm": 0.39459383487701416, + "learning_rate": 1.9971516081684307e-05, + "loss": 0.5857, + "step": 1849 + }, + { + "epoch": 0.05079626578802855, + "grad_norm": 0.35726556181907654, + "learning_rate": 1.9971483497569207e-05, + "loss": 0.4745, + "step": 1850 + }, + { + "epoch": 0.05082372322899506, + "grad_norm": 0.3958752751350403, + "learning_rate": 1.997145089485411e-05, + "loss": 0.6552, + "step": 1851 + }, + { + "epoch": 0.05085118066996156, + "grad_norm": 0.3550335466861725, + "learning_rate": 1.9971418273539074e-05, + "loss": 0.5541, + "step": 1852 + }, + { + "epoch": 0.05087863811092806, + "grad_norm": 0.34962233901023865, + "learning_rate": 1.9971385633624157e-05, + "loss": 0.5461, + "step": 1853 + }, + { + "epoch": 0.05090609555189456, + "grad_norm": 0.4213283360004425, + "learning_rate": 1.9971352975109427e-05, + "loss": 0.5573, + "step": 1854 + }, + { + "epoch": 0.050933552992861064, + "grad_norm": 0.36787307262420654, + "learning_rate": 1.9971320297994937e-05, + "loss": 0.5772, + "step": 1855 + }, + { + "epoch": 0.050961010433827565, + "grad_norm": 0.380305677652359, + "learning_rate": 1.997128760228076e-05, + "loss": 0.6275, + "step": 1856 + }, + { + "epoch": 0.050988467874794066, + "grad_norm": 0.4087391495704651, + "learning_rate": 1.9971254887966943e-05, + "loss": 0.6092, + "step": 1857 + }, + { + "epoch": 0.051015925315760574, + "grad_norm": 0.37025395035743713, + "learning_rate": 1.9971222155053555e-05, + "loss": 0.5622, + "step": 1858 + }, + { + "epoch": 0.051043382756727075, + "grad_norm": 0.3716244101524353, + "learning_rate": 1.9971189403540654e-05, + "loss": 0.6932, + "step": 1859 + }, + { + "epoch": 0.051070840197693576, + "grad_norm": 0.3616083562374115, + "learning_rate": 1.99711566334283e-05, + "loss": 0.555, + "step": 1860 + }, + { + "epoch": 0.05109829763866008, + "grad_norm": 0.33523502945899963, + "learning_rate": 1.9971123844716562e-05, + "loss": 0.533, + "step": 1861 + }, + { + "epoch": 0.05112575507962658, + "grad_norm": 0.3595324456691742, + "learning_rate": 1.997109103740549e-05, + "loss": 0.5759, + "step": 1862 + }, + { + "epoch": 0.05115321252059308, + "grad_norm": 0.3867854177951813, + "learning_rate": 1.9971058211495156e-05, + "loss": 0.6165, + "step": 1863 + }, + { + "epoch": 0.05118066996155958, + "grad_norm": 0.42644843459129333, + "learning_rate": 1.9971025366985614e-05, + "loss": 0.5937, + "step": 1864 + }, + { + "epoch": 0.05120812740252609, + "grad_norm": 0.37566736340522766, + "learning_rate": 1.997099250387693e-05, + "loss": 0.6033, + "step": 1865 + }, + { + "epoch": 0.05123558484349259, + "grad_norm": 0.3363610804080963, + "learning_rate": 1.997095962216916e-05, + "loss": 0.5124, + "step": 1866 + }, + { + "epoch": 0.05126304228445909, + "grad_norm": 0.331209272146225, + "learning_rate": 1.9970926721862365e-05, + "loss": 0.5225, + "step": 1867 + }, + { + "epoch": 0.05129049972542559, + "grad_norm": 0.3431551158428192, + "learning_rate": 1.9970893802956612e-05, + "loss": 0.5133, + "step": 1868 + }, + { + "epoch": 0.05131795716639209, + "grad_norm": 0.4807696044445038, + "learning_rate": 1.997086086545196e-05, + "loss": 0.4962, + "step": 1869 + }, + { + "epoch": 0.05134541460735859, + "grad_norm": 0.41332411766052246, + "learning_rate": 1.997082790934847e-05, + "loss": 0.6258, + "step": 1870 + }, + { + "epoch": 0.051372872048325094, + "grad_norm": 0.40335801243782043, + "learning_rate": 1.9970794934646206e-05, + "loss": 0.545, + "step": 1871 + }, + { + "epoch": 0.051400329489291595, + "grad_norm": 0.38011857867240906, + "learning_rate": 1.9970761941345223e-05, + "loss": 0.5686, + "step": 1872 + }, + { + "epoch": 0.0514277869302581, + "grad_norm": 0.35664230585098267, + "learning_rate": 1.997072892944559e-05, + "loss": 0.6033, + "step": 1873 + }, + { + "epoch": 0.051455244371224604, + "grad_norm": 0.37166547775268555, + "learning_rate": 1.997069589894736e-05, + "loss": 0.589, + "step": 1874 + }, + { + "epoch": 0.051482701812191105, + "grad_norm": 0.3257485330104828, + "learning_rate": 1.9970662849850607e-05, + "loss": 0.5676, + "step": 1875 + }, + { + "epoch": 0.051510159253157606, + "grad_norm": 0.4023725092411041, + "learning_rate": 1.997062978215538e-05, + "loss": 0.574, + "step": 1876 + }, + { + "epoch": 0.05153761669412411, + "grad_norm": 0.33804449439048767, + "learning_rate": 1.9970596695861748e-05, + "loss": 0.567, + "step": 1877 + }, + { + "epoch": 0.05156507413509061, + "grad_norm": 0.374999076128006, + "learning_rate": 1.9970563590969775e-05, + "loss": 0.5703, + "step": 1878 + }, + { + "epoch": 0.05159253157605711, + "grad_norm": 0.36269867420196533, + "learning_rate": 1.9970530467479513e-05, + "loss": 0.5653, + "step": 1879 + }, + { + "epoch": 0.05161998901702362, + "grad_norm": 0.3454335629940033, + "learning_rate": 1.997049732539103e-05, + "loss": 0.5238, + "step": 1880 + }, + { + "epoch": 0.05164744645799012, + "grad_norm": 0.34760940074920654, + "learning_rate": 1.9970464164704387e-05, + "loss": 0.5325, + "step": 1881 + }, + { + "epoch": 0.05167490389895662, + "grad_norm": 0.4599643647670746, + "learning_rate": 1.9970430985419652e-05, + "loss": 0.6429, + "step": 1882 + }, + { + "epoch": 0.05170236133992312, + "grad_norm": 0.37288418412208557, + "learning_rate": 1.9970397787536875e-05, + "loss": 0.5919, + "step": 1883 + }, + { + "epoch": 0.05172981878088962, + "grad_norm": 0.3666027784347534, + "learning_rate": 1.9970364571056128e-05, + "loss": 0.5327, + "step": 1884 + }, + { + "epoch": 0.05175727622185612, + "grad_norm": 0.4380134642124176, + "learning_rate": 1.9970331335977464e-05, + "loss": 0.6175, + "step": 1885 + }, + { + "epoch": 0.05178473366282262, + "grad_norm": 0.4335097372531891, + "learning_rate": 1.9970298082300956e-05, + "loss": 0.6043, + "step": 1886 + }, + { + "epoch": 0.051812191103789124, + "grad_norm": 0.3626493513584137, + "learning_rate": 1.9970264810026653e-05, + "loss": 0.5745, + "step": 1887 + }, + { + "epoch": 0.05183964854475563, + "grad_norm": 0.362321674823761, + "learning_rate": 1.9970231519154627e-05, + "loss": 0.5524, + "step": 1888 + }, + { + "epoch": 0.05186710598572213, + "grad_norm": 0.3227526843547821, + "learning_rate": 1.9970198209684936e-05, + "loss": 0.5296, + "step": 1889 + }, + { + "epoch": 0.051894563426688634, + "grad_norm": 0.3227916657924652, + "learning_rate": 1.9970164881617647e-05, + "loss": 0.4682, + "step": 1890 + }, + { + "epoch": 0.051922020867655135, + "grad_norm": 0.3825254440307617, + "learning_rate": 1.9970131534952815e-05, + "loss": 0.55, + "step": 1891 + }, + { + "epoch": 0.051949478308621636, + "grad_norm": 0.3648739755153656, + "learning_rate": 1.9970098169690506e-05, + "loss": 0.5656, + "step": 1892 + }, + { + "epoch": 0.051976935749588137, + "grad_norm": 0.40551474690437317, + "learning_rate": 1.9970064785830784e-05, + "loss": 0.5394, + "step": 1893 + }, + { + "epoch": 0.05200439319055464, + "grad_norm": 0.3743736743927002, + "learning_rate": 1.997003138337371e-05, + "loss": 0.5738, + "step": 1894 + }, + { + "epoch": 0.052031850631521145, + "grad_norm": 0.3500254154205322, + "learning_rate": 1.996999796231934e-05, + "loss": 0.5572, + "step": 1895 + }, + { + "epoch": 0.052059308072487646, + "grad_norm": 0.48554638028144836, + "learning_rate": 1.9969964522667747e-05, + "loss": 0.5766, + "step": 1896 + }, + { + "epoch": 0.05208676551345415, + "grad_norm": 0.42437511682510376, + "learning_rate": 1.9969931064418985e-05, + "loss": 0.5791, + "step": 1897 + }, + { + "epoch": 0.05211422295442065, + "grad_norm": 0.3432849049568176, + "learning_rate": 1.996989758757312e-05, + "loss": 0.5483, + "step": 1898 + }, + { + "epoch": 0.05214168039538715, + "grad_norm": 0.3748025596141815, + "learning_rate": 1.9969864092130217e-05, + "loss": 0.5151, + "step": 1899 + }, + { + "epoch": 0.05216913783635365, + "grad_norm": 0.47756150364875793, + "learning_rate": 1.996983057809033e-05, + "loss": 0.562, + "step": 1900 + }, + { + "epoch": 0.05219659527732015, + "grad_norm": 0.4014303982257843, + "learning_rate": 1.996979704545353e-05, + "loss": 0.6337, + "step": 1901 + }, + { + "epoch": 0.05222405271828665, + "grad_norm": 2.7102608680725098, + "learning_rate": 1.9969763494219878e-05, + "loss": 0.5327, + "step": 1902 + }, + { + "epoch": 0.05225151015925316, + "grad_norm": 0.3542889654636383, + "learning_rate": 1.9969729924389433e-05, + "loss": 0.6304, + "step": 1903 + }, + { + "epoch": 0.05227896760021966, + "grad_norm": 0.3389890193939209, + "learning_rate": 1.9969696335962258e-05, + "loss": 0.5501, + "step": 1904 + }, + { + "epoch": 0.05230642504118616, + "grad_norm": 0.43655356764793396, + "learning_rate": 1.996966272893842e-05, + "loss": 0.6864, + "step": 1905 + }, + { + "epoch": 0.05233388248215266, + "grad_norm": 0.3507354259490967, + "learning_rate": 1.996962910331798e-05, + "loss": 0.5368, + "step": 1906 + }, + { + "epoch": 0.052361339923119164, + "grad_norm": 0.36367684602737427, + "learning_rate": 1.9969595459100998e-05, + "loss": 0.5665, + "step": 1907 + }, + { + "epoch": 0.052388797364085665, + "grad_norm": 0.3746825158596039, + "learning_rate": 1.9969561796287538e-05, + "loss": 0.4528, + "step": 1908 + }, + { + "epoch": 0.052416254805052166, + "grad_norm": 0.38441458344459534, + "learning_rate": 1.9969528114877667e-05, + "loss": 0.5754, + "step": 1909 + }, + { + "epoch": 0.052443712246018674, + "grad_norm": 0.4207031726837158, + "learning_rate": 1.996949441487144e-05, + "loss": 0.5201, + "step": 1910 + }, + { + "epoch": 0.052471169686985175, + "grad_norm": 0.4016045331954956, + "learning_rate": 1.9969460696268926e-05, + "loss": 0.6386, + "step": 1911 + }, + { + "epoch": 0.052498627127951676, + "grad_norm": 0.36780083179473877, + "learning_rate": 1.9969426959070185e-05, + "loss": 0.5646, + "step": 1912 + }, + { + "epoch": 0.05252608456891818, + "grad_norm": 0.36451947689056396, + "learning_rate": 1.996939320327528e-05, + "loss": 0.5155, + "step": 1913 + }, + { + "epoch": 0.05255354200988468, + "grad_norm": 0.39193567633628845, + "learning_rate": 1.9969359428884277e-05, + "loss": 0.56, + "step": 1914 + }, + { + "epoch": 0.05258099945085118, + "grad_norm": 0.3539312183856964, + "learning_rate": 1.9969325635897236e-05, + "loss": 0.5687, + "step": 1915 + }, + { + "epoch": 0.05260845689181768, + "grad_norm": 0.3835406005382538, + "learning_rate": 1.996929182431422e-05, + "loss": 0.5404, + "step": 1916 + }, + { + "epoch": 0.05263591433278418, + "grad_norm": 0.35497432947158813, + "learning_rate": 1.9969257994135293e-05, + "loss": 0.4967, + "step": 1917 + }, + { + "epoch": 0.05266337177375069, + "grad_norm": 0.3840617537498474, + "learning_rate": 1.996922414536052e-05, + "loss": 0.5767, + "step": 1918 + }, + { + "epoch": 0.05269082921471719, + "grad_norm": 0.3723994791507721, + "learning_rate": 1.996919027798996e-05, + "loss": 0.4957, + "step": 1919 + }, + { + "epoch": 0.05271828665568369, + "grad_norm": 0.3579917848110199, + "learning_rate": 1.9969156392023682e-05, + "loss": 0.5527, + "step": 1920 + }, + { + "epoch": 0.05274574409665019, + "grad_norm": 0.39644885063171387, + "learning_rate": 1.996912248746174e-05, + "loss": 0.6028, + "step": 1921 + }, + { + "epoch": 0.05277320153761669, + "grad_norm": 0.36882805824279785, + "learning_rate": 1.9969088564304204e-05, + "loss": 0.5815, + "step": 1922 + }, + { + "epoch": 0.052800658978583194, + "grad_norm": 0.39110493659973145, + "learning_rate": 1.996905462255114e-05, + "loss": 0.5948, + "step": 1923 + }, + { + "epoch": 0.052828116419549695, + "grad_norm": 0.3762013614177704, + "learning_rate": 1.9969020662202606e-05, + "loss": 0.5491, + "step": 1924 + }, + { + "epoch": 0.0528555738605162, + "grad_norm": 0.3976396322250366, + "learning_rate": 1.9968986683258666e-05, + "loss": 0.6082, + "step": 1925 + }, + { + "epoch": 0.052883031301482704, + "grad_norm": 0.38490593433380127, + "learning_rate": 1.996895268571938e-05, + "loss": 0.5644, + "step": 1926 + }, + { + "epoch": 0.052910488742449205, + "grad_norm": 0.4161694645881653, + "learning_rate": 1.9968918669584824e-05, + "loss": 0.5839, + "step": 1927 + }, + { + "epoch": 0.052937946183415706, + "grad_norm": 0.3447498083114624, + "learning_rate": 1.9968884634855047e-05, + "loss": 0.5461, + "step": 1928 + }, + { + "epoch": 0.05296540362438221, + "grad_norm": 0.40057653188705444, + "learning_rate": 1.996885058153012e-05, + "loss": 0.5956, + "step": 1929 + }, + { + "epoch": 0.05299286106534871, + "grad_norm": 0.3923904001712799, + "learning_rate": 1.9968816509610103e-05, + "loss": 0.6227, + "step": 1930 + }, + { + "epoch": 0.05302031850631521, + "grad_norm": 0.4147428870201111, + "learning_rate": 1.996878241909506e-05, + "loss": 0.6155, + "step": 1931 + }, + { + "epoch": 0.05304777594728172, + "grad_norm": 0.32197341322898865, + "learning_rate": 1.9968748309985062e-05, + "loss": 0.4848, + "step": 1932 + }, + { + "epoch": 0.05307523338824822, + "grad_norm": 0.3821795880794525, + "learning_rate": 1.9968714182280165e-05, + "loss": 0.5429, + "step": 1933 + }, + { + "epoch": 0.05310269082921472, + "grad_norm": 0.4186863899230957, + "learning_rate": 1.9968680035980434e-05, + "loss": 0.5628, + "step": 1934 + }, + { + "epoch": 0.05313014827018122, + "grad_norm": 0.3584262430667877, + "learning_rate": 1.9968645871085932e-05, + "loss": 0.5923, + "step": 1935 + }, + { + "epoch": 0.05315760571114772, + "grad_norm": 0.3366561830043793, + "learning_rate": 1.9968611687596722e-05, + "loss": 0.4573, + "step": 1936 + }, + { + "epoch": 0.05318506315211422, + "grad_norm": 0.3582332730293274, + "learning_rate": 1.9968577485512876e-05, + "loss": 0.5626, + "step": 1937 + }, + { + "epoch": 0.05321252059308072, + "grad_norm": 0.3172548711299896, + "learning_rate": 1.9968543264834444e-05, + "loss": 0.512, + "step": 1938 + }, + { + "epoch": 0.053239978034047224, + "grad_norm": 0.3788275420665741, + "learning_rate": 1.99685090255615e-05, + "loss": 0.4983, + "step": 1939 + }, + { + "epoch": 0.05326743547501373, + "grad_norm": 0.3999404013156891, + "learning_rate": 1.9968474767694108e-05, + "loss": 0.5089, + "step": 1940 + }, + { + "epoch": 0.05329489291598023, + "grad_norm": 0.3673028349876404, + "learning_rate": 1.9968440491232326e-05, + "loss": 0.534, + "step": 1941 + }, + { + "epoch": 0.053322350356946734, + "grad_norm": 0.4109498858451843, + "learning_rate": 1.9968406196176222e-05, + "loss": 0.596, + "step": 1942 + }, + { + "epoch": 0.053349807797913235, + "grad_norm": 0.3752717971801758, + "learning_rate": 1.9968371882525858e-05, + "loss": 0.5523, + "step": 1943 + }, + { + "epoch": 0.053377265238879736, + "grad_norm": 0.332479864358902, + "learning_rate": 1.99683375502813e-05, + "loss": 0.5534, + "step": 1944 + }, + { + "epoch": 0.05340472267984624, + "grad_norm": 0.4025343358516693, + "learning_rate": 1.9968303199442613e-05, + "loss": 0.5511, + "step": 1945 + }, + { + "epoch": 0.05343218012081274, + "grad_norm": 0.36609092354774475, + "learning_rate": 1.9968268830009854e-05, + "loss": 0.5352, + "step": 1946 + }, + { + "epoch": 0.053459637561779245, + "grad_norm": 0.4145871698856354, + "learning_rate": 1.9968234441983096e-05, + "loss": 0.6911, + "step": 1947 + }, + { + "epoch": 0.053487095002745746, + "grad_norm": 0.36430829763412476, + "learning_rate": 1.9968200035362395e-05, + "loss": 0.5811, + "step": 1948 + }, + { + "epoch": 0.05351455244371225, + "grad_norm": 0.38215717673301697, + "learning_rate": 1.9968165610147824e-05, + "loss": 0.5603, + "step": 1949 + }, + { + "epoch": 0.05354200988467875, + "grad_norm": 0.3737226724624634, + "learning_rate": 1.996813116633944e-05, + "loss": 0.5536, + "step": 1950 + }, + { + "epoch": 0.05356946732564525, + "grad_norm": 1.022645115852356, + "learning_rate": 1.996809670393731e-05, + "loss": 0.5749, + "step": 1951 + }, + { + "epoch": 0.05359692476661175, + "grad_norm": 0.3291313648223877, + "learning_rate": 1.99680622229415e-05, + "loss": 0.5191, + "step": 1952 + }, + { + "epoch": 0.05362438220757825, + "grad_norm": 0.37627366185188293, + "learning_rate": 1.9968027723352073e-05, + "loss": 0.5853, + "step": 1953 + }, + { + "epoch": 0.05365183964854475, + "grad_norm": 0.3818928301334381, + "learning_rate": 1.996799320516909e-05, + "loss": 0.5545, + "step": 1954 + }, + { + "epoch": 0.05367929708951126, + "grad_norm": 0.3633921444416046, + "learning_rate": 1.996795866839262e-05, + "loss": 0.4966, + "step": 1955 + }, + { + "epoch": 0.05370675453047776, + "grad_norm": 0.3660745918750763, + "learning_rate": 1.9967924113022725e-05, + "loss": 0.5228, + "step": 1956 + }, + { + "epoch": 0.05373421197144426, + "grad_norm": 0.353555828332901, + "learning_rate": 1.996788953905947e-05, + "loss": 0.5138, + "step": 1957 + }, + { + "epoch": 0.05376166941241076, + "grad_norm": 0.3804539442062378, + "learning_rate": 1.996785494650292e-05, + "loss": 0.5823, + "step": 1958 + }, + { + "epoch": 0.053789126853377264, + "grad_norm": 0.4119977355003357, + "learning_rate": 1.996782033535314e-05, + "loss": 0.4778, + "step": 1959 + }, + { + "epoch": 0.053816584294343765, + "grad_norm": 0.39536595344543457, + "learning_rate": 1.9967785705610193e-05, + "loss": 0.6366, + "step": 1960 + }, + { + "epoch": 0.053844041735310266, + "grad_norm": 0.3434202969074249, + "learning_rate": 1.9967751057274147e-05, + "loss": 0.5028, + "step": 1961 + }, + { + "epoch": 0.053871499176276774, + "grad_norm": 0.380874902009964, + "learning_rate": 1.9967716390345056e-05, + "loss": 0.5961, + "step": 1962 + }, + { + "epoch": 0.053898956617243275, + "grad_norm": 0.33766821026802063, + "learning_rate": 1.9967681704823e-05, + "loss": 0.645, + "step": 1963 + }, + { + "epoch": 0.053926414058209776, + "grad_norm": 0.3723110556602478, + "learning_rate": 1.9967647000708035e-05, + "loss": 0.569, + "step": 1964 + }, + { + "epoch": 0.05395387149917628, + "grad_norm": 0.5142785310745239, + "learning_rate": 1.996761227800023e-05, + "loss": 0.6009, + "step": 1965 + }, + { + "epoch": 0.05398132894014278, + "grad_norm": 0.3816337287425995, + "learning_rate": 1.996757753669964e-05, + "loss": 0.5039, + "step": 1966 + }, + { + "epoch": 0.05400878638110928, + "grad_norm": 0.39781779050827026, + "learning_rate": 1.996754277680634e-05, + "loss": 0.5758, + "step": 1967 + }, + { + "epoch": 0.05403624382207578, + "grad_norm": 0.33051854372024536, + "learning_rate": 1.996750799832039e-05, + "loss": 0.5256, + "step": 1968 + }, + { + "epoch": 0.05406370126304228, + "grad_norm": 0.3353223204612732, + "learning_rate": 1.9967473201241856e-05, + "loss": 0.5441, + "step": 1969 + }, + { + "epoch": 0.05409115870400879, + "grad_norm": 0.3472880423069, + "learning_rate": 1.996743838557081e-05, + "loss": 0.5482, + "step": 1970 + }, + { + "epoch": 0.05411861614497529, + "grad_norm": 0.32928189635276794, + "learning_rate": 1.99674035513073e-05, + "loss": 0.544, + "step": 1971 + }, + { + "epoch": 0.05414607358594179, + "grad_norm": 0.3723282814025879, + "learning_rate": 1.996736869845141e-05, + "loss": 0.6027, + "step": 1972 + }, + { + "epoch": 0.05417353102690829, + "grad_norm": 0.37033510208129883, + "learning_rate": 1.996733382700319e-05, + "loss": 0.5398, + "step": 1973 + }, + { + "epoch": 0.05420098846787479, + "grad_norm": 0.33934634923934937, + "learning_rate": 1.9967298936962712e-05, + "loss": 0.4855, + "step": 1974 + }, + { + "epoch": 0.054228445908841294, + "grad_norm": 0.33855217695236206, + "learning_rate": 1.9967264028330043e-05, + "loss": 0.5237, + "step": 1975 + }, + { + "epoch": 0.054255903349807795, + "grad_norm": 0.39195215702056885, + "learning_rate": 1.9967229101105244e-05, + "loss": 0.5544, + "step": 1976 + }, + { + "epoch": 0.0542833607907743, + "grad_norm": 0.43763822317123413, + "learning_rate": 1.996719415528838e-05, + "loss": 0.5685, + "step": 1977 + }, + { + "epoch": 0.054310818231740804, + "grad_norm": 0.3820432722568512, + "learning_rate": 1.996715919087952e-05, + "loss": 0.5293, + "step": 1978 + }, + { + "epoch": 0.054338275672707305, + "grad_norm": 0.49195462465286255, + "learning_rate": 1.9967124207878727e-05, + "loss": 0.5789, + "step": 1979 + }, + { + "epoch": 0.054365733113673806, + "grad_norm": 0.38214603066444397, + "learning_rate": 1.9967089206286062e-05, + "loss": 0.5677, + "step": 1980 + }, + { + "epoch": 0.05439319055464031, + "grad_norm": 0.3766654133796692, + "learning_rate": 1.99670541861016e-05, + "loss": 0.543, + "step": 1981 + }, + { + "epoch": 0.05442064799560681, + "grad_norm": 0.3439669907093048, + "learning_rate": 1.99670191473254e-05, + "loss": 0.5434, + "step": 1982 + }, + { + "epoch": 0.05444810543657331, + "grad_norm": 0.394232839345932, + "learning_rate": 1.9966984089957525e-05, + "loss": 0.5771, + "step": 1983 + }, + { + "epoch": 0.05447556287753981, + "grad_norm": 0.37702706456184387, + "learning_rate": 1.9966949013998047e-05, + "loss": 0.5724, + "step": 1984 + }, + { + "epoch": 0.05450302031850632, + "grad_norm": 0.43418416380882263, + "learning_rate": 1.9966913919447026e-05, + "loss": 0.6491, + "step": 1985 + }, + { + "epoch": 0.05453047775947282, + "grad_norm": 0.43077394366264343, + "learning_rate": 1.996687880630453e-05, + "loss": 0.6297, + "step": 1986 + }, + { + "epoch": 0.05455793520043932, + "grad_norm": 0.3564531207084656, + "learning_rate": 1.9966843674570623e-05, + "loss": 0.5039, + "step": 1987 + }, + { + "epoch": 0.05458539264140582, + "grad_norm": 0.39182329177856445, + "learning_rate": 1.9966808524245373e-05, + "loss": 0.5408, + "step": 1988 + }, + { + "epoch": 0.05461285008237232, + "grad_norm": 0.4099125564098358, + "learning_rate": 1.9966773355328847e-05, + "loss": 0.6587, + "step": 1989 + }, + { + "epoch": 0.05464030752333882, + "grad_norm": 0.3784736096858978, + "learning_rate": 1.9966738167821103e-05, + "loss": 0.5272, + "step": 1990 + }, + { + "epoch": 0.054667764964305324, + "grad_norm": 0.40466517210006714, + "learning_rate": 1.996670296172221e-05, + "loss": 0.6046, + "step": 1991 + }, + { + "epoch": 0.05469522240527183, + "grad_norm": 0.35350197553634644, + "learning_rate": 1.996666773703224e-05, + "loss": 0.565, + "step": 1992 + }, + { + "epoch": 0.05472267984623833, + "grad_norm": 0.3548986315727234, + "learning_rate": 1.996663249375125e-05, + "loss": 0.5458, + "step": 1993 + }, + { + "epoch": 0.054750137287204834, + "grad_norm": 0.3931201994419098, + "learning_rate": 1.9966597231879313e-05, + "loss": 0.6154, + "step": 1994 + }, + { + "epoch": 0.054777594728171335, + "grad_norm": 0.32984408736228943, + "learning_rate": 1.9966561951416486e-05, + "loss": 0.4501, + "step": 1995 + }, + { + "epoch": 0.054805052169137836, + "grad_norm": 0.36772221326828003, + "learning_rate": 1.9966526652362845e-05, + "loss": 0.5679, + "step": 1996 + }, + { + "epoch": 0.05483250961010434, + "grad_norm": 0.5863059163093567, + "learning_rate": 1.9966491334718447e-05, + "loss": 0.5416, + "step": 1997 + }, + { + "epoch": 0.05485996705107084, + "grad_norm": 0.35672613978385925, + "learning_rate": 1.9966455998483366e-05, + "loss": 0.5438, + "step": 1998 + }, + { + "epoch": 0.054887424492037346, + "grad_norm": 0.3641097843647003, + "learning_rate": 1.9966420643657657e-05, + "loss": 0.5081, + "step": 1999 + }, + { + "epoch": 0.054914881933003847, + "grad_norm": 0.3254946768283844, + "learning_rate": 1.99663852702414e-05, + "loss": 0.5307, + "step": 2000 + }, + { + "epoch": 0.05494233937397035, + "grad_norm": 0.39165404438972473, + "learning_rate": 1.996634987823465e-05, + "loss": 0.5616, + "step": 2001 + }, + { + "epoch": 0.05496979681493685, + "grad_norm": 0.3822854161262512, + "learning_rate": 1.996631446763748e-05, + "loss": 0.626, + "step": 2002 + }, + { + "epoch": 0.05499725425590335, + "grad_norm": 0.3435845673084259, + "learning_rate": 1.9966279038449947e-05, + "loss": 0.5283, + "step": 2003 + }, + { + "epoch": 0.05502471169686985, + "grad_norm": 0.3328026533126831, + "learning_rate": 1.9966243590672123e-05, + "loss": 0.611, + "step": 2004 + }, + { + "epoch": 0.05505216913783635, + "grad_norm": 0.3749738335609436, + "learning_rate": 1.9966208124304078e-05, + "loss": 0.5491, + "step": 2005 + }, + { + "epoch": 0.05507962657880285, + "grad_norm": 0.38101232051849365, + "learning_rate": 1.996617263934587e-05, + "loss": 0.6033, + "step": 2006 + }, + { + "epoch": 0.05510708401976936, + "grad_norm": 0.3583291471004486, + "learning_rate": 1.9966137135797572e-05, + "loss": 0.5916, + "step": 2007 + }, + { + "epoch": 0.05513454146073586, + "grad_norm": 0.3315559923648834, + "learning_rate": 1.9966101613659247e-05, + "loss": 0.5326, + "step": 2008 + }, + { + "epoch": 0.05516199890170236, + "grad_norm": 0.38871026039123535, + "learning_rate": 1.9966066072930962e-05, + "loss": 0.5678, + "step": 2009 + }, + { + "epoch": 0.05518945634266886, + "grad_norm": 0.45660313963890076, + "learning_rate": 1.996603051361278e-05, + "loss": 0.6601, + "step": 2010 + }, + { + "epoch": 0.055216913783635364, + "grad_norm": 0.4075772762298584, + "learning_rate": 1.9965994935704773e-05, + "loss": 0.6638, + "step": 2011 + }, + { + "epoch": 0.055244371224601865, + "grad_norm": 0.4331810176372528, + "learning_rate": 1.9965959339207005e-05, + "loss": 0.6178, + "step": 2012 + }, + { + "epoch": 0.055271828665568366, + "grad_norm": 0.33049675822257996, + "learning_rate": 1.996592372411954e-05, + "loss": 0.5172, + "step": 2013 + }, + { + "epoch": 0.055299286106534874, + "grad_norm": 0.3745536506175995, + "learning_rate": 1.996588809044245e-05, + "loss": 0.5338, + "step": 2014 + }, + { + "epoch": 0.055326743547501375, + "grad_norm": 0.3538389801979065, + "learning_rate": 1.9965852438175795e-05, + "loss": 0.5499, + "step": 2015 + }, + { + "epoch": 0.055354200988467876, + "grad_norm": 0.32021448016166687, + "learning_rate": 1.9965816767319647e-05, + "loss": 0.6278, + "step": 2016 + }, + { + "epoch": 0.05538165842943438, + "grad_norm": 0.3408103585243225, + "learning_rate": 1.9965781077874067e-05, + "loss": 0.4662, + "step": 2017 + }, + { + "epoch": 0.05540911587040088, + "grad_norm": 0.4726349413394928, + "learning_rate": 1.9965745369839126e-05, + "loss": 0.5268, + "step": 2018 + }, + { + "epoch": 0.05543657331136738, + "grad_norm": 0.37714600563049316, + "learning_rate": 1.9965709643214888e-05, + "loss": 0.5525, + "step": 2019 + }, + { + "epoch": 0.05546403075233388, + "grad_norm": 0.3966827392578125, + "learning_rate": 1.9965673898001424e-05, + "loss": 0.5729, + "step": 2020 + }, + { + "epoch": 0.05549148819330038, + "grad_norm": 0.47160837054252625, + "learning_rate": 1.9965638134198792e-05, + "loss": 0.5332, + "step": 2021 + }, + { + "epoch": 0.05551894563426689, + "grad_norm": 0.45000022649765015, + "learning_rate": 1.996560235180707e-05, + "loss": 0.6143, + "step": 2022 + }, + { + "epoch": 0.05554640307523339, + "grad_norm": 0.35568341612815857, + "learning_rate": 1.9965566550826316e-05, + "loss": 0.5013, + "step": 2023 + }, + { + "epoch": 0.05557386051619989, + "grad_norm": 0.4731135964393616, + "learning_rate": 1.99655307312566e-05, + "loss": 0.5195, + "step": 2024 + }, + { + "epoch": 0.05560131795716639, + "grad_norm": 0.3742121160030365, + "learning_rate": 1.996549489309799e-05, + "loss": 0.5551, + "step": 2025 + }, + { + "epoch": 0.05562877539813289, + "grad_norm": 0.3754158914089203, + "learning_rate": 1.996545903635055e-05, + "loss": 0.4215, + "step": 2026 + }, + { + "epoch": 0.055656232839099394, + "grad_norm": 0.3337489366531372, + "learning_rate": 1.996542316101435e-05, + "loss": 0.4872, + "step": 2027 + }, + { + "epoch": 0.055683690280065895, + "grad_norm": 0.3623185157775879, + "learning_rate": 1.9965387267089453e-05, + "loss": 0.5688, + "step": 2028 + }, + { + "epoch": 0.0557111477210324, + "grad_norm": 0.36729708313941956, + "learning_rate": 1.996535135457593e-05, + "loss": 0.527, + "step": 2029 + }, + { + "epoch": 0.055738605161998904, + "grad_norm": 0.3253529667854309, + "learning_rate": 1.9965315423473842e-05, + "loss": 0.5183, + "step": 2030 + }, + { + "epoch": 0.055766062602965405, + "grad_norm": 0.38443174958229065, + "learning_rate": 1.9965279473783267e-05, + "loss": 0.6316, + "step": 2031 + }, + { + "epoch": 0.055793520043931906, + "grad_norm": 0.5800321698188782, + "learning_rate": 1.996524350550426e-05, + "loss": 0.5619, + "step": 2032 + }, + { + "epoch": 0.05582097748489841, + "grad_norm": 0.6226053833961487, + "learning_rate": 1.9965207518636897e-05, + "loss": 0.5755, + "step": 2033 + }, + { + "epoch": 0.05584843492586491, + "grad_norm": 0.33981379866600037, + "learning_rate": 1.9965171513181237e-05, + "loss": 0.5159, + "step": 2034 + }, + { + "epoch": 0.05587589236683141, + "grad_norm": 0.4213826060295105, + "learning_rate": 1.996513548913735e-05, + "loss": 0.6902, + "step": 2035 + }, + { + "epoch": 0.05590334980779791, + "grad_norm": 0.45275676250457764, + "learning_rate": 1.996509944650531e-05, + "loss": 0.4717, + "step": 2036 + }, + { + "epoch": 0.05593080724876442, + "grad_norm": 0.366296648979187, + "learning_rate": 1.996506338528518e-05, + "loss": 0.6725, + "step": 2037 + }, + { + "epoch": 0.05595826468973092, + "grad_norm": 0.3707183301448822, + "learning_rate": 1.9965027305477025e-05, + "loss": 0.6182, + "step": 2038 + }, + { + "epoch": 0.05598572213069742, + "grad_norm": 0.3783794045448303, + "learning_rate": 1.9964991207080912e-05, + "loss": 0.6264, + "step": 2039 + }, + { + "epoch": 0.05601317957166392, + "grad_norm": 0.3620065152645111, + "learning_rate": 1.996495509009691e-05, + "loss": 0.6018, + "step": 2040 + }, + { + "epoch": 0.05604063701263042, + "grad_norm": 0.3712809979915619, + "learning_rate": 1.9964918954525086e-05, + "loss": 0.5817, + "step": 2041 + }, + { + "epoch": 0.05606809445359692, + "grad_norm": 0.39888373017311096, + "learning_rate": 1.9964882800365508e-05, + "loss": 0.6361, + "step": 2042 + }, + { + "epoch": 0.056095551894563424, + "grad_norm": 0.42834100127220154, + "learning_rate": 1.9964846627618246e-05, + "loss": 0.5717, + "step": 2043 + }, + { + "epoch": 0.05612300933552993, + "grad_norm": 0.3709574341773987, + "learning_rate": 1.996481043628336e-05, + "loss": 0.5782, + "step": 2044 + }, + { + "epoch": 0.05615046677649643, + "grad_norm": 0.35595080256462097, + "learning_rate": 1.9964774226360927e-05, + "loss": 0.4876, + "step": 2045 + }, + { + "epoch": 0.056177924217462934, + "grad_norm": 0.42987295985221863, + "learning_rate": 1.9964737997851006e-05, + "loss": 0.6421, + "step": 2046 + }, + { + "epoch": 0.056205381658429435, + "grad_norm": 0.37864950299263, + "learning_rate": 1.996470175075367e-05, + "loss": 0.6578, + "step": 2047 + }, + { + "epoch": 0.056232839099395936, + "grad_norm": 0.3595561981201172, + "learning_rate": 1.9964665485068985e-05, + "loss": 0.4964, + "step": 2048 + }, + { + "epoch": 0.05626029654036244, + "grad_norm": 0.3331921398639679, + "learning_rate": 1.9964629200797015e-05, + "loss": 0.5776, + "step": 2049 + }, + { + "epoch": 0.05628775398132894, + "grad_norm": 0.358014315366745, + "learning_rate": 1.9964592897937835e-05, + "loss": 0.5617, + "step": 2050 + }, + { + "epoch": 0.05631521142229544, + "grad_norm": 0.3295709788799286, + "learning_rate": 1.996455657649151e-05, + "loss": 0.4767, + "step": 2051 + }, + { + "epoch": 0.05634266886326195, + "grad_norm": 0.3911502957344055, + "learning_rate": 1.9964520236458104e-05, + "loss": 0.4986, + "step": 2052 + }, + { + "epoch": 0.05637012630422845, + "grad_norm": 0.3673626184463501, + "learning_rate": 1.9964483877837688e-05, + "loss": 0.5375, + "step": 2053 + }, + { + "epoch": 0.05639758374519495, + "grad_norm": 0.3907355070114136, + "learning_rate": 1.996444750063033e-05, + "loss": 0.5577, + "step": 2054 + }, + { + "epoch": 0.05642504118616145, + "grad_norm": 0.7137215733528137, + "learning_rate": 1.9964411104836097e-05, + "loss": 0.6828, + "step": 2055 + }, + { + "epoch": 0.05645249862712795, + "grad_norm": 0.4165397882461548, + "learning_rate": 1.9964374690455055e-05, + "loss": 0.673, + "step": 2056 + }, + { + "epoch": 0.05647995606809445, + "grad_norm": 0.42495349049568176, + "learning_rate": 1.9964338257487274e-05, + "loss": 0.5968, + "step": 2057 + }, + { + "epoch": 0.05650741350906095, + "grad_norm": 0.4476298689842224, + "learning_rate": 1.9964301805932826e-05, + "loss": 0.6406, + "step": 2058 + }, + { + "epoch": 0.05653487095002746, + "grad_norm": 0.3488427698612213, + "learning_rate": 1.996426533579177e-05, + "loss": 0.5124, + "step": 2059 + }, + { + "epoch": 0.05656232839099396, + "grad_norm": 0.37953710556030273, + "learning_rate": 1.9964228847064183e-05, + "loss": 0.5728, + "step": 2060 + }, + { + "epoch": 0.05658978583196046, + "grad_norm": 0.32830098271369934, + "learning_rate": 1.996419233975013e-05, + "loss": 0.5364, + "step": 2061 + }, + { + "epoch": 0.05661724327292696, + "grad_norm": 0.47980713844299316, + "learning_rate": 1.9964155813849674e-05, + "loss": 0.5793, + "step": 2062 + }, + { + "epoch": 0.056644700713893464, + "grad_norm": 0.3663296401500702, + "learning_rate": 1.9964119269362887e-05, + "loss": 0.5166, + "step": 2063 + }, + { + "epoch": 0.056672158154859965, + "grad_norm": 0.6913097500801086, + "learning_rate": 1.996408270628984e-05, + "loss": 0.592, + "step": 2064 + }, + { + "epoch": 0.056699615595826466, + "grad_norm": 0.3918541371822357, + "learning_rate": 1.9964046124630595e-05, + "loss": 0.5815, + "step": 2065 + }, + { + "epoch": 0.056727073036792974, + "grad_norm": 0.3882789611816406, + "learning_rate": 1.9964009524385228e-05, + "loss": 0.5611, + "step": 2066 + }, + { + "epoch": 0.056754530477759475, + "grad_norm": 0.3591477572917938, + "learning_rate": 1.99639729055538e-05, + "loss": 0.5299, + "step": 2067 + }, + { + "epoch": 0.056781987918725976, + "grad_norm": 0.34083282947540283, + "learning_rate": 1.9963936268136383e-05, + "loss": 0.6021, + "step": 2068 + }, + { + "epoch": 0.05680944535969248, + "grad_norm": 0.3997838795185089, + "learning_rate": 1.996389961213305e-05, + "loss": 0.6232, + "step": 2069 + }, + { + "epoch": 0.05683690280065898, + "grad_norm": 0.38902944326400757, + "learning_rate": 1.9963862937543855e-05, + "loss": 0.662, + "step": 2070 + }, + { + "epoch": 0.05686436024162548, + "grad_norm": 0.41650712490081787, + "learning_rate": 1.996382624436888e-05, + "loss": 0.6102, + "step": 2071 + }, + { + "epoch": 0.05689181768259198, + "grad_norm": 0.4586417078971863, + "learning_rate": 1.996378953260819e-05, + "loss": 0.6335, + "step": 2072 + }, + { + "epoch": 0.05691927512355848, + "grad_norm": 0.38544201850891113, + "learning_rate": 1.9963752802261854e-05, + "loss": 0.5136, + "step": 2073 + }, + { + "epoch": 0.05694673256452499, + "grad_norm": 0.3732902407646179, + "learning_rate": 1.9963716053329937e-05, + "loss": 0.603, + "step": 2074 + }, + { + "epoch": 0.05697419000549149, + "grad_norm": 0.42070090770721436, + "learning_rate": 1.996367928581251e-05, + "loss": 0.4846, + "step": 2075 + }, + { + "epoch": 0.05700164744645799, + "grad_norm": 0.380567342042923, + "learning_rate": 1.996364249970964e-05, + "loss": 0.5739, + "step": 2076 + }, + { + "epoch": 0.05702910488742449, + "grad_norm": 0.37890395522117615, + "learning_rate": 1.9963605695021396e-05, + "loss": 0.619, + "step": 2077 + }, + { + "epoch": 0.05705656232839099, + "grad_norm": 0.3676146864891052, + "learning_rate": 1.9963568871747846e-05, + "loss": 0.4917, + "step": 2078 + }, + { + "epoch": 0.057084019769357494, + "grad_norm": 0.3497373163700104, + "learning_rate": 1.9963532029889062e-05, + "loss": 0.493, + "step": 2079 + }, + { + "epoch": 0.057111477210323995, + "grad_norm": 0.5582741498947144, + "learning_rate": 1.996349516944511e-05, + "loss": 0.5268, + "step": 2080 + }, + { + "epoch": 0.0571389346512905, + "grad_norm": 0.35925808548927307, + "learning_rate": 1.9963458290416066e-05, + "loss": 0.5935, + "step": 2081 + }, + { + "epoch": 0.057166392092257004, + "grad_norm": 0.371040940284729, + "learning_rate": 1.9963421392801985e-05, + "loss": 0.6186, + "step": 2082 + }, + { + "epoch": 0.057193849533223505, + "grad_norm": 0.35558146238327026, + "learning_rate": 1.9963384476602944e-05, + "loss": 0.5648, + "step": 2083 + }, + { + "epoch": 0.057221306974190006, + "grad_norm": 0.34265777468681335, + "learning_rate": 1.9963347541819012e-05, + "loss": 0.5956, + "step": 2084 + }, + { + "epoch": 0.05724876441515651, + "grad_norm": 0.39103201031684875, + "learning_rate": 1.9963310588450258e-05, + "loss": 0.5989, + "step": 2085 + }, + { + "epoch": 0.05727622185612301, + "grad_norm": 0.32918792963027954, + "learning_rate": 1.9963273616496747e-05, + "loss": 0.611, + "step": 2086 + }, + { + "epoch": 0.05730367929708951, + "grad_norm": 0.38253819942474365, + "learning_rate": 1.9963236625958555e-05, + "loss": 0.5723, + "step": 2087 + }, + { + "epoch": 0.05733113673805601, + "grad_norm": 0.373649001121521, + "learning_rate": 1.9963199616835745e-05, + "loss": 0.5901, + "step": 2088 + }, + { + "epoch": 0.05735859417902252, + "grad_norm": 0.3540079593658447, + "learning_rate": 1.9963162589128388e-05, + "loss": 0.6477, + "step": 2089 + }, + { + "epoch": 0.05738605161998902, + "grad_norm": 0.40538671612739563, + "learning_rate": 1.996312554283655e-05, + "loss": 0.6546, + "step": 2090 + }, + { + "epoch": 0.05741350906095552, + "grad_norm": 0.35379964113235474, + "learning_rate": 1.9963088477960305e-05, + "loss": 0.5089, + "step": 2091 + }, + { + "epoch": 0.05744096650192202, + "grad_norm": 0.334396094083786, + "learning_rate": 1.9963051394499718e-05, + "loss": 0.5228, + "step": 2092 + }, + { + "epoch": 0.05746842394288852, + "grad_norm": 0.3276139497756958, + "learning_rate": 1.9963014292454863e-05, + "loss": 0.5395, + "step": 2093 + }, + { + "epoch": 0.05749588138385502, + "grad_norm": 0.37385308742523193, + "learning_rate": 1.996297717182581e-05, + "loss": 0.4796, + "step": 2094 + }, + { + "epoch": 0.057523338824821524, + "grad_norm": 0.38942891359329224, + "learning_rate": 1.996294003261262e-05, + "loss": 0.6693, + "step": 2095 + }, + { + "epoch": 0.05755079626578803, + "grad_norm": 0.38757753372192383, + "learning_rate": 1.9962902874815367e-05, + "loss": 0.5123, + "step": 2096 + }, + { + "epoch": 0.05757825370675453, + "grad_norm": 0.38430896401405334, + "learning_rate": 1.996286569843412e-05, + "loss": 0.6012, + "step": 2097 + }, + { + "epoch": 0.057605711147721034, + "grad_norm": 0.3397964835166931, + "learning_rate": 1.996282850346895e-05, + "loss": 0.4714, + "step": 2098 + }, + { + "epoch": 0.057633168588687535, + "grad_norm": 0.3666517436504364, + "learning_rate": 1.9962791289919927e-05, + "loss": 0.4795, + "step": 2099 + }, + { + "epoch": 0.057660626029654036, + "grad_norm": 0.4206869900226593, + "learning_rate": 1.9962754057787114e-05, + "loss": 0.5854, + "step": 2100 + }, + { + "epoch": 0.05768808347062054, + "grad_norm": 0.3855239450931549, + "learning_rate": 1.9962716807070592e-05, + "loss": 0.4526, + "step": 2101 + }, + { + "epoch": 0.05771554091158704, + "grad_norm": 0.36273616552352905, + "learning_rate": 1.9962679537770414e-05, + "loss": 0.4751, + "step": 2102 + }, + { + "epoch": 0.05774299835255354, + "grad_norm": 0.36599552631378174, + "learning_rate": 1.9962642249886665e-05, + "loss": 0.5279, + "step": 2103 + }, + { + "epoch": 0.05777045579352005, + "grad_norm": 0.3558511734008789, + "learning_rate": 1.9962604943419407e-05, + "loss": 0.5198, + "step": 2104 + }, + { + "epoch": 0.05779791323448655, + "grad_norm": 0.3543054163455963, + "learning_rate": 1.9962567618368713e-05, + "loss": 0.497, + "step": 2105 + }, + { + "epoch": 0.05782537067545305, + "grad_norm": 0.3627586364746094, + "learning_rate": 1.9962530274734648e-05, + "loss": 0.5009, + "step": 2106 + }, + { + "epoch": 0.05785282811641955, + "grad_norm": 0.3531317114830017, + "learning_rate": 1.9962492912517287e-05, + "loss": 0.493, + "step": 2107 + }, + { + "epoch": 0.05788028555738605, + "grad_norm": 0.422013521194458, + "learning_rate": 1.9962455531716697e-05, + "loss": 0.5789, + "step": 2108 + }, + { + "epoch": 0.05790774299835255, + "grad_norm": 0.35329610109329224, + "learning_rate": 1.9962418132332943e-05, + "loss": 0.5622, + "step": 2109 + }, + { + "epoch": 0.05793520043931905, + "grad_norm": 0.7353420257568359, + "learning_rate": 1.9962380714366106e-05, + "loss": 0.65, + "step": 2110 + }, + { + "epoch": 0.05796265788028556, + "grad_norm": 0.3453896641731262, + "learning_rate": 1.9962343277816244e-05, + "loss": 0.6194, + "step": 2111 + }, + { + "epoch": 0.05799011532125206, + "grad_norm": 0.39047321677207947, + "learning_rate": 1.9962305822683434e-05, + "loss": 0.5473, + "step": 2112 + }, + { + "epoch": 0.05801757276221856, + "grad_norm": 0.38865065574645996, + "learning_rate": 1.9962268348967745e-05, + "loss": 0.5524, + "step": 2113 + }, + { + "epoch": 0.05804503020318506, + "grad_norm": 0.3683394491672516, + "learning_rate": 1.9962230856669243e-05, + "loss": 0.5767, + "step": 2114 + }, + { + "epoch": 0.058072487644151564, + "grad_norm": 0.3613957464694977, + "learning_rate": 1.9962193345788005e-05, + "loss": 0.5695, + "step": 2115 + }, + { + "epoch": 0.058099945085118065, + "grad_norm": 0.33223995566368103, + "learning_rate": 1.9962155816324097e-05, + "loss": 0.5418, + "step": 2116 + }, + { + "epoch": 0.058127402526084566, + "grad_norm": 0.3556272089481354, + "learning_rate": 1.9962118268277587e-05, + "loss": 0.5227, + "step": 2117 + }, + { + "epoch": 0.05815485996705107, + "grad_norm": 0.3495692312717438, + "learning_rate": 1.9962080701648546e-05, + "loss": 0.5261, + "step": 2118 + }, + { + "epoch": 0.058182317408017575, + "grad_norm": 0.3517887592315674, + "learning_rate": 1.9962043116437046e-05, + "loss": 0.5019, + "step": 2119 + }, + { + "epoch": 0.058209774848984076, + "grad_norm": 0.3904082179069519, + "learning_rate": 1.9962005512643157e-05, + "loss": 0.5882, + "step": 2120 + }, + { + "epoch": 0.05823723228995058, + "grad_norm": 0.41929295659065247, + "learning_rate": 1.9961967890266948e-05, + "loss": 0.5862, + "step": 2121 + }, + { + "epoch": 0.05826468973091708, + "grad_norm": 0.3513755202293396, + "learning_rate": 1.9961930249308486e-05, + "loss": 0.561, + "step": 2122 + }, + { + "epoch": 0.05829214717188358, + "grad_norm": 0.32787853479385376, + "learning_rate": 1.996189258976785e-05, + "loss": 0.5964, + "step": 2123 + }, + { + "epoch": 0.05831960461285008, + "grad_norm": 0.34504589438438416, + "learning_rate": 1.99618549116451e-05, + "loss": 0.5218, + "step": 2124 + }, + { + "epoch": 0.05834706205381658, + "grad_norm": 0.40498968958854675, + "learning_rate": 1.9961817214940315e-05, + "loss": 0.5111, + "step": 2125 + }, + { + "epoch": 0.05837451949478309, + "grad_norm": 0.3565294146537781, + "learning_rate": 1.9961779499653557e-05, + "loss": 0.4954, + "step": 2126 + }, + { + "epoch": 0.05840197693574959, + "grad_norm": 0.3774467408657074, + "learning_rate": 1.9961741765784904e-05, + "loss": 0.568, + "step": 2127 + }, + { + "epoch": 0.05842943437671609, + "grad_norm": 0.3867673873901367, + "learning_rate": 1.996170401333442e-05, + "loss": 0.4966, + "step": 2128 + }, + { + "epoch": 0.05845689181768259, + "grad_norm": 0.36321911215782166, + "learning_rate": 1.9961666242302183e-05, + "loss": 0.6001, + "step": 2129 + }, + { + "epoch": 0.05848434925864909, + "grad_norm": 0.35775139927864075, + "learning_rate": 1.9961628452688257e-05, + "loss": 0.5813, + "step": 2130 + }, + { + "epoch": 0.058511806699615594, + "grad_norm": 0.5144543051719666, + "learning_rate": 1.9961590644492714e-05, + "loss": 0.5887, + "step": 2131 + }, + { + "epoch": 0.058539264140582095, + "grad_norm": 0.4455825686454773, + "learning_rate": 1.9961552817715627e-05, + "loss": 0.6404, + "step": 2132 + }, + { + "epoch": 0.0585667215815486, + "grad_norm": 0.4945022165775299, + "learning_rate": 1.9961514972357062e-05, + "loss": 0.5868, + "step": 2133 + }, + { + "epoch": 0.058594179022515104, + "grad_norm": 0.36157357692718506, + "learning_rate": 1.9961477108417092e-05, + "loss": 0.5822, + "step": 2134 + }, + { + "epoch": 0.058621636463481605, + "grad_norm": 0.3817666471004486, + "learning_rate": 1.996143922589579e-05, + "loss": 0.5131, + "step": 2135 + }, + { + "epoch": 0.058649093904448106, + "grad_norm": 0.3608619272708893, + "learning_rate": 1.9961401324793226e-05, + "loss": 0.5419, + "step": 2136 + }, + { + "epoch": 0.05867655134541461, + "grad_norm": 0.3493039309978485, + "learning_rate": 1.9961363405109466e-05, + "loss": 0.5099, + "step": 2137 + }, + { + "epoch": 0.05870400878638111, + "grad_norm": 0.37090176343917847, + "learning_rate": 1.9961325466844587e-05, + "loss": 0.5914, + "step": 2138 + }, + { + "epoch": 0.05873146622734761, + "grad_norm": 0.3708887994289398, + "learning_rate": 1.9961287509998655e-05, + "loss": 0.5226, + "step": 2139 + }, + { + "epoch": 0.05875892366831411, + "grad_norm": 0.497320294380188, + "learning_rate": 1.996124953457174e-05, + "loss": 0.6046, + "step": 2140 + }, + { + "epoch": 0.05878638110928062, + "grad_norm": 0.3263620436191559, + "learning_rate": 1.996121154056392e-05, + "loss": 0.5559, + "step": 2141 + }, + { + "epoch": 0.05881383855024712, + "grad_norm": 0.435139924287796, + "learning_rate": 1.996117352797526e-05, + "loss": 0.5239, + "step": 2142 + }, + { + "epoch": 0.05884129599121362, + "grad_norm": 0.3795925974845886, + "learning_rate": 1.996113549680583e-05, + "loss": 0.4495, + "step": 2143 + }, + { + "epoch": 0.05886875343218012, + "grad_norm": 0.358169823884964, + "learning_rate": 1.9961097447055705e-05, + "loss": 0.5846, + "step": 2144 + }, + { + "epoch": 0.05889621087314662, + "grad_norm": 0.3654024004936218, + "learning_rate": 1.9961059378724953e-05, + "loss": 0.5682, + "step": 2145 + }, + { + "epoch": 0.05892366831411312, + "grad_norm": 0.37363845109939575, + "learning_rate": 1.9961021291813643e-05, + "loss": 0.658, + "step": 2146 + }, + { + "epoch": 0.058951125755079624, + "grad_norm": 0.36140117049217224, + "learning_rate": 1.9960983186321855e-05, + "loss": 0.5529, + "step": 2147 + }, + { + "epoch": 0.05897858319604613, + "grad_norm": 0.4407306909561157, + "learning_rate": 1.996094506224965e-05, + "loss": 0.584, + "step": 2148 + }, + { + "epoch": 0.05900604063701263, + "grad_norm": 0.33430108428001404, + "learning_rate": 1.9960906919597108e-05, + "loss": 0.5232, + "step": 2149 + }, + { + "epoch": 0.059033498077979134, + "grad_norm": 0.32781267166137695, + "learning_rate": 1.9960868758364295e-05, + "loss": 0.5856, + "step": 2150 + }, + { + "epoch": 0.059060955518945635, + "grad_norm": 0.9499065279960632, + "learning_rate": 1.9960830578551275e-05, + "loss": 0.464, + "step": 2151 + }, + { + "epoch": 0.059088412959912136, + "grad_norm": 0.3495224714279175, + "learning_rate": 1.9960792380158133e-05, + "loss": 0.5385, + "step": 2152 + }, + { + "epoch": 0.05911587040087864, + "grad_norm": 0.3699169456958771, + "learning_rate": 1.9960754163184934e-05, + "loss": 0.4808, + "step": 2153 + }, + { + "epoch": 0.05914332784184514, + "grad_norm": 0.3452378809452057, + "learning_rate": 1.996071592763175e-05, + "loss": 0.594, + "step": 2154 + }, + { + "epoch": 0.05917078528281164, + "grad_norm": 0.40316420793533325, + "learning_rate": 1.9960677673498648e-05, + "loss": 0.6708, + "step": 2155 + }, + { + "epoch": 0.05919824272377815, + "grad_norm": 0.33986324071884155, + "learning_rate": 1.9960639400785707e-05, + "loss": 0.5426, + "step": 2156 + }, + { + "epoch": 0.05922570016474465, + "grad_norm": 0.40857383608818054, + "learning_rate": 1.996060110949299e-05, + "loss": 0.587, + "step": 2157 + }, + { + "epoch": 0.05925315760571115, + "grad_norm": 0.3709402084350586, + "learning_rate": 1.9960562799620576e-05, + "loss": 0.5557, + "step": 2158 + }, + { + "epoch": 0.05928061504667765, + "grad_norm": 0.4072776734828949, + "learning_rate": 1.9960524471168533e-05, + "loss": 0.4753, + "step": 2159 + }, + { + "epoch": 0.05930807248764415, + "grad_norm": 0.36226820945739746, + "learning_rate": 1.9960486124136932e-05, + "loss": 0.5427, + "step": 2160 + }, + { + "epoch": 0.05933552992861065, + "grad_norm": 0.34444883465766907, + "learning_rate": 1.9960447758525846e-05, + "loss": 0.5539, + "step": 2161 + }, + { + "epoch": 0.05936298736957715, + "grad_norm": 0.5509684681892395, + "learning_rate": 1.9960409374335346e-05, + "loss": 0.4419, + "step": 2162 + }, + { + "epoch": 0.05939044481054366, + "grad_norm": 0.38677430152893066, + "learning_rate": 1.9960370971565504e-05, + "loss": 0.599, + "step": 2163 + }, + { + "epoch": 0.05941790225151016, + "grad_norm": 0.8592802286148071, + "learning_rate": 1.996033255021639e-05, + "loss": 0.5908, + "step": 2164 + }, + { + "epoch": 0.05944535969247666, + "grad_norm": 0.38140520453453064, + "learning_rate": 1.9960294110288077e-05, + "loss": 0.5912, + "step": 2165 + }, + { + "epoch": 0.059472817133443164, + "grad_norm": 0.3671068251132965, + "learning_rate": 1.9960255651780638e-05, + "loss": 0.6039, + "step": 2166 + }, + { + "epoch": 0.059500274574409664, + "grad_norm": 0.38614559173583984, + "learning_rate": 1.996021717469414e-05, + "loss": 0.4954, + "step": 2167 + }, + { + "epoch": 0.059527732015376165, + "grad_norm": 0.38289177417755127, + "learning_rate": 1.9960178679028664e-05, + "loss": 0.5795, + "step": 2168 + }, + { + "epoch": 0.059555189456342666, + "grad_norm": 0.36286115646362305, + "learning_rate": 1.996014016478427e-05, + "loss": 0.5734, + "step": 2169 + }, + { + "epoch": 0.05958264689730917, + "grad_norm": 4.5834455490112305, + "learning_rate": 1.996010163196104e-05, + "loss": 0.5406, + "step": 2170 + }, + { + "epoch": 0.059610104338275675, + "grad_norm": 0.4072335958480835, + "learning_rate": 1.9960063080559038e-05, + "loss": 0.5521, + "step": 2171 + }, + { + "epoch": 0.059637561779242176, + "grad_norm": 0.39108744263648987, + "learning_rate": 1.996002451057834e-05, + "loss": 0.5834, + "step": 2172 + }, + { + "epoch": 0.05966501922020868, + "grad_norm": 0.35315045714378357, + "learning_rate": 1.995998592201902e-05, + "loss": 0.5252, + "step": 2173 + }, + { + "epoch": 0.05969247666117518, + "grad_norm": 0.36881497502326965, + "learning_rate": 1.9959947314881144e-05, + "loss": 0.6557, + "step": 2174 + }, + { + "epoch": 0.05971993410214168, + "grad_norm": 0.38067081570625305, + "learning_rate": 1.995990868916479e-05, + "loss": 0.5765, + "step": 2175 + }, + { + "epoch": 0.05974739154310818, + "grad_norm": 0.4525109827518463, + "learning_rate": 1.9959870044870025e-05, + "loss": 0.6257, + "step": 2176 + }, + { + "epoch": 0.05977484898407468, + "grad_norm": 0.38258153200149536, + "learning_rate": 1.9959831381996927e-05, + "loss": 0.5767, + "step": 2177 + }, + { + "epoch": 0.05980230642504119, + "grad_norm": 0.3335186839103699, + "learning_rate": 1.995979270054556e-05, + "loss": 0.5242, + "step": 2178 + }, + { + "epoch": 0.05982976386600769, + "grad_norm": 0.3927525281906128, + "learning_rate": 1.9959754000516005e-05, + "loss": 0.5754, + "step": 2179 + }, + { + "epoch": 0.05985722130697419, + "grad_norm": 0.3810999095439911, + "learning_rate": 1.995971528190833e-05, + "loss": 0.5785, + "step": 2180 + }, + { + "epoch": 0.05988467874794069, + "grad_norm": 0.42955848574638367, + "learning_rate": 1.9959676544722605e-05, + "loss": 0.5668, + "step": 2181 + }, + { + "epoch": 0.05991213618890719, + "grad_norm": 0.3748985230922699, + "learning_rate": 1.9959637788958904e-05, + "loss": 0.6214, + "step": 2182 + }, + { + "epoch": 0.059939593629873694, + "grad_norm": 0.3618476688861847, + "learning_rate": 1.99595990146173e-05, + "loss": 0.5992, + "step": 2183 + }, + { + "epoch": 0.059967051070840195, + "grad_norm": 0.38910743594169617, + "learning_rate": 1.9959560221697865e-05, + "loss": 0.5502, + "step": 2184 + }, + { + "epoch": 0.059994508511806696, + "grad_norm": 0.357039213180542, + "learning_rate": 1.9959521410200674e-05, + "loss": 0.5658, + "step": 2185 + }, + { + "epoch": 0.060021965952773204, + "grad_norm": 0.36861079931259155, + "learning_rate": 1.9959482580125796e-05, + "loss": 0.5083, + "step": 2186 + }, + { + "epoch": 0.060049423393739705, + "grad_norm": 0.3543212413787842, + "learning_rate": 1.99594437314733e-05, + "loss": 0.5093, + "step": 2187 + }, + { + "epoch": 0.060076880834706206, + "grad_norm": 0.4011175334453583, + "learning_rate": 1.995940486424327e-05, + "loss": 0.6243, + "step": 2188 + }, + { + "epoch": 0.06010433827567271, + "grad_norm": 0.6190702319145203, + "learning_rate": 1.995936597843577e-05, + "loss": 0.636, + "step": 2189 + }, + { + "epoch": 0.06013179571663921, + "grad_norm": 0.33835357427597046, + "learning_rate": 1.995932707405087e-05, + "loss": 0.574, + "step": 2190 + }, + { + "epoch": 0.06015925315760571, + "grad_norm": 0.4024796783924103, + "learning_rate": 1.9959288151088646e-05, + "loss": 0.5967, + "step": 2191 + }, + { + "epoch": 0.06018671059857221, + "grad_norm": 0.3792473077774048, + "learning_rate": 1.9959249209549172e-05, + "loss": 0.6307, + "step": 2192 + }, + { + "epoch": 0.06021416803953872, + "grad_norm": 0.3466573655605316, + "learning_rate": 1.9959210249432522e-05, + "loss": 0.5278, + "step": 2193 + }, + { + "epoch": 0.06024162548050522, + "grad_norm": 0.36213958263397217, + "learning_rate": 1.9959171270738765e-05, + "loss": 0.5288, + "step": 2194 + }, + { + "epoch": 0.06026908292147172, + "grad_norm": 0.4089621901512146, + "learning_rate": 1.9959132273467973e-05, + "loss": 0.5333, + "step": 2195 + }, + { + "epoch": 0.06029654036243822, + "grad_norm": 0.41900789737701416, + "learning_rate": 1.9959093257620227e-05, + "loss": 0.7082, + "step": 2196 + }, + { + "epoch": 0.06032399780340472, + "grad_norm": 0.35812780261039734, + "learning_rate": 1.9959054223195588e-05, + "loss": 0.6205, + "step": 2197 + }, + { + "epoch": 0.06035145524437122, + "grad_norm": 0.3465331792831421, + "learning_rate": 1.9959015170194134e-05, + "loss": 0.5471, + "step": 2198 + }, + { + "epoch": 0.060378912685337724, + "grad_norm": 0.35243669152259827, + "learning_rate": 1.9958976098615937e-05, + "loss": 0.6123, + "step": 2199 + }, + { + "epoch": 0.06040637012630423, + "grad_norm": 0.38020363450050354, + "learning_rate": 1.9958937008461077e-05, + "loss": 0.6008, + "step": 2200 + }, + { + "epoch": 0.06043382756727073, + "grad_norm": 0.42968353629112244, + "learning_rate": 1.9958897899729616e-05, + "loss": 0.5614, + "step": 2201 + }, + { + "epoch": 0.060461285008237234, + "grad_norm": 0.7304350137710571, + "learning_rate": 1.9958858772421635e-05, + "loss": 0.5193, + "step": 2202 + }, + { + "epoch": 0.060488742449203735, + "grad_norm": 0.39762037992477417, + "learning_rate": 1.99588196265372e-05, + "loss": 0.5339, + "step": 2203 + }, + { + "epoch": 0.060516199890170236, + "grad_norm": 0.36051496863365173, + "learning_rate": 1.995878046207639e-05, + "loss": 0.5897, + "step": 2204 + }, + { + "epoch": 0.06054365733113674, + "grad_norm": 0.33115264773368835, + "learning_rate": 1.9958741279039278e-05, + "loss": 0.5356, + "step": 2205 + }, + { + "epoch": 0.06057111477210324, + "grad_norm": 0.3882325291633606, + "learning_rate": 1.995870207742593e-05, + "loss": 0.6257, + "step": 2206 + }, + { + "epoch": 0.06059857221306974, + "grad_norm": 0.39441296458244324, + "learning_rate": 1.9958662857236427e-05, + "loss": 0.6537, + "step": 2207 + }, + { + "epoch": 0.06062602965403625, + "grad_norm": 0.3537082374095917, + "learning_rate": 1.9958623618470842e-05, + "loss": 0.6159, + "step": 2208 + }, + { + "epoch": 0.06065348709500275, + "grad_norm": 0.34285151958465576, + "learning_rate": 1.9958584361129243e-05, + "loss": 0.522, + "step": 2209 + }, + { + "epoch": 0.06068094453596925, + "grad_norm": 0.33795827627182007, + "learning_rate": 1.9958545085211706e-05, + "loss": 0.6028, + "step": 2210 + }, + { + "epoch": 0.06070840197693575, + "grad_norm": 0.36470019817352295, + "learning_rate": 1.9958505790718303e-05, + "loss": 0.591, + "step": 2211 + }, + { + "epoch": 0.06073585941790225, + "grad_norm": 0.3774943947792053, + "learning_rate": 1.9958466477649108e-05, + "loss": 0.6532, + "step": 2212 + }, + { + "epoch": 0.06076331685886875, + "grad_norm": 0.4967968165874481, + "learning_rate": 1.9958427146004196e-05, + "loss": 0.528, + "step": 2213 + }, + { + "epoch": 0.06079077429983525, + "grad_norm": 0.36023372411727905, + "learning_rate": 1.9958387795783642e-05, + "loss": 0.5801, + "step": 2214 + }, + { + "epoch": 0.06081823174080176, + "grad_norm": 0.38253340125083923, + "learning_rate": 1.9958348426987513e-05, + "loss": 0.6204, + "step": 2215 + }, + { + "epoch": 0.06084568918176826, + "grad_norm": 0.3428211212158203, + "learning_rate": 1.9958309039615886e-05, + "loss": 0.5237, + "step": 2216 + }, + { + "epoch": 0.06087314662273476, + "grad_norm": 0.34544041752815247, + "learning_rate": 1.9958269633668834e-05, + "loss": 0.4874, + "step": 2217 + }, + { + "epoch": 0.060900604063701264, + "grad_norm": 0.38082388043403625, + "learning_rate": 1.995823020914643e-05, + "loss": 0.5675, + "step": 2218 + }, + { + "epoch": 0.060928061504667765, + "grad_norm": 0.4328271746635437, + "learning_rate": 1.995819076604875e-05, + "loss": 0.6572, + "step": 2219 + }, + { + "epoch": 0.060955518945634266, + "grad_norm": 0.39280998706817627, + "learning_rate": 1.9958151304375866e-05, + "loss": 0.5616, + "step": 2220 + }, + { + "epoch": 0.060982976386600767, + "grad_norm": 0.3945459723472595, + "learning_rate": 1.995811182412785e-05, + "loss": 0.6064, + "step": 2221 + }, + { + "epoch": 0.06101043382756727, + "grad_norm": 0.3881964087486267, + "learning_rate": 1.9958072325304777e-05, + "loss": 0.5552, + "step": 2222 + }, + { + "epoch": 0.061037891268533775, + "grad_norm": 0.366852343082428, + "learning_rate": 1.9958032807906725e-05, + "loss": 0.5253, + "step": 2223 + }, + { + "epoch": 0.061065348709500276, + "grad_norm": 0.4133734703063965, + "learning_rate": 1.995799327193376e-05, + "loss": 0.601, + "step": 2224 + }, + { + "epoch": 0.06109280615046678, + "grad_norm": 0.36045631766319275, + "learning_rate": 1.995795371738596e-05, + "loss": 0.5718, + "step": 2225 + }, + { + "epoch": 0.06112026359143328, + "grad_norm": 0.38050082325935364, + "learning_rate": 1.9957914144263398e-05, + "loss": 0.5908, + "step": 2226 + }, + { + "epoch": 0.06114772103239978, + "grad_norm": 0.42873719334602356, + "learning_rate": 1.9957874552566147e-05, + "loss": 0.5557, + "step": 2227 + }, + { + "epoch": 0.06117517847336628, + "grad_norm": 0.4077243208885193, + "learning_rate": 1.9957834942294284e-05, + "loss": 0.6465, + "step": 2228 + }, + { + "epoch": 0.06120263591433278, + "grad_norm": 0.3590684235095978, + "learning_rate": 1.995779531344788e-05, + "loss": 0.4935, + "step": 2229 + }, + { + "epoch": 0.06123009335529929, + "grad_norm": 0.35945841670036316, + "learning_rate": 1.995775566602701e-05, + "loss": 0.4746, + "step": 2230 + }, + { + "epoch": 0.06125755079626579, + "grad_norm": 0.36589139699935913, + "learning_rate": 1.9957716000031748e-05, + "loss": 0.5114, + "step": 2231 + }, + { + "epoch": 0.06128500823723229, + "grad_norm": 0.41690871119499207, + "learning_rate": 1.9957676315462166e-05, + "loss": 0.539, + "step": 2232 + }, + { + "epoch": 0.06131246567819879, + "grad_norm": 0.39166584610939026, + "learning_rate": 1.9957636612318337e-05, + "loss": 0.6121, + "step": 2233 + }, + { + "epoch": 0.06133992311916529, + "grad_norm": 0.3611486554145813, + "learning_rate": 1.995759689060034e-05, + "loss": 0.4411, + "step": 2234 + }, + { + "epoch": 0.061367380560131794, + "grad_norm": 0.34359443187713623, + "learning_rate": 1.995755715030825e-05, + "loss": 0.4769, + "step": 2235 + }, + { + "epoch": 0.061394838001098295, + "grad_norm": 0.3115439713001251, + "learning_rate": 1.9957517391442134e-05, + "loss": 0.47, + "step": 2236 + }, + { + "epoch": 0.061422295442064796, + "grad_norm": 0.34263476729393005, + "learning_rate": 1.995747761400207e-05, + "loss": 0.4703, + "step": 2237 + }, + { + "epoch": 0.061449752883031304, + "grad_norm": 0.35825952887535095, + "learning_rate": 1.9957437817988134e-05, + "loss": 0.5211, + "step": 2238 + }, + { + "epoch": 0.061477210323997805, + "grad_norm": 0.3702717125415802, + "learning_rate": 1.9957398003400398e-05, + "loss": 0.624, + "step": 2239 + }, + { + "epoch": 0.061504667764964306, + "grad_norm": 0.3753089904785156, + "learning_rate": 1.9957358170238933e-05, + "loss": 0.6685, + "step": 2240 + }, + { + "epoch": 0.06153212520593081, + "grad_norm": 0.3798760175704956, + "learning_rate": 1.995731831850382e-05, + "loss": 0.5347, + "step": 2241 + }, + { + "epoch": 0.06155958264689731, + "grad_norm": 0.34956783056259155, + "learning_rate": 1.9957278448195134e-05, + "loss": 0.5167, + "step": 2242 + }, + { + "epoch": 0.06158704008786381, + "grad_norm": 0.3399032652378082, + "learning_rate": 1.9957238559312938e-05, + "loss": 0.4865, + "step": 2243 + }, + { + "epoch": 0.06161449752883031, + "grad_norm": 0.4953603446483612, + "learning_rate": 1.9957198651857322e-05, + "loss": 0.5925, + "step": 2244 + }, + { + "epoch": 0.06164195496979682, + "grad_norm": 0.3936220109462738, + "learning_rate": 1.9957158725828348e-05, + "loss": 0.5858, + "step": 2245 + }, + { + "epoch": 0.06166941241076332, + "grad_norm": 0.5521969795227051, + "learning_rate": 1.9957118781226095e-05, + "loss": 0.4939, + "step": 2246 + }, + { + "epoch": 0.06169686985172982, + "grad_norm": 0.3506004512310028, + "learning_rate": 1.995707881805064e-05, + "loss": 0.5319, + "step": 2247 + }, + { + "epoch": 0.06172432729269632, + "grad_norm": 0.4359992742538452, + "learning_rate": 1.9957038836302052e-05, + "loss": 0.548, + "step": 2248 + }, + { + "epoch": 0.06175178473366282, + "grad_norm": 0.3661154806613922, + "learning_rate": 1.995699883598041e-05, + "loss": 0.5042, + "step": 2249 + }, + { + "epoch": 0.06177924217462932, + "grad_norm": 0.3790014982223511, + "learning_rate": 1.9956958817085786e-05, + "loss": 0.5444, + "step": 2250 + }, + { + "epoch": 0.061806699615595824, + "grad_norm": 0.38349899649620056, + "learning_rate": 1.995691877961826e-05, + "loss": 0.5851, + "step": 2251 + }, + { + "epoch": 0.061834157056562325, + "grad_norm": 0.40254703164100647, + "learning_rate": 1.99568787235779e-05, + "loss": 0.5331, + "step": 2252 + }, + { + "epoch": 0.06186161449752883, + "grad_norm": 0.33847615122795105, + "learning_rate": 1.995683864896478e-05, + "loss": 0.4876, + "step": 2253 + }, + { + "epoch": 0.061889071938495334, + "grad_norm": 0.47499531507492065, + "learning_rate": 1.9956798555778984e-05, + "loss": 0.5626, + "step": 2254 + }, + { + "epoch": 0.061916529379461835, + "grad_norm": 0.3656770586967468, + "learning_rate": 1.9956758444020577e-05, + "loss": 0.4675, + "step": 2255 + }, + { + "epoch": 0.061943986820428336, + "grad_norm": 0.37194526195526123, + "learning_rate": 1.9956718313689637e-05, + "loss": 0.5739, + "step": 2256 + }, + { + "epoch": 0.06197144426139484, + "grad_norm": 0.3548049032688141, + "learning_rate": 1.9956678164786246e-05, + "loss": 0.6028, + "step": 2257 + }, + { + "epoch": 0.06199890170236134, + "grad_norm": 0.39905068278312683, + "learning_rate": 1.9956637997310466e-05, + "loss": 0.572, + "step": 2258 + }, + { + "epoch": 0.06202635914332784, + "grad_norm": 0.33913111686706543, + "learning_rate": 1.995659781126238e-05, + "loss": 0.6195, + "step": 2259 + }, + { + "epoch": 0.06205381658429435, + "grad_norm": 0.3583167791366577, + "learning_rate": 1.9956557606642063e-05, + "loss": 0.5625, + "step": 2260 + }, + { + "epoch": 0.06208127402526085, + "grad_norm": 0.39295777678489685, + "learning_rate": 1.9956517383449587e-05, + "loss": 0.6595, + "step": 2261 + }, + { + "epoch": 0.06210873146622735, + "grad_norm": 0.3511582911014557, + "learning_rate": 1.9956477141685025e-05, + "loss": 0.5338, + "step": 2262 + }, + { + "epoch": 0.06213618890719385, + "grad_norm": 0.3706660866737366, + "learning_rate": 1.995643688134846e-05, + "loss": 0.4894, + "step": 2263 + }, + { + "epoch": 0.06216364634816035, + "grad_norm": 0.41938576102256775, + "learning_rate": 1.995639660243996e-05, + "loss": 0.618, + "step": 2264 + }, + { + "epoch": 0.06219110378912685, + "grad_norm": 0.3404916226863861, + "learning_rate": 1.9956356304959607e-05, + "loss": 0.5432, + "step": 2265 + }, + { + "epoch": 0.06221856123009335, + "grad_norm": 0.3488747179508209, + "learning_rate": 1.9956315988907464e-05, + "loss": 0.5845, + "step": 2266 + }, + { + "epoch": 0.06224601867105986, + "grad_norm": 0.3924195468425751, + "learning_rate": 1.995627565428362e-05, + "loss": 0.6417, + "step": 2267 + }, + { + "epoch": 0.06227347611202636, + "grad_norm": 0.4261990487575531, + "learning_rate": 1.9956235301088144e-05, + "loss": 0.5988, + "step": 2268 + }, + { + "epoch": 0.06230093355299286, + "grad_norm": 0.41748592257499695, + "learning_rate": 1.9956194929321108e-05, + "loss": 0.6291, + "step": 2269 + }, + { + "epoch": 0.062328390993959364, + "grad_norm": 0.35313042998313904, + "learning_rate": 1.9956154538982593e-05, + "loss": 0.6238, + "step": 2270 + }, + { + "epoch": 0.062355848434925865, + "grad_norm": 0.35685089230537415, + "learning_rate": 1.995611413007267e-05, + "loss": 0.5225, + "step": 2271 + }, + { + "epoch": 0.062383305875892366, + "grad_norm": 0.32201531529426575, + "learning_rate": 1.995607370259142e-05, + "loss": 0.5438, + "step": 2272 + }, + { + "epoch": 0.06241076331685887, + "grad_norm": 0.38347142934799194, + "learning_rate": 1.9956033256538914e-05, + "loss": 0.6097, + "step": 2273 + }, + { + "epoch": 0.06243822075782537, + "grad_norm": 0.34300997853279114, + "learning_rate": 1.9955992791915227e-05, + "loss": 0.5604, + "step": 2274 + }, + { + "epoch": 0.062465678198791875, + "grad_norm": 0.3473573923110962, + "learning_rate": 1.9955952308720438e-05, + "loss": 0.5257, + "step": 2275 + }, + { + "epoch": 0.062493135639758376, + "grad_norm": 0.40131157636642456, + "learning_rate": 1.995591180695462e-05, + "loss": 0.5358, + "step": 2276 + }, + { + "epoch": 0.06252059308072487, + "grad_norm": 0.3179967701435089, + "learning_rate": 1.9955871286617847e-05, + "loss": 0.5663, + "step": 2277 + }, + { + "epoch": 0.06254805052169138, + "grad_norm": 0.484841525554657, + "learning_rate": 1.9955830747710198e-05, + "loss": 0.5293, + "step": 2278 + }, + { + "epoch": 0.06257550796265789, + "grad_norm": 0.34153032302856445, + "learning_rate": 1.9955790190231744e-05, + "loss": 0.5094, + "step": 2279 + }, + { + "epoch": 0.06260296540362438, + "grad_norm": 0.415095716714859, + "learning_rate": 1.9955749614182567e-05, + "loss": 0.6684, + "step": 2280 + }, + { + "epoch": 0.06263042284459089, + "grad_norm": 0.36621731519699097, + "learning_rate": 1.995570901956274e-05, + "loss": 0.6181, + "step": 2281 + }, + { + "epoch": 0.06265788028555738, + "grad_norm": 0.37299177050590515, + "learning_rate": 1.9955668406372336e-05, + "loss": 0.5142, + "step": 2282 + }, + { + "epoch": 0.06268533772652389, + "grad_norm": 0.38442304730415344, + "learning_rate": 1.9955627774611437e-05, + "loss": 0.5777, + "step": 2283 + }, + { + "epoch": 0.06271279516749038, + "grad_norm": 0.5352806448936462, + "learning_rate": 1.995558712428011e-05, + "loss": 0.5238, + "step": 2284 + }, + { + "epoch": 0.06274025260845689, + "grad_norm": 0.3169478476047516, + "learning_rate": 1.9955546455378436e-05, + "loss": 0.5389, + "step": 2285 + }, + { + "epoch": 0.0627677100494234, + "grad_norm": 0.3604527711868286, + "learning_rate": 1.9955505767906493e-05, + "loss": 0.5506, + "step": 2286 + }, + { + "epoch": 0.0627951674903899, + "grad_norm": 0.4571734368801117, + "learning_rate": 1.9955465061864352e-05, + "loss": 0.635, + "step": 2287 + }, + { + "epoch": 0.0628226249313564, + "grad_norm": 0.34403151273727417, + "learning_rate": 1.9955424337252095e-05, + "loss": 0.5086, + "step": 2288 + }, + { + "epoch": 0.0628500823723229, + "grad_norm": 0.44114401936531067, + "learning_rate": 1.9955383594069792e-05, + "loss": 0.612, + "step": 2289 + }, + { + "epoch": 0.0628775398132894, + "grad_norm": 0.4709815979003906, + "learning_rate": 1.995534283231752e-05, + "loss": 0.6116, + "step": 2290 + }, + { + "epoch": 0.0629049972542559, + "grad_norm": 0.3816196024417877, + "learning_rate": 1.9955302051995355e-05, + "loss": 0.5459, + "step": 2291 + }, + { + "epoch": 0.0629324546952224, + "grad_norm": 0.38121655583381653, + "learning_rate": 1.9955261253103377e-05, + "loss": 0.6082, + "step": 2292 + }, + { + "epoch": 0.0629599121361889, + "grad_norm": 0.377704381942749, + "learning_rate": 1.995522043564166e-05, + "loss": 0.4812, + "step": 2293 + }, + { + "epoch": 0.06298736957715541, + "grad_norm": 0.32774117588996887, + "learning_rate": 1.9955179599610277e-05, + "loss": 0.5398, + "step": 2294 + }, + { + "epoch": 0.06301482701812192, + "grad_norm": 0.3724874258041382, + "learning_rate": 1.9955138745009308e-05, + "loss": 0.5277, + "step": 2295 + }, + { + "epoch": 0.06304228445908841, + "grad_norm": 0.4296702742576599, + "learning_rate": 1.995509787183883e-05, + "loss": 0.6025, + "step": 2296 + }, + { + "epoch": 0.06306974190005492, + "grad_norm": 0.34256622195243835, + "learning_rate": 1.9955056980098914e-05, + "loss": 0.507, + "step": 2297 + }, + { + "epoch": 0.06309719934102141, + "grad_norm": 0.34510764479637146, + "learning_rate": 1.9955016069789638e-05, + "loss": 0.5837, + "step": 2298 + }, + { + "epoch": 0.06312465678198792, + "grad_norm": 0.36298176646232605, + "learning_rate": 1.9954975140911083e-05, + "loss": 0.6197, + "step": 2299 + }, + { + "epoch": 0.06315211422295441, + "grad_norm": 0.4207666218280792, + "learning_rate": 1.9954934193463322e-05, + "loss": 0.5901, + "step": 2300 + }, + { + "epoch": 0.06317957166392092, + "grad_norm": 0.34578725695610046, + "learning_rate": 1.995489322744643e-05, + "loss": 0.6002, + "step": 2301 + }, + { + "epoch": 0.06320702910488743, + "grad_norm": 0.7501724362373352, + "learning_rate": 1.9954852242860487e-05, + "loss": 0.5604, + "step": 2302 + }, + { + "epoch": 0.06323448654585392, + "grad_norm": 0.35167786478996277, + "learning_rate": 1.9954811239705565e-05, + "loss": 0.5193, + "step": 2303 + }, + { + "epoch": 0.06326194398682043, + "grad_norm": 0.39194926619529724, + "learning_rate": 1.9954770217981742e-05, + "loss": 0.5909, + "step": 2304 + }, + { + "epoch": 0.06328940142778693, + "grad_norm": 0.42285704612731934, + "learning_rate": 1.9954729177689098e-05, + "loss": 0.5684, + "step": 2305 + }, + { + "epoch": 0.06331685886875343, + "grad_norm": 0.33957818150520325, + "learning_rate": 1.995468811882771e-05, + "loss": 0.6116, + "step": 2306 + }, + { + "epoch": 0.06334431630971993, + "grad_norm": 0.46363919973373413, + "learning_rate": 1.9954647041397647e-05, + "loss": 0.519, + "step": 2307 + }, + { + "epoch": 0.06337177375068644, + "grad_norm": 0.6705470085144043, + "learning_rate": 1.995460594539899e-05, + "loss": 0.6881, + "step": 2308 + }, + { + "epoch": 0.06339923119165294, + "grad_norm": 0.5446553826332092, + "learning_rate": 1.9954564830831814e-05, + "loss": 0.6161, + "step": 2309 + }, + { + "epoch": 0.06342668863261944, + "grad_norm": 0.3758768141269684, + "learning_rate": 1.9954523697696203e-05, + "loss": 0.6039, + "step": 2310 + }, + { + "epoch": 0.06345414607358595, + "grad_norm": 0.5052087903022766, + "learning_rate": 1.9954482545992224e-05, + "loss": 0.5199, + "step": 2311 + }, + { + "epoch": 0.06348160351455244, + "grad_norm": 0.4090321958065033, + "learning_rate": 1.9954441375719958e-05, + "loss": 0.5276, + "step": 2312 + }, + { + "epoch": 0.06350906095551895, + "grad_norm": 0.3880351185798645, + "learning_rate": 1.9954400186879483e-05, + "loss": 0.5743, + "step": 2313 + }, + { + "epoch": 0.06353651839648544, + "grad_norm": 0.3884424567222595, + "learning_rate": 1.9954358979470877e-05, + "loss": 0.6075, + "step": 2314 + }, + { + "epoch": 0.06356397583745195, + "grad_norm": 0.3533661663532257, + "learning_rate": 1.995431775349421e-05, + "loss": 0.5487, + "step": 2315 + }, + { + "epoch": 0.06359143327841846, + "grad_norm": 0.4300060570240021, + "learning_rate": 1.9954276508949566e-05, + "loss": 0.6232, + "step": 2316 + }, + { + "epoch": 0.06361889071938495, + "grad_norm": 0.34378761053085327, + "learning_rate": 1.9954235245837017e-05, + "loss": 0.4507, + "step": 2317 + }, + { + "epoch": 0.06364634816035146, + "grad_norm": 0.35687997937202454, + "learning_rate": 1.9954193964156645e-05, + "loss": 0.5023, + "step": 2318 + }, + { + "epoch": 0.06367380560131795, + "grad_norm": 0.3871772289276123, + "learning_rate": 1.9954152663908522e-05, + "loss": 0.6216, + "step": 2319 + }, + { + "epoch": 0.06370126304228446, + "grad_norm": 0.33289089798927307, + "learning_rate": 1.9954111345092728e-05, + "loss": 0.4748, + "step": 2320 + }, + { + "epoch": 0.06372872048325096, + "grad_norm": 0.33130377531051636, + "learning_rate": 1.995407000770934e-05, + "loss": 0.5653, + "step": 2321 + }, + { + "epoch": 0.06375617792421746, + "grad_norm": 0.35807961225509644, + "learning_rate": 1.9954028651758435e-05, + "loss": 0.5619, + "step": 2322 + }, + { + "epoch": 0.06378363536518397, + "grad_norm": 0.3314899504184723, + "learning_rate": 1.9953987277240087e-05, + "loss": 0.5152, + "step": 2323 + }, + { + "epoch": 0.06381109280615047, + "grad_norm": 0.4388476014137268, + "learning_rate": 1.9953945884154378e-05, + "loss": 0.583, + "step": 2324 + }, + { + "epoch": 0.06383855024711697, + "grad_norm": 0.3738574981689453, + "learning_rate": 1.9953904472501385e-05, + "loss": 0.5615, + "step": 2325 + }, + { + "epoch": 0.06386600768808347, + "grad_norm": 0.3551120162010193, + "learning_rate": 1.995386304228118e-05, + "loss": 0.5096, + "step": 2326 + }, + { + "epoch": 0.06389346512904998, + "grad_norm": 0.40389296412467957, + "learning_rate": 1.9953821593493844e-05, + "loss": 0.5465, + "step": 2327 + }, + { + "epoch": 0.06392092257001647, + "grad_norm": 0.343547523021698, + "learning_rate": 1.9953780126139453e-05, + "loss": 0.5997, + "step": 2328 + }, + { + "epoch": 0.06394838001098298, + "grad_norm": 0.3800989091396332, + "learning_rate": 1.995373864021809e-05, + "loss": 0.5982, + "step": 2329 + }, + { + "epoch": 0.06397583745194947, + "grad_norm": 0.43190714716911316, + "learning_rate": 1.995369713572982e-05, + "loss": 0.5738, + "step": 2330 + }, + { + "epoch": 0.06400329489291598, + "grad_norm": 0.35931316018104553, + "learning_rate": 1.995365561267473e-05, + "loss": 0.5967, + "step": 2331 + }, + { + "epoch": 0.06403075233388249, + "grad_norm": 0.3362046480178833, + "learning_rate": 1.99536140710529e-05, + "loss": 0.568, + "step": 2332 + }, + { + "epoch": 0.06405820977484898, + "grad_norm": 0.3802679479122162, + "learning_rate": 1.99535725108644e-05, + "loss": 0.5152, + "step": 2333 + }, + { + "epoch": 0.06408566721581549, + "grad_norm": 0.40355223417282104, + "learning_rate": 1.995353093210931e-05, + "loss": 0.6161, + "step": 2334 + }, + { + "epoch": 0.06411312465678198, + "grad_norm": 0.4156351089477539, + "learning_rate": 1.9953489334787707e-05, + "loss": 0.5688, + "step": 2335 + }, + { + "epoch": 0.06414058209774849, + "grad_norm": 0.38148412108421326, + "learning_rate": 1.9953447718899674e-05, + "loss": 0.5619, + "step": 2336 + }, + { + "epoch": 0.06416803953871499, + "grad_norm": 0.3595396876335144, + "learning_rate": 1.995340608444528e-05, + "loss": 0.6102, + "step": 2337 + }, + { + "epoch": 0.0641954969796815, + "grad_norm": 0.392189085483551, + "learning_rate": 1.995336443142461e-05, + "loss": 0.5758, + "step": 2338 + }, + { + "epoch": 0.064222954420648, + "grad_norm": 0.36414793133735657, + "learning_rate": 1.9953322759837737e-05, + "loss": 0.56, + "step": 2339 + }, + { + "epoch": 0.0642504118616145, + "grad_norm": 0.35379132628440857, + "learning_rate": 1.995328106968474e-05, + "loss": 0.4934, + "step": 2340 + }, + { + "epoch": 0.064277869302581, + "grad_norm": 0.3400535583496094, + "learning_rate": 1.9953239360965697e-05, + "loss": 0.4699, + "step": 2341 + }, + { + "epoch": 0.0643053267435475, + "grad_norm": 0.3722015619277954, + "learning_rate": 1.9953197633680685e-05, + "loss": 0.5347, + "step": 2342 + }, + { + "epoch": 0.064332784184514, + "grad_norm": 0.41912591457366943, + "learning_rate": 1.9953155887829785e-05, + "loss": 0.619, + "step": 2343 + }, + { + "epoch": 0.0643602416254805, + "grad_norm": 0.4313147962093353, + "learning_rate": 1.9953114123413072e-05, + "loss": 0.4778, + "step": 2344 + }, + { + "epoch": 0.06438769906644701, + "grad_norm": 0.4037111699581146, + "learning_rate": 1.9953072340430623e-05, + "loss": 0.6029, + "step": 2345 + }, + { + "epoch": 0.06441515650741352, + "grad_norm": 0.3588685095310211, + "learning_rate": 1.995303053888252e-05, + "loss": 0.5645, + "step": 2346 + }, + { + "epoch": 0.06444261394838001, + "grad_norm": 0.3372650444507599, + "learning_rate": 1.9952988718768836e-05, + "loss": 0.6063, + "step": 2347 + }, + { + "epoch": 0.06447007138934652, + "grad_norm": 0.4008887708187103, + "learning_rate": 1.995294688008965e-05, + "loss": 0.6082, + "step": 2348 + }, + { + "epoch": 0.06449752883031301, + "grad_norm": 0.394862562417984, + "learning_rate": 1.9952905022845045e-05, + "loss": 0.5774, + "step": 2349 + }, + { + "epoch": 0.06452498627127952, + "grad_norm": 0.41483163833618164, + "learning_rate": 1.9952863147035096e-05, + "loss": 0.5861, + "step": 2350 + }, + { + "epoch": 0.06455244371224601, + "grad_norm": 0.38506001234054565, + "learning_rate": 1.995282125265988e-05, + "loss": 0.6398, + "step": 2351 + }, + { + "epoch": 0.06457990115321252, + "grad_norm": 0.46177539229393005, + "learning_rate": 1.9952779339719473e-05, + "loss": 0.5611, + "step": 2352 + }, + { + "epoch": 0.06460735859417903, + "grad_norm": 0.347177654504776, + "learning_rate": 1.9952737408213957e-05, + "loss": 0.5159, + "step": 2353 + }, + { + "epoch": 0.06463481603514552, + "grad_norm": 0.3991475999355316, + "learning_rate": 1.995269545814341e-05, + "loss": 0.5205, + "step": 2354 + }, + { + "epoch": 0.06466227347611203, + "grad_norm": 0.37093180418014526, + "learning_rate": 1.995265348950791e-05, + "loss": 0.608, + "step": 2355 + }, + { + "epoch": 0.06468973091707853, + "grad_norm": 0.39164698123931885, + "learning_rate": 1.9952611502307535e-05, + "loss": 0.5165, + "step": 2356 + }, + { + "epoch": 0.06471718835804503, + "grad_norm": 0.4027473032474518, + "learning_rate": 1.9952569496542363e-05, + "loss": 0.6494, + "step": 2357 + }, + { + "epoch": 0.06474464579901153, + "grad_norm": 0.5456291437149048, + "learning_rate": 1.9952527472212472e-05, + "loss": 0.5251, + "step": 2358 + }, + { + "epoch": 0.06477210323997803, + "grad_norm": 0.39037951827049255, + "learning_rate": 1.995248542931794e-05, + "loss": 0.6345, + "step": 2359 + }, + { + "epoch": 0.06479956068094453, + "grad_norm": 0.3340372145175934, + "learning_rate": 1.9952443367858843e-05, + "loss": 0.5391, + "step": 2360 + }, + { + "epoch": 0.06482701812191104, + "grad_norm": 0.3582030236721039, + "learning_rate": 1.995240128783527e-05, + "loss": 0.6511, + "step": 2361 + }, + { + "epoch": 0.06485447556287754, + "grad_norm": 0.4054940938949585, + "learning_rate": 1.9952359189247286e-05, + "loss": 0.6254, + "step": 2362 + }, + { + "epoch": 0.06488193300384404, + "grad_norm": 0.37794166803359985, + "learning_rate": 1.9952317072094977e-05, + "loss": 0.5589, + "step": 2363 + }, + { + "epoch": 0.06490939044481055, + "grad_norm": 0.41617465019226074, + "learning_rate": 1.995227493637842e-05, + "loss": 0.4759, + "step": 2364 + }, + { + "epoch": 0.06493684788577704, + "grad_norm": 0.4125503897666931, + "learning_rate": 1.9952232782097697e-05, + "loss": 0.5837, + "step": 2365 + }, + { + "epoch": 0.06496430532674355, + "grad_norm": 0.40291157364845276, + "learning_rate": 1.995219060925288e-05, + "loss": 0.5432, + "step": 2366 + }, + { + "epoch": 0.06499176276771004, + "grad_norm": 0.37631815671920776, + "learning_rate": 1.9952148417844056e-05, + "loss": 0.591, + "step": 2367 + }, + { + "epoch": 0.06501922020867655, + "grad_norm": 0.33996298909187317, + "learning_rate": 1.9952106207871295e-05, + "loss": 0.6312, + "step": 2368 + }, + { + "epoch": 0.06504667764964306, + "grad_norm": 0.3862976133823395, + "learning_rate": 1.9952063979334683e-05, + "loss": 0.5445, + "step": 2369 + }, + { + "epoch": 0.06507413509060955, + "grad_norm": 0.4430011510848999, + "learning_rate": 1.995202173223429e-05, + "loss": 0.5691, + "step": 2370 + }, + { + "epoch": 0.06510159253157606, + "grad_norm": 0.3977324068546295, + "learning_rate": 1.99519794665702e-05, + "loss": 0.5261, + "step": 2371 + }, + { + "epoch": 0.06512904997254255, + "grad_norm": 0.3757913112640381, + "learning_rate": 1.9951937182342496e-05, + "loss": 0.5547, + "step": 2372 + }, + { + "epoch": 0.06515650741350906, + "grad_norm": 0.34055274724960327, + "learning_rate": 1.995189487955125e-05, + "loss": 0.5336, + "step": 2373 + }, + { + "epoch": 0.06518396485447556, + "grad_norm": 0.4109501838684082, + "learning_rate": 1.995185255819655e-05, + "loss": 0.6224, + "step": 2374 + }, + { + "epoch": 0.06521142229544206, + "grad_norm": 0.4686250388622284, + "learning_rate": 1.995181021827846e-05, + "loss": 0.5278, + "step": 2375 + }, + { + "epoch": 0.06523887973640857, + "grad_norm": 0.3437930643558502, + "learning_rate": 1.9951767859797074e-05, + "loss": 0.5356, + "step": 2376 + }, + { + "epoch": 0.06526633717737507, + "grad_norm": 0.3621160089969635, + "learning_rate": 1.995172548275246e-05, + "loss": 0.5497, + "step": 2377 + }, + { + "epoch": 0.06529379461834157, + "grad_norm": 0.4090946912765503, + "learning_rate": 1.9951683087144705e-05, + "loss": 0.5848, + "step": 2378 + }, + { + "epoch": 0.06532125205930807, + "grad_norm": 0.3702278435230255, + "learning_rate": 1.9951640672973887e-05, + "loss": 0.4896, + "step": 2379 + }, + { + "epoch": 0.06534870950027458, + "grad_norm": 0.42307791113853455, + "learning_rate": 1.995159824024008e-05, + "loss": 0.4921, + "step": 2380 + }, + { + "epoch": 0.06537616694124107, + "grad_norm": 0.3260853886604309, + "learning_rate": 1.9951555788943364e-05, + "loss": 0.529, + "step": 2381 + }, + { + "epoch": 0.06540362438220758, + "grad_norm": 0.34623345732688904, + "learning_rate": 1.9951513319083822e-05, + "loss": 0.5616, + "step": 2382 + }, + { + "epoch": 0.06543108182317409, + "grad_norm": 0.35533228516578674, + "learning_rate": 1.9951470830661533e-05, + "loss": 0.5077, + "step": 2383 + }, + { + "epoch": 0.06545853926414058, + "grad_norm": 0.40130531787872314, + "learning_rate": 1.9951428323676575e-05, + "loss": 0.5544, + "step": 2384 + }, + { + "epoch": 0.06548599670510709, + "grad_norm": 0.4143134653568268, + "learning_rate": 1.9951385798129025e-05, + "loss": 0.5961, + "step": 2385 + }, + { + "epoch": 0.06551345414607358, + "grad_norm": 0.3523617684841156, + "learning_rate": 1.9951343254018965e-05, + "loss": 0.4967, + "step": 2386 + }, + { + "epoch": 0.06554091158704009, + "grad_norm": 0.3218756914138794, + "learning_rate": 1.9951300691346472e-05, + "loss": 0.5101, + "step": 2387 + }, + { + "epoch": 0.06556836902800658, + "grad_norm": 0.4726930558681488, + "learning_rate": 1.9951258110111632e-05, + "loss": 0.7072, + "step": 2388 + }, + { + "epoch": 0.06559582646897309, + "grad_norm": 0.36453983187675476, + "learning_rate": 1.9951215510314515e-05, + "loss": 0.5275, + "step": 2389 + }, + { + "epoch": 0.0656232839099396, + "grad_norm": 0.40244221687316895, + "learning_rate": 1.9951172891955207e-05, + "loss": 0.7143, + "step": 2390 + }, + { + "epoch": 0.0656507413509061, + "grad_norm": 0.3376240134239197, + "learning_rate": 1.9951130255033785e-05, + "loss": 0.5386, + "step": 2391 + }, + { + "epoch": 0.0656781987918726, + "grad_norm": 0.35504403710365295, + "learning_rate": 1.9951087599550327e-05, + "loss": 0.4933, + "step": 2392 + }, + { + "epoch": 0.0657056562328391, + "grad_norm": 0.3846206068992615, + "learning_rate": 1.9951044925504915e-05, + "loss": 0.5841, + "step": 2393 + }, + { + "epoch": 0.0657331136738056, + "grad_norm": 0.3544868230819702, + "learning_rate": 1.9951002232897633e-05, + "loss": 0.6378, + "step": 2394 + }, + { + "epoch": 0.0657605711147721, + "grad_norm": 0.3465864062309265, + "learning_rate": 1.9950959521728552e-05, + "loss": 0.4862, + "step": 2395 + }, + { + "epoch": 0.0657880285557386, + "grad_norm": 0.4300004839897156, + "learning_rate": 1.9950916791997757e-05, + "loss": 0.5508, + "step": 2396 + }, + { + "epoch": 0.0658154859967051, + "grad_norm": 0.35596659779548645, + "learning_rate": 1.9950874043705322e-05, + "loss": 0.5493, + "step": 2397 + }, + { + "epoch": 0.06584294343767161, + "grad_norm": 0.3728666305541992, + "learning_rate": 1.9950831276851337e-05, + "loss": 0.5294, + "step": 2398 + }, + { + "epoch": 0.06587040087863812, + "grad_norm": 0.3770555853843689, + "learning_rate": 1.9950788491435874e-05, + "loss": 0.6146, + "step": 2399 + }, + { + "epoch": 0.06589785831960461, + "grad_norm": 0.37264615297317505, + "learning_rate": 1.9950745687459013e-05, + "loss": 0.5663, + "step": 2400 + }, + { + "epoch": 0.06592531576057112, + "grad_norm": 0.37756600975990295, + "learning_rate": 1.9950702864920837e-05, + "loss": 0.5783, + "step": 2401 + }, + { + "epoch": 0.06595277320153761, + "grad_norm": 0.35577312111854553, + "learning_rate": 1.9950660023821422e-05, + "loss": 0.6352, + "step": 2402 + }, + { + "epoch": 0.06598023064250412, + "grad_norm": 0.36462417244911194, + "learning_rate": 1.995061716416085e-05, + "loss": 0.5111, + "step": 2403 + }, + { + "epoch": 0.06600768808347061, + "grad_norm": 0.35594162344932556, + "learning_rate": 1.9950574285939204e-05, + "loss": 0.4975, + "step": 2404 + }, + { + "epoch": 0.06603514552443712, + "grad_norm": 0.3404890298843384, + "learning_rate": 1.995053138915656e-05, + "loss": 0.4943, + "step": 2405 + }, + { + "epoch": 0.06606260296540363, + "grad_norm": 0.34419503808021545, + "learning_rate": 1.9950488473812997e-05, + "loss": 0.6186, + "step": 2406 + }, + { + "epoch": 0.06609006040637012, + "grad_norm": 0.33428871631622314, + "learning_rate": 1.99504455399086e-05, + "loss": 0.5512, + "step": 2407 + }, + { + "epoch": 0.06611751784733663, + "grad_norm": 0.31703981757164, + "learning_rate": 1.9950402587443448e-05, + "loss": 0.4772, + "step": 2408 + }, + { + "epoch": 0.06614497528830313, + "grad_norm": 0.35289838910102844, + "learning_rate": 1.9950359616417615e-05, + "loss": 0.561, + "step": 2409 + }, + { + "epoch": 0.06617243272926963, + "grad_norm": 0.4048164188861847, + "learning_rate": 1.9950316626831186e-05, + "loss": 0.5394, + "step": 2410 + }, + { + "epoch": 0.06619989017023613, + "grad_norm": 0.3773423731327057, + "learning_rate": 1.9950273618684243e-05, + "loss": 0.6116, + "step": 2411 + }, + { + "epoch": 0.06622734761120264, + "grad_norm": 0.37130776047706604, + "learning_rate": 1.9950230591976862e-05, + "loss": 0.5263, + "step": 2412 + }, + { + "epoch": 0.06625480505216914, + "grad_norm": 0.6545937061309814, + "learning_rate": 1.9950187546709127e-05, + "loss": 0.6434, + "step": 2413 + }, + { + "epoch": 0.06628226249313564, + "grad_norm": 0.3710745871067047, + "learning_rate": 1.9950144482881114e-05, + "loss": 0.6272, + "step": 2414 + }, + { + "epoch": 0.06630971993410215, + "grad_norm": 0.3910004496574402, + "learning_rate": 1.995010140049291e-05, + "loss": 0.5648, + "step": 2415 + }, + { + "epoch": 0.06633717737506864, + "grad_norm": 0.358593225479126, + "learning_rate": 1.9950058299544585e-05, + "loss": 0.5897, + "step": 2416 + }, + { + "epoch": 0.06636463481603515, + "grad_norm": 0.33523327112197876, + "learning_rate": 1.995001518003623e-05, + "loss": 0.4351, + "step": 2417 + }, + { + "epoch": 0.06639209225700164, + "grad_norm": 0.36261075735092163, + "learning_rate": 1.994997204196792e-05, + "loss": 0.5554, + "step": 2418 + }, + { + "epoch": 0.06641954969796815, + "grad_norm": 0.4395487606525421, + "learning_rate": 1.9949928885339735e-05, + "loss": 0.5593, + "step": 2419 + }, + { + "epoch": 0.06644700713893466, + "grad_norm": 0.36798161268234253, + "learning_rate": 1.9949885710151758e-05, + "loss": 0.557, + "step": 2420 + }, + { + "epoch": 0.06647446457990115, + "grad_norm": 0.37082457542419434, + "learning_rate": 1.9949842516404073e-05, + "loss": 0.59, + "step": 2421 + }, + { + "epoch": 0.06650192202086766, + "grad_norm": 0.3638967275619507, + "learning_rate": 1.994979930409675e-05, + "loss": 0.5234, + "step": 2422 + }, + { + "epoch": 0.06652937946183415, + "grad_norm": 0.36279240250587463, + "learning_rate": 1.9949756073229877e-05, + "loss": 0.4795, + "step": 2423 + }, + { + "epoch": 0.06655683690280066, + "grad_norm": 0.3546316623687744, + "learning_rate": 1.9949712823803535e-05, + "loss": 0.5204, + "step": 2424 + }, + { + "epoch": 0.06658429434376716, + "grad_norm": 0.3369678854942322, + "learning_rate": 1.9949669555817804e-05, + "loss": 0.5588, + "step": 2425 + }, + { + "epoch": 0.06661175178473366, + "grad_norm": 0.35158663988113403, + "learning_rate": 1.994962626927276e-05, + "loss": 0.4849, + "step": 2426 + }, + { + "epoch": 0.06663920922570016, + "grad_norm": 0.3536481261253357, + "learning_rate": 1.994958296416849e-05, + "loss": 0.5305, + "step": 2427 + }, + { + "epoch": 0.06666666666666667, + "grad_norm": 0.3814046382904053, + "learning_rate": 1.994953964050507e-05, + "loss": 0.6067, + "step": 2428 + }, + { + "epoch": 0.06669412410763317, + "grad_norm": 0.35357314348220825, + "learning_rate": 1.9949496298282586e-05, + "loss": 0.4843, + "step": 2429 + }, + { + "epoch": 0.06672158154859967, + "grad_norm": 0.4029700458049774, + "learning_rate": 1.9949452937501116e-05, + "loss": 0.4546, + "step": 2430 + }, + { + "epoch": 0.06674903898956618, + "grad_norm": 0.40141725540161133, + "learning_rate": 1.9949409558160736e-05, + "loss": 0.6528, + "step": 2431 + }, + { + "epoch": 0.06677649643053267, + "grad_norm": 0.3752491772174835, + "learning_rate": 1.994936616026154e-05, + "loss": 0.5254, + "step": 2432 + }, + { + "epoch": 0.06680395387149918, + "grad_norm": 0.41975846886634827, + "learning_rate": 1.9949322743803594e-05, + "loss": 0.5283, + "step": 2433 + }, + { + "epoch": 0.06683141131246567, + "grad_norm": 0.3690173327922821, + "learning_rate": 1.9949279308786987e-05, + "loss": 0.5615, + "step": 2434 + }, + { + "epoch": 0.06685886875343218, + "grad_norm": 0.33703848719596863, + "learning_rate": 1.9949235855211797e-05, + "loss": 0.5446, + "step": 2435 + }, + { + "epoch": 0.06688632619439869, + "grad_norm": 0.3546878397464752, + "learning_rate": 1.9949192383078108e-05, + "loss": 0.4764, + "step": 2436 + }, + { + "epoch": 0.06691378363536518, + "grad_norm": 0.4123169183731079, + "learning_rate": 1.9949148892386005e-05, + "loss": 0.5649, + "step": 2437 + }, + { + "epoch": 0.06694124107633169, + "grad_norm": 0.34833216667175293, + "learning_rate": 1.9949105383135556e-05, + "loss": 0.5151, + "step": 2438 + }, + { + "epoch": 0.06696869851729818, + "grad_norm": 0.3220106065273285, + "learning_rate": 1.9949061855326852e-05, + "loss": 0.5169, + "step": 2439 + }, + { + "epoch": 0.06699615595826469, + "grad_norm": 0.4123360216617584, + "learning_rate": 1.9949018308959974e-05, + "loss": 0.5286, + "step": 2440 + }, + { + "epoch": 0.06702361339923119, + "grad_norm": 0.34656408429145813, + "learning_rate": 1.9948974744035002e-05, + "loss": 0.5576, + "step": 2441 + }, + { + "epoch": 0.0670510708401977, + "grad_norm": 0.37669599056243896, + "learning_rate": 1.9948931160552015e-05, + "loss": 0.5156, + "step": 2442 + }, + { + "epoch": 0.0670785282811642, + "grad_norm": 0.42906442284584045, + "learning_rate": 1.99488875585111e-05, + "loss": 0.5789, + "step": 2443 + }, + { + "epoch": 0.0671059857221307, + "grad_norm": 0.34615379571914673, + "learning_rate": 1.994884393791233e-05, + "loss": 0.5442, + "step": 2444 + }, + { + "epoch": 0.0671334431630972, + "grad_norm": 0.4075731933116913, + "learning_rate": 1.9948800298755793e-05, + "loss": 0.5074, + "step": 2445 + }, + { + "epoch": 0.0671609006040637, + "grad_norm": 0.3915160000324249, + "learning_rate": 1.9948756641041566e-05, + "loss": 0.475, + "step": 2446 + }, + { + "epoch": 0.0671883580450302, + "grad_norm": 0.3490441143512726, + "learning_rate": 1.9948712964769735e-05, + "loss": 0.4751, + "step": 2447 + }, + { + "epoch": 0.0672158154859967, + "grad_norm": 0.39158669114112854, + "learning_rate": 1.9948669269940377e-05, + "loss": 0.5774, + "step": 2448 + }, + { + "epoch": 0.06724327292696321, + "grad_norm": 0.35388022661209106, + "learning_rate": 1.9948625556553575e-05, + "loss": 0.634, + "step": 2449 + }, + { + "epoch": 0.06727073036792972, + "grad_norm": 0.4043738842010498, + "learning_rate": 1.9948581824609416e-05, + "loss": 0.5089, + "step": 2450 + }, + { + "epoch": 0.06729818780889621, + "grad_norm": 0.3756684958934784, + "learning_rate": 1.994853807410797e-05, + "loss": 0.5308, + "step": 2451 + }, + { + "epoch": 0.06732564524986272, + "grad_norm": 0.3822261393070221, + "learning_rate": 1.9948494305049332e-05, + "loss": 0.5578, + "step": 2452 + }, + { + "epoch": 0.06735310269082921, + "grad_norm": 0.39924734830856323, + "learning_rate": 1.994845051743357e-05, + "loss": 0.5698, + "step": 2453 + }, + { + "epoch": 0.06738056013179572, + "grad_norm": 0.3949912190437317, + "learning_rate": 1.9948406711260776e-05, + "loss": 0.5325, + "step": 2454 + }, + { + "epoch": 0.06740801757276221, + "grad_norm": 0.3612290620803833, + "learning_rate": 1.994836288653103e-05, + "loss": 0.5852, + "step": 2455 + }, + { + "epoch": 0.06743547501372872, + "grad_norm": 0.38283222913742065, + "learning_rate": 1.994831904324441e-05, + "loss": 0.5586, + "step": 2456 + }, + { + "epoch": 0.06746293245469523, + "grad_norm": 0.3937833905220032, + "learning_rate": 1.9948275181401e-05, + "loss": 0.5566, + "step": 2457 + }, + { + "epoch": 0.06749038989566172, + "grad_norm": 0.34893596172332764, + "learning_rate": 1.994823130100088e-05, + "loss": 0.5843, + "step": 2458 + }, + { + "epoch": 0.06751784733662823, + "grad_norm": 0.3804466426372528, + "learning_rate": 1.994818740204414e-05, + "loss": 0.5362, + "step": 2459 + }, + { + "epoch": 0.06754530477759473, + "grad_norm": 0.3672961890697479, + "learning_rate": 1.994814348453085e-05, + "loss": 0.4723, + "step": 2460 + }, + { + "epoch": 0.06757276221856123, + "grad_norm": 0.35358694195747375, + "learning_rate": 1.9948099548461098e-05, + "loss": 0.5043, + "step": 2461 + }, + { + "epoch": 0.06760021965952773, + "grad_norm": 0.3814089596271515, + "learning_rate": 1.9948055593834965e-05, + "loss": 0.5549, + "step": 2462 + }, + { + "epoch": 0.06762767710049424, + "grad_norm": 0.366769939661026, + "learning_rate": 1.9948011620652533e-05, + "loss": 0.6147, + "step": 2463 + }, + { + "epoch": 0.06765513454146073, + "grad_norm": 0.33227694034576416, + "learning_rate": 1.9947967628913885e-05, + "loss": 0.5221, + "step": 2464 + }, + { + "epoch": 0.06768259198242724, + "grad_norm": 0.46404004096984863, + "learning_rate": 1.9947923618619104e-05, + "loss": 0.6504, + "step": 2465 + }, + { + "epoch": 0.06771004942339374, + "grad_norm": 0.35283759236335754, + "learning_rate": 1.9947879589768267e-05, + "loss": 0.5643, + "step": 2466 + }, + { + "epoch": 0.06773750686436024, + "grad_norm": 0.4014884829521179, + "learning_rate": 1.9947835542361462e-05, + "loss": 0.5034, + "step": 2467 + }, + { + "epoch": 0.06776496430532675, + "grad_norm": 0.3382922112941742, + "learning_rate": 1.9947791476398768e-05, + "loss": 0.494, + "step": 2468 + }, + { + "epoch": 0.06779242174629324, + "grad_norm": 0.36997005343437195, + "learning_rate": 1.9947747391880268e-05, + "loss": 0.6103, + "step": 2469 + }, + { + "epoch": 0.06781987918725975, + "grad_norm": 0.41770434379577637, + "learning_rate": 1.9947703288806045e-05, + "loss": 0.5704, + "step": 2470 + }, + { + "epoch": 0.06784733662822624, + "grad_norm": 0.6453843712806702, + "learning_rate": 1.994765916717618e-05, + "loss": 0.6629, + "step": 2471 + }, + { + "epoch": 0.06787479406919275, + "grad_norm": 0.382752388715744, + "learning_rate": 1.9947615026990755e-05, + "loss": 0.5635, + "step": 2472 + }, + { + "epoch": 0.06790225151015926, + "grad_norm": 0.379184752702713, + "learning_rate": 1.9947570868249852e-05, + "loss": 0.6075, + "step": 2473 + }, + { + "epoch": 0.06792970895112575, + "grad_norm": 0.37019670009613037, + "learning_rate": 1.9947526690953557e-05, + "loss": 0.5596, + "step": 2474 + }, + { + "epoch": 0.06795716639209226, + "grad_norm": 0.3334265947341919, + "learning_rate": 1.9947482495101948e-05, + "loss": 0.4917, + "step": 2475 + }, + { + "epoch": 0.06798462383305875, + "grad_norm": 0.36810392141342163, + "learning_rate": 1.994743828069511e-05, + "loss": 0.5286, + "step": 2476 + }, + { + "epoch": 0.06801208127402526, + "grad_norm": 0.34017282724380493, + "learning_rate": 1.9947394047733124e-05, + "loss": 0.5557, + "step": 2477 + }, + { + "epoch": 0.06803953871499176, + "grad_norm": 0.36422809958457947, + "learning_rate": 1.9947349796216075e-05, + "loss": 0.5731, + "step": 2478 + }, + { + "epoch": 0.06806699615595826, + "grad_norm": 0.36500614881515503, + "learning_rate": 1.9947305526144044e-05, + "loss": 0.6077, + "step": 2479 + }, + { + "epoch": 0.06809445359692477, + "grad_norm": 0.3905034363269806, + "learning_rate": 1.9947261237517113e-05, + "loss": 0.6033, + "step": 2480 + }, + { + "epoch": 0.06812191103789127, + "grad_norm": 0.36006250977516174, + "learning_rate": 1.9947216930335363e-05, + "loss": 0.5495, + "step": 2481 + }, + { + "epoch": 0.06814936847885777, + "grad_norm": 0.3371071219444275, + "learning_rate": 1.9947172604598878e-05, + "loss": 0.5162, + "step": 2482 + }, + { + "epoch": 0.06817682591982427, + "grad_norm": 0.37942707538604736, + "learning_rate": 1.9947128260307744e-05, + "loss": 0.5762, + "step": 2483 + }, + { + "epoch": 0.06820428336079078, + "grad_norm": 0.38507717847824097, + "learning_rate": 1.994708389746204e-05, + "loss": 0.5462, + "step": 2484 + }, + { + "epoch": 0.06823174080175727, + "grad_norm": 0.3783193528652191, + "learning_rate": 1.994703951606185e-05, + "loss": 0.6364, + "step": 2485 + }, + { + "epoch": 0.06825919824272378, + "grad_norm": 0.35543766617774963, + "learning_rate": 1.994699511610726e-05, + "loss": 0.6027, + "step": 2486 + }, + { + "epoch": 0.06828665568369029, + "grad_norm": 0.34314975142478943, + "learning_rate": 1.994695069759834e-05, + "loss": 0.5122, + "step": 2487 + }, + { + "epoch": 0.06831411312465678, + "grad_norm": 0.3640054166316986, + "learning_rate": 1.994690626053519e-05, + "loss": 0.4736, + "step": 2488 + }, + { + "epoch": 0.06834157056562329, + "grad_norm": 0.3699384033679962, + "learning_rate": 1.9946861804917887e-05, + "loss": 0.5929, + "step": 2489 + }, + { + "epoch": 0.06836902800658978, + "grad_norm": 0.3777710199356079, + "learning_rate": 1.9946817330746505e-05, + "loss": 0.5186, + "step": 2490 + }, + { + "epoch": 0.06839648544755629, + "grad_norm": 0.3909810483455658, + "learning_rate": 1.9946772838021137e-05, + "loss": 0.6544, + "step": 2491 + }, + { + "epoch": 0.06842394288852278, + "grad_norm": 0.3749752342700958, + "learning_rate": 1.9946728326741865e-05, + "loss": 0.6736, + "step": 2492 + }, + { + "epoch": 0.06845140032948929, + "grad_norm": 0.3217942416667938, + "learning_rate": 1.994668379690877e-05, + "loss": 0.5239, + "step": 2493 + }, + { + "epoch": 0.06847885777045579, + "grad_norm": 0.3616379499435425, + "learning_rate": 1.9946639248521933e-05, + "loss": 0.598, + "step": 2494 + }, + { + "epoch": 0.0685063152114223, + "grad_norm": 0.33275681734085083, + "learning_rate": 1.994659468158144e-05, + "loss": 0.5187, + "step": 2495 + }, + { + "epoch": 0.0685337726523888, + "grad_norm": 0.3579504191875458, + "learning_rate": 1.9946550096087373e-05, + "loss": 0.4963, + "step": 2496 + }, + { + "epoch": 0.0685612300933553, + "grad_norm": 0.37384963035583496, + "learning_rate": 1.9946505492039816e-05, + "loss": 0.4772, + "step": 2497 + }, + { + "epoch": 0.0685886875343218, + "grad_norm": 0.4308069944381714, + "learning_rate": 1.994646086943885e-05, + "loss": 0.491, + "step": 2498 + }, + { + "epoch": 0.0686161449752883, + "grad_norm": 0.3591962456703186, + "learning_rate": 1.994641622828456e-05, + "loss": 0.5769, + "step": 2499 + }, + { + "epoch": 0.0686436024162548, + "grad_norm": 0.497003436088562, + "learning_rate": 1.9946371568577032e-05, + "loss": 0.536, + "step": 2500 + }, + { + "epoch": 0.0686710598572213, + "grad_norm": 0.31872129440307617, + "learning_rate": 1.9946326890316345e-05, + "loss": 0.4871, + "step": 2501 + }, + { + "epoch": 0.06869851729818781, + "grad_norm": 0.40617093443870544, + "learning_rate": 1.9946282193502583e-05, + "loss": 0.5242, + "step": 2502 + }, + { + "epoch": 0.06872597473915432, + "grad_norm": 0.422521710395813, + "learning_rate": 1.994623747813583e-05, + "loss": 0.6196, + "step": 2503 + }, + { + "epoch": 0.06875343218012081, + "grad_norm": 0.3546566665172577, + "learning_rate": 1.9946192744216172e-05, + "loss": 0.5432, + "step": 2504 + }, + { + "epoch": 0.06878088962108732, + "grad_norm": 0.4822406768798828, + "learning_rate": 1.994614799174369e-05, + "loss": 0.5351, + "step": 2505 + }, + { + "epoch": 0.06880834706205381, + "grad_norm": 0.3569478988647461, + "learning_rate": 1.9946103220718463e-05, + "loss": 0.5522, + "step": 2506 + }, + { + "epoch": 0.06883580450302032, + "grad_norm": 0.3895514905452728, + "learning_rate": 1.9946058431140587e-05, + "loss": 0.5628, + "step": 2507 + }, + { + "epoch": 0.06886326194398681, + "grad_norm": 0.4540875256061554, + "learning_rate": 1.994601362301013e-05, + "loss": 0.5085, + "step": 2508 + }, + { + "epoch": 0.06889071938495332, + "grad_norm": 0.4012500047683716, + "learning_rate": 1.9945968796327188e-05, + "loss": 0.578, + "step": 2509 + }, + { + "epoch": 0.06891817682591983, + "grad_norm": 0.3989761769771576, + "learning_rate": 1.9945923951091837e-05, + "loss": 0.5584, + "step": 2510 + }, + { + "epoch": 0.06894563426688632, + "grad_norm": 0.3539775013923645, + "learning_rate": 1.9945879087304164e-05, + "loss": 0.5375, + "step": 2511 + }, + { + "epoch": 0.06897309170785283, + "grad_norm": 0.5623977184295654, + "learning_rate": 1.9945834204964254e-05, + "loss": 0.4889, + "step": 2512 + }, + { + "epoch": 0.06900054914881933, + "grad_norm": 0.38151729106903076, + "learning_rate": 1.9945789304072188e-05, + "loss": 0.5252, + "step": 2513 + }, + { + "epoch": 0.06902800658978583, + "grad_norm": 0.35390251874923706, + "learning_rate": 1.994574438462805e-05, + "loss": 0.4848, + "step": 2514 + }, + { + "epoch": 0.06905546403075233, + "grad_norm": 0.3761693835258484, + "learning_rate": 1.9945699446631927e-05, + "loss": 0.4746, + "step": 2515 + }, + { + "epoch": 0.06908292147171884, + "grad_norm": 0.3529433608055115, + "learning_rate": 1.9945654490083894e-05, + "loss": 0.5028, + "step": 2516 + }, + { + "epoch": 0.06911037891268534, + "grad_norm": 0.4220900535583496, + "learning_rate": 1.9945609514984047e-05, + "loss": 0.5625, + "step": 2517 + }, + { + "epoch": 0.06913783635365184, + "grad_norm": 0.3410559296607971, + "learning_rate": 1.9945564521332458e-05, + "loss": 0.5622, + "step": 2518 + }, + { + "epoch": 0.06916529379461835, + "grad_norm": 0.3723577857017517, + "learning_rate": 1.9945519509129224e-05, + "loss": 0.566, + "step": 2519 + }, + { + "epoch": 0.06919275123558484, + "grad_norm": 0.38781461119651794, + "learning_rate": 1.994547447837442e-05, + "loss": 0.5647, + "step": 2520 + }, + { + "epoch": 0.06922020867655135, + "grad_norm": 0.40047988295555115, + "learning_rate": 1.9945429429068127e-05, + "loss": 0.6192, + "step": 2521 + }, + { + "epoch": 0.06924766611751784, + "grad_norm": 0.3830215036869049, + "learning_rate": 1.9945384361210438e-05, + "loss": 0.5921, + "step": 2522 + }, + { + "epoch": 0.06927512355848435, + "grad_norm": 0.3816046416759491, + "learning_rate": 1.9945339274801432e-05, + "loss": 0.5835, + "step": 2523 + }, + { + "epoch": 0.06930258099945086, + "grad_norm": 0.5515255331993103, + "learning_rate": 1.9945294169841196e-05, + "loss": 0.5578, + "step": 2524 + }, + { + "epoch": 0.06933003844041735, + "grad_norm": 0.3673606514930725, + "learning_rate": 1.994524904632981e-05, + "loss": 0.5701, + "step": 2525 + }, + { + "epoch": 0.06935749588138386, + "grad_norm": 0.4832536578178406, + "learning_rate": 1.994520390426736e-05, + "loss": 0.5459, + "step": 2526 + }, + { + "epoch": 0.06938495332235035, + "grad_norm": 0.3989959955215454, + "learning_rate": 1.9945158743653933e-05, + "loss": 0.5179, + "step": 2527 + }, + { + "epoch": 0.06941241076331686, + "grad_norm": 0.37179499864578247, + "learning_rate": 1.994511356448961e-05, + "loss": 0.5526, + "step": 2528 + }, + { + "epoch": 0.06943986820428336, + "grad_norm": 0.3651149272918701, + "learning_rate": 1.9945068366774474e-05, + "loss": 0.5907, + "step": 2529 + }, + { + "epoch": 0.06946732564524986, + "grad_norm": 0.36114662885665894, + "learning_rate": 1.9945023150508613e-05, + "loss": 0.5719, + "step": 2530 + }, + { + "epoch": 0.06949478308621636, + "grad_norm": 0.3603382706642151, + "learning_rate": 1.9944977915692113e-05, + "loss": 0.5284, + "step": 2531 + }, + { + "epoch": 0.06952224052718287, + "grad_norm": 0.350202351808548, + "learning_rate": 1.994493266232505e-05, + "loss": 0.5453, + "step": 2532 + }, + { + "epoch": 0.06954969796814937, + "grad_norm": 0.37228116393089294, + "learning_rate": 1.9944887390407515e-05, + "loss": 0.5289, + "step": 2533 + }, + { + "epoch": 0.06957715540911587, + "grad_norm": 0.3300260603427887, + "learning_rate": 1.9944842099939592e-05, + "loss": 0.5354, + "step": 2534 + }, + { + "epoch": 0.06960461285008238, + "grad_norm": 0.4777238667011261, + "learning_rate": 1.994479679092136e-05, + "loss": 0.6283, + "step": 2535 + }, + { + "epoch": 0.06963207029104887, + "grad_norm": 0.36970382928848267, + "learning_rate": 1.9944751463352913e-05, + "loss": 0.575, + "step": 2536 + }, + { + "epoch": 0.06965952773201538, + "grad_norm": 0.34163615107536316, + "learning_rate": 1.9944706117234332e-05, + "loss": 0.5336, + "step": 2537 + }, + { + "epoch": 0.06968698517298187, + "grad_norm": 0.3359769880771637, + "learning_rate": 1.9944660752565697e-05, + "loss": 0.5434, + "step": 2538 + }, + { + "epoch": 0.06971444261394838, + "grad_norm": 0.3446539342403412, + "learning_rate": 1.9944615369347097e-05, + "loss": 0.5494, + "step": 2539 + }, + { + "epoch": 0.06974190005491489, + "grad_norm": 0.3866296410560608, + "learning_rate": 1.9944569967578615e-05, + "loss": 0.5488, + "step": 2540 + }, + { + "epoch": 0.06976935749588138, + "grad_norm": 0.3613860011100769, + "learning_rate": 1.9944524547260334e-05, + "loss": 0.5598, + "step": 2541 + }, + { + "epoch": 0.06979681493684789, + "grad_norm": 0.4183613359928131, + "learning_rate": 1.994447910839234e-05, + "loss": 0.5789, + "step": 2542 + }, + { + "epoch": 0.06982427237781438, + "grad_norm": 0.3624984323978424, + "learning_rate": 1.9944433650974722e-05, + "loss": 0.4791, + "step": 2543 + }, + { + "epoch": 0.06985172981878089, + "grad_norm": 0.3814305365085602, + "learning_rate": 1.994438817500756e-05, + "loss": 0.5398, + "step": 2544 + }, + { + "epoch": 0.06987918725974739, + "grad_norm": 0.33490052819252014, + "learning_rate": 1.994434268049094e-05, + "loss": 0.5373, + "step": 2545 + }, + { + "epoch": 0.0699066447007139, + "grad_norm": 0.41264626383781433, + "learning_rate": 1.9944297167424946e-05, + "loss": 0.5318, + "step": 2546 + }, + { + "epoch": 0.0699341021416804, + "grad_norm": 0.4095516800880432, + "learning_rate": 1.9944251635809667e-05, + "loss": 0.5329, + "step": 2547 + }, + { + "epoch": 0.0699615595826469, + "grad_norm": 0.41390979290008545, + "learning_rate": 1.9944206085645183e-05, + "loss": 0.5535, + "step": 2548 + }, + { + "epoch": 0.0699890170236134, + "grad_norm": 0.4160715341567993, + "learning_rate": 1.9944160516931582e-05, + "loss": 0.6511, + "step": 2549 + }, + { + "epoch": 0.0700164744645799, + "grad_norm": 0.4027978777885437, + "learning_rate": 1.9944114929668946e-05, + "loss": 0.5978, + "step": 2550 + }, + { + "epoch": 0.0700439319055464, + "grad_norm": 0.3429988622665405, + "learning_rate": 1.9944069323857365e-05, + "loss": 0.442, + "step": 2551 + }, + { + "epoch": 0.0700713893465129, + "grad_norm": 0.41126179695129395, + "learning_rate": 1.9944023699496918e-05, + "loss": 0.61, + "step": 2552 + }, + { + "epoch": 0.07009884678747941, + "grad_norm": 0.36616581678390503, + "learning_rate": 1.9943978056587693e-05, + "loss": 0.4892, + "step": 2553 + }, + { + "epoch": 0.07012630422844592, + "grad_norm": 0.3552047610282898, + "learning_rate": 1.994393239512978e-05, + "loss": 0.6039, + "step": 2554 + }, + { + "epoch": 0.07015376166941241, + "grad_norm": 0.37143319845199585, + "learning_rate": 1.9943886715123252e-05, + "loss": 0.6143, + "step": 2555 + }, + { + "epoch": 0.07018121911037892, + "grad_norm": 0.3834437429904938, + "learning_rate": 1.99438410165682e-05, + "loss": 0.6, + "step": 2556 + }, + { + "epoch": 0.07020867655134541, + "grad_norm": 0.377986341714859, + "learning_rate": 1.9943795299464717e-05, + "loss": 0.5734, + "step": 2557 + }, + { + "epoch": 0.07023613399231192, + "grad_norm": 0.3634434640407562, + "learning_rate": 1.9943749563812883e-05, + "loss": 0.543, + "step": 2558 + }, + { + "epoch": 0.07026359143327841, + "grad_norm": 0.36504262685775757, + "learning_rate": 1.994370380961278e-05, + "loss": 0.5832, + "step": 2559 + }, + { + "epoch": 0.07029104887424492, + "grad_norm": 0.3603762090206146, + "learning_rate": 1.9943658036864495e-05, + "loss": 0.5572, + "step": 2560 + }, + { + "epoch": 0.07031850631521142, + "grad_norm": 0.3836669325828552, + "learning_rate": 1.9943612245568115e-05, + "loss": 0.5705, + "step": 2561 + }, + { + "epoch": 0.07034596375617792, + "grad_norm": 0.32436054944992065, + "learning_rate": 1.9943566435723723e-05, + "loss": 0.5561, + "step": 2562 + }, + { + "epoch": 0.07037342119714443, + "grad_norm": 0.3204456865787506, + "learning_rate": 1.994352060733141e-05, + "loss": 0.4835, + "step": 2563 + }, + { + "epoch": 0.07040087863811093, + "grad_norm": 0.36620965600013733, + "learning_rate": 1.9943474760391253e-05, + "loss": 0.5591, + "step": 2564 + }, + { + "epoch": 0.07042833607907743, + "grad_norm": 0.33208566904067993, + "learning_rate": 1.9943428894903346e-05, + "loss": 0.5497, + "step": 2565 + }, + { + "epoch": 0.07045579352004393, + "grad_norm": 0.35680022835731506, + "learning_rate": 1.9943383010867768e-05, + "loss": 0.5936, + "step": 2566 + }, + { + "epoch": 0.07048325096101044, + "grad_norm": 0.36450469493865967, + "learning_rate": 1.9943337108284608e-05, + "loss": 0.5568, + "step": 2567 + }, + { + "epoch": 0.07051070840197693, + "grad_norm": 0.32450729608535767, + "learning_rate": 1.994329118715395e-05, + "loss": 0.4821, + "step": 2568 + }, + { + "epoch": 0.07053816584294344, + "grad_norm": 0.39028382301330566, + "learning_rate": 1.994324524747588e-05, + "loss": 0.5462, + "step": 2569 + }, + { + "epoch": 0.07056562328390995, + "grad_norm": 0.3685503304004669, + "learning_rate": 1.9943199289250486e-05, + "loss": 0.5855, + "step": 2570 + }, + { + "epoch": 0.07059308072487644, + "grad_norm": 0.31089234352111816, + "learning_rate": 1.9943153312477848e-05, + "loss": 0.4961, + "step": 2571 + }, + { + "epoch": 0.07062053816584295, + "grad_norm": 0.35604190826416016, + "learning_rate": 1.9943107317158058e-05, + "loss": 0.4681, + "step": 2572 + }, + { + "epoch": 0.07064799560680944, + "grad_norm": 0.3767017126083374, + "learning_rate": 1.99430613032912e-05, + "loss": 0.4762, + "step": 2573 + }, + { + "epoch": 0.07067545304777595, + "grad_norm": 0.3224920332431793, + "learning_rate": 1.994301527087736e-05, + "loss": 0.5132, + "step": 2574 + }, + { + "epoch": 0.07070291048874244, + "grad_norm": 0.3483562171459198, + "learning_rate": 1.994296921991662e-05, + "loss": 0.5656, + "step": 2575 + }, + { + "epoch": 0.07073036792970895, + "grad_norm": 0.37184998393058777, + "learning_rate": 1.994292315040907e-05, + "loss": 0.5567, + "step": 2576 + }, + { + "epoch": 0.07075782537067546, + "grad_norm": 0.39539819955825806, + "learning_rate": 1.9942877062354797e-05, + "loss": 0.5812, + "step": 2577 + }, + { + "epoch": 0.07078528281164195, + "grad_norm": 0.41819050908088684, + "learning_rate": 1.9942830955753885e-05, + "loss": 0.6274, + "step": 2578 + }, + { + "epoch": 0.07081274025260846, + "grad_norm": 0.3662780225276947, + "learning_rate": 1.9942784830606418e-05, + "loss": 0.5762, + "step": 2579 + }, + { + "epoch": 0.07084019769357495, + "grad_norm": 0.39505141973495483, + "learning_rate": 1.9942738686912484e-05, + "loss": 0.6463, + "step": 2580 + }, + { + "epoch": 0.07086765513454146, + "grad_norm": 0.3770921528339386, + "learning_rate": 1.994269252467217e-05, + "loss": 0.5053, + "step": 2581 + }, + { + "epoch": 0.07089511257550796, + "grad_norm": 0.3661443293094635, + "learning_rate": 1.9942646343885558e-05, + "loss": 0.5327, + "step": 2582 + }, + { + "epoch": 0.07092257001647446, + "grad_norm": 0.4816809594631195, + "learning_rate": 1.9942600144552743e-05, + "loss": 0.5706, + "step": 2583 + }, + { + "epoch": 0.07095002745744097, + "grad_norm": 0.3881710171699524, + "learning_rate": 1.99425539266738e-05, + "loss": 0.5405, + "step": 2584 + }, + { + "epoch": 0.07097748489840747, + "grad_norm": 0.3701106607913971, + "learning_rate": 1.9942507690248826e-05, + "loss": 0.5234, + "step": 2585 + }, + { + "epoch": 0.07100494233937397, + "grad_norm": 0.5333350896835327, + "learning_rate": 1.9942461435277896e-05, + "loss": 0.5625, + "step": 2586 + }, + { + "epoch": 0.07103239978034047, + "grad_norm": 0.47856807708740234, + "learning_rate": 1.9942415161761106e-05, + "loss": 0.4935, + "step": 2587 + }, + { + "epoch": 0.07105985722130698, + "grad_norm": 0.34923648834228516, + "learning_rate": 1.9942368869698536e-05, + "loss": 0.48, + "step": 2588 + }, + { + "epoch": 0.07108731466227347, + "grad_norm": 0.37891772389411926, + "learning_rate": 1.9942322559090277e-05, + "loss": 0.5526, + "step": 2589 + }, + { + "epoch": 0.07111477210323998, + "grad_norm": 0.3615868091583252, + "learning_rate": 1.9942276229936412e-05, + "loss": 0.5871, + "step": 2590 + }, + { + "epoch": 0.07114222954420649, + "grad_norm": 0.36254000663757324, + "learning_rate": 1.994222988223703e-05, + "loss": 0.6181, + "step": 2591 + }, + { + "epoch": 0.07116968698517298, + "grad_norm": 0.4926735758781433, + "learning_rate": 1.9942183515992214e-05, + "loss": 0.6063, + "step": 2592 + }, + { + "epoch": 0.07119714442613949, + "grad_norm": 0.372385710477829, + "learning_rate": 1.9942137131202054e-05, + "loss": 0.5909, + "step": 2593 + }, + { + "epoch": 0.07122460186710598, + "grad_norm": 0.41848719120025635, + "learning_rate": 1.9942090727866636e-05, + "loss": 0.5422, + "step": 2594 + }, + { + "epoch": 0.07125205930807249, + "grad_norm": 0.31702950596809387, + "learning_rate": 1.9942044305986045e-05, + "loss": 0.4573, + "step": 2595 + }, + { + "epoch": 0.07127951674903898, + "grad_norm": 0.39123478531837463, + "learning_rate": 1.9941997865560365e-05, + "loss": 0.5776, + "step": 2596 + }, + { + "epoch": 0.07130697419000549, + "grad_norm": 0.39267152547836304, + "learning_rate": 1.994195140658969e-05, + "loss": 0.5643, + "step": 2597 + }, + { + "epoch": 0.07133443163097199, + "grad_norm": 0.35269755125045776, + "learning_rate": 1.99419049290741e-05, + "loss": 0.5256, + "step": 2598 + }, + { + "epoch": 0.0713618890719385, + "grad_norm": 0.4239741861820221, + "learning_rate": 1.9941858433013686e-05, + "loss": 0.6336, + "step": 2599 + }, + { + "epoch": 0.071389346512905, + "grad_norm": 0.37426701188087463, + "learning_rate": 1.994181191840853e-05, + "loss": 0.4909, + "step": 2600 + }, + { + "epoch": 0.0714168039538715, + "grad_norm": 0.413070946931839, + "learning_rate": 1.9941765385258723e-05, + "loss": 0.6359, + "step": 2601 + }, + { + "epoch": 0.071444261394838, + "grad_norm": 0.4034333825111389, + "learning_rate": 1.9941718833564353e-05, + "loss": 0.6032, + "step": 2602 + }, + { + "epoch": 0.0714717188358045, + "grad_norm": 0.3669786751270294, + "learning_rate": 1.9941672263325504e-05, + "loss": 0.5612, + "step": 2603 + }, + { + "epoch": 0.071499176276771, + "grad_norm": 0.4065932035446167, + "learning_rate": 1.994162567454226e-05, + "loss": 0.5675, + "step": 2604 + }, + { + "epoch": 0.0715266337177375, + "grad_norm": 0.357594758272171, + "learning_rate": 1.9941579067214712e-05, + "loss": 0.5461, + "step": 2605 + }, + { + "epoch": 0.07155409115870401, + "grad_norm": 0.4127942621707916, + "learning_rate": 1.994153244134295e-05, + "loss": 0.6106, + "step": 2606 + }, + { + "epoch": 0.07158154859967052, + "grad_norm": 0.3475908935070038, + "learning_rate": 1.9941485796927053e-05, + "loss": 0.5656, + "step": 2607 + }, + { + "epoch": 0.07160900604063701, + "grad_norm": 0.36307427287101746, + "learning_rate": 1.9941439133967116e-05, + "loss": 0.4098, + "step": 2608 + }, + { + "epoch": 0.07163646348160352, + "grad_norm": 0.3814275860786438, + "learning_rate": 1.9941392452463218e-05, + "loss": 0.4961, + "step": 2609 + }, + { + "epoch": 0.07166392092257001, + "grad_norm": 0.32227155566215515, + "learning_rate": 1.9941345752415452e-05, + "loss": 0.5097, + "step": 2610 + }, + { + "epoch": 0.07169137836353652, + "grad_norm": 0.3656122088432312, + "learning_rate": 1.99412990338239e-05, + "loss": 0.5816, + "step": 2611 + }, + { + "epoch": 0.07171883580450301, + "grad_norm": 0.3937719166278839, + "learning_rate": 1.9941252296688655e-05, + "loss": 0.4721, + "step": 2612 + }, + { + "epoch": 0.07174629324546952, + "grad_norm": 0.3834493160247803, + "learning_rate": 1.99412055410098e-05, + "loss": 0.5784, + "step": 2613 + }, + { + "epoch": 0.07177375068643603, + "grad_norm": 0.3467981517314911, + "learning_rate": 1.994115876678743e-05, + "loss": 0.5613, + "step": 2614 + }, + { + "epoch": 0.07180120812740252, + "grad_norm": 0.3624782860279083, + "learning_rate": 1.9941111974021622e-05, + "loss": 0.5285, + "step": 2615 + }, + { + "epoch": 0.07182866556836903, + "grad_norm": 0.3708520531654358, + "learning_rate": 1.9941065162712467e-05, + "loss": 0.6291, + "step": 2616 + }, + { + "epoch": 0.07185612300933553, + "grad_norm": 0.3851027488708496, + "learning_rate": 1.994101833286005e-05, + "loss": 0.5669, + "step": 2617 + }, + { + "epoch": 0.07188358045030203, + "grad_norm": 0.3704240322113037, + "learning_rate": 1.9940971484464463e-05, + "loss": 0.5297, + "step": 2618 + }, + { + "epoch": 0.07191103789126853, + "grad_norm": 0.3824291527271271, + "learning_rate": 1.9940924617525792e-05, + "loss": 0.5336, + "step": 2619 + }, + { + "epoch": 0.07193849533223504, + "grad_norm": 0.33124932646751404, + "learning_rate": 1.9940877732044123e-05, + "loss": 0.5206, + "step": 2620 + }, + { + "epoch": 0.07196595277320154, + "grad_norm": 0.7106751799583435, + "learning_rate": 1.9940830828019547e-05, + "loss": 0.5201, + "step": 2621 + }, + { + "epoch": 0.07199341021416804, + "grad_norm": 0.34947019815444946, + "learning_rate": 1.9940783905452146e-05, + "loss": 0.5384, + "step": 2622 + }, + { + "epoch": 0.07202086765513455, + "grad_norm": 0.35613083839416504, + "learning_rate": 1.9940736964342012e-05, + "loss": 0.4831, + "step": 2623 + }, + { + "epoch": 0.07204832509610104, + "grad_norm": 0.36345064640045166, + "learning_rate": 1.9940690004689228e-05, + "loss": 0.5687, + "step": 2624 + }, + { + "epoch": 0.07207578253706755, + "grad_norm": 0.40493467450141907, + "learning_rate": 1.9940643026493887e-05, + "loss": 0.5381, + "step": 2625 + }, + { + "epoch": 0.07210323997803404, + "grad_norm": 0.36398592591285706, + "learning_rate": 1.9940596029756073e-05, + "loss": 0.595, + "step": 2626 + }, + { + "epoch": 0.07213069741900055, + "grad_norm": 0.38166388869285583, + "learning_rate": 1.9940549014475875e-05, + "loss": 0.6265, + "step": 2627 + }, + { + "epoch": 0.07215815485996704, + "grad_norm": 0.4015948176383972, + "learning_rate": 1.9940501980653383e-05, + "loss": 0.5774, + "step": 2628 + }, + { + "epoch": 0.07218561230093355, + "grad_norm": 0.4453607201576233, + "learning_rate": 1.994045492828868e-05, + "loss": 0.6102, + "step": 2629 + }, + { + "epoch": 0.07221306974190006, + "grad_norm": 0.36877599358558655, + "learning_rate": 1.9940407857381852e-05, + "loss": 0.5927, + "step": 2630 + }, + { + "epoch": 0.07224052718286655, + "grad_norm": 0.3988637626171112, + "learning_rate": 1.9940360767932996e-05, + "loss": 0.5317, + "step": 2631 + }, + { + "epoch": 0.07226798462383306, + "grad_norm": 0.5136372447013855, + "learning_rate": 1.9940313659942192e-05, + "loss": 0.5544, + "step": 2632 + }, + { + "epoch": 0.07229544206479956, + "grad_norm": 0.38091790676116943, + "learning_rate": 1.9940266533409532e-05, + "loss": 0.6279, + "step": 2633 + }, + { + "epoch": 0.07232289950576606, + "grad_norm": 0.3934011161327362, + "learning_rate": 1.9940219388335104e-05, + "loss": 0.5152, + "step": 2634 + }, + { + "epoch": 0.07235035694673256, + "grad_norm": 0.44202902913093567, + "learning_rate": 1.994017222471899e-05, + "loss": 0.5914, + "step": 2635 + }, + { + "epoch": 0.07237781438769907, + "grad_norm": 0.38018250465393066, + "learning_rate": 1.9940125042561285e-05, + "loss": 0.4886, + "step": 2636 + }, + { + "epoch": 0.07240527182866557, + "grad_norm": 0.3356010317802429, + "learning_rate": 1.9940077841862075e-05, + "loss": 0.5247, + "step": 2637 + }, + { + "epoch": 0.07243272926963207, + "grad_norm": 0.5002802610397339, + "learning_rate": 1.9940030622621442e-05, + "loss": 0.563, + "step": 2638 + }, + { + "epoch": 0.07246018671059858, + "grad_norm": 0.36134690046310425, + "learning_rate": 1.9939983384839485e-05, + "loss": 0.5543, + "step": 2639 + }, + { + "epoch": 0.07248764415156507, + "grad_norm": 0.3746606111526489, + "learning_rate": 1.9939936128516284e-05, + "loss": 0.5406, + "step": 2640 + }, + { + "epoch": 0.07251510159253158, + "grad_norm": 0.36766335368156433, + "learning_rate": 1.9939888853651933e-05, + "loss": 0.5591, + "step": 2641 + }, + { + "epoch": 0.07254255903349807, + "grad_norm": 0.3396856188774109, + "learning_rate": 1.9939841560246515e-05, + "loss": 0.5441, + "step": 2642 + }, + { + "epoch": 0.07257001647446458, + "grad_norm": 0.3870697319507599, + "learning_rate": 1.9939794248300118e-05, + "loss": 0.5012, + "step": 2643 + }, + { + "epoch": 0.07259747391543109, + "grad_norm": 0.44808945059776306, + "learning_rate": 1.9939746917812834e-05, + "loss": 0.6261, + "step": 2644 + }, + { + "epoch": 0.07262493135639758, + "grad_norm": 0.4103437662124634, + "learning_rate": 1.9939699568784747e-05, + "loss": 0.5333, + "step": 2645 + }, + { + "epoch": 0.07265238879736409, + "grad_norm": 0.36931341886520386, + "learning_rate": 1.993965220121595e-05, + "loss": 0.553, + "step": 2646 + }, + { + "epoch": 0.07267984623833058, + "grad_norm": 0.37470391392707825, + "learning_rate": 1.9939604815106533e-05, + "loss": 0.574, + "step": 2647 + }, + { + "epoch": 0.07270730367929709, + "grad_norm": 0.3661896288394928, + "learning_rate": 1.9939557410456574e-05, + "loss": 0.4792, + "step": 2648 + }, + { + "epoch": 0.07273476112026359, + "grad_norm": 0.35213884711265564, + "learning_rate": 1.9939509987266173e-05, + "loss": 0.5057, + "step": 2649 + }, + { + "epoch": 0.0727622185612301, + "grad_norm": 0.3676280081272125, + "learning_rate": 1.9939462545535412e-05, + "loss": 0.5966, + "step": 2650 + }, + { + "epoch": 0.0727896760021966, + "grad_norm": 0.3317960798740387, + "learning_rate": 1.9939415085264378e-05, + "loss": 0.5475, + "step": 2651 + }, + { + "epoch": 0.0728171334431631, + "grad_norm": 0.3404351770877838, + "learning_rate": 1.9939367606453168e-05, + "loss": 0.4912, + "step": 2652 + }, + { + "epoch": 0.0728445908841296, + "grad_norm": 0.3500071167945862, + "learning_rate": 1.9939320109101864e-05, + "loss": 0.5338, + "step": 2653 + }, + { + "epoch": 0.0728720483250961, + "grad_norm": 0.3270661532878876, + "learning_rate": 1.9939272593210554e-05, + "loss": 0.4755, + "step": 2654 + }, + { + "epoch": 0.0728995057660626, + "grad_norm": 0.42786136269569397, + "learning_rate": 1.9939225058779325e-05, + "loss": 0.5839, + "step": 2655 + }, + { + "epoch": 0.0729269632070291, + "grad_norm": 0.34090664982795715, + "learning_rate": 1.9939177505808277e-05, + "loss": 0.4481, + "step": 2656 + }, + { + "epoch": 0.07295442064799561, + "grad_norm": 0.35125553607940674, + "learning_rate": 1.9939129934297483e-05, + "loss": 0.5032, + "step": 2657 + }, + { + "epoch": 0.07298187808896212, + "grad_norm": 0.37812626361846924, + "learning_rate": 1.9939082344247045e-05, + "loss": 0.615, + "step": 2658 + }, + { + "epoch": 0.07300933552992861, + "grad_norm": 0.34745416045188904, + "learning_rate": 1.9939034735657042e-05, + "loss": 0.6247, + "step": 2659 + }, + { + "epoch": 0.07303679297089512, + "grad_norm": 0.32580873370170593, + "learning_rate": 1.993898710852757e-05, + "loss": 0.5678, + "step": 2660 + }, + { + "epoch": 0.07306425041186161, + "grad_norm": 0.33764344453811646, + "learning_rate": 1.9938939462858714e-05, + "loss": 0.5297, + "step": 2661 + }, + { + "epoch": 0.07309170785282812, + "grad_norm": 0.3577463626861572, + "learning_rate": 1.9938891798650563e-05, + "loss": 0.582, + "step": 2662 + }, + { + "epoch": 0.07311916529379461, + "grad_norm": 0.3572021722793579, + "learning_rate": 1.9938844115903208e-05, + "loss": 0.6164, + "step": 2663 + }, + { + "epoch": 0.07314662273476112, + "grad_norm": 0.35050785541534424, + "learning_rate": 1.9938796414616738e-05, + "loss": 0.5533, + "step": 2664 + }, + { + "epoch": 0.07317408017572762, + "grad_norm": 0.37762248516082764, + "learning_rate": 1.9938748694791237e-05, + "loss": 0.549, + "step": 2665 + }, + { + "epoch": 0.07320153761669412, + "grad_norm": 0.43513795733451843, + "learning_rate": 1.99387009564268e-05, + "loss": 0.6419, + "step": 2666 + }, + { + "epoch": 0.07322899505766063, + "grad_norm": 0.34502169489860535, + "learning_rate": 1.993865319952351e-05, + "loss": 0.5465, + "step": 2667 + }, + { + "epoch": 0.07325645249862713, + "grad_norm": 0.41204503178596497, + "learning_rate": 1.9938605424081464e-05, + "loss": 0.6111, + "step": 2668 + }, + { + "epoch": 0.07328390993959363, + "grad_norm": 0.3497854173183441, + "learning_rate": 1.9938557630100747e-05, + "loss": 0.5607, + "step": 2669 + }, + { + "epoch": 0.07331136738056013, + "grad_norm": 0.36050671339035034, + "learning_rate": 1.9938509817581446e-05, + "loss": 0.6843, + "step": 2670 + }, + { + "epoch": 0.07333882482152664, + "grad_norm": 0.3455282747745514, + "learning_rate": 1.9938461986523653e-05, + "loss": 0.4999, + "step": 2671 + }, + { + "epoch": 0.07336628226249313, + "grad_norm": 0.35529136657714844, + "learning_rate": 1.9938414136927457e-05, + "loss": 0.5364, + "step": 2672 + }, + { + "epoch": 0.07339373970345964, + "grad_norm": 0.3630426824092865, + "learning_rate": 1.9938366268792945e-05, + "loss": 0.6261, + "step": 2673 + }, + { + "epoch": 0.07342119714442615, + "grad_norm": 0.41040003299713135, + "learning_rate": 1.993831838212021e-05, + "loss": 0.6342, + "step": 2674 + }, + { + "epoch": 0.07344865458539264, + "grad_norm": 0.33441323041915894, + "learning_rate": 1.9938270476909338e-05, + "loss": 0.5257, + "step": 2675 + }, + { + "epoch": 0.07347611202635915, + "grad_norm": 0.4260815382003784, + "learning_rate": 1.9938222553160418e-05, + "loss": 0.5308, + "step": 2676 + }, + { + "epoch": 0.07350356946732564, + "grad_norm": 0.3370666205883026, + "learning_rate": 1.993817461087354e-05, + "loss": 0.5218, + "step": 2677 + }, + { + "epoch": 0.07353102690829215, + "grad_norm": 0.38374847173690796, + "learning_rate": 1.99381266500488e-05, + "loss": 0.6373, + "step": 2678 + }, + { + "epoch": 0.07355848434925864, + "grad_norm": 0.402051717042923, + "learning_rate": 1.993807867068628e-05, + "loss": 0.5807, + "step": 2679 + }, + { + "epoch": 0.07358594179022515, + "grad_norm": 0.3351864516735077, + "learning_rate": 1.993803067278607e-05, + "loss": 0.6055, + "step": 2680 + }, + { + "epoch": 0.07361339923119166, + "grad_norm": 0.382453590631485, + "learning_rate": 1.9937982656348262e-05, + "loss": 0.5891, + "step": 2681 + }, + { + "epoch": 0.07364085667215815, + "grad_norm": 0.3522912859916687, + "learning_rate": 1.9937934621372942e-05, + "loss": 0.5662, + "step": 2682 + }, + { + "epoch": 0.07366831411312466, + "grad_norm": 0.36873912811279297, + "learning_rate": 1.9937886567860205e-05, + "loss": 0.5698, + "step": 2683 + }, + { + "epoch": 0.07369577155409116, + "grad_norm": 0.37353515625, + "learning_rate": 1.9937838495810137e-05, + "loss": 0.5452, + "step": 2684 + }, + { + "epoch": 0.07372322899505766, + "grad_norm": 0.3329957127571106, + "learning_rate": 1.993779040522283e-05, + "loss": 0.5815, + "step": 2685 + }, + { + "epoch": 0.07375068643602416, + "grad_norm": 0.37043604254722595, + "learning_rate": 1.9937742296098367e-05, + "loss": 0.6056, + "step": 2686 + }, + { + "epoch": 0.07377814387699067, + "grad_norm": 0.4267311096191406, + "learning_rate": 1.9937694168436846e-05, + "loss": 0.5962, + "step": 2687 + }, + { + "epoch": 0.07380560131795717, + "grad_norm": 0.35558298230171204, + "learning_rate": 1.9937646022238355e-05, + "loss": 0.6044, + "step": 2688 + }, + { + "epoch": 0.07383305875892367, + "grad_norm": 0.34687885642051697, + "learning_rate": 1.993759785750298e-05, + "loss": 0.5431, + "step": 2689 + }, + { + "epoch": 0.07386051619989017, + "grad_norm": 0.4082138240337372, + "learning_rate": 1.9937549674230817e-05, + "loss": 0.6081, + "step": 2690 + }, + { + "epoch": 0.07388797364085667, + "grad_norm": 0.35377195477485657, + "learning_rate": 1.993750147242195e-05, + "loss": 0.4899, + "step": 2691 + }, + { + "epoch": 0.07391543108182318, + "grad_norm": 0.40484917163848877, + "learning_rate": 1.9937453252076468e-05, + "loss": 0.6295, + "step": 2692 + }, + { + "epoch": 0.07394288852278967, + "grad_norm": 0.39375248551368713, + "learning_rate": 1.993740501319447e-05, + "loss": 0.5626, + "step": 2693 + }, + { + "epoch": 0.07397034596375618, + "grad_norm": 0.37707382440567017, + "learning_rate": 1.9937356755776033e-05, + "loss": 0.6261, + "step": 2694 + }, + { + "epoch": 0.07399780340472267, + "grad_norm": 0.36441782116889954, + "learning_rate": 1.993730847982126e-05, + "loss": 0.6041, + "step": 2695 + }, + { + "epoch": 0.07402526084568918, + "grad_norm": 0.3367854356765747, + "learning_rate": 1.9937260185330233e-05, + "loss": 0.5212, + "step": 2696 + }, + { + "epoch": 0.07405271828665569, + "grad_norm": 0.34719085693359375, + "learning_rate": 1.9937211872303043e-05, + "loss": 0.5571, + "step": 2697 + }, + { + "epoch": 0.07408017572762218, + "grad_norm": 0.3888559639453888, + "learning_rate": 1.9937163540739784e-05, + "loss": 0.5951, + "step": 2698 + }, + { + "epoch": 0.07410763316858869, + "grad_norm": 0.37381651997566223, + "learning_rate": 1.993711519064054e-05, + "loss": 0.4864, + "step": 2699 + }, + { + "epoch": 0.07413509060955518, + "grad_norm": 0.3443273603916168, + "learning_rate": 1.9937066822005407e-05, + "loss": 0.6229, + "step": 2700 + }, + { + "epoch": 0.07416254805052169, + "grad_norm": 0.3469371199607849, + "learning_rate": 1.9937018434834472e-05, + "loss": 0.5302, + "step": 2701 + }, + { + "epoch": 0.07419000549148819, + "grad_norm": 0.36280110478401184, + "learning_rate": 1.9936970029127827e-05, + "loss": 0.5917, + "step": 2702 + }, + { + "epoch": 0.0742174629324547, + "grad_norm": 0.3265971839427948, + "learning_rate": 1.993692160488556e-05, + "loss": 0.503, + "step": 2703 + }, + { + "epoch": 0.0742449203734212, + "grad_norm": 0.3804887533187866, + "learning_rate": 1.9936873162107762e-05, + "loss": 0.5456, + "step": 2704 + }, + { + "epoch": 0.0742723778143877, + "grad_norm": 0.3863884508609772, + "learning_rate": 1.9936824700794526e-05, + "loss": 0.5528, + "step": 2705 + }, + { + "epoch": 0.0742998352553542, + "grad_norm": 0.3280385434627533, + "learning_rate": 1.9936776220945942e-05, + "loss": 0.5653, + "step": 2706 + }, + { + "epoch": 0.0743272926963207, + "grad_norm": 0.3830016553401947, + "learning_rate": 1.9936727722562095e-05, + "loss": 0.5479, + "step": 2707 + }, + { + "epoch": 0.0743547501372872, + "grad_norm": 0.48339787125587463, + "learning_rate": 1.993667920564308e-05, + "loss": 0.5333, + "step": 2708 + }, + { + "epoch": 0.0743822075782537, + "grad_norm": 0.3274328112602234, + "learning_rate": 1.993663067018899e-05, + "loss": 0.5685, + "step": 2709 + }, + { + "epoch": 0.07440966501922021, + "grad_norm": 0.40899938344955444, + "learning_rate": 1.993658211619991e-05, + "loss": 0.5827, + "step": 2710 + }, + { + "epoch": 0.07443712246018672, + "grad_norm": 0.3796854317188263, + "learning_rate": 1.9936533543675932e-05, + "loss": 0.502, + "step": 2711 + }, + { + "epoch": 0.07446457990115321, + "grad_norm": 0.3383747637271881, + "learning_rate": 1.9936484952617147e-05, + "loss": 0.5776, + "step": 2712 + }, + { + "epoch": 0.07449203734211972, + "grad_norm": 0.3405179977416992, + "learning_rate": 1.993643634302365e-05, + "loss": 0.593, + "step": 2713 + }, + { + "epoch": 0.07451949478308621, + "grad_norm": 0.40838173031806946, + "learning_rate": 1.9936387714895525e-05, + "loss": 0.6096, + "step": 2714 + }, + { + "epoch": 0.07454695222405272, + "grad_norm": 0.35725435614585876, + "learning_rate": 1.993633906823287e-05, + "loss": 0.5453, + "step": 2715 + }, + { + "epoch": 0.07457440966501921, + "grad_norm": 0.3567812740802765, + "learning_rate": 1.9936290403035766e-05, + "loss": 0.5678, + "step": 2716 + }, + { + "epoch": 0.07460186710598572, + "grad_norm": 0.4028964340686798, + "learning_rate": 1.993624171930431e-05, + "loss": 0.6442, + "step": 2717 + }, + { + "epoch": 0.07462932454695223, + "grad_norm": 0.37923797965049744, + "learning_rate": 1.9936193017038594e-05, + "loss": 0.5921, + "step": 2718 + }, + { + "epoch": 0.07465678198791872, + "grad_norm": 0.4018227458000183, + "learning_rate": 1.9936144296238705e-05, + "loss": 0.4904, + "step": 2719 + }, + { + "epoch": 0.07468423942888523, + "grad_norm": 0.3664076626300812, + "learning_rate": 1.9936095556904735e-05, + "loss": 0.5354, + "step": 2720 + }, + { + "epoch": 0.07471169686985173, + "grad_norm": 0.3344953656196594, + "learning_rate": 1.993604679903678e-05, + "loss": 0.4649, + "step": 2721 + }, + { + "epoch": 0.07473915431081823, + "grad_norm": 0.3914625644683838, + "learning_rate": 1.993599802263492e-05, + "loss": 0.5601, + "step": 2722 + }, + { + "epoch": 0.07476661175178473, + "grad_norm": 0.35467520356178284, + "learning_rate": 1.993594922769926e-05, + "loss": 0.5186, + "step": 2723 + }, + { + "epoch": 0.07479406919275124, + "grad_norm": 1.1804643869400024, + "learning_rate": 1.9935900414229875e-05, + "loss": 0.6175, + "step": 2724 + }, + { + "epoch": 0.07482152663371774, + "grad_norm": 0.39759641885757446, + "learning_rate": 1.993585158222687e-05, + "loss": 0.6662, + "step": 2725 + }, + { + "epoch": 0.07484898407468424, + "grad_norm": 0.3593357801437378, + "learning_rate": 1.993580273169033e-05, + "loss": 0.562, + "step": 2726 + }, + { + "epoch": 0.07487644151565075, + "grad_norm": 0.3799419105052948, + "learning_rate": 1.9935753862620347e-05, + "loss": 0.4993, + "step": 2727 + }, + { + "epoch": 0.07490389895661724, + "grad_norm": 0.3500645160675049, + "learning_rate": 1.993570497501701e-05, + "loss": 0.5517, + "step": 2728 + }, + { + "epoch": 0.07493135639758375, + "grad_norm": 0.3719981908798218, + "learning_rate": 1.9935656068880417e-05, + "loss": 0.6272, + "step": 2729 + }, + { + "epoch": 0.07495881383855024, + "grad_norm": 0.34457510709762573, + "learning_rate": 1.9935607144210648e-05, + "loss": 0.4795, + "step": 2730 + }, + { + "epoch": 0.07498627127951675, + "grad_norm": 0.38582348823547363, + "learning_rate": 1.9935558201007804e-05, + "loss": 0.5951, + "step": 2731 + }, + { + "epoch": 0.07501372872048324, + "grad_norm": 0.38662785291671753, + "learning_rate": 1.993550923927197e-05, + "loss": 0.5194, + "step": 2732 + }, + { + "epoch": 0.07504118616144975, + "grad_norm": 0.413790225982666, + "learning_rate": 1.9935460259003244e-05, + "loss": 0.7141, + "step": 2733 + }, + { + "epoch": 0.07506864360241626, + "grad_norm": 0.35950368642807007, + "learning_rate": 1.993541126020171e-05, + "loss": 0.5413, + "step": 2734 + }, + { + "epoch": 0.07509610104338275, + "grad_norm": 0.37346598505973816, + "learning_rate": 1.9935362242867465e-05, + "loss": 0.56, + "step": 2735 + }, + { + "epoch": 0.07512355848434926, + "grad_norm": 0.66705721616745, + "learning_rate": 1.99353132070006e-05, + "loss": 0.6236, + "step": 2736 + }, + { + "epoch": 0.07515101592531576, + "grad_norm": 0.3876762390136719, + "learning_rate": 1.9935264152601205e-05, + "loss": 0.5416, + "step": 2737 + }, + { + "epoch": 0.07517847336628226, + "grad_norm": 0.3597573935985565, + "learning_rate": 1.9935215079669367e-05, + "loss": 0.5831, + "step": 2738 + }, + { + "epoch": 0.07520593080724876, + "grad_norm": 0.3984428346157074, + "learning_rate": 1.9935165988205184e-05, + "loss": 0.6571, + "step": 2739 + }, + { + "epoch": 0.07523338824821527, + "grad_norm": 0.3651711642742157, + "learning_rate": 1.993511687820875e-05, + "loss": 0.5967, + "step": 2740 + }, + { + "epoch": 0.07526084568918177, + "grad_norm": 0.35446274280548096, + "learning_rate": 1.9935067749680145e-05, + "loss": 0.5662, + "step": 2741 + }, + { + "epoch": 0.07528830313014827, + "grad_norm": 0.3749960660934448, + "learning_rate": 1.993501860261947e-05, + "loss": 0.4836, + "step": 2742 + }, + { + "epoch": 0.07531576057111478, + "grad_norm": 0.4806305766105652, + "learning_rate": 1.9934969437026815e-05, + "loss": 0.7408, + "step": 2743 + }, + { + "epoch": 0.07534321801208127, + "grad_norm": 0.3815155327320099, + "learning_rate": 1.9934920252902272e-05, + "loss": 0.5858, + "step": 2744 + }, + { + "epoch": 0.07537067545304778, + "grad_norm": 0.38132649660110474, + "learning_rate": 1.9934871050245932e-05, + "loss": 0.5681, + "step": 2745 + }, + { + "epoch": 0.07539813289401427, + "grad_norm": 0.418954998254776, + "learning_rate": 1.9934821829057887e-05, + "loss": 0.6034, + "step": 2746 + }, + { + "epoch": 0.07542559033498078, + "grad_norm": 0.3933415412902832, + "learning_rate": 1.9934772589338228e-05, + "loss": 0.4925, + "step": 2747 + }, + { + "epoch": 0.07545304777594729, + "grad_norm": 0.3763349652290344, + "learning_rate": 1.9934723331087046e-05, + "loss": 0.548, + "step": 2748 + }, + { + "epoch": 0.07548050521691378, + "grad_norm": 0.3689199984073639, + "learning_rate": 1.9934674054304434e-05, + "loss": 0.5298, + "step": 2749 + }, + { + "epoch": 0.07550796265788029, + "grad_norm": 0.3422185480594635, + "learning_rate": 1.9934624758990486e-05, + "loss": 0.4868, + "step": 2750 + }, + { + "epoch": 0.07553542009884678, + "grad_norm": 0.37855806946754456, + "learning_rate": 1.993457544514529e-05, + "loss": 0.5408, + "step": 2751 + }, + { + "epoch": 0.07556287753981329, + "grad_norm": 0.38406747579574585, + "learning_rate": 1.993452611276894e-05, + "loss": 0.5341, + "step": 2752 + }, + { + "epoch": 0.07559033498077979, + "grad_norm": 0.43737491965293884, + "learning_rate": 1.9934476761861533e-05, + "loss": 0.5955, + "step": 2753 + }, + { + "epoch": 0.0756177924217463, + "grad_norm": 0.3628994822502136, + "learning_rate": 1.9934427392423152e-05, + "loss": 0.5448, + "step": 2754 + }, + { + "epoch": 0.0756452498627128, + "grad_norm": 0.3423815369606018, + "learning_rate": 1.9934378004453892e-05, + "loss": 0.5301, + "step": 2755 + }, + { + "epoch": 0.0756727073036793, + "grad_norm": 0.406340628862381, + "learning_rate": 1.9934328597953846e-05, + "loss": 0.6875, + "step": 2756 + }, + { + "epoch": 0.0757001647446458, + "grad_norm": 0.3692006766796112, + "learning_rate": 1.9934279172923114e-05, + "loss": 0.5997, + "step": 2757 + }, + { + "epoch": 0.0757276221856123, + "grad_norm": 0.435729444026947, + "learning_rate": 1.993422972936177e-05, + "loss": 0.5786, + "step": 2758 + }, + { + "epoch": 0.0757550796265788, + "grad_norm": 0.34369081258773804, + "learning_rate": 1.9934180267269925e-05, + "loss": 0.5418, + "step": 2759 + }, + { + "epoch": 0.0757825370675453, + "grad_norm": 0.41339290142059326, + "learning_rate": 1.9934130786647658e-05, + "loss": 0.6335, + "step": 2760 + }, + { + "epoch": 0.07580999450851181, + "grad_norm": 0.36868974566459656, + "learning_rate": 1.993408128749507e-05, + "loss": 0.6019, + "step": 2761 + }, + { + "epoch": 0.0758374519494783, + "grad_norm": 0.3593771457672119, + "learning_rate": 1.9934031769812247e-05, + "loss": 0.5058, + "step": 2762 + }, + { + "epoch": 0.07586490939044481, + "grad_norm": 0.3793041706085205, + "learning_rate": 1.9933982233599286e-05, + "loss": 0.5277, + "step": 2763 + }, + { + "epoch": 0.07589236683141132, + "grad_norm": 0.3581700921058655, + "learning_rate": 1.9933932678856277e-05, + "loss": 0.5577, + "step": 2764 + }, + { + "epoch": 0.07591982427237781, + "grad_norm": 0.3905010521411896, + "learning_rate": 1.9933883105583313e-05, + "loss": 0.5377, + "step": 2765 + }, + { + "epoch": 0.07594728171334432, + "grad_norm": 0.36551010608673096, + "learning_rate": 1.9933833513780488e-05, + "loss": 0.5277, + "step": 2766 + }, + { + "epoch": 0.07597473915431081, + "grad_norm": 0.35551154613494873, + "learning_rate": 1.993378390344789e-05, + "loss": 0.5782, + "step": 2767 + }, + { + "epoch": 0.07600219659527732, + "grad_norm": 0.4203924238681793, + "learning_rate": 1.9933734274585616e-05, + "loss": 0.5811, + "step": 2768 + }, + { + "epoch": 0.07602965403624382, + "grad_norm": 0.3575420379638672, + "learning_rate": 1.993368462719376e-05, + "loss": 0.5321, + "step": 2769 + }, + { + "epoch": 0.07605711147721032, + "grad_norm": 0.5529618859291077, + "learning_rate": 1.9933634961272408e-05, + "loss": 0.5298, + "step": 2770 + }, + { + "epoch": 0.07608456891817683, + "grad_norm": 0.4183511734008789, + "learning_rate": 1.9933585276821657e-05, + "loss": 0.584, + "step": 2771 + }, + { + "epoch": 0.07611202635914333, + "grad_norm": 0.3422856032848358, + "learning_rate": 1.9933535573841597e-05, + "loss": 0.5349, + "step": 2772 + }, + { + "epoch": 0.07613948380010983, + "grad_norm": 0.6585968136787415, + "learning_rate": 1.9933485852332328e-05, + "loss": 0.5797, + "step": 2773 + }, + { + "epoch": 0.07616694124107633, + "grad_norm": 0.3618282675743103, + "learning_rate": 1.9933436112293932e-05, + "loss": 0.5282, + "step": 2774 + }, + { + "epoch": 0.07619439868204284, + "grad_norm": 0.38805195689201355, + "learning_rate": 1.993338635372651e-05, + "loss": 0.4877, + "step": 2775 + }, + { + "epoch": 0.07622185612300933, + "grad_norm": 0.452722430229187, + "learning_rate": 1.9933336576630157e-05, + "loss": 0.6424, + "step": 2776 + }, + { + "epoch": 0.07624931356397584, + "grad_norm": 0.3949277698993683, + "learning_rate": 1.9933286781004953e-05, + "loss": 0.5361, + "step": 2777 + }, + { + "epoch": 0.07627677100494235, + "grad_norm": 0.4401606023311615, + "learning_rate": 1.9933236966851002e-05, + "loss": 0.6415, + "step": 2778 + }, + { + "epoch": 0.07630422844590884, + "grad_norm": 0.35570091009140015, + "learning_rate": 1.9933187134168395e-05, + "loss": 0.6253, + "step": 2779 + }, + { + "epoch": 0.07633168588687535, + "grad_norm": 0.3576837480068207, + "learning_rate": 1.993313728295722e-05, + "loss": 0.5406, + "step": 2780 + }, + { + "epoch": 0.07635914332784184, + "grad_norm": 0.37266233563423157, + "learning_rate": 1.9933087413217575e-05, + "loss": 0.6677, + "step": 2781 + }, + { + "epoch": 0.07638660076880835, + "grad_norm": 0.3768152594566345, + "learning_rate": 1.9933037524949555e-05, + "loss": 0.6057, + "step": 2782 + }, + { + "epoch": 0.07641405820977484, + "grad_norm": 0.3335483968257904, + "learning_rate": 1.9932987618153245e-05, + "loss": 0.4939, + "step": 2783 + }, + { + "epoch": 0.07644151565074135, + "grad_norm": 0.3640168011188507, + "learning_rate": 1.9932937692828744e-05, + "loss": 0.5317, + "step": 2784 + }, + { + "epoch": 0.07646897309170786, + "grad_norm": 0.35526973009109497, + "learning_rate": 1.993288774897615e-05, + "loss": 0.6087, + "step": 2785 + }, + { + "epoch": 0.07649643053267435, + "grad_norm": 0.33862540125846863, + "learning_rate": 1.9932837786595542e-05, + "loss": 0.4516, + "step": 2786 + }, + { + "epoch": 0.07652388797364086, + "grad_norm": 0.37815752625465393, + "learning_rate": 1.9932787805687024e-05, + "loss": 0.4676, + "step": 2787 + }, + { + "epoch": 0.07655134541460736, + "grad_norm": 0.3728600740432739, + "learning_rate": 1.9932737806250687e-05, + "loss": 0.6107, + "step": 2788 + }, + { + "epoch": 0.07657880285557386, + "grad_norm": 0.39178186655044556, + "learning_rate": 1.9932687788286626e-05, + "loss": 0.5741, + "step": 2789 + }, + { + "epoch": 0.07660626029654036, + "grad_norm": 0.4382000267505646, + "learning_rate": 1.993263775179493e-05, + "loss": 0.5623, + "step": 2790 + }, + { + "epoch": 0.07663371773750687, + "grad_norm": 0.3897673487663269, + "learning_rate": 1.9932587696775693e-05, + "loss": 0.5485, + "step": 2791 + }, + { + "epoch": 0.07666117517847337, + "grad_norm": 0.38330283761024475, + "learning_rate": 1.993253762322901e-05, + "loss": 0.5688, + "step": 2792 + }, + { + "epoch": 0.07668863261943987, + "grad_norm": 0.423299640417099, + "learning_rate": 1.9932487531154975e-05, + "loss": 0.5775, + "step": 2793 + }, + { + "epoch": 0.07671609006040638, + "grad_norm": 0.3697313368320465, + "learning_rate": 1.9932437420553685e-05, + "loss": 0.5091, + "step": 2794 + }, + { + "epoch": 0.07674354750137287, + "grad_norm": 0.40409529209136963, + "learning_rate": 1.9932387291425223e-05, + "loss": 0.5543, + "step": 2795 + }, + { + "epoch": 0.07677100494233938, + "grad_norm": 0.38750290870666504, + "learning_rate": 1.9932337143769694e-05, + "loss": 0.5899, + "step": 2796 + }, + { + "epoch": 0.07679846238330587, + "grad_norm": 0.3626382648944855, + "learning_rate": 1.9932286977587183e-05, + "loss": 0.5859, + "step": 2797 + }, + { + "epoch": 0.07682591982427238, + "grad_norm": 0.3714325726032257, + "learning_rate": 1.9932236792877784e-05, + "loss": 0.5327, + "step": 2798 + }, + { + "epoch": 0.07685337726523887, + "grad_norm": 0.5344110131263733, + "learning_rate": 1.9932186589641597e-05, + "loss": 0.5314, + "step": 2799 + }, + { + "epoch": 0.07688083470620538, + "grad_norm": 0.34094947576522827, + "learning_rate": 1.9932136367878715e-05, + "loss": 0.4438, + "step": 2800 + }, + { + "epoch": 0.07690829214717189, + "grad_norm": 0.42264747619628906, + "learning_rate": 1.993208612758922e-05, + "loss": 0.5256, + "step": 2801 + }, + { + "epoch": 0.07693574958813838, + "grad_norm": 0.35935506224632263, + "learning_rate": 1.9932035868773224e-05, + "loss": 0.5352, + "step": 2802 + }, + { + "epoch": 0.07696320702910489, + "grad_norm": 0.3253059983253479, + "learning_rate": 1.9931985591430805e-05, + "loss": 0.5526, + "step": 2803 + }, + { + "epoch": 0.07699066447007138, + "grad_norm": 0.3399925231933594, + "learning_rate": 1.9931935295562064e-05, + "loss": 0.4883, + "step": 2804 + }, + { + "epoch": 0.07701812191103789, + "grad_norm": 0.3638738989830017, + "learning_rate": 1.9931884981167094e-05, + "loss": 0.5109, + "step": 2805 + }, + { + "epoch": 0.07704557935200439, + "grad_norm": 0.4416954219341278, + "learning_rate": 1.993183464824599e-05, + "loss": 0.5924, + "step": 2806 + }, + { + "epoch": 0.0770730367929709, + "grad_norm": 0.38718482851982117, + "learning_rate": 1.9931784296798845e-05, + "loss": 0.5762, + "step": 2807 + }, + { + "epoch": 0.0771004942339374, + "grad_norm": 0.33059263229370117, + "learning_rate": 1.993173392682575e-05, + "loss": 0.5841, + "step": 2808 + }, + { + "epoch": 0.0771279516749039, + "grad_norm": 0.32983866333961487, + "learning_rate": 1.99316835383268e-05, + "loss": 0.4634, + "step": 2809 + }, + { + "epoch": 0.0771554091158704, + "grad_norm": 0.48451074957847595, + "learning_rate": 1.9931633131302095e-05, + "loss": 0.5628, + "step": 2810 + }, + { + "epoch": 0.0771828665568369, + "grad_norm": 0.3593306243419647, + "learning_rate": 1.9931582705751722e-05, + "loss": 0.5369, + "step": 2811 + }, + { + "epoch": 0.0772103239978034, + "grad_norm": 0.3475426137447357, + "learning_rate": 1.9931532261675777e-05, + "loss": 0.5185, + "step": 2812 + }, + { + "epoch": 0.0772377814387699, + "grad_norm": 0.3185271620750427, + "learning_rate": 1.9931481799074354e-05, + "loss": 0.5471, + "step": 2813 + }, + { + "epoch": 0.07726523887973641, + "grad_norm": 0.3926304876804352, + "learning_rate": 1.993143131794755e-05, + "loss": 0.5918, + "step": 2814 + }, + { + "epoch": 0.07729269632070292, + "grad_norm": 0.36520200967788696, + "learning_rate": 1.9931380818295456e-05, + "loss": 0.4961, + "step": 2815 + }, + { + "epoch": 0.07732015376166941, + "grad_norm": 0.3411597013473511, + "learning_rate": 1.9931330300118165e-05, + "loss": 0.5761, + "step": 2816 + }, + { + "epoch": 0.07734761120263592, + "grad_norm": 0.4121137261390686, + "learning_rate": 1.993127976341578e-05, + "loss": 0.4895, + "step": 2817 + }, + { + "epoch": 0.07737506864360241, + "grad_norm": 0.5671923756599426, + "learning_rate": 1.9931229208188382e-05, + "loss": 0.5424, + "step": 2818 + }, + { + "epoch": 0.07740252608456892, + "grad_norm": 0.3693098723888397, + "learning_rate": 1.9931178634436073e-05, + "loss": 0.5845, + "step": 2819 + }, + { + "epoch": 0.07742998352553541, + "grad_norm": 0.37176698446273804, + "learning_rate": 1.9931128042158944e-05, + "loss": 0.5635, + "step": 2820 + }, + { + "epoch": 0.07745744096650192, + "grad_norm": 0.4026034474372864, + "learning_rate": 1.9931077431357095e-05, + "loss": 0.6562, + "step": 2821 + }, + { + "epoch": 0.07748489840746843, + "grad_norm": 0.48332399129867554, + "learning_rate": 1.9931026802030616e-05, + "loss": 0.5605, + "step": 2822 + }, + { + "epoch": 0.07751235584843492, + "grad_norm": 0.32080546021461487, + "learning_rate": 1.9930976154179604e-05, + "loss": 0.4994, + "step": 2823 + }, + { + "epoch": 0.07753981328940143, + "grad_norm": 0.37902727723121643, + "learning_rate": 1.9930925487804148e-05, + "loss": 0.599, + "step": 2824 + }, + { + "epoch": 0.07756727073036793, + "grad_norm": 0.3590219020843506, + "learning_rate": 1.993087480290435e-05, + "loss": 0.4767, + "step": 2825 + }, + { + "epoch": 0.07759472817133443, + "grad_norm": 0.37361615896224976, + "learning_rate": 1.99308240994803e-05, + "loss": 0.5788, + "step": 2826 + }, + { + "epoch": 0.07762218561230093, + "grad_norm": 0.34117591381073, + "learning_rate": 1.9930773377532094e-05, + "loss": 0.5361, + "step": 2827 + }, + { + "epoch": 0.07764964305326744, + "grad_norm": 0.37882521748542786, + "learning_rate": 1.9930722637059825e-05, + "loss": 0.6524, + "step": 2828 + }, + { + "epoch": 0.07767710049423393, + "grad_norm": 0.4004732072353363, + "learning_rate": 1.993067187806359e-05, + "loss": 0.5514, + "step": 2829 + }, + { + "epoch": 0.07770455793520044, + "grad_norm": 0.35850226879119873, + "learning_rate": 1.993062110054348e-05, + "loss": 0.6091, + "step": 2830 + }, + { + "epoch": 0.07773201537616695, + "grad_norm": 0.31252509355545044, + "learning_rate": 1.9930570304499596e-05, + "loss": 0.4506, + "step": 2831 + }, + { + "epoch": 0.07775947281713344, + "grad_norm": 0.31133925914764404, + "learning_rate": 1.9930519489932022e-05, + "loss": 0.5297, + "step": 2832 + }, + { + "epoch": 0.07778693025809995, + "grad_norm": 0.4107705056667328, + "learning_rate": 1.9930468656840868e-05, + "loss": 0.6131, + "step": 2833 + }, + { + "epoch": 0.07781438769906644, + "grad_norm": 0.35909274220466614, + "learning_rate": 1.9930417805226218e-05, + "loss": 0.5188, + "step": 2834 + }, + { + "epoch": 0.07784184514003295, + "grad_norm": 0.3245830535888672, + "learning_rate": 1.9930366935088167e-05, + "loss": 0.5357, + "step": 2835 + }, + { + "epoch": 0.07786930258099944, + "grad_norm": 0.36577746272087097, + "learning_rate": 1.9930316046426813e-05, + "loss": 0.7189, + "step": 2836 + }, + { + "epoch": 0.07789676002196595, + "grad_norm": 0.41771578788757324, + "learning_rate": 1.993026513924225e-05, + "loss": 0.7226, + "step": 2837 + }, + { + "epoch": 0.07792421746293246, + "grad_norm": 0.5177350640296936, + "learning_rate": 1.9930214213534573e-05, + "loss": 0.486, + "step": 2838 + }, + { + "epoch": 0.07795167490389895, + "grad_norm": 0.39596351981163025, + "learning_rate": 1.9930163269303876e-05, + "loss": 0.73, + "step": 2839 + }, + { + "epoch": 0.07797913234486546, + "grad_norm": 0.37434571981430054, + "learning_rate": 1.993011230655026e-05, + "loss": 0.5959, + "step": 2840 + }, + { + "epoch": 0.07800658978583196, + "grad_norm": 0.34406188130378723, + "learning_rate": 1.9930061325273812e-05, + "loss": 0.5164, + "step": 2841 + }, + { + "epoch": 0.07803404722679846, + "grad_norm": 0.3807098865509033, + "learning_rate": 1.993001032547463e-05, + "loss": 0.6478, + "step": 2842 + }, + { + "epoch": 0.07806150466776496, + "grad_norm": 0.3742447793483734, + "learning_rate": 1.992995930715281e-05, + "loss": 0.5555, + "step": 2843 + }, + { + "epoch": 0.07808896210873147, + "grad_norm": 0.3860563635826111, + "learning_rate": 1.9929908270308446e-05, + "loss": 0.6261, + "step": 2844 + }, + { + "epoch": 0.07811641954969797, + "grad_norm": 0.4248245656490326, + "learning_rate": 1.9929857214941637e-05, + "loss": 0.5783, + "step": 2845 + }, + { + "epoch": 0.07814387699066447, + "grad_norm": 0.43635210394859314, + "learning_rate": 1.992980614105247e-05, + "loss": 0.6263, + "step": 2846 + }, + { + "epoch": 0.07817133443163098, + "grad_norm": 0.3440542221069336, + "learning_rate": 1.9929755048641047e-05, + "loss": 0.5511, + "step": 2847 + }, + { + "epoch": 0.07819879187259747, + "grad_norm": 0.4962725341320038, + "learning_rate": 1.9929703937707467e-05, + "loss": 0.6372, + "step": 2848 + }, + { + "epoch": 0.07822624931356398, + "grad_norm": 0.37182122468948364, + "learning_rate": 1.9929652808251813e-05, + "loss": 0.5708, + "step": 2849 + }, + { + "epoch": 0.07825370675453047, + "grad_norm": 0.3625844717025757, + "learning_rate": 1.9929601660274192e-05, + "loss": 0.5736, + "step": 2850 + }, + { + "epoch": 0.07828116419549698, + "grad_norm": 0.3484930396080017, + "learning_rate": 1.9929550493774692e-05, + "loss": 0.5258, + "step": 2851 + }, + { + "epoch": 0.07830862163646349, + "grad_norm": 0.4039776623249054, + "learning_rate": 1.992949930875341e-05, + "loss": 0.5334, + "step": 2852 + }, + { + "epoch": 0.07833607907742998, + "grad_norm": 0.45663824677467346, + "learning_rate": 1.9929448105210444e-05, + "loss": 0.553, + "step": 2853 + }, + { + "epoch": 0.07836353651839649, + "grad_norm": 0.33520007133483887, + "learning_rate": 1.9929396883145887e-05, + "loss": 0.5285, + "step": 2854 + }, + { + "epoch": 0.07839099395936298, + "grad_norm": 0.43612414598464966, + "learning_rate": 1.992934564255984e-05, + "loss": 0.6272, + "step": 2855 + }, + { + "epoch": 0.07841845140032949, + "grad_norm": 0.35779187083244324, + "learning_rate": 1.992929438345239e-05, + "loss": 0.5591, + "step": 2856 + }, + { + "epoch": 0.07844590884129599, + "grad_norm": 0.3973114788532257, + "learning_rate": 1.9929243105823638e-05, + "loss": 0.5776, + "step": 2857 + }, + { + "epoch": 0.0784733662822625, + "grad_norm": 0.47988036274909973, + "learning_rate": 1.992919180967368e-05, + "loss": 0.6081, + "step": 2858 + }, + { + "epoch": 0.078500823723229, + "grad_norm": 0.3700225055217743, + "learning_rate": 1.992914049500261e-05, + "loss": 0.575, + "step": 2859 + }, + { + "epoch": 0.0785282811641955, + "grad_norm": 0.5254287123680115, + "learning_rate": 1.9929089161810525e-05, + "loss": 0.6146, + "step": 2860 + }, + { + "epoch": 0.078555738605162, + "grad_norm": 0.3430519700050354, + "learning_rate": 1.9929037810097516e-05, + "loss": 0.4668, + "step": 2861 + }, + { + "epoch": 0.0785831960461285, + "grad_norm": 0.44405412673950195, + "learning_rate": 1.9928986439863684e-05, + "loss": 0.5619, + "step": 2862 + }, + { + "epoch": 0.078610653487095, + "grad_norm": 0.34817326068878174, + "learning_rate": 1.9928935051109125e-05, + "loss": 0.568, + "step": 2863 + }, + { + "epoch": 0.0786381109280615, + "grad_norm": 0.3485410213470459, + "learning_rate": 1.9928883643833933e-05, + "loss": 0.5256, + "step": 2864 + }, + { + "epoch": 0.07866556836902801, + "grad_norm": 0.33600303530693054, + "learning_rate": 1.9928832218038202e-05, + "loss": 0.4936, + "step": 2865 + }, + { + "epoch": 0.0786930258099945, + "grad_norm": 2.556021213531494, + "learning_rate": 1.9928780773722032e-05, + "loss": 0.5674, + "step": 2866 + }, + { + "epoch": 0.07872048325096101, + "grad_norm": 0.4196873605251312, + "learning_rate": 1.9928729310885514e-05, + "loss": 0.5025, + "step": 2867 + }, + { + "epoch": 0.07874794069192752, + "grad_norm": 0.3804168105125427, + "learning_rate": 1.9928677829528752e-05, + "loss": 0.6343, + "step": 2868 + }, + { + "epoch": 0.07877539813289401, + "grad_norm": 0.37788933515548706, + "learning_rate": 1.9928626329651832e-05, + "loss": 0.5502, + "step": 2869 + }, + { + "epoch": 0.07880285557386052, + "grad_norm": 0.3128436207771301, + "learning_rate": 1.9928574811254858e-05, + "loss": 0.431, + "step": 2870 + }, + { + "epoch": 0.07883031301482701, + "grad_norm": 0.378238320350647, + "learning_rate": 1.9928523274337922e-05, + "loss": 0.5945, + "step": 2871 + }, + { + "epoch": 0.07885777045579352, + "grad_norm": 0.3867018222808838, + "learning_rate": 1.992847171890112e-05, + "loss": 0.6275, + "step": 2872 + }, + { + "epoch": 0.07888522789676002, + "grad_norm": 0.3865893483161926, + "learning_rate": 1.992842014494455e-05, + "loss": 0.5225, + "step": 2873 + }, + { + "epoch": 0.07891268533772652, + "grad_norm": 0.34317150712013245, + "learning_rate": 1.992836855246831e-05, + "loss": 0.5988, + "step": 2874 + }, + { + "epoch": 0.07894014277869303, + "grad_norm": 0.3600977957248688, + "learning_rate": 1.9928316941472493e-05, + "loss": 0.5122, + "step": 2875 + }, + { + "epoch": 0.07896760021965953, + "grad_norm": 0.36574992537498474, + "learning_rate": 1.9928265311957194e-05, + "loss": 0.6097, + "step": 2876 + }, + { + "epoch": 0.07899505766062603, + "grad_norm": 0.3493749499320984, + "learning_rate": 1.9928213663922512e-05, + "loss": 0.4906, + "step": 2877 + }, + { + "epoch": 0.07902251510159253, + "grad_norm": 0.32657596468925476, + "learning_rate": 1.9928161997368546e-05, + "loss": 0.5312, + "step": 2878 + }, + { + "epoch": 0.07904997254255904, + "grad_norm": 0.332365483045578, + "learning_rate": 1.9928110312295387e-05, + "loss": 0.5685, + "step": 2879 + }, + { + "epoch": 0.07907742998352553, + "grad_norm": 0.3845902383327484, + "learning_rate": 1.9928058608703133e-05, + "loss": 0.4743, + "step": 2880 + }, + { + "epoch": 0.07910488742449204, + "grad_norm": 0.3781723380088806, + "learning_rate": 1.992800688659188e-05, + "loss": 0.531, + "step": 2881 + }, + { + "epoch": 0.07913234486545855, + "grad_norm": 0.3839815855026245, + "learning_rate": 1.9927955145961727e-05, + "loss": 0.5319, + "step": 2882 + }, + { + "epoch": 0.07915980230642504, + "grad_norm": 0.3775777816772461, + "learning_rate": 1.9927903386812772e-05, + "loss": 0.4933, + "step": 2883 + }, + { + "epoch": 0.07918725974739155, + "grad_norm": 0.3619346618652344, + "learning_rate": 1.9927851609145105e-05, + "loss": 0.5258, + "step": 2884 + }, + { + "epoch": 0.07921471718835804, + "grad_norm": 0.3510991036891937, + "learning_rate": 1.9927799812958824e-05, + "loss": 0.6414, + "step": 2885 + }, + { + "epoch": 0.07924217462932455, + "grad_norm": 0.3552727699279785, + "learning_rate": 1.992774799825403e-05, + "loss": 0.5913, + "step": 2886 + }, + { + "epoch": 0.07926963207029104, + "grad_norm": 0.3576870858669281, + "learning_rate": 1.9927696165030822e-05, + "loss": 0.5615, + "step": 2887 + }, + { + "epoch": 0.07929708951125755, + "grad_norm": 0.343036413192749, + "learning_rate": 1.9927644313289283e-05, + "loss": 0.5293, + "step": 2888 + }, + { + "epoch": 0.07932454695222406, + "grad_norm": 0.38303953409194946, + "learning_rate": 1.9927592443029528e-05, + "loss": 0.6379, + "step": 2889 + }, + { + "epoch": 0.07935200439319055, + "grad_norm": 0.3803226351737976, + "learning_rate": 1.992754055425164e-05, + "loss": 0.6456, + "step": 2890 + }, + { + "epoch": 0.07937946183415706, + "grad_norm": 0.4017510414123535, + "learning_rate": 1.992748864695572e-05, + "loss": 0.6087, + "step": 2891 + }, + { + "epoch": 0.07940691927512356, + "grad_norm": 0.3569180965423584, + "learning_rate": 1.9927436721141866e-05, + "loss": 0.5977, + "step": 2892 + }, + { + "epoch": 0.07943437671609006, + "grad_norm": 0.4353973865509033, + "learning_rate": 1.9927384776810173e-05, + "loss": 0.607, + "step": 2893 + }, + { + "epoch": 0.07946183415705656, + "grad_norm": 0.45006659626960754, + "learning_rate": 1.992733281396074e-05, + "loss": 0.534, + "step": 2894 + }, + { + "epoch": 0.07948929159802307, + "grad_norm": 0.4490526616573334, + "learning_rate": 1.992728083259366e-05, + "loss": 0.4647, + "step": 2895 + }, + { + "epoch": 0.07951674903898956, + "grad_norm": 0.3992559313774109, + "learning_rate": 1.992722883270904e-05, + "loss": 0.5246, + "step": 2896 + }, + { + "epoch": 0.07954420647995607, + "grad_norm": 0.3607464134693146, + "learning_rate": 1.9927176814306964e-05, + "loss": 0.4822, + "step": 2897 + }, + { + "epoch": 0.07957166392092258, + "grad_norm": 0.3901677429676056, + "learning_rate": 1.9927124777387537e-05, + "loss": 0.5891, + "step": 2898 + }, + { + "epoch": 0.07959912136188907, + "grad_norm": 0.38980552554130554, + "learning_rate": 1.9927072721950854e-05, + "loss": 0.5208, + "step": 2899 + }, + { + "epoch": 0.07962657880285558, + "grad_norm": 0.3503669500350952, + "learning_rate": 1.9927020647997012e-05, + "loss": 0.5909, + "step": 2900 + }, + { + "epoch": 0.07965403624382207, + "grad_norm": 0.37473902106285095, + "learning_rate": 1.9926968555526108e-05, + "loss": 0.558, + "step": 2901 + }, + { + "epoch": 0.07968149368478858, + "grad_norm": 0.3491728603839874, + "learning_rate": 1.9926916444538237e-05, + "loss": 0.5312, + "step": 2902 + }, + { + "epoch": 0.07970895112575507, + "grad_norm": 0.36730116605758667, + "learning_rate": 1.99268643150335e-05, + "loss": 0.5133, + "step": 2903 + }, + { + "epoch": 0.07973640856672158, + "grad_norm": 0.3758183419704437, + "learning_rate": 1.9926812167011997e-05, + "loss": 0.5637, + "step": 2904 + }, + { + "epoch": 0.07976386600768809, + "grad_norm": 0.3552912771701813, + "learning_rate": 1.9926760000473814e-05, + "loss": 0.5446, + "step": 2905 + }, + { + "epoch": 0.07979132344865458, + "grad_norm": 0.371304452419281, + "learning_rate": 1.992670781541906e-05, + "loss": 0.5928, + "step": 2906 + }, + { + "epoch": 0.07981878088962109, + "grad_norm": 0.481494665145874, + "learning_rate": 1.9926655611847826e-05, + "loss": 0.5548, + "step": 2907 + }, + { + "epoch": 0.07984623833058759, + "grad_norm": 0.325808048248291, + "learning_rate": 1.992660338976021e-05, + "loss": 0.4954, + "step": 2908 + }, + { + "epoch": 0.07987369577155409, + "grad_norm": 0.4524689316749573, + "learning_rate": 1.992655114915631e-05, + "loss": 0.6007, + "step": 2909 + }, + { + "epoch": 0.07990115321252059, + "grad_norm": 0.3450329601764679, + "learning_rate": 1.9926498890036228e-05, + "loss": 0.5695, + "step": 2910 + }, + { + "epoch": 0.0799286106534871, + "grad_norm": 0.3693739175796509, + "learning_rate": 1.9926446612400056e-05, + "loss": 0.5419, + "step": 2911 + }, + { + "epoch": 0.0799560680944536, + "grad_norm": 0.4213643968105316, + "learning_rate": 1.992639431624789e-05, + "loss": 0.5381, + "step": 2912 + }, + { + "epoch": 0.0799835255354201, + "grad_norm": 0.48941484093666077, + "learning_rate": 1.9926342001579833e-05, + "loss": 0.662, + "step": 2913 + }, + { + "epoch": 0.0800109829763866, + "grad_norm": 0.30614471435546875, + "learning_rate": 1.9926289668395978e-05, + "loss": 0.472, + "step": 2914 + }, + { + "epoch": 0.0800384404173531, + "grad_norm": 0.38299858570098877, + "learning_rate": 1.9926237316696427e-05, + "loss": 0.5578, + "step": 2915 + }, + { + "epoch": 0.0800658978583196, + "grad_norm": 0.3362836539745331, + "learning_rate": 1.9926184946481275e-05, + "loss": 0.5276, + "step": 2916 + }, + { + "epoch": 0.0800933552992861, + "grad_norm": 0.38571280241012573, + "learning_rate": 1.992613255775062e-05, + "loss": 0.6457, + "step": 2917 + }, + { + "epoch": 0.08012081274025261, + "grad_norm": 0.3981505334377289, + "learning_rate": 1.992608015050456e-05, + "loss": 0.5975, + "step": 2918 + }, + { + "epoch": 0.08014827018121912, + "grad_norm": 0.3718164563179016, + "learning_rate": 1.992602772474319e-05, + "loss": 0.6075, + "step": 2919 + }, + { + "epoch": 0.08017572762218561, + "grad_norm": 0.3259344696998596, + "learning_rate": 1.9925975280466612e-05, + "loss": 0.5222, + "step": 2920 + }, + { + "epoch": 0.08020318506315212, + "grad_norm": 0.3399087190628052, + "learning_rate": 1.9925922817674923e-05, + "loss": 0.5845, + "step": 2921 + }, + { + "epoch": 0.08023064250411861, + "grad_norm": 0.3290942311286926, + "learning_rate": 1.9925870336368218e-05, + "loss": 0.5314, + "step": 2922 + }, + { + "epoch": 0.08025809994508512, + "grad_norm": 0.3782019317150116, + "learning_rate": 1.9925817836546596e-05, + "loss": 0.6494, + "step": 2923 + }, + { + "epoch": 0.08028555738605161, + "grad_norm": 0.3859346807003021, + "learning_rate": 1.992576531821016e-05, + "loss": 0.3941, + "step": 2924 + }, + { + "epoch": 0.08031301482701812, + "grad_norm": 0.41803237795829773, + "learning_rate": 1.9925712781359e-05, + "loss": 0.5989, + "step": 2925 + }, + { + "epoch": 0.08034047226798463, + "grad_norm": 0.3629205524921417, + "learning_rate": 1.992566022599322e-05, + "loss": 0.5005, + "step": 2926 + }, + { + "epoch": 0.08036792970895112, + "grad_norm": 0.38809043169021606, + "learning_rate": 1.9925607652112916e-05, + "loss": 0.5833, + "step": 2927 + }, + { + "epoch": 0.08039538714991763, + "grad_norm": 0.35910719633102417, + "learning_rate": 1.9925555059718182e-05, + "loss": 0.511, + "step": 2928 + }, + { + "epoch": 0.08042284459088413, + "grad_norm": 0.4892318844795227, + "learning_rate": 1.992550244880912e-05, + "loss": 0.5588, + "step": 2929 + }, + { + "epoch": 0.08045030203185063, + "grad_norm": 0.3623504340648651, + "learning_rate": 1.9925449819385834e-05, + "loss": 0.5922, + "step": 2930 + }, + { + "epoch": 0.08047775947281713, + "grad_norm": 0.4201202690601349, + "learning_rate": 1.9925397171448412e-05, + "loss": 0.6213, + "step": 2931 + }, + { + "epoch": 0.08050521691378364, + "grad_norm": 0.3393542468547821, + "learning_rate": 1.992534450499696e-05, + "loss": 0.4986, + "step": 2932 + }, + { + "epoch": 0.08053267435475013, + "grad_norm": 0.38193461298942566, + "learning_rate": 1.992529182003157e-05, + "loss": 0.4899, + "step": 2933 + }, + { + "epoch": 0.08056013179571664, + "grad_norm": 0.43295520544052124, + "learning_rate": 1.992523911655234e-05, + "loss": 0.5158, + "step": 2934 + }, + { + "epoch": 0.08058758923668315, + "grad_norm": 0.41032472252845764, + "learning_rate": 1.9925186394559377e-05, + "loss": 0.5848, + "step": 2935 + }, + { + "epoch": 0.08061504667764964, + "grad_norm": 0.4271424114704132, + "learning_rate": 1.992513365405277e-05, + "loss": 0.537, + "step": 2936 + }, + { + "epoch": 0.08064250411861615, + "grad_norm": 0.33038344979286194, + "learning_rate": 1.9925080895032622e-05, + "loss": 0.5167, + "step": 2937 + }, + { + "epoch": 0.08066996155958264, + "grad_norm": 0.461454302072525, + "learning_rate": 1.992502811749903e-05, + "loss": 0.6101, + "step": 2938 + }, + { + "epoch": 0.08069741900054915, + "grad_norm": 0.39107322692871094, + "learning_rate": 1.9924975321452095e-05, + "loss": 0.5574, + "step": 2939 + }, + { + "epoch": 0.08072487644151564, + "grad_norm": 0.38988909125328064, + "learning_rate": 1.9924922506891912e-05, + "loss": 0.5904, + "step": 2940 + }, + { + "epoch": 0.08075233388248215, + "grad_norm": 0.4036739468574524, + "learning_rate": 1.992486967381858e-05, + "loss": 0.5324, + "step": 2941 + }, + { + "epoch": 0.08077979132344866, + "grad_norm": 0.3991895914077759, + "learning_rate": 1.99248168222322e-05, + "loss": 0.6519, + "step": 2942 + }, + { + "epoch": 0.08080724876441515, + "grad_norm": 0.4499501883983612, + "learning_rate": 1.9924763952132865e-05, + "loss": 0.5253, + "step": 2943 + }, + { + "epoch": 0.08083470620538166, + "grad_norm": 0.3219243586063385, + "learning_rate": 1.992471106352068e-05, + "loss": 0.5789, + "step": 2944 + }, + { + "epoch": 0.08086216364634816, + "grad_norm": 0.33325114846229553, + "learning_rate": 1.9924658156395744e-05, + "loss": 0.5099, + "step": 2945 + }, + { + "epoch": 0.08088962108731466, + "grad_norm": 0.3900696635246277, + "learning_rate": 1.992460523075815e-05, + "loss": 0.5578, + "step": 2946 + }, + { + "epoch": 0.08091707852828116, + "grad_norm": 0.39600586891174316, + "learning_rate": 1.9924552286607998e-05, + "loss": 0.5859, + "step": 2947 + }, + { + "epoch": 0.08094453596924767, + "grad_norm": 0.38592851161956787, + "learning_rate": 1.9924499323945392e-05, + "loss": 0.6131, + "step": 2948 + }, + { + "epoch": 0.08097199341021417, + "grad_norm": 0.4094874858856201, + "learning_rate": 1.9924446342770426e-05, + "loss": 0.5852, + "step": 2949 + }, + { + "epoch": 0.08099945085118067, + "grad_norm": 0.3711739778518677, + "learning_rate": 1.99243933430832e-05, + "loss": 0.5757, + "step": 2950 + }, + { + "epoch": 0.08102690829214718, + "grad_norm": 0.42694398760795593, + "learning_rate": 1.9924340324883815e-05, + "loss": 0.558, + "step": 2951 + }, + { + "epoch": 0.08105436573311367, + "grad_norm": 0.38341933488845825, + "learning_rate": 1.9924287288172365e-05, + "loss": 0.624, + "step": 2952 + }, + { + "epoch": 0.08108182317408018, + "grad_norm": 0.3375255763530731, + "learning_rate": 1.9924234232948952e-05, + "loss": 0.51, + "step": 2953 + }, + { + "epoch": 0.08110928061504667, + "grad_norm": 0.3824305236339569, + "learning_rate": 1.9924181159213676e-05, + "loss": 0.474, + "step": 2954 + }, + { + "epoch": 0.08113673805601318, + "grad_norm": 0.3944445848464966, + "learning_rate": 1.9924128066966633e-05, + "loss": 0.5921, + "step": 2955 + }, + { + "epoch": 0.08116419549697969, + "grad_norm": 0.3738815188407898, + "learning_rate": 1.9924074956207925e-05, + "loss": 0.5527, + "step": 2956 + }, + { + "epoch": 0.08119165293794618, + "grad_norm": 0.4116462469100952, + "learning_rate": 1.9924021826937648e-05, + "loss": 0.5411, + "step": 2957 + }, + { + "epoch": 0.08121911037891269, + "grad_norm": 0.359149307012558, + "learning_rate": 1.9923968679155906e-05, + "loss": 0.5336, + "step": 2958 + }, + { + "epoch": 0.08124656781987918, + "grad_norm": 0.3946773111820221, + "learning_rate": 1.9923915512862795e-05, + "loss": 0.5863, + "step": 2959 + }, + { + "epoch": 0.08127402526084569, + "grad_norm": 0.3651440441608429, + "learning_rate": 1.9923862328058412e-05, + "loss": 0.6559, + "step": 2960 + }, + { + "epoch": 0.08130148270181219, + "grad_norm": 0.42253732681274414, + "learning_rate": 1.992380912474286e-05, + "loss": 0.5675, + "step": 2961 + }, + { + "epoch": 0.0813289401427787, + "grad_norm": 0.4080330431461334, + "learning_rate": 1.9923755902916235e-05, + "loss": 0.5773, + "step": 2962 + }, + { + "epoch": 0.08135639758374519, + "grad_norm": 0.3515836000442505, + "learning_rate": 1.9923702662578637e-05, + "loss": 0.4322, + "step": 2963 + }, + { + "epoch": 0.0813838550247117, + "grad_norm": 0.3470718562602997, + "learning_rate": 1.9923649403730168e-05, + "loss": 0.5893, + "step": 2964 + }, + { + "epoch": 0.0814113124656782, + "grad_norm": 0.32626453042030334, + "learning_rate": 1.9923596126370928e-05, + "loss": 0.4671, + "step": 2965 + }, + { + "epoch": 0.0814387699066447, + "grad_norm": 0.3677840232849121, + "learning_rate": 1.992354283050101e-05, + "loss": 0.5136, + "step": 2966 + }, + { + "epoch": 0.0814662273476112, + "grad_norm": 0.3241141736507416, + "learning_rate": 1.992348951612052e-05, + "loss": 0.4912, + "step": 2967 + }, + { + "epoch": 0.0814936847885777, + "grad_norm": 0.4776799976825714, + "learning_rate": 1.9923436183229552e-05, + "loss": 0.5477, + "step": 2968 + }, + { + "epoch": 0.08152114222954421, + "grad_norm": 0.44733965396881104, + "learning_rate": 1.992338283182821e-05, + "loss": 0.5244, + "step": 2969 + }, + { + "epoch": 0.0815485996705107, + "grad_norm": 0.35925260186195374, + "learning_rate": 1.9923329461916592e-05, + "loss": 0.4831, + "step": 2970 + }, + { + "epoch": 0.08157605711147721, + "grad_norm": 0.4334067404270172, + "learning_rate": 1.9923276073494796e-05, + "loss": 0.6141, + "step": 2971 + }, + { + "epoch": 0.08160351455244372, + "grad_norm": 0.3816995918750763, + "learning_rate": 1.9923222666562928e-05, + "loss": 0.538, + "step": 2972 + }, + { + "epoch": 0.08163097199341021, + "grad_norm": 0.37118253111839294, + "learning_rate": 1.9923169241121076e-05, + "loss": 0.5175, + "step": 2973 + }, + { + "epoch": 0.08165842943437672, + "grad_norm": 0.3629944324493408, + "learning_rate": 1.992311579716935e-05, + "loss": 0.541, + "step": 2974 + }, + { + "epoch": 0.08168588687534321, + "grad_norm": 0.38288435339927673, + "learning_rate": 1.992306233470785e-05, + "loss": 0.6226, + "step": 2975 + }, + { + "epoch": 0.08171334431630972, + "grad_norm": 0.4302304983139038, + "learning_rate": 1.992300885373666e-05, + "loss": 0.6313, + "step": 2976 + }, + { + "epoch": 0.08174080175727622, + "grad_norm": 0.4304451048374176, + "learning_rate": 1.9922955354255903e-05, + "loss": 0.6165, + "step": 2977 + }, + { + "epoch": 0.08176825919824272, + "grad_norm": 0.42249906063079834, + "learning_rate": 1.9922901836265662e-05, + "loss": 0.6133, + "step": 2978 + }, + { + "epoch": 0.08179571663920923, + "grad_norm": 0.4036064147949219, + "learning_rate": 1.992284829976604e-05, + "loss": 0.4748, + "step": 2979 + }, + { + "epoch": 0.08182317408017573, + "grad_norm": 0.3455677032470703, + "learning_rate": 1.9922794744757143e-05, + "loss": 0.5184, + "step": 2980 + }, + { + "epoch": 0.08185063152114223, + "grad_norm": 0.4761973023414612, + "learning_rate": 1.9922741171239064e-05, + "loss": 0.6345, + "step": 2981 + }, + { + "epoch": 0.08187808896210873, + "grad_norm": 0.434856116771698, + "learning_rate": 1.9922687579211906e-05, + "loss": 0.5556, + "step": 2982 + }, + { + "epoch": 0.08190554640307524, + "grad_norm": 0.37445515394210815, + "learning_rate": 1.992263396867577e-05, + "loss": 0.5523, + "step": 2983 + }, + { + "epoch": 0.08193300384404173, + "grad_norm": 0.3755737245082855, + "learning_rate": 1.992258033963076e-05, + "loss": 0.4932, + "step": 2984 + }, + { + "epoch": 0.08196046128500824, + "grad_norm": 0.3619568645954132, + "learning_rate": 1.9922526692076962e-05, + "loss": 0.4652, + "step": 2985 + }, + { + "epoch": 0.08198791872597475, + "grad_norm": 0.3754488229751587, + "learning_rate": 1.992247302601449e-05, + "loss": 0.5195, + "step": 2986 + }, + { + "epoch": 0.08201537616694124, + "grad_norm": 0.36593571305274963, + "learning_rate": 1.9922419341443433e-05, + "loss": 0.5565, + "step": 2987 + }, + { + "epoch": 0.08204283360790775, + "grad_norm": 0.34616708755493164, + "learning_rate": 1.9922365638363902e-05, + "loss": 0.5019, + "step": 2988 + }, + { + "epoch": 0.08207029104887424, + "grad_norm": 0.3773635923862457, + "learning_rate": 1.992231191677599e-05, + "loss": 0.4736, + "step": 2989 + }, + { + "epoch": 0.08209774848984075, + "grad_norm": 0.35893407464027405, + "learning_rate": 1.9922258176679803e-05, + "loss": 0.6407, + "step": 2990 + }, + { + "epoch": 0.08212520593080724, + "grad_norm": 0.39510247111320496, + "learning_rate": 1.9922204418075434e-05, + "loss": 0.5369, + "step": 2991 + }, + { + "epoch": 0.08215266337177375, + "grad_norm": 0.3639708161354065, + "learning_rate": 1.992215064096299e-05, + "loss": 0.5451, + "step": 2992 + }, + { + "epoch": 0.08218012081274026, + "grad_norm": 0.36584392189979553, + "learning_rate": 1.9922096845342564e-05, + "loss": 0.6017, + "step": 2993 + }, + { + "epoch": 0.08220757825370675, + "grad_norm": 0.4056588113307953, + "learning_rate": 1.9922043031214263e-05, + "loss": 0.6251, + "step": 2994 + }, + { + "epoch": 0.08223503569467326, + "grad_norm": 0.43352216482162476, + "learning_rate": 1.9921989198578187e-05, + "loss": 0.6132, + "step": 2995 + }, + { + "epoch": 0.08226249313563976, + "grad_norm": 0.3369750380516052, + "learning_rate": 1.9921935347434435e-05, + "loss": 0.4993, + "step": 2996 + }, + { + "epoch": 0.08228995057660626, + "grad_norm": 0.37279918789863586, + "learning_rate": 1.9921881477783104e-05, + "loss": 0.5179, + "step": 2997 + }, + { + "epoch": 0.08231740801757276, + "grad_norm": 0.419405460357666, + "learning_rate": 1.9921827589624297e-05, + "loss": 0.6962, + "step": 2998 + }, + { + "epoch": 0.08234486545853927, + "grad_norm": 0.42972368001937866, + "learning_rate": 1.9921773682958114e-05, + "loss": 0.5114, + "step": 2999 + }, + { + "epoch": 0.08237232289950576, + "grad_norm": 0.35561293363571167, + "learning_rate": 1.9921719757784658e-05, + "loss": 0.5028, + "step": 3000 + }, + { + "epoch": 0.08239978034047227, + "grad_norm": 0.3175599277019501, + "learning_rate": 1.9921665814104027e-05, + "loss": 0.5518, + "step": 3001 + }, + { + "epoch": 0.08242723778143878, + "grad_norm": 0.36875686049461365, + "learning_rate": 1.992161185191633e-05, + "loss": 0.5895, + "step": 3002 + }, + { + "epoch": 0.08245469522240527, + "grad_norm": 0.33067041635513306, + "learning_rate": 1.992155787122165e-05, + "loss": 0.6587, + "step": 3003 + }, + { + "epoch": 0.08248215266337178, + "grad_norm": 0.33115819096565247, + "learning_rate": 1.99215038720201e-05, + "loss": 0.5332, + "step": 3004 + }, + { + "epoch": 0.08250961010433827, + "grad_norm": 0.3827197849750519, + "learning_rate": 1.9921449854311782e-05, + "loss": 0.6507, + "step": 3005 + }, + { + "epoch": 0.08253706754530478, + "grad_norm": 0.4356773793697357, + "learning_rate": 1.9921395818096792e-05, + "loss": 0.5779, + "step": 3006 + }, + { + "epoch": 0.08256452498627127, + "grad_norm": 0.4465574324131012, + "learning_rate": 1.9921341763375234e-05, + "loss": 0.5568, + "step": 3007 + }, + { + "epoch": 0.08259198242723778, + "grad_norm": 0.39597493410110474, + "learning_rate": 1.9921287690147205e-05, + "loss": 0.5744, + "step": 3008 + }, + { + "epoch": 0.08261943986820429, + "grad_norm": 0.3700196146965027, + "learning_rate": 1.9921233598412808e-05, + "loss": 0.489, + "step": 3009 + }, + { + "epoch": 0.08264689730917078, + "grad_norm": 0.38273143768310547, + "learning_rate": 1.9921179488172146e-05, + "loss": 0.6361, + "step": 3010 + }, + { + "epoch": 0.08267435475013729, + "grad_norm": 0.373741090297699, + "learning_rate": 1.9921125359425315e-05, + "loss": 0.6075, + "step": 3011 + }, + { + "epoch": 0.08270181219110379, + "grad_norm": 0.4957992434501648, + "learning_rate": 1.992107121217242e-05, + "loss": 0.5183, + "step": 3012 + }, + { + "epoch": 0.08272926963207029, + "grad_norm": 0.3566875755786896, + "learning_rate": 1.992101704641356e-05, + "loss": 0.4961, + "step": 3013 + }, + { + "epoch": 0.08275672707303679, + "grad_norm": 0.37970170378685, + "learning_rate": 1.9920962862148837e-05, + "loss": 0.4791, + "step": 3014 + }, + { + "epoch": 0.0827841845140033, + "grad_norm": 0.3923743963241577, + "learning_rate": 1.9920908659378354e-05, + "loss": 0.5504, + "step": 3015 + }, + { + "epoch": 0.0828116419549698, + "grad_norm": 0.43154841661453247, + "learning_rate": 1.9920854438102207e-05, + "loss": 0.6419, + "step": 3016 + }, + { + "epoch": 0.0828390993959363, + "grad_norm": 0.3938272297382355, + "learning_rate": 1.99208001983205e-05, + "loss": 0.6334, + "step": 3017 + }, + { + "epoch": 0.0828665568369028, + "grad_norm": 0.3559965193271637, + "learning_rate": 1.9920745940033337e-05, + "loss": 0.5746, + "step": 3018 + }, + { + "epoch": 0.0828940142778693, + "grad_norm": 0.3944675624370575, + "learning_rate": 1.9920691663240814e-05, + "loss": 0.56, + "step": 3019 + }, + { + "epoch": 0.0829214717188358, + "grad_norm": 0.3873023986816406, + "learning_rate": 1.9920637367943037e-05, + "loss": 0.6084, + "step": 3020 + }, + { + "epoch": 0.0829489291598023, + "grad_norm": 0.34189581871032715, + "learning_rate": 1.9920583054140104e-05, + "loss": 0.5169, + "step": 3021 + }, + { + "epoch": 0.08297638660076881, + "grad_norm": 0.4123912453651428, + "learning_rate": 1.9920528721832116e-05, + "loss": 0.6334, + "step": 3022 + }, + { + "epoch": 0.08300384404173532, + "grad_norm": 0.36992576718330383, + "learning_rate": 1.992047437101918e-05, + "loss": 0.5991, + "step": 3023 + }, + { + "epoch": 0.08303130148270181, + "grad_norm": 0.367628276348114, + "learning_rate": 1.992042000170139e-05, + "loss": 0.5553, + "step": 3024 + }, + { + "epoch": 0.08305875892366832, + "grad_norm": 0.4319625794887543, + "learning_rate": 1.9920365613878845e-05, + "loss": 0.4801, + "step": 3025 + }, + { + "epoch": 0.08308621636463481, + "grad_norm": 0.44401952624320984, + "learning_rate": 1.9920311207551658e-05, + "loss": 0.5672, + "step": 3026 + }, + { + "epoch": 0.08311367380560132, + "grad_norm": 0.4952750504016876, + "learning_rate": 1.9920256782719924e-05, + "loss": 0.5561, + "step": 3027 + }, + { + "epoch": 0.08314113124656781, + "grad_norm": 0.39820578694343567, + "learning_rate": 1.992020233938375e-05, + "loss": 0.6143, + "step": 3028 + }, + { + "epoch": 0.08316858868753432, + "grad_norm": 0.32245948910713196, + "learning_rate": 1.9920147877543224e-05, + "loss": 0.5352, + "step": 3029 + }, + { + "epoch": 0.08319604612850082, + "grad_norm": 0.42104482650756836, + "learning_rate": 1.9920093397198455e-05, + "loss": 0.553, + "step": 3030 + }, + { + "epoch": 0.08322350356946732, + "grad_norm": 0.3802202641963959, + "learning_rate": 1.992003889834955e-05, + "loss": 0.5043, + "step": 3031 + }, + { + "epoch": 0.08325096101043383, + "grad_norm": 0.38581180572509766, + "learning_rate": 1.9919984380996607e-05, + "loss": 0.5011, + "step": 3032 + }, + { + "epoch": 0.08327841845140033, + "grad_norm": 0.39783766865730286, + "learning_rate": 1.991992984513973e-05, + "loss": 0.5504, + "step": 3033 + }, + { + "epoch": 0.08330587589236683, + "grad_norm": 0.3662765324115753, + "learning_rate": 1.991987529077901e-05, + "loss": 0.5669, + "step": 3034 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.35435813665390015, + "learning_rate": 1.9919820717914562e-05, + "loss": 0.5949, + "step": 3035 + }, + { + "epoch": 0.08336079077429984, + "grad_norm": 0.3844055235385895, + "learning_rate": 1.9919766126546484e-05, + "loss": 0.6105, + "step": 3036 + }, + { + "epoch": 0.08338824821526633, + "grad_norm": 0.32428309321403503, + "learning_rate": 1.991971151667487e-05, + "loss": 0.5671, + "step": 3037 + }, + { + "epoch": 0.08341570565623284, + "grad_norm": 0.3872242271900177, + "learning_rate": 1.991965688829983e-05, + "loss": 0.605, + "step": 3038 + }, + { + "epoch": 0.08344316309719935, + "grad_norm": 0.3888581693172455, + "learning_rate": 1.9919602241421466e-05, + "loss": 0.5297, + "step": 3039 + }, + { + "epoch": 0.08347062053816584, + "grad_norm": 0.34023338556289673, + "learning_rate": 1.9919547576039877e-05, + "loss": 0.5428, + "step": 3040 + }, + { + "epoch": 0.08349807797913235, + "grad_norm": 0.3719659745693207, + "learning_rate": 1.9919492892155164e-05, + "loss": 0.5452, + "step": 3041 + }, + { + "epoch": 0.08352553542009884, + "grad_norm": 0.4186488687992096, + "learning_rate": 1.9919438189767434e-05, + "loss": 0.5555, + "step": 3042 + }, + { + "epoch": 0.08355299286106535, + "grad_norm": 0.37303292751312256, + "learning_rate": 1.9919383468876787e-05, + "loss": 0.4633, + "step": 3043 + }, + { + "epoch": 0.08358045030203184, + "grad_norm": 0.415324866771698, + "learning_rate": 1.9919328729483318e-05, + "loss": 0.5653, + "step": 3044 + }, + { + "epoch": 0.08360790774299835, + "grad_norm": 0.37103843688964844, + "learning_rate": 1.991927397158714e-05, + "loss": 0.5828, + "step": 3045 + }, + { + "epoch": 0.08363536518396486, + "grad_norm": 0.29787832498550415, + "learning_rate": 1.9919219195188347e-05, + "loss": 0.4469, + "step": 3046 + }, + { + "epoch": 0.08366282262493135, + "grad_norm": 0.3436926305294037, + "learning_rate": 1.9919164400287044e-05, + "loss": 0.4913, + "step": 3047 + }, + { + "epoch": 0.08369028006589786, + "grad_norm": 0.34001094102859497, + "learning_rate": 1.9919109586883337e-05, + "loss": 0.5923, + "step": 3048 + }, + { + "epoch": 0.08371773750686436, + "grad_norm": 0.3286226689815521, + "learning_rate": 1.9919054754977323e-05, + "loss": 0.4359, + "step": 3049 + }, + { + "epoch": 0.08374519494783086, + "grad_norm": 0.36464717984199524, + "learning_rate": 1.9918999904569104e-05, + "loss": 0.5533, + "step": 3050 + }, + { + "epoch": 0.08377265238879736, + "grad_norm": 0.3548579812049866, + "learning_rate": 1.991894503565879e-05, + "loss": 0.5859, + "step": 3051 + }, + { + "epoch": 0.08380010982976387, + "grad_norm": 0.3492199182510376, + "learning_rate": 1.9918890148246472e-05, + "loss": 0.5503, + "step": 3052 + }, + { + "epoch": 0.08382756727073037, + "grad_norm": 0.3771657645702362, + "learning_rate": 1.991883524233226e-05, + "loss": 0.5398, + "step": 3053 + }, + { + "epoch": 0.08385502471169687, + "grad_norm": 0.3610140383243561, + "learning_rate": 1.9918780317916258e-05, + "loss": 0.5348, + "step": 3054 + }, + { + "epoch": 0.08388248215266338, + "grad_norm": 0.37360113859176636, + "learning_rate": 1.991872537499856e-05, + "loss": 0.5061, + "step": 3055 + }, + { + "epoch": 0.08390993959362987, + "grad_norm": 0.3414529263973236, + "learning_rate": 1.9918670413579274e-05, + "loss": 0.5572, + "step": 3056 + }, + { + "epoch": 0.08393739703459638, + "grad_norm": 0.41530749201774597, + "learning_rate": 1.9918615433658504e-05, + "loss": 0.5713, + "step": 3057 + }, + { + "epoch": 0.08396485447556287, + "grad_norm": 0.3743249475955963, + "learning_rate": 1.991856043523635e-05, + "loss": 0.5976, + "step": 3058 + }, + { + "epoch": 0.08399231191652938, + "grad_norm": 0.3908880949020386, + "learning_rate": 1.9918505418312916e-05, + "loss": 0.5935, + "step": 3059 + }, + { + "epoch": 0.08401976935749589, + "grad_norm": 0.37082165479660034, + "learning_rate": 1.9918450382888304e-05, + "loss": 0.5727, + "step": 3060 + }, + { + "epoch": 0.08404722679846238, + "grad_norm": 0.4048471450805664, + "learning_rate": 1.9918395328962615e-05, + "loss": 0.6256, + "step": 3061 + }, + { + "epoch": 0.08407468423942889, + "grad_norm": 0.346057265996933, + "learning_rate": 1.9918340256535954e-05, + "loss": 0.5468, + "step": 3062 + }, + { + "epoch": 0.08410214168039538, + "grad_norm": 0.3500054180622101, + "learning_rate": 1.9918285165608424e-05, + "loss": 0.5073, + "step": 3063 + }, + { + "epoch": 0.08412959912136189, + "grad_norm": 0.354735791683197, + "learning_rate": 1.991823005618012e-05, + "loss": 0.6059, + "step": 3064 + }, + { + "epoch": 0.08415705656232839, + "grad_norm": 0.3932260572910309, + "learning_rate": 1.991817492825116e-05, + "loss": 0.4772, + "step": 3065 + }, + { + "epoch": 0.0841845140032949, + "grad_norm": 0.4607710838317871, + "learning_rate": 1.9918119781821636e-05, + "loss": 0.5828, + "step": 3066 + }, + { + "epoch": 0.08421197144426139, + "grad_norm": 0.38526609539985657, + "learning_rate": 1.991806461689165e-05, + "loss": 0.6468, + "step": 3067 + }, + { + "epoch": 0.0842394288852279, + "grad_norm": 0.355780690908432, + "learning_rate": 1.991800943346131e-05, + "loss": 0.49, + "step": 3068 + }, + { + "epoch": 0.0842668863261944, + "grad_norm": 0.38191473484039307, + "learning_rate": 1.9917954231530716e-05, + "loss": 0.51, + "step": 3069 + }, + { + "epoch": 0.0842943437671609, + "grad_norm": 0.3502902686595917, + "learning_rate": 1.9917899011099972e-05, + "loss": 0.4632, + "step": 3070 + }, + { + "epoch": 0.0843218012081274, + "grad_norm": 1.0015678405761719, + "learning_rate": 1.9917843772169182e-05, + "loss": 0.5576, + "step": 3071 + }, + { + "epoch": 0.0843492586490939, + "grad_norm": 0.3703102171421051, + "learning_rate": 1.9917788514738447e-05, + "loss": 0.5109, + "step": 3072 + }, + { + "epoch": 0.08437671609006041, + "grad_norm": 0.3525243401527405, + "learning_rate": 1.9917733238807872e-05, + "loss": 0.4878, + "step": 3073 + }, + { + "epoch": 0.0844041735310269, + "grad_norm": 0.4019070267677307, + "learning_rate": 1.991767794437756e-05, + "loss": 0.5873, + "step": 3074 + }, + { + "epoch": 0.08443163097199341, + "grad_norm": 0.3399433493614197, + "learning_rate": 1.991762263144761e-05, + "loss": 0.522, + "step": 3075 + }, + { + "epoch": 0.08445908841295992, + "grad_norm": 0.4678155779838562, + "learning_rate": 1.991756730001813e-05, + "loss": 0.5594, + "step": 3076 + }, + { + "epoch": 0.08448654585392641, + "grad_norm": 0.39123469591140747, + "learning_rate": 1.9917511950089224e-05, + "loss": 0.5387, + "step": 3077 + }, + { + "epoch": 0.08451400329489292, + "grad_norm": 0.3664635419845581, + "learning_rate": 1.991745658166099e-05, + "loss": 0.53, + "step": 3078 + }, + { + "epoch": 0.08454146073585941, + "grad_norm": 0.3622594475746155, + "learning_rate": 1.9917401194733538e-05, + "loss": 0.555, + "step": 3079 + }, + { + "epoch": 0.08456891817682592, + "grad_norm": 0.35493385791778564, + "learning_rate": 1.991734578930696e-05, + "loss": 0.5901, + "step": 3080 + }, + { + "epoch": 0.08459637561779242, + "grad_norm": 0.51849764585495, + "learning_rate": 1.9917290365381373e-05, + "loss": 0.5319, + "step": 3081 + }, + { + "epoch": 0.08462383305875892, + "grad_norm": 0.34235748648643494, + "learning_rate": 1.9917234922956874e-05, + "loss": 0.5791, + "step": 3082 + }, + { + "epoch": 0.08465129049972543, + "grad_norm": 0.39979732036590576, + "learning_rate": 1.9917179462033568e-05, + "loss": 0.6167, + "step": 3083 + }, + { + "epoch": 0.08467874794069193, + "grad_norm": 0.38724082708358765, + "learning_rate": 1.991712398261155e-05, + "loss": 0.6287, + "step": 3084 + }, + { + "epoch": 0.08470620538165843, + "grad_norm": 0.3478383421897888, + "learning_rate": 1.9917068484690937e-05, + "loss": 0.521, + "step": 3085 + }, + { + "epoch": 0.08473366282262493, + "grad_norm": 0.3568466305732727, + "learning_rate": 1.9917012968271824e-05, + "loss": 0.6104, + "step": 3086 + }, + { + "epoch": 0.08476112026359144, + "grad_norm": 0.3691876232624054, + "learning_rate": 1.9916957433354316e-05, + "loss": 0.5038, + "step": 3087 + }, + { + "epoch": 0.08478857770455793, + "grad_norm": 0.3892597556114197, + "learning_rate": 1.991690187993852e-05, + "loss": 0.5951, + "step": 3088 + }, + { + "epoch": 0.08481603514552444, + "grad_norm": 0.3788401484489441, + "learning_rate": 1.9916846308024532e-05, + "loss": 0.5338, + "step": 3089 + }, + { + "epoch": 0.08484349258649095, + "grad_norm": 0.3392800986766815, + "learning_rate": 1.9916790717612467e-05, + "loss": 0.5263, + "step": 3090 + }, + { + "epoch": 0.08487095002745744, + "grad_norm": 0.36497923731803894, + "learning_rate": 1.991673510870242e-05, + "loss": 0.5213, + "step": 3091 + }, + { + "epoch": 0.08489840746842395, + "grad_norm": 0.485361248254776, + "learning_rate": 1.9916679481294493e-05, + "loss": 0.5762, + "step": 3092 + }, + { + "epoch": 0.08492586490939044, + "grad_norm": 0.4000490605831146, + "learning_rate": 1.9916623835388797e-05, + "loss": 0.5841, + "step": 3093 + }, + { + "epoch": 0.08495332235035695, + "grad_norm": 0.34389007091522217, + "learning_rate": 1.991656817098543e-05, + "loss": 0.5151, + "step": 3094 + }, + { + "epoch": 0.08498077979132344, + "grad_norm": 0.35685890913009644, + "learning_rate": 1.9916512488084502e-05, + "loss": 0.5094, + "step": 3095 + }, + { + "epoch": 0.08500823723228995, + "grad_norm": 0.43347710371017456, + "learning_rate": 1.9916456786686113e-05, + "loss": 0.5382, + "step": 3096 + }, + { + "epoch": 0.08503569467325645, + "grad_norm": 0.38122981786727905, + "learning_rate": 1.9916401066790365e-05, + "loss": 0.57, + "step": 3097 + }, + { + "epoch": 0.08506315211422295, + "grad_norm": 0.33660954236984253, + "learning_rate": 1.9916345328397365e-05, + "loss": 0.489, + "step": 3098 + }, + { + "epoch": 0.08509060955518946, + "grad_norm": 0.3994135856628418, + "learning_rate": 1.9916289571507215e-05, + "loss": 0.5919, + "step": 3099 + }, + { + "epoch": 0.08511806699615596, + "grad_norm": 0.39044275879859924, + "learning_rate": 1.991623379612002e-05, + "loss": 0.5619, + "step": 3100 + }, + { + "epoch": 0.08514552443712246, + "grad_norm": 0.3638702929019928, + "learning_rate": 1.9916178002235886e-05, + "loss": 0.545, + "step": 3101 + }, + { + "epoch": 0.08517298187808896, + "grad_norm": 0.3598957359790802, + "learning_rate": 1.9916122189854918e-05, + "loss": 0.5219, + "step": 3102 + }, + { + "epoch": 0.08520043931905547, + "grad_norm": 0.353473961353302, + "learning_rate": 1.991606635897721e-05, + "loss": 0.5363, + "step": 3103 + }, + { + "epoch": 0.08522789676002196, + "grad_norm": 0.3923407196998596, + "learning_rate": 1.991601050960288e-05, + "loss": 0.6281, + "step": 3104 + }, + { + "epoch": 0.08525535420098847, + "grad_norm": 0.4042568504810333, + "learning_rate": 1.991595464173202e-05, + "loss": 0.5601, + "step": 3105 + }, + { + "epoch": 0.08528281164195498, + "grad_norm": 0.3507134020328522, + "learning_rate": 1.9915898755364743e-05, + "loss": 0.6007, + "step": 3106 + }, + { + "epoch": 0.08531026908292147, + "grad_norm": 0.38078415393829346, + "learning_rate": 1.991584285050115e-05, + "loss": 0.612, + "step": 3107 + }, + { + "epoch": 0.08533772652388798, + "grad_norm": 0.3405579626560211, + "learning_rate": 1.9915786927141344e-05, + "loss": 0.5189, + "step": 3108 + }, + { + "epoch": 0.08536518396485447, + "grad_norm": 0.34907203912734985, + "learning_rate": 1.991573098528543e-05, + "loss": 0.5221, + "step": 3109 + }, + { + "epoch": 0.08539264140582098, + "grad_norm": 0.3139292299747467, + "learning_rate": 1.9915675024933514e-05, + "loss": 0.5014, + "step": 3110 + }, + { + "epoch": 0.08542009884678747, + "grad_norm": 0.39074277877807617, + "learning_rate": 1.99156190460857e-05, + "loss": 0.616, + "step": 3111 + }, + { + "epoch": 0.08544755628775398, + "grad_norm": 0.3312952518463135, + "learning_rate": 1.991556304874209e-05, + "loss": 0.458, + "step": 3112 + }, + { + "epoch": 0.08547501372872049, + "grad_norm": 0.6165147423744202, + "learning_rate": 1.9915507032902793e-05, + "loss": 0.6427, + "step": 3113 + }, + { + "epoch": 0.08550247116968698, + "grad_norm": 0.33941954374313354, + "learning_rate": 1.991545099856791e-05, + "loss": 0.5458, + "step": 3114 + }, + { + "epoch": 0.08552992861065349, + "grad_norm": 0.37801194190979004, + "learning_rate": 1.9915394945737543e-05, + "loss": 0.5744, + "step": 3115 + }, + { + "epoch": 0.08555738605161999, + "grad_norm": 0.3633836507797241, + "learning_rate": 1.9915338874411803e-05, + "loss": 0.555, + "step": 3116 + }, + { + "epoch": 0.0855848434925865, + "grad_norm": 0.3239288628101349, + "learning_rate": 1.9915282784590793e-05, + "loss": 0.4792, + "step": 3117 + }, + { + "epoch": 0.08561230093355299, + "grad_norm": 0.34880468249320984, + "learning_rate": 1.991522667627461e-05, + "loss": 0.6396, + "step": 3118 + }, + { + "epoch": 0.0856397583745195, + "grad_norm": 0.38951992988586426, + "learning_rate": 1.991517054946337e-05, + "loss": 0.5702, + "step": 3119 + }, + { + "epoch": 0.085667215815486, + "grad_norm": 0.4017128050327301, + "learning_rate": 1.991511440415717e-05, + "loss": 0.5329, + "step": 3120 + }, + { + "epoch": 0.0856946732564525, + "grad_norm": 0.3894684314727783, + "learning_rate": 1.991505824035612e-05, + "loss": 0.5378, + "step": 3121 + }, + { + "epoch": 0.085722130697419, + "grad_norm": 0.36977410316467285, + "learning_rate": 1.9915002058060318e-05, + "loss": 0.5595, + "step": 3122 + }, + { + "epoch": 0.0857495881383855, + "grad_norm": 0.38841933012008667, + "learning_rate": 1.9914945857269874e-05, + "loss": 0.5602, + "step": 3123 + }, + { + "epoch": 0.08577704557935201, + "grad_norm": 0.34277161955833435, + "learning_rate": 1.9914889637984892e-05, + "loss": 0.5242, + "step": 3124 + }, + { + "epoch": 0.0858045030203185, + "grad_norm": 0.40681585669517517, + "learning_rate": 1.9914833400205474e-05, + "loss": 0.5235, + "step": 3125 + }, + { + "epoch": 0.08583196046128501, + "grad_norm": 0.3350759446620941, + "learning_rate": 1.9914777143931733e-05, + "loss": 0.4867, + "step": 3126 + }, + { + "epoch": 0.08585941790225152, + "grad_norm": 0.37349146604537964, + "learning_rate": 1.9914720869163762e-05, + "loss": 0.554, + "step": 3127 + }, + { + "epoch": 0.08588687534321801, + "grad_norm": 0.38777318596839905, + "learning_rate": 1.9914664575901678e-05, + "loss": 0.5376, + "step": 3128 + }, + { + "epoch": 0.08591433278418452, + "grad_norm": 0.4212503731250763, + "learning_rate": 1.9914608264145574e-05, + "loss": 0.6626, + "step": 3129 + }, + { + "epoch": 0.08594179022515101, + "grad_norm": 0.4425583481788635, + "learning_rate": 1.9914551933895566e-05, + "loss": 0.5283, + "step": 3130 + }, + { + "epoch": 0.08596924766611752, + "grad_norm": 0.35119709372520447, + "learning_rate": 1.9914495585151753e-05, + "loss": 0.494, + "step": 3131 + }, + { + "epoch": 0.08599670510708401, + "grad_norm": 0.3854362964630127, + "learning_rate": 1.9914439217914242e-05, + "loss": 0.6496, + "step": 3132 + }, + { + "epoch": 0.08602416254805052, + "grad_norm": 0.375833123922348, + "learning_rate": 1.9914382832183136e-05, + "loss": 0.5762, + "step": 3133 + }, + { + "epoch": 0.08605161998901702, + "grad_norm": 0.3516687750816345, + "learning_rate": 1.9914326427958543e-05, + "loss": 0.47, + "step": 3134 + }, + { + "epoch": 0.08607907742998352, + "grad_norm": 0.34540843963623047, + "learning_rate": 1.9914270005240564e-05, + "loss": 0.5408, + "step": 3135 + }, + { + "epoch": 0.08610653487095003, + "grad_norm": 0.3656224012374878, + "learning_rate": 1.9914213564029308e-05, + "loss": 0.532, + "step": 3136 + }, + { + "epoch": 0.08613399231191653, + "grad_norm": 0.33878353238105774, + "learning_rate": 1.991415710432488e-05, + "loss": 0.5682, + "step": 3137 + }, + { + "epoch": 0.08616144975288303, + "grad_norm": 0.3688545823097229, + "learning_rate": 1.9914100626127388e-05, + "loss": 0.5719, + "step": 3138 + }, + { + "epoch": 0.08618890719384953, + "grad_norm": 0.43538740277290344, + "learning_rate": 1.991404412943693e-05, + "loss": 0.6257, + "step": 3139 + }, + { + "epoch": 0.08621636463481604, + "grad_norm": 0.3873177766799927, + "learning_rate": 1.9913987614253616e-05, + "loss": 0.5849, + "step": 3140 + }, + { + "epoch": 0.08624382207578253, + "grad_norm": 0.3697783350944519, + "learning_rate": 1.9913931080577554e-05, + "loss": 0.5475, + "step": 3141 + }, + { + "epoch": 0.08627127951674904, + "grad_norm": 0.34691882133483887, + "learning_rate": 1.9913874528408844e-05, + "loss": 0.532, + "step": 3142 + }, + { + "epoch": 0.08629873695771555, + "grad_norm": 0.3783372938632965, + "learning_rate": 1.9913817957747593e-05, + "loss": 0.6379, + "step": 3143 + }, + { + "epoch": 0.08632619439868204, + "grad_norm": 0.3556547164916992, + "learning_rate": 1.9913761368593908e-05, + "loss": 0.4955, + "step": 3144 + }, + { + "epoch": 0.08635365183964855, + "grad_norm": 0.3570651412010193, + "learning_rate": 1.9913704760947898e-05, + "loss": 0.572, + "step": 3145 + }, + { + "epoch": 0.08638110928061504, + "grad_norm": 0.3521994352340698, + "learning_rate": 1.991364813480966e-05, + "loss": 0.6704, + "step": 3146 + }, + { + "epoch": 0.08640856672158155, + "grad_norm": 0.39765864610671997, + "learning_rate": 1.9913591490179306e-05, + "loss": 0.5703, + "step": 3147 + }, + { + "epoch": 0.08643602416254804, + "grad_norm": 0.3677360713481903, + "learning_rate": 1.991353482705694e-05, + "loss": 0.5795, + "step": 3148 + }, + { + "epoch": 0.08646348160351455, + "grad_norm": 0.42152076959609985, + "learning_rate": 1.9913478145442667e-05, + "loss": 0.5003, + "step": 3149 + }, + { + "epoch": 0.08649093904448106, + "grad_norm": 0.43698254227638245, + "learning_rate": 1.991342144533659e-05, + "loss": 0.6216, + "step": 3150 + }, + { + "epoch": 0.08651839648544755, + "grad_norm": 0.438378244638443, + "learning_rate": 1.9913364726738822e-05, + "loss": 0.5804, + "step": 3151 + }, + { + "epoch": 0.08654585392641406, + "grad_norm": 1.5449484586715698, + "learning_rate": 1.9913307989649464e-05, + "loss": 0.5545, + "step": 3152 + }, + { + "epoch": 0.08657331136738056, + "grad_norm": 0.38163113594055176, + "learning_rate": 1.991325123406862e-05, + "loss": 0.633, + "step": 3153 + }, + { + "epoch": 0.08660076880834706, + "grad_norm": 0.3833751678466797, + "learning_rate": 1.9913194459996402e-05, + "loss": 0.6094, + "step": 3154 + }, + { + "epoch": 0.08662822624931356, + "grad_norm": 0.38165175914764404, + "learning_rate": 1.991313766743291e-05, + "loss": 0.6012, + "step": 3155 + }, + { + "epoch": 0.08665568369028007, + "grad_norm": 0.37796810269355774, + "learning_rate": 1.9913080856378254e-05, + "loss": 0.5772, + "step": 3156 + }, + { + "epoch": 0.08668314113124657, + "grad_norm": 0.37214577198028564, + "learning_rate": 1.991302402683254e-05, + "loss": 0.5849, + "step": 3157 + }, + { + "epoch": 0.08671059857221307, + "grad_norm": 0.39197593927383423, + "learning_rate": 1.991296717879587e-05, + "loss": 0.6396, + "step": 3158 + }, + { + "epoch": 0.08673805601317958, + "grad_norm": 0.3607301414012909, + "learning_rate": 1.9912910312268353e-05, + "loss": 0.4805, + "step": 3159 + }, + { + "epoch": 0.08676551345414607, + "grad_norm": 0.3576720654964447, + "learning_rate": 1.9912853427250095e-05, + "loss": 0.5125, + "step": 3160 + }, + { + "epoch": 0.08679297089511258, + "grad_norm": 0.356453001499176, + "learning_rate": 1.99127965237412e-05, + "loss": 0.5763, + "step": 3161 + }, + { + "epoch": 0.08682042833607907, + "grad_norm": 0.40957915782928467, + "learning_rate": 1.9912739601741774e-05, + "loss": 0.566, + "step": 3162 + }, + { + "epoch": 0.08684788577704558, + "grad_norm": 0.3592242896556854, + "learning_rate": 1.9912682661251928e-05, + "loss": 0.5737, + "step": 3163 + }, + { + "epoch": 0.08687534321801207, + "grad_norm": 0.3964890241622925, + "learning_rate": 1.9912625702271765e-05, + "loss": 0.5506, + "step": 3164 + }, + { + "epoch": 0.08690280065897858, + "grad_norm": 0.46052658557891846, + "learning_rate": 1.991256872480139e-05, + "loss": 0.5128, + "step": 3165 + }, + { + "epoch": 0.08693025809994509, + "grad_norm": 0.3741527497768402, + "learning_rate": 1.991251172884091e-05, + "loss": 0.5418, + "step": 3166 + }, + { + "epoch": 0.08695771554091158, + "grad_norm": 0.378779798746109, + "learning_rate": 1.991245471439043e-05, + "loss": 0.526, + "step": 3167 + }, + { + "epoch": 0.08698517298187809, + "grad_norm": 0.3797495365142822, + "learning_rate": 1.9912397681450066e-05, + "loss": 0.5048, + "step": 3168 + }, + { + "epoch": 0.08701263042284459, + "grad_norm": 0.33703213930130005, + "learning_rate": 1.991234063001991e-05, + "loss": 0.5335, + "step": 3169 + }, + { + "epoch": 0.0870400878638111, + "grad_norm": 0.3675273060798645, + "learning_rate": 1.9912283560100078e-05, + "loss": 0.6253, + "step": 3170 + }, + { + "epoch": 0.08706754530477759, + "grad_norm": 0.364955335855484, + "learning_rate": 1.9912226471690673e-05, + "loss": 0.6119, + "step": 3171 + }, + { + "epoch": 0.0870950027457441, + "grad_norm": 0.44833487272262573, + "learning_rate": 1.99121693647918e-05, + "loss": 0.4763, + "step": 3172 + }, + { + "epoch": 0.0871224601867106, + "grad_norm": 0.3704459071159363, + "learning_rate": 1.991211223940357e-05, + "loss": 0.5657, + "step": 3173 + }, + { + "epoch": 0.0871499176276771, + "grad_norm": 0.3818432092666626, + "learning_rate": 1.9912055095526088e-05, + "loss": 0.5817, + "step": 3174 + }, + { + "epoch": 0.0871773750686436, + "grad_norm": 0.4610784947872162, + "learning_rate": 1.9911997933159454e-05, + "loss": 0.6047, + "step": 3175 + }, + { + "epoch": 0.0872048325096101, + "grad_norm": 0.4219365119934082, + "learning_rate": 1.9911940752303785e-05, + "loss": 0.5953, + "step": 3176 + }, + { + "epoch": 0.08723228995057661, + "grad_norm": 0.36055439710617065, + "learning_rate": 1.9911883552959183e-05, + "loss": 0.558, + "step": 3177 + }, + { + "epoch": 0.0872597473915431, + "grad_norm": 0.344022661447525, + "learning_rate": 1.9911826335125752e-05, + "loss": 0.4904, + "step": 3178 + }, + { + "epoch": 0.08728720483250961, + "grad_norm": 0.40213847160339355, + "learning_rate": 1.9911769098803605e-05, + "loss": 0.474, + "step": 3179 + }, + { + "epoch": 0.08731466227347612, + "grad_norm": 0.38067054748535156, + "learning_rate": 1.9911711843992842e-05, + "loss": 0.5852, + "step": 3180 + }, + { + "epoch": 0.08734211971444261, + "grad_norm": 0.35548821091651917, + "learning_rate": 1.9911654570693576e-05, + "loss": 0.5455, + "step": 3181 + }, + { + "epoch": 0.08736957715540912, + "grad_norm": 0.38751277327537537, + "learning_rate": 1.991159727890591e-05, + "loss": 0.4953, + "step": 3182 + }, + { + "epoch": 0.08739703459637561, + "grad_norm": 0.4133763909339905, + "learning_rate": 1.991153996862995e-05, + "loss": 0.5387, + "step": 3183 + }, + { + "epoch": 0.08742449203734212, + "grad_norm": 0.4060620665550232, + "learning_rate": 1.9911482639865803e-05, + "loss": 0.5214, + "step": 3184 + }, + { + "epoch": 0.08745194947830862, + "grad_norm": 0.41407129168510437, + "learning_rate": 1.9911425292613578e-05, + "loss": 0.5348, + "step": 3185 + }, + { + "epoch": 0.08747940691927512, + "grad_norm": 0.38008028268814087, + "learning_rate": 1.9911367926873385e-05, + "loss": 0.6123, + "step": 3186 + }, + { + "epoch": 0.08750686436024163, + "grad_norm": 0.47845888137817383, + "learning_rate": 1.991131054264533e-05, + "loss": 0.5662, + "step": 3187 + }, + { + "epoch": 0.08753432180120813, + "grad_norm": 0.4051632285118103, + "learning_rate": 1.9911253139929513e-05, + "loss": 0.4652, + "step": 3188 + }, + { + "epoch": 0.08756177924217463, + "grad_norm": 0.35869109630584717, + "learning_rate": 1.9911195718726043e-05, + "loss": 0.5244, + "step": 3189 + }, + { + "epoch": 0.08758923668314113, + "grad_norm": 0.4087565243244171, + "learning_rate": 1.9911138279035032e-05, + "loss": 0.5899, + "step": 3190 + }, + { + "epoch": 0.08761669412410764, + "grad_norm": 0.351574182510376, + "learning_rate": 1.9911080820856585e-05, + "loss": 0.5304, + "step": 3191 + }, + { + "epoch": 0.08764415156507413, + "grad_norm": 0.40920358896255493, + "learning_rate": 1.991102334419081e-05, + "loss": 0.5368, + "step": 3192 + }, + { + "epoch": 0.08767160900604064, + "grad_norm": 0.35106855630874634, + "learning_rate": 1.9910965849037814e-05, + "loss": 0.4712, + "step": 3193 + }, + { + "epoch": 0.08769906644700715, + "grad_norm": 0.36936140060424805, + "learning_rate": 1.99109083353977e-05, + "loss": 0.4543, + "step": 3194 + }, + { + "epoch": 0.08772652388797364, + "grad_norm": 0.37517067790031433, + "learning_rate": 1.9910850803270583e-05, + "loss": 0.6124, + "step": 3195 + }, + { + "epoch": 0.08775398132894015, + "grad_norm": 0.356742262840271, + "learning_rate": 1.9910793252656565e-05, + "loss": 0.5004, + "step": 3196 + }, + { + "epoch": 0.08778143876990664, + "grad_norm": 0.39151257276535034, + "learning_rate": 1.9910735683555752e-05, + "loss": 0.4602, + "step": 3197 + }, + { + "epoch": 0.08780889621087315, + "grad_norm": 0.33860597014427185, + "learning_rate": 1.9910678095968256e-05, + "loss": 0.5282, + "step": 3198 + }, + { + "epoch": 0.08783635365183964, + "grad_norm": 0.3463510274887085, + "learning_rate": 1.991062048989418e-05, + "loss": 0.4902, + "step": 3199 + }, + { + "epoch": 0.08786381109280615, + "grad_norm": 0.3738035261631012, + "learning_rate": 1.9910562865333634e-05, + "loss": 0.4942, + "step": 3200 + }, + { + "epoch": 0.08789126853377265, + "grad_norm": 0.39467930793762207, + "learning_rate": 1.991050522228673e-05, + "loss": 0.5634, + "step": 3201 + }, + { + "epoch": 0.08791872597473915, + "grad_norm": 0.3497433364391327, + "learning_rate": 1.9910447560753565e-05, + "loss": 0.5018, + "step": 3202 + }, + { + "epoch": 0.08794618341570566, + "grad_norm": 0.37710538506507874, + "learning_rate": 1.9910389880734255e-05, + "loss": 0.6506, + "step": 3203 + }, + { + "epoch": 0.08797364085667216, + "grad_norm": 0.33815863728523254, + "learning_rate": 1.9910332182228905e-05, + "loss": 0.4454, + "step": 3204 + }, + { + "epoch": 0.08800109829763866, + "grad_norm": 0.485881507396698, + "learning_rate": 1.991027446523762e-05, + "loss": 0.5638, + "step": 3205 + }, + { + "epoch": 0.08802855573860516, + "grad_norm": 0.38838228583335876, + "learning_rate": 1.9910216729760512e-05, + "loss": 0.4917, + "step": 3206 + }, + { + "epoch": 0.08805601317957167, + "grad_norm": 0.3868916630744934, + "learning_rate": 1.9910158975797686e-05, + "loss": 0.5997, + "step": 3207 + }, + { + "epoch": 0.08808347062053816, + "grad_norm": 0.45558539032936096, + "learning_rate": 1.991010120334925e-05, + "loss": 0.5815, + "step": 3208 + }, + { + "epoch": 0.08811092806150467, + "grad_norm": 0.34407952427864075, + "learning_rate": 1.9910043412415316e-05, + "loss": 0.5396, + "step": 3209 + }, + { + "epoch": 0.08813838550247118, + "grad_norm": 0.3800027370452881, + "learning_rate": 1.9909985602995985e-05, + "loss": 0.523, + "step": 3210 + }, + { + "epoch": 0.08816584294343767, + "grad_norm": 0.43269798159599304, + "learning_rate": 1.9909927775091367e-05, + "loss": 0.5053, + "step": 3211 + }, + { + "epoch": 0.08819330038440418, + "grad_norm": 0.4874632656574249, + "learning_rate": 1.9909869928701573e-05, + "loss": 0.62, + "step": 3212 + }, + { + "epoch": 0.08822075782537067, + "grad_norm": 0.3243730366230011, + "learning_rate": 1.9909812063826707e-05, + "loss": 0.4477, + "step": 3213 + }, + { + "epoch": 0.08824821526633718, + "grad_norm": 0.3117091655731201, + "learning_rate": 1.990975418046688e-05, + "loss": 0.5566, + "step": 3214 + }, + { + "epoch": 0.08827567270730367, + "grad_norm": 0.47732481360435486, + "learning_rate": 1.99096962786222e-05, + "loss": 0.5949, + "step": 3215 + }, + { + "epoch": 0.08830313014827018, + "grad_norm": 0.3632173240184784, + "learning_rate": 1.9909638358292772e-05, + "loss": 0.5925, + "step": 3216 + }, + { + "epoch": 0.08833058758923669, + "grad_norm": 0.38406211137771606, + "learning_rate": 1.9909580419478703e-05, + "loss": 0.6046, + "step": 3217 + }, + { + "epoch": 0.08835804503020318, + "grad_norm": 0.3682786524295807, + "learning_rate": 1.9909522462180106e-05, + "loss": 0.613, + "step": 3218 + }, + { + "epoch": 0.08838550247116969, + "grad_norm": 0.3922516405582428, + "learning_rate": 1.9909464486397087e-05, + "loss": 0.5204, + "step": 3219 + }, + { + "epoch": 0.08841295991213619, + "grad_norm": 0.40254315733909607, + "learning_rate": 1.9909406492129757e-05, + "loss": 0.5711, + "step": 3220 + }, + { + "epoch": 0.0884404173531027, + "grad_norm": 0.32269617915153503, + "learning_rate": 1.9909348479378216e-05, + "loss": 0.4996, + "step": 3221 + }, + { + "epoch": 0.08846787479406919, + "grad_norm": 0.3453953266143799, + "learning_rate": 1.9909290448142583e-05, + "loss": 0.5412, + "step": 3222 + }, + { + "epoch": 0.0884953322350357, + "grad_norm": 0.37068918347358704, + "learning_rate": 1.9909232398422956e-05, + "loss": 0.5436, + "step": 3223 + }, + { + "epoch": 0.0885227896760022, + "grad_norm": 0.34113579988479614, + "learning_rate": 1.9909174330219448e-05, + "loss": 0.5209, + "step": 3224 + }, + { + "epoch": 0.0885502471169687, + "grad_norm": 0.3979324400424957, + "learning_rate": 1.990911624353217e-05, + "loss": 0.5778, + "step": 3225 + }, + { + "epoch": 0.0885777045579352, + "grad_norm": 1.2976291179656982, + "learning_rate": 1.9909058138361227e-05, + "loss": 0.5628, + "step": 3226 + }, + { + "epoch": 0.0886051619989017, + "grad_norm": 0.3309061825275421, + "learning_rate": 1.9909000014706727e-05, + "loss": 0.5404, + "step": 3227 + }, + { + "epoch": 0.08863261943986821, + "grad_norm": 0.503724217414856, + "learning_rate": 1.9908941872568775e-05, + "loss": 0.5285, + "step": 3228 + }, + { + "epoch": 0.0886600768808347, + "grad_norm": 0.40295854210853577, + "learning_rate": 1.990888371194749e-05, + "loss": 0.5184, + "step": 3229 + }, + { + "epoch": 0.08868753432180121, + "grad_norm": 0.3858764171600342, + "learning_rate": 1.9908825532842972e-05, + "loss": 0.5014, + "step": 3230 + }, + { + "epoch": 0.0887149917627677, + "grad_norm": 0.3580276072025299, + "learning_rate": 1.9908767335255333e-05, + "loss": 0.4904, + "step": 3231 + }, + { + "epoch": 0.08874244920373421, + "grad_norm": 0.4700264036655426, + "learning_rate": 1.990870911918468e-05, + "loss": 0.5487, + "step": 3232 + }, + { + "epoch": 0.08876990664470072, + "grad_norm": 0.47418081760406494, + "learning_rate": 1.9908650884631125e-05, + "loss": 0.6401, + "step": 3233 + }, + { + "epoch": 0.08879736408566721, + "grad_norm": 0.34153982996940613, + "learning_rate": 1.9908592631594768e-05, + "loss": 0.5517, + "step": 3234 + }, + { + "epoch": 0.08882482152663372, + "grad_norm": 0.4043576419353485, + "learning_rate": 1.9908534360075726e-05, + "loss": 0.5917, + "step": 3235 + }, + { + "epoch": 0.08885227896760022, + "grad_norm": 0.3442865312099457, + "learning_rate": 1.9908476070074106e-05, + "loss": 0.505, + "step": 3236 + }, + { + "epoch": 0.08887973640856672, + "grad_norm": 0.4370473325252533, + "learning_rate": 1.9908417761590014e-05, + "loss": 0.5944, + "step": 3237 + }, + { + "epoch": 0.08890719384953322, + "grad_norm": 0.350563645362854, + "learning_rate": 1.990835943462356e-05, + "loss": 0.5141, + "step": 3238 + }, + { + "epoch": 0.08893465129049972, + "grad_norm": 0.3351345360279083, + "learning_rate": 1.9908301089174856e-05, + "loss": 0.5673, + "step": 3239 + }, + { + "epoch": 0.08896210873146623, + "grad_norm": 0.38889992237091064, + "learning_rate": 1.9908242725244004e-05, + "loss": 0.5701, + "step": 3240 + }, + { + "epoch": 0.08898956617243273, + "grad_norm": 0.39961302280426025, + "learning_rate": 1.990818434283112e-05, + "loss": 0.5994, + "step": 3241 + }, + { + "epoch": 0.08901702361339923, + "grad_norm": 0.361337274312973, + "learning_rate": 1.990812594193631e-05, + "loss": 0.494, + "step": 3242 + }, + { + "epoch": 0.08904448105436573, + "grad_norm": 0.47824323177337646, + "learning_rate": 1.9908067522559684e-05, + "loss": 0.5308, + "step": 3243 + }, + { + "epoch": 0.08907193849533224, + "grad_norm": 0.34467095136642456, + "learning_rate": 1.9908009084701344e-05, + "loss": 0.5732, + "step": 3244 + }, + { + "epoch": 0.08909939593629873, + "grad_norm": 0.42201581597328186, + "learning_rate": 1.9907950628361414e-05, + "loss": 0.5472, + "step": 3245 + }, + { + "epoch": 0.08912685337726524, + "grad_norm": 0.36185574531555176, + "learning_rate": 1.9907892153539986e-05, + "loss": 0.641, + "step": 3246 + }, + { + "epoch": 0.08915431081823175, + "grad_norm": 0.42578285932540894, + "learning_rate": 1.9907833660237178e-05, + "loss": 0.6558, + "step": 3247 + }, + { + "epoch": 0.08918176825919824, + "grad_norm": 0.44082021713256836, + "learning_rate": 1.9907775148453103e-05, + "loss": 0.5125, + "step": 3248 + }, + { + "epoch": 0.08920922570016475, + "grad_norm": 0.3511110544204712, + "learning_rate": 1.9907716618187857e-05, + "loss": 0.5795, + "step": 3249 + }, + { + "epoch": 0.08923668314113124, + "grad_norm": 0.3756183385848999, + "learning_rate": 1.990765806944156e-05, + "loss": 0.5426, + "step": 3250 + }, + { + "epoch": 0.08926414058209775, + "grad_norm": 0.35160455107688904, + "learning_rate": 1.9907599502214325e-05, + "loss": 0.5534, + "step": 3251 + }, + { + "epoch": 0.08929159802306424, + "grad_norm": 0.40765464305877686, + "learning_rate": 1.990754091650625e-05, + "loss": 0.6697, + "step": 3252 + }, + { + "epoch": 0.08931905546403075, + "grad_norm": 0.37326428294181824, + "learning_rate": 1.990748231231745e-05, + "loss": 0.6373, + "step": 3253 + }, + { + "epoch": 0.08934651290499726, + "grad_norm": 0.4163500666618347, + "learning_rate": 1.990742368964803e-05, + "loss": 0.5298, + "step": 3254 + }, + { + "epoch": 0.08937397034596375, + "grad_norm": 0.43508976697921753, + "learning_rate": 1.9907365048498107e-05, + "loss": 0.564, + "step": 3255 + }, + { + "epoch": 0.08940142778693026, + "grad_norm": 0.34837794303894043, + "learning_rate": 1.9907306388867783e-05, + "loss": 0.5273, + "step": 3256 + }, + { + "epoch": 0.08942888522789676, + "grad_norm": 0.36456912755966187, + "learning_rate": 1.990724771075717e-05, + "loss": 0.5688, + "step": 3257 + }, + { + "epoch": 0.08945634266886326, + "grad_norm": 0.38311299681663513, + "learning_rate": 1.990718901416638e-05, + "loss": 0.6306, + "step": 3258 + }, + { + "epoch": 0.08948380010982976, + "grad_norm": 0.3830330967903137, + "learning_rate": 1.9907130299095523e-05, + "loss": 0.581, + "step": 3259 + }, + { + "epoch": 0.08951125755079627, + "grad_norm": 0.36771926283836365, + "learning_rate": 1.9907071565544704e-05, + "loss": 0.5317, + "step": 3260 + }, + { + "epoch": 0.08953871499176277, + "grad_norm": 0.6281590461730957, + "learning_rate": 1.990701281351403e-05, + "loss": 0.4892, + "step": 3261 + }, + { + "epoch": 0.08956617243272927, + "grad_norm": 0.35221514105796814, + "learning_rate": 1.9906954043003624e-05, + "loss": 0.4505, + "step": 3262 + }, + { + "epoch": 0.08959362987369578, + "grad_norm": 0.3861498534679413, + "learning_rate": 1.9906895254013577e-05, + "loss": 0.5761, + "step": 3263 + }, + { + "epoch": 0.08962108731466227, + "grad_norm": 0.381055623292923, + "learning_rate": 1.990683644654402e-05, + "loss": 0.6021, + "step": 3264 + }, + { + "epoch": 0.08964854475562878, + "grad_norm": 0.38482892513275146, + "learning_rate": 1.990677762059504e-05, + "loss": 0.5664, + "step": 3265 + }, + { + "epoch": 0.08967600219659527, + "grad_norm": 0.3514528274536133, + "learning_rate": 1.9906718776166762e-05, + "loss": 0.5392, + "step": 3266 + }, + { + "epoch": 0.08970345963756178, + "grad_norm": 0.37568554282188416, + "learning_rate": 1.9906659913259294e-05, + "loss": 0.6031, + "step": 3267 + }, + { + "epoch": 0.08973091707852827, + "grad_norm": 0.38094088435173035, + "learning_rate": 1.990660103187274e-05, + "loss": 0.5906, + "step": 3268 + }, + { + "epoch": 0.08975837451949478, + "grad_norm": 0.3820612132549286, + "learning_rate": 1.9906542132007217e-05, + "loss": 0.5201, + "step": 3269 + }, + { + "epoch": 0.08978583196046129, + "grad_norm": 0.3533404469490051, + "learning_rate": 1.9906483213662828e-05, + "loss": 0.6214, + "step": 3270 + }, + { + "epoch": 0.08981328940142778, + "grad_norm": 0.4257062077522278, + "learning_rate": 1.9906424276839687e-05, + "loss": 0.5819, + "step": 3271 + }, + { + "epoch": 0.08984074684239429, + "grad_norm": 0.3551250994205475, + "learning_rate": 1.9906365321537902e-05, + "loss": 0.5308, + "step": 3272 + }, + { + "epoch": 0.08986820428336079, + "grad_norm": 0.36842861771583557, + "learning_rate": 1.9906306347757585e-05, + "loss": 0.5757, + "step": 3273 + }, + { + "epoch": 0.0898956617243273, + "grad_norm": 0.33690592646598816, + "learning_rate": 1.9906247355498845e-05, + "loss": 0.4816, + "step": 3274 + }, + { + "epoch": 0.08992311916529379, + "grad_norm": 0.34285932779312134, + "learning_rate": 1.990618834476179e-05, + "loss": 0.544, + "step": 3275 + }, + { + "epoch": 0.0899505766062603, + "grad_norm": 0.3523218035697937, + "learning_rate": 1.9906129315546537e-05, + "loss": 0.5945, + "step": 3276 + }, + { + "epoch": 0.0899780340472268, + "grad_norm": 0.40730714797973633, + "learning_rate": 1.9906070267853187e-05, + "loss": 0.5816, + "step": 3277 + }, + { + "epoch": 0.0900054914881933, + "grad_norm": 0.45997700095176697, + "learning_rate": 1.9906011201681854e-05, + "loss": 0.5108, + "step": 3278 + }, + { + "epoch": 0.0900329489291598, + "grad_norm": 0.31813183426856995, + "learning_rate": 1.990595211703265e-05, + "loss": 0.3245, + "step": 3279 + }, + { + "epoch": 0.0900604063701263, + "grad_norm": 0.36020800471305847, + "learning_rate": 1.9905893013905682e-05, + "loss": 0.5138, + "step": 3280 + }, + { + "epoch": 0.09008786381109281, + "grad_norm": 0.37830880284309387, + "learning_rate": 1.9905833892301067e-05, + "loss": 0.6027, + "step": 3281 + }, + { + "epoch": 0.0901153212520593, + "grad_norm": 0.581026554107666, + "learning_rate": 1.9905774752218905e-05, + "loss": 0.683, + "step": 3282 + }, + { + "epoch": 0.09014277869302581, + "grad_norm": 0.3953525125980377, + "learning_rate": 1.990571559365931e-05, + "loss": 0.509, + "step": 3283 + }, + { + "epoch": 0.09017023613399232, + "grad_norm": 0.35911622643470764, + "learning_rate": 1.99056564166224e-05, + "loss": 0.5607, + "step": 3284 + }, + { + "epoch": 0.09019769357495881, + "grad_norm": 0.45917800068855286, + "learning_rate": 1.9905597221108274e-05, + "loss": 0.5731, + "step": 3285 + }, + { + "epoch": 0.09022515101592532, + "grad_norm": 0.43529415130615234, + "learning_rate": 1.990553800711705e-05, + "loss": 0.6893, + "step": 3286 + }, + { + "epoch": 0.09025260845689181, + "grad_norm": 0.3546833097934723, + "learning_rate": 1.9905478774648836e-05, + "loss": 0.5174, + "step": 3287 + }, + { + "epoch": 0.09028006589785832, + "grad_norm": 0.3950420618057251, + "learning_rate": 1.990541952370374e-05, + "loss": 0.5849, + "step": 3288 + }, + { + "epoch": 0.09030752333882482, + "grad_norm": 0.38023409247398376, + "learning_rate": 1.990536025428188e-05, + "loss": 0.5783, + "step": 3289 + }, + { + "epoch": 0.09033498077979132, + "grad_norm": 0.39877286553382874, + "learning_rate": 1.990530096638336e-05, + "loss": 0.5562, + "step": 3290 + }, + { + "epoch": 0.09036243822075783, + "grad_norm": 0.4194156229496002, + "learning_rate": 1.9905241660008292e-05, + "loss": 0.586, + "step": 3291 + }, + { + "epoch": 0.09038989566172433, + "grad_norm": 0.3331613838672638, + "learning_rate": 1.9905182335156786e-05, + "loss": 0.5556, + "step": 3292 + }, + { + "epoch": 0.09041735310269083, + "grad_norm": 0.3199431002140045, + "learning_rate": 1.9905122991828954e-05, + "loss": 0.4813, + "step": 3293 + }, + { + "epoch": 0.09044481054365733, + "grad_norm": 0.34835976362228394, + "learning_rate": 1.9905063630024908e-05, + "loss": 0.5366, + "step": 3294 + }, + { + "epoch": 0.09047226798462384, + "grad_norm": 0.4026298522949219, + "learning_rate": 1.9905004249744755e-05, + "loss": 0.6356, + "step": 3295 + }, + { + "epoch": 0.09049972542559033, + "grad_norm": 0.3244951367378235, + "learning_rate": 1.9904944850988608e-05, + "loss": 0.4211, + "step": 3296 + }, + { + "epoch": 0.09052718286655684, + "grad_norm": 0.3981974422931671, + "learning_rate": 1.990488543375658e-05, + "loss": 0.6026, + "step": 3297 + }, + { + "epoch": 0.09055464030752333, + "grad_norm": 0.3729385435581207, + "learning_rate": 1.9904825998048776e-05, + "loss": 0.5789, + "step": 3298 + }, + { + "epoch": 0.09058209774848984, + "grad_norm": 0.3340352177619934, + "learning_rate": 1.990476654386531e-05, + "loss": 0.5189, + "step": 3299 + }, + { + "epoch": 0.09060955518945635, + "grad_norm": 0.396192342042923, + "learning_rate": 1.9904707071206294e-05, + "loss": 0.5299, + "step": 3300 + }, + { + "epoch": 0.09063701263042284, + "grad_norm": 0.33879032731056213, + "learning_rate": 1.990464758007184e-05, + "loss": 0.5102, + "step": 3301 + }, + { + "epoch": 0.09066447007138935, + "grad_norm": 0.35069993138313293, + "learning_rate": 1.9904588070462055e-05, + "loss": 0.5485, + "step": 3302 + }, + { + "epoch": 0.09069192751235584, + "grad_norm": 0.3700973093509674, + "learning_rate": 1.9904528542377052e-05, + "loss": 0.6101, + "step": 3303 + }, + { + "epoch": 0.09071938495332235, + "grad_norm": 0.35640865564346313, + "learning_rate": 1.9904468995816946e-05, + "loss": 0.5214, + "step": 3304 + }, + { + "epoch": 0.09074684239428885, + "grad_norm": 0.3724553883075714, + "learning_rate": 1.9904409430781838e-05, + "loss": 0.5011, + "step": 3305 + }, + { + "epoch": 0.09077429983525535, + "grad_norm": 0.3634449243545532, + "learning_rate": 1.990434984727185e-05, + "loss": 0.5365, + "step": 3306 + }, + { + "epoch": 0.09080175727622186, + "grad_norm": 0.33786654472351074, + "learning_rate": 1.990429024528709e-05, + "loss": 0.5776, + "step": 3307 + }, + { + "epoch": 0.09082921471718836, + "grad_norm": 0.36381298303604126, + "learning_rate": 1.990423062482766e-05, + "loss": 0.6049, + "step": 3308 + }, + { + "epoch": 0.09085667215815486, + "grad_norm": 0.36748743057250977, + "learning_rate": 1.9904170985893685e-05, + "loss": 0.5322, + "step": 3309 + }, + { + "epoch": 0.09088412959912136, + "grad_norm": 0.3544635474681854, + "learning_rate": 1.990411132848527e-05, + "loss": 0.5203, + "step": 3310 + }, + { + "epoch": 0.09091158704008787, + "grad_norm": 0.38370415568351746, + "learning_rate": 1.9904051652602522e-05, + "loss": 0.4434, + "step": 3311 + }, + { + "epoch": 0.09093904448105436, + "grad_norm": 0.36442410945892334, + "learning_rate": 1.9903991958245558e-05, + "loss": 0.488, + "step": 3312 + }, + { + "epoch": 0.09096650192202087, + "grad_norm": 0.3688850402832031, + "learning_rate": 1.9903932245414493e-05, + "loss": 0.5716, + "step": 3313 + }, + { + "epoch": 0.09099395936298738, + "grad_norm": 0.39044809341430664, + "learning_rate": 1.9903872514109427e-05, + "loss": 0.5285, + "step": 3314 + }, + { + "epoch": 0.09102141680395387, + "grad_norm": 0.35171544551849365, + "learning_rate": 1.990381276433048e-05, + "loss": 0.5642, + "step": 3315 + }, + { + "epoch": 0.09104887424492038, + "grad_norm": 0.39038726687431335, + "learning_rate": 1.9903752996077762e-05, + "loss": 0.5551, + "step": 3316 + }, + { + "epoch": 0.09107633168588687, + "grad_norm": 0.43575209379196167, + "learning_rate": 1.9903693209351384e-05, + "loss": 0.5942, + "step": 3317 + }, + { + "epoch": 0.09110378912685338, + "grad_norm": 0.3155750334262848, + "learning_rate": 1.9903633404151458e-05, + "loss": 0.4902, + "step": 3318 + }, + { + "epoch": 0.09113124656781987, + "grad_norm": 0.3335115909576416, + "learning_rate": 1.9903573580478092e-05, + "loss": 0.5184, + "step": 3319 + }, + { + "epoch": 0.09115870400878638, + "grad_norm": 0.37053316831588745, + "learning_rate": 1.9903513738331402e-05, + "loss": 0.5031, + "step": 3320 + }, + { + "epoch": 0.09118616144975289, + "grad_norm": 0.3350338339805603, + "learning_rate": 1.99034538777115e-05, + "loss": 0.5159, + "step": 3321 + }, + { + "epoch": 0.09121361889071938, + "grad_norm": 0.38758522272109985, + "learning_rate": 1.9903393998618493e-05, + "loss": 0.5561, + "step": 3322 + }, + { + "epoch": 0.09124107633168589, + "grad_norm": 0.38113933801651, + "learning_rate": 1.9903334101052497e-05, + "loss": 0.5824, + "step": 3323 + }, + { + "epoch": 0.09126853377265239, + "grad_norm": 0.37687766551971436, + "learning_rate": 1.990327418501362e-05, + "loss": 0.6104, + "step": 3324 + }, + { + "epoch": 0.0912959912136189, + "grad_norm": 0.34970465302467346, + "learning_rate": 1.990321425050198e-05, + "loss": 0.4723, + "step": 3325 + }, + { + "epoch": 0.09132344865458539, + "grad_norm": 0.515536367893219, + "learning_rate": 1.990315429751768e-05, + "loss": 0.5795, + "step": 3326 + }, + { + "epoch": 0.0913509060955519, + "grad_norm": 0.38595297932624817, + "learning_rate": 1.990309432606084e-05, + "loss": 0.6075, + "step": 3327 + }, + { + "epoch": 0.0913783635365184, + "grad_norm": 0.34674564003944397, + "learning_rate": 1.9903034336131566e-05, + "loss": 0.5248, + "step": 3328 + }, + { + "epoch": 0.0914058209774849, + "grad_norm": 0.35332274436950684, + "learning_rate": 1.9902974327729974e-05, + "loss": 0.5464, + "step": 3329 + }, + { + "epoch": 0.0914332784184514, + "grad_norm": 0.5265550017356873, + "learning_rate": 1.9902914300856173e-05, + "loss": 0.6205, + "step": 3330 + }, + { + "epoch": 0.0914607358594179, + "grad_norm": 0.35014790296554565, + "learning_rate": 1.9902854255510277e-05, + "loss": 0.5144, + "step": 3331 + }, + { + "epoch": 0.09148819330038441, + "grad_norm": 0.38389652967453003, + "learning_rate": 1.9902794191692398e-05, + "loss": 0.5643, + "step": 3332 + }, + { + "epoch": 0.0915156507413509, + "grad_norm": 0.47311073541641235, + "learning_rate": 1.9902734109402645e-05, + "loss": 0.6407, + "step": 3333 + }, + { + "epoch": 0.09154310818231741, + "grad_norm": 0.3439444303512573, + "learning_rate": 1.9902674008641133e-05, + "loss": 0.5394, + "step": 3334 + }, + { + "epoch": 0.0915705656232839, + "grad_norm": 0.3157111406326294, + "learning_rate": 1.9902613889407973e-05, + "loss": 0.4521, + "step": 3335 + }, + { + "epoch": 0.09159802306425041, + "grad_norm": 0.4152909219264984, + "learning_rate": 1.9902553751703278e-05, + "loss": 0.5966, + "step": 3336 + }, + { + "epoch": 0.09162548050521692, + "grad_norm": 0.3670664131641388, + "learning_rate": 1.9902493595527163e-05, + "loss": 0.6135, + "step": 3337 + }, + { + "epoch": 0.09165293794618341, + "grad_norm": 0.3874143958091736, + "learning_rate": 1.9902433420879733e-05, + "loss": 0.5597, + "step": 3338 + }, + { + "epoch": 0.09168039538714992, + "grad_norm": 0.3405759036540985, + "learning_rate": 1.9902373227761105e-05, + "loss": 0.5361, + "step": 3339 + }, + { + "epoch": 0.09170785282811642, + "grad_norm": 0.4539763331413269, + "learning_rate": 1.990231301617139e-05, + "loss": 0.5832, + "step": 3340 + }, + { + "epoch": 0.09173531026908292, + "grad_norm": 0.32107529044151306, + "learning_rate": 1.9902252786110702e-05, + "loss": 0.487, + "step": 3341 + }, + { + "epoch": 0.09176276771004942, + "grad_norm": 0.390642911195755, + "learning_rate": 1.990219253757915e-05, + "loss": 0.4933, + "step": 3342 + }, + { + "epoch": 0.09179022515101593, + "grad_norm": 0.3959125280380249, + "learning_rate": 1.990213227057685e-05, + "loss": 0.5973, + "step": 3343 + }, + { + "epoch": 0.09181768259198243, + "grad_norm": 0.3482478857040405, + "learning_rate": 1.9902071985103913e-05, + "loss": 0.5664, + "step": 3344 + }, + { + "epoch": 0.09184514003294893, + "grad_norm": 0.3694511950016022, + "learning_rate": 1.990201168116045e-05, + "loss": 0.5507, + "step": 3345 + }, + { + "epoch": 0.09187259747391543, + "grad_norm": 0.35642778873443604, + "learning_rate": 1.9901951358746578e-05, + "loss": 0.5391, + "step": 3346 + }, + { + "epoch": 0.09190005491488193, + "grad_norm": 0.4408516585826874, + "learning_rate": 1.9901891017862402e-05, + "loss": 0.5298, + "step": 3347 + }, + { + "epoch": 0.09192751235584844, + "grad_norm": 0.3846726715564728, + "learning_rate": 1.990183065850804e-05, + "loss": 0.6422, + "step": 3348 + }, + { + "epoch": 0.09195496979681493, + "grad_norm": 0.5086424946784973, + "learning_rate": 1.990177028068361e-05, + "loss": 0.6184, + "step": 3349 + }, + { + "epoch": 0.09198242723778144, + "grad_norm": 0.371178537607193, + "learning_rate": 1.9901709884389212e-05, + "loss": 0.5762, + "step": 3350 + }, + { + "epoch": 0.09200988467874795, + "grad_norm": 0.3566742539405823, + "learning_rate": 1.9901649469624963e-05, + "loss": 0.6229, + "step": 3351 + }, + { + "epoch": 0.09203734211971444, + "grad_norm": 0.34832340478897095, + "learning_rate": 1.990158903639098e-05, + "loss": 0.567, + "step": 3352 + }, + { + "epoch": 0.09206479956068095, + "grad_norm": 0.5979407429695129, + "learning_rate": 1.9901528584687374e-05, + "loss": 0.6577, + "step": 3353 + }, + { + "epoch": 0.09209225700164744, + "grad_norm": 0.38518014550209045, + "learning_rate": 1.990146811451426e-05, + "loss": 0.5362, + "step": 3354 + }, + { + "epoch": 0.09211971444261395, + "grad_norm": 0.44032055139541626, + "learning_rate": 1.990140762587174e-05, + "loss": 0.5339, + "step": 3355 + }, + { + "epoch": 0.09214717188358044, + "grad_norm": 0.3392046391963959, + "learning_rate": 1.9901347118759942e-05, + "loss": 0.4805, + "step": 3356 + }, + { + "epoch": 0.09217462932454695, + "grad_norm": 0.946993887424469, + "learning_rate": 1.990128659317897e-05, + "loss": 0.5842, + "step": 3357 + }, + { + "epoch": 0.09220208676551346, + "grad_norm": 0.3699509799480438, + "learning_rate": 1.9901226049128934e-05, + "loss": 0.4787, + "step": 3358 + }, + { + "epoch": 0.09222954420647995, + "grad_norm": 0.370581716299057, + "learning_rate": 1.9901165486609955e-05, + "loss": 0.565, + "step": 3359 + }, + { + "epoch": 0.09225700164744646, + "grad_norm": 0.34993109107017517, + "learning_rate": 1.990110490562214e-05, + "loss": 0.6178, + "step": 3360 + }, + { + "epoch": 0.09228445908841296, + "grad_norm": 0.4378306269645691, + "learning_rate": 1.9901044306165607e-05, + "loss": 0.5873, + "step": 3361 + }, + { + "epoch": 0.09231191652937946, + "grad_norm": 0.35035014152526855, + "learning_rate": 1.9900983688240465e-05, + "loss": 0.5738, + "step": 3362 + }, + { + "epoch": 0.09233937397034596, + "grad_norm": 0.35814690589904785, + "learning_rate": 1.9900923051846826e-05, + "loss": 0.4746, + "step": 3363 + }, + { + "epoch": 0.09236683141131247, + "grad_norm": 0.39350980520248413, + "learning_rate": 1.990086239698481e-05, + "loss": 0.6135, + "step": 3364 + }, + { + "epoch": 0.09239428885227896, + "grad_norm": 0.40833815932273865, + "learning_rate": 1.9900801723654525e-05, + "loss": 0.6035, + "step": 3365 + }, + { + "epoch": 0.09242174629324547, + "grad_norm": 0.3711360991001129, + "learning_rate": 1.9900741031856082e-05, + "loss": 0.5181, + "step": 3366 + }, + { + "epoch": 0.09244920373421198, + "grad_norm": 0.38028642535209656, + "learning_rate": 1.9900680321589597e-05, + "loss": 0.639, + "step": 3367 + }, + { + "epoch": 0.09247666117517847, + "grad_norm": 0.45201563835144043, + "learning_rate": 1.9900619592855184e-05, + "loss": 0.617, + "step": 3368 + }, + { + "epoch": 0.09250411861614498, + "grad_norm": 0.6397286057472229, + "learning_rate": 1.9900558845652957e-05, + "loss": 0.6061, + "step": 3369 + }, + { + "epoch": 0.09253157605711147, + "grad_norm": 0.416930615901947, + "learning_rate": 1.990049807998303e-05, + "loss": 0.509, + "step": 3370 + }, + { + "epoch": 0.09255903349807798, + "grad_norm": 0.3858082592487335, + "learning_rate": 1.9900437295845513e-05, + "loss": 0.6243, + "step": 3371 + }, + { + "epoch": 0.09258649093904447, + "grad_norm": 0.34903237223625183, + "learning_rate": 1.9900376493240517e-05, + "loss": 0.5225, + "step": 3372 + }, + { + "epoch": 0.09261394838001098, + "grad_norm": 0.352664589881897, + "learning_rate": 1.990031567216816e-05, + "loss": 0.6224, + "step": 3373 + }, + { + "epoch": 0.09264140582097749, + "grad_norm": 0.3579198718070984, + "learning_rate": 1.9900254832628553e-05, + "loss": 0.5675, + "step": 3374 + }, + { + "epoch": 0.09266886326194398, + "grad_norm": 0.39280208945274353, + "learning_rate": 1.9900193974621815e-05, + "loss": 0.5689, + "step": 3375 + }, + { + "epoch": 0.09269632070291049, + "grad_norm": 0.4074755012989044, + "learning_rate": 1.9900133098148052e-05, + "loss": 0.5228, + "step": 3376 + }, + { + "epoch": 0.09272377814387699, + "grad_norm": 0.3777313232421875, + "learning_rate": 1.9900072203207384e-05, + "loss": 0.5478, + "step": 3377 + }, + { + "epoch": 0.0927512355848435, + "grad_norm": 0.3825697898864746, + "learning_rate": 1.990001128979992e-05, + "loss": 0.599, + "step": 3378 + }, + { + "epoch": 0.09277869302580999, + "grad_norm": 0.3777071535587311, + "learning_rate": 1.989995035792578e-05, + "loss": 0.5015, + "step": 3379 + }, + { + "epoch": 0.0928061504667765, + "grad_norm": 0.38451850414276123, + "learning_rate": 1.9899889407585067e-05, + "loss": 0.6015, + "step": 3380 + }, + { + "epoch": 0.092833607907743, + "grad_norm": 0.3894476592540741, + "learning_rate": 1.98998284387779e-05, + "loss": 0.5971, + "step": 3381 + }, + { + "epoch": 0.0928610653487095, + "grad_norm": 0.35124829411506653, + "learning_rate": 1.9899767451504396e-05, + "loss": 0.5053, + "step": 3382 + }, + { + "epoch": 0.092888522789676, + "grad_norm": 0.36521998047828674, + "learning_rate": 1.9899706445764666e-05, + "loss": 0.5857, + "step": 3383 + }, + { + "epoch": 0.0929159802306425, + "grad_norm": 0.35375410318374634, + "learning_rate": 1.9899645421558823e-05, + "loss": 0.5197, + "step": 3384 + }, + { + "epoch": 0.09294343767160901, + "grad_norm": 0.36403024196624756, + "learning_rate": 1.989958437888698e-05, + "loss": 0.5606, + "step": 3385 + }, + { + "epoch": 0.0929708951125755, + "grad_norm": 0.3416014611721039, + "learning_rate": 1.9899523317749256e-05, + "loss": 0.5924, + "step": 3386 + }, + { + "epoch": 0.09299835255354201, + "grad_norm": 0.3591092526912689, + "learning_rate": 1.989946223814576e-05, + "loss": 0.4471, + "step": 3387 + }, + { + "epoch": 0.09302580999450852, + "grad_norm": 0.3537389039993286, + "learning_rate": 1.9899401140076607e-05, + "loss": 0.6531, + "step": 3388 + }, + { + "epoch": 0.09305326743547501, + "grad_norm": 0.3484795093536377, + "learning_rate": 1.9899340023541912e-05, + "loss": 0.5751, + "step": 3389 + }, + { + "epoch": 0.09308072487644152, + "grad_norm": 0.38329634070396423, + "learning_rate": 1.9899278888541788e-05, + "loss": 0.5141, + "step": 3390 + }, + { + "epoch": 0.09310818231740801, + "grad_norm": 0.3362230956554413, + "learning_rate": 1.989921773507635e-05, + "loss": 0.4657, + "step": 3391 + }, + { + "epoch": 0.09313563975837452, + "grad_norm": 0.360150009393692, + "learning_rate": 1.9899156563145712e-05, + "loss": 0.5389, + "step": 3392 + }, + { + "epoch": 0.09316309719934102, + "grad_norm": 0.3663862347602844, + "learning_rate": 1.9899095372749984e-05, + "loss": 0.5404, + "step": 3393 + }, + { + "epoch": 0.09319055464030752, + "grad_norm": 0.38615378737449646, + "learning_rate": 1.9899034163889288e-05, + "loss": 0.5797, + "step": 3394 + }, + { + "epoch": 0.09321801208127403, + "grad_norm": 0.36257484555244446, + "learning_rate": 1.989897293656373e-05, + "loss": 0.5439, + "step": 3395 + }, + { + "epoch": 0.09324546952224053, + "grad_norm": 0.35438841581344604, + "learning_rate": 1.989891169077343e-05, + "loss": 0.5804, + "step": 3396 + }, + { + "epoch": 0.09327292696320703, + "grad_norm": 0.47881948947906494, + "learning_rate": 1.9898850426518504e-05, + "loss": 0.6015, + "step": 3397 + }, + { + "epoch": 0.09330038440417353, + "grad_norm": 0.3705235421657562, + "learning_rate": 1.989878914379906e-05, + "loss": 0.5405, + "step": 3398 + }, + { + "epoch": 0.09332784184514004, + "grad_norm": 0.3596845865249634, + "learning_rate": 1.9898727842615213e-05, + "loss": 0.5738, + "step": 3399 + }, + { + "epoch": 0.09335529928610653, + "grad_norm": 0.3623288571834564, + "learning_rate": 1.989866652296708e-05, + "loss": 0.4869, + "step": 3400 + }, + { + "epoch": 0.09338275672707304, + "grad_norm": 0.3373353183269501, + "learning_rate": 1.9898605184854773e-05, + "loss": 0.5091, + "step": 3401 + }, + { + "epoch": 0.09341021416803953, + "grad_norm": 0.35418644547462463, + "learning_rate": 1.9898543828278408e-05, + "loss": 0.4781, + "step": 3402 + }, + { + "epoch": 0.09343767160900604, + "grad_norm": 0.3418307900428772, + "learning_rate": 1.9898482453238108e-05, + "loss": 0.5739, + "step": 3403 + }, + { + "epoch": 0.09346512904997255, + "grad_norm": 0.38305917382240295, + "learning_rate": 1.9898421059733966e-05, + "loss": 0.6026, + "step": 3404 + }, + { + "epoch": 0.09349258649093904, + "grad_norm": 0.3696637451648712, + "learning_rate": 1.989835964776612e-05, + "loss": 0.6124, + "step": 3405 + }, + { + "epoch": 0.09352004393190555, + "grad_norm": 0.3388640880584717, + "learning_rate": 1.989829821733467e-05, + "loss": 0.5274, + "step": 3406 + }, + { + "epoch": 0.09354750137287204, + "grad_norm": 0.3434932231903076, + "learning_rate": 1.9898236768439735e-05, + "loss": 0.5355, + "step": 3407 + }, + { + "epoch": 0.09357495881383855, + "grad_norm": 0.360390841960907, + "learning_rate": 1.989817530108143e-05, + "loss": 0.5937, + "step": 3408 + }, + { + "epoch": 0.09360241625480505, + "grad_norm": 0.657248854637146, + "learning_rate": 1.9898113815259865e-05, + "loss": 0.6228, + "step": 3409 + }, + { + "epoch": 0.09362987369577155, + "grad_norm": 0.39952245354652405, + "learning_rate": 1.9898052310975164e-05, + "loss": 0.6106, + "step": 3410 + }, + { + "epoch": 0.09365733113673806, + "grad_norm": 0.3973071575164795, + "learning_rate": 1.989799078822743e-05, + "loss": 0.6205, + "step": 3411 + }, + { + "epoch": 0.09368478857770456, + "grad_norm": 0.34527167677879333, + "learning_rate": 1.989792924701679e-05, + "loss": 0.5285, + "step": 3412 + }, + { + "epoch": 0.09371224601867106, + "grad_norm": 2.2323694229125977, + "learning_rate": 1.989786768734335e-05, + "loss": 0.5093, + "step": 3413 + }, + { + "epoch": 0.09373970345963756, + "grad_norm": 0.4025181829929352, + "learning_rate": 1.989780610920723e-05, + "loss": 0.5928, + "step": 3414 + }, + { + "epoch": 0.09376716090060407, + "grad_norm": 0.35538190603256226, + "learning_rate": 1.9897744512608542e-05, + "loss": 0.5988, + "step": 3415 + }, + { + "epoch": 0.09379461834157056, + "grad_norm": 0.3549397587776184, + "learning_rate": 1.9897682897547402e-05, + "loss": 0.5257, + "step": 3416 + }, + { + "epoch": 0.09382207578253707, + "grad_norm": 0.3269728124141693, + "learning_rate": 1.9897621264023922e-05, + "loss": 0.4512, + "step": 3417 + }, + { + "epoch": 0.09384953322350358, + "grad_norm": 0.3744167983531952, + "learning_rate": 1.989755961203822e-05, + "loss": 0.6002, + "step": 3418 + }, + { + "epoch": 0.09387699066447007, + "grad_norm": 0.3477419912815094, + "learning_rate": 1.9897497941590412e-05, + "loss": 0.5013, + "step": 3419 + }, + { + "epoch": 0.09390444810543658, + "grad_norm": 0.33220696449279785, + "learning_rate": 1.989743625268061e-05, + "loss": 0.501, + "step": 3420 + }, + { + "epoch": 0.09393190554640307, + "grad_norm": 0.36729490756988525, + "learning_rate": 1.9897374545308928e-05, + "loss": 0.533, + "step": 3421 + }, + { + "epoch": 0.09395936298736958, + "grad_norm": 0.4926360547542572, + "learning_rate": 1.989731281947549e-05, + "loss": 0.5656, + "step": 3422 + }, + { + "epoch": 0.09398682042833607, + "grad_norm": 0.35383841395378113, + "learning_rate": 1.98972510751804e-05, + "loss": 0.5479, + "step": 3423 + }, + { + "epoch": 0.09401427786930258, + "grad_norm": 0.3999338448047638, + "learning_rate": 1.989718931242378e-05, + "loss": 0.5334, + "step": 3424 + }, + { + "epoch": 0.09404173531026909, + "grad_norm": 0.3541015684604645, + "learning_rate": 1.989712753120574e-05, + "loss": 0.5031, + "step": 3425 + }, + { + "epoch": 0.09406919275123558, + "grad_norm": 0.3791353106498718, + "learning_rate": 1.9897065731526402e-05, + "loss": 0.6643, + "step": 3426 + }, + { + "epoch": 0.09409665019220209, + "grad_norm": 0.38609379529953003, + "learning_rate": 1.9897003913385874e-05, + "loss": 0.5845, + "step": 3427 + }, + { + "epoch": 0.09412410763316859, + "grad_norm": 0.3451629877090454, + "learning_rate": 1.989694207678428e-05, + "loss": 0.514, + "step": 3428 + }, + { + "epoch": 0.0941515650741351, + "grad_norm": 0.31919342279434204, + "learning_rate": 1.9896880221721727e-05, + "loss": 0.5469, + "step": 3429 + }, + { + "epoch": 0.09417902251510159, + "grad_norm": 0.36091935634613037, + "learning_rate": 1.9896818348198336e-05, + "loss": 0.5712, + "step": 3430 + }, + { + "epoch": 0.0942064799560681, + "grad_norm": 0.3924546539783478, + "learning_rate": 1.9896756456214214e-05, + "loss": 0.5818, + "step": 3431 + }, + { + "epoch": 0.09423393739703459, + "grad_norm": 0.37218177318573, + "learning_rate": 1.9896694545769487e-05, + "loss": 0.5645, + "step": 3432 + }, + { + "epoch": 0.0942613948380011, + "grad_norm": 0.3264714479446411, + "learning_rate": 1.9896632616864266e-05, + "loss": 0.5092, + "step": 3433 + }, + { + "epoch": 0.0942888522789676, + "grad_norm": 0.36203232407569885, + "learning_rate": 1.989657066949867e-05, + "loss": 0.607, + "step": 3434 + }, + { + "epoch": 0.0943163097199341, + "grad_norm": 0.4394599497318268, + "learning_rate": 1.9896508703672804e-05, + "loss": 0.5357, + "step": 3435 + }, + { + "epoch": 0.09434376716090061, + "grad_norm": 0.37444010376930237, + "learning_rate": 1.9896446719386794e-05, + "loss": 0.5325, + "step": 3436 + }, + { + "epoch": 0.0943712246018671, + "grad_norm": 0.47420066595077515, + "learning_rate": 1.9896384716640754e-05, + "loss": 0.5821, + "step": 3437 + }, + { + "epoch": 0.09439868204283361, + "grad_norm": 0.37569135427474976, + "learning_rate": 1.9896322695434797e-05, + "loss": 0.5169, + "step": 3438 + }, + { + "epoch": 0.0944261394838001, + "grad_norm": 0.39137929677963257, + "learning_rate": 1.9896260655769037e-05, + "loss": 0.6035, + "step": 3439 + }, + { + "epoch": 0.09445359692476661, + "grad_norm": 0.3653644323348999, + "learning_rate": 1.9896198597643593e-05, + "loss": 0.6186, + "step": 3440 + }, + { + "epoch": 0.09448105436573312, + "grad_norm": 0.3543521761894226, + "learning_rate": 1.989613652105858e-05, + "loss": 0.5046, + "step": 3441 + }, + { + "epoch": 0.09450851180669961, + "grad_norm": 0.3402203619480133, + "learning_rate": 1.989607442601412e-05, + "loss": 0.542, + "step": 3442 + }, + { + "epoch": 0.09453596924766612, + "grad_norm": 0.39587920904159546, + "learning_rate": 1.989601231251032e-05, + "loss": 0.6175, + "step": 3443 + }, + { + "epoch": 0.09456342668863262, + "grad_norm": 0.37106457352638245, + "learning_rate": 1.9895950180547296e-05, + "loss": 0.508, + "step": 3444 + }, + { + "epoch": 0.09459088412959912, + "grad_norm": 0.39482274651527405, + "learning_rate": 1.9895888030125165e-05, + "loss": 0.5859, + "step": 3445 + }, + { + "epoch": 0.09461834157056562, + "grad_norm": 0.35948488116264343, + "learning_rate": 1.9895825861244048e-05, + "loss": 0.4445, + "step": 3446 + }, + { + "epoch": 0.09464579901153213, + "grad_norm": 0.3508606553077698, + "learning_rate": 1.9895763673904054e-05, + "loss": 0.539, + "step": 3447 + }, + { + "epoch": 0.09467325645249863, + "grad_norm": 0.35918480157852173, + "learning_rate": 1.9895701468105304e-05, + "loss": 0.4974, + "step": 3448 + }, + { + "epoch": 0.09470071389346513, + "grad_norm": 0.35035401582717896, + "learning_rate": 1.9895639243847914e-05, + "loss": 0.5751, + "step": 3449 + }, + { + "epoch": 0.09472817133443164, + "grad_norm": 0.35891908407211304, + "learning_rate": 1.9895577001131995e-05, + "loss": 0.5223, + "step": 3450 + }, + { + "epoch": 0.09475562877539813, + "grad_norm": 0.3968767523765564, + "learning_rate": 1.9895514739957667e-05, + "loss": 0.535, + "step": 3451 + }, + { + "epoch": 0.09478308621636464, + "grad_norm": 0.37734493613243103, + "learning_rate": 1.989545246032505e-05, + "loss": 0.6085, + "step": 3452 + }, + { + "epoch": 0.09481054365733113, + "grad_norm": 0.35918161273002625, + "learning_rate": 1.9895390162234255e-05, + "loss": 0.6522, + "step": 3453 + }, + { + "epoch": 0.09483800109829764, + "grad_norm": 0.49983954429626465, + "learning_rate": 1.98953278456854e-05, + "loss": 0.6168, + "step": 3454 + }, + { + "epoch": 0.09486545853926415, + "grad_norm": 0.3979390263557434, + "learning_rate": 1.9895265510678593e-05, + "loss": 0.5338, + "step": 3455 + }, + { + "epoch": 0.09489291598023064, + "grad_norm": 0.32399553060531616, + "learning_rate": 1.9895203157213964e-05, + "loss": 0.524, + "step": 3456 + }, + { + "epoch": 0.09492037342119715, + "grad_norm": 0.3515511155128479, + "learning_rate": 1.9895140785291623e-05, + "loss": 0.5191, + "step": 3457 + }, + { + "epoch": 0.09494783086216364, + "grad_norm": 0.3620997965335846, + "learning_rate": 1.9895078394911685e-05, + "loss": 0.5637, + "step": 3458 + }, + { + "epoch": 0.09497528830313015, + "grad_norm": 0.36275407671928406, + "learning_rate": 1.9895015986074266e-05, + "loss": 0.6176, + "step": 3459 + }, + { + "epoch": 0.09500274574409664, + "grad_norm": 0.4276838004589081, + "learning_rate": 1.9894953558779483e-05, + "loss": 0.6303, + "step": 3460 + }, + { + "epoch": 0.09503020318506315, + "grad_norm": 0.43101316690444946, + "learning_rate": 1.9894891113027457e-05, + "loss": 0.7029, + "step": 3461 + }, + { + "epoch": 0.09505766062602966, + "grad_norm": 0.9723101854324341, + "learning_rate": 1.98948286488183e-05, + "loss": 0.5072, + "step": 3462 + }, + { + "epoch": 0.09508511806699615, + "grad_norm": 0.3556780219078064, + "learning_rate": 1.989476616615213e-05, + "loss": 0.6366, + "step": 3463 + }, + { + "epoch": 0.09511257550796266, + "grad_norm": 0.3611419200897217, + "learning_rate": 1.9894703665029063e-05, + "loss": 0.5442, + "step": 3464 + }, + { + "epoch": 0.09514003294892916, + "grad_norm": 0.33991414308547974, + "learning_rate": 1.9894641145449218e-05, + "loss": 0.5348, + "step": 3465 + }, + { + "epoch": 0.09516749038989566, + "grad_norm": 0.4211452305316925, + "learning_rate": 1.98945786074127e-05, + "loss": 0.6589, + "step": 3466 + }, + { + "epoch": 0.09519494783086216, + "grad_norm": 0.42497605085372925, + "learning_rate": 1.9894516050919644e-05, + "loss": 0.5349, + "step": 3467 + }, + { + "epoch": 0.09522240527182867, + "grad_norm": 0.3140662610530853, + "learning_rate": 1.9894453475970157e-05, + "loss": 0.5303, + "step": 3468 + }, + { + "epoch": 0.09524986271279516, + "grad_norm": 0.33659735321998596, + "learning_rate": 1.9894390882564353e-05, + "loss": 0.4863, + "step": 3469 + }, + { + "epoch": 0.09527732015376167, + "grad_norm": 0.3719843626022339, + "learning_rate": 1.9894328270702355e-05, + "loss": 0.5224, + "step": 3470 + }, + { + "epoch": 0.09530477759472818, + "grad_norm": 0.44416165351867676, + "learning_rate": 1.9894265640384275e-05, + "loss": 0.6584, + "step": 3471 + }, + { + "epoch": 0.09533223503569467, + "grad_norm": 0.36652106046676636, + "learning_rate": 1.989420299161023e-05, + "loss": 0.5424, + "step": 3472 + }, + { + "epoch": 0.09535969247666118, + "grad_norm": 0.38638192415237427, + "learning_rate": 1.9894140324380343e-05, + "loss": 0.5322, + "step": 3473 + }, + { + "epoch": 0.09538714991762767, + "grad_norm": 0.3891766667366028, + "learning_rate": 1.989407763869472e-05, + "loss": 0.542, + "step": 3474 + }, + { + "epoch": 0.09541460735859418, + "grad_norm": 0.34113895893096924, + "learning_rate": 1.989401493455349e-05, + "loss": 0.5389, + "step": 3475 + }, + { + "epoch": 0.09544206479956067, + "grad_norm": 0.34819495677948, + "learning_rate": 1.9893952211956763e-05, + "loss": 0.5945, + "step": 3476 + }, + { + "epoch": 0.09546952224052718, + "grad_norm": 0.33019593358039856, + "learning_rate": 1.9893889470904656e-05, + "loss": 0.6183, + "step": 3477 + }, + { + "epoch": 0.09549697968149369, + "grad_norm": 0.3333442509174347, + "learning_rate": 1.9893826711397287e-05, + "loss": 0.491, + "step": 3478 + }, + { + "epoch": 0.09552443712246018, + "grad_norm": 0.32975566387176514, + "learning_rate": 1.9893763933434775e-05, + "loss": 0.5944, + "step": 3479 + }, + { + "epoch": 0.09555189456342669, + "grad_norm": 0.3307519257068634, + "learning_rate": 1.9893701137017237e-05, + "loss": 0.5, + "step": 3480 + }, + { + "epoch": 0.09557935200439319, + "grad_norm": 0.3824635148048401, + "learning_rate": 1.989363832214479e-05, + "loss": 0.5898, + "step": 3481 + }, + { + "epoch": 0.0956068094453597, + "grad_norm": 0.3317599594593048, + "learning_rate": 1.9893575488817544e-05, + "loss": 0.4722, + "step": 3482 + }, + { + "epoch": 0.09563426688632619, + "grad_norm": 0.3494565486907959, + "learning_rate": 1.9893512637035622e-05, + "loss": 0.5706, + "step": 3483 + }, + { + "epoch": 0.0956617243272927, + "grad_norm": 0.37023550271987915, + "learning_rate": 1.9893449766799146e-05, + "loss": 0.4855, + "step": 3484 + }, + { + "epoch": 0.0956891817682592, + "grad_norm": 0.3768095374107361, + "learning_rate": 1.9893386878108228e-05, + "loss": 0.5134, + "step": 3485 + }, + { + "epoch": 0.0957166392092257, + "grad_norm": 0.5182499289512634, + "learning_rate": 1.9893323970962983e-05, + "loss": 0.5072, + "step": 3486 + }, + { + "epoch": 0.0957440966501922, + "grad_norm": 0.3611791133880615, + "learning_rate": 1.9893261045363535e-05, + "loss": 0.5091, + "step": 3487 + }, + { + "epoch": 0.0957715540911587, + "grad_norm": 0.3808910548686981, + "learning_rate": 1.9893198101309995e-05, + "loss": 0.5811, + "step": 3488 + }, + { + "epoch": 0.09579901153212521, + "grad_norm": 0.45492175221443176, + "learning_rate": 1.9893135138802483e-05, + "loss": 0.6198, + "step": 3489 + }, + { + "epoch": 0.0958264689730917, + "grad_norm": 0.32423749566078186, + "learning_rate": 1.9893072157841118e-05, + "loss": 0.491, + "step": 3490 + }, + { + "epoch": 0.09585392641405821, + "grad_norm": 0.3587932586669922, + "learning_rate": 1.9893009158426012e-05, + "loss": 0.532, + "step": 3491 + }, + { + "epoch": 0.09588138385502472, + "grad_norm": 0.3773055672645569, + "learning_rate": 1.989294614055729e-05, + "loss": 0.5544, + "step": 3492 + }, + { + "epoch": 0.09590884129599121, + "grad_norm": 1.1692163944244385, + "learning_rate": 1.9892883104235065e-05, + "loss": 0.5606, + "step": 3493 + }, + { + "epoch": 0.09593629873695772, + "grad_norm": 0.37721046805381775, + "learning_rate": 1.989282004945946e-05, + "loss": 0.6271, + "step": 3494 + }, + { + "epoch": 0.09596375617792421, + "grad_norm": 0.34470996260643005, + "learning_rate": 1.989275697623058e-05, + "loss": 0.5518, + "step": 3495 + }, + { + "epoch": 0.09599121361889072, + "grad_norm": 0.4128940999507904, + "learning_rate": 1.9892693884548556e-05, + "loss": 0.5795, + "step": 3496 + }, + { + "epoch": 0.09601867105985722, + "grad_norm": 0.39666882157325745, + "learning_rate": 1.9892630774413498e-05, + "loss": 0.6227, + "step": 3497 + }, + { + "epoch": 0.09604612850082372, + "grad_norm": 0.3994831442832947, + "learning_rate": 1.9892567645825525e-05, + "loss": 0.5109, + "step": 3498 + }, + { + "epoch": 0.09607358594179022, + "grad_norm": 0.35274678468704224, + "learning_rate": 1.989250449878476e-05, + "loss": 0.5128, + "step": 3499 + }, + { + "epoch": 0.09610104338275673, + "grad_norm": 0.3613601326942444, + "learning_rate": 1.9892441333291315e-05, + "loss": 0.5863, + "step": 3500 + }, + { + "epoch": 0.09612850082372323, + "grad_norm": 0.3756472170352936, + "learning_rate": 1.989237814934531e-05, + "loss": 0.6302, + "step": 3501 + }, + { + "epoch": 0.09615595826468973, + "grad_norm": 0.4070020616054535, + "learning_rate": 1.9892314946946864e-05, + "loss": 0.6044, + "step": 3502 + }, + { + "epoch": 0.09618341570565624, + "grad_norm": 0.34237125515937805, + "learning_rate": 1.9892251726096092e-05, + "loss": 0.5237, + "step": 3503 + }, + { + "epoch": 0.09621087314662273, + "grad_norm": 0.3345862329006195, + "learning_rate": 1.9892188486793114e-05, + "loss": 0.5011, + "step": 3504 + }, + { + "epoch": 0.09623833058758924, + "grad_norm": 0.3565434515476227, + "learning_rate": 1.9892125229038045e-05, + "loss": 0.4762, + "step": 3505 + }, + { + "epoch": 0.09626578802855573, + "grad_norm": 0.368267685174942, + "learning_rate": 1.9892061952831007e-05, + "loss": 0.5024, + "step": 3506 + }, + { + "epoch": 0.09629324546952224, + "grad_norm": 0.36739382147789, + "learning_rate": 1.9891998658172115e-05, + "loss": 0.6527, + "step": 3507 + }, + { + "epoch": 0.09632070291048875, + "grad_norm": 0.504655122756958, + "learning_rate": 1.989193534506149e-05, + "loss": 0.6182, + "step": 3508 + }, + { + "epoch": 0.09634816035145524, + "grad_norm": 0.34144777059555054, + "learning_rate": 1.9891872013499247e-05, + "loss": 0.466, + "step": 3509 + }, + { + "epoch": 0.09637561779242175, + "grad_norm": 0.37317395210266113, + "learning_rate": 1.989180866348551e-05, + "loss": 0.5797, + "step": 3510 + }, + { + "epoch": 0.09640307523338824, + "grad_norm": 0.35044172406196594, + "learning_rate": 1.9891745295020387e-05, + "loss": 0.623, + "step": 3511 + }, + { + "epoch": 0.09643053267435475, + "grad_norm": 0.4684470295906067, + "learning_rate": 1.9891681908104005e-05, + "loss": 0.5474, + "step": 3512 + }, + { + "epoch": 0.09645799011532125, + "grad_norm": 0.38734039664268494, + "learning_rate": 1.9891618502736477e-05, + "loss": 0.4653, + "step": 3513 + }, + { + "epoch": 0.09648544755628775, + "grad_norm": 0.3316654562950134, + "learning_rate": 1.9891555078917922e-05, + "loss": 0.4967, + "step": 3514 + }, + { + "epoch": 0.09651290499725426, + "grad_norm": 0.34695619344711304, + "learning_rate": 1.9891491636648465e-05, + "loss": 0.4539, + "step": 3515 + }, + { + "epoch": 0.09654036243822076, + "grad_norm": 0.3590364456176758, + "learning_rate": 1.9891428175928215e-05, + "loss": 0.5704, + "step": 3516 + }, + { + "epoch": 0.09656781987918726, + "grad_norm": 0.36466220021247864, + "learning_rate": 1.9891364696757297e-05, + "loss": 0.4685, + "step": 3517 + }, + { + "epoch": 0.09659527732015376, + "grad_norm": 0.3801593780517578, + "learning_rate": 1.9891301199135826e-05, + "loss": 0.5702, + "step": 3518 + }, + { + "epoch": 0.09662273476112027, + "grad_norm": 0.4039519429206848, + "learning_rate": 1.989123768306392e-05, + "loss": 0.5627, + "step": 3519 + }, + { + "epoch": 0.09665019220208676, + "grad_norm": 0.33174341917037964, + "learning_rate": 1.98911741485417e-05, + "loss": 0.4603, + "step": 3520 + }, + { + "epoch": 0.09667764964305327, + "grad_norm": 0.4142787754535675, + "learning_rate": 1.9891110595569283e-05, + "loss": 0.5153, + "step": 3521 + }, + { + "epoch": 0.09670510708401978, + "grad_norm": 0.3311069905757904, + "learning_rate": 1.9891047024146787e-05, + "loss": 0.5475, + "step": 3522 + }, + { + "epoch": 0.09673256452498627, + "grad_norm": 0.45340561866760254, + "learning_rate": 1.9890983434274334e-05, + "loss": 0.613, + "step": 3523 + }, + { + "epoch": 0.09676002196595278, + "grad_norm": 0.35684454441070557, + "learning_rate": 1.9890919825952037e-05, + "loss": 0.5186, + "step": 3524 + }, + { + "epoch": 0.09678747940691927, + "grad_norm": 0.3558177053928375, + "learning_rate": 1.989085619918002e-05, + "loss": 0.4547, + "step": 3525 + }, + { + "epoch": 0.09681493684788578, + "grad_norm": 0.49769270420074463, + "learning_rate": 1.98907925539584e-05, + "loss": 0.5751, + "step": 3526 + }, + { + "epoch": 0.09684239428885227, + "grad_norm": 0.35300588607788086, + "learning_rate": 1.989072889028729e-05, + "loss": 0.5881, + "step": 3527 + }, + { + "epoch": 0.09686985172981878, + "grad_norm": 0.37353241443634033, + "learning_rate": 1.989066520816682e-05, + "loss": 0.5883, + "step": 3528 + }, + { + "epoch": 0.09689730917078529, + "grad_norm": 0.3843049705028534, + "learning_rate": 1.98906015075971e-05, + "loss": 0.6096, + "step": 3529 + }, + { + "epoch": 0.09692476661175178, + "grad_norm": 0.33402693271636963, + "learning_rate": 1.989053778857825e-05, + "loss": 0.5129, + "step": 3530 + }, + { + "epoch": 0.09695222405271829, + "grad_norm": 0.35944026708602905, + "learning_rate": 1.9890474051110396e-05, + "loss": 0.5773, + "step": 3531 + }, + { + "epoch": 0.09697968149368479, + "grad_norm": 0.35858574509620667, + "learning_rate": 1.9890410295193648e-05, + "loss": 0.5511, + "step": 3532 + }, + { + "epoch": 0.0970071389346513, + "grad_norm": 0.37889590859413147, + "learning_rate": 1.9890346520828126e-05, + "loss": 0.5949, + "step": 3533 + }, + { + "epoch": 0.09703459637561779, + "grad_norm": 0.41988304257392883, + "learning_rate": 1.989028272801395e-05, + "loss": 0.5523, + "step": 3534 + }, + { + "epoch": 0.0970620538165843, + "grad_norm": 0.3533228039741516, + "learning_rate": 1.9890218916751247e-05, + "loss": 0.4894, + "step": 3535 + }, + { + "epoch": 0.09708951125755079, + "grad_norm": 0.3477717339992523, + "learning_rate": 1.9890155087040125e-05, + "loss": 0.5775, + "step": 3536 + }, + { + "epoch": 0.0971169686985173, + "grad_norm": 0.34496837854385376, + "learning_rate": 1.9890091238880706e-05, + "loss": 0.5502, + "step": 3537 + }, + { + "epoch": 0.0971444261394838, + "grad_norm": 0.33033570647239685, + "learning_rate": 1.989002737227311e-05, + "loss": 0.4924, + "step": 3538 + }, + { + "epoch": 0.0971718835804503, + "grad_norm": 0.34897157549858093, + "learning_rate": 1.9889963487217457e-05, + "loss": 0.614, + "step": 3539 + }, + { + "epoch": 0.09719934102141681, + "grad_norm": 0.3401179015636444, + "learning_rate": 1.9889899583713868e-05, + "loss": 0.532, + "step": 3540 + }, + { + "epoch": 0.0972267984623833, + "grad_norm": 0.37104567885398865, + "learning_rate": 1.9889835661762457e-05, + "loss": 0.5563, + "step": 3541 + }, + { + "epoch": 0.09725425590334981, + "grad_norm": 0.38156986236572266, + "learning_rate": 1.988977172136335e-05, + "loss": 0.641, + "step": 3542 + }, + { + "epoch": 0.0972817133443163, + "grad_norm": 0.34695109724998474, + "learning_rate": 1.988970776251666e-05, + "loss": 0.6138, + "step": 3543 + }, + { + "epoch": 0.09730917078528281, + "grad_norm": 0.34380486607551575, + "learning_rate": 1.9889643785222505e-05, + "loss": 0.558, + "step": 3544 + }, + { + "epoch": 0.09733662822624932, + "grad_norm": 0.32578280568122864, + "learning_rate": 1.9889579789481012e-05, + "loss": 0.5645, + "step": 3545 + }, + { + "epoch": 0.09736408566721581, + "grad_norm": 0.3539084196090698, + "learning_rate": 1.9889515775292297e-05, + "loss": 0.5663, + "step": 3546 + }, + { + "epoch": 0.09739154310818232, + "grad_norm": 0.3522771894931793, + "learning_rate": 1.9889451742656475e-05, + "loss": 0.6001, + "step": 3547 + }, + { + "epoch": 0.09741900054914882, + "grad_norm": 0.3829791247844696, + "learning_rate": 1.988938769157367e-05, + "loss": 0.421, + "step": 3548 + }, + { + "epoch": 0.09744645799011532, + "grad_norm": 0.48487117886543274, + "learning_rate": 1.9889323622044e-05, + "loss": 0.5747, + "step": 3549 + }, + { + "epoch": 0.09747391543108182, + "grad_norm": 0.38056084513664246, + "learning_rate": 1.988925953406759e-05, + "loss": 0.6018, + "step": 3550 + }, + { + "epoch": 0.09750137287204833, + "grad_norm": 0.44457364082336426, + "learning_rate": 1.988919542764455e-05, + "loss": 0.5126, + "step": 3551 + }, + { + "epoch": 0.09752883031301483, + "grad_norm": 0.4096907675266266, + "learning_rate": 1.9889131302775007e-05, + "loss": 0.6592, + "step": 3552 + }, + { + "epoch": 0.09755628775398133, + "grad_norm": 0.359571635723114, + "learning_rate": 1.9889067159459077e-05, + "loss": 0.5318, + "step": 3553 + }, + { + "epoch": 0.09758374519494784, + "grad_norm": 0.3630951941013336, + "learning_rate": 1.988900299769688e-05, + "loss": 0.5161, + "step": 3554 + }, + { + "epoch": 0.09761120263591433, + "grad_norm": 0.3847862184047699, + "learning_rate": 1.9888938817488536e-05, + "loss": 0.5894, + "step": 3555 + }, + { + "epoch": 0.09763866007688084, + "grad_norm": 0.3550190031528473, + "learning_rate": 1.9888874618834168e-05, + "loss": 0.5395, + "step": 3556 + }, + { + "epoch": 0.09766611751784733, + "grad_norm": 0.33573204278945923, + "learning_rate": 1.988881040173389e-05, + "loss": 0.5234, + "step": 3557 + }, + { + "epoch": 0.09769357495881384, + "grad_norm": 0.38134053349494934, + "learning_rate": 1.988874616618782e-05, + "loss": 0.6045, + "step": 3558 + }, + { + "epoch": 0.09772103239978035, + "grad_norm": 0.38448596000671387, + "learning_rate": 1.988868191219609e-05, + "loss": 0.5224, + "step": 3559 + }, + { + "epoch": 0.09774848984074684, + "grad_norm": 0.3896487057209015, + "learning_rate": 1.988861763975881e-05, + "loss": 0.5937, + "step": 3560 + }, + { + "epoch": 0.09777594728171335, + "grad_norm": 0.37976008653640747, + "learning_rate": 1.98885533488761e-05, + "loss": 0.5414, + "step": 3561 + }, + { + "epoch": 0.09780340472267984, + "grad_norm": 0.36724749207496643, + "learning_rate": 1.988848903954808e-05, + "loss": 0.5818, + "step": 3562 + }, + { + "epoch": 0.09783086216364635, + "grad_norm": 0.35942816734313965, + "learning_rate": 1.9888424711774877e-05, + "loss": 0.5625, + "step": 3563 + }, + { + "epoch": 0.09785831960461285, + "grad_norm": 0.3793608844280243, + "learning_rate": 1.9888360365556602e-05, + "loss": 0.6834, + "step": 3564 + }, + { + "epoch": 0.09788577704557935, + "grad_norm": 0.4071831703186035, + "learning_rate": 1.9888296000893382e-05, + "loss": 0.5579, + "step": 3565 + }, + { + "epoch": 0.09791323448654585, + "grad_norm": 0.4291229546070099, + "learning_rate": 1.9888231617785332e-05, + "loss": 0.5512, + "step": 3566 + }, + { + "epoch": 0.09794069192751235, + "grad_norm": 0.33545809984207153, + "learning_rate": 1.9888167216232573e-05, + "loss": 0.4754, + "step": 3567 + }, + { + "epoch": 0.09796814936847886, + "grad_norm": 0.3911801874637604, + "learning_rate": 1.988810279623523e-05, + "loss": 0.4978, + "step": 3568 + }, + { + "epoch": 0.09799560680944536, + "grad_norm": 0.3877221345901489, + "learning_rate": 1.988803835779342e-05, + "loss": 0.4528, + "step": 3569 + }, + { + "epoch": 0.09802306425041186, + "grad_norm": 0.37875303626060486, + "learning_rate": 1.9887973900907254e-05, + "loss": 0.5478, + "step": 3570 + }, + { + "epoch": 0.09805052169137836, + "grad_norm": 0.3422386646270752, + "learning_rate": 1.988790942557687e-05, + "loss": 0.5802, + "step": 3571 + }, + { + "epoch": 0.09807797913234487, + "grad_norm": 0.36366236209869385, + "learning_rate": 1.9887844931802375e-05, + "loss": 0.6156, + "step": 3572 + }, + { + "epoch": 0.09810543657331136, + "grad_norm": 0.44602468609809875, + "learning_rate": 1.9887780419583894e-05, + "loss": 0.4868, + "step": 3573 + }, + { + "epoch": 0.09813289401427787, + "grad_norm": 0.3724503219127655, + "learning_rate": 1.9887715888921546e-05, + "loss": 0.545, + "step": 3574 + }, + { + "epoch": 0.09816035145524438, + "grad_norm": 0.4117805063724518, + "learning_rate": 1.9887651339815455e-05, + "loss": 0.5867, + "step": 3575 + }, + { + "epoch": 0.09818780889621087, + "grad_norm": 0.3923654854297638, + "learning_rate": 1.9887586772265736e-05, + "loss": 0.5777, + "step": 3576 + }, + { + "epoch": 0.09821526633717738, + "grad_norm": 0.41430962085723877, + "learning_rate": 1.988752218627251e-05, + "loss": 0.6211, + "step": 3577 + }, + { + "epoch": 0.09824272377814387, + "grad_norm": 0.37244170904159546, + "learning_rate": 1.98874575818359e-05, + "loss": 0.5153, + "step": 3578 + }, + { + "epoch": 0.09827018121911038, + "grad_norm": 0.3664463460445404, + "learning_rate": 1.9887392958956032e-05, + "loss": 0.497, + "step": 3579 + }, + { + "epoch": 0.09829763866007687, + "grad_norm": 0.39220693707466125, + "learning_rate": 1.9887328317633013e-05, + "loss": 0.5767, + "step": 3580 + }, + { + "epoch": 0.09832509610104338, + "grad_norm": 0.39501598477363586, + "learning_rate": 1.9887263657866974e-05, + "loss": 0.4803, + "step": 3581 + }, + { + "epoch": 0.09835255354200989, + "grad_norm": 0.3672422766685486, + "learning_rate": 1.988719897965803e-05, + "loss": 0.5547, + "step": 3582 + }, + { + "epoch": 0.09838001098297638, + "grad_norm": 0.3282439410686493, + "learning_rate": 1.988713428300631e-05, + "loss": 0.4763, + "step": 3583 + }, + { + "epoch": 0.09840746842394289, + "grad_norm": 0.3634583353996277, + "learning_rate": 1.988706956791193e-05, + "loss": 0.6102, + "step": 3584 + }, + { + "epoch": 0.09843492586490939, + "grad_norm": 0.6410273313522339, + "learning_rate": 1.9887004834375e-05, + "loss": 0.535, + "step": 3585 + }, + { + "epoch": 0.0984623833058759, + "grad_norm": 0.6124294996261597, + "learning_rate": 1.988694008239566e-05, + "loss": 0.5549, + "step": 3586 + }, + { + "epoch": 0.09848984074684239, + "grad_norm": 0.33909326791763306, + "learning_rate": 1.9886875311974014e-05, + "loss": 0.5354, + "step": 3587 + }, + { + "epoch": 0.0985172981878089, + "grad_norm": 0.3788776695728302, + "learning_rate": 1.9886810523110192e-05, + "loss": 0.5931, + "step": 3588 + }, + { + "epoch": 0.0985447556287754, + "grad_norm": 0.4040602445602417, + "learning_rate": 1.9886745715804315e-05, + "loss": 0.5245, + "step": 3589 + }, + { + "epoch": 0.0985722130697419, + "grad_norm": 0.34940508008003235, + "learning_rate": 1.98866808900565e-05, + "loss": 0.4769, + "step": 3590 + }, + { + "epoch": 0.0985996705107084, + "grad_norm": 0.3336066007614136, + "learning_rate": 1.9886616045866872e-05, + "loss": 0.426, + "step": 3591 + }, + { + "epoch": 0.0986271279516749, + "grad_norm": 0.36807960271835327, + "learning_rate": 1.9886551183235547e-05, + "loss": 0.5722, + "step": 3592 + }, + { + "epoch": 0.09865458539264141, + "grad_norm": 0.3348952531814575, + "learning_rate": 1.988648630216265e-05, + "loss": 0.5907, + "step": 3593 + }, + { + "epoch": 0.0986820428336079, + "grad_norm": 0.3802056610584259, + "learning_rate": 1.9886421402648303e-05, + "loss": 0.538, + "step": 3594 + }, + { + "epoch": 0.09870950027457441, + "grad_norm": 0.3609468936920166, + "learning_rate": 1.9886356484692618e-05, + "loss": 0.5057, + "step": 3595 + }, + { + "epoch": 0.09873695771554092, + "grad_norm": 0.3289881646633148, + "learning_rate": 1.9886291548295728e-05, + "loss": 0.5684, + "step": 3596 + }, + { + "epoch": 0.09876441515650741, + "grad_norm": 0.3743343949317932, + "learning_rate": 1.9886226593457748e-05, + "loss": 0.5325, + "step": 3597 + }, + { + "epoch": 0.09879187259747392, + "grad_norm": 0.3631688058376312, + "learning_rate": 1.98861616201788e-05, + "loss": 0.5581, + "step": 3598 + }, + { + "epoch": 0.09881933003844041, + "grad_norm": 0.3475742042064667, + "learning_rate": 1.9886096628459004e-05, + "loss": 0.4997, + "step": 3599 + }, + { + "epoch": 0.09884678747940692, + "grad_norm": 0.39240872859954834, + "learning_rate": 1.9886031618298483e-05, + "loss": 0.5524, + "step": 3600 + }, + { + "epoch": 0.09887424492037342, + "grad_norm": 0.3526679277420044, + "learning_rate": 1.988596658969736e-05, + "loss": 0.4828, + "step": 3601 + }, + { + "epoch": 0.09890170236133992, + "grad_norm": 0.407236248254776, + "learning_rate": 1.9885901542655752e-05, + "loss": 0.5865, + "step": 3602 + }, + { + "epoch": 0.09892915980230642, + "grad_norm": 0.36110520362854004, + "learning_rate": 1.9885836477173782e-05, + "loss": 0.5074, + "step": 3603 + }, + { + "epoch": 0.09895661724327293, + "grad_norm": 0.3415331542491913, + "learning_rate": 1.9885771393251572e-05, + "loss": 0.5664, + "step": 3604 + }, + { + "epoch": 0.09898407468423943, + "grad_norm": 0.34186282753944397, + "learning_rate": 1.9885706290889245e-05, + "loss": 0.5343, + "step": 3605 + }, + { + "epoch": 0.09901153212520593, + "grad_norm": 0.39023756980895996, + "learning_rate": 1.988564117008692e-05, + "loss": 0.5583, + "step": 3606 + }, + { + "epoch": 0.09903898956617244, + "grad_norm": 0.36189743876457214, + "learning_rate": 1.988557603084472e-05, + "loss": 0.5136, + "step": 3607 + }, + { + "epoch": 0.09906644700713893, + "grad_norm": 0.37914222478866577, + "learning_rate": 1.9885510873162764e-05, + "loss": 0.561, + "step": 3608 + }, + { + "epoch": 0.09909390444810544, + "grad_norm": 0.3526715338230133, + "learning_rate": 1.9885445697041174e-05, + "loss": 0.6085, + "step": 3609 + }, + { + "epoch": 0.09912136188907193, + "grad_norm": 0.36381399631500244, + "learning_rate": 1.9885380502480073e-05, + "loss": 0.5324, + "step": 3610 + }, + { + "epoch": 0.09914881933003844, + "grad_norm": 0.4069061577320099, + "learning_rate": 1.9885315289479587e-05, + "loss": 0.5786, + "step": 3611 + }, + { + "epoch": 0.09917627677100495, + "grad_norm": 0.4079681634902954, + "learning_rate": 1.9885250058039827e-05, + "loss": 0.5896, + "step": 3612 + }, + { + "epoch": 0.09920373421197144, + "grad_norm": 0.4059285819530487, + "learning_rate": 1.9885184808160925e-05, + "loss": 0.5825, + "step": 3613 + }, + { + "epoch": 0.09923119165293795, + "grad_norm": 0.3730524182319641, + "learning_rate": 1.9885119539842994e-05, + "loss": 0.5691, + "step": 3614 + }, + { + "epoch": 0.09925864909390444, + "grad_norm": 0.3590240180492401, + "learning_rate": 1.988505425308616e-05, + "loss": 0.5598, + "step": 3615 + }, + { + "epoch": 0.09928610653487095, + "grad_norm": 0.3533720374107361, + "learning_rate": 1.988498894789055e-05, + "loss": 0.6012, + "step": 3616 + }, + { + "epoch": 0.09931356397583745, + "grad_norm": 0.33175280690193176, + "learning_rate": 1.988492362425628e-05, + "loss": 0.5197, + "step": 3617 + }, + { + "epoch": 0.09934102141680395, + "grad_norm": 0.37959831953048706, + "learning_rate": 1.9884858282183468e-05, + "loss": 0.5025, + "step": 3618 + }, + { + "epoch": 0.09936847885777046, + "grad_norm": 0.340808242559433, + "learning_rate": 1.988479292167224e-05, + "loss": 0.5095, + "step": 3619 + }, + { + "epoch": 0.09939593629873696, + "grad_norm": 0.36388471722602844, + "learning_rate": 1.9884727542722724e-05, + "loss": 0.4468, + "step": 3620 + }, + { + "epoch": 0.09942339373970346, + "grad_norm": 0.3724011778831482, + "learning_rate": 1.9884662145335033e-05, + "loss": 0.614, + "step": 3621 + }, + { + "epoch": 0.09945085118066996, + "grad_norm": 0.34433940052986145, + "learning_rate": 1.9884596729509293e-05, + "loss": 0.509, + "step": 3622 + }, + { + "epoch": 0.09947830862163647, + "grad_norm": 0.458732932806015, + "learning_rate": 1.9884531295245626e-05, + "loss": 0.5681, + "step": 3623 + }, + { + "epoch": 0.09950576606260296, + "grad_norm": 0.4233742952346802, + "learning_rate": 1.988446584254415e-05, + "loss": 0.5868, + "step": 3624 + }, + { + "epoch": 0.09953322350356947, + "grad_norm": 0.3689969480037689, + "learning_rate": 1.9884400371404996e-05, + "loss": 0.5987, + "step": 3625 + }, + { + "epoch": 0.09956068094453598, + "grad_norm": 0.38740184903144836, + "learning_rate": 1.9884334881828276e-05, + "loss": 0.5966, + "step": 3626 + }, + { + "epoch": 0.09958813838550247, + "grad_norm": 0.36507171392440796, + "learning_rate": 1.988426937381412e-05, + "loss": 0.4808, + "step": 3627 + }, + { + "epoch": 0.09961559582646898, + "grad_norm": 0.3502161204814911, + "learning_rate": 1.9884203847362643e-05, + "loss": 0.5885, + "step": 3628 + }, + { + "epoch": 0.09964305326743547, + "grad_norm": 0.4651767611503601, + "learning_rate": 1.9884138302473974e-05, + "loss": 0.7005, + "step": 3629 + }, + { + "epoch": 0.09967051070840198, + "grad_norm": 0.3845900893211365, + "learning_rate": 1.988407273914823e-05, + "loss": 0.5759, + "step": 3630 + }, + { + "epoch": 0.09969796814936847, + "grad_norm": 0.36267364025115967, + "learning_rate": 1.988400715738554e-05, + "loss": 0.5762, + "step": 3631 + }, + { + "epoch": 0.09972542559033498, + "grad_norm": 0.3803366422653198, + "learning_rate": 1.9883941557186018e-05, + "loss": 0.5841, + "step": 3632 + }, + { + "epoch": 0.09975288303130148, + "grad_norm": 0.3361320495605469, + "learning_rate": 1.988387593854979e-05, + "loss": 0.5275, + "step": 3633 + }, + { + "epoch": 0.09978034047226798, + "grad_norm": 0.39372843503952026, + "learning_rate": 1.988381030147698e-05, + "loss": 0.5754, + "step": 3634 + }, + { + "epoch": 0.09980779791323449, + "grad_norm": 0.33151230216026306, + "learning_rate": 1.9883744645967713e-05, + "loss": 0.508, + "step": 3635 + }, + { + "epoch": 0.09983525535420099, + "grad_norm": 0.3384150266647339, + "learning_rate": 1.9883678972022105e-05, + "loss": 0.5156, + "step": 3636 + }, + { + "epoch": 0.0998627127951675, + "grad_norm": 0.4019555151462555, + "learning_rate": 1.988361327964028e-05, + "loss": 0.4695, + "step": 3637 + }, + { + "epoch": 0.09989017023613399, + "grad_norm": 0.37466368079185486, + "learning_rate": 1.9883547568822362e-05, + "loss": 0.5385, + "step": 3638 + }, + { + "epoch": 0.0999176276771005, + "grad_norm": 0.4065605401992798, + "learning_rate": 1.9883481839568474e-05, + "loss": 0.5753, + "step": 3639 + }, + { + "epoch": 0.09994508511806699, + "grad_norm": 0.36632034182548523, + "learning_rate": 1.988341609187874e-05, + "loss": 0.5504, + "step": 3640 + }, + { + "epoch": 0.0999725425590335, + "grad_norm": 0.32964998483657837, + "learning_rate": 1.9883350325753276e-05, + "loss": 0.5133, + "step": 3641 + }, + { + "epoch": 0.1, + "grad_norm": 0.3609088361263275, + "learning_rate": 1.9883284541192213e-05, + "loss": 0.5451, + "step": 3642 + }, + { + "epoch": 0.1000274574409665, + "grad_norm": 0.3225267231464386, + "learning_rate": 1.9883218738195667e-05, + "loss": 0.4846, + "step": 3643 + }, + { + "epoch": 0.10005491488193301, + "grad_norm": 0.3798198103904724, + "learning_rate": 1.9883152916763767e-05, + "loss": 0.6001, + "step": 3644 + }, + { + "epoch": 0.1000823723228995, + "grad_norm": 0.34675946831703186, + "learning_rate": 1.988308707689663e-05, + "loss": 0.508, + "step": 3645 + }, + { + "epoch": 0.10010982976386601, + "grad_norm": 0.4197950065135956, + "learning_rate": 1.9883021218594382e-05, + "loss": 0.5485, + "step": 3646 + }, + { + "epoch": 0.1001372872048325, + "grad_norm": 0.35124471783638, + "learning_rate": 1.9882955341857144e-05, + "loss": 0.5457, + "step": 3647 + }, + { + "epoch": 0.10016474464579901, + "grad_norm": 0.40457579493522644, + "learning_rate": 1.9882889446685043e-05, + "loss": 0.6387, + "step": 3648 + }, + { + "epoch": 0.10019220208676552, + "grad_norm": 0.37092846632003784, + "learning_rate": 1.9882823533078195e-05, + "loss": 0.5925, + "step": 3649 + }, + { + "epoch": 0.10021965952773201, + "grad_norm": 0.47242459654808044, + "learning_rate": 1.9882757601036732e-05, + "loss": 0.5333, + "step": 3650 + }, + { + "epoch": 0.10024711696869852, + "grad_norm": 0.49493351578712463, + "learning_rate": 1.988269165056077e-05, + "loss": 0.6618, + "step": 3651 + }, + { + "epoch": 0.10027457440966502, + "grad_norm": 0.38014158606529236, + "learning_rate": 1.988262568165043e-05, + "loss": 0.6363, + "step": 3652 + }, + { + "epoch": 0.10030203185063152, + "grad_norm": 0.40587669610977173, + "learning_rate": 1.9882559694305842e-05, + "loss": 0.6299, + "step": 3653 + }, + { + "epoch": 0.10032948929159802, + "grad_norm": 0.3674899935722351, + "learning_rate": 1.9882493688527125e-05, + "loss": 0.5582, + "step": 3654 + }, + { + "epoch": 0.10035694673256453, + "grad_norm": 0.3656216859817505, + "learning_rate": 1.9882427664314403e-05, + "loss": 0.6827, + "step": 3655 + }, + { + "epoch": 0.10038440417353103, + "grad_norm": 0.3836021423339844, + "learning_rate": 1.98823616216678e-05, + "loss": 0.5468, + "step": 3656 + }, + { + "epoch": 0.10041186161449753, + "grad_norm": 0.39705690741539, + "learning_rate": 1.9882295560587442e-05, + "loss": 0.5481, + "step": 3657 + }, + { + "epoch": 0.10043931905546404, + "grad_norm": 0.4003710150718689, + "learning_rate": 1.9882229481073443e-05, + "loss": 0.6227, + "step": 3658 + }, + { + "epoch": 0.10046677649643053, + "grad_norm": 0.36636263132095337, + "learning_rate": 1.9882163383125934e-05, + "loss": 0.6714, + "step": 3659 + }, + { + "epoch": 0.10049423393739704, + "grad_norm": 0.37111955881118774, + "learning_rate": 1.9882097266745036e-05, + "loss": 0.545, + "step": 3660 + }, + { + "epoch": 0.10052169137836353, + "grad_norm": 0.3639630079269409, + "learning_rate": 1.9882031131930876e-05, + "loss": 0.6285, + "step": 3661 + }, + { + "epoch": 0.10054914881933004, + "grad_norm": 0.4843692481517792, + "learning_rate": 1.988196497868357e-05, + "loss": 0.4922, + "step": 3662 + }, + { + "epoch": 0.10057660626029655, + "grad_norm": 0.5989887714385986, + "learning_rate": 1.9881898807003246e-05, + "loss": 0.567, + "step": 3663 + }, + { + "epoch": 0.10060406370126304, + "grad_norm": 0.44745945930480957, + "learning_rate": 1.9881832616890027e-05, + "loss": 0.5828, + "step": 3664 + }, + { + "epoch": 0.10063152114222955, + "grad_norm": 0.3645488917827606, + "learning_rate": 1.9881766408344037e-05, + "loss": 0.5332, + "step": 3665 + }, + { + "epoch": 0.10065897858319604, + "grad_norm": 1.9621652364730835, + "learning_rate": 1.9881700181365397e-05, + "loss": 0.4575, + "step": 3666 + }, + { + "epoch": 0.10068643602416255, + "grad_norm": 0.4034222960472107, + "learning_rate": 1.9881633935954235e-05, + "loss": 0.4747, + "step": 3667 + }, + { + "epoch": 0.10071389346512905, + "grad_norm": 0.4081016778945923, + "learning_rate": 1.9881567672110668e-05, + "loss": 0.5817, + "step": 3668 + }, + { + "epoch": 0.10074135090609555, + "grad_norm": 0.42450493574142456, + "learning_rate": 1.9881501389834827e-05, + "loss": 0.542, + "step": 3669 + }, + { + "epoch": 0.10076880834706205, + "grad_norm": 0.3669437766075134, + "learning_rate": 1.988143508912683e-05, + "loss": 0.5203, + "step": 3670 + }, + { + "epoch": 0.10079626578802856, + "grad_norm": 0.38931936025619507, + "learning_rate": 1.9881368769986805e-05, + "loss": 0.5527, + "step": 3671 + }, + { + "epoch": 0.10082372322899506, + "grad_norm": 0.36357739567756653, + "learning_rate": 1.9881302432414874e-05, + "loss": 0.462, + "step": 3672 + }, + { + "epoch": 0.10085118066996156, + "grad_norm": 0.38488224148750305, + "learning_rate": 1.9881236076411158e-05, + "loss": 0.5933, + "step": 3673 + }, + { + "epoch": 0.10087863811092806, + "grad_norm": 0.3460012376308441, + "learning_rate": 1.988116970197578e-05, + "loss": 0.5468, + "step": 3674 + }, + { + "epoch": 0.10090609555189456, + "grad_norm": 0.343228280544281, + "learning_rate": 1.9881103309108872e-05, + "loss": 0.5122, + "step": 3675 + }, + { + "epoch": 0.10093355299286107, + "grad_norm": 0.4061126112937927, + "learning_rate": 1.9881036897810553e-05, + "loss": 0.4853, + "step": 3676 + }, + { + "epoch": 0.10096101043382756, + "grad_norm": 0.3478251099586487, + "learning_rate": 1.9880970468080943e-05, + "loss": 0.6047, + "step": 3677 + }, + { + "epoch": 0.10098846787479407, + "grad_norm": 0.3577525317668915, + "learning_rate": 1.9880904019920174e-05, + "loss": 0.5531, + "step": 3678 + }, + { + "epoch": 0.10101592531576058, + "grad_norm": 0.37251660227775574, + "learning_rate": 1.988083755332836e-05, + "loss": 0.5942, + "step": 3679 + }, + { + "epoch": 0.10104338275672707, + "grad_norm": 0.37358933687210083, + "learning_rate": 1.9880771068305633e-05, + "loss": 0.5311, + "step": 3680 + }, + { + "epoch": 0.10107084019769358, + "grad_norm": 0.36396849155426025, + "learning_rate": 1.9880704564852112e-05, + "loss": 0.5572, + "step": 3681 + }, + { + "epoch": 0.10109829763866007, + "grad_norm": 0.4194788634777069, + "learning_rate": 1.988063804296793e-05, + "loss": 0.4283, + "step": 3682 + }, + { + "epoch": 0.10112575507962658, + "grad_norm": 0.3561704158782959, + "learning_rate": 1.9880571502653198e-05, + "loss": 0.5556, + "step": 3683 + }, + { + "epoch": 0.10115321252059307, + "grad_norm": 0.3436970114707947, + "learning_rate": 1.988050494390805e-05, + "loss": 0.5128, + "step": 3684 + }, + { + "epoch": 0.10118066996155958, + "grad_norm": 0.369693785905838, + "learning_rate": 1.9880438366732605e-05, + "loss": 0.4853, + "step": 3685 + }, + { + "epoch": 0.10120812740252609, + "grad_norm": 0.341208815574646, + "learning_rate": 1.988037177112699e-05, + "loss": 0.519, + "step": 3686 + }, + { + "epoch": 0.10123558484349258, + "grad_norm": 0.36568590998649597, + "learning_rate": 1.9880305157091327e-05, + "loss": 0.5719, + "step": 3687 + }, + { + "epoch": 0.10126304228445909, + "grad_norm": 0.3450008034706116, + "learning_rate": 1.988023852462574e-05, + "loss": 0.4703, + "step": 3688 + }, + { + "epoch": 0.10129049972542559, + "grad_norm": 0.35612383484840393, + "learning_rate": 1.9880171873730356e-05, + "loss": 0.5182, + "step": 3689 + }, + { + "epoch": 0.1013179571663921, + "grad_norm": 0.33502310514450073, + "learning_rate": 1.98801052044053e-05, + "loss": 0.5323, + "step": 3690 + }, + { + "epoch": 0.10134541460735859, + "grad_norm": 0.7402406334877014, + "learning_rate": 1.9880038516650694e-05, + "loss": 0.5274, + "step": 3691 + }, + { + "epoch": 0.1013728720483251, + "grad_norm": 0.39143913984298706, + "learning_rate": 1.987997181046666e-05, + "loss": 0.5163, + "step": 3692 + }, + { + "epoch": 0.1014003294892916, + "grad_norm": 0.3534885346889496, + "learning_rate": 1.9879905085853328e-05, + "loss": 0.5372, + "step": 3693 + }, + { + "epoch": 0.1014277869302581, + "grad_norm": 0.40381374955177307, + "learning_rate": 1.9879838342810818e-05, + "loss": 0.5906, + "step": 3694 + }, + { + "epoch": 0.1014552443712246, + "grad_norm": 0.37369129061698914, + "learning_rate": 1.987977158133926e-05, + "loss": 0.5029, + "step": 3695 + }, + { + "epoch": 0.1014827018121911, + "grad_norm": 0.3889693319797516, + "learning_rate": 1.987970480143877e-05, + "loss": 0.4527, + "step": 3696 + }, + { + "epoch": 0.10151015925315761, + "grad_norm": 0.35453000664711, + "learning_rate": 1.987963800310948e-05, + "loss": 0.5734, + "step": 3697 + }, + { + "epoch": 0.1015376166941241, + "grad_norm": 0.43763452768325806, + "learning_rate": 1.9879571186351513e-05, + "loss": 0.5543, + "step": 3698 + }, + { + "epoch": 0.10156507413509061, + "grad_norm": 0.34610670804977417, + "learning_rate": 1.987950435116499e-05, + "loss": 0.5681, + "step": 3699 + }, + { + "epoch": 0.1015925315760571, + "grad_norm": 0.4607323706150055, + "learning_rate": 1.987943749755004e-05, + "loss": 0.6391, + "step": 3700 + }, + { + "epoch": 0.10161998901702361, + "grad_norm": 0.3961438834667206, + "learning_rate": 1.9879370625506783e-05, + "loss": 0.6408, + "step": 3701 + }, + { + "epoch": 0.10164744645799012, + "grad_norm": 0.3470100462436676, + "learning_rate": 1.987930373503535e-05, + "loss": 0.5275, + "step": 3702 + }, + { + "epoch": 0.10167490389895661, + "grad_norm": 0.31323787569999695, + "learning_rate": 1.9879236826135858e-05, + "loss": 0.486, + "step": 3703 + }, + { + "epoch": 0.10170236133992312, + "grad_norm": 0.36904895305633545, + "learning_rate": 1.987916989880844e-05, + "loss": 0.5657, + "step": 3704 + }, + { + "epoch": 0.10172981878088962, + "grad_norm": 0.3412191569805145, + "learning_rate": 1.9879102953053215e-05, + "loss": 0.5351, + "step": 3705 + }, + { + "epoch": 0.10175727622185612, + "grad_norm": 0.339280366897583, + "learning_rate": 1.9879035988870315e-05, + "loss": 0.5507, + "step": 3706 + }, + { + "epoch": 0.10178473366282262, + "grad_norm": 0.41161108016967773, + "learning_rate": 1.9878969006259854e-05, + "loss": 0.6781, + "step": 3707 + }, + { + "epoch": 0.10181219110378913, + "grad_norm": 0.34671422839164734, + "learning_rate": 1.9878902005221964e-05, + "loss": 0.5794, + "step": 3708 + }, + { + "epoch": 0.10183964854475563, + "grad_norm": 0.3544754683971405, + "learning_rate": 1.987883498575677e-05, + "loss": 0.4963, + "step": 3709 + }, + { + "epoch": 0.10186710598572213, + "grad_norm": 0.342465877532959, + "learning_rate": 1.9878767947864396e-05, + "loss": 0.5061, + "step": 3710 + }, + { + "epoch": 0.10189456342668864, + "grad_norm": 0.3471302092075348, + "learning_rate": 1.9878700891544966e-05, + "loss": 0.4656, + "step": 3711 + }, + { + "epoch": 0.10192202086765513, + "grad_norm": 0.39402854442596436, + "learning_rate": 1.9878633816798608e-05, + "loss": 0.4787, + "step": 3712 + }, + { + "epoch": 0.10194947830862164, + "grad_norm": 0.3881931006908417, + "learning_rate": 1.9878566723625444e-05, + "loss": 0.5837, + "step": 3713 + }, + { + "epoch": 0.10197693574958813, + "grad_norm": 0.33060622215270996, + "learning_rate": 1.9878499612025598e-05, + "loss": 0.4908, + "step": 3714 + }, + { + "epoch": 0.10200439319055464, + "grad_norm": 0.4108772873878479, + "learning_rate": 1.9878432481999196e-05, + "loss": 0.4801, + "step": 3715 + }, + { + "epoch": 0.10203185063152115, + "grad_norm": 0.35200801491737366, + "learning_rate": 1.987836533354637e-05, + "loss": 0.543, + "step": 3716 + }, + { + "epoch": 0.10205930807248764, + "grad_norm": 0.3465738594532013, + "learning_rate": 1.9878298166667238e-05, + "loss": 0.5879, + "step": 3717 + }, + { + "epoch": 0.10208676551345415, + "grad_norm": 0.3674907088279724, + "learning_rate": 1.9878230981361925e-05, + "loss": 0.5426, + "step": 3718 + }, + { + "epoch": 0.10211422295442064, + "grad_norm": 0.35191377997398376, + "learning_rate": 1.9878163777630562e-05, + "loss": 0.6574, + "step": 3719 + }, + { + "epoch": 0.10214168039538715, + "grad_norm": 0.3897986114025116, + "learning_rate": 1.9878096555473268e-05, + "loss": 0.6042, + "step": 3720 + }, + { + "epoch": 0.10216913783635365, + "grad_norm": 0.3558349609375, + "learning_rate": 1.987802931489017e-05, + "loss": 0.5657, + "step": 3721 + }, + { + "epoch": 0.10219659527732015, + "grad_norm": 0.36671921610832214, + "learning_rate": 1.98779620558814e-05, + "loss": 0.5827, + "step": 3722 + }, + { + "epoch": 0.10222405271828666, + "grad_norm": 0.34111905097961426, + "learning_rate": 1.9877894778447072e-05, + "loss": 0.5594, + "step": 3723 + }, + { + "epoch": 0.10225151015925316, + "grad_norm": 0.379687637090683, + "learning_rate": 1.9877827482587323e-05, + "loss": 0.6461, + "step": 3724 + }, + { + "epoch": 0.10227896760021966, + "grad_norm": 0.3512583076953888, + "learning_rate": 1.987776016830227e-05, + "loss": 0.5029, + "step": 3725 + }, + { + "epoch": 0.10230642504118616, + "grad_norm": 0.43273666501045227, + "learning_rate": 1.987769283559204e-05, + "loss": 0.6566, + "step": 3726 + }, + { + "epoch": 0.10233388248215267, + "grad_norm": 0.7826879620552063, + "learning_rate": 1.9877625484456763e-05, + "loss": 0.6599, + "step": 3727 + }, + { + "epoch": 0.10236133992311916, + "grad_norm": 0.3890058994293213, + "learning_rate": 1.987755811489656e-05, + "loss": 0.5708, + "step": 3728 + }, + { + "epoch": 0.10238879736408567, + "grad_norm": 0.6155503392219543, + "learning_rate": 1.987749072691156e-05, + "loss": 0.4309, + "step": 3729 + }, + { + "epoch": 0.10241625480505218, + "grad_norm": 0.4001244306564331, + "learning_rate": 1.987742332050189e-05, + "loss": 0.5654, + "step": 3730 + }, + { + "epoch": 0.10244371224601867, + "grad_norm": 0.3392367660999298, + "learning_rate": 1.9877355895667666e-05, + "loss": 0.4845, + "step": 3731 + }, + { + "epoch": 0.10247116968698518, + "grad_norm": 0.40737399458885193, + "learning_rate": 1.9877288452409026e-05, + "loss": 0.5278, + "step": 3732 + }, + { + "epoch": 0.10249862712795167, + "grad_norm": 0.3245091438293457, + "learning_rate": 1.9877220990726088e-05, + "loss": 0.5364, + "step": 3733 + }, + { + "epoch": 0.10252608456891818, + "grad_norm": 0.39107823371887207, + "learning_rate": 1.987715351061898e-05, + "loss": 0.4837, + "step": 3734 + }, + { + "epoch": 0.10255354200988467, + "grad_norm": 0.3771151602268219, + "learning_rate": 1.9877086012087833e-05, + "loss": 0.623, + "step": 3735 + }, + { + "epoch": 0.10258099945085118, + "grad_norm": 0.39206475019454956, + "learning_rate": 1.9877018495132763e-05, + "loss": 0.5623, + "step": 3736 + }, + { + "epoch": 0.10260845689181768, + "grad_norm": 0.35196352005004883, + "learning_rate": 1.9876950959753906e-05, + "loss": 0.5885, + "step": 3737 + }, + { + "epoch": 0.10263591433278418, + "grad_norm": 0.38927483558654785, + "learning_rate": 1.9876883405951378e-05, + "loss": 0.5413, + "step": 3738 + }, + { + "epoch": 0.10266337177375069, + "grad_norm": 0.6136228442192078, + "learning_rate": 1.9876815833725314e-05, + "loss": 0.5748, + "step": 3739 + }, + { + "epoch": 0.10269082921471719, + "grad_norm": 0.3340201675891876, + "learning_rate": 1.9876748243075834e-05, + "loss": 0.5677, + "step": 3740 + }, + { + "epoch": 0.1027182866556837, + "grad_norm": 0.4499374330043793, + "learning_rate": 1.9876680634003068e-05, + "loss": 0.5531, + "step": 3741 + }, + { + "epoch": 0.10274574409665019, + "grad_norm": 0.3772997558116913, + "learning_rate": 1.987661300650714e-05, + "loss": 0.5191, + "step": 3742 + }, + { + "epoch": 0.1027732015376167, + "grad_norm": 0.3793255686759949, + "learning_rate": 1.9876545360588175e-05, + "loss": 0.6249, + "step": 3743 + }, + { + "epoch": 0.10280065897858319, + "grad_norm": 0.4102172255516052, + "learning_rate": 1.98764776962463e-05, + "loss": 0.5377, + "step": 3744 + }, + { + "epoch": 0.1028281164195497, + "grad_norm": 0.32869404554367065, + "learning_rate": 1.9876410013481643e-05, + "loss": 0.4825, + "step": 3745 + }, + { + "epoch": 0.1028555738605162, + "grad_norm": 0.3301064670085907, + "learning_rate": 1.987634231229433e-05, + "loss": 0.5309, + "step": 3746 + }, + { + "epoch": 0.1028830313014827, + "grad_norm": 0.3402499556541443, + "learning_rate": 1.9876274592684485e-05, + "loss": 0.5436, + "step": 3747 + }, + { + "epoch": 0.10291048874244921, + "grad_norm": 0.37275901436805725, + "learning_rate": 1.9876206854652237e-05, + "loss": 0.5614, + "step": 3748 + }, + { + "epoch": 0.1029379461834157, + "grad_norm": 0.32092857360839844, + "learning_rate": 1.987613909819771e-05, + "loss": 0.4638, + "step": 3749 + }, + { + "epoch": 0.10296540362438221, + "grad_norm": 0.4086604118347168, + "learning_rate": 1.9876071323321033e-05, + "loss": 0.5845, + "step": 3750 + }, + { + "epoch": 0.1029928610653487, + "grad_norm": 0.33194127678871155, + "learning_rate": 1.987600353002233e-05, + "loss": 0.4838, + "step": 3751 + }, + { + "epoch": 0.10302031850631521, + "grad_norm": 0.36046385765075684, + "learning_rate": 1.987593571830173e-05, + "loss": 0.5821, + "step": 3752 + }, + { + "epoch": 0.10304777594728172, + "grad_norm": 0.42381903529167175, + "learning_rate": 1.987586788815936e-05, + "loss": 0.5651, + "step": 3753 + }, + { + "epoch": 0.10307523338824821, + "grad_norm": 0.3796504735946655, + "learning_rate": 1.9875800039595338e-05, + "loss": 0.5391, + "step": 3754 + }, + { + "epoch": 0.10310269082921472, + "grad_norm": 0.3642529249191284, + "learning_rate": 1.9875732172609798e-05, + "loss": 0.5281, + "step": 3755 + }, + { + "epoch": 0.10313014827018122, + "grad_norm": 0.3446398377418518, + "learning_rate": 1.9875664287202867e-05, + "loss": 0.5025, + "step": 3756 + }, + { + "epoch": 0.10315760571114772, + "grad_norm": 0.3937079608440399, + "learning_rate": 1.987559638337467e-05, + "loss": 0.4599, + "step": 3757 + }, + { + "epoch": 0.10318506315211422, + "grad_norm": 0.3722397983074188, + "learning_rate": 1.9875528461125336e-05, + "loss": 0.5674, + "step": 3758 + }, + { + "epoch": 0.10321252059308073, + "grad_norm": 0.3857128918170929, + "learning_rate": 1.9875460520454987e-05, + "loss": 0.5423, + "step": 3759 + }, + { + "epoch": 0.10323997803404723, + "grad_norm": 0.3713667690753937, + "learning_rate": 1.9875392561363755e-05, + "loss": 0.5117, + "step": 3760 + }, + { + "epoch": 0.10326743547501373, + "grad_norm": 0.3877888023853302, + "learning_rate": 1.987532458385176e-05, + "loss": 0.6346, + "step": 3761 + }, + { + "epoch": 0.10329489291598024, + "grad_norm": 0.37657803297042847, + "learning_rate": 1.9875256587919134e-05, + "loss": 0.5142, + "step": 3762 + }, + { + "epoch": 0.10332235035694673, + "grad_norm": 0.39413389563560486, + "learning_rate": 1.9875188573566e-05, + "loss": 0.498, + "step": 3763 + }, + { + "epoch": 0.10334980779791324, + "grad_norm": 0.36739182472229004, + "learning_rate": 1.9875120540792495e-05, + "loss": 0.5466, + "step": 3764 + }, + { + "epoch": 0.10337726523887973, + "grad_norm": 0.40594416856765747, + "learning_rate": 1.9875052489598732e-05, + "loss": 0.637, + "step": 3765 + }, + { + "epoch": 0.10340472267984624, + "grad_norm": 0.48338863253593445, + "learning_rate": 1.9874984419984846e-05, + "loss": 0.4924, + "step": 3766 + }, + { + "epoch": 0.10343218012081273, + "grad_norm": 0.3458135426044464, + "learning_rate": 1.9874916331950964e-05, + "loss": 0.4986, + "step": 3767 + }, + { + "epoch": 0.10345963756177924, + "grad_norm": 0.35718342661857605, + "learning_rate": 1.9874848225497212e-05, + "loss": 0.5293, + "step": 3768 + }, + { + "epoch": 0.10348709500274575, + "grad_norm": 0.4661085903644562, + "learning_rate": 1.9874780100623713e-05, + "loss": 0.461, + "step": 3769 + }, + { + "epoch": 0.10351455244371224, + "grad_norm": 0.39744827151298523, + "learning_rate": 1.9874711957330598e-05, + "loss": 0.5448, + "step": 3770 + }, + { + "epoch": 0.10354200988467875, + "grad_norm": 0.33603620529174805, + "learning_rate": 1.9874643795617995e-05, + "loss": 0.5171, + "step": 3771 + }, + { + "epoch": 0.10356946732564525, + "grad_norm": 0.43475016951560974, + "learning_rate": 1.987457561548603e-05, + "loss": 0.6482, + "step": 3772 + }, + { + "epoch": 0.10359692476661175, + "grad_norm": 0.3865177631378174, + "learning_rate": 1.987450741693483e-05, + "loss": 0.5457, + "step": 3773 + }, + { + "epoch": 0.10362438220757825, + "grad_norm": 0.40809866786003113, + "learning_rate": 1.987443919996452e-05, + "loss": 0.5245, + "step": 3774 + }, + { + "epoch": 0.10365183964854476, + "grad_norm": 0.4723277986049652, + "learning_rate": 1.9874370964575234e-05, + "loss": 0.6136, + "step": 3775 + }, + { + "epoch": 0.10367929708951126, + "grad_norm": 0.3428969383239746, + "learning_rate": 1.987430271076709e-05, + "loss": 0.566, + "step": 3776 + }, + { + "epoch": 0.10370675453047776, + "grad_norm": 0.34798505902290344, + "learning_rate": 1.9874234438540222e-05, + "loss": 0.4591, + "step": 3777 + }, + { + "epoch": 0.10373421197144427, + "grad_norm": 0.3913669288158417, + "learning_rate": 1.9874166147894752e-05, + "loss": 0.5721, + "step": 3778 + }, + { + "epoch": 0.10376166941241076, + "grad_norm": 0.3714759647846222, + "learning_rate": 1.9874097838830814e-05, + "loss": 0.5103, + "step": 3779 + }, + { + "epoch": 0.10378912685337727, + "grad_norm": 0.33035799860954285, + "learning_rate": 1.987402951134853e-05, + "loss": 0.5493, + "step": 3780 + }, + { + "epoch": 0.10381658429434376, + "grad_norm": 0.4073542058467865, + "learning_rate": 1.987396116544803e-05, + "loss": 0.4923, + "step": 3781 + }, + { + "epoch": 0.10384404173531027, + "grad_norm": 0.3643590211868286, + "learning_rate": 1.9873892801129444e-05, + "loss": 0.5632, + "step": 3782 + }, + { + "epoch": 0.10387149917627678, + "grad_norm": 0.34655168652534485, + "learning_rate": 1.9873824418392895e-05, + "loss": 0.4608, + "step": 3783 + }, + { + "epoch": 0.10389895661724327, + "grad_norm": 0.3425053656101227, + "learning_rate": 1.987375601723851e-05, + "loss": 0.6181, + "step": 3784 + }, + { + "epoch": 0.10392641405820978, + "grad_norm": 0.321377158164978, + "learning_rate": 1.987368759766642e-05, + "loss": 0.5309, + "step": 3785 + }, + { + "epoch": 0.10395387149917627, + "grad_norm": 0.4320198595523834, + "learning_rate": 1.9873619159676753e-05, + "loss": 0.5979, + "step": 3786 + }, + { + "epoch": 0.10398132894014278, + "grad_norm": 0.34884563088417053, + "learning_rate": 1.9873550703269635e-05, + "loss": 0.4853, + "step": 3787 + }, + { + "epoch": 0.10400878638110927, + "grad_norm": 0.40331414341926575, + "learning_rate": 1.9873482228445192e-05, + "loss": 0.6197, + "step": 3788 + }, + { + "epoch": 0.10403624382207578, + "grad_norm": 0.394056499004364, + "learning_rate": 1.9873413735203552e-05, + "loss": 0.5205, + "step": 3789 + }, + { + "epoch": 0.10406370126304229, + "grad_norm": 0.4187641739845276, + "learning_rate": 1.9873345223544847e-05, + "loss": 0.6397, + "step": 3790 + }, + { + "epoch": 0.10409115870400878, + "grad_norm": 0.39512133598327637, + "learning_rate": 1.98732766934692e-05, + "loss": 0.5883, + "step": 3791 + }, + { + "epoch": 0.10411861614497529, + "grad_norm": 0.4228011965751648, + "learning_rate": 1.987320814497674e-05, + "loss": 0.603, + "step": 3792 + }, + { + "epoch": 0.10414607358594179, + "grad_norm": 0.3022291958332062, + "learning_rate": 1.9873139578067597e-05, + "loss": 0.6128, + "step": 3793 + }, + { + "epoch": 0.1041735310269083, + "grad_norm": 0.39699631929397583, + "learning_rate": 1.98730709927419e-05, + "loss": 0.5353, + "step": 3794 + }, + { + "epoch": 0.10420098846787479, + "grad_norm": 0.33036336302757263, + "learning_rate": 1.9873002388999772e-05, + "loss": 0.442, + "step": 3795 + }, + { + "epoch": 0.1042284459088413, + "grad_norm": 0.4097613990306854, + "learning_rate": 1.9872933766841344e-05, + "loss": 0.6227, + "step": 3796 + }, + { + "epoch": 0.1042559033498078, + "grad_norm": 0.4052391052246094, + "learning_rate": 1.9872865126266742e-05, + "loss": 0.5681, + "step": 3797 + }, + { + "epoch": 0.1042833607907743, + "grad_norm": 0.3291495740413666, + "learning_rate": 1.9872796467276096e-05, + "loss": 0.5467, + "step": 3798 + }, + { + "epoch": 0.1043108182317408, + "grad_norm": 0.34201833605766296, + "learning_rate": 1.9872727789869534e-05, + "loss": 0.5562, + "step": 3799 + }, + { + "epoch": 0.1043382756727073, + "grad_norm": 0.36519426107406616, + "learning_rate": 1.9872659094047184e-05, + "loss": 0.4567, + "step": 3800 + }, + { + "epoch": 0.10436573311367381, + "grad_norm": 0.38032835721969604, + "learning_rate": 1.9872590379809173e-05, + "loss": 0.5351, + "step": 3801 + }, + { + "epoch": 0.1043931905546403, + "grad_norm": 0.3412076532840729, + "learning_rate": 1.987252164715563e-05, + "loss": 0.5292, + "step": 3802 + }, + { + "epoch": 0.10442064799560681, + "grad_norm": 0.3484349250793457, + "learning_rate": 1.9872452896086684e-05, + "loss": 0.5571, + "step": 3803 + }, + { + "epoch": 0.1044481054365733, + "grad_norm": 0.6059137582778931, + "learning_rate": 1.9872384126602463e-05, + "loss": 0.5296, + "step": 3804 + }, + { + "epoch": 0.10447556287753981, + "grad_norm": 0.32972705364227295, + "learning_rate": 1.9872315338703096e-05, + "loss": 0.5582, + "step": 3805 + }, + { + "epoch": 0.10450302031850632, + "grad_norm": 0.36167091131210327, + "learning_rate": 1.9872246532388707e-05, + "loss": 0.5228, + "step": 3806 + }, + { + "epoch": 0.10453047775947281, + "grad_norm": 0.3177085816860199, + "learning_rate": 1.9872177707659425e-05, + "loss": 0.4593, + "step": 3807 + }, + { + "epoch": 0.10455793520043932, + "grad_norm": 0.39206618070602417, + "learning_rate": 1.9872108864515386e-05, + "loss": 0.5746, + "step": 3808 + }, + { + "epoch": 0.10458539264140582, + "grad_norm": 0.3659050762653351, + "learning_rate": 1.987204000295671e-05, + "loss": 0.5936, + "step": 3809 + }, + { + "epoch": 0.10461285008237232, + "grad_norm": 0.577154278755188, + "learning_rate": 1.9871971122983532e-05, + "loss": 0.5444, + "step": 3810 + }, + { + "epoch": 0.10464030752333882, + "grad_norm": 0.35058191418647766, + "learning_rate": 1.987190222459597e-05, + "loss": 0.5623, + "step": 3811 + }, + { + "epoch": 0.10466776496430533, + "grad_norm": 0.3693815767765045, + "learning_rate": 1.9871833307794167e-05, + "loss": 0.5382, + "step": 3812 + }, + { + "epoch": 0.10469522240527183, + "grad_norm": 0.38984403014183044, + "learning_rate": 1.9871764372578243e-05, + "loss": 0.5758, + "step": 3813 + }, + { + "epoch": 0.10472267984623833, + "grad_norm": 0.39996346831321716, + "learning_rate": 1.9871695418948324e-05, + "loss": 0.4671, + "step": 3814 + }, + { + "epoch": 0.10475013728720484, + "grad_norm": 0.36269518733024597, + "learning_rate": 1.9871626446904547e-05, + "loss": 0.5834, + "step": 3815 + }, + { + "epoch": 0.10477759472817133, + "grad_norm": 0.3340657651424408, + "learning_rate": 1.987155745644703e-05, + "loss": 0.4784, + "step": 3816 + }, + { + "epoch": 0.10480505216913784, + "grad_norm": 0.5332630276679993, + "learning_rate": 1.9871488447575914e-05, + "loss": 0.5755, + "step": 3817 + }, + { + "epoch": 0.10483250961010433, + "grad_norm": 0.406204491853714, + "learning_rate": 1.9871419420291317e-05, + "loss": 0.6531, + "step": 3818 + }, + { + "epoch": 0.10485996705107084, + "grad_norm": 0.3646356165409088, + "learning_rate": 1.9871350374593376e-05, + "loss": 0.5777, + "step": 3819 + }, + { + "epoch": 0.10488742449203735, + "grad_norm": 0.3302288353443146, + "learning_rate": 1.9871281310482212e-05, + "loss": 0.5892, + "step": 3820 + }, + { + "epoch": 0.10491488193300384, + "grad_norm": 0.32823213934898376, + "learning_rate": 1.9871212227957962e-05, + "loss": 0.5022, + "step": 3821 + }, + { + "epoch": 0.10494233937397035, + "grad_norm": 0.37528273463249207, + "learning_rate": 1.9871143127020747e-05, + "loss": 0.5983, + "step": 3822 + }, + { + "epoch": 0.10496979681493684, + "grad_norm": 0.4582796096801758, + "learning_rate": 1.9871074007670702e-05, + "loss": 0.578, + "step": 3823 + }, + { + "epoch": 0.10499725425590335, + "grad_norm": 0.37758493423461914, + "learning_rate": 1.9871004869907954e-05, + "loss": 0.5223, + "step": 3824 + }, + { + "epoch": 0.10502471169686985, + "grad_norm": 0.339484840631485, + "learning_rate": 1.987093571373263e-05, + "loss": 0.5586, + "step": 3825 + }, + { + "epoch": 0.10505216913783635, + "grad_norm": 0.3803310990333557, + "learning_rate": 1.987086653914486e-05, + "loss": 0.5, + "step": 3826 + }, + { + "epoch": 0.10507962657880286, + "grad_norm": 0.416075736284256, + "learning_rate": 1.9870797346144772e-05, + "loss": 0.5979, + "step": 3827 + }, + { + "epoch": 0.10510708401976936, + "grad_norm": 0.32400840520858765, + "learning_rate": 1.98707281347325e-05, + "loss": 0.5511, + "step": 3828 + }, + { + "epoch": 0.10513454146073586, + "grad_norm": 0.346810907125473, + "learning_rate": 1.9870658904908165e-05, + "loss": 0.561, + "step": 3829 + }, + { + "epoch": 0.10516199890170236, + "grad_norm": 0.43753641843795776, + "learning_rate": 1.9870589656671907e-05, + "loss": 0.5266, + "step": 3830 + }, + { + "epoch": 0.10518945634266887, + "grad_norm": 0.47127383947372437, + "learning_rate": 1.9870520390023843e-05, + "loss": 0.6117, + "step": 3831 + }, + { + "epoch": 0.10521691378363536, + "grad_norm": 0.4203130304813385, + "learning_rate": 1.9870451104964114e-05, + "loss": 0.5367, + "step": 3832 + }, + { + "epoch": 0.10524437122460187, + "grad_norm": 0.3533201813697815, + "learning_rate": 1.987038180149284e-05, + "loss": 0.5412, + "step": 3833 + }, + { + "epoch": 0.10527182866556836, + "grad_norm": 0.33527401089668274, + "learning_rate": 1.9870312479610154e-05, + "loss": 0.4716, + "step": 3834 + }, + { + "epoch": 0.10529928610653487, + "grad_norm": 0.448081374168396, + "learning_rate": 1.9870243139316187e-05, + "loss": 0.5757, + "step": 3835 + }, + { + "epoch": 0.10532674354750138, + "grad_norm": 0.41182827949523926, + "learning_rate": 1.987017378061106e-05, + "loss": 0.5641, + "step": 3836 + }, + { + "epoch": 0.10535420098846787, + "grad_norm": 0.41510093212127686, + "learning_rate": 1.9870104403494914e-05, + "loss": 0.5707, + "step": 3837 + }, + { + "epoch": 0.10538165842943438, + "grad_norm": 0.4042965769767761, + "learning_rate": 1.9870035007967873e-05, + "loss": 0.5379, + "step": 3838 + }, + { + "epoch": 0.10540911587040087, + "grad_norm": 0.3770391047000885, + "learning_rate": 1.9869965594030066e-05, + "loss": 0.6505, + "step": 3839 + }, + { + "epoch": 0.10543657331136738, + "grad_norm": 0.48352956771850586, + "learning_rate": 1.9869896161681622e-05, + "loss": 0.5498, + "step": 3840 + }, + { + "epoch": 0.10546403075233388, + "grad_norm": 0.41967737674713135, + "learning_rate": 1.9869826710922676e-05, + "loss": 0.4997, + "step": 3841 + }, + { + "epoch": 0.10549148819330038, + "grad_norm": 0.39243850111961365, + "learning_rate": 1.9869757241753347e-05, + "loss": 0.6316, + "step": 3842 + }, + { + "epoch": 0.10551894563426689, + "grad_norm": 0.3841986060142517, + "learning_rate": 1.986968775417377e-05, + "loss": 0.5987, + "step": 3843 + }, + { + "epoch": 0.10554640307523339, + "grad_norm": 0.3687991499900818, + "learning_rate": 1.9869618248184082e-05, + "loss": 0.5208, + "step": 3844 + }, + { + "epoch": 0.1055738605161999, + "grad_norm": 0.925459623336792, + "learning_rate": 1.9869548723784403e-05, + "loss": 0.5929, + "step": 3845 + }, + { + "epoch": 0.10560131795716639, + "grad_norm": 0.47142472863197327, + "learning_rate": 1.9869479180974863e-05, + "loss": 0.4895, + "step": 3846 + }, + { + "epoch": 0.1056287753981329, + "grad_norm": 0.375644713640213, + "learning_rate": 1.98694096197556e-05, + "loss": 0.5071, + "step": 3847 + }, + { + "epoch": 0.10565623283909939, + "grad_norm": 0.3523799777030945, + "learning_rate": 1.986934004012673e-05, + "loss": 0.4671, + "step": 3848 + }, + { + "epoch": 0.1056836902800659, + "grad_norm": 0.3381350338459015, + "learning_rate": 1.98692704420884e-05, + "loss": 0.5974, + "step": 3849 + }, + { + "epoch": 0.1057111477210324, + "grad_norm": 0.3653413951396942, + "learning_rate": 1.9869200825640723e-05, + "loss": 0.4238, + "step": 3850 + }, + { + "epoch": 0.1057386051619989, + "grad_norm": 0.3673236072063446, + "learning_rate": 1.986913119078384e-05, + "loss": 0.5607, + "step": 3851 + }, + { + "epoch": 0.10576606260296541, + "grad_norm": 0.3395763635635376, + "learning_rate": 1.9869061537517878e-05, + "loss": 0.5204, + "step": 3852 + }, + { + "epoch": 0.1057935200439319, + "grad_norm": 0.35546445846557617, + "learning_rate": 1.9868991865842965e-05, + "loss": 0.4429, + "step": 3853 + }, + { + "epoch": 0.10582097748489841, + "grad_norm": 0.3710554838180542, + "learning_rate": 1.9868922175759234e-05, + "loss": 0.5599, + "step": 3854 + }, + { + "epoch": 0.1058484349258649, + "grad_norm": 0.4383479654788971, + "learning_rate": 1.9868852467266814e-05, + "loss": 0.6035, + "step": 3855 + }, + { + "epoch": 0.10587589236683141, + "grad_norm": 0.2930375337600708, + "learning_rate": 1.986878274036583e-05, + "loss": 0.4621, + "step": 3856 + }, + { + "epoch": 0.10590334980779792, + "grad_norm": 0.33804062008857727, + "learning_rate": 1.986871299505642e-05, + "loss": 0.5309, + "step": 3857 + }, + { + "epoch": 0.10593080724876441, + "grad_norm": 0.38290271162986755, + "learning_rate": 1.986864323133871e-05, + "loss": 0.5569, + "step": 3858 + }, + { + "epoch": 0.10595826468973092, + "grad_norm": 0.36935073137283325, + "learning_rate": 1.986857344921283e-05, + "loss": 0.4677, + "step": 3859 + }, + { + "epoch": 0.10598572213069742, + "grad_norm": 0.4082733988761902, + "learning_rate": 1.986850364867891e-05, + "loss": 0.6187, + "step": 3860 + }, + { + "epoch": 0.10601317957166392, + "grad_norm": 0.34845176339149475, + "learning_rate": 1.9868433829737085e-05, + "loss": 0.5029, + "step": 3861 + }, + { + "epoch": 0.10604063701263042, + "grad_norm": 0.34293392300605774, + "learning_rate": 1.9868363992387477e-05, + "loss": 0.6125, + "step": 3862 + }, + { + "epoch": 0.10606809445359693, + "grad_norm": 0.367667019367218, + "learning_rate": 1.986829413663022e-05, + "loss": 0.6475, + "step": 3863 + }, + { + "epoch": 0.10609555189456343, + "grad_norm": 0.3714020252227783, + "learning_rate": 1.986822426246545e-05, + "loss": 0.6712, + "step": 3864 + }, + { + "epoch": 0.10612300933552993, + "grad_norm": 0.3475090563297272, + "learning_rate": 1.986815436989329e-05, + "loss": 0.5974, + "step": 3865 + }, + { + "epoch": 0.10615046677649644, + "grad_norm": 0.3499235212802887, + "learning_rate": 1.986808445891387e-05, + "loss": 0.4703, + "step": 3866 + }, + { + "epoch": 0.10617792421746293, + "grad_norm": 0.37134361267089844, + "learning_rate": 1.9868014529527324e-05, + "loss": 0.5824, + "step": 3867 + }, + { + "epoch": 0.10620538165842944, + "grad_norm": 0.3374619781970978, + "learning_rate": 1.9867944581733782e-05, + "loss": 0.518, + "step": 3868 + }, + { + "epoch": 0.10623283909939593, + "grad_norm": 0.3901131749153137, + "learning_rate": 1.9867874615533373e-05, + "loss": 0.5425, + "step": 3869 + }, + { + "epoch": 0.10626029654036244, + "grad_norm": 0.3555409610271454, + "learning_rate": 1.986780463092623e-05, + "loss": 0.5846, + "step": 3870 + }, + { + "epoch": 0.10628775398132893, + "grad_norm": 0.34077024459838867, + "learning_rate": 1.9867734627912482e-05, + "loss": 0.552, + "step": 3871 + }, + { + "epoch": 0.10631521142229544, + "grad_norm": 0.4143122732639313, + "learning_rate": 1.9867664606492258e-05, + "loss": 0.6516, + "step": 3872 + }, + { + "epoch": 0.10634266886326195, + "grad_norm": 0.36835795640945435, + "learning_rate": 1.9867594566665692e-05, + "loss": 0.4786, + "step": 3873 + }, + { + "epoch": 0.10637012630422844, + "grad_norm": 0.3371666967868805, + "learning_rate": 1.986752450843291e-05, + "loss": 0.554, + "step": 3874 + }, + { + "epoch": 0.10639758374519495, + "grad_norm": 0.33243823051452637, + "learning_rate": 1.986745443179405e-05, + "loss": 0.4817, + "step": 3875 + }, + { + "epoch": 0.10642504118616145, + "grad_norm": 0.41295409202575684, + "learning_rate": 1.9867384336749232e-05, + "loss": 0.471, + "step": 3876 + }, + { + "epoch": 0.10645249862712795, + "grad_norm": 0.42155101895332336, + "learning_rate": 1.9867314223298593e-05, + "loss": 0.5979, + "step": 3877 + }, + { + "epoch": 0.10647995606809445, + "grad_norm": 0.43157750368118286, + "learning_rate": 1.986724409144227e-05, + "loss": 0.5077, + "step": 3878 + }, + { + "epoch": 0.10650741350906096, + "grad_norm": 0.37829843163490295, + "learning_rate": 1.9867173941180383e-05, + "loss": 0.5335, + "step": 3879 + }, + { + "epoch": 0.10653487095002746, + "grad_norm": 0.33894768357276917, + "learning_rate": 1.986710377251307e-05, + "loss": 0.5643, + "step": 3880 + }, + { + "epoch": 0.10656232839099396, + "grad_norm": 0.39431583881378174, + "learning_rate": 1.9867033585440457e-05, + "loss": 0.5272, + "step": 3881 + }, + { + "epoch": 0.10658978583196047, + "grad_norm": 0.3784754276275635, + "learning_rate": 1.9866963379962677e-05, + "loss": 0.5733, + "step": 3882 + }, + { + "epoch": 0.10661724327292696, + "grad_norm": 0.3714334964752197, + "learning_rate": 1.9866893156079863e-05, + "loss": 0.5331, + "step": 3883 + }, + { + "epoch": 0.10664470071389347, + "grad_norm": 0.3975332975387573, + "learning_rate": 1.986682291379214e-05, + "loss": 0.5918, + "step": 3884 + }, + { + "epoch": 0.10667215815485996, + "grad_norm": 0.3915683329105377, + "learning_rate": 1.986675265309965e-05, + "loss": 0.521, + "step": 3885 + }, + { + "epoch": 0.10669961559582647, + "grad_norm": 0.33201098442077637, + "learning_rate": 1.9866682374002514e-05, + "loss": 0.547, + "step": 3886 + }, + { + "epoch": 0.10672707303679298, + "grad_norm": 0.3589036166667938, + "learning_rate": 1.9866612076500865e-05, + "loss": 0.5798, + "step": 3887 + }, + { + "epoch": 0.10675453047775947, + "grad_norm": 0.35575756430625916, + "learning_rate": 1.9866541760594837e-05, + "loss": 0.5868, + "step": 3888 + }, + { + "epoch": 0.10678198791872598, + "grad_norm": 0.34954458475112915, + "learning_rate": 1.9866471426284558e-05, + "loss": 0.4404, + "step": 3889 + }, + { + "epoch": 0.10680944535969247, + "grad_norm": 0.356108158826828, + "learning_rate": 1.9866401073570164e-05, + "loss": 0.4828, + "step": 3890 + }, + { + "epoch": 0.10683690280065898, + "grad_norm": 0.4012312591075897, + "learning_rate": 1.986633070245178e-05, + "loss": 0.5977, + "step": 3891 + }, + { + "epoch": 0.10686436024162548, + "grad_norm": 0.3317927122116089, + "learning_rate": 1.986626031292954e-05, + "loss": 0.5052, + "step": 3892 + }, + { + "epoch": 0.10689181768259198, + "grad_norm": 0.356916218996048, + "learning_rate": 1.9866189905003577e-05, + "loss": 0.4527, + "step": 3893 + }, + { + "epoch": 0.10691927512355849, + "grad_norm": 0.39416465163230896, + "learning_rate": 1.986611947867402e-05, + "loss": 0.5388, + "step": 3894 + }, + { + "epoch": 0.10694673256452498, + "grad_norm": 0.4298718571662903, + "learning_rate": 1.9866049033941003e-05, + "loss": 0.5836, + "step": 3895 + }, + { + "epoch": 0.10697419000549149, + "grad_norm": 0.34931573271751404, + "learning_rate": 1.9865978570804653e-05, + "loss": 0.5632, + "step": 3896 + }, + { + "epoch": 0.10700164744645799, + "grad_norm": 0.37951377034187317, + "learning_rate": 1.9865908089265103e-05, + "loss": 0.5133, + "step": 3897 + }, + { + "epoch": 0.1070291048874245, + "grad_norm": 0.3519764244556427, + "learning_rate": 1.9865837589322492e-05, + "loss": 0.5346, + "step": 3898 + }, + { + "epoch": 0.10705656232839099, + "grad_norm": 0.35335278511047363, + "learning_rate": 1.986576707097694e-05, + "loss": 0.5214, + "step": 3899 + }, + { + "epoch": 0.1070840197693575, + "grad_norm": 0.4014730453491211, + "learning_rate": 1.9865696534228583e-05, + "loss": 0.608, + "step": 3900 + }, + { + "epoch": 0.10711147721032399, + "grad_norm": 0.38673245906829834, + "learning_rate": 1.9865625979077557e-05, + "loss": 0.5819, + "step": 3901 + }, + { + "epoch": 0.1071389346512905, + "grad_norm": 0.38143277168273926, + "learning_rate": 1.9865555405523985e-05, + "loss": 0.5511, + "step": 3902 + }, + { + "epoch": 0.107166392092257, + "grad_norm": 0.3618567883968353, + "learning_rate": 1.9865484813568008e-05, + "loss": 0.5384, + "step": 3903 + }, + { + "epoch": 0.1071938495332235, + "grad_norm": 0.3953111469745636, + "learning_rate": 1.9865414203209753e-05, + "loss": 0.5931, + "step": 3904 + }, + { + "epoch": 0.10722130697419001, + "grad_norm": 0.37477439641952515, + "learning_rate": 1.986534357444935e-05, + "loss": 0.5251, + "step": 3905 + }, + { + "epoch": 0.1072487644151565, + "grad_norm": 0.36615970730781555, + "learning_rate": 1.986527292728693e-05, + "loss": 0.5913, + "step": 3906 + }, + { + "epoch": 0.10727622185612301, + "grad_norm": 0.37437349557876587, + "learning_rate": 1.9865202261722633e-05, + "loss": 0.5727, + "step": 3907 + }, + { + "epoch": 0.1073036792970895, + "grad_norm": 0.3684615194797516, + "learning_rate": 1.986513157775658e-05, + "loss": 0.5531, + "step": 3908 + }, + { + "epoch": 0.10733113673805601, + "grad_norm": 0.3959915041923523, + "learning_rate": 1.9865060875388915e-05, + "loss": 0.473, + "step": 3909 + }, + { + "epoch": 0.10735859417902252, + "grad_norm": 0.38141483068466187, + "learning_rate": 1.9864990154619755e-05, + "loss": 0.5911, + "step": 3910 + }, + { + "epoch": 0.10738605161998901, + "grad_norm": 0.35106831789016724, + "learning_rate": 1.9864919415449243e-05, + "loss": 0.5767, + "step": 3911 + }, + { + "epoch": 0.10741350906095552, + "grad_norm": 0.374546617269516, + "learning_rate": 1.9864848657877508e-05, + "loss": 0.5956, + "step": 3912 + }, + { + "epoch": 0.10744096650192202, + "grad_norm": 0.3573896884918213, + "learning_rate": 1.986477788190468e-05, + "loss": 0.4648, + "step": 3913 + }, + { + "epoch": 0.10746842394288852, + "grad_norm": 0.4275065064430237, + "learning_rate": 1.9864707087530893e-05, + "loss": 0.5626, + "step": 3914 + }, + { + "epoch": 0.10749588138385502, + "grad_norm": 0.34382104873657227, + "learning_rate": 1.9864636274756283e-05, + "loss": 0.5306, + "step": 3915 + }, + { + "epoch": 0.10752333882482153, + "grad_norm": 0.41410595178604126, + "learning_rate": 1.9864565443580974e-05, + "loss": 0.5566, + "step": 3916 + }, + { + "epoch": 0.10755079626578803, + "grad_norm": 0.3861497938632965, + "learning_rate": 1.9864494594005107e-05, + "loss": 0.4913, + "step": 3917 + }, + { + "epoch": 0.10757825370675453, + "grad_norm": 0.39049485325813293, + "learning_rate": 1.9864423726028804e-05, + "loss": 0.6196, + "step": 3918 + }, + { + "epoch": 0.10760571114772104, + "grad_norm": 0.3583229184150696, + "learning_rate": 1.98643528396522e-05, + "loss": 0.5822, + "step": 3919 + }, + { + "epoch": 0.10763316858868753, + "grad_norm": 0.36045151948928833, + "learning_rate": 1.9864281934875434e-05, + "loss": 0.5237, + "step": 3920 + }, + { + "epoch": 0.10766062602965404, + "grad_norm": 0.3459170162677765, + "learning_rate": 1.9864211011698635e-05, + "loss": 0.5419, + "step": 3921 + }, + { + "epoch": 0.10768808347062053, + "grad_norm": 0.42030325531959534, + "learning_rate": 1.9864140070121932e-05, + "loss": 0.5805, + "step": 3922 + }, + { + "epoch": 0.10771554091158704, + "grad_norm": 0.40273481607437134, + "learning_rate": 1.986406911014546e-05, + "loss": 0.5432, + "step": 3923 + }, + { + "epoch": 0.10774299835255355, + "grad_norm": 0.39949485659599304, + "learning_rate": 1.9863998131769354e-05, + "loss": 0.5173, + "step": 3924 + }, + { + "epoch": 0.10777045579352004, + "grad_norm": 0.4521189033985138, + "learning_rate": 1.986392713499374e-05, + "loss": 0.5258, + "step": 3925 + }, + { + "epoch": 0.10779791323448655, + "grad_norm": 0.40132278203964233, + "learning_rate": 1.986385611981875e-05, + "loss": 0.5191, + "step": 3926 + }, + { + "epoch": 0.10782537067545304, + "grad_norm": 0.3166329860687256, + "learning_rate": 1.9863785086244525e-05, + "loss": 0.491, + "step": 3927 + }, + { + "epoch": 0.10785282811641955, + "grad_norm": 0.3947601914405823, + "learning_rate": 1.986371403427119e-05, + "loss": 0.6157, + "step": 3928 + }, + { + "epoch": 0.10788028555738605, + "grad_norm": 0.38338449597358704, + "learning_rate": 1.9863642963898885e-05, + "loss": 0.5933, + "step": 3929 + }, + { + "epoch": 0.10790774299835255, + "grad_norm": 0.4174915552139282, + "learning_rate": 1.9863571875127734e-05, + "loss": 0.5091, + "step": 3930 + }, + { + "epoch": 0.10793520043931906, + "grad_norm": 0.39134132862091064, + "learning_rate": 1.9863500767957872e-05, + "loss": 0.543, + "step": 3931 + }, + { + "epoch": 0.10796265788028556, + "grad_norm": 0.4175630509853363, + "learning_rate": 1.9863429642389434e-05, + "loss": 0.5431, + "step": 3932 + }, + { + "epoch": 0.10799011532125206, + "grad_norm": 0.3955199718475342, + "learning_rate": 1.9863358498422554e-05, + "loss": 0.5556, + "step": 3933 + }, + { + "epoch": 0.10801757276221856, + "grad_norm": 0.5020374655723572, + "learning_rate": 1.9863287336057362e-05, + "loss": 0.6199, + "step": 3934 + }, + { + "epoch": 0.10804503020318507, + "grad_norm": 0.4130048453807831, + "learning_rate": 1.986321615529399e-05, + "loss": 0.6334, + "step": 3935 + }, + { + "epoch": 0.10807248764415156, + "grad_norm": 0.34505125880241394, + "learning_rate": 1.9863144956132573e-05, + "loss": 0.4968, + "step": 3936 + }, + { + "epoch": 0.10809994508511807, + "grad_norm": 0.8618636131286621, + "learning_rate": 1.986307373857324e-05, + "loss": 0.6281, + "step": 3937 + }, + { + "epoch": 0.10812740252608456, + "grad_norm": 0.388460636138916, + "learning_rate": 1.986300250261613e-05, + "loss": 0.5592, + "step": 3938 + }, + { + "epoch": 0.10815485996705107, + "grad_norm": 0.40597933530807495, + "learning_rate": 1.986293124826137e-05, + "loss": 0.5684, + "step": 3939 + }, + { + "epoch": 0.10818231740801758, + "grad_norm": 0.36292046308517456, + "learning_rate": 1.9862859975509096e-05, + "loss": 0.5962, + "step": 3940 + }, + { + "epoch": 0.10820977484898407, + "grad_norm": 0.45751529932022095, + "learning_rate": 1.986278868435944e-05, + "loss": 0.5192, + "step": 3941 + }, + { + "epoch": 0.10823723228995058, + "grad_norm": 0.35921022295951843, + "learning_rate": 1.9862717374812535e-05, + "loss": 0.5891, + "step": 3942 + }, + { + "epoch": 0.10826468973091707, + "grad_norm": 0.31273770332336426, + "learning_rate": 1.9862646046868516e-05, + "loss": 0.4805, + "step": 3943 + }, + { + "epoch": 0.10829214717188358, + "grad_norm": 0.4266352355480194, + "learning_rate": 1.9862574700527514e-05, + "loss": 0.5353, + "step": 3944 + }, + { + "epoch": 0.10831960461285008, + "grad_norm": 0.3353807032108307, + "learning_rate": 1.986250333578966e-05, + "loss": 0.569, + "step": 3945 + }, + { + "epoch": 0.10834706205381658, + "grad_norm": 0.3556755781173706, + "learning_rate": 1.986243195265509e-05, + "loss": 0.5957, + "step": 3946 + }, + { + "epoch": 0.10837451949478309, + "grad_norm": 0.37778326869010925, + "learning_rate": 1.9862360551123937e-05, + "loss": 0.5846, + "step": 3947 + }, + { + "epoch": 0.10840197693574959, + "grad_norm": 0.390471875667572, + "learning_rate": 1.9862289131196335e-05, + "loss": 0.5808, + "step": 3948 + }, + { + "epoch": 0.1084294343767161, + "grad_norm": 0.3672283887863159, + "learning_rate": 1.9862217692872416e-05, + "loss": 0.584, + "step": 3949 + }, + { + "epoch": 0.10845689181768259, + "grad_norm": 0.35633155703544617, + "learning_rate": 1.9862146236152315e-05, + "loss": 0.4549, + "step": 3950 + }, + { + "epoch": 0.1084843492586491, + "grad_norm": 0.3635578453540802, + "learning_rate": 1.9862074761036162e-05, + "loss": 0.5264, + "step": 3951 + }, + { + "epoch": 0.10851180669961559, + "grad_norm": 0.32456451654434204, + "learning_rate": 1.986200326752409e-05, + "loss": 0.6064, + "step": 3952 + }, + { + "epoch": 0.1085392641405821, + "grad_norm": 0.39114803075790405, + "learning_rate": 1.9861931755616237e-05, + "loss": 0.5058, + "step": 3953 + }, + { + "epoch": 0.1085667215815486, + "grad_norm": 0.41415759921073914, + "learning_rate": 1.9861860225312733e-05, + "loss": 0.6826, + "step": 3954 + }, + { + "epoch": 0.1085941790225151, + "grad_norm": 0.4190730154514313, + "learning_rate": 1.986178867661371e-05, + "loss": 0.5927, + "step": 3955 + }, + { + "epoch": 0.10862163646348161, + "grad_norm": 0.41117870807647705, + "learning_rate": 1.9861717109519307e-05, + "loss": 0.5698, + "step": 3956 + }, + { + "epoch": 0.1086490939044481, + "grad_norm": 0.36093124747276306, + "learning_rate": 1.9861645524029655e-05, + "loss": 0.5381, + "step": 3957 + }, + { + "epoch": 0.10867655134541461, + "grad_norm": 0.38000255823135376, + "learning_rate": 1.986157392014488e-05, + "loss": 0.5666, + "step": 3958 + }, + { + "epoch": 0.1087040087863811, + "grad_norm": 0.3537401258945465, + "learning_rate": 1.9861502297865128e-05, + "loss": 0.5495, + "step": 3959 + }, + { + "epoch": 0.10873146622734761, + "grad_norm": 0.37150415778160095, + "learning_rate": 1.9861430657190524e-05, + "loss": 0.5714, + "step": 3960 + }, + { + "epoch": 0.10875892366831412, + "grad_norm": 0.3561224937438965, + "learning_rate": 1.9861358998121207e-05, + "loss": 0.5442, + "step": 3961 + }, + { + "epoch": 0.10878638110928061, + "grad_norm": 0.3501085937023163, + "learning_rate": 1.986128732065731e-05, + "loss": 0.5416, + "step": 3962 + }, + { + "epoch": 0.10881383855024712, + "grad_norm": 0.3778669834136963, + "learning_rate": 1.9861215624798956e-05, + "loss": 0.5821, + "step": 3963 + }, + { + "epoch": 0.10884129599121362, + "grad_norm": 0.32850706577301025, + "learning_rate": 1.9861143910546293e-05, + "loss": 0.576, + "step": 3964 + }, + { + "epoch": 0.10886875343218012, + "grad_norm": 0.37771663069725037, + "learning_rate": 1.986107217789945e-05, + "loss": 0.5123, + "step": 3965 + }, + { + "epoch": 0.10889621087314662, + "grad_norm": 0.372470885515213, + "learning_rate": 1.986100042685856e-05, + "loss": 0.4771, + "step": 3966 + }, + { + "epoch": 0.10892366831411313, + "grad_norm": 0.3709075450897217, + "learning_rate": 1.9860928657423756e-05, + "loss": 0.4778, + "step": 3967 + }, + { + "epoch": 0.10895112575507962, + "grad_norm": 0.340224951505661, + "learning_rate": 1.9860856869595172e-05, + "loss": 0.4882, + "step": 3968 + }, + { + "epoch": 0.10897858319604613, + "grad_norm": 0.3827364444732666, + "learning_rate": 1.9860785063372945e-05, + "loss": 0.5938, + "step": 3969 + }, + { + "epoch": 0.10900604063701264, + "grad_norm": 0.39398160576820374, + "learning_rate": 1.9860713238757205e-05, + "loss": 0.5865, + "step": 3970 + }, + { + "epoch": 0.10903349807797913, + "grad_norm": 0.34342360496520996, + "learning_rate": 1.986064139574809e-05, + "loss": 0.5018, + "step": 3971 + }, + { + "epoch": 0.10906095551894564, + "grad_norm": 0.3409058153629303, + "learning_rate": 1.986056953434573e-05, + "loss": 0.5361, + "step": 3972 + }, + { + "epoch": 0.10908841295991213, + "grad_norm": 0.36984938383102417, + "learning_rate": 1.986049765455026e-05, + "loss": 0.5288, + "step": 3973 + }, + { + "epoch": 0.10911587040087864, + "grad_norm": 0.3538043200969696, + "learning_rate": 1.9860425756361813e-05, + "loss": 0.5566, + "step": 3974 + }, + { + "epoch": 0.10914332784184513, + "grad_norm": 0.34838637709617615, + "learning_rate": 1.9860353839780528e-05, + "loss": 0.596, + "step": 3975 + }, + { + "epoch": 0.10917078528281164, + "grad_norm": 0.37920719385147095, + "learning_rate": 1.9860281904806535e-05, + "loss": 0.4391, + "step": 3976 + }, + { + "epoch": 0.10919824272377815, + "grad_norm": 0.5259626507759094, + "learning_rate": 1.986020995143997e-05, + "loss": 0.5645, + "step": 3977 + }, + { + "epoch": 0.10922570016474464, + "grad_norm": 0.3524862229824066, + "learning_rate": 1.9860137979680964e-05, + "loss": 0.5079, + "step": 3978 + }, + { + "epoch": 0.10925315760571115, + "grad_norm": 0.3903754651546478, + "learning_rate": 1.9860065989529656e-05, + "loss": 0.5793, + "step": 3979 + }, + { + "epoch": 0.10928061504667765, + "grad_norm": 0.368459552526474, + "learning_rate": 1.9859993980986177e-05, + "loss": 0.5514, + "step": 3980 + }, + { + "epoch": 0.10930807248764415, + "grad_norm": 0.4309787452220917, + "learning_rate": 1.9859921954050664e-05, + "loss": 0.6118, + "step": 3981 + }, + { + "epoch": 0.10933552992861065, + "grad_norm": 0.35527539253234863, + "learning_rate": 1.985984990872325e-05, + "loss": 0.5286, + "step": 3982 + }, + { + "epoch": 0.10936298736957716, + "grad_norm": 0.363607794046402, + "learning_rate": 1.9859777845004066e-05, + "loss": 0.5984, + "step": 3983 + }, + { + "epoch": 0.10939044481054366, + "grad_norm": 0.34513360261917114, + "learning_rate": 1.985970576289325e-05, + "loss": 0.499, + "step": 3984 + }, + { + "epoch": 0.10941790225151016, + "grad_norm": 0.3455013930797577, + "learning_rate": 1.9859633662390938e-05, + "loss": 0.5398, + "step": 3985 + }, + { + "epoch": 0.10944535969247667, + "grad_norm": 0.37129929661750793, + "learning_rate": 1.985956154349726e-05, + "loss": 0.6246, + "step": 3986 + }, + { + "epoch": 0.10947281713344316, + "grad_norm": 0.3403526544570923, + "learning_rate": 1.9859489406212355e-05, + "loss": 0.5358, + "step": 3987 + }, + { + "epoch": 0.10950027457440967, + "grad_norm": 0.3818034827709198, + "learning_rate": 1.9859417250536355e-05, + "loss": 0.5606, + "step": 3988 + }, + { + "epoch": 0.10952773201537616, + "grad_norm": 0.3211546838283539, + "learning_rate": 1.9859345076469394e-05, + "loss": 0.5412, + "step": 3989 + }, + { + "epoch": 0.10955518945634267, + "grad_norm": 0.3827301561832428, + "learning_rate": 1.985927288401161e-05, + "loss": 0.582, + "step": 3990 + }, + { + "epoch": 0.10958264689730918, + "grad_norm": 0.3984532952308655, + "learning_rate": 1.9859200673163132e-05, + "loss": 0.5957, + "step": 3991 + }, + { + "epoch": 0.10961010433827567, + "grad_norm": 0.3133056163787842, + "learning_rate": 1.9859128443924097e-05, + "loss": 0.5198, + "step": 3992 + }, + { + "epoch": 0.10963756177924218, + "grad_norm": 0.40924543142318726, + "learning_rate": 1.9859056196294645e-05, + "loss": 0.5929, + "step": 3993 + }, + { + "epoch": 0.10966501922020867, + "grad_norm": 0.5046197175979614, + "learning_rate": 1.9858983930274908e-05, + "loss": 0.5359, + "step": 3994 + }, + { + "epoch": 0.10969247666117518, + "grad_norm": 0.38889026641845703, + "learning_rate": 1.9858911645865015e-05, + "loss": 0.4918, + "step": 3995 + }, + { + "epoch": 0.10971993410214168, + "grad_norm": 0.3846395015716553, + "learning_rate": 1.9858839343065105e-05, + "loss": 0.6252, + "step": 3996 + }, + { + "epoch": 0.10974739154310818, + "grad_norm": 0.3960987627506256, + "learning_rate": 1.9858767021875317e-05, + "loss": 0.4861, + "step": 3997 + }, + { + "epoch": 0.10977484898407469, + "grad_norm": 0.37865686416625977, + "learning_rate": 1.985869468229578e-05, + "loss": 0.5672, + "step": 3998 + }, + { + "epoch": 0.10980230642504119, + "grad_norm": 0.3352625370025635, + "learning_rate": 1.985862232432663e-05, + "loss": 0.5326, + "step": 3999 + }, + { + "epoch": 0.10982976386600769, + "grad_norm": 0.3997047245502472, + "learning_rate": 1.9858549947968003e-05, + "loss": 0.6091, + "step": 4000 + }, + { + "epoch": 0.10985722130697419, + "grad_norm": 0.4317460358142853, + "learning_rate": 1.9858477553220034e-05, + "loss": 0.6146, + "step": 4001 + }, + { + "epoch": 0.1098846787479407, + "grad_norm": 0.3737805485725403, + "learning_rate": 1.9858405140082858e-05, + "loss": 0.6374, + "step": 4002 + }, + { + "epoch": 0.10991213618890719, + "grad_norm": 0.43477872014045715, + "learning_rate": 1.985833270855661e-05, + "loss": 0.6008, + "step": 4003 + }, + { + "epoch": 0.1099395936298737, + "grad_norm": 0.38140445947647095, + "learning_rate": 1.9858260258641426e-05, + "loss": 0.585, + "step": 4004 + }, + { + "epoch": 0.10996705107084019, + "grad_norm": 0.47957196831703186, + "learning_rate": 1.9858187790337437e-05, + "loss": 0.656, + "step": 4005 + }, + { + "epoch": 0.1099945085118067, + "grad_norm": 0.37022799253463745, + "learning_rate": 1.9858115303644784e-05, + "loss": 0.4987, + "step": 4006 + }, + { + "epoch": 0.1100219659527732, + "grad_norm": 0.36663544178009033, + "learning_rate": 1.9858042798563598e-05, + "loss": 0.5534, + "step": 4007 + }, + { + "epoch": 0.1100494233937397, + "grad_norm": 0.3790111541748047, + "learning_rate": 1.9857970275094017e-05, + "loss": 0.5184, + "step": 4008 + }, + { + "epoch": 0.11007688083470621, + "grad_norm": 0.36379122734069824, + "learning_rate": 1.985789773323618e-05, + "loss": 0.6046, + "step": 4009 + }, + { + "epoch": 0.1101043382756727, + "grad_norm": 0.43821364641189575, + "learning_rate": 1.985782517299021e-05, + "loss": 0.5057, + "step": 4010 + }, + { + "epoch": 0.11013179571663921, + "grad_norm": 0.3486274480819702, + "learning_rate": 1.9857752594356253e-05, + "loss": 0.4844, + "step": 4011 + }, + { + "epoch": 0.1101592531576057, + "grad_norm": 0.32822877168655396, + "learning_rate": 1.985767999733444e-05, + "loss": 0.5323, + "step": 4012 + }, + { + "epoch": 0.11018671059857221, + "grad_norm": 0.350456178188324, + "learning_rate": 1.985760738192491e-05, + "loss": 0.4883, + "step": 4013 + }, + { + "epoch": 0.11021416803953872, + "grad_norm": 0.3455464541912079, + "learning_rate": 1.9857534748127794e-05, + "loss": 0.5194, + "step": 4014 + }, + { + "epoch": 0.11024162548050521, + "grad_norm": 0.38779500126838684, + "learning_rate": 1.985746209594323e-05, + "loss": 0.5258, + "step": 4015 + }, + { + "epoch": 0.11026908292147172, + "grad_norm": 0.3662167489528656, + "learning_rate": 1.985738942537135e-05, + "loss": 0.552, + "step": 4016 + }, + { + "epoch": 0.11029654036243822, + "grad_norm": 0.36652469635009766, + "learning_rate": 1.98573167364123e-05, + "loss": 0.5093, + "step": 4017 + }, + { + "epoch": 0.11032399780340472, + "grad_norm": 0.41379132866859436, + "learning_rate": 1.98572440290662e-05, + "loss": 0.5422, + "step": 4018 + }, + { + "epoch": 0.11035145524437122, + "grad_norm": 0.3547268807888031, + "learning_rate": 1.98571713033332e-05, + "loss": 0.5235, + "step": 4019 + }, + { + "epoch": 0.11037891268533773, + "grad_norm": 0.3779987096786499, + "learning_rate": 1.9857098559213426e-05, + "loss": 0.5709, + "step": 4020 + }, + { + "epoch": 0.11040637012630423, + "grad_norm": 0.4075092673301697, + "learning_rate": 1.9857025796707018e-05, + "loss": 0.5713, + "step": 4021 + }, + { + "epoch": 0.11043382756727073, + "grad_norm": 0.44708681106567383, + "learning_rate": 1.9856953015814113e-05, + "loss": 0.5571, + "step": 4022 + }, + { + "epoch": 0.11046128500823724, + "grad_norm": 0.3658026158809662, + "learning_rate": 1.985688021653484e-05, + "loss": 0.588, + "step": 4023 + }, + { + "epoch": 0.11048874244920373, + "grad_norm": 0.37584641575813293, + "learning_rate": 1.9856807398869345e-05, + "loss": 0.5013, + "step": 4024 + }, + { + "epoch": 0.11051619989017024, + "grad_norm": 0.38247647881507874, + "learning_rate": 1.9856734562817756e-05, + "loss": 0.5488, + "step": 4025 + }, + { + "epoch": 0.11054365733113673, + "grad_norm": 0.36218592524528503, + "learning_rate": 1.985666170838021e-05, + "loss": 0.527, + "step": 4026 + }, + { + "epoch": 0.11057111477210324, + "grad_norm": 0.3936878740787506, + "learning_rate": 1.9856588835556843e-05, + "loss": 0.5812, + "step": 4027 + }, + { + "epoch": 0.11059857221306975, + "grad_norm": 0.3802419602870941, + "learning_rate": 1.9856515944347797e-05, + "loss": 0.4793, + "step": 4028 + }, + { + "epoch": 0.11062602965403624, + "grad_norm": 0.3565483093261719, + "learning_rate": 1.98564430347532e-05, + "loss": 0.4714, + "step": 4029 + }, + { + "epoch": 0.11065348709500275, + "grad_norm": 0.339617520570755, + "learning_rate": 1.985637010677319e-05, + "loss": 0.4697, + "step": 4030 + }, + { + "epoch": 0.11068094453596924, + "grad_norm": 0.33839449286460876, + "learning_rate": 1.98562971604079e-05, + "loss": 0.6306, + "step": 4031 + }, + { + "epoch": 0.11070840197693575, + "grad_norm": 0.3494991958141327, + "learning_rate": 1.9856224195657476e-05, + "loss": 0.5107, + "step": 4032 + }, + { + "epoch": 0.11073585941790225, + "grad_norm": 0.33218225836753845, + "learning_rate": 1.9856151212522048e-05, + "loss": 0.5561, + "step": 4033 + }, + { + "epoch": 0.11076331685886875, + "grad_norm": 0.35247015953063965, + "learning_rate": 1.985607821100175e-05, + "loss": 0.5988, + "step": 4034 + }, + { + "epoch": 0.11079077429983525, + "grad_norm": 0.3589276671409607, + "learning_rate": 1.9856005191096723e-05, + "loss": 0.5791, + "step": 4035 + }, + { + "epoch": 0.11081823174080176, + "grad_norm": 0.378555566072464, + "learning_rate": 1.9855932152807097e-05, + "loss": 0.5456, + "step": 4036 + }, + { + "epoch": 0.11084568918176826, + "grad_norm": 0.38541316986083984, + "learning_rate": 1.9855859096133014e-05, + "loss": 0.558, + "step": 4037 + }, + { + "epoch": 0.11087314662273476, + "grad_norm": 0.352606862783432, + "learning_rate": 1.985578602107461e-05, + "loss": 0.4927, + "step": 4038 + }, + { + "epoch": 0.11090060406370127, + "grad_norm": 0.35152381658554077, + "learning_rate": 1.9855712927632015e-05, + "loss": 0.4645, + "step": 4039 + }, + { + "epoch": 0.11092806150466776, + "grad_norm": 0.3829084038734436, + "learning_rate": 1.9855639815805374e-05, + "loss": 0.5153, + "step": 4040 + }, + { + "epoch": 0.11095551894563427, + "grad_norm": 0.38063299655914307, + "learning_rate": 1.9855566685594817e-05, + "loss": 0.4724, + "step": 4041 + }, + { + "epoch": 0.11098297638660076, + "grad_norm": 0.35385727882385254, + "learning_rate": 1.985549353700048e-05, + "loss": 0.5449, + "step": 4042 + }, + { + "epoch": 0.11101043382756727, + "grad_norm": 0.407499760389328, + "learning_rate": 1.985542037002251e-05, + "loss": 0.5293, + "step": 4043 + }, + { + "epoch": 0.11103789126853378, + "grad_norm": 0.3590410649776459, + "learning_rate": 1.9855347184661027e-05, + "loss": 0.5141, + "step": 4044 + }, + { + "epoch": 0.11106534870950027, + "grad_norm": 0.5200411081314087, + "learning_rate": 1.9855273980916183e-05, + "loss": 0.5494, + "step": 4045 + }, + { + "epoch": 0.11109280615046678, + "grad_norm": 0.3730369210243225, + "learning_rate": 1.9855200758788102e-05, + "loss": 0.5104, + "step": 4046 + }, + { + "epoch": 0.11112026359143327, + "grad_norm": 0.35227975249290466, + "learning_rate": 1.9855127518276932e-05, + "loss": 0.5256, + "step": 4047 + }, + { + "epoch": 0.11114772103239978, + "grad_norm": 0.37153616547584534, + "learning_rate": 1.9855054259382796e-05, + "loss": 0.546, + "step": 4048 + }, + { + "epoch": 0.11117517847336628, + "grad_norm": 0.3858408033847809, + "learning_rate": 1.9854980982105846e-05, + "loss": 0.5576, + "step": 4049 + }, + { + "epoch": 0.11120263591433278, + "grad_norm": 0.3595297336578369, + "learning_rate": 1.9854907686446205e-05, + "loss": 0.5823, + "step": 4050 + }, + { + "epoch": 0.11123009335529929, + "grad_norm": 0.34671834111213684, + "learning_rate": 1.985483437240402e-05, + "loss": 0.5383, + "step": 4051 + }, + { + "epoch": 0.11125755079626579, + "grad_norm": 0.49240052700042725, + "learning_rate": 1.985476103997942e-05, + "loss": 0.5039, + "step": 4052 + }, + { + "epoch": 0.1112850082372323, + "grad_norm": 0.3700600564479828, + "learning_rate": 1.9854687689172548e-05, + "loss": 0.5451, + "step": 4053 + }, + { + "epoch": 0.11131246567819879, + "grad_norm": 0.3544233739376068, + "learning_rate": 1.985461431998354e-05, + "loss": 0.5979, + "step": 4054 + }, + { + "epoch": 0.1113399231191653, + "grad_norm": 0.37619009613990784, + "learning_rate": 1.985454093241253e-05, + "loss": 0.5601, + "step": 4055 + }, + { + "epoch": 0.11136738056013179, + "grad_norm": 0.34408509731292725, + "learning_rate": 1.9854467526459655e-05, + "loss": 0.4718, + "step": 4056 + }, + { + "epoch": 0.1113948380010983, + "grad_norm": 0.46332937479019165, + "learning_rate": 1.9854394102125053e-05, + "loss": 0.5365, + "step": 4057 + }, + { + "epoch": 0.1114222954420648, + "grad_norm": 0.3431167006492615, + "learning_rate": 1.985432065940886e-05, + "loss": 0.5104, + "step": 4058 + }, + { + "epoch": 0.1114497528830313, + "grad_norm": 0.3374008536338806, + "learning_rate": 1.9854247198311215e-05, + "loss": 0.4355, + "step": 4059 + }, + { + "epoch": 0.11147721032399781, + "grad_norm": 0.3567059636116028, + "learning_rate": 1.9854173718832254e-05, + "loss": 0.5743, + "step": 4060 + }, + { + "epoch": 0.1115046677649643, + "grad_norm": 0.3606155812740326, + "learning_rate": 1.9854100220972112e-05, + "loss": 0.5899, + "step": 4061 + }, + { + "epoch": 0.11153212520593081, + "grad_norm": 0.35930269956588745, + "learning_rate": 1.985402670473093e-05, + "loss": 0.5576, + "step": 4062 + }, + { + "epoch": 0.1115595826468973, + "grad_norm": 0.3509731888771057, + "learning_rate": 1.985395317010884e-05, + "loss": 0.5373, + "step": 4063 + }, + { + "epoch": 0.11158704008786381, + "grad_norm": 0.3881068229675293, + "learning_rate": 1.9853879617105988e-05, + "loss": 0.5766, + "step": 4064 + }, + { + "epoch": 0.11161449752883032, + "grad_norm": 0.38228127360343933, + "learning_rate": 1.98538060457225e-05, + "loss": 0.536, + "step": 4065 + }, + { + "epoch": 0.11164195496979681, + "grad_norm": 0.3625740706920624, + "learning_rate": 1.985373245595852e-05, + "loss": 0.5957, + "step": 4066 + }, + { + "epoch": 0.11166941241076332, + "grad_norm": 0.35884225368499756, + "learning_rate": 1.9853658847814187e-05, + "loss": 0.6206, + "step": 4067 + }, + { + "epoch": 0.11169686985172982, + "grad_norm": 0.3951311409473419, + "learning_rate": 1.9853585221289632e-05, + "loss": 0.519, + "step": 4068 + }, + { + "epoch": 0.11172432729269632, + "grad_norm": 0.35461950302124023, + "learning_rate": 1.9853511576384998e-05, + "loss": 0.5493, + "step": 4069 + }, + { + "epoch": 0.11175178473366282, + "grad_norm": 0.3540794253349304, + "learning_rate": 1.985343791310042e-05, + "loss": 0.4908, + "step": 4070 + }, + { + "epoch": 0.11177924217462933, + "grad_norm": 0.34181687235832214, + "learning_rate": 1.9853364231436032e-05, + "loss": 0.5955, + "step": 4071 + }, + { + "epoch": 0.11180669961559582, + "grad_norm": 0.36895066499710083, + "learning_rate": 1.985329053139198e-05, + "loss": 0.5873, + "step": 4072 + }, + { + "epoch": 0.11183415705656233, + "grad_norm": 0.4041939377784729, + "learning_rate": 1.985321681296839e-05, + "loss": 0.5872, + "step": 4073 + }, + { + "epoch": 0.11186161449752884, + "grad_norm": 0.3615841865539551, + "learning_rate": 1.985314307616541e-05, + "loss": 0.5616, + "step": 4074 + }, + { + "epoch": 0.11188907193849533, + "grad_norm": 0.3597436845302582, + "learning_rate": 1.9853069320983173e-05, + "loss": 0.5134, + "step": 4075 + }, + { + "epoch": 0.11191652937946184, + "grad_norm": 0.3820773661136627, + "learning_rate": 1.985299554742182e-05, + "loss": 0.5648, + "step": 4076 + }, + { + "epoch": 0.11194398682042833, + "grad_norm": 0.48970553278923035, + "learning_rate": 1.985292175548148e-05, + "loss": 0.5709, + "step": 4077 + }, + { + "epoch": 0.11197144426139484, + "grad_norm": 0.3757760524749756, + "learning_rate": 1.98528479451623e-05, + "loss": 0.5381, + "step": 4078 + }, + { + "epoch": 0.11199890170236133, + "grad_norm": 0.35946306586265564, + "learning_rate": 1.9852774116464414e-05, + "loss": 0.5163, + "step": 4079 + }, + { + "epoch": 0.11202635914332784, + "grad_norm": 0.3923642039299011, + "learning_rate": 1.9852700269387957e-05, + "loss": 0.5791, + "step": 4080 + }, + { + "epoch": 0.11205381658429435, + "grad_norm": 0.314471572637558, + "learning_rate": 1.985262640393307e-05, + "loss": 0.4531, + "step": 4081 + }, + { + "epoch": 0.11208127402526084, + "grad_norm": 0.37800249457359314, + "learning_rate": 1.9852552520099892e-05, + "loss": 0.5067, + "step": 4082 + }, + { + "epoch": 0.11210873146622735, + "grad_norm": 0.3128666579723358, + "learning_rate": 1.9852478617888556e-05, + "loss": 0.3852, + "step": 4083 + }, + { + "epoch": 0.11213618890719385, + "grad_norm": 0.41324499249458313, + "learning_rate": 1.9852404697299207e-05, + "loss": 0.618, + "step": 4084 + }, + { + "epoch": 0.11216364634816035, + "grad_norm": 0.40907084941864014, + "learning_rate": 1.9852330758331977e-05, + "loss": 0.5276, + "step": 4085 + }, + { + "epoch": 0.11219110378912685, + "grad_norm": 0.38857921957969666, + "learning_rate": 1.9852256800987006e-05, + "loss": 0.5538, + "step": 4086 + }, + { + "epoch": 0.11221856123009336, + "grad_norm": 0.34586748480796814, + "learning_rate": 1.985218282526443e-05, + "loss": 0.5477, + "step": 4087 + }, + { + "epoch": 0.11224601867105986, + "grad_norm": 0.3319321870803833, + "learning_rate": 1.9852108831164386e-05, + "loss": 0.4691, + "step": 4088 + }, + { + "epoch": 0.11227347611202636, + "grad_norm": 0.40570083260536194, + "learning_rate": 1.985203481868702e-05, + "loss": 0.5753, + "step": 4089 + }, + { + "epoch": 0.11230093355299287, + "grad_norm": 0.33897367119789124, + "learning_rate": 1.9851960787832462e-05, + "loss": 0.4866, + "step": 4090 + }, + { + "epoch": 0.11232839099395936, + "grad_norm": 0.3339914381504059, + "learning_rate": 1.9851886738600855e-05, + "loss": 0.5279, + "step": 4091 + }, + { + "epoch": 0.11235584843492587, + "grad_norm": 0.43470486998558044, + "learning_rate": 1.9851812670992333e-05, + "loss": 0.6067, + "step": 4092 + }, + { + "epoch": 0.11238330587589236, + "grad_norm": 0.4028666913509369, + "learning_rate": 1.9851738585007038e-05, + "loss": 0.507, + "step": 4093 + }, + { + "epoch": 0.11241076331685887, + "grad_norm": 0.36804062128067017, + "learning_rate": 1.9851664480645102e-05, + "loss": 0.5163, + "step": 4094 + }, + { + "epoch": 0.11243822075782538, + "grad_norm": 0.36617839336395264, + "learning_rate": 1.9851590357906668e-05, + "loss": 0.6337, + "step": 4095 + }, + { + "epoch": 0.11246567819879187, + "grad_norm": 0.3709811270236969, + "learning_rate": 1.9851516216791878e-05, + "loss": 0.5086, + "step": 4096 + }, + { + "epoch": 0.11249313563975838, + "grad_norm": 0.39433562755584717, + "learning_rate": 1.9851442057300863e-05, + "loss": 0.6625, + "step": 4097 + }, + { + "epoch": 0.11252059308072487, + "grad_norm": 0.3529183268547058, + "learning_rate": 1.9851367879433764e-05, + "loss": 0.4744, + "step": 4098 + }, + { + "epoch": 0.11254805052169138, + "grad_norm": 0.37831541895866394, + "learning_rate": 1.985129368319072e-05, + "loss": 0.5098, + "step": 4099 + }, + { + "epoch": 0.11257550796265788, + "grad_norm": 0.3643631637096405, + "learning_rate": 1.985121946857187e-05, + "loss": 0.5188, + "step": 4100 + }, + { + "epoch": 0.11260296540362438, + "grad_norm": 0.36182543635368347, + "learning_rate": 1.9851145235577352e-05, + "loss": 0.509, + "step": 4101 + }, + { + "epoch": 0.11263042284459088, + "grad_norm": 0.37973377108573914, + "learning_rate": 1.9851070984207302e-05, + "loss": 0.5284, + "step": 4102 + }, + { + "epoch": 0.11265788028555739, + "grad_norm": 0.3289060890674591, + "learning_rate": 1.985099671446186e-05, + "loss": 0.449, + "step": 4103 + }, + { + "epoch": 0.1126853377265239, + "grad_norm": 0.40577611327171326, + "learning_rate": 1.985092242634117e-05, + "loss": 0.5417, + "step": 4104 + }, + { + "epoch": 0.11271279516749039, + "grad_norm": 0.3411047160625458, + "learning_rate": 1.985084811984536e-05, + "loss": 0.5529, + "step": 4105 + }, + { + "epoch": 0.1127402526084569, + "grad_norm": 0.31905806064605713, + "learning_rate": 1.985077379497458e-05, + "loss": 0.4489, + "step": 4106 + }, + { + "epoch": 0.11276771004942339, + "grad_norm": 0.38043197989463806, + "learning_rate": 1.985069945172896e-05, + "loss": 0.5901, + "step": 4107 + }, + { + "epoch": 0.1127951674903899, + "grad_norm": 0.3472435474395752, + "learning_rate": 1.985062509010864e-05, + "loss": 0.5745, + "step": 4108 + }, + { + "epoch": 0.11282262493135639, + "grad_norm": 0.4254503846168518, + "learning_rate": 1.985055071011376e-05, + "loss": 0.52, + "step": 4109 + }, + { + "epoch": 0.1128500823723229, + "grad_norm": 0.42407822608947754, + "learning_rate": 1.9850476311744462e-05, + "loss": 0.5107, + "step": 4110 + }, + { + "epoch": 0.11287753981328941, + "grad_norm": 0.37014007568359375, + "learning_rate": 1.985040189500088e-05, + "loss": 0.4598, + "step": 4111 + }, + { + "epoch": 0.1129049972542559, + "grad_norm": 0.3566095232963562, + "learning_rate": 1.9850327459883155e-05, + "loss": 0.5822, + "step": 4112 + }, + { + "epoch": 0.11293245469522241, + "grad_norm": 0.3826529383659363, + "learning_rate": 1.9850253006391426e-05, + "loss": 0.5052, + "step": 4113 + }, + { + "epoch": 0.1129599121361889, + "grad_norm": 0.33563855290412903, + "learning_rate": 1.985017853452583e-05, + "loss": 0.5663, + "step": 4114 + }, + { + "epoch": 0.11298736957715541, + "grad_norm": 0.4095176160335541, + "learning_rate": 1.9850104044286507e-05, + "loss": 0.551, + "step": 4115 + }, + { + "epoch": 0.1130148270181219, + "grad_norm": 0.3572610020637512, + "learning_rate": 1.9850029535673598e-05, + "loss": 0.5512, + "step": 4116 + }, + { + "epoch": 0.11304228445908841, + "grad_norm": 0.3405890166759491, + "learning_rate": 1.984995500868724e-05, + "loss": 0.5373, + "step": 4117 + }, + { + "epoch": 0.11306974190005492, + "grad_norm": 0.36768409609794617, + "learning_rate": 1.9849880463327567e-05, + "loss": 0.5107, + "step": 4118 + }, + { + "epoch": 0.11309719934102141, + "grad_norm": 0.4499264359474182, + "learning_rate": 1.984980589959473e-05, + "loss": 0.5383, + "step": 4119 + }, + { + "epoch": 0.11312465678198792, + "grad_norm": 0.36219334602355957, + "learning_rate": 1.984973131748886e-05, + "loss": 0.5753, + "step": 4120 + }, + { + "epoch": 0.11315211422295442, + "grad_norm": 0.5831260681152344, + "learning_rate": 1.9849656717010094e-05, + "loss": 0.5154, + "step": 4121 + }, + { + "epoch": 0.11317957166392092, + "grad_norm": 0.4036034345626831, + "learning_rate": 1.984958209815858e-05, + "loss": 0.4951, + "step": 4122 + }, + { + "epoch": 0.11320702910488742, + "grad_norm": 0.36296018958091736, + "learning_rate": 1.9849507460934445e-05, + "loss": 0.5371, + "step": 4123 + }, + { + "epoch": 0.11323448654585393, + "grad_norm": 0.3515297472476959, + "learning_rate": 1.984943280533784e-05, + "loss": 0.5899, + "step": 4124 + }, + { + "epoch": 0.11326194398682043, + "grad_norm": 0.3688106834888458, + "learning_rate": 1.9849358131368896e-05, + "loss": 0.5457, + "step": 4125 + }, + { + "epoch": 0.11328940142778693, + "grad_norm": 0.3548530042171478, + "learning_rate": 1.9849283439027758e-05, + "loss": 0.5141, + "step": 4126 + }, + { + "epoch": 0.11331685886875344, + "grad_norm": 0.4081372618675232, + "learning_rate": 1.9849208728314563e-05, + "loss": 0.5224, + "step": 4127 + }, + { + "epoch": 0.11334431630971993, + "grad_norm": 0.34947437047958374, + "learning_rate": 1.984913399922945e-05, + "loss": 0.5513, + "step": 4128 + }, + { + "epoch": 0.11337177375068644, + "grad_norm": 7.501532554626465, + "learning_rate": 1.984905925177256e-05, + "loss": 0.4914, + "step": 4129 + }, + { + "epoch": 0.11339923119165293, + "grad_norm": 0.4358418583869934, + "learning_rate": 1.9848984485944027e-05, + "loss": 0.59, + "step": 4130 + }, + { + "epoch": 0.11342668863261944, + "grad_norm": 0.4103964567184448, + "learning_rate": 1.9848909701744e-05, + "loss": 0.5567, + "step": 4131 + }, + { + "epoch": 0.11345414607358595, + "grad_norm": 0.4298754930496216, + "learning_rate": 1.9848834899172608e-05, + "loss": 0.4509, + "step": 4132 + }, + { + "epoch": 0.11348160351455244, + "grad_norm": 0.3420647978782654, + "learning_rate": 1.9848760078229997e-05, + "loss": 0.5147, + "step": 4133 + }, + { + "epoch": 0.11350906095551895, + "grad_norm": 0.3658965229988098, + "learning_rate": 1.9848685238916303e-05, + "loss": 0.5536, + "step": 4134 + }, + { + "epoch": 0.11353651839648544, + "grad_norm": 0.36154550313949585, + "learning_rate": 1.984861038123167e-05, + "loss": 0.6287, + "step": 4135 + }, + { + "epoch": 0.11356397583745195, + "grad_norm": 0.3354254364967346, + "learning_rate": 1.9848535505176234e-05, + "loss": 0.5009, + "step": 4136 + }, + { + "epoch": 0.11359143327841845, + "grad_norm": 0.325506329536438, + "learning_rate": 1.9848460610750137e-05, + "loss": 0.5108, + "step": 4137 + }, + { + "epoch": 0.11361889071938495, + "grad_norm": 0.364805668592453, + "learning_rate": 1.984838569795352e-05, + "loss": 0.5594, + "step": 4138 + }, + { + "epoch": 0.11364634816035145, + "grad_norm": 0.3829098343849182, + "learning_rate": 1.9848310766786518e-05, + "loss": 0.6202, + "step": 4139 + }, + { + "epoch": 0.11367380560131796, + "grad_norm": 0.3727422058582306, + "learning_rate": 1.9848235817249273e-05, + "loss": 0.5811, + "step": 4140 + }, + { + "epoch": 0.11370126304228446, + "grad_norm": 0.3694833815097809, + "learning_rate": 1.9848160849341927e-05, + "loss": 0.5648, + "step": 4141 + }, + { + "epoch": 0.11372872048325096, + "grad_norm": 0.3513180613517761, + "learning_rate": 1.9848085863064614e-05, + "loss": 0.5173, + "step": 4142 + }, + { + "epoch": 0.11375617792421747, + "grad_norm": 0.35888150334358215, + "learning_rate": 1.984801085841748e-05, + "loss": 0.5116, + "step": 4143 + }, + { + "epoch": 0.11378363536518396, + "grad_norm": 0.390251100063324, + "learning_rate": 1.9847935835400663e-05, + "loss": 0.5636, + "step": 4144 + }, + { + "epoch": 0.11381109280615047, + "grad_norm": 0.3496544659137726, + "learning_rate": 1.9847860794014303e-05, + "loss": 0.5625, + "step": 4145 + }, + { + "epoch": 0.11383855024711696, + "grad_norm": 0.3689925968647003, + "learning_rate": 1.984778573425854e-05, + "loss": 0.5296, + "step": 4146 + }, + { + "epoch": 0.11386600768808347, + "grad_norm": 0.3790142238140106, + "learning_rate": 1.984771065613351e-05, + "loss": 0.5849, + "step": 4147 + }, + { + "epoch": 0.11389346512904998, + "grad_norm": 0.3604047894477844, + "learning_rate": 1.984763555963936e-05, + "loss": 0.5147, + "step": 4148 + }, + { + "epoch": 0.11392092257001647, + "grad_norm": 0.5891048908233643, + "learning_rate": 1.9847560444776225e-05, + "loss": 0.5687, + "step": 4149 + }, + { + "epoch": 0.11394838001098298, + "grad_norm": 0.35904812812805176, + "learning_rate": 1.984748531154425e-05, + "loss": 0.5261, + "step": 4150 + }, + { + "epoch": 0.11397583745194947, + "grad_norm": 0.33957844972610474, + "learning_rate": 1.9847410159943568e-05, + "loss": 0.5035, + "step": 4151 + }, + { + "epoch": 0.11400329489291598, + "grad_norm": 0.3630381226539612, + "learning_rate": 1.9847334989974323e-05, + "loss": 0.6038, + "step": 4152 + }, + { + "epoch": 0.11403075233388248, + "grad_norm": 0.34112903475761414, + "learning_rate": 1.9847259801636657e-05, + "loss": 0.5199, + "step": 4153 + }, + { + "epoch": 0.11405820977484898, + "grad_norm": 0.34171876311302185, + "learning_rate": 1.984718459493071e-05, + "loss": 0.5574, + "step": 4154 + }, + { + "epoch": 0.11408566721581549, + "grad_norm": 0.29017317295074463, + "learning_rate": 1.9847109369856618e-05, + "loss": 0.3992, + "step": 4155 + }, + { + "epoch": 0.11411312465678199, + "grad_norm": 0.38415712118148804, + "learning_rate": 1.9847034126414525e-05, + "loss": 0.6872, + "step": 4156 + }, + { + "epoch": 0.1141405820977485, + "grad_norm": 0.334358811378479, + "learning_rate": 1.984695886460457e-05, + "loss": 0.5041, + "step": 4157 + }, + { + "epoch": 0.11416803953871499, + "grad_norm": 0.4600684642791748, + "learning_rate": 1.9846883584426897e-05, + "loss": 0.4959, + "step": 4158 + }, + { + "epoch": 0.1141954969796815, + "grad_norm": 0.3284221589565277, + "learning_rate": 1.984680828588164e-05, + "loss": 0.5551, + "step": 4159 + }, + { + "epoch": 0.11422295442064799, + "grad_norm": 0.37273934483528137, + "learning_rate": 1.9846732968968946e-05, + "loss": 0.6048, + "step": 4160 + }, + { + "epoch": 0.1142504118616145, + "grad_norm": 0.5212000608444214, + "learning_rate": 1.984665763368895e-05, + "loss": 0.5511, + "step": 4161 + }, + { + "epoch": 0.114277869302581, + "grad_norm": 0.36044690012931824, + "learning_rate": 1.9846582280041796e-05, + "loss": 0.5282, + "step": 4162 + }, + { + "epoch": 0.1143053267435475, + "grad_norm": 0.32257845997810364, + "learning_rate": 1.984650690802762e-05, + "loss": 0.482, + "step": 4163 + }, + { + "epoch": 0.11433278418451401, + "grad_norm": 0.34769800305366516, + "learning_rate": 1.9846431517646573e-05, + "loss": 0.4921, + "step": 4164 + }, + { + "epoch": 0.1143602416254805, + "grad_norm": 0.4396451711654663, + "learning_rate": 1.9846356108898784e-05, + "loss": 0.5081, + "step": 4165 + }, + { + "epoch": 0.11438769906644701, + "grad_norm": 1.6036192178726196, + "learning_rate": 1.9846280681784397e-05, + "loss": 0.5874, + "step": 4166 + }, + { + "epoch": 0.1144151565074135, + "grad_norm": 0.35703063011169434, + "learning_rate": 1.9846205236303554e-05, + "loss": 0.4805, + "step": 4167 + }, + { + "epoch": 0.11444261394838001, + "grad_norm": 0.4357331097126007, + "learning_rate": 1.9846129772456398e-05, + "loss": 0.535, + "step": 4168 + }, + { + "epoch": 0.1144700713893465, + "grad_norm": 0.35103943943977356, + "learning_rate": 1.9846054290243067e-05, + "loss": 0.5722, + "step": 4169 + }, + { + "epoch": 0.11449752883031301, + "grad_norm": 0.41969531774520874, + "learning_rate": 1.98459787896637e-05, + "loss": 0.5568, + "step": 4170 + }, + { + "epoch": 0.11452498627127952, + "grad_norm": 0.6014300584793091, + "learning_rate": 1.9845903270718443e-05, + "loss": 0.5552, + "step": 4171 + }, + { + "epoch": 0.11455244371224602, + "grad_norm": 0.5062885284423828, + "learning_rate": 1.984582773340743e-05, + "loss": 0.454, + "step": 4172 + }, + { + "epoch": 0.11457990115321252, + "grad_norm": 0.38537123799324036, + "learning_rate": 1.984575217773081e-05, + "loss": 0.5902, + "step": 4173 + }, + { + "epoch": 0.11460735859417902, + "grad_norm": 0.3320772647857666, + "learning_rate": 1.984567660368872e-05, + "loss": 0.5944, + "step": 4174 + }, + { + "epoch": 0.11463481603514553, + "grad_norm": 0.37333858013153076, + "learning_rate": 1.9845601011281298e-05, + "loss": 0.5906, + "step": 4175 + }, + { + "epoch": 0.11466227347611202, + "grad_norm": 0.35021984577178955, + "learning_rate": 1.984552540050869e-05, + "loss": 0.5863, + "step": 4176 + }, + { + "epoch": 0.11468973091707853, + "grad_norm": 0.3794737160205841, + "learning_rate": 1.9845449771371033e-05, + "loss": 0.6478, + "step": 4177 + }, + { + "epoch": 0.11471718835804504, + "grad_norm": 0.3535974323749542, + "learning_rate": 1.9845374123868472e-05, + "loss": 0.5139, + "step": 4178 + }, + { + "epoch": 0.11474464579901153, + "grad_norm": 0.3855864405632019, + "learning_rate": 1.9845298458001146e-05, + "loss": 0.5048, + "step": 4179 + }, + { + "epoch": 0.11477210323997804, + "grad_norm": 0.3546598255634308, + "learning_rate": 1.9845222773769196e-05, + "loss": 0.5746, + "step": 4180 + }, + { + "epoch": 0.11479956068094453, + "grad_norm": 0.3276958763599396, + "learning_rate": 1.984514707117276e-05, + "loss": 0.4756, + "step": 4181 + }, + { + "epoch": 0.11482701812191104, + "grad_norm": 0.34229776263237, + "learning_rate": 1.9845071350211984e-05, + "loss": 0.5029, + "step": 4182 + }, + { + "epoch": 0.11485447556287753, + "grad_norm": 0.3287321925163269, + "learning_rate": 1.984499561088701e-05, + "loss": 0.4635, + "step": 4183 + }, + { + "epoch": 0.11488193300384404, + "grad_norm": 0.3378652036190033, + "learning_rate": 1.9844919853197974e-05, + "loss": 0.5311, + "step": 4184 + }, + { + "epoch": 0.11490939044481055, + "grad_norm": 0.4110613763332367, + "learning_rate": 1.9844844077145024e-05, + "loss": 0.5638, + "step": 4185 + }, + { + "epoch": 0.11493684788577704, + "grad_norm": 0.47741952538490295, + "learning_rate": 1.9844768282728294e-05, + "loss": 0.5849, + "step": 4186 + }, + { + "epoch": 0.11496430532674355, + "grad_norm": 0.3623121678829193, + "learning_rate": 1.984469246994793e-05, + "loss": 0.5333, + "step": 4187 + }, + { + "epoch": 0.11499176276771005, + "grad_norm": 0.39770805835723877, + "learning_rate": 1.9844616638804073e-05, + "loss": 0.5667, + "step": 4188 + }, + { + "epoch": 0.11501922020867655, + "grad_norm": 0.3497883677482605, + "learning_rate": 1.9844540789296863e-05, + "loss": 0.4807, + "step": 4189 + }, + { + "epoch": 0.11504667764964305, + "grad_norm": 0.3554711937904358, + "learning_rate": 1.9844464921426443e-05, + "loss": 0.5624, + "step": 4190 + }, + { + "epoch": 0.11507413509060956, + "grad_norm": 0.3578208088874817, + "learning_rate": 1.9844389035192954e-05, + "loss": 0.5449, + "step": 4191 + }, + { + "epoch": 0.11510159253157606, + "grad_norm": 0.3598921597003937, + "learning_rate": 1.984431313059654e-05, + "loss": 0.5522, + "step": 4192 + }, + { + "epoch": 0.11512904997254256, + "grad_norm": 0.3346679210662842, + "learning_rate": 1.9844237207637337e-05, + "loss": 0.4762, + "step": 4193 + }, + { + "epoch": 0.11515650741350907, + "grad_norm": 0.46697360277175903, + "learning_rate": 1.984416126631549e-05, + "loss": 0.5563, + "step": 4194 + }, + { + "epoch": 0.11518396485447556, + "grad_norm": 0.3646450638771057, + "learning_rate": 1.9844085306631142e-05, + "loss": 0.5205, + "step": 4195 + }, + { + "epoch": 0.11521142229544207, + "grad_norm": 0.35714468359947205, + "learning_rate": 1.984400932858443e-05, + "loss": 0.5075, + "step": 4196 + }, + { + "epoch": 0.11523887973640856, + "grad_norm": 0.3419697880744934, + "learning_rate": 1.9843933332175502e-05, + "loss": 0.5096, + "step": 4197 + }, + { + "epoch": 0.11526633717737507, + "grad_norm": 0.35312512516975403, + "learning_rate": 1.9843857317404496e-05, + "loss": 0.4845, + "step": 4198 + }, + { + "epoch": 0.11529379461834158, + "grad_norm": 0.413107305765152, + "learning_rate": 1.984378128427155e-05, + "loss": 0.6427, + "step": 4199 + }, + { + "epoch": 0.11532125205930807, + "grad_norm": 0.36925172805786133, + "learning_rate": 1.9843705232776815e-05, + "loss": 0.5237, + "step": 4200 + }, + { + "epoch": 0.11534870950027458, + "grad_norm": 0.35858896374702454, + "learning_rate": 1.9843629162920428e-05, + "loss": 0.5457, + "step": 4201 + }, + { + "epoch": 0.11537616694124107, + "grad_norm": 0.681543231010437, + "learning_rate": 1.9843553074702528e-05, + "loss": 0.4684, + "step": 4202 + }, + { + "epoch": 0.11540362438220758, + "grad_norm": 0.424679696559906, + "learning_rate": 1.9843476968123265e-05, + "loss": 0.5888, + "step": 4203 + }, + { + "epoch": 0.11543108182317408, + "grad_norm": 0.3636133670806885, + "learning_rate": 1.984340084318277e-05, + "loss": 0.4717, + "step": 4204 + }, + { + "epoch": 0.11545853926414058, + "grad_norm": 0.46361660957336426, + "learning_rate": 1.9843324699881196e-05, + "loss": 0.555, + "step": 4205 + }, + { + "epoch": 0.11548599670510708, + "grad_norm": 0.3949008285999298, + "learning_rate": 1.9843248538218675e-05, + "loss": 0.5723, + "step": 4206 + }, + { + "epoch": 0.11551345414607359, + "grad_norm": 0.33406275510787964, + "learning_rate": 1.9843172358195356e-05, + "loss": 0.5598, + "step": 4207 + }, + { + "epoch": 0.1155409115870401, + "grad_norm": 0.6197773814201355, + "learning_rate": 1.9843096159811375e-05, + "loss": 0.5663, + "step": 4208 + }, + { + "epoch": 0.11556836902800659, + "grad_norm": 0.44938984513282776, + "learning_rate": 1.9843019943066885e-05, + "loss": 0.6081, + "step": 4209 + }, + { + "epoch": 0.1155958264689731, + "grad_norm": 0.3471980392932892, + "learning_rate": 1.9842943707962016e-05, + "loss": 0.4499, + "step": 4210 + }, + { + "epoch": 0.11562328390993959, + "grad_norm": 0.37636837363243103, + "learning_rate": 1.984286745449692e-05, + "loss": 0.5757, + "step": 4211 + }, + { + "epoch": 0.1156507413509061, + "grad_norm": 0.3579261004924774, + "learning_rate": 1.984279118267173e-05, + "loss": 0.5022, + "step": 4212 + }, + { + "epoch": 0.11567819879187259, + "grad_norm": 0.3555963337421417, + "learning_rate": 1.9842714892486596e-05, + "loss": 0.5444, + "step": 4213 + }, + { + "epoch": 0.1157056562328391, + "grad_norm": 0.32961294054985046, + "learning_rate": 1.9842638583941657e-05, + "loss": 0.4274, + "step": 4214 + }, + { + "epoch": 0.11573311367380561, + "grad_norm": 0.35115379095077515, + "learning_rate": 1.9842562257037055e-05, + "loss": 0.5778, + "step": 4215 + }, + { + "epoch": 0.1157605711147721, + "grad_norm": 0.4391459822654724, + "learning_rate": 1.984248591177293e-05, + "loss": 0.5783, + "step": 4216 + }, + { + "epoch": 0.11578802855573861, + "grad_norm": 0.3486507534980774, + "learning_rate": 1.984240954814943e-05, + "loss": 0.5172, + "step": 4217 + }, + { + "epoch": 0.1158154859967051, + "grad_norm": 0.3970213234424591, + "learning_rate": 1.9842333166166697e-05, + "loss": 0.4913, + "step": 4218 + }, + { + "epoch": 0.11584294343767161, + "grad_norm": 0.3639698326587677, + "learning_rate": 1.984225676582487e-05, + "loss": 0.5213, + "step": 4219 + }, + { + "epoch": 0.1158704008786381, + "grad_norm": 0.34656840562820435, + "learning_rate": 1.9842180347124093e-05, + "loss": 0.4985, + "step": 4220 + }, + { + "epoch": 0.11589785831960461, + "grad_norm": 0.46203258633613586, + "learning_rate": 1.9842103910064507e-05, + "loss": 0.6297, + "step": 4221 + }, + { + "epoch": 0.11592531576057112, + "grad_norm": 0.40180420875549316, + "learning_rate": 1.9842027454646257e-05, + "loss": 0.5677, + "step": 4222 + }, + { + "epoch": 0.11595277320153762, + "grad_norm": 0.4292154908180237, + "learning_rate": 1.9841950980869487e-05, + "loss": 0.5861, + "step": 4223 + }, + { + "epoch": 0.11598023064250412, + "grad_norm": 0.38251960277557373, + "learning_rate": 1.984187448873433e-05, + "loss": 0.5959, + "step": 4224 + }, + { + "epoch": 0.11600768808347062, + "grad_norm": 0.39926809072494507, + "learning_rate": 1.984179797824094e-05, + "loss": 0.5663, + "step": 4225 + }, + { + "epoch": 0.11603514552443712, + "grad_norm": 0.3891090750694275, + "learning_rate": 1.984172144938946e-05, + "loss": 0.543, + "step": 4226 + }, + { + "epoch": 0.11606260296540362, + "grad_norm": 0.35699373483657837, + "learning_rate": 1.9841644902180024e-05, + "loss": 0.5225, + "step": 4227 + }, + { + "epoch": 0.11609006040637013, + "grad_norm": 0.3626454472541809, + "learning_rate": 1.984156833661278e-05, + "loss": 0.5572, + "step": 4228 + }, + { + "epoch": 0.11611751784733663, + "grad_norm": 0.3828919529914856, + "learning_rate": 1.984149175268787e-05, + "loss": 0.623, + "step": 4229 + }, + { + "epoch": 0.11614497528830313, + "grad_norm": 0.36768749356269836, + "learning_rate": 1.9841415150405435e-05, + "loss": 0.5828, + "step": 4230 + }, + { + "epoch": 0.11617243272926964, + "grad_norm": 0.3888963460922241, + "learning_rate": 1.9841338529765623e-05, + "loss": 0.5318, + "step": 4231 + }, + { + "epoch": 0.11619989017023613, + "grad_norm": 0.3801022469997406, + "learning_rate": 1.984126189076857e-05, + "loss": 0.5379, + "step": 4232 + }, + { + "epoch": 0.11622734761120264, + "grad_norm": 0.40141505002975464, + "learning_rate": 1.9841185233414428e-05, + "loss": 0.5517, + "step": 4233 + }, + { + "epoch": 0.11625480505216913, + "grad_norm": 0.38527941703796387, + "learning_rate": 1.984110855770333e-05, + "loss": 0.4709, + "step": 4234 + }, + { + "epoch": 0.11628226249313564, + "grad_norm": 0.3471679389476776, + "learning_rate": 1.9841031863635423e-05, + "loss": 0.5576, + "step": 4235 + }, + { + "epoch": 0.11630971993410213, + "grad_norm": 0.3640578091144562, + "learning_rate": 1.9840955151210854e-05, + "loss": 0.5199, + "step": 4236 + }, + { + "epoch": 0.11633717737506864, + "grad_norm": 0.41536715626716614, + "learning_rate": 1.9840878420429762e-05, + "loss": 0.6857, + "step": 4237 + }, + { + "epoch": 0.11636463481603515, + "grad_norm": 0.3798644542694092, + "learning_rate": 1.984080167129229e-05, + "loss": 0.5524, + "step": 4238 + }, + { + "epoch": 0.11639209225700164, + "grad_norm": 0.3628763258457184, + "learning_rate": 1.984072490379858e-05, + "loss": 0.5834, + "step": 4239 + }, + { + "epoch": 0.11641954969796815, + "grad_norm": 0.42631757259368896, + "learning_rate": 1.984064811794878e-05, + "loss": 0.5408, + "step": 4240 + }, + { + "epoch": 0.11644700713893465, + "grad_norm": 0.34816816449165344, + "learning_rate": 1.984057131374303e-05, + "loss": 0.5024, + "step": 4241 + }, + { + "epoch": 0.11647446457990115, + "grad_norm": 0.3807898163795471, + "learning_rate": 1.9840494491181475e-05, + "loss": 0.5229, + "step": 4242 + }, + { + "epoch": 0.11650192202086765, + "grad_norm": 0.3971134424209595, + "learning_rate": 1.9840417650264257e-05, + "loss": 0.5361, + "step": 4243 + }, + { + "epoch": 0.11652937946183416, + "grad_norm": 0.37945255637168884, + "learning_rate": 1.9840340790991516e-05, + "loss": 0.5797, + "step": 4244 + }, + { + "epoch": 0.11655683690280066, + "grad_norm": 0.40804168581962585, + "learning_rate": 1.9840263913363402e-05, + "loss": 0.5351, + "step": 4245 + }, + { + "epoch": 0.11658429434376716, + "grad_norm": 0.3872681260108948, + "learning_rate": 1.9840187017380054e-05, + "loss": 0.6222, + "step": 4246 + }, + { + "epoch": 0.11661175178473367, + "grad_norm": 0.34761250019073486, + "learning_rate": 1.984011010304162e-05, + "loss": 0.5602, + "step": 4247 + }, + { + "epoch": 0.11663920922570016, + "grad_norm": 0.37196213006973267, + "learning_rate": 1.9840033170348233e-05, + "loss": 0.5545, + "step": 4248 + }, + { + "epoch": 0.11666666666666667, + "grad_norm": 0.3382221758365631, + "learning_rate": 1.9839956219300048e-05, + "loss": 0.5562, + "step": 4249 + }, + { + "epoch": 0.11669412410763316, + "grad_norm": 0.34125590324401855, + "learning_rate": 1.9839879249897205e-05, + "loss": 0.5171, + "step": 4250 + }, + { + "epoch": 0.11672158154859967, + "grad_norm": 0.33845072984695435, + "learning_rate": 1.9839802262139846e-05, + "loss": 0.6263, + "step": 4251 + }, + { + "epoch": 0.11674903898956618, + "grad_norm": 0.3664212226867676, + "learning_rate": 1.9839725256028113e-05, + "loss": 0.614, + "step": 4252 + }, + { + "epoch": 0.11677649643053267, + "grad_norm": 0.4009544849395752, + "learning_rate": 1.9839648231562152e-05, + "loss": 0.6154, + "step": 4253 + }, + { + "epoch": 0.11680395387149918, + "grad_norm": 0.3520594537258148, + "learning_rate": 1.9839571188742108e-05, + "loss": 0.5777, + "step": 4254 + }, + { + "epoch": 0.11683141131246567, + "grad_norm": 0.4419446587562561, + "learning_rate": 1.9839494127568124e-05, + "loss": 0.5154, + "step": 4255 + }, + { + "epoch": 0.11685886875343218, + "grad_norm": 0.3649204969406128, + "learning_rate": 1.9839417048040343e-05, + "loss": 0.5492, + "step": 4256 + }, + { + "epoch": 0.11688632619439868, + "grad_norm": 0.3953251540660858, + "learning_rate": 1.9839339950158905e-05, + "loss": 0.6009, + "step": 4257 + }, + { + "epoch": 0.11691378363536518, + "grad_norm": 0.3759520351886749, + "learning_rate": 1.983926283392396e-05, + "loss": 0.5521, + "step": 4258 + }, + { + "epoch": 0.11694124107633169, + "grad_norm": 0.4148910939693451, + "learning_rate": 1.9839185699335653e-05, + "loss": 0.4753, + "step": 4259 + }, + { + "epoch": 0.11696869851729819, + "grad_norm": 0.34228071570396423, + "learning_rate": 1.983910854639412e-05, + "loss": 0.527, + "step": 4260 + }, + { + "epoch": 0.1169961559582647, + "grad_norm": 0.3581376373767853, + "learning_rate": 1.9839031375099514e-05, + "loss": 0.5292, + "step": 4261 + }, + { + "epoch": 0.11702361339923119, + "grad_norm": 0.3850286900997162, + "learning_rate": 1.9838954185451967e-05, + "loss": 0.6341, + "step": 4262 + }, + { + "epoch": 0.1170510708401977, + "grad_norm": 0.3255082666873932, + "learning_rate": 1.983887697745164e-05, + "loss": 0.5299, + "step": 4263 + }, + { + "epoch": 0.11707852828116419, + "grad_norm": 0.3631250560283661, + "learning_rate": 1.9838799751098657e-05, + "loss": 0.5468, + "step": 4264 + }, + { + "epoch": 0.1171059857221307, + "grad_norm": 0.36529797315597534, + "learning_rate": 1.9838722506393176e-05, + "loss": 0.6234, + "step": 4265 + }, + { + "epoch": 0.1171334431630972, + "grad_norm": 0.36167314648628235, + "learning_rate": 1.983864524333534e-05, + "loss": 0.4918, + "step": 4266 + }, + { + "epoch": 0.1171609006040637, + "grad_norm": 0.3611524701118469, + "learning_rate": 1.983856796192529e-05, + "loss": 0.5664, + "step": 4267 + }, + { + "epoch": 0.11718835804503021, + "grad_norm": 0.3888254761695862, + "learning_rate": 1.9838490662163167e-05, + "loss": 0.4715, + "step": 4268 + }, + { + "epoch": 0.1172158154859967, + "grad_norm": 0.4659492075443268, + "learning_rate": 1.9838413344049122e-05, + "loss": 0.5449, + "step": 4269 + }, + { + "epoch": 0.11724327292696321, + "grad_norm": 0.40978166460990906, + "learning_rate": 1.9838336007583298e-05, + "loss": 0.6012, + "step": 4270 + }, + { + "epoch": 0.1172707303679297, + "grad_norm": 0.3967670798301697, + "learning_rate": 1.9838258652765834e-05, + "loss": 0.5914, + "step": 4271 + }, + { + "epoch": 0.11729818780889621, + "grad_norm": 0.3783209025859833, + "learning_rate": 1.983818127959688e-05, + "loss": 0.5871, + "step": 4272 + }, + { + "epoch": 0.1173256452498627, + "grad_norm": 0.3439953029155731, + "learning_rate": 1.9838103888076573e-05, + "loss": 0.5523, + "step": 4273 + }, + { + "epoch": 0.11735310269082921, + "grad_norm": 0.3168397545814514, + "learning_rate": 1.9838026478205064e-05, + "loss": 0.5542, + "step": 4274 + }, + { + "epoch": 0.11738056013179572, + "grad_norm": 0.3501748740673065, + "learning_rate": 1.98379490499825e-05, + "loss": 0.5215, + "step": 4275 + }, + { + "epoch": 0.11740801757276222, + "grad_norm": 0.3594259023666382, + "learning_rate": 1.9837871603409015e-05, + "loss": 0.5607, + "step": 4276 + }, + { + "epoch": 0.11743547501372872, + "grad_norm": 0.3451467454433441, + "learning_rate": 1.9837794138484763e-05, + "loss": 0.471, + "step": 4277 + }, + { + "epoch": 0.11746293245469522, + "grad_norm": 0.36498814821243286, + "learning_rate": 1.9837716655209887e-05, + "loss": 0.5177, + "step": 4278 + }, + { + "epoch": 0.11749038989566173, + "grad_norm": 0.3491741120815277, + "learning_rate": 1.9837639153584524e-05, + "loss": 0.4916, + "step": 4279 + }, + { + "epoch": 0.11751784733662822, + "grad_norm": 0.43015438318252563, + "learning_rate": 1.9837561633608828e-05, + "loss": 0.636, + "step": 4280 + }, + { + "epoch": 0.11754530477759473, + "grad_norm": 0.3575936555862427, + "learning_rate": 1.983748409528294e-05, + "loss": 0.457, + "step": 4281 + }, + { + "epoch": 0.11757276221856124, + "grad_norm": 0.4023541808128357, + "learning_rate": 1.9837406538607005e-05, + "loss": 0.5836, + "step": 4282 + }, + { + "epoch": 0.11760021965952773, + "grad_norm": 0.37698858976364136, + "learning_rate": 1.9837328963581164e-05, + "loss": 0.6033, + "step": 4283 + }, + { + "epoch": 0.11762767710049424, + "grad_norm": 0.32033872604370117, + "learning_rate": 1.983725137020557e-05, + "loss": 0.5361, + "step": 4284 + }, + { + "epoch": 0.11765513454146073, + "grad_norm": 0.33542105555534363, + "learning_rate": 1.983717375848036e-05, + "loss": 0.4519, + "step": 4285 + }, + { + "epoch": 0.11768259198242724, + "grad_norm": 0.3028580844402313, + "learning_rate": 1.9837096128405678e-05, + "loss": 0.515, + "step": 4286 + }, + { + "epoch": 0.11771004942339373, + "grad_norm": 0.3386352062225342, + "learning_rate": 1.9837018479981675e-05, + "loss": 0.5304, + "step": 4287 + }, + { + "epoch": 0.11773750686436024, + "grad_norm": 0.38026896119117737, + "learning_rate": 1.983694081320849e-05, + "loss": 0.6108, + "step": 4288 + }, + { + "epoch": 0.11776496430532675, + "grad_norm": 0.3676658868789673, + "learning_rate": 1.9836863128086274e-05, + "loss": 0.5904, + "step": 4289 + }, + { + "epoch": 0.11779242174629324, + "grad_norm": 0.35059982538223267, + "learning_rate": 1.983678542461517e-05, + "loss": 0.5941, + "step": 4290 + }, + { + "epoch": 0.11781987918725975, + "grad_norm": 0.33458882570266724, + "learning_rate": 1.983670770279532e-05, + "loss": 0.5331, + "step": 4291 + }, + { + "epoch": 0.11784733662822625, + "grad_norm": 0.33370712399482727, + "learning_rate": 1.983662996262687e-05, + "loss": 0.4313, + "step": 4292 + }, + { + "epoch": 0.11787479406919275, + "grad_norm": 0.3713378608226776, + "learning_rate": 1.9836552204109964e-05, + "loss": 0.536, + "step": 4293 + }, + { + "epoch": 0.11790225151015925, + "grad_norm": 0.3809208571910858, + "learning_rate": 1.983647442724475e-05, + "loss": 0.5844, + "step": 4294 + }, + { + "epoch": 0.11792970895112576, + "grad_norm": 0.34019288420677185, + "learning_rate": 1.983639663203137e-05, + "loss": 0.6045, + "step": 4295 + }, + { + "epoch": 0.11795716639209226, + "grad_norm": 0.41290023922920227, + "learning_rate": 1.9836318818469978e-05, + "loss": 0.7355, + "step": 4296 + }, + { + "epoch": 0.11798462383305876, + "grad_norm": 0.3842349350452423, + "learning_rate": 1.9836240986560705e-05, + "loss": 0.6305, + "step": 4297 + }, + { + "epoch": 0.11801208127402527, + "grad_norm": 0.41502055525779724, + "learning_rate": 1.9836163136303705e-05, + "loss": 0.5435, + "step": 4298 + }, + { + "epoch": 0.11803953871499176, + "grad_norm": 0.3837473690509796, + "learning_rate": 1.9836085267699122e-05, + "loss": 0.5609, + "step": 4299 + }, + { + "epoch": 0.11806699615595827, + "grad_norm": 0.3647226393222809, + "learning_rate": 1.98360073807471e-05, + "loss": 0.5895, + "step": 4300 + }, + { + "epoch": 0.11809445359692476, + "grad_norm": 0.33923929929733276, + "learning_rate": 1.9835929475447786e-05, + "loss": 0.5228, + "step": 4301 + }, + { + "epoch": 0.11812191103789127, + "grad_norm": 0.3621585965156555, + "learning_rate": 1.9835851551801325e-05, + "loss": 0.5352, + "step": 4302 + }, + { + "epoch": 0.11814936847885776, + "grad_norm": 0.3683701455593109, + "learning_rate": 1.983577360980786e-05, + "loss": 0.6076, + "step": 4303 + }, + { + "epoch": 0.11817682591982427, + "grad_norm": 0.3310531675815582, + "learning_rate": 1.9835695649467537e-05, + "loss": 0.4872, + "step": 4304 + }, + { + "epoch": 0.11820428336079078, + "grad_norm": 0.3872511386871338, + "learning_rate": 1.9835617670780506e-05, + "loss": 0.5628, + "step": 4305 + }, + { + "epoch": 0.11823174080175727, + "grad_norm": 0.38014519214630127, + "learning_rate": 1.9835539673746903e-05, + "loss": 0.5659, + "step": 4306 + }, + { + "epoch": 0.11825919824272378, + "grad_norm": 0.3512997627258301, + "learning_rate": 1.9835461658366882e-05, + "loss": 0.5363, + "step": 4307 + }, + { + "epoch": 0.11828665568369028, + "grad_norm": 0.3635537922382355, + "learning_rate": 1.9835383624640588e-05, + "loss": 0.4473, + "step": 4308 + }, + { + "epoch": 0.11831411312465678, + "grad_norm": 0.38315892219543457, + "learning_rate": 1.9835305572568163e-05, + "loss": 0.5494, + "step": 4309 + }, + { + "epoch": 0.11834157056562328, + "grad_norm": 0.33772698044776917, + "learning_rate": 1.9835227502149754e-05, + "loss": 0.5098, + "step": 4310 + }, + { + "epoch": 0.11836902800658979, + "grad_norm": 0.30654942989349365, + "learning_rate": 1.983514941338551e-05, + "loss": 0.5279, + "step": 4311 + }, + { + "epoch": 0.1183964854475563, + "grad_norm": 0.49251797795295715, + "learning_rate": 1.9835071306275567e-05, + "loss": 0.5271, + "step": 4312 + }, + { + "epoch": 0.11842394288852279, + "grad_norm": 0.3411617577075958, + "learning_rate": 1.983499318082008e-05, + "loss": 0.4674, + "step": 4313 + }, + { + "epoch": 0.1184514003294893, + "grad_norm": 0.3665089011192322, + "learning_rate": 1.9834915037019192e-05, + "loss": 0.5278, + "step": 4314 + }, + { + "epoch": 0.11847885777045579, + "grad_norm": 1.7441482543945312, + "learning_rate": 1.983483687487305e-05, + "loss": 0.5092, + "step": 4315 + }, + { + "epoch": 0.1185063152114223, + "grad_norm": 0.37579283118247986, + "learning_rate": 1.9834758694381798e-05, + "loss": 0.5228, + "step": 4316 + }, + { + "epoch": 0.11853377265238879, + "grad_norm": 0.3992732763290405, + "learning_rate": 1.983468049554558e-05, + "loss": 0.6188, + "step": 4317 + }, + { + "epoch": 0.1185612300933553, + "grad_norm": 0.36934909224510193, + "learning_rate": 1.9834602278364544e-05, + "loss": 0.565, + "step": 4318 + }, + { + "epoch": 0.11858868753432181, + "grad_norm": 0.39809319376945496, + "learning_rate": 1.9834524042838837e-05, + "loss": 0.5739, + "step": 4319 + }, + { + "epoch": 0.1186161449752883, + "grad_norm": 0.37034982442855835, + "learning_rate": 1.9834445788968602e-05, + "loss": 0.5429, + "step": 4320 + }, + { + "epoch": 0.11864360241625481, + "grad_norm": 0.3836788833141327, + "learning_rate": 1.9834367516753992e-05, + "loss": 0.4707, + "step": 4321 + }, + { + "epoch": 0.1186710598572213, + "grad_norm": 0.37811779975891113, + "learning_rate": 1.983428922619514e-05, + "loss": 0.5938, + "step": 4322 + }, + { + "epoch": 0.11869851729818781, + "grad_norm": 0.3261350095272064, + "learning_rate": 1.9834210917292207e-05, + "loss": 0.5618, + "step": 4323 + }, + { + "epoch": 0.1187259747391543, + "grad_norm": 0.36373066902160645, + "learning_rate": 1.983413259004533e-05, + "loss": 0.5529, + "step": 4324 + }, + { + "epoch": 0.11875343218012081, + "grad_norm": 0.4428917467594147, + "learning_rate": 1.9834054244454655e-05, + "loss": 0.6149, + "step": 4325 + }, + { + "epoch": 0.11878088962108732, + "grad_norm": 0.3130910396575928, + "learning_rate": 1.9833975880520332e-05, + "loss": 0.5231, + "step": 4326 + }, + { + "epoch": 0.11880834706205382, + "grad_norm": 0.349582314491272, + "learning_rate": 1.9833897498242508e-05, + "loss": 0.5839, + "step": 4327 + }, + { + "epoch": 0.11883580450302032, + "grad_norm": 0.33817189931869507, + "learning_rate": 1.983381909762132e-05, + "loss": 0.5281, + "step": 4328 + }, + { + "epoch": 0.11886326194398682, + "grad_norm": 0.40204575657844543, + "learning_rate": 1.9833740678656925e-05, + "loss": 0.5875, + "step": 4329 + }, + { + "epoch": 0.11889071938495333, + "grad_norm": 0.37649425864219666, + "learning_rate": 1.9833662241349464e-05, + "loss": 0.5096, + "step": 4330 + }, + { + "epoch": 0.11891817682591982, + "grad_norm": 0.4050710201263428, + "learning_rate": 1.9833583785699084e-05, + "loss": 0.5159, + "step": 4331 + }, + { + "epoch": 0.11894563426688633, + "grad_norm": 0.3602672219276428, + "learning_rate": 1.9833505311705932e-05, + "loss": 0.544, + "step": 4332 + }, + { + "epoch": 0.11897309170785283, + "grad_norm": 0.3396686315536499, + "learning_rate": 1.9833426819370156e-05, + "loss": 0.5009, + "step": 4333 + }, + { + "epoch": 0.11900054914881933, + "grad_norm": 0.3839317262172699, + "learning_rate": 1.9833348308691898e-05, + "loss": 0.5251, + "step": 4334 + }, + { + "epoch": 0.11902800658978584, + "grad_norm": 0.37074077129364014, + "learning_rate": 1.9833269779671308e-05, + "loss": 0.6582, + "step": 4335 + }, + { + "epoch": 0.11905546403075233, + "grad_norm": 0.4075047969818115, + "learning_rate": 1.983319123230853e-05, + "loss": 0.6552, + "step": 4336 + }, + { + "epoch": 0.11908292147171884, + "grad_norm": 0.35695329308509827, + "learning_rate": 1.9833112666603712e-05, + "loss": 0.5325, + "step": 4337 + }, + { + "epoch": 0.11911037891268533, + "grad_norm": 0.37662944197654724, + "learning_rate": 1.9833034082557002e-05, + "loss": 0.4535, + "step": 4338 + }, + { + "epoch": 0.11913783635365184, + "grad_norm": 0.36760398745536804, + "learning_rate": 1.9832955480168546e-05, + "loss": 0.6185, + "step": 4339 + }, + { + "epoch": 0.11916529379461833, + "grad_norm": 0.39729052782058716, + "learning_rate": 1.983287685943849e-05, + "loss": 0.5513, + "step": 4340 + }, + { + "epoch": 0.11919275123558484, + "grad_norm": 0.33333903551101685, + "learning_rate": 1.9832798220366977e-05, + "loss": 0.5742, + "step": 4341 + }, + { + "epoch": 0.11922020867655135, + "grad_norm": 0.36943915486335754, + "learning_rate": 1.9832719562954163e-05, + "loss": 0.5614, + "step": 4342 + }, + { + "epoch": 0.11924766611751784, + "grad_norm": 0.3326357305049896, + "learning_rate": 1.9832640887200185e-05, + "loss": 0.5154, + "step": 4343 + }, + { + "epoch": 0.11927512355848435, + "grad_norm": 0.36940327286720276, + "learning_rate": 1.9832562193105192e-05, + "loss": 0.5329, + "step": 4344 + }, + { + "epoch": 0.11930258099945085, + "grad_norm": 0.412171870470047, + "learning_rate": 1.983248348066933e-05, + "loss": 0.5513, + "step": 4345 + }, + { + "epoch": 0.11933003844041735, + "grad_norm": 0.3785339891910553, + "learning_rate": 1.9832404749892753e-05, + "loss": 0.6072, + "step": 4346 + }, + { + "epoch": 0.11935749588138385, + "grad_norm": 0.35041067004203796, + "learning_rate": 1.9832326000775602e-05, + "loss": 0.4891, + "step": 4347 + }, + { + "epoch": 0.11938495332235036, + "grad_norm": 0.3763292133808136, + "learning_rate": 1.9832247233318025e-05, + "loss": 0.5586, + "step": 4348 + }, + { + "epoch": 0.11941241076331686, + "grad_norm": 0.39175355434417725, + "learning_rate": 1.983216844752017e-05, + "loss": 0.58, + "step": 4349 + }, + { + "epoch": 0.11943986820428336, + "grad_norm": 0.393155574798584, + "learning_rate": 1.983208964338218e-05, + "loss": 0.5176, + "step": 4350 + }, + { + "epoch": 0.11946732564524987, + "grad_norm": 0.5005965828895569, + "learning_rate": 1.9832010820904207e-05, + "loss": 0.5727, + "step": 4351 + }, + { + "epoch": 0.11949478308621636, + "grad_norm": 0.32930856943130493, + "learning_rate": 1.9831931980086394e-05, + "loss": 0.5441, + "step": 4352 + }, + { + "epoch": 0.11952224052718287, + "grad_norm": 0.3729868233203888, + "learning_rate": 1.983185312092889e-05, + "loss": 0.5334, + "step": 4353 + }, + { + "epoch": 0.11954969796814936, + "grad_norm": 0.37040573358535767, + "learning_rate": 1.9831774243431842e-05, + "loss": 0.5134, + "step": 4354 + }, + { + "epoch": 0.11957715540911587, + "grad_norm": 0.4061732292175293, + "learning_rate": 1.9831695347595398e-05, + "loss": 0.5631, + "step": 4355 + }, + { + "epoch": 0.11960461285008238, + "grad_norm": 0.3730664551258087, + "learning_rate": 1.9831616433419704e-05, + "loss": 0.5213, + "step": 4356 + }, + { + "epoch": 0.11963207029104887, + "grad_norm": 0.3753736615180969, + "learning_rate": 1.9831537500904906e-05, + "loss": 0.5465, + "step": 4357 + }, + { + "epoch": 0.11965952773201538, + "grad_norm": 0.34763067960739136, + "learning_rate": 1.9831458550051158e-05, + "loss": 0.5877, + "step": 4358 + }, + { + "epoch": 0.11968698517298187, + "grad_norm": 0.4155239462852478, + "learning_rate": 1.9831379580858597e-05, + "loss": 0.5935, + "step": 4359 + }, + { + "epoch": 0.11971444261394838, + "grad_norm": 0.3937745988368988, + "learning_rate": 1.9831300593327377e-05, + "loss": 0.539, + "step": 4360 + }, + { + "epoch": 0.11974190005491488, + "grad_norm": 0.34593167901039124, + "learning_rate": 1.9831221587457642e-05, + "loss": 0.5118, + "step": 4361 + }, + { + "epoch": 0.11976935749588138, + "grad_norm": 0.348603755235672, + "learning_rate": 1.9831142563249543e-05, + "loss": 0.5587, + "step": 4362 + }, + { + "epoch": 0.11979681493684789, + "grad_norm": 0.3623400926589966, + "learning_rate": 1.983106352070322e-05, + "loss": 0.5197, + "step": 4363 + }, + { + "epoch": 0.11982427237781439, + "grad_norm": 0.3395392596721649, + "learning_rate": 1.9830984459818832e-05, + "loss": 0.5392, + "step": 4364 + }, + { + "epoch": 0.1198517298187809, + "grad_norm": 0.3711217939853668, + "learning_rate": 1.983090538059652e-05, + "loss": 0.6533, + "step": 4365 + }, + { + "epoch": 0.11987918725974739, + "grad_norm": 0.3738940358161926, + "learning_rate": 1.983082628303643e-05, + "loss": 0.5519, + "step": 4366 + }, + { + "epoch": 0.1199066447007139, + "grad_norm": 0.3511923551559448, + "learning_rate": 1.983074716713871e-05, + "loss": 0.446, + "step": 4367 + }, + { + "epoch": 0.11993410214168039, + "grad_norm": 0.37447214126586914, + "learning_rate": 1.9830668032903512e-05, + "loss": 0.5112, + "step": 4368 + }, + { + "epoch": 0.1199615595826469, + "grad_norm": 0.3574022650718689, + "learning_rate": 1.9830588880330978e-05, + "loss": 0.5966, + "step": 4369 + }, + { + "epoch": 0.11998901702361339, + "grad_norm": 0.3980100452899933, + "learning_rate": 1.9830509709421258e-05, + "loss": 0.567, + "step": 4370 + }, + { + "epoch": 0.1200164744645799, + "grad_norm": 0.9603545069694519, + "learning_rate": 1.9830430520174505e-05, + "loss": 0.4463, + "step": 4371 + }, + { + "epoch": 0.12004393190554641, + "grad_norm": 0.34502410888671875, + "learning_rate": 1.9830351312590857e-05, + "loss": 0.4739, + "step": 4372 + }, + { + "epoch": 0.1200713893465129, + "grad_norm": 0.34829363226890564, + "learning_rate": 1.9830272086670467e-05, + "loss": 0.4834, + "step": 4373 + }, + { + "epoch": 0.12009884678747941, + "grad_norm": 0.3658924102783203, + "learning_rate": 1.983019284241348e-05, + "loss": 0.545, + "step": 4374 + }, + { + "epoch": 0.1201263042284459, + "grad_norm": 0.39429062604904175, + "learning_rate": 1.983011357982005e-05, + "loss": 0.5531, + "step": 4375 + }, + { + "epoch": 0.12015376166941241, + "grad_norm": 0.35094669461250305, + "learning_rate": 1.983003429889032e-05, + "loss": 0.5183, + "step": 4376 + }, + { + "epoch": 0.1201812191103789, + "grad_norm": 0.4926547110080719, + "learning_rate": 1.9829954999624434e-05, + "loss": 0.577, + "step": 4377 + }, + { + "epoch": 0.12020867655134541, + "grad_norm": 0.32219961285591125, + "learning_rate": 1.9829875682022546e-05, + "loss": 0.4787, + "step": 4378 + }, + { + "epoch": 0.12023613399231192, + "grad_norm": 0.4216304123401642, + "learning_rate": 1.9829796346084808e-05, + "loss": 0.5561, + "step": 4379 + }, + { + "epoch": 0.12026359143327842, + "grad_norm": 0.36240512132644653, + "learning_rate": 1.9829716991811357e-05, + "loss": 0.5583, + "step": 4380 + }, + { + "epoch": 0.12029104887424492, + "grad_norm": 0.4094868004322052, + "learning_rate": 1.982963761920235e-05, + "loss": 0.6212, + "step": 4381 + }, + { + "epoch": 0.12031850631521142, + "grad_norm": 0.41385418176651, + "learning_rate": 1.982955822825793e-05, + "loss": 0.6435, + "step": 4382 + }, + { + "epoch": 0.12034596375617793, + "grad_norm": 0.35844698548316956, + "learning_rate": 1.9829478818978247e-05, + "loss": 0.562, + "step": 4383 + }, + { + "epoch": 0.12037342119714442, + "grad_norm": 0.43544918298721313, + "learning_rate": 1.982939939136345e-05, + "loss": 0.5876, + "step": 4384 + }, + { + "epoch": 0.12040087863811093, + "grad_norm": 0.3851318359375, + "learning_rate": 1.982931994541368e-05, + "loss": 0.5637, + "step": 4385 + }, + { + "epoch": 0.12042833607907744, + "grad_norm": 0.35776981711387634, + "learning_rate": 1.98292404811291e-05, + "loss": 0.585, + "step": 4386 + }, + { + "epoch": 0.12045579352004393, + "grad_norm": 0.40571829676628113, + "learning_rate": 1.9829160998509845e-05, + "loss": 0.5985, + "step": 4387 + }, + { + "epoch": 0.12048325096101044, + "grad_norm": 0.32351094484329224, + "learning_rate": 1.982908149755607e-05, + "loss": 0.5059, + "step": 4388 + }, + { + "epoch": 0.12051070840197693, + "grad_norm": 0.3647138476371765, + "learning_rate": 1.9829001978267914e-05, + "loss": 0.4933, + "step": 4389 + }, + { + "epoch": 0.12053816584294344, + "grad_norm": 0.38570430874824524, + "learning_rate": 1.982892244064554e-05, + "loss": 0.5579, + "step": 4390 + }, + { + "epoch": 0.12056562328390993, + "grad_norm": 0.3582010567188263, + "learning_rate": 1.9828842884689085e-05, + "loss": 0.5811, + "step": 4391 + }, + { + "epoch": 0.12059308072487644, + "grad_norm": 0.3458060324192047, + "learning_rate": 1.98287633103987e-05, + "loss": 0.4998, + "step": 4392 + }, + { + "epoch": 0.12062053816584295, + "grad_norm": 0.43845903873443604, + "learning_rate": 1.9828683717774536e-05, + "loss": 0.5588, + "step": 4393 + }, + { + "epoch": 0.12064799560680944, + "grad_norm": 0.4444512128829956, + "learning_rate": 1.9828604106816743e-05, + "loss": 0.5135, + "step": 4394 + }, + { + "epoch": 0.12067545304777595, + "grad_norm": 0.38536563515663147, + "learning_rate": 1.982852447752546e-05, + "loss": 0.5149, + "step": 4395 + }, + { + "epoch": 0.12070291048874245, + "grad_norm": 0.5001475811004639, + "learning_rate": 1.9828444829900847e-05, + "loss": 0.5634, + "step": 4396 + }, + { + "epoch": 0.12073036792970895, + "grad_norm": 0.37451645731925964, + "learning_rate": 1.9828365163943046e-05, + "loss": 0.5046, + "step": 4397 + }, + { + "epoch": 0.12075782537067545, + "grad_norm": 0.4188501834869385, + "learning_rate": 1.9828285479652203e-05, + "loss": 0.5954, + "step": 4398 + }, + { + "epoch": 0.12078528281164196, + "grad_norm": 0.38071173429489136, + "learning_rate": 1.9828205777028478e-05, + "loss": 0.5392, + "step": 4399 + }, + { + "epoch": 0.12081274025260846, + "grad_norm": 0.3504979908466339, + "learning_rate": 1.982812605607201e-05, + "loss": 0.5446, + "step": 4400 + }, + { + "epoch": 0.12084019769357496, + "grad_norm": 0.3245278596878052, + "learning_rate": 1.982804631678295e-05, + "loss": 0.4406, + "step": 4401 + }, + { + "epoch": 0.12086765513454147, + "grad_norm": 0.34069398045539856, + "learning_rate": 1.9827966559161445e-05, + "loss": 0.5259, + "step": 4402 + }, + { + "epoch": 0.12089511257550796, + "grad_norm": 0.3438992500305176, + "learning_rate": 1.9827886783207645e-05, + "loss": 0.588, + "step": 4403 + }, + { + "epoch": 0.12092257001647447, + "grad_norm": 0.3452449142932892, + "learning_rate": 1.9827806988921703e-05, + "loss": 0.5954, + "step": 4404 + }, + { + "epoch": 0.12095002745744096, + "grad_norm": 0.40946444869041443, + "learning_rate": 1.9827727176303762e-05, + "loss": 0.5198, + "step": 4405 + }, + { + "epoch": 0.12097748489840747, + "grad_norm": 0.3553365170955658, + "learning_rate": 1.9827647345353972e-05, + "loss": 0.6055, + "step": 4406 + }, + { + "epoch": 0.12100494233937396, + "grad_norm": 0.3381158411502838, + "learning_rate": 1.9827567496072485e-05, + "loss": 0.5601, + "step": 4407 + }, + { + "epoch": 0.12103239978034047, + "grad_norm": 0.3465712368488312, + "learning_rate": 1.9827487628459447e-05, + "loss": 0.5837, + "step": 4408 + }, + { + "epoch": 0.12105985722130698, + "grad_norm": 0.37039709091186523, + "learning_rate": 1.982740774251501e-05, + "loss": 0.5238, + "step": 4409 + }, + { + "epoch": 0.12108731466227347, + "grad_norm": 0.34742099046707153, + "learning_rate": 1.9827327838239322e-05, + "loss": 0.5575, + "step": 4410 + }, + { + "epoch": 0.12111477210323998, + "grad_norm": 0.4354720711708069, + "learning_rate": 1.9827247915632527e-05, + "loss": 0.5409, + "step": 4411 + }, + { + "epoch": 0.12114222954420648, + "grad_norm": 0.3863984942436218, + "learning_rate": 1.982716797469478e-05, + "loss": 0.6206, + "step": 4412 + }, + { + "epoch": 0.12116968698517298, + "grad_norm": 0.37640589475631714, + "learning_rate": 1.9827088015426228e-05, + "loss": 0.5816, + "step": 4413 + }, + { + "epoch": 0.12119714442613948, + "grad_norm": 0.3277275860309601, + "learning_rate": 1.982700803782702e-05, + "loss": 0.489, + "step": 4414 + }, + { + "epoch": 0.12122460186710599, + "grad_norm": 0.43336087465286255, + "learning_rate": 1.9826928041897307e-05, + "loss": 0.555, + "step": 4415 + }, + { + "epoch": 0.1212520593080725, + "grad_norm": 0.36345458030700684, + "learning_rate": 1.9826848027637234e-05, + "loss": 0.4991, + "step": 4416 + }, + { + "epoch": 0.12127951674903899, + "grad_norm": 0.35821524262428284, + "learning_rate": 1.982676799504696e-05, + "loss": 0.4739, + "step": 4417 + }, + { + "epoch": 0.1213069741900055, + "grad_norm": 0.4192046523094177, + "learning_rate": 1.982668794412662e-05, + "loss": 0.5689, + "step": 4418 + }, + { + "epoch": 0.12133443163097199, + "grad_norm": 0.3432762026786804, + "learning_rate": 1.9826607874876374e-05, + "loss": 0.5623, + "step": 4419 + }, + { + "epoch": 0.1213618890719385, + "grad_norm": 0.3570500612258911, + "learning_rate": 1.9826527787296367e-05, + "loss": 0.5859, + "step": 4420 + }, + { + "epoch": 0.12138934651290499, + "grad_norm": 0.36303475499153137, + "learning_rate": 1.982644768138675e-05, + "loss": 0.573, + "step": 4421 + }, + { + "epoch": 0.1214168039538715, + "grad_norm": 0.361372172832489, + "learning_rate": 1.982636755714767e-05, + "loss": 0.4629, + "step": 4422 + }, + { + "epoch": 0.12144426139483801, + "grad_norm": 0.34453481435775757, + "learning_rate": 1.982628741457928e-05, + "loss": 0.5547, + "step": 4423 + }, + { + "epoch": 0.1214717188358045, + "grad_norm": 0.38676899671554565, + "learning_rate": 1.982620725368173e-05, + "loss": 0.6063, + "step": 4424 + }, + { + "epoch": 0.12149917627677101, + "grad_norm": 0.3573000133037567, + "learning_rate": 1.9826127074455162e-05, + "loss": 0.5155, + "step": 4425 + }, + { + "epoch": 0.1215266337177375, + "grad_norm": 0.34688103199005127, + "learning_rate": 1.9826046876899734e-05, + "loss": 0.5318, + "step": 4426 + }, + { + "epoch": 0.12155409115870401, + "grad_norm": 0.3390115201473236, + "learning_rate": 1.982596666101559e-05, + "loss": 0.5128, + "step": 4427 + }, + { + "epoch": 0.1215815485996705, + "grad_norm": 0.4050641357898712, + "learning_rate": 1.9825886426802888e-05, + "loss": 0.5821, + "step": 4428 + }, + { + "epoch": 0.12160900604063701, + "grad_norm": 0.4352811574935913, + "learning_rate": 1.9825806174261764e-05, + "loss": 0.5786, + "step": 4429 + }, + { + "epoch": 0.12163646348160352, + "grad_norm": 0.34241983294487, + "learning_rate": 1.9825725903392382e-05, + "loss": 0.5134, + "step": 4430 + }, + { + "epoch": 0.12166392092257002, + "grad_norm": 0.7976049184799194, + "learning_rate": 1.9825645614194885e-05, + "loss": 0.5645, + "step": 4431 + }, + { + "epoch": 0.12169137836353652, + "grad_norm": 0.37559396028518677, + "learning_rate": 1.982556530666942e-05, + "loss": 0.4945, + "step": 4432 + }, + { + "epoch": 0.12171883580450302, + "grad_norm": 0.3490278720855713, + "learning_rate": 1.9825484980816138e-05, + "loss": 0.5677, + "step": 4433 + }, + { + "epoch": 0.12174629324546953, + "grad_norm": 0.3461850583553314, + "learning_rate": 1.9825404636635194e-05, + "loss": 0.3935, + "step": 4434 + }, + { + "epoch": 0.12177375068643602, + "grad_norm": 0.36581331491470337, + "learning_rate": 1.9825324274126733e-05, + "loss": 0.6082, + "step": 4435 + }, + { + "epoch": 0.12180120812740253, + "grad_norm": 0.3520062565803528, + "learning_rate": 1.9825243893290906e-05, + "loss": 0.6316, + "step": 4436 + }, + { + "epoch": 0.12182866556836902, + "grad_norm": 0.37399837374687195, + "learning_rate": 1.9825163494127864e-05, + "loss": 0.4493, + "step": 4437 + }, + { + "epoch": 0.12185612300933553, + "grad_norm": 0.3887555003166199, + "learning_rate": 1.9825083076637757e-05, + "loss": 0.4774, + "step": 4438 + }, + { + "epoch": 0.12188358045030204, + "grad_norm": 0.3307332694530487, + "learning_rate": 1.9825002640820733e-05, + "loss": 0.513, + "step": 4439 + }, + { + "epoch": 0.12191103789126853, + "grad_norm": 0.33402732014656067, + "learning_rate": 1.9824922186676945e-05, + "loss": 0.6136, + "step": 4440 + }, + { + "epoch": 0.12193849533223504, + "grad_norm": 0.4366951882839203, + "learning_rate": 1.982484171420654e-05, + "loss": 0.5128, + "step": 4441 + }, + { + "epoch": 0.12196595277320153, + "grad_norm": 0.44313961267471313, + "learning_rate": 1.982476122340967e-05, + "loss": 0.441, + "step": 4442 + }, + { + "epoch": 0.12199341021416804, + "grad_norm": 0.3746240735054016, + "learning_rate": 1.9824680714286485e-05, + "loss": 0.5133, + "step": 4443 + }, + { + "epoch": 0.12202086765513454, + "grad_norm": 0.4416984021663666, + "learning_rate": 1.982460018683713e-05, + "loss": 0.5977, + "step": 4444 + }, + { + "epoch": 0.12204832509610104, + "grad_norm": 0.33317235112190247, + "learning_rate": 1.9824519641061767e-05, + "loss": 0.5073, + "step": 4445 + }, + { + "epoch": 0.12207578253706755, + "grad_norm": 0.3381946086883545, + "learning_rate": 1.9824439076960536e-05, + "loss": 0.5384, + "step": 4446 + }, + { + "epoch": 0.12210323997803404, + "grad_norm": 0.3394647240638733, + "learning_rate": 1.982435849453359e-05, + "loss": 0.5332, + "step": 4447 + }, + { + "epoch": 0.12213069741900055, + "grad_norm": 0.3510815501213074, + "learning_rate": 1.982427789378108e-05, + "loss": 0.5833, + "step": 4448 + }, + { + "epoch": 0.12215815485996705, + "grad_norm": 0.3221791684627533, + "learning_rate": 1.982419727470316e-05, + "loss": 0.5268, + "step": 4449 + }, + { + "epoch": 0.12218561230093355, + "grad_norm": 0.35876035690307617, + "learning_rate": 1.9824116637299972e-05, + "loss": 0.5565, + "step": 4450 + }, + { + "epoch": 0.12221306974190005, + "grad_norm": 0.5808611512184143, + "learning_rate": 1.982403598157167e-05, + "loss": 0.5925, + "step": 4451 + }, + { + "epoch": 0.12224052718286656, + "grad_norm": 0.3830517828464508, + "learning_rate": 1.982395530751841e-05, + "loss": 0.5246, + "step": 4452 + }, + { + "epoch": 0.12226798462383306, + "grad_norm": 1.6483724117279053, + "learning_rate": 1.9823874615140335e-05, + "loss": 0.6146, + "step": 4453 + }, + { + "epoch": 0.12229544206479956, + "grad_norm": 0.37066078186035156, + "learning_rate": 1.98237939044376e-05, + "loss": 0.5266, + "step": 4454 + }, + { + "epoch": 0.12232289950576607, + "grad_norm": 0.3495706021785736, + "learning_rate": 1.9823713175410352e-05, + "loss": 0.5063, + "step": 4455 + }, + { + "epoch": 0.12235035694673256, + "grad_norm": 0.3710753321647644, + "learning_rate": 1.9823632428058744e-05, + "loss": 0.5452, + "step": 4456 + }, + { + "epoch": 0.12237781438769907, + "grad_norm": 0.34229040145874023, + "learning_rate": 1.9823551662382926e-05, + "loss": 0.5798, + "step": 4457 + }, + { + "epoch": 0.12240527182866556, + "grad_norm": 0.44905513525009155, + "learning_rate": 1.982347087838305e-05, + "loss": 0.5721, + "step": 4458 + }, + { + "epoch": 0.12243272926963207, + "grad_norm": 0.34064796566963196, + "learning_rate": 1.982339007605927e-05, + "loss": 0.5656, + "step": 4459 + }, + { + "epoch": 0.12246018671059858, + "grad_norm": 0.36782774329185486, + "learning_rate": 1.9823309255411725e-05, + "loss": 0.5016, + "step": 4460 + }, + { + "epoch": 0.12248764415156507, + "grad_norm": 0.3486008644104004, + "learning_rate": 1.9823228416440577e-05, + "loss": 0.528, + "step": 4461 + }, + { + "epoch": 0.12251510159253158, + "grad_norm": 0.3348163664340973, + "learning_rate": 1.982314755914597e-05, + "loss": 0.57, + "step": 4462 + }, + { + "epoch": 0.12254255903349807, + "grad_norm": 0.37935537099838257, + "learning_rate": 1.9823066683528057e-05, + "loss": 0.4647, + "step": 4463 + }, + { + "epoch": 0.12257001647446458, + "grad_norm": 0.36315077543258667, + "learning_rate": 1.9822985789586992e-05, + "loss": 0.5554, + "step": 4464 + }, + { + "epoch": 0.12259747391543108, + "grad_norm": 0.4080066680908203, + "learning_rate": 1.9822904877322924e-05, + "loss": 0.5817, + "step": 4465 + }, + { + "epoch": 0.12262493135639758, + "grad_norm": 0.4378693401813507, + "learning_rate": 1.9822823946736002e-05, + "loss": 0.5017, + "step": 4466 + }, + { + "epoch": 0.12265238879736409, + "grad_norm": 0.3536355793476105, + "learning_rate": 1.9822742997826378e-05, + "loss": 0.5724, + "step": 4467 + }, + { + "epoch": 0.12267984623833059, + "grad_norm": 0.7946407794952393, + "learning_rate": 1.9822662030594202e-05, + "loss": 0.5324, + "step": 4468 + }, + { + "epoch": 0.1227073036792971, + "grad_norm": 0.365622878074646, + "learning_rate": 1.982258104503963e-05, + "loss": 0.5247, + "step": 4469 + }, + { + "epoch": 0.12273476112026359, + "grad_norm": 0.3522932827472687, + "learning_rate": 1.9822500041162808e-05, + "loss": 0.5245, + "step": 4470 + }, + { + "epoch": 0.1227622185612301, + "grad_norm": 0.33957645297050476, + "learning_rate": 1.9822419018963886e-05, + "loss": 0.4581, + "step": 4471 + }, + { + "epoch": 0.12278967600219659, + "grad_norm": 0.3887788951396942, + "learning_rate": 1.9822337978443017e-05, + "loss": 0.4911, + "step": 4472 + }, + { + "epoch": 0.1228171334431631, + "grad_norm": 0.3589390218257904, + "learning_rate": 1.982225691960036e-05, + "loss": 0.6144, + "step": 4473 + }, + { + "epoch": 0.12284459088412959, + "grad_norm": 0.3854289948940277, + "learning_rate": 1.982217584243605e-05, + "loss": 0.4485, + "step": 4474 + }, + { + "epoch": 0.1228720483250961, + "grad_norm": 0.3577403426170349, + "learning_rate": 1.9822094746950253e-05, + "loss": 0.4974, + "step": 4475 + }, + { + "epoch": 0.12289950576606261, + "grad_norm": 0.3502238690853119, + "learning_rate": 1.982201363314311e-05, + "loss": 0.6232, + "step": 4476 + }, + { + "epoch": 0.1229269632070291, + "grad_norm": 0.3329123556613922, + "learning_rate": 1.982193250101478e-05, + "loss": 0.5471, + "step": 4477 + }, + { + "epoch": 0.12295442064799561, + "grad_norm": 0.3653741478919983, + "learning_rate": 1.9821851350565412e-05, + "loss": 0.5619, + "step": 4478 + }, + { + "epoch": 0.1229818780889621, + "grad_norm": 0.4308152496814728, + "learning_rate": 1.9821770181795156e-05, + "loss": 0.5205, + "step": 4479 + }, + { + "epoch": 0.12300933552992861, + "grad_norm": 0.45550432801246643, + "learning_rate": 1.982168899470416e-05, + "loss": 0.5861, + "step": 4480 + }, + { + "epoch": 0.1230367929708951, + "grad_norm": 0.4742860794067383, + "learning_rate": 1.9821607789292584e-05, + "loss": 0.5487, + "step": 4481 + }, + { + "epoch": 0.12306425041186161, + "grad_norm": 0.3706261217594147, + "learning_rate": 1.9821526565560573e-05, + "loss": 0.5384, + "step": 4482 + }, + { + "epoch": 0.12309170785282812, + "grad_norm": 0.35688507556915283, + "learning_rate": 1.982144532350828e-05, + "loss": 0.5478, + "step": 4483 + }, + { + "epoch": 0.12311916529379462, + "grad_norm": 0.37396326661109924, + "learning_rate": 1.9821364063135855e-05, + "loss": 0.5248, + "step": 4484 + }, + { + "epoch": 0.12314662273476112, + "grad_norm": 0.4188917875289917, + "learning_rate": 1.9821282784443454e-05, + "loss": 0.5888, + "step": 4485 + }, + { + "epoch": 0.12317408017572762, + "grad_norm": 0.3921259939670563, + "learning_rate": 1.9821201487431224e-05, + "loss": 0.5215, + "step": 4486 + }, + { + "epoch": 0.12320153761669413, + "grad_norm": 0.34143099188804626, + "learning_rate": 1.982112017209932e-05, + "loss": 0.4988, + "step": 4487 + }, + { + "epoch": 0.12322899505766062, + "grad_norm": 0.4915735423564911, + "learning_rate": 1.9821038838447895e-05, + "loss": 0.4973, + "step": 4488 + }, + { + "epoch": 0.12325645249862713, + "grad_norm": 0.3705565631389618, + "learning_rate": 1.9820957486477094e-05, + "loss": 0.4905, + "step": 4489 + }, + { + "epoch": 0.12328390993959364, + "grad_norm": 0.3982454240322113, + "learning_rate": 1.9820876116187072e-05, + "loss": 0.5099, + "step": 4490 + }, + { + "epoch": 0.12331136738056013, + "grad_norm": 0.3848978281021118, + "learning_rate": 1.9820794727577987e-05, + "loss": 0.546, + "step": 4491 + }, + { + "epoch": 0.12333882482152664, + "grad_norm": 0.3555212616920471, + "learning_rate": 1.982071332064998e-05, + "loss": 0.4974, + "step": 4492 + }, + { + "epoch": 0.12336628226249313, + "grad_norm": 0.4396597445011139, + "learning_rate": 1.982063189540321e-05, + "loss": 0.6043, + "step": 4493 + }, + { + "epoch": 0.12339373970345964, + "grad_norm": 0.3869277834892273, + "learning_rate": 1.982055045183783e-05, + "loss": 0.6017, + "step": 4494 + }, + { + "epoch": 0.12342119714442613, + "grad_norm": 0.3532942831516266, + "learning_rate": 1.9820468989953986e-05, + "loss": 0.4852, + "step": 4495 + }, + { + "epoch": 0.12344865458539264, + "grad_norm": 0.33970898389816284, + "learning_rate": 1.9820387509751833e-05, + "loss": 0.4782, + "step": 4496 + }, + { + "epoch": 0.12347611202635915, + "grad_norm": 0.4042155146598816, + "learning_rate": 1.9820306011231522e-05, + "loss": 0.6054, + "step": 4497 + }, + { + "epoch": 0.12350356946732564, + "grad_norm": 0.39494016766548157, + "learning_rate": 1.982022449439321e-05, + "loss": 0.6421, + "step": 4498 + }, + { + "epoch": 0.12353102690829215, + "grad_norm": 0.3989347219467163, + "learning_rate": 1.9820142959237042e-05, + "loss": 0.6076, + "step": 4499 + }, + { + "epoch": 0.12355848434925865, + "grad_norm": 0.405865877866745, + "learning_rate": 1.9820061405763175e-05, + "loss": 0.6346, + "step": 4500 + }, + { + "epoch": 0.12358594179022515, + "grad_norm": 0.3560645878314972, + "learning_rate": 1.9819979833971756e-05, + "loss": 0.5277, + "step": 4501 + }, + { + "epoch": 0.12361339923119165, + "grad_norm": 0.3665539026260376, + "learning_rate": 1.981989824386294e-05, + "loss": 0.639, + "step": 4502 + }, + { + "epoch": 0.12364085667215816, + "grad_norm": 0.38791871070861816, + "learning_rate": 1.9819816635436883e-05, + "loss": 0.6396, + "step": 4503 + }, + { + "epoch": 0.12366831411312465, + "grad_norm": 0.41537898778915405, + "learning_rate": 1.9819735008693734e-05, + "loss": 0.519, + "step": 4504 + }, + { + "epoch": 0.12369577155409116, + "grad_norm": 0.3463146984577179, + "learning_rate": 1.9819653363633647e-05, + "loss": 0.5834, + "step": 4505 + }, + { + "epoch": 0.12372322899505767, + "grad_norm": 0.3613133430480957, + "learning_rate": 1.9819571700256767e-05, + "loss": 0.543, + "step": 4506 + }, + { + "epoch": 0.12375068643602416, + "grad_norm": 0.3808510899543762, + "learning_rate": 1.9819490018563254e-05, + "loss": 0.5964, + "step": 4507 + }, + { + "epoch": 0.12377814387699067, + "grad_norm": 0.4057452380657196, + "learning_rate": 1.981940831855326e-05, + "loss": 0.5139, + "step": 4508 + }, + { + "epoch": 0.12380560131795716, + "grad_norm": 0.4041282534599304, + "learning_rate": 1.9819326600226933e-05, + "loss": 0.5718, + "step": 4509 + }, + { + "epoch": 0.12383305875892367, + "grad_norm": 0.3776470720767975, + "learning_rate": 1.9819244863584427e-05, + "loss": 0.5965, + "step": 4510 + }, + { + "epoch": 0.12386051619989016, + "grad_norm": 0.4001157581806183, + "learning_rate": 1.9819163108625898e-05, + "loss": 0.5126, + "step": 4511 + }, + { + "epoch": 0.12388797364085667, + "grad_norm": 0.32709410786628723, + "learning_rate": 1.9819081335351497e-05, + "loss": 0.5239, + "step": 4512 + }, + { + "epoch": 0.12391543108182318, + "grad_norm": 0.41309496760368347, + "learning_rate": 1.981899954376137e-05, + "loss": 0.5328, + "step": 4513 + }, + { + "epoch": 0.12394288852278967, + "grad_norm": 0.3411867320537567, + "learning_rate": 1.9818917733855682e-05, + "loss": 0.5097, + "step": 4514 + }, + { + "epoch": 0.12397034596375618, + "grad_norm": 0.36101090908050537, + "learning_rate": 1.9818835905634573e-05, + "loss": 0.5307, + "step": 4515 + }, + { + "epoch": 0.12399780340472268, + "grad_norm": 0.37608838081359863, + "learning_rate": 1.9818754059098205e-05, + "loss": 0.5236, + "step": 4516 + }, + { + "epoch": 0.12402526084568918, + "grad_norm": 0.3685913681983948, + "learning_rate": 1.9818672194246726e-05, + "loss": 0.5244, + "step": 4517 + }, + { + "epoch": 0.12405271828665568, + "grad_norm": 0.36835530400276184, + "learning_rate": 1.9818590311080286e-05, + "loss": 0.5735, + "step": 4518 + }, + { + "epoch": 0.12408017572762219, + "grad_norm": 0.3671985864639282, + "learning_rate": 1.9818508409599047e-05, + "loss": 0.5545, + "step": 4519 + }, + { + "epoch": 0.1241076331685887, + "grad_norm": 0.392423152923584, + "learning_rate": 1.9818426489803152e-05, + "loss": 0.5884, + "step": 4520 + }, + { + "epoch": 0.12413509060955519, + "grad_norm": 0.34275850653648376, + "learning_rate": 1.981834455169276e-05, + "loss": 0.5504, + "step": 4521 + }, + { + "epoch": 0.1241625480505217, + "grad_norm": 0.3889680504798889, + "learning_rate": 1.981826259526802e-05, + "loss": 0.6218, + "step": 4522 + }, + { + "epoch": 0.12419000549148819, + "grad_norm": 0.3857469856739044, + "learning_rate": 1.9818180620529088e-05, + "loss": 0.5784, + "step": 4523 + }, + { + "epoch": 0.1242174629324547, + "grad_norm": 0.3293091952800751, + "learning_rate": 1.9818098627476116e-05, + "loss": 0.5917, + "step": 4524 + }, + { + "epoch": 0.12424492037342119, + "grad_norm": 0.329585999250412, + "learning_rate": 1.981801661610925e-05, + "loss": 0.4885, + "step": 4525 + }, + { + "epoch": 0.1242723778143877, + "grad_norm": 0.4473568797111511, + "learning_rate": 1.9817934586428657e-05, + "loss": 0.5969, + "step": 4526 + }, + { + "epoch": 0.12429983525535421, + "grad_norm": 0.3558028042316437, + "learning_rate": 1.981785253843448e-05, + "loss": 0.547, + "step": 4527 + }, + { + "epoch": 0.1243272926963207, + "grad_norm": 0.3729296028614044, + "learning_rate": 1.9817770472126878e-05, + "loss": 0.4732, + "step": 4528 + }, + { + "epoch": 0.12435475013728721, + "grad_norm": 0.3698374927043915, + "learning_rate": 1.9817688387505995e-05, + "loss": 0.5633, + "step": 4529 + }, + { + "epoch": 0.1243822075782537, + "grad_norm": 0.3829362392425537, + "learning_rate": 1.9817606284571992e-05, + "loss": 0.527, + "step": 4530 + }, + { + "epoch": 0.12440966501922021, + "grad_norm": 0.36799001693725586, + "learning_rate": 1.9817524163325022e-05, + "loss": 0.5406, + "step": 4531 + }, + { + "epoch": 0.1244371224601867, + "grad_norm": 0.3593764305114746, + "learning_rate": 1.9817442023765237e-05, + "loss": 0.5126, + "step": 4532 + }, + { + "epoch": 0.12446457990115321, + "grad_norm": 0.3810892701148987, + "learning_rate": 1.9817359865892785e-05, + "loss": 0.5565, + "step": 4533 + }, + { + "epoch": 0.12449203734211972, + "grad_norm": 0.3468562960624695, + "learning_rate": 1.9817277689707827e-05, + "loss": 0.5553, + "step": 4534 + }, + { + "epoch": 0.12451949478308622, + "grad_norm": 0.35383379459381104, + "learning_rate": 1.9817195495210508e-05, + "loss": 0.5819, + "step": 4535 + }, + { + "epoch": 0.12454695222405272, + "grad_norm": 0.4723353385925293, + "learning_rate": 1.981711328240099e-05, + "loss": 0.4977, + "step": 4536 + }, + { + "epoch": 0.12457440966501922, + "grad_norm": 0.3190907835960388, + "learning_rate": 1.981703105127942e-05, + "loss": 0.4701, + "step": 4537 + }, + { + "epoch": 0.12460186710598573, + "grad_norm": 0.3448849022388458, + "learning_rate": 1.9816948801845957e-05, + "loss": 0.5509, + "step": 4538 + }, + { + "epoch": 0.12462932454695222, + "grad_norm": 0.3485904633998871, + "learning_rate": 1.981686653410075e-05, + "loss": 0.4907, + "step": 4539 + }, + { + "epoch": 0.12465678198791873, + "grad_norm": 0.4145749807357788, + "learning_rate": 1.9816784248043952e-05, + "loss": 0.5109, + "step": 4540 + }, + { + "epoch": 0.12468423942888522, + "grad_norm": 0.33381637930870056, + "learning_rate": 1.981670194367572e-05, + "loss": 0.5699, + "step": 4541 + }, + { + "epoch": 0.12471169686985173, + "grad_norm": 0.3373650908470154, + "learning_rate": 1.9816619620996206e-05, + "loss": 0.5492, + "step": 4542 + }, + { + "epoch": 0.12473915431081824, + "grad_norm": 0.3808574378490448, + "learning_rate": 1.9816537280005562e-05, + "loss": 0.5352, + "step": 4543 + }, + { + "epoch": 0.12476661175178473, + "grad_norm": 0.34907492995262146, + "learning_rate": 1.9816454920703943e-05, + "loss": 0.6171, + "step": 4544 + }, + { + "epoch": 0.12479406919275124, + "grad_norm": 0.3677372634410858, + "learning_rate": 1.98163725430915e-05, + "loss": 0.6476, + "step": 4545 + }, + { + "epoch": 0.12482152663371773, + "grad_norm": 0.4435366988182068, + "learning_rate": 1.9816290147168395e-05, + "loss": 0.4682, + "step": 4546 + }, + { + "epoch": 0.12484898407468424, + "grad_norm": 0.3766632676124573, + "learning_rate": 1.9816207732934774e-05, + "loss": 0.5299, + "step": 4547 + }, + { + "epoch": 0.12487644151565074, + "grad_norm": 0.34825363755226135, + "learning_rate": 1.9816125300390792e-05, + "loss": 0.5274, + "step": 4548 + }, + { + "epoch": 0.12490389895661724, + "grad_norm": 0.460807204246521, + "learning_rate": 1.98160428495366e-05, + "loss": 0.6617, + "step": 4549 + }, + { + "epoch": 0.12493135639758375, + "grad_norm": 0.3821741044521332, + "learning_rate": 1.9815960380372358e-05, + "loss": 0.5602, + "step": 4550 + }, + { + "epoch": 0.12495881383855025, + "grad_norm": 0.3629308342933655, + "learning_rate": 1.9815877892898218e-05, + "loss": 0.5247, + "step": 4551 + }, + { + "epoch": 0.12498627127951675, + "grad_norm": 0.37050512433052063, + "learning_rate": 1.9815795387114335e-05, + "loss": 0.4854, + "step": 4552 + }, + { + "epoch": 0.12501372872048325, + "grad_norm": 0.3886166214942932, + "learning_rate": 1.9815712863020854e-05, + "loss": 0.5081, + "step": 4553 + }, + { + "epoch": 0.12504118616144974, + "grad_norm": 0.39674025774002075, + "learning_rate": 1.981563032061794e-05, + "loss": 0.5675, + "step": 4554 + }, + { + "epoch": 0.12506864360241626, + "grad_norm": 0.36375951766967773, + "learning_rate": 1.981554775990574e-05, + "loss": 0.5482, + "step": 4555 + }, + { + "epoch": 0.12509610104338276, + "grad_norm": 0.4314548969268799, + "learning_rate": 1.9815465180884416e-05, + "loss": 0.6085, + "step": 4556 + }, + { + "epoch": 0.12512355848434925, + "grad_norm": 0.33327341079711914, + "learning_rate": 1.9815382583554114e-05, + "loss": 0.5775, + "step": 4557 + }, + { + "epoch": 0.12515101592531577, + "grad_norm": 0.3660721778869629, + "learning_rate": 1.9815299967914988e-05, + "loss": 0.4915, + "step": 4558 + }, + { + "epoch": 0.12517847336628227, + "grad_norm": 0.3982091546058655, + "learning_rate": 1.9815217333967198e-05, + "loss": 0.5616, + "step": 4559 + }, + { + "epoch": 0.12520593080724876, + "grad_norm": 0.4000290036201477, + "learning_rate": 1.9815134681710895e-05, + "loss": 0.5606, + "step": 4560 + }, + { + "epoch": 0.12523338824821525, + "grad_norm": 0.3396747410297394, + "learning_rate": 1.9815052011146233e-05, + "loss": 0.5714, + "step": 4561 + }, + { + "epoch": 0.12526084568918178, + "grad_norm": 0.41182446479797363, + "learning_rate": 1.9814969322273368e-05, + "loss": 0.5831, + "step": 4562 + }, + { + "epoch": 0.12528830313014827, + "grad_norm": 0.38821956515312195, + "learning_rate": 1.9814886615092447e-05, + "loss": 0.5393, + "step": 4563 + }, + { + "epoch": 0.12531576057111476, + "grad_norm": 0.3326866328716278, + "learning_rate": 1.9814803889603634e-05, + "loss": 0.5521, + "step": 4564 + }, + { + "epoch": 0.1253432180120813, + "grad_norm": 0.3558941185474396, + "learning_rate": 1.981472114580708e-05, + "loss": 0.5037, + "step": 4565 + }, + { + "epoch": 0.12537067545304778, + "grad_norm": 0.3617771863937378, + "learning_rate": 1.9814638383702936e-05, + "loss": 0.5158, + "step": 4566 + }, + { + "epoch": 0.12539813289401427, + "grad_norm": 0.3950182795524597, + "learning_rate": 1.981455560329136e-05, + "loss": 0.5097, + "step": 4567 + }, + { + "epoch": 0.12542559033498077, + "grad_norm": 0.35856372117996216, + "learning_rate": 1.9814472804572505e-05, + "loss": 0.5662, + "step": 4568 + }, + { + "epoch": 0.1254530477759473, + "grad_norm": 0.4754500985145569, + "learning_rate": 1.9814389987546526e-05, + "loss": 0.6284, + "step": 4569 + }, + { + "epoch": 0.12548050521691378, + "grad_norm": 0.4066259264945984, + "learning_rate": 1.981430715221358e-05, + "loss": 0.5777, + "step": 4570 + }, + { + "epoch": 0.12550796265788028, + "grad_norm": 0.3523061275482178, + "learning_rate": 1.9814224298573818e-05, + "loss": 0.5515, + "step": 4571 + }, + { + "epoch": 0.1255354200988468, + "grad_norm": 0.3919089436531067, + "learning_rate": 1.981414142662739e-05, + "loss": 0.5592, + "step": 4572 + }, + { + "epoch": 0.1255628775398133, + "grad_norm": 0.3298614025115967, + "learning_rate": 1.981405853637446e-05, + "loss": 0.5147, + "step": 4573 + }, + { + "epoch": 0.1255903349807798, + "grad_norm": 0.479017436504364, + "learning_rate": 1.9813975627815178e-05, + "loss": 0.5472, + "step": 4574 + }, + { + "epoch": 0.12561779242174628, + "grad_norm": 0.46824684739112854, + "learning_rate": 1.9813892700949703e-05, + "loss": 0.4911, + "step": 4575 + }, + { + "epoch": 0.1256452498627128, + "grad_norm": 0.6302682757377625, + "learning_rate": 1.981380975577818e-05, + "loss": 0.5693, + "step": 4576 + }, + { + "epoch": 0.1256727073036793, + "grad_norm": 0.36443573236465454, + "learning_rate": 1.9813726792300772e-05, + "loss": 0.594, + "step": 4577 + }, + { + "epoch": 0.1257001647446458, + "grad_norm": 0.38088783621788025, + "learning_rate": 1.9813643810517632e-05, + "loss": 0.558, + "step": 4578 + }, + { + "epoch": 0.12572762218561231, + "grad_norm": 0.40735623240470886, + "learning_rate": 1.981356081042891e-05, + "loss": 0.5335, + "step": 4579 + }, + { + "epoch": 0.1257550796265788, + "grad_norm": 0.3989115357398987, + "learning_rate": 1.981347779203477e-05, + "loss": 0.553, + "step": 4580 + }, + { + "epoch": 0.1257825370675453, + "grad_norm": 0.49549898505210876, + "learning_rate": 1.981339475533536e-05, + "loss": 0.5686, + "step": 4581 + }, + { + "epoch": 0.1258099945085118, + "grad_norm": 0.3734446167945862, + "learning_rate": 1.9813311700330838e-05, + "loss": 0.5999, + "step": 4582 + }, + { + "epoch": 0.12583745194947832, + "grad_norm": 0.3769661486148834, + "learning_rate": 1.9813228627021355e-05, + "loss": 0.5373, + "step": 4583 + }, + { + "epoch": 0.1258649093904448, + "grad_norm": 0.4106237292289734, + "learning_rate": 1.981314553540707e-05, + "loss": 0.5402, + "step": 4584 + }, + { + "epoch": 0.1258923668314113, + "grad_norm": 0.3978353440761566, + "learning_rate": 1.9813062425488137e-05, + "loss": 0.552, + "step": 4585 + }, + { + "epoch": 0.1259198242723778, + "grad_norm": 0.363187700510025, + "learning_rate": 1.9812979297264713e-05, + "loss": 0.5798, + "step": 4586 + }, + { + "epoch": 0.12594728171334432, + "grad_norm": 0.3238270580768585, + "learning_rate": 1.9812896150736947e-05, + "loss": 0.4637, + "step": 4587 + }, + { + "epoch": 0.12597473915431082, + "grad_norm": 0.3515819311141968, + "learning_rate": 1.9812812985904996e-05, + "loss": 0.4821, + "step": 4588 + }, + { + "epoch": 0.1260021965952773, + "grad_norm": 0.356880784034729, + "learning_rate": 1.981272980276902e-05, + "loss": 0.4942, + "step": 4589 + }, + { + "epoch": 0.12602965403624383, + "grad_norm": 0.367489218711853, + "learning_rate": 1.9812646601329174e-05, + "loss": 0.5193, + "step": 4590 + }, + { + "epoch": 0.12605711147721033, + "grad_norm": 0.3482157289981842, + "learning_rate": 1.9812563381585605e-05, + "loss": 0.5327, + "step": 4591 + }, + { + "epoch": 0.12608456891817682, + "grad_norm": 0.34965598583221436, + "learning_rate": 1.9812480143538473e-05, + "loss": 0.6748, + "step": 4592 + }, + { + "epoch": 0.12611202635914331, + "grad_norm": 0.343345046043396, + "learning_rate": 1.9812396887187936e-05, + "loss": 0.5835, + "step": 4593 + }, + { + "epoch": 0.12613948380010984, + "grad_norm": 0.43978390097618103, + "learning_rate": 1.9812313612534148e-05, + "loss": 0.6394, + "step": 4594 + }, + { + "epoch": 0.12616694124107633, + "grad_norm": 0.3992292582988739, + "learning_rate": 1.9812230319577262e-05, + "loss": 0.5691, + "step": 4595 + }, + { + "epoch": 0.12619439868204282, + "grad_norm": 0.3587813973426819, + "learning_rate": 1.9812147008317438e-05, + "loss": 0.4806, + "step": 4596 + }, + { + "epoch": 0.12622185612300935, + "grad_norm": 0.3747231364250183, + "learning_rate": 1.9812063678754824e-05, + "loss": 0.5265, + "step": 4597 + }, + { + "epoch": 0.12624931356397584, + "grad_norm": 0.3854464590549469, + "learning_rate": 1.9811980330889583e-05, + "loss": 0.479, + "step": 4598 + }, + { + "epoch": 0.12627677100494233, + "grad_norm": 0.37662485241889954, + "learning_rate": 1.9811896964721865e-05, + "loss": 0.5776, + "step": 4599 + }, + { + "epoch": 0.12630422844590883, + "grad_norm": 0.3242397904396057, + "learning_rate": 1.9811813580251827e-05, + "loss": 0.4831, + "step": 4600 + }, + { + "epoch": 0.12633168588687535, + "grad_norm": 0.3922351896762848, + "learning_rate": 1.9811730177479624e-05, + "loss": 0.6241, + "step": 4601 + }, + { + "epoch": 0.12635914332784184, + "grad_norm": 0.34332069754600525, + "learning_rate": 1.9811646756405417e-05, + "loss": 0.501, + "step": 4602 + }, + { + "epoch": 0.12638660076880834, + "grad_norm": 0.3683393895626068, + "learning_rate": 1.9811563317029356e-05, + "loss": 0.5545, + "step": 4603 + }, + { + "epoch": 0.12641405820977486, + "grad_norm": 0.36037373542785645, + "learning_rate": 1.9811479859351596e-05, + "loss": 0.5479, + "step": 4604 + }, + { + "epoch": 0.12644151565074135, + "grad_norm": 0.36243706941604614, + "learning_rate": 1.9811396383372293e-05, + "loss": 0.4875, + "step": 4605 + }, + { + "epoch": 0.12646897309170785, + "grad_norm": 0.42911162972450256, + "learning_rate": 1.9811312889091607e-05, + "loss": 0.5095, + "step": 4606 + }, + { + "epoch": 0.12649643053267434, + "grad_norm": 0.3874106705188751, + "learning_rate": 1.981122937650969e-05, + "loss": 0.5604, + "step": 4607 + }, + { + "epoch": 0.12652388797364086, + "grad_norm": 0.41102334856987, + "learning_rate": 1.98111458456267e-05, + "loss": 0.5224, + "step": 4608 + }, + { + "epoch": 0.12655134541460736, + "grad_norm": 0.3587576448917389, + "learning_rate": 1.9811062296442792e-05, + "loss": 0.5293, + "step": 4609 + }, + { + "epoch": 0.12657880285557385, + "grad_norm": 0.40790796279907227, + "learning_rate": 1.981097872895812e-05, + "loss": 0.6135, + "step": 4610 + }, + { + "epoch": 0.12660626029654037, + "grad_norm": 0.3439798057079315, + "learning_rate": 1.9810895143172844e-05, + "loss": 0.5182, + "step": 4611 + }, + { + "epoch": 0.12663371773750687, + "grad_norm": 0.35111042857170105, + "learning_rate": 1.9810811539087113e-05, + "loss": 0.5458, + "step": 4612 + }, + { + "epoch": 0.12666117517847336, + "grad_norm": 0.4094531834125519, + "learning_rate": 1.981072791670109e-05, + "loss": 0.544, + "step": 4613 + }, + { + "epoch": 0.12668863261943986, + "grad_norm": 0.4188336730003357, + "learning_rate": 1.981064427601493e-05, + "loss": 0.5966, + "step": 4614 + }, + { + "epoch": 0.12671609006040638, + "grad_norm": 0.3792400360107422, + "learning_rate": 1.981056061702878e-05, + "loss": 0.5373, + "step": 4615 + }, + { + "epoch": 0.12674354750137287, + "grad_norm": 0.41572368144989014, + "learning_rate": 1.981047693974281e-05, + "loss": 0.5726, + "step": 4616 + }, + { + "epoch": 0.12677100494233937, + "grad_norm": 0.3353620767593384, + "learning_rate": 1.981039324415717e-05, + "loss": 0.5189, + "step": 4617 + }, + { + "epoch": 0.1267984623833059, + "grad_norm": 0.3309624493122101, + "learning_rate": 1.981030953027201e-05, + "loss": 0.4638, + "step": 4618 + }, + { + "epoch": 0.12682591982427238, + "grad_norm": 0.3376156687736511, + "learning_rate": 1.9810225798087498e-05, + "loss": 0.5837, + "step": 4619 + }, + { + "epoch": 0.12685337726523888, + "grad_norm": 0.3366253674030304, + "learning_rate": 1.9810142047603777e-05, + "loss": 0.4843, + "step": 4620 + }, + { + "epoch": 0.12688083470620537, + "grad_norm": 0.36876431107521057, + "learning_rate": 1.9810058278821017e-05, + "loss": 0.5543, + "step": 4621 + }, + { + "epoch": 0.1269082921471719, + "grad_norm": 0.4178040027618408, + "learning_rate": 1.9809974491739364e-05, + "loss": 0.5277, + "step": 4622 + }, + { + "epoch": 0.12693574958813839, + "grad_norm": 0.39849385619163513, + "learning_rate": 1.9809890686358977e-05, + "loss": 0.5406, + "step": 4623 + }, + { + "epoch": 0.12696320702910488, + "grad_norm": 0.44090771675109863, + "learning_rate": 1.9809806862680012e-05, + "loss": 0.633, + "step": 4624 + }, + { + "epoch": 0.1269906644700714, + "grad_norm": 0.35264307260513306, + "learning_rate": 1.9809723020702628e-05, + "loss": 0.5185, + "step": 4625 + }, + { + "epoch": 0.1270181219110379, + "grad_norm": 0.4347602128982544, + "learning_rate": 1.9809639160426977e-05, + "loss": 0.4859, + "step": 4626 + }, + { + "epoch": 0.1270455793520044, + "grad_norm": 0.33474084734916687, + "learning_rate": 1.9809555281853223e-05, + "loss": 0.4794, + "step": 4627 + }, + { + "epoch": 0.12707303679297088, + "grad_norm": 0.37490859627723694, + "learning_rate": 1.9809471384981514e-05, + "loss": 0.5467, + "step": 4628 + }, + { + "epoch": 0.1271004942339374, + "grad_norm": 0.35054439306259155, + "learning_rate": 1.9809387469812013e-05, + "loss": 0.4803, + "step": 4629 + }, + { + "epoch": 0.1271279516749039, + "grad_norm": 0.39731365442276, + "learning_rate": 1.980930353634487e-05, + "loss": 0.4599, + "step": 4630 + }, + { + "epoch": 0.1271554091158704, + "grad_norm": 0.3572462201118469, + "learning_rate": 1.9809219584580245e-05, + "loss": 0.5236, + "step": 4631 + }, + { + "epoch": 0.12718286655683692, + "grad_norm": 0.3291398286819458, + "learning_rate": 1.98091356145183e-05, + "loss": 0.4892, + "step": 4632 + }, + { + "epoch": 0.1272103239978034, + "grad_norm": 0.3566029667854309, + "learning_rate": 1.980905162615918e-05, + "loss": 0.5793, + "step": 4633 + }, + { + "epoch": 0.1272377814387699, + "grad_norm": 0.3716113567352295, + "learning_rate": 1.980896761950305e-05, + "loss": 0.6111, + "step": 4634 + }, + { + "epoch": 0.1272652388797364, + "grad_norm": 0.3750631809234619, + "learning_rate": 1.980888359455007e-05, + "loss": 0.4202, + "step": 4635 + }, + { + "epoch": 0.12729269632070292, + "grad_norm": 0.4013785123825073, + "learning_rate": 1.9808799551300386e-05, + "loss": 0.5263, + "step": 4636 + }, + { + "epoch": 0.1273201537616694, + "grad_norm": 0.3738197684288025, + "learning_rate": 1.980871548975416e-05, + "loss": 0.5691, + "step": 4637 + }, + { + "epoch": 0.1273476112026359, + "grad_norm": 0.3599986732006073, + "learning_rate": 1.9808631409911553e-05, + "loss": 0.5102, + "step": 4638 + }, + { + "epoch": 0.12737506864360243, + "grad_norm": 0.3562566936016083, + "learning_rate": 1.9808547311772713e-05, + "loss": 0.5076, + "step": 4639 + }, + { + "epoch": 0.12740252608456892, + "grad_norm": 0.4316199719905853, + "learning_rate": 1.9808463195337806e-05, + "loss": 0.5869, + "step": 4640 + }, + { + "epoch": 0.12742998352553542, + "grad_norm": 0.3709467053413391, + "learning_rate": 1.980837906060698e-05, + "loss": 0.5865, + "step": 4641 + }, + { + "epoch": 0.1274574409665019, + "grad_norm": 0.5408498048782349, + "learning_rate": 1.9808294907580402e-05, + "loss": 0.6069, + "step": 4642 + }, + { + "epoch": 0.12748489840746843, + "grad_norm": 0.3829575479030609, + "learning_rate": 1.9808210736258217e-05, + "loss": 0.5187, + "step": 4643 + }, + { + "epoch": 0.12751235584843493, + "grad_norm": 0.33770987391471863, + "learning_rate": 1.9808126546640592e-05, + "loss": 0.5726, + "step": 4644 + }, + { + "epoch": 0.12753981328940142, + "grad_norm": 0.35748112201690674, + "learning_rate": 1.9808042338727685e-05, + "loss": 0.5236, + "step": 4645 + }, + { + "epoch": 0.12756727073036794, + "grad_norm": 0.36625924706459045, + "learning_rate": 1.9807958112519642e-05, + "loss": 0.6007, + "step": 4646 + }, + { + "epoch": 0.12759472817133444, + "grad_norm": 0.3561141788959503, + "learning_rate": 1.980787386801663e-05, + "loss": 0.5816, + "step": 4647 + }, + { + "epoch": 0.12762218561230093, + "grad_norm": 0.3550746440887451, + "learning_rate": 1.9807789605218804e-05, + "loss": 0.5106, + "step": 4648 + }, + { + "epoch": 0.12764964305326743, + "grad_norm": 0.30563101172447205, + "learning_rate": 1.9807705324126316e-05, + "loss": 0.4876, + "step": 4649 + }, + { + "epoch": 0.12767710049423395, + "grad_norm": 0.3240388035774231, + "learning_rate": 1.980762102473933e-05, + "loss": 0.4035, + "step": 4650 + }, + { + "epoch": 0.12770455793520044, + "grad_norm": 0.3525928854942322, + "learning_rate": 1.9807536707058e-05, + "loss": 0.5662, + "step": 4651 + }, + { + "epoch": 0.12773201537616694, + "grad_norm": 0.3636147975921631, + "learning_rate": 1.9807452371082486e-05, + "loss": 0.5255, + "step": 4652 + }, + { + "epoch": 0.12775947281713343, + "grad_norm": 0.3470696210861206, + "learning_rate": 1.980736801681294e-05, + "loss": 0.5777, + "step": 4653 + }, + { + "epoch": 0.12778693025809995, + "grad_norm": 0.4342823624610901, + "learning_rate": 1.9807283644249525e-05, + "loss": 0.5906, + "step": 4654 + }, + { + "epoch": 0.12781438769906645, + "grad_norm": 0.3733419179916382, + "learning_rate": 1.9807199253392393e-05, + "loss": 0.4728, + "step": 4655 + }, + { + "epoch": 0.12784184514003294, + "grad_norm": 0.4401317238807678, + "learning_rate": 1.9807114844241706e-05, + "loss": 0.6095, + "step": 4656 + }, + { + "epoch": 0.12786930258099946, + "grad_norm": 0.3589775264263153, + "learning_rate": 1.9807030416797623e-05, + "loss": 0.5125, + "step": 4657 + }, + { + "epoch": 0.12789676002196596, + "grad_norm": 0.4197657108306885, + "learning_rate": 1.980694597106029e-05, + "loss": 0.551, + "step": 4658 + }, + { + "epoch": 0.12792421746293245, + "grad_norm": 0.38242799043655396, + "learning_rate": 1.980686150702988e-05, + "loss": 0.5215, + "step": 4659 + }, + { + "epoch": 0.12795167490389894, + "grad_norm": 0.39155423641204834, + "learning_rate": 1.980677702470654e-05, + "loss": 0.6202, + "step": 4660 + }, + { + "epoch": 0.12797913234486546, + "grad_norm": 0.3757980167865753, + "learning_rate": 1.9806692524090434e-05, + "loss": 0.6214, + "step": 4661 + }, + { + "epoch": 0.12800658978583196, + "grad_norm": 0.3857920467853546, + "learning_rate": 1.9806608005181713e-05, + "loss": 0.5259, + "step": 4662 + }, + { + "epoch": 0.12803404722679845, + "grad_norm": 0.3504411578178406, + "learning_rate": 1.980652346798054e-05, + "loss": 0.5666, + "step": 4663 + }, + { + "epoch": 0.12806150466776497, + "grad_norm": 0.34494057297706604, + "learning_rate": 1.980643891248707e-05, + "loss": 0.549, + "step": 4664 + }, + { + "epoch": 0.12808896210873147, + "grad_norm": 0.3728983998298645, + "learning_rate": 1.980635433870146e-05, + "loss": 0.4463, + "step": 4665 + }, + { + "epoch": 0.12811641954969796, + "grad_norm": 0.34523314237594604, + "learning_rate": 1.9806269746623875e-05, + "loss": 0.4277, + "step": 4666 + }, + { + "epoch": 0.12814387699066446, + "grad_norm": 0.3845461905002594, + "learning_rate": 1.980618513625446e-05, + "loss": 0.5737, + "step": 4667 + }, + { + "epoch": 0.12817133443163098, + "grad_norm": 0.3400353789329529, + "learning_rate": 1.9806100507593387e-05, + "loss": 0.4713, + "step": 4668 + }, + { + "epoch": 0.12819879187259747, + "grad_norm": 0.3931112587451935, + "learning_rate": 1.98060158606408e-05, + "loss": 0.5635, + "step": 4669 + }, + { + "epoch": 0.12822624931356397, + "grad_norm": 0.424045205116272, + "learning_rate": 1.9805931195396868e-05, + "loss": 0.5004, + "step": 4670 + }, + { + "epoch": 0.1282537067545305, + "grad_norm": 0.37648284435272217, + "learning_rate": 1.9805846511861744e-05, + "loss": 0.5825, + "step": 4671 + }, + { + "epoch": 0.12828116419549698, + "grad_norm": 0.35095030069351196, + "learning_rate": 1.9805761810035588e-05, + "loss": 0.4578, + "step": 4672 + }, + { + "epoch": 0.12830862163646348, + "grad_norm": 0.32021138072013855, + "learning_rate": 1.9805677089918554e-05, + "loss": 0.4599, + "step": 4673 + }, + { + "epoch": 0.12833607907742997, + "grad_norm": 0.3179025650024414, + "learning_rate": 1.98055923515108e-05, + "loss": 0.4724, + "step": 4674 + }, + { + "epoch": 0.1283635365183965, + "grad_norm": 0.36373889446258545, + "learning_rate": 1.9805507594812493e-05, + "loss": 0.5513, + "step": 4675 + }, + { + "epoch": 0.128390993959363, + "grad_norm": 0.34057289361953735, + "learning_rate": 1.980542281982378e-05, + "loss": 0.545, + "step": 4676 + }, + { + "epoch": 0.12841845140032948, + "grad_norm": 3.3009727001190186, + "learning_rate": 1.9805338026544826e-05, + "loss": 0.5835, + "step": 4677 + }, + { + "epoch": 0.128445908841296, + "grad_norm": 0.33715271949768066, + "learning_rate": 1.9805253214975786e-05, + "loss": 0.6193, + "step": 4678 + }, + { + "epoch": 0.1284733662822625, + "grad_norm": 0.3553815186023712, + "learning_rate": 1.980516838511682e-05, + "loss": 0.5059, + "step": 4679 + }, + { + "epoch": 0.128500823723229, + "grad_norm": 0.3491773009300232, + "learning_rate": 1.9805083536968088e-05, + "loss": 0.5758, + "step": 4680 + }, + { + "epoch": 0.12852828116419548, + "grad_norm": 0.3717077970504761, + "learning_rate": 1.9804998670529742e-05, + "loss": 0.5182, + "step": 4681 + }, + { + "epoch": 0.128555738605162, + "grad_norm": 0.3661896586418152, + "learning_rate": 1.9804913785801948e-05, + "loss": 0.5048, + "step": 4682 + }, + { + "epoch": 0.1285831960461285, + "grad_norm": 0.4295479953289032, + "learning_rate": 1.9804828882784853e-05, + "loss": 0.5543, + "step": 4683 + }, + { + "epoch": 0.128610653487095, + "grad_norm": 0.32970130443573, + "learning_rate": 1.980474396147863e-05, + "loss": 0.5082, + "step": 4684 + }, + { + "epoch": 0.12863811092806152, + "grad_norm": 0.3419676125049591, + "learning_rate": 1.9804659021883426e-05, + "loss": 0.5625, + "step": 4685 + }, + { + "epoch": 0.128665568369028, + "grad_norm": 0.3935696482658386, + "learning_rate": 1.980457406399941e-05, + "loss": 0.6187, + "step": 4686 + }, + { + "epoch": 0.1286930258099945, + "grad_norm": 0.3392459452152252, + "learning_rate": 1.980448908782673e-05, + "loss": 0.4879, + "step": 4687 + }, + { + "epoch": 0.128720483250961, + "grad_norm": 0.39766690135002136, + "learning_rate": 1.9804404093365546e-05, + "loss": 0.6513, + "step": 4688 + }, + { + "epoch": 0.12874794069192752, + "grad_norm": 0.40386730432510376, + "learning_rate": 1.980431908061602e-05, + "loss": 0.5999, + "step": 4689 + }, + { + "epoch": 0.12877539813289401, + "grad_norm": 0.3893100619316101, + "learning_rate": 1.980423404957831e-05, + "loss": 0.572, + "step": 4690 + }, + { + "epoch": 0.1288028555738605, + "grad_norm": 0.3883233666419983, + "learning_rate": 1.9804149000252578e-05, + "loss": 0.4662, + "step": 4691 + }, + { + "epoch": 0.12883031301482703, + "grad_norm": 0.3404282331466675, + "learning_rate": 1.9804063932638974e-05, + "loss": 0.5113, + "step": 4692 + }, + { + "epoch": 0.12885777045579352, + "grad_norm": 0.34899866580963135, + "learning_rate": 1.9803978846737667e-05, + "loss": 0.5158, + "step": 4693 + }, + { + "epoch": 0.12888522789676002, + "grad_norm": 0.3995777666568756, + "learning_rate": 1.9803893742548807e-05, + "loss": 0.5395, + "step": 4694 + }, + { + "epoch": 0.1289126853377265, + "grad_norm": 0.3717349171638489, + "learning_rate": 1.9803808620072556e-05, + "loss": 0.6263, + "step": 4695 + }, + { + "epoch": 0.12894014277869303, + "grad_norm": 0.3653763234615326, + "learning_rate": 1.9803723479309075e-05, + "loss": 0.5636, + "step": 4696 + }, + { + "epoch": 0.12896760021965953, + "grad_norm": 0.3593781292438507, + "learning_rate": 1.9803638320258518e-05, + "loss": 0.4991, + "step": 4697 + }, + { + "epoch": 0.12899505766062602, + "grad_norm": 0.33587780594825745, + "learning_rate": 1.9803553142921048e-05, + "loss": 0.5517, + "step": 4698 + }, + { + "epoch": 0.12902251510159254, + "grad_norm": 0.39920517802238464, + "learning_rate": 1.980346794729682e-05, + "loss": 0.614, + "step": 4699 + }, + { + "epoch": 0.12904997254255904, + "grad_norm": 0.33639827370643616, + "learning_rate": 1.9803382733386e-05, + "loss": 0.575, + "step": 4700 + }, + { + "epoch": 0.12907742998352553, + "grad_norm": 0.37912821769714355, + "learning_rate": 1.980329750118874e-05, + "loss": 0.6222, + "step": 4701 + }, + { + "epoch": 0.12910488742449203, + "grad_norm": 0.33052918314933777, + "learning_rate": 1.98032122507052e-05, + "loss": 0.4965, + "step": 4702 + }, + { + "epoch": 0.12913234486545855, + "grad_norm": 0.3617566227912903, + "learning_rate": 1.9803126981935543e-05, + "loss": 0.603, + "step": 4703 + }, + { + "epoch": 0.12915980230642504, + "grad_norm": 0.3575611114501953, + "learning_rate": 1.9803041694879926e-05, + "loss": 0.5267, + "step": 4704 + }, + { + "epoch": 0.12918725974739154, + "grad_norm": 0.4053252935409546, + "learning_rate": 1.9802956389538502e-05, + "loss": 0.4742, + "step": 4705 + }, + { + "epoch": 0.12921471718835806, + "grad_norm": 0.3746340572834015, + "learning_rate": 1.9802871065911442e-05, + "loss": 0.5456, + "step": 4706 + }, + { + "epoch": 0.12924217462932455, + "grad_norm": 0.3243280351161957, + "learning_rate": 1.9802785723998893e-05, + "loss": 0.5274, + "step": 4707 + }, + { + "epoch": 0.12926963207029105, + "grad_norm": 0.3887551426887512, + "learning_rate": 1.9802700363801023e-05, + "loss": 0.5664, + "step": 4708 + }, + { + "epoch": 0.12929708951125754, + "grad_norm": 0.34058913588523865, + "learning_rate": 1.980261498531799e-05, + "loss": 0.5421, + "step": 4709 + }, + { + "epoch": 0.12932454695222406, + "grad_norm": 0.38874146342277527, + "learning_rate": 1.980252958854995e-05, + "loss": 0.5922, + "step": 4710 + }, + { + "epoch": 0.12935200439319056, + "grad_norm": 0.3872394263744354, + "learning_rate": 1.9802444173497063e-05, + "loss": 0.4967, + "step": 4711 + }, + { + "epoch": 0.12937946183415705, + "grad_norm": 0.36738237738609314, + "learning_rate": 1.9802358740159487e-05, + "loss": 0.4789, + "step": 4712 + }, + { + "epoch": 0.12940691927512357, + "grad_norm": 0.46389031410217285, + "learning_rate": 1.9802273288537387e-05, + "loss": 0.6317, + "step": 4713 + }, + { + "epoch": 0.12943437671609007, + "grad_norm": 0.39626696705818176, + "learning_rate": 1.9802187818630915e-05, + "loss": 0.5171, + "step": 4714 + }, + { + "epoch": 0.12946183415705656, + "grad_norm": 0.5049596428871155, + "learning_rate": 1.9802102330440238e-05, + "loss": 0.4896, + "step": 4715 + }, + { + "epoch": 0.12948929159802305, + "grad_norm": 0.36169591546058655, + "learning_rate": 1.980201682396551e-05, + "loss": 0.5107, + "step": 4716 + }, + { + "epoch": 0.12951674903898958, + "grad_norm": 0.35634663701057434, + "learning_rate": 1.9801931299206893e-05, + "loss": 0.5953, + "step": 4717 + }, + { + "epoch": 0.12954420647995607, + "grad_norm": 0.3695346713066101, + "learning_rate": 1.9801845756164547e-05, + "loss": 0.5403, + "step": 4718 + }, + { + "epoch": 0.12957166392092256, + "grad_norm": 0.35969969630241394, + "learning_rate": 1.9801760194838626e-05, + "loss": 0.5527, + "step": 4719 + }, + { + "epoch": 0.12959912136188906, + "grad_norm": 0.31247997283935547, + "learning_rate": 1.9801674615229296e-05, + "loss": 0.5927, + "step": 4720 + }, + { + "epoch": 0.12962657880285558, + "grad_norm": 0.3519132137298584, + "learning_rate": 1.9801589017336715e-05, + "loss": 0.4857, + "step": 4721 + }, + { + "epoch": 0.12965403624382207, + "grad_norm": 0.32654133439064026, + "learning_rate": 1.9801503401161043e-05, + "loss": 0.5428, + "step": 4722 + }, + { + "epoch": 0.12968149368478857, + "grad_norm": 0.32837679982185364, + "learning_rate": 1.9801417766702436e-05, + "loss": 0.5056, + "step": 4723 + }, + { + "epoch": 0.1297089511257551, + "grad_norm": 0.391488641500473, + "learning_rate": 1.980133211396106e-05, + "loss": 0.5802, + "step": 4724 + }, + { + "epoch": 0.12973640856672158, + "grad_norm": 0.3670549988746643, + "learning_rate": 1.980124644293707e-05, + "loss": 0.5567, + "step": 4725 + }, + { + "epoch": 0.12976386600768808, + "grad_norm": 0.3985184133052826, + "learning_rate": 1.9801160753630628e-05, + "loss": 0.5998, + "step": 4726 + }, + { + "epoch": 0.12979132344865457, + "grad_norm": 0.37249550223350525, + "learning_rate": 1.980107504604189e-05, + "loss": 0.5387, + "step": 4727 + }, + { + "epoch": 0.1298187808896211, + "grad_norm": 0.36497747898101807, + "learning_rate": 1.9800989320171023e-05, + "loss": 0.4711, + "step": 4728 + }, + { + "epoch": 0.1298462383305876, + "grad_norm": 0.35467416048049927, + "learning_rate": 1.980090357601818e-05, + "loss": 0.5316, + "step": 4729 + }, + { + "epoch": 0.12987369577155408, + "grad_norm": 0.36455363035202026, + "learning_rate": 1.9800817813583526e-05, + "loss": 0.5333, + "step": 4730 + }, + { + "epoch": 0.1299011532125206, + "grad_norm": 0.3497975468635559, + "learning_rate": 1.9800732032867215e-05, + "loss": 0.654, + "step": 4731 + }, + { + "epoch": 0.1299286106534871, + "grad_norm": 0.35124337673187256, + "learning_rate": 1.9800646233869413e-05, + "loss": 0.5405, + "step": 4732 + }, + { + "epoch": 0.1299560680944536, + "grad_norm": 0.40341323614120483, + "learning_rate": 1.9800560416590277e-05, + "loss": 0.64, + "step": 4733 + }, + { + "epoch": 0.12998352553542009, + "grad_norm": 0.3068658411502838, + "learning_rate": 1.980047458102997e-05, + "loss": 0.5579, + "step": 4734 + }, + { + "epoch": 0.1300109829763866, + "grad_norm": 0.4347068965435028, + "learning_rate": 1.980038872718865e-05, + "loss": 0.6138, + "step": 4735 + }, + { + "epoch": 0.1300384404173531, + "grad_norm": 0.3783550262451172, + "learning_rate": 1.9800302855066475e-05, + "loss": 0.549, + "step": 4736 + }, + { + "epoch": 0.1300658978583196, + "grad_norm": 0.3929012715816498, + "learning_rate": 1.9800216964663605e-05, + "loss": 0.598, + "step": 4737 + }, + { + "epoch": 0.13009335529928612, + "grad_norm": 0.3782687783241272, + "learning_rate": 1.9800131055980206e-05, + "loss": 0.6464, + "step": 4738 + }, + { + "epoch": 0.1301208127402526, + "grad_norm": 0.3863724172115326, + "learning_rate": 1.9800045129016437e-05, + "loss": 0.6175, + "step": 4739 + }, + { + "epoch": 0.1301482701812191, + "grad_norm": 0.3933177590370178, + "learning_rate": 1.9799959183772453e-05, + "loss": 0.546, + "step": 4740 + }, + { + "epoch": 0.1301757276221856, + "grad_norm": 0.34894153475761414, + "learning_rate": 1.9799873220248417e-05, + "loss": 0.5249, + "step": 4741 + }, + { + "epoch": 0.13020318506315212, + "grad_norm": 0.41971153020858765, + "learning_rate": 1.979978723844449e-05, + "loss": 0.5266, + "step": 4742 + }, + { + "epoch": 0.13023064250411862, + "grad_norm": 0.3314156234264374, + "learning_rate": 1.979970123836083e-05, + "loss": 0.5185, + "step": 4743 + }, + { + "epoch": 0.1302580999450851, + "grad_norm": 0.46580028533935547, + "learning_rate": 1.9799615219997604e-05, + "loss": 0.5759, + "step": 4744 + }, + { + "epoch": 0.13028555738605163, + "grad_norm": 0.3579244017601013, + "learning_rate": 1.9799529183354967e-05, + "loss": 0.5203, + "step": 4745 + }, + { + "epoch": 0.13031301482701813, + "grad_norm": 0.32934141159057617, + "learning_rate": 1.9799443128433078e-05, + "loss": 0.4872, + "step": 4746 + }, + { + "epoch": 0.13034047226798462, + "grad_norm": 0.38458195328712463, + "learning_rate": 1.97993570552321e-05, + "loss": 0.5031, + "step": 4747 + }, + { + "epoch": 0.1303679297089511, + "grad_norm": 0.3871532380580902, + "learning_rate": 1.9799270963752197e-05, + "loss": 0.5257, + "step": 4748 + }, + { + "epoch": 0.13039538714991764, + "grad_norm": 0.3566650450229645, + "learning_rate": 1.979918485399352e-05, + "loss": 0.5375, + "step": 4749 + }, + { + "epoch": 0.13042284459088413, + "grad_norm": 0.3387940526008606, + "learning_rate": 1.979909872595624e-05, + "loss": 0.4727, + "step": 4750 + }, + { + "epoch": 0.13045030203185062, + "grad_norm": 0.3862028419971466, + "learning_rate": 1.9799012579640517e-05, + "loss": 0.5187, + "step": 4751 + }, + { + "epoch": 0.13047775947281715, + "grad_norm": 0.39067748188972473, + "learning_rate": 1.97989264150465e-05, + "loss": 0.5272, + "step": 4752 + }, + { + "epoch": 0.13050521691378364, + "grad_norm": 0.419339656829834, + "learning_rate": 1.9798840232174364e-05, + "loss": 0.6215, + "step": 4753 + }, + { + "epoch": 0.13053267435475013, + "grad_norm": 0.3750307261943817, + "learning_rate": 1.979875403102426e-05, + "loss": 0.5451, + "step": 4754 + }, + { + "epoch": 0.13056013179571663, + "grad_norm": 0.4008561670780182, + "learning_rate": 1.9798667811596352e-05, + "loss": 0.5006, + "step": 4755 + }, + { + "epoch": 0.13058758923668315, + "grad_norm": 0.4110734164714813, + "learning_rate": 1.9798581573890804e-05, + "loss": 0.615, + "step": 4756 + }, + { + "epoch": 0.13061504667764964, + "grad_norm": 0.3746647536754608, + "learning_rate": 1.9798495317907772e-05, + "loss": 0.5267, + "step": 4757 + }, + { + "epoch": 0.13064250411861614, + "grad_norm": 0.3939198851585388, + "learning_rate": 1.9798409043647423e-05, + "loss": 0.6504, + "step": 4758 + }, + { + "epoch": 0.13066996155958266, + "grad_norm": 0.341647207736969, + "learning_rate": 1.9798322751109905e-05, + "loss": 0.5028, + "step": 4759 + }, + { + "epoch": 0.13069741900054915, + "grad_norm": 0.3801475167274475, + "learning_rate": 1.9798236440295398e-05, + "loss": 0.5708, + "step": 4760 + }, + { + "epoch": 0.13072487644151565, + "grad_norm": 0.3854199945926666, + "learning_rate": 1.9798150111204045e-05, + "loss": 0.5767, + "step": 4761 + }, + { + "epoch": 0.13075233388248214, + "grad_norm": 0.37700024247169495, + "learning_rate": 1.9798063763836017e-05, + "loss": 0.5271, + "step": 4762 + }, + { + "epoch": 0.13077979132344866, + "grad_norm": 0.7228770852088928, + "learning_rate": 1.9797977398191476e-05, + "loss": 0.5306, + "step": 4763 + }, + { + "epoch": 0.13080724876441516, + "grad_norm": 0.3755204975605011, + "learning_rate": 1.979789101427058e-05, + "loss": 0.5005, + "step": 4764 + }, + { + "epoch": 0.13083470620538165, + "grad_norm": 0.42763081192970276, + "learning_rate": 1.9797804612073485e-05, + "loss": 0.4962, + "step": 4765 + }, + { + "epoch": 0.13086216364634817, + "grad_norm": 0.32806918025016785, + "learning_rate": 1.9797718191600358e-05, + "loss": 0.5002, + "step": 4766 + }, + { + "epoch": 0.13088962108731467, + "grad_norm": 0.38588404655456543, + "learning_rate": 1.9797631752851364e-05, + "loss": 0.6148, + "step": 4767 + }, + { + "epoch": 0.13091707852828116, + "grad_norm": 0.9077989459037781, + "learning_rate": 1.9797545295826658e-05, + "loss": 0.5547, + "step": 4768 + }, + { + "epoch": 0.13094453596924766, + "grad_norm": 0.35402193665504456, + "learning_rate": 1.9797458820526403e-05, + "loss": 0.5982, + "step": 4769 + }, + { + "epoch": 0.13097199341021418, + "grad_norm": 0.36603885889053345, + "learning_rate": 1.9797372326950758e-05, + "loss": 0.5626, + "step": 4770 + }, + { + "epoch": 0.13099945085118067, + "grad_norm": 0.38114723563194275, + "learning_rate": 1.979728581509989e-05, + "loss": 0.5507, + "step": 4771 + }, + { + "epoch": 0.13102690829214717, + "grad_norm": 0.35898557305336, + "learning_rate": 1.9797199284973954e-05, + "loss": 0.5438, + "step": 4772 + }, + { + "epoch": 0.1310543657331137, + "grad_norm": 0.3715273141860962, + "learning_rate": 1.9797112736573115e-05, + "loss": 0.4819, + "step": 4773 + }, + { + "epoch": 0.13108182317408018, + "grad_norm": 0.40087807178497314, + "learning_rate": 1.9797026169897538e-05, + "loss": 0.5127, + "step": 4774 + }, + { + "epoch": 0.13110928061504667, + "grad_norm": 0.3630547523498535, + "learning_rate": 1.9796939584947377e-05, + "loss": 0.4973, + "step": 4775 + }, + { + "epoch": 0.13113673805601317, + "grad_norm": 0.3911893367767334, + "learning_rate": 1.9796852981722792e-05, + "loss": 0.6597, + "step": 4776 + }, + { + "epoch": 0.1311641954969797, + "grad_norm": 0.37113964557647705, + "learning_rate": 1.979676636022396e-05, + "loss": 0.6191, + "step": 4777 + }, + { + "epoch": 0.13119165293794618, + "grad_norm": 0.3561573922634125, + "learning_rate": 1.9796679720451025e-05, + "loss": 0.4922, + "step": 4778 + }, + { + "epoch": 0.13121911037891268, + "grad_norm": 0.3172551095485687, + "learning_rate": 1.9796593062404157e-05, + "loss": 0.4843, + "step": 4779 + }, + { + "epoch": 0.1312465678198792, + "grad_norm": 0.3621581494808197, + "learning_rate": 1.9796506386083515e-05, + "loss": 0.5634, + "step": 4780 + }, + { + "epoch": 0.1312740252608457, + "grad_norm": 0.3462367057800293, + "learning_rate": 1.9796419691489263e-05, + "loss": 0.4915, + "step": 4781 + }, + { + "epoch": 0.1313014827018122, + "grad_norm": 0.40557098388671875, + "learning_rate": 1.9796332978621562e-05, + "loss": 0.5814, + "step": 4782 + }, + { + "epoch": 0.13132894014277868, + "grad_norm": 0.3666320741176605, + "learning_rate": 1.9796246247480572e-05, + "loss": 0.5328, + "step": 4783 + }, + { + "epoch": 0.1313563975837452, + "grad_norm": 0.3556571900844574, + "learning_rate": 1.979615949806646e-05, + "loss": 0.5149, + "step": 4784 + }, + { + "epoch": 0.1313838550247117, + "grad_norm": 0.3832613229751587, + "learning_rate": 1.979607273037938e-05, + "loss": 0.5896, + "step": 4785 + }, + { + "epoch": 0.1314113124656782, + "grad_norm": 0.3721497058868408, + "learning_rate": 1.9795985944419497e-05, + "loss": 0.548, + "step": 4786 + }, + { + "epoch": 0.1314387699066447, + "grad_norm": 0.3426712155342102, + "learning_rate": 1.9795899140186977e-05, + "loss": 0.5354, + "step": 4787 + }, + { + "epoch": 0.1314662273476112, + "grad_norm": 0.3597654402256012, + "learning_rate": 1.9795812317681978e-05, + "loss": 0.6075, + "step": 4788 + }, + { + "epoch": 0.1314936847885777, + "grad_norm": 0.3309241831302643, + "learning_rate": 1.979572547690466e-05, + "loss": 0.5315, + "step": 4789 + }, + { + "epoch": 0.1315211422295442, + "grad_norm": 0.3277648687362671, + "learning_rate": 1.979563861785519e-05, + "loss": 0.5821, + "step": 4790 + }, + { + "epoch": 0.13154859967051072, + "grad_norm": 0.37115445733070374, + "learning_rate": 1.9795551740533726e-05, + "loss": 0.5082, + "step": 4791 + }, + { + "epoch": 0.1315760571114772, + "grad_norm": 0.43164709210395813, + "learning_rate": 1.9795464844940432e-05, + "loss": 0.5644, + "step": 4792 + }, + { + "epoch": 0.1316035145524437, + "grad_norm": 0.45700305700302124, + "learning_rate": 1.9795377931075472e-05, + "loss": 0.5464, + "step": 4793 + }, + { + "epoch": 0.1316309719934102, + "grad_norm": 0.4147648215293884, + "learning_rate": 1.9795290998939002e-05, + "loss": 0.6163, + "step": 4794 + }, + { + "epoch": 0.13165842943437672, + "grad_norm": 0.36461520195007324, + "learning_rate": 1.979520404853119e-05, + "loss": 0.584, + "step": 4795 + }, + { + "epoch": 0.13168588687534322, + "grad_norm": 0.3699861466884613, + "learning_rate": 1.97951170798522e-05, + "loss": 0.5875, + "step": 4796 + }, + { + "epoch": 0.1317133443163097, + "grad_norm": 0.4340061843395233, + "learning_rate": 1.9795030092902185e-05, + "loss": 0.5304, + "step": 4797 + }, + { + "epoch": 0.13174080175727623, + "grad_norm": 0.36613813042640686, + "learning_rate": 1.9794943087681317e-05, + "loss": 0.5363, + "step": 4798 + }, + { + "epoch": 0.13176825919824273, + "grad_norm": 0.9595581293106079, + "learning_rate": 1.9794856064189748e-05, + "loss": 0.4887, + "step": 4799 + }, + { + "epoch": 0.13179571663920922, + "grad_norm": 0.4021627604961395, + "learning_rate": 1.979476902242765e-05, + "loss": 0.6504, + "step": 4800 + }, + { + "epoch": 0.13182317408017571, + "grad_norm": 0.4145890772342682, + "learning_rate": 1.9794681962395182e-05, + "loss": 0.5787, + "step": 4801 + }, + { + "epoch": 0.13185063152114224, + "grad_norm": 0.3672868311405182, + "learning_rate": 1.9794594884092504e-05, + "loss": 0.5753, + "step": 4802 + }, + { + "epoch": 0.13187808896210873, + "grad_norm": 0.40493687987327576, + "learning_rate": 1.9794507787519783e-05, + "loss": 0.573, + "step": 4803 + }, + { + "epoch": 0.13190554640307522, + "grad_norm": 0.3968786299228668, + "learning_rate": 1.9794420672677178e-05, + "loss": 0.4645, + "step": 4804 + }, + { + "epoch": 0.13193300384404175, + "grad_norm": 0.3455352485179901, + "learning_rate": 1.979433353956485e-05, + "loss": 0.4933, + "step": 4805 + }, + { + "epoch": 0.13196046128500824, + "grad_norm": 0.383456826210022, + "learning_rate": 1.979424638818297e-05, + "loss": 0.5916, + "step": 4806 + }, + { + "epoch": 0.13198791872597473, + "grad_norm": 0.376600444316864, + "learning_rate": 1.979415921853169e-05, + "loss": 0.5606, + "step": 4807 + }, + { + "epoch": 0.13201537616694123, + "grad_norm": 0.36070138216018677, + "learning_rate": 1.9794072030611175e-05, + "loss": 0.5407, + "step": 4808 + }, + { + "epoch": 0.13204283360790775, + "grad_norm": 0.3542858958244324, + "learning_rate": 1.9793984824421596e-05, + "loss": 0.5402, + "step": 4809 + }, + { + "epoch": 0.13207029104887424, + "grad_norm": 0.3590506315231323, + "learning_rate": 1.97938975999631e-05, + "loss": 0.57, + "step": 4810 + }, + { + "epoch": 0.13209774848984074, + "grad_norm": 0.46108755469322205, + "learning_rate": 1.979381035723587e-05, + "loss": 0.5185, + "step": 4811 + }, + { + "epoch": 0.13212520593080726, + "grad_norm": 0.3627254068851471, + "learning_rate": 1.9793723096240052e-05, + "loss": 0.5906, + "step": 4812 + }, + { + "epoch": 0.13215266337177375, + "grad_norm": 0.4196452796459198, + "learning_rate": 1.9793635816975816e-05, + "loss": 0.5607, + "step": 4813 + }, + { + "epoch": 0.13218012081274025, + "grad_norm": 0.36930710077285767, + "learning_rate": 1.9793548519443325e-05, + "loss": 0.5885, + "step": 4814 + }, + { + "epoch": 0.13220757825370674, + "grad_norm": 0.4373982548713684, + "learning_rate": 1.9793461203642736e-05, + "loss": 0.5057, + "step": 4815 + }, + { + "epoch": 0.13223503569467326, + "grad_norm": 0.36196550726890564, + "learning_rate": 1.979337386957422e-05, + "loss": 0.6055, + "step": 4816 + }, + { + "epoch": 0.13226249313563976, + "grad_norm": 0.4767090678215027, + "learning_rate": 1.9793286517237932e-05, + "loss": 0.541, + "step": 4817 + }, + { + "epoch": 0.13228995057660625, + "grad_norm": 0.3373899459838867, + "learning_rate": 1.979319914663404e-05, + "loss": 0.5741, + "step": 4818 + }, + { + "epoch": 0.13231740801757277, + "grad_norm": 0.3310614824295044, + "learning_rate": 1.979311175776271e-05, + "loss": 0.479, + "step": 4819 + }, + { + "epoch": 0.13234486545853927, + "grad_norm": 0.5483216047286987, + "learning_rate": 1.97930243506241e-05, + "loss": 0.501, + "step": 4820 + }, + { + "epoch": 0.13237232289950576, + "grad_norm": 0.37252137064933777, + "learning_rate": 1.979293692521837e-05, + "loss": 0.5385, + "step": 4821 + }, + { + "epoch": 0.13239978034047226, + "grad_norm": 0.3370871841907501, + "learning_rate": 1.979284948154569e-05, + "loss": 0.4746, + "step": 4822 + }, + { + "epoch": 0.13242723778143878, + "grad_norm": 0.3453536331653595, + "learning_rate": 1.9792762019606218e-05, + "loss": 0.5538, + "step": 4823 + }, + { + "epoch": 0.13245469522240527, + "grad_norm": 0.32206863164901733, + "learning_rate": 1.9792674539400122e-05, + "loss": 0.5133, + "step": 4824 + }, + { + "epoch": 0.13248215266337177, + "grad_norm": 0.3509998321533203, + "learning_rate": 1.9792587040927563e-05, + "loss": 0.5141, + "step": 4825 + }, + { + "epoch": 0.1325096101043383, + "grad_norm": 0.40161970257759094, + "learning_rate": 1.9792499524188702e-05, + "loss": 0.5597, + "step": 4826 + }, + { + "epoch": 0.13253706754530478, + "grad_norm": 0.38260579109191895, + "learning_rate": 1.9792411989183704e-05, + "loss": 0.625, + "step": 4827 + }, + { + "epoch": 0.13256452498627128, + "grad_norm": 0.39529919624328613, + "learning_rate": 1.9792324435912732e-05, + "loss": 0.5666, + "step": 4828 + }, + { + "epoch": 0.13259198242723777, + "grad_norm": 0.3542931377887726, + "learning_rate": 1.9792236864375952e-05, + "loss": 0.558, + "step": 4829 + }, + { + "epoch": 0.1326194398682043, + "grad_norm": 0.3778814971446991, + "learning_rate": 1.9792149274573523e-05, + "loss": 0.5142, + "step": 4830 + }, + { + "epoch": 0.13264689730917079, + "grad_norm": 0.37390363216400146, + "learning_rate": 1.979206166650561e-05, + "loss": 0.5734, + "step": 4831 + }, + { + "epoch": 0.13267435475013728, + "grad_norm": 0.3479765057563782, + "learning_rate": 1.9791974040172376e-05, + "loss": 0.5621, + "step": 4832 + }, + { + "epoch": 0.1327018121911038, + "grad_norm": 0.38255685567855835, + "learning_rate": 1.9791886395573986e-05, + "loss": 0.5852, + "step": 4833 + }, + { + "epoch": 0.1327292696320703, + "grad_norm": 0.3987768292427063, + "learning_rate": 1.9791798732710605e-05, + "loss": 0.5748, + "step": 4834 + }, + { + "epoch": 0.1327567270730368, + "grad_norm": 0.3850495219230652, + "learning_rate": 1.979171105158239e-05, + "loss": 0.5382, + "step": 4835 + }, + { + "epoch": 0.13278418451400328, + "grad_norm": 0.41548630595207214, + "learning_rate": 1.979162335218951e-05, + "loss": 0.5466, + "step": 4836 + }, + { + "epoch": 0.1328116419549698, + "grad_norm": 0.4912225604057312, + "learning_rate": 1.9791535634532128e-05, + "loss": 0.5505, + "step": 4837 + }, + { + "epoch": 0.1328390993959363, + "grad_norm": 0.40368402004241943, + "learning_rate": 1.979144789861041e-05, + "loss": 0.5159, + "step": 4838 + }, + { + "epoch": 0.1328665568369028, + "grad_norm": 0.3249405026435852, + "learning_rate": 1.9791360144424514e-05, + "loss": 0.374, + "step": 4839 + }, + { + "epoch": 0.13289401427786932, + "grad_norm": 0.3873373866081238, + "learning_rate": 1.9791272371974605e-05, + "loss": 0.528, + "step": 4840 + }, + { + "epoch": 0.1329214717188358, + "grad_norm": 0.36045604944229126, + "learning_rate": 1.979118458126085e-05, + "loss": 0.5657, + "step": 4841 + }, + { + "epoch": 0.1329489291598023, + "grad_norm": 0.4165147840976715, + "learning_rate": 1.9791096772283408e-05, + "loss": 0.5539, + "step": 4842 + }, + { + "epoch": 0.1329763866007688, + "grad_norm": 0.4338565170764923, + "learning_rate": 1.9791008945042448e-05, + "loss": 0.6005, + "step": 4843 + }, + { + "epoch": 0.13300384404173532, + "grad_norm": 0.36060699820518494, + "learning_rate": 1.979092109953813e-05, + "loss": 0.5776, + "step": 4844 + }, + { + "epoch": 0.1330313014827018, + "grad_norm": 0.34357690811157227, + "learning_rate": 1.9790833235770622e-05, + "loss": 0.5238, + "step": 4845 + }, + { + "epoch": 0.1330587589236683, + "grad_norm": 0.3290780484676361, + "learning_rate": 1.979074535374008e-05, + "loss": 0.5569, + "step": 4846 + }, + { + "epoch": 0.13308621636463483, + "grad_norm": 0.38335248827934265, + "learning_rate": 1.9790657453446677e-05, + "loss": 0.5552, + "step": 4847 + }, + { + "epoch": 0.13311367380560132, + "grad_norm": 0.35170555114746094, + "learning_rate": 1.9790569534890574e-05, + "loss": 0.6534, + "step": 4848 + }, + { + "epoch": 0.13314113124656782, + "grad_norm": 0.3956603705883026, + "learning_rate": 1.979048159807193e-05, + "loss": 0.63, + "step": 4849 + }, + { + "epoch": 0.1331685886875343, + "grad_norm": 0.3562845289707184, + "learning_rate": 1.9790393642990917e-05, + "loss": 0.5712, + "step": 4850 + }, + { + "epoch": 0.13319604612850083, + "grad_norm": 0.3504672050476074, + "learning_rate": 1.9790305669647693e-05, + "loss": 0.4978, + "step": 4851 + }, + { + "epoch": 0.13322350356946733, + "grad_norm": 0.3288407027721405, + "learning_rate": 1.9790217678042426e-05, + "loss": 0.5141, + "step": 4852 + }, + { + "epoch": 0.13325096101043382, + "grad_norm": 0.35571756958961487, + "learning_rate": 1.9790129668175274e-05, + "loss": 0.5459, + "step": 4853 + }, + { + "epoch": 0.13327841845140032, + "grad_norm": 0.3911959230899811, + "learning_rate": 1.979004164004641e-05, + "loss": 0.537, + "step": 4854 + }, + { + "epoch": 0.13330587589236684, + "grad_norm": 0.35158488154411316, + "learning_rate": 1.9789953593655994e-05, + "loss": 0.5739, + "step": 4855 + }, + { + "epoch": 0.13333333333333333, + "grad_norm": 0.3217967748641968, + "learning_rate": 1.9789865529004188e-05, + "loss": 0.5649, + "step": 4856 + }, + { + "epoch": 0.13336079077429983, + "grad_norm": 0.3467438519001007, + "learning_rate": 1.9789777446091157e-05, + "loss": 0.5588, + "step": 4857 + }, + { + "epoch": 0.13338824821526635, + "grad_norm": 0.38607722520828247, + "learning_rate": 1.978968934491707e-05, + "loss": 0.5828, + "step": 4858 + }, + { + "epoch": 0.13341570565623284, + "grad_norm": 0.37355121970176697, + "learning_rate": 1.9789601225482085e-05, + "loss": 0.5332, + "step": 4859 + }, + { + "epoch": 0.13344316309719934, + "grad_norm": 0.40708932280540466, + "learning_rate": 1.978951308778637e-05, + "loss": 0.5976, + "step": 4860 + }, + { + "epoch": 0.13347062053816583, + "grad_norm": 0.36296966671943665, + "learning_rate": 1.978942493183009e-05, + "loss": 0.6081, + "step": 4861 + }, + { + "epoch": 0.13349807797913235, + "grad_norm": 0.37403616309165955, + "learning_rate": 1.9789336757613406e-05, + "loss": 0.5468, + "step": 4862 + }, + { + "epoch": 0.13352553542009885, + "grad_norm": 0.45402172207832336, + "learning_rate": 1.9789248565136488e-05, + "loss": 0.5943, + "step": 4863 + }, + { + "epoch": 0.13355299286106534, + "grad_norm": 0.3953668177127838, + "learning_rate": 1.9789160354399494e-05, + "loss": 0.5327, + "step": 4864 + }, + { + "epoch": 0.13358045030203186, + "grad_norm": 0.3832216262817383, + "learning_rate": 1.978907212540259e-05, + "loss": 0.5273, + "step": 4865 + }, + { + "epoch": 0.13360790774299836, + "grad_norm": 0.3654977083206177, + "learning_rate": 1.9788983878145945e-05, + "loss": 0.5229, + "step": 4866 + }, + { + "epoch": 0.13363536518396485, + "grad_norm": 0.3996317684650421, + "learning_rate": 1.978889561262972e-05, + "loss": 0.6209, + "step": 4867 + }, + { + "epoch": 0.13366282262493134, + "grad_norm": 0.3376339077949524, + "learning_rate": 1.978880732885408e-05, + "loss": 0.5205, + "step": 4868 + }, + { + "epoch": 0.13369028006589787, + "grad_norm": 0.3975311815738678, + "learning_rate": 1.978871902681919e-05, + "loss": 0.6138, + "step": 4869 + }, + { + "epoch": 0.13371773750686436, + "grad_norm": 0.35779356956481934, + "learning_rate": 1.9788630706525215e-05, + "loss": 0.5395, + "step": 4870 + }, + { + "epoch": 0.13374519494783085, + "grad_norm": 0.31286463141441345, + "learning_rate": 1.9788542367972322e-05, + "loss": 0.5043, + "step": 4871 + }, + { + "epoch": 0.13377265238879738, + "grad_norm": 0.4132097363471985, + "learning_rate": 1.9788454011160668e-05, + "loss": 0.567, + "step": 4872 + }, + { + "epoch": 0.13380010982976387, + "grad_norm": 0.3450964689254761, + "learning_rate": 1.978836563609043e-05, + "loss": 0.5556, + "step": 4873 + }, + { + "epoch": 0.13382756727073036, + "grad_norm": 0.3646983802318573, + "learning_rate": 1.978827724276176e-05, + "loss": 0.5836, + "step": 4874 + }, + { + "epoch": 0.13385502471169686, + "grad_norm": 0.35934266448020935, + "learning_rate": 1.9788188831174832e-05, + "loss": 0.6229, + "step": 4875 + }, + { + "epoch": 0.13388248215266338, + "grad_norm": 0.3318850100040436, + "learning_rate": 1.978810040132981e-05, + "loss": 0.4441, + "step": 4876 + }, + { + "epoch": 0.13390993959362987, + "grad_norm": 0.34343305230140686, + "learning_rate": 1.978801195322685e-05, + "loss": 0.5449, + "step": 4877 + }, + { + "epoch": 0.13393739703459637, + "grad_norm": 0.3646737337112427, + "learning_rate": 1.978792348686613e-05, + "loss": 0.5331, + "step": 4878 + }, + { + "epoch": 0.1339648544755629, + "grad_norm": 0.36017486453056335, + "learning_rate": 1.9787835002247805e-05, + "loss": 0.4994, + "step": 4879 + }, + { + "epoch": 0.13399231191652938, + "grad_norm": 0.3614514172077179, + "learning_rate": 1.9787746499372048e-05, + "loss": 0.5393, + "step": 4880 + }, + { + "epoch": 0.13401976935749588, + "grad_norm": 0.36906108260154724, + "learning_rate": 1.9787657978239014e-05, + "loss": 0.4727, + "step": 4881 + }, + { + "epoch": 0.13404722679846237, + "grad_norm": 0.3798219561576843, + "learning_rate": 1.978756943884888e-05, + "loss": 0.5765, + "step": 4882 + }, + { + "epoch": 0.1340746842394289, + "grad_norm": 0.36505839228630066, + "learning_rate": 1.97874808812018e-05, + "loss": 0.5266, + "step": 4883 + }, + { + "epoch": 0.1341021416803954, + "grad_norm": 0.352092981338501, + "learning_rate": 1.9787392305297946e-05, + "loss": 0.5291, + "step": 4884 + }, + { + "epoch": 0.13412959912136188, + "grad_norm": 0.3426252007484436, + "learning_rate": 1.9787303711137486e-05, + "loss": 0.5751, + "step": 4885 + }, + { + "epoch": 0.1341570565623284, + "grad_norm": 0.3982907235622406, + "learning_rate": 1.9787215098720572e-05, + "loss": 0.6256, + "step": 4886 + }, + { + "epoch": 0.1341845140032949, + "grad_norm": 0.37736284732818604, + "learning_rate": 1.9787126468047388e-05, + "loss": 0.5722, + "step": 4887 + }, + { + "epoch": 0.1342119714442614, + "grad_norm": 0.40711498260498047, + "learning_rate": 1.9787037819118082e-05, + "loss": 0.5686, + "step": 4888 + }, + { + "epoch": 0.13423942888522788, + "grad_norm": 0.35620665550231934, + "learning_rate": 1.978694915193283e-05, + "loss": 0.407, + "step": 4889 + }, + { + "epoch": 0.1342668863261944, + "grad_norm": 0.4262213706970215, + "learning_rate": 1.9786860466491794e-05, + "loss": 0.6192, + "step": 4890 + }, + { + "epoch": 0.1342943437671609, + "grad_norm": 0.3600025177001953, + "learning_rate": 1.9786771762795137e-05, + "loss": 0.503, + "step": 4891 + }, + { + "epoch": 0.1343218012081274, + "grad_norm": 0.4524364769458771, + "learning_rate": 1.9786683040843033e-05, + "loss": 0.6354, + "step": 4892 + }, + { + "epoch": 0.13434925864909392, + "grad_norm": 0.3560301959514618, + "learning_rate": 1.9786594300635637e-05, + "loss": 0.5314, + "step": 4893 + }, + { + "epoch": 0.1343767160900604, + "grad_norm": 0.33615347743034363, + "learning_rate": 1.978650554217312e-05, + "loss": 0.5685, + "step": 4894 + }, + { + "epoch": 0.1344041735310269, + "grad_norm": 0.32191959023475647, + "learning_rate": 1.9786416765455646e-05, + "loss": 0.5033, + "step": 4895 + }, + { + "epoch": 0.1344316309719934, + "grad_norm": 0.36819756031036377, + "learning_rate": 1.9786327970483382e-05, + "loss": 0.5749, + "step": 4896 + }, + { + "epoch": 0.13445908841295992, + "grad_norm": 0.3505721092224121, + "learning_rate": 1.9786239157256492e-05, + "loss": 0.5097, + "step": 4897 + }, + { + "epoch": 0.13448654585392641, + "grad_norm": 0.4051551818847656, + "learning_rate": 1.9786150325775143e-05, + "loss": 0.5602, + "step": 4898 + }, + { + "epoch": 0.1345140032948929, + "grad_norm": 0.3908953368663788, + "learning_rate": 1.9786061476039503e-05, + "loss": 0.54, + "step": 4899 + }, + { + "epoch": 0.13454146073585943, + "grad_norm": 0.411014199256897, + "learning_rate": 1.978597260804973e-05, + "loss": 0.5886, + "step": 4900 + }, + { + "epoch": 0.13456891817682592, + "grad_norm": 0.46261322498321533, + "learning_rate": 1.9785883721805997e-05, + "loss": 0.5512, + "step": 4901 + }, + { + "epoch": 0.13459637561779242, + "grad_norm": 0.4012559652328491, + "learning_rate": 1.978579481730847e-05, + "loss": 0.5353, + "step": 4902 + }, + { + "epoch": 0.1346238330587589, + "grad_norm": 0.3576109707355499, + "learning_rate": 1.978570589455731e-05, + "loss": 0.5644, + "step": 4903 + }, + { + "epoch": 0.13465129049972543, + "grad_norm": 0.3837830126285553, + "learning_rate": 1.9785616953552686e-05, + "loss": 0.5107, + "step": 4904 + }, + { + "epoch": 0.13467874794069193, + "grad_norm": 0.3508562445640564, + "learning_rate": 1.978552799429476e-05, + "loss": 0.5396, + "step": 4905 + }, + { + "epoch": 0.13470620538165842, + "grad_norm": 0.3597203195095062, + "learning_rate": 1.9785439016783706e-05, + "loss": 0.5046, + "step": 4906 + }, + { + "epoch": 0.13473366282262494, + "grad_norm": 0.9288743138313293, + "learning_rate": 1.9785350021019684e-05, + "loss": 0.5017, + "step": 4907 + }, + { + "epoch": 0.13476112026359144, + "grad_norm": 0.3313329219818115, + "learning_rate": 1.978526100700286e-05, + "loss": 0.5105, + "step": 4908 + }, + { + "epoch": 0.13478857770455793, + "grad_norm": 0.3363453447818756, + "learning_rate": 1.9785171974733402e-05, + "loss": 0.4684, + "step": 4909 + }, + { + "epoch": 0.13481603514552443, + "grad_norm": 0.35958579182624817, + "learning_rate": 1.9785082924211474e-05, + "loss": 0.4761, + "step": 4910 + }, + { + "epoch": 0.13484349258649095, + "grad_norm": 0.34296557307243347, + "learning_rate": 1.9784993855437244e-05, + "loss": 0.5859, + "step": 4911 + }, + { + "epoch": 0.13487095002745744, + "grad_norm": 0.4491249620914459, + "learning_rate": 1.9784904768410877e-05, + "loss": 0.4614, + "step": 4912 + }, + { + "epoch": 0.13489840746842394, + "grad_norm": 0.3616888225078583, + "learning_rate": 1.9784815663132538e-05, + "loss": 0.606, + "step": 4913 + }, + { + "epoch": 0.13492586490939046, + "grad_norm": 0.3349001407623291, + "learning_rate": 1.9784726539602398e-05, + "loss": 0.4706, + "step": 4914 + }, + { + "epoch": 0.13495332235035695, + "grad_norm": 0.37470510601997375, + "learning_rate": 1.978463739782062e-05, + "loss": 0.6533, + "step": 4915 + }, + { + "epoch": 0.13498077979132345, + "grad_norm": 0.4224397540092468, + "learning_rate": 1.9784548237787368e-05, + "loss": 0.603, + "step": 4916 + }, + { + "epoch": 0.13500823723228994, + "grad_norm": 0.4052431881427765, + "learning_rate": 1.9784459059502816e-05, + "loss": 0.5815, + "step": 4917 + }, + { + "epoch": 0.13503569467325646, + "grad_norm": 0.404073566198349, + "learning_rate": 1.978436986296712e-05, + "loss": 0.552, + "step": 4918 + }, + { + "epoch": 0.13506315211422296, + "grad_norm": 0.35327744483947754, + "learning_rate": 1.9784280648180448e-05, + "loss": 0.6551, + "step": 4919 + }, + { + "epoch": 0.13509060955518945, + "grad_norm": 0.3622957766056061, + "learning_rate": 1.9784191415142975e-05, + "loss": 0.5521, + "step": 4920 + }, + { + "epoch": 0.13511806699615594, + "grad_norm": 0.3593544065952301, + "learning_rate": 1.9784102163854862e-05, + "loss": 0.5956, + "step": 4921 + }, + { + "epoch": 0.13514552443712247, + "grad_norm": 0.39253565669059753, + "learning_rate": 1.9784012894316276e-05, + "loss": 0.6979, + "step": 4922 + }, + { + "epoch": 0.13517298187808896, + "grad_norm": 0.5700513124465942, + "learning_rate": 1.978392360652738e-05, + "loss": 0.51, + "step": 4923 + }, + { + "epoch": 0.13520043931905545, + "grad_norm": 0.4028007984161377, + "learning_rate": 1.9783834300488348e-05, + "loss": 0.5428, + "step": 4924 + }, + { + "epoch": 0.13522789676002198, + "grad_norm": 0.3315434753894806, + "learning_rate": 1.9783744976199338e-05, + "loss": 0.4847, + "step": 4925 + }, + { + "epoch": 0.13525535420098847, + "grad_norm": 0.34676072001457214, + "learning_rate": 1.9783655633660525e-05, + "loss": 0.4983, + "step": 4926 + }, + { + "epoch": 0.13528281164195496, + "grad_norm": 0.43252742290496826, + "learning_rate": 1.978356627287207e-05, + "loss": 0.546, + "step": 4927 + }, + { + "epoch": 0.13531026908292146, + "grad_norm": 0.37991413474082947, + "learning_rate": 1.9783476893834142e-05, + "loss": 0.5254, + "step": 4928 + }, + { + "epoch": 0.13533772652388798, + "grad_norm": 0.4096761643886566, + "learning_rate": 1.9783387496546908e-05, + "loss": 0.5806, + "step": 4929 + }, + { + "epoch": 0.13536518396485447, + "grad_norm": 0.3399903476238251, + "learning_rate": 1.978329808101053e-05, + "loss": 0.4753, + "step": 4930 + }, + { + "epoch": 0.13539264140582097, + "grad_norm": 0.38228607177734375, + "learning_rate": 1.9783208647225182e-05, + "loss": 0.6099, + "step": 4931 + }, + { + "epoch": 0.1354200988467875, + "grad_norm": 0.3899298906326294, + "learning_rate": 1.9783119195191027e-05, + "loss": 0.601, + "step": 4932 + }, + { + "epoch": 0.13544755628775398, + "grad_norm": 0.33892858028411865, + "learning_rate": 1.978302972490823e-05, + "loss": 0.5131, + "step": 4933 + }, + { + "epoch": 0.13547501372872048, + "grad_norm": 0.39501917362213135, + "learning_rate": 1.9782940236376964e-05, + "loss": 0.5184, + "step": 4934 + }, + { + "epoch": 0.13550247116968697, + "grad_norm": 0.41598808765411377, + "learning_rate": 1.978285072959739e-05, + "loss": 0.5521, + "step": 4935 + }, + { + "epoch": 0.1355299286106535, + "grad_norm": 0.31489884853363037, + "learning_rate": 1.9782761204569678e-05, + "loss": 0.4853, + "step": 4936 + }, + { + "epoch": 0.13555738605162, + "grad_norm": 0.375687837600708, + "learning_rate": 1.978267166129399e-05, + "loss": 0.5632, + "step": 4937 + }, + { + "epoch": 0.13558484349258648, + "grad_norm": 0.46530359983444214, + "learning_rate": 1.9782582099770503e-05, + "loss": 0.5221, + "step": 4938 + }, + { + "epoch": 0.135612300933553, + "grad_norm": 0.34890449047088623, + "learning_rate": 1.9782492519999375e-05, + "loss": 0.5611, + "step": 4939 + }, + { + "epoch": 0.1356397583745195, + "grad_norm": 0.367064505815506, + "learning_rate": 1.9782402921980778e-05, + "loss": 0.5406, + "step": 4940 + }, + { + "epoch": 0.135667215815486, + "grad_norm": 0.34357139468193054, + "learning_rate": 1.9782313305714873e-05, + "loss": 0.4516, + "step": 4941 + }, + { + "epoch": 0.13569467325645249, + "grad_norm": 0.33768218755722046, + "learning_rate": 1.9782223671201838e-05, + "loss": 0.4817, + "step": 4942 + }, + { + "epoch": 0.135722130697419, + "grad_norm": 0.34148526191711426, + "learning_rate": 1.9782134018441826e-05, + "loss": 0.4279, + "step": 4943 + }, + { + "epoch": 0.1357495881383855, + "grad_norm": 0.3914555311203003, + "learning_rate": 1.9782044347435017e-05, + "loss": 0.597, + "step": 4944 + }, + { + "epoch": 0.135777045579352, + "grad_norm": 0.40426385402679443, + "learning_rate": 1.9781954658181574e-05, + "loss": 0.5108, + "step": 4945 + }, + { + "epoch": 0.13580450302031852, + "grad_norm": 0.39340853691101074, + "learning_rate": 1.978186495068166e-05, + "loss": 0.5824, + "step": 4946 + }, + { + "epoch": 0.135831960461285, + "grad_norm": 0.32599565386772156, + "learning_rate": 1.978177522493545e-05, + "loss": 0.4963, + "step": 4947 + }, + { + "epoch": 0.1358594179022515, + "grad_norm": 0.3742976486682892, + "learning_rate": 1.9781685480943108e-05, + "loss": 0.472, + "step": 4948 + }, + { + "epoch": 0.135886875343218, + "grad_norm": 0.3387261927127838, + "learning_rate": 1.9781595718704793e-05, + "loss": 0.4988, + "step": 4949 + }, + { + "epoch": 0.13591433278418452, + "grad_norm": 0.38755255937576294, + "learning_rate": 1.9781505938220686e-05, + "loss": 0.5627, + "step": 4950 + }, + { + "epoch": 0.13594179022515102, + "grad_norm": 0.48852458596229553, + "learning_rate": 1.9781416139490948e-05, + "loss": 0.4642, + "step": 4951 + }, + { + "epoch": 0.1359692476661175, + "grad_norm": 0.34934186935424805, + "learning_rate": 1.9781326322515748e-05, + "loss": 0.5211, + "step": 4952 + }, + { + "epoch": 0.13599670510708403, + "grad_norm": 0.5470390915870667, + "learning_rate": 1.978123648729525e-05, + "loss": 0.5043, + "step": 4953 + }, + { + "epoch": 0.13602416254805053, + "grad_norm": 0.7115472555160522, + "learning_rate": 1.9781146633829623e-05, + "loss": 0.5755, + "step": 4954 + }, + { + "epoch": 0.13605161998901702, + "grad_norm": 0.42018812894821167, + "learning_rate": 1.978105676211904e-05, + "loss": 0.4878, + "step": 4955 + }, + { + "epoch": 0.1360790774299835, + "grad_norm": 0.38933834433555603, + "learning_rate": 1.9780966872163662e-05, + "loss": 0.6039, + "step": 4956 + }, + { + "epoch": 0.13610653487095004, + "grad_norm": 0.3920448422431946, + "learning_rate": 1.978087696396366e-05, + "loss": 0.5669, + "step": 4957 + }, + { + "epoch": 0.13613399231191653, + "grad_norm": 0.3727545440196991, + "learning_rate": 1.97807870375192e-05, + "loss": 0.5787, + "step": 4958 + }, + { + "epoch": 0.13616144975288302, + "grad_norm": 0.3576209843158722, + "learning_rate": 1.978069709283045e-05, + "loss": 0.5134, + "step": 4959 + }, + { + "epoch": 0.13618890719384955, + "grad_norm": 0.42967918515205383, + "learning_rate": 1.978060712989758e-05, + "loss": 0.5237, + "step": 4960 + }, + { + "epoch": 0.13621636463481604, + "grad_norm": 0.32449549436569214, + "learning_rate": 1.9780517148720752e-05, + "loss": 0.491, + "step": 4961 + }, + { + "epoch": 0.13624382207578253, + "grad_norm": 0.35335737466812134, + "learning_rate": 1.9780427149300143e-05, + "loss": 0.5074, + "step": 4962 + }, + { + "epoch": 0.13627127951674903, + "grad_norm": 0.3569319546222687, + "learning_rate": 1.9780337131635915e-05, + "loss": 0.516, + "step": 4963 + }, + { + "epoch": 0.13629873695771555, + "grad_norm": 0.3274610638618469, + "learning_rate": 1.9780247095728234e-05, + "loss": 0.4153, + "step": 4964 + }, + { + "epoch": 0.13632619439868204, + "grad_norm": 0.3309895396232605, + "learning_rate": 1.978015704157727e-05, + "loss": 0.471, + "step": 4965 + }, + { + "epoch": 0.13635365183964854, + "grad_norm": 0.3605915307998657, + "learning_rate": 1.9780066969183193e-05, + "loss": 0.4807, + "step": 4966 + }, + { + "epoch": 0.13638110928061506, + "grad_norm": 0.3343086540699005, + "learning_rate": 1.9779976878546172e-05, + "loss": 0.5452, + "step": 4967 + }, + { + "epoch": 0.13640856672158155, + "grad_norm": 0.3345814049243927, + "learning_rate": 1.977988676966637e-05, + "loss": 0.5487, + "step": 4968 + }, + { + "epoch": 0.13643602416254805, + "grad_norm": 0.354967325925827, + "learning_rate": 1.977979664254396e-05, + "loss": 0.4678, + "step": 4969 + }, + { + "epoch": 0.13646348160351454, + "grad_norm": 0.39486318826675415, + "learning_rate": 1.9779706497179107e-05, + "loss": 0.5612, + "step": 4970 + }, + { + "epoch": 0.13649093904448106, + "grad_norm": 0.4754645526409149, + "learning_rate": 1.977961633357198e-05, + "loss": 0.447, + "step": 4971 + }, + { + "epoch": 0.13651839648544756, + "grad_norm": 0.3794698417186737, + "learning_rate": 1.9779526151722748e-05, + "loss": 0.5947, + "step": 4972 + }, + { + "epoch": 0.13654585392641405, + "grad_norm": 0.3733673393726349, + "learning_rate": 1.9779435951631577e-05, + "loss": 0.5024, + "step": 4973 + }, + { + "epoch": 0.13657331136738057, + "grad_norm": 0.3926224410533905, + "learning_rate": 1.9779345733298637e-05, + "loss": 0.5836, + "step": 4974 + }, + { + "epoch": 0.13660076880834707, + "grad_norm": 0.33958786725997925, + "learning_rate": 1.9779255496724097e-05, + "loss": 0.5435, + "step": 4975 + }, + { + "epoch": 0.13662822624931356, + "grad_norm": 0.4033879041671753, + "learning_rate": 1.9779165241908123e-05, + "loss": 0.4929, + "step": 4976 + }, + { + "epoch": 0.13665568369028006, + "grad_norm": 0.3827058672904968, + "learning_rate": 1.9779074968850885e-05, + "loss": 0.5298, + "step": 4977 + }, + { + "epoch": 0.13668314113124658, + "grad_norm": 0.7481989860534668, + "learning_rate": 1.9778984677552552e-05, + "loss": 0.4931, + "step": 4978 + }, + { + "epoch": 0.13671059857221307, + "grad_norm": 0.37940120697021484, + "learning_rate": 1.977889436801329e-05, + "loss": 0.5108, + "step": 4979 + }, + { + "epoch": 0.13673805601317957, + "grad_norm": 0.37448397278785706, + "learning_rate": 1.977880404023327e-05, + "loss": 0.605, + "step": 4980 + }, + { + "epoch": 0.1367655134541461, + "grad_norm": 0.4099752604961395, + "learning_rate": 1.9778713694212662e-05, + "loss": 0.634, + "step": 4981 + }, + { + "epoch": 0.13679297089511258, + "grad_norm": 0.4014447033405304, + "learning_rate": 1.977862332995163e-05, + "loss": 0.5783, + "step": 4982 + }, + { + "epoch": 0.13682042833607908, + "grad_norm": 0.4280092120170593, + "learning_rate": 1.977853294745034e-05, + "loss": 0.5758, + "step": 4983 + }, + { + "epoch": 0.13684788577704557, + "grad_norm": 0.3994157910346985, + "learning_rate": 1.9778442546708968e-05, + "loss": 0.6057, + "step": 4984 + }, + { + "epoch": 0.1368753432180121, + "grad_norm": 0.3719986081123352, + "learning_rate": 1.977835212772768e-05, + "loss": 0.5721, + "step": 4985 + }, + { + "epoch": 0.13690280065897859, + "grad_norm": 0.3364984095096588, + "learning_rate": 1.9778261690506646e-05, + "loss": 0.5296, + "step": 4986 + }, + { + "epoch": 0.13693025809994508, + "grad_norm": 0.34200048446655273, + "learning_rate": 1.9778171235046033e-05, + "loss": 0.5132, + "step": 4987 + }, + { + "epoch": 0.13695771554091157, + "grad_norm": 0.36174276471138, + "learning_rate": 1.977808076134601e-05, + "loss": 0.5723, + "step": 4988 + }, + { + "epoch": 0.1369851729818781, + "grad_norm": 0.3809409737586975, + "learning_rate": 1.9777990269406742e-05, + "loss": 0.6174, + "step": 4989 + }, + { + "epoch": 0.1370126304228446, + "grad_norm": 0.4485138952732086, + "learning_rate": 1.9777899759228404e-05, + "loss": 0.5459, + "step": 4990 + }, + { + "epoch": 0.13704008786381108, + "grad_norm": 0.3712345361709595, + "learning_rate": 1.9777809230811162e-05, + "loss": 0.5421, + "step": 4991 + }, + { + "epoch": 0.1370675453047776, + "grad_norm": 0.3640673756599426, + "learning_rate": 1.9777718684155184e-05, + "loss": 0.4841, + "step": 4992 + }, + { + "epoch": 0.1370950027457441, + "grad_norm": 0.392861008644104, + "learning_rate": 1.9777628119260645e-05, + "loss": 0.5768, + "step": 4993 + }, + { + "epoch": 0.1371224601867106, + "grad_norm": 0.337295800447464, + "learning_rate": 1.9777537536127704e-05, + "loss": 0.5155, + "step": 4994 + }, + { + "epoch": 0.1371499176276771, + "grad_norm": 0.3368169963359833, + "learning_rate": 1.9777446934756535e-05, + "loss": 0.5501, + "step": 4995 + }, + { + "epoch": 0.1371773750686436, + "grad_norm": 0.4147259593009949, + "learning_rate": 1.977735631514731e-05, + "loss": 0.6063, + "step": 4996 + }, + { + "epoch": 0.1372048325096101, + "grad_norm": 0.34343671798706055, + "learning_rate": 1.977726567730019e-05, + "loss": 0.4947, + "step": 4997 + }, + { + "epoch": 0.1372322899505766, + "grad_norm": 0.3967944085597992, + "learning_rate": 1.9777175021215352e-05, + "loss": 0.5795, + "step": 4998 + }, + { + "epoch": 0.13725974739154312, + "grad_norm": 0.3250882029533386, + "learning_rate": 1.9777084346892962e-05, + "loss": 0.4558, + "step": 4999 + }, + { + "epoch": 0.1372872048325096, + "grad_norm": 0.39746996760368347, + "learning_rate": 1.9776993654333187e-05, + "loss": 0.5426, + "step": 5000 + }, + { + "epoch": 0.1373146622734761, + "grad_norm": 0.4122356176376343, + "learning_rate": 1.9776902943536203e-05, + "loss": 0.523, + "step": 5001 + }, + { + "epoch": 0.1373421197144426, + "grad_norm": 0.3609703481197357, + "learning_rate": 1.977681221450217e-05, + "loss": 0.5056, + "step": 5002 + }, + { + "epoch": 0.13736957715540912, + "grad_norm": 0.3518233597278595, + "learning_rate": 1.9776721467231262e-05, + "loss": 0.4903, + "step": 5003 + }, + { + "epoch": 0.13739703459637562, + "grad_norm": 0.365141361951828, + "learning_rate": 1.977663070172365e-05, + "loss": 0.5368, + "step": 5004 + }, + { + "epoch": 0.1374244920373421, + "grad_norm": 0.4940430819988251, + "learning_rate": 1.97765399179795e-05, + "loss": 0.5058, + "step": 5005 + }, + { + "epoch": 0.13745194947830863, + "grad_norm": 0.37191492319107056, + "learning_rate": 1.9776449115998987e-05, + "loss": 0.5355, + "step": 5006 + }, + { + "epoch": 0.13747940691927513, + "grad_norm": 0.40170803666114807, + "learning_rate": 1.9776358295782273e-05, + "loss": 0.4981, + "step": 5007 + }, + { + "epoch": 0.13750686436024162, + "grad_norm": 0.34853240847587585, + "learning_rate": 1.9776267457329533e-05, + "loss": 0.5415, + "step": 5008 + }, + { + "epoch": 0.13753432180120811, + "grad_norm": 0.3242327868938446, + "learning_rate": 1.977617660064093e-05, + "loss": 0.5359, + "step": 5009 + }, + { + "epoch": 0.13756177924217464, + "grad_norm": 0.35209745168685913, + "learning_rate": 1.9776085725716637e-05, + "loss": 0.4876, + "step": 5010 + }, + { + "epoch": 0.13758923668314113, + "grad_norm": 0.47492626309394836, + "learning_rate": 1.9775994832556828e-05, + "loss": 0.4815, + "step": 5011 + }, + { + "epoch": 0.13761669412410762, + "grad_norm": 0.43082594871520996, + "learning_rate": 1.9775903921161666e-05, + "loss": 0.5839, + "step": 5012 + }, + { + "epoch": 0.13764415156507415, + "grad_norm": 0.39613956212997437, + "learning_rate": 1.977581299153132e-05, + "loss": 0.5893, + "step": 5013 + }, + { + "epoch": 0.13767160900604064, + "grad_norm": 0.37491533160209656, + "learning_rate": 1.977572204366597e-05, + "loss": 0.5288, + "step": 5014 + }, + { + "epoch": 0.13769906644700713, + "grad_norm": 0.3563674986362457, + "learning_rate": 1.9775631077565774e-05, + "loss": 0.5016, + "step": 5015 + }, + { + "epoch": 0.13772652388797363, + "grad_norm": 0.35588786005973816, + "learning_rate": 1.9775540093230904e-05, + "loss": 0.5031, + "step": 5016 + }, + { + "epoch": 0.13775398132894015, + "grad_norm": 0.3728574216365814, + "learning_rate": 1.9775449090661536e-05, + "loss": 0.3755, + "step": 5017 + }, + { + "epoch": 0.13778143876990664, + "grad_norm": 0.3645084500312805, + "learning_rate": 1.977535806985783e-05, + "loss": 0.4364, + "step": 5018 + }, + { + "epoch": 0.13780889621087314, + "grad_norm": 0.32941946387290955, + "learning_rate": 1.9775267030819968e-05, + "loss": 0.5064, + "step": 5019 + }, + { + "epoch": 0.13783635365183966, + "grad_norm": 0.3671453893184662, + "learning_rate": 1.977517597354811e-05, + "loss": 0.5781, + "step": 5020 + }, + { + "epoch": 0.13786381109280615, + "grad_norm": 0.3550232946872711, + "learning_rate": 1.977508489804243e-05, + "loss": 0.5248, + "step": 5021 + }, + { + "epoch": 0.13789126853377265, + "grad_norm": 0.35496723651885986, + "learning_rate": 1.9774993804303097e-05, + "loss": 0.6194, + "step": 5022 + }, + { + "epoch": 0.13791872597473914, + "grad_norm": 0.4380885660648346, + "learning_rate": 1.977490269233028e-05, + "loss": 0.6342, + "step": 5023 + }, + { + "epoch": 0.13794618341570566, + "grad_norm": 0.3989108204841614, + "learning_rate": 1.9774811562124148e-05, + "loss": 0.4965, + "step": 5024 + }, + { + "epoch": 0.13797364085667216, + "grad_norm": 0.32324090600013733, + "learning_rate": 1.9774720413684874e-05, + "loss": 0.4356, + "step": 5025 + }, + { + "epoch": 0.13800109829763865, + "grad_norm": 0.3729538023471832, + "learning_rate": 1.9774629247012627e-05, + "loss": 0.5185, + "step": 5026 + }, + { + "epoch": 0.13802855573860517, + "grad_norm": 0.4090462028980255, + "learning_rate": 1.9774538062107575e-05, + "loss": 0.5924, + "step": 5027 + }, + { + "epoch": 0.13805601317957167, + "grad_norm": 0.3772102892398834, + "learning_rate": 1.9774446858969892e-05, + "loss": 0.6134, + "step": 5028 + }, + { + "epoch": 0.13808347062053816, + "grad_norm": 0.34146612882614136, + "learning_rate": 1.9774355637599747e-05, + "loss": 0.5273, + "step": 5029 + }, + { + "epoch": 0.13811092806150466, + "grad_norm": 0.3378389775753021, + "learning_rate": 1.9774264397997305e-05, + "loss": 0.4961, + "step": 5030 + }, + { + "epoch": 0.13813838550247118, + "grad_norm": 0.3810559809207916, + "learning_rate": 1.9774173140162744e-05, + "loss": 0.5777, + "step": 5031 + }, + { + "epoch": 0.13816584294343767, + "grad_norm": 0.3239996135234833, + "learning_rate": 1.977408186409623e-05, + "loss": 0.5419, + "step": 5032 + }, + { + "epoch": 0.13819330038440417, + "grad_norm": 0.34209585189819336, + "learning_rate": 1.9773990569797932e-05, + "loss": 0.4071, + "step": 5033 + }, + { + "epoch": 0.1382207578253707, + "grad_norm": 0.35367804765701294, + "learning_rate": 1.9773899257268024e-05, + "loss": 0.514, + "step": 5034 + }, + { + "epoch": 0.13824821526633718, + "grad_norm": 0.34025809168815613, + "learning_rate": 1.977380792650667e-05, + "loss": 0.5547, + "step": 5035 + }, + { + "epoch": 0.13827567270730368, + "grad_norm": 0.371284544467926, + "learning_rate": 1.977371657751405e-05, + "loss": 0.5993, + "step": 5036 + }, + { + "epoch": 0.13830313014827017, + "grad_norm": 0.351248562335968, + "learning_rate": 1.9773625210290327e-05, + "loss": 0.5836, + "step": 5037 + }, + { + "epoch": 0.1383305875892367, + "grad_norm": 0.3599323332309723, + "learning_rate": 1.9773533824835676e-05, + "loss": 0.5606, + "step": 5038 + }, + { + "epoch": 0.13835804503020319, + "grad_norm": 0.4478107988834381, + "learning_rate": 1.9773442421150264e-05, + "loss": 0.5004, + "step": 5039 + }, + { + "epoch": 0.13838550247116968, + "grad_norm": 0.4365478754043579, + "learning_rate": 1.977335099923426e-05, + "loss": 0.6115, + "step": 5040 + }, + { + "epoch": 0.1384129599121362, + "grad_norm": 0.373989999294281, + "learning_rate": 1.9773259559087838e-05, + "loss": 0.4948, + "step": 5041 + }, + { + "epoch": 0.1384404173531027, + "grad_norm": 0.39117470383644104, + "learning_rate": 1.9773168100711172e-05, + "loss": 0.4943, + "step": 5042 + }, + { + "epoch": 0.1384678747940692, + "grad_norm": 0.3307638466358185, + "learning_rate": 1.9773076624104423e-05, + "loss": 0.4717, + "step": 5043 + }, + { + "epoch": 0.13849533223503568, + "grad_norm": 0.4056659936904907, + "learning_rate": 1.977298512926777e-05, + "loss": 0.5126, + "step": 5044 + }, + { + "epoch": 0.1385227896760022, + "grad_norm": 0.3743223249912262, + "learning_rate": 1.9772893616201378e-05, + "loss": 0.5412, + "step": 5045 + }, + { + "epoch": 0.1385502471169687, + "grad_norm": 0.394788920879364, + "learning_rate": 1.977280208490542e-05, + "loss": 0.5713, + "step": 5046 + }, + { + "epoch": 0.1385777045579352, + "grad_norm": 0.37682902812957764, + "learning_rate": 1.9772710535380068e-05, + "loss": 0.527, + "step": 5047 + }, + { + "epoch": 0.13860516199890172, + "grad_norm": 0.34916019439697266, + "learning_rate": 1.9772618967625492e-05, + "loss": 0.5281, + "step": 5048 + }, + { + "epoch": 0.1386326194398682, + "grad_norm": 0.44591742753982544, + "learning_rate": 1.9772527381641863e-05, + "loss": 0.5005, + "step": 5049 + }, + { + "epoch": 0.1386600768808347, + "grad_norm": 0.3625373840332031, + "learning_rate": 1.977243577742935e-05, + "loss": 0.5861, + "step": 5050 + }, + { + "epoch": 0.1386875343218012, + "grad_norm": 0.3989298939704895, + "learning_rate": 1.977234415498813e-05, + "loss": 0.6209, + "step": 5051 + }, + { + "epoch": 0.13871499176276772, + "grad_norm": 0.4088045656681061, + "learning_rate": 1.9772252514318364e-05, + "loss": 0.5534, + "step": 5052 + }, + { + "epoch": 0.1387424492037342, + "grad_norm": 0.4071587026119232, + "learning_rate": 1.977216085542023e-05, + "loss": 0.5495, + "step": 5053 + }, + { + "epoch": 0.1387699066447007, + "grad_norm": 0.42477282881736755, + "learning_rate": 1.9772069178293898e-05, + "loss": 0.5148, + "step": 5054 + }, + { + "epoch": 0.1387973640856672, + "grad_norm": 0.49632546305656433, + "learning_rate": 1.9771977482939535e-05, + "loss": 0.5239, + "step": 5055 + }, + { + "epoch": 0.13882482152663372, + "grad_norm": 0.3305245637893677, + "learning_rate": 1.9771885769357317e-05, + "loss": 0.5075, + "step": 5056 + }, + { + "epoch": 0.13885227896760022, + "grad_norm": 0.35537129640579224, + "learning_rate": 1.9771794037547415e-05, + "loss": 0.519, + "step": 5057 + }, + { + "epoch": 0.1388797364085667, + "grad_norm": 0.34813255071640015, + "learning_rate": 1.9771702287509997e-05, + "loss": 0.5944, + "step": 5058 + }, + { + "epoch": 0.13890719384953323, + "grad_norm": 0.6297627091407776, + "learning_rate": 1.9771610519245233e-05, + "loss": 0.5225, + "step": 5059 + }, + { + "epoch": 0.13893465129049973, + "grad_norm": 0.34805941581726074, + "learning_rate": 1.97715187327533e-05, + "loss": 0.459, + "step": 5060 + }, + { + "epoch": 0.13896210873146622, + "grad_norm": 0.3337455093860626, + "learning_rate": 1.9771426928034362e-05, + "loss": 0.5314, + "step": 5061 + }, + { + "epoch": 0.13898956617243272, + "grad_norm": 0.36725589632987976, + "learning_rate": 1.97713351050886e-05, + "loss": 0.5456, + "step": 5062 + }, + { + "epoch": 0.13901702361339924, + "grad_norm": 0.3686041533946991, + "learning_rate": 1.9771243263916177e-05, + "loss": 0.5306, + "step": 5063 + }, + { + "epoch": 0.13904448105436573, + "grad_norm": 0.369164377450943, + "learning_rate": 1.9771151404517264e-05, + "loss": 0.5484, + "step": 5064 + }, + { + "epoch": 0.13907193849533223, + "grad_norm": 0.3749980628490448, + "learning_rate": 1.9771059526892036e-05, + "loss": 0.5986, + "step": 5065 + }, + { + "epoch": 0.13909939593629875, + "grad_norm": 0.37189480662345886, + "learning_rate": 1.9770967631040664e-05, + "loss": 0.576, + "step": 5066 + }, + { + "epoch": 0.13912685337726524, + "grad_norm": 0.35205352306365967, + "learning_rate": 1.977087571696332e-05, + "loss": 0.5761, + "step": 5067 + }, + { + "epoch": 0.13915431081823174, + "grad_norm": 0.40013763308525085, + "learning_rate": 1.9770783784660176e-05, + "loss": 0.4838, + "step": 5068 + }, + { + "epoch": 0.13918176825919823, + "grad_norm": 0.4290878176689148, + "learning_rate": 1.9770691834131398e-05, + "loss": 0.5796, + "step": 5069 + }, + { + "epoch": 0.13920922570016475, + "grad_norm": 0.3411395847797394, + "learning_rate": 1.977059986537716e-05, + "loss": 0.5201, + "step": 5070 + }, + { + "epoch": 0.13923668314113125, + "grad_norm": 0.3632297217845917, + "learning_rate": 1.9770507878397638e-05, + "loss": 0.5265, + "step": 5071 + }, + { + "epoch": 0.13926414058209774, + "grad_norm": 0.359386682510376, + "learning_rate": 1.9770415873192998e-05, + "loss": 0.499, + "step": 5072 + }, + { + "epoch": 0.13929159802306426, + "grad_norm": 0.4210584759712219, + "learning_rate": 1.9770323849763418e-05, + "loss": 0.5479, + "step": 5073 + }, + { + "epoch": 0.13931905546403076, + "grad_norm": 0.35762128233909607, + "learning_rate": 1.9770231808109063e-05, + "loss": 0.5826, + "step": 5074 + }, + { + "epoch": 0.13934651290499725, + "grad_norm": 0.38777491450309753, + "learning_rate": 1.9770139748230107e-05, + "loss": 0.6524, + "step": 5075 + }, + { + "epoch": 0.13937397034596374, + "grad_norm": 0.3771539628505707, + "learning_rate": 1.9770047670126725e-05, + "loss": 0.5418, + "step": 5076 + }, + { + "epoch": 0.13940142778693027, + "grad_norm": 0.42269107699394226, + "learning_rate": 1.976995557379908e-05, + "loss": 0.5886, + "step": 5077 + }, + { + "epoch": 0.13942888522789676, + "grad_norm": 0.3785474896430969, + "learning_rate": 1.9769863459247356e-05, + "loss": 0.553, + "step": 5078 + }, + { + "epoch": 0.13945634266886325, + "grad_norm": 0.42848441004753113, + "learning_rate": 1.9769771326471715e-05, + "loss": 0.5497, + "step": 5079 + }, + { + "epoch": 0.13948380010982978, + "grad_norm": 0.36823713779449463, + "learning_rate": 1.9769679175472335e-05, + "loss": 0.5746, + "step": 5080 + }, + { + "epoch": 0.13951125755079627, + "grad_norm": 0.33764955401420593, + "learning_rate": 1.976958700624938e-05, + "loss": 0.5815, + "step": 5081 + }, + { + "epoch": 0.13953871499176276, + "grad_norm": 0.6321456432342529, + "learning_rate": 1.976949481880303e-05, + "loss": 0.5499, + "step": 5082 + }, + { + "epoch": 0.13956617243272926, + "grad_norm": 0.35167545080184937, + "learning_rate": 1.976940261313346e-05, + "loss": 0.5254, + "step": 5083 + }, + { + "epoch": 0.13959362987369578, + "grad_norm": 0.40513819456100464, + "learning_rate": 1.976931038924083e-05, + "loss": 0.4834, + "step": 5084 + }, + { + "epoch": 0.13962108731466227, + "grad_norm": 0.7617354393005371, + "learning_rate": 1.9769218147125316e-05, + "loss": 0.5471, + "step": 5085 + }, + { + "epoch": 0.13964854475562877, + "grad_norm": 0.4126875400543213, + "learning_rate": 1.9769125886787095e-05, + "loss": 0.6449, + "step": 5086 + }, + { + "epoch": 0.1396760021965953, + "grad_norm": 0.3729192912578583, + "learning_rate": 1.9769033608226336e-05, + "loss": 0.5512, + "step": 5087 + }, + { + "epoch": 0.13970345963756178, + "grad_norm": 0.363986611366272, + "learning_rate": 1.9768941311443212e-05, + "loss": 0.5668, + "step": 5088 + }, + { + "epoch": 0.13973091707852828, + "grad_norm": 0.3733406066894531, + "learning_rate": 1.9768848996437895e-05, + "loss": 0.6401, + "step": 5089 + }, + { + "epoch": 0.13975837451949477, + "grad_norm": 0.33875253796577454, + "learning_rate": 1.9768756663210554e-05, + "loss": 0.5083, + "step": 5090 + }, + { + "epoch": 0.1397858319604613, + "grad_norm": 0.34692931175231934, + "learning_rate": 1.976866431176137e-05, + "loss": 0.546, + "step": 5091 + }, + { + "epoch": 0.1398132894014278, + "grad_norm": 0.3689880073070526, + "learning_rate": 1.9768571942090505e-05, + "loss": 0.5323, + "step": 5092 + }, + { + "epoch": 0.13984074684239428, + "grad_norm": 0.38928279280662537, + "learning_rate": 1.9768479554198134e-05, + "loss": 0.5698, + "step": 5093 + }, + { + "epoch": 0.1398682042833608, + "grad_norm": 0.3700495958328247, + "learning_rate": 1.9768387148084432e-05, + "loss": 0.552, + "step": 5094 + }, + { + "epoch": 0.1398956617243273, + "grad_norm": 0.3634013831615448, + "learning_rate": 1.9768294723749574e-05, + "loss": 0.5251, + "step": 5095 + }, + { + "epoch": 0.1399231191652938, + "grad_norm": 0.364368200302124, + "learning_rate": 1.9768202281193726e-05, + "loss": 0.6216, + "step": 5096 + }, + { + "epoch": 0.13995057660626029, + "grad_norm": 0.38359904289245605, + "learning_rate": 1.9768109820417064e-05, + "loss": 0.5025, + "step": 5097 + }, + { + "epoch": 0.1399780340472268, + "grad_norm": 0.3865695595741272, + "learning_rate": 1.9768017341419754e-05, + "loss": 0.5129, + "step": 5098 + }, + { + "epoch": 0.1400054914881933, + "grad_norm": 0.3774087727069855, + "learning_rate": 1.976792484420198e-05, + "loss": 0.5829, + "step": 5099 + }, + { + "epoch": 0.1400329489291598, + "grad_norm": 0.45045655965805054, + "learning_rate": 1.9767832328763907e-05, + "loss": 0.5406, + "step": 5100 + }, + { + "epoch": 0.14006040637012632, + "grad_norm": 0.3464818000793457, + "learning_rate": 1.9767739795105708e-05, + "loss": 0.5252, + "step": 5101 + }, + { + "epoch": 0.1400878638110928, + "grad_norm": 0.38034719228744507, + "learning_rate": 1.976764724322756e-05, + "loss": 0.5336, + "step": 5102 + }, + { + "epoch": 0.1401153212520593, + "grad_norm": 0.3678613603115082, + "learning_rate": 1.976755467312963e-05, + "loss": 0.5254, + "step": 5103 + }, + { + "epoch": 0.1401427786930258, + "grad_norm": 0.36178499460220337, + "learning_rate": 1.9767462084812094e-05, + "loss": 0.581, + "step": 5104 + }, + { + "epoch": 0.14017023613399232, + "grad_norm": 0.3591462969779968, + "learning_rate": 1.9767369478275123e-05, + "loss": 0.5102, + "step": 5105 + }, + { + "epoch": 0.14019769357495881, + "grad_norm": 0.38280192017555237, + "learning_rate": 1.976727685351889e-05, + "loss": 0.5385, + "step": 5106 + }, + { + "epoch": 0.1402251510159253, + "grad_norm": 0.3649687170982361, + "learning_rate": 1.976718421054357e-05, + "loss": 0.5251, + "step": 5107 + }, + { + "epoch": 0.14025260845689183, + "grad_norm": 0.4072813391685486, + "learning_rate": 1.9767091549349335e-05, + "loss": 0.5607, + "step": 5108 + }, + { + "epoch": 0.14028006589785832, + "grad_norm": 0.36535027623176575, + "learning_rate": 1.9766998869936352e-05, + "loss": 0.5097, + "step": 5109 + }, + { + "epoch": 0.14030752333882482, + "grad_norm": 0.36129873991012573, + "learning_rate": 1.9766906172304803e-05, + "loss": 0.492, + "step": 5110 + }, + { + "epoch": 0.1403349807797913, + "grad_norm": 0.33539286255836487, + "learning_rate": 1.9766813456454855e-05, + "loss": 0.5206, + "step": 5111 + }, + { + "epoch": 0.14036243822075783, + "grad_norm": 0.3805730640888214, + "learning_rate": 1.9766720722386685e-05, + "loss": 0.5678, + "step": 5112 + }, + { + "epoch": 0.14038989566172433, + "grad_norm": 0.37901657819747925, + "learning_rate": 1.9766627970100463e-05, + "loss": 0.5469, + "step": 5113 + }, + { + "epoch": 0.14041735310269082, + "grad_norm": 0.3843391239643097, + "learning_rate": 1.9766535199596362e-05, + "loss": 0.5739, + "step": 5114 + }, + { + "epoch": 0.14044481054365734, + "grad_norm": 0.3886151611804962, + "learning_rate": 1.9766442410874554e-05, + "loss": 0.533, + "step": 5115 + }, + { + "epoch": 0.14047226798462384, + "grad_norm": 0.40498775243759155, + "learning_rate": 1.9766349603935215e-05, + "loss": 0.5406, + "step": 5116 + }, + { + "epoch": 0.14049972542559033, + "grad_norm": 0.3319019675254822, + "learning_rate": 1.9766256778778518e-05, + "loss": 0.5118, + "step": 5117 + }, + { + "epoch": 0.14052718286655683, + "grad_norm": 0.3901922404766083, + "learning_rate": 1.9766163935404633e-05, + "loss": 0.4975, + "step": 5118 + }, + { + "epoch": 0.14055464030752335, + "grad_norm": 0.34731176495552063, + "learning_rate": 1.976607107381374e-05, + "loss": 0.5772, + "step": 5119 + }, + { + "epoch": 0.14058209774848984, + "grad_norm": 0.36220449209213257, + "learning_rate": 1.9765978194006005e-05, + "loss": 0.5396, + "step": 5120 + }, + { + "epoch": 0.14060955518945634, + "grad_norm": 0.35474881529808044, + "learning_rate": 1.97658852959816e-05, + "loss": 0.5028, + "step": 5121 + }, + { + "epoch": 0.14063701263042283, + "grad_norm": 0.31767410039901733, + "learning_rate": 1.9765792379740704e-05, + "loss": 0.5368, + "step": 5122 + }, + { + "epoch": 0.14066447007138935, + "grad_norm": 0.38548019528388977, + "learning_rate": 1.976569944528349e-05, + "loss": 0.4788, + "step": 5123 + }, + { + "epoch": 0.14069192751235585, + "grad_norm": 0.38302141427993774, + "learning_rate": 1.9765606492610133e-05, + "loss": 0.5324, + "step": 5124 + }, + { + "epoch": 0.14071938495332234, + "grad_norm": 0.35954320430755615, + "learning_rate": 1.9765513521720797e-05, + "loss": 0.5093, + "step": 5125 + }, + { + "epoch": 0.14074684239428886, + "grad_norm": 0.3440805673599243, + "learning_rate": 1.9765420532615663e-05, + "loss": 0.5685, + "step": 5126 + }, + { + "epoch": 0.14077429983525536, + "grad_norm": 0.3343869745731354, + "learning_rate": 1.9765327525294905e-05, + "loss": 0.4488, + "step": 5127 + }, + { + "epoch": 0.14080175727622185, + "grad_norm": 0.4121215343475342, + "learning_rate": 1.9765234499758693e-05, + "loss": 0.5567, + "step": 5128 + }, + { + "epoch": 0.14082921471718834, + "grad_norm": 0.3899824321269989, + "learning_rate": 1.97651414560072e-05, + "loss": 0.4749, + "step": 5129 + }, + { + "epoch": 0.14085667215815487, + "grad_norm": 0.32759928703308105, + "learning_rate": 1.9765048394040604e-05, + "loss": 0.4922, + "step": 5130 + }, + { + "epoch": 0.14088412959912136, + "grad_norm": 0.5473260879516602, + "learning_rate": 1.9764955313859075e-05, + "loss": 0.5576, + "step": 5131 + }, + { + "epoch": 0.14091158704008785, + "grad_norm": 0.3910808265209198, + "learning_rate": 1.9764862215462787e-05, + "loss": 0.575, + "step": 5132 + }, + { + "epoch": 0.14093904448105438, + "grad_norm": 0.3842034935951233, + "learning_rate": 1.9764769098851916e-05, + "loss": 0.5557, + "step": 5133 + }, + { + "epoch": 0.14096650192202087, + "grad_norm": 0.384150892496109, + "learning_rate": 1.9764675964026634e-05, + "loss": 0.4754, + "step": 5134 + }, + { + "epoch": 0.14099395936298736, + "grad_norm": 0.37913212180137634, + "learning_rate": 1.9764582810987116e-05, + "loss": 0.5225, + "step": 5135 + }, + { + "epoch": 0.14102141680395386, + "grad_norm": 0.33545777201652527, + "learning_rate": 1.9764489639733533e-05, + "loss": 0.5391, + "step": 5136 + }, + { + "epoch": 0.14104887424492038, + "grad_norm": 0.3970257639884949, + "learning_rate": 1.976439645026606e-05, + "loss": 0.638, + "step": 5137 + }, + { + "epoch": 0.14107633168588687, + "grad_norm": 6.840550422668457, + "learning_rate": 1.9764303242584874e-05, + "loss": 0.5736, + "step": 5138 + }, + { + "epoch": 0.14110378912685337, + "grad_norm": 0.3973082900047302, + "learning_rate": 1.976421001669014e-05, + "loss": 0.5846, + "step": 5139 + }, + { + "epoch": 0.1411312465678199, + "grad_norm": 0.3658939599990845, + "learning_rate": 1.9764116772582044e-05, + "loss": 0.5129, + "step": 5140 + }, + { + "epoch": 0.14115870400878638, + "grad_norm": 0.36921167373657227, + "learning_rate": 1.976402351026075e-05, + "loss": 0.5756, + "step": 5141 + }, + { + "epoch": 0.14118616144975288, + "grad_norm": 0.40854158997535706, + "learning_rate": 1.976393022972644e-05, + "loss": 0.5263, + "step": 5142 + }, + { + "epoch": 0.14121361889071937, + "grad_norm": 0.39392220973968506, + "learning_rate": 1.976383693097928e-05, + "loss": 0.5207, + "step": 5143 + }, + { + "epoch": 0.1412410763316859, + "grad_norm": 0.3888837695121765, + "learning_rate": 1.9763743614019448e-05, + "loss": 0.5721, + "step": 5144 + }, + { + "epoch": 0.1412685337726524, + "grad_norm": 0.41442108154296875, + "learning_rate": 1.976365027884712e-05, + "loss": 0.4956, + "step": 5145 + }, + { + "epoch": 0.14129599121361888, + "grad_norm": 0.366674542427063, + "learning_rate": 1.9763556925462467e-05, + "loss": 0.6122, + "step": 5146 + }, + { + "epoch": 0.1413234486545854, + "grad_norm": 0.6076432466506958, + "learning_rate": 1.9763463553865666e-05, + "loss": 0.6184, + "step": 5147 + }, + { + "epoch": 0.1413509060955519, + "grad_norm": 0.3274708688259125, + "learning_rate": 1.9763370164056884e-05, + "loss": 0.5578, + "step": 5148 + }, + { + "epoch": 0.1413783635365184, + "grad_norm": 0.40332868695259094, + "learning_rate": 1.9763276756036305e-05, + "loss": 0.5948, + "step": 5149 + }, + { + "epoch": 0.1414058209774849, + "grad_norm": 0.3823903203010559, + "learning_rate": 1.9763183329804097e-05, + "loss": 0.6232, + "step": 5150 + }, + { + "epoch": 0.1414332784184514, + "grad_norm": 0.31382521986961365, + "learning_rate": 1.9763089885360436e-05, + "loss": 0.4756, + "step": 5151 + }, + { + "epoch": 0.1414607358594179, + "grad_norm": 0.35142281651496887, + "learning_rate": 1.9762996422705497e-05, + "loss": 0.516, + "step": 5152 + }, + { + "epoch": 0.1414881933003844, + "grad_norm": 0.3624790608882904, + "learning_rate": 1.9762902941839453e-05, + "loss": 0.6893, + "step": 5153 + }, + { + "epoch": 0.14151565074135092, + "grad_norm": 0.37174686789512634, + "learning_rate": 1.9762809442762483e-05, + "loss": 0.5457, + "step": 5154 + }, + { + "epoch": 0.1415431081823174, + "grad_norm": 0.3992995619773865, + "learning_rate": 1.9762715925474755e-05, + "loss": 0.5806, + "step": 5155 + }, + { + "epoch": 0.1415705656232839, + "grad_norm": 0.378126859664917, + "learning_rate": 1.9762622389976445e-05, + "loss": 0.5076, + "step": 5156 + }, + { + "epoch": 0.1415980230642504, + "grad_norm": 0.3404308557510376, + "learning_rate": 1.976252883626773e-05, + "loss": 0.5297, + "step": 5157 + }, + { + "epoch": 0.14162548050521692, + "grad_norm": 0.33897069096565247, + "learning_rate": 1.976243526434878e-05, + "loss": 0.5019, + "step": 5158 + }, + { + "epoch": 0.14165293794618342, + "grad_norm": 0.37925100326538086, + "learning_rate": 1.9762341674219775e-05, + "loss": 0.4427, + "step": 5159 + }, + { + "epoch": 0.1416803953871499, + "grad_norm": 0.365349143743515, + "learning_rate": 1.9762248065880884e-05, + "loss": 0.5487, + "step": 5160 + }, + { + "epoch": 0.14170785282811643, + "grad_norm": 0.35825568437576294, + "learning_rate": 1.976215443933229e-05, + "loss": 0.5854, + "step": 5161 + }, + { + "epoch": 0.14173531026908293, + "grad_norm": 0.3822456896305084, + "learning_rate": 1.9762060794574156e-05, + "loss": 0.4915, + "step": 5162 + }, + { + "epoch": 0.14176276771004942, + "grad_norm": 0.33348333835601807, + "learning_rate": 1.9761967131606665e-05, + "loss": 0.4296, + "step": 5163 + }, + { + "epoch": 0.14179022515101591, + "grad_norm": 0.3722299635410309, + "learning_rate": 1.976187345042999e-05, + "loss": 0.5952, + "step": 5164 + }, + { + "epoch": 0.14181768259198244, + "grad_norm": 0.35679900646209717, + "learning_rate": 1.9761779751044308e-05, + "loss": 0.5979, + "step": 5165 + }, + { + "epoch": 0.14184514003294893, + "grad_norm": 0.4097611904144287, + "learning_rate": 1.976168603344979e-05, + "loss": 0.5242, + "step": 5166 + }, + { + "epoch": 0.14187259747391542, + "grad_norm": 0.39235377311706543, + "learning_rate": 1.9761592297646607e-05, + "loss": 0.5282, + "step": 5167 + }, + { + "epoch": 0.14190005491488195, + "grad_norm": 0.3546358048915863, + "learning_rate": 1.9761498543634946e-05, + "loss": 0.5689, + "step": 5168 + }, + { + "epoch": 0.14192751235584844, + "grad_norm": 0.41730985045433044, + "learning_rate": 1.9761404771414973e-05, + "loss": 0.6503, + "step": 5169 + }, + { + "epoch": 0.14195496979681493, + "grad_norm": 0.44483160972595215, + "learning_rate": 1.976131098098686e-05, + "loss": 0.4986, + "step": 5170 + }, + { + "epoch": 0.14198242723778143, + "grad_norm": 0.3717547357082367, + "learning_rate": 1.976121717235079e-05, + "loss": 0.5891, + "step": 5171 + }, + { + "epoch": 0.14200988467874795, + "grad_norm": 0.3958486020565033, + "learning_rate": 1.9761123345506937e-05, + "loss": 0.5606, + "step": 5172 + }, + { + "epoch": 0.14203734211971444, + "grad_norm": 0.3545449674129486, + "learning_rate": 1.976102950045547e-05, + "loss": 0.5833, + "step": 5173 + }, + { + "epoch": 0.14206479956068094, + "grad_norm": 0.34818708896636963, + "learning_rate": 1.9760935637196565e-05, + "loss": 0.5216, + "step": 5174 + }, + { + "epoch": 0.14209225700164746, + "grad_norm": 0.3399266302585602, + "learning_rate": 1.9760841755730403e-05, + "loss": 0.5616, + "step": 5175 + }, + { + "epoch": 0.14211971444261395, + "grad_norm": 0.3955525755882263, + "learning_rate": 1.976074785605716e-05, + "loss": 0.5102, + "step": 5176 + }, + { + "epoch": 0.14214717188358045, + "grad_norm": 0.3750360310077667, + "learning_rate": 1.9760653938177003e-05, + "loss": 0.5888, + "step": 5177 + }, + { + "epoch": 0.14217462932454694, + "grad_norm": 0.39386892318725586, + "learning_rate": 1.976056000209011e-05, + "loss": 0.589, + "step": 5178 + }, + { + "epoch": 0.14220208676551346, + "grad_norm": 0.376517653465271, + "learning_rate": 1.976046604779666e-05, + "loss": 0.5306, + "step": 5179 + }, + { + "epoch": 0.14222954420647996, + "grad_norm": 0.36512553691864014, + "learning_rate": 1.9760372075296823e-05, + "loss": 0.5631, + "step": 5180 + }, + { + "epoch": 0.14225700164744645, + "grad_norm": 0.34393811225891113, + "learning_rate": 1.976027808459078e-05, + "loss": 0.5934, + "step": 5181 + }, + { + "epoch": 0.14228445908841297, + "grad_norm": 0.528462290763855, + "learning_rate": 1.9760184075678698e-05, + "loss": 0.5977, + "step": 5182 + }, + { + "epoch": 0.14231191652937947, + "grad_norm": 0.41343313455581665, + "learning_rate": 1.976009004856076e-05, + "loss": 0.5762, + "step": 5183 + }, + { + "epoch": 0.14233937397034596, + "grad_norm": 0.41506296396255493, + "learning_rate": 1.975999600323714e-05, + "loss": 0.477, + "step": 5184 + }, + { + "epoch": 0.14236683141131246, + "grad_norm": 0.38322389125823975, + "learning_rate": 1.9759901939708014e-05, + "loss": 0.5447, + "step": 5185 + }, + { + "epoch": 0.14239428885227898, + "grad_norm": 0.4538055658340454, + "learning_rate": 1.9759807857973553e-05, + "loss": 0.5417, + "step": 5186 + }, + { + "epoch": 0.14242174629324547, + "grad_norm": 0.37027794122695923, + "learning_rate": 1.9759713758033937e-05, + "loss": 0.5197, + "step": 5187 + }, + { + "epoch": 0.14244920373421197, + "grad_norm": 0.527476966381073, + "learning_rate": 1.975961963988934e-05, + "loss": 0.5738, + "step": 5188 + }, + { + "epoch": 0.14247666117517846, + "grad_norm": 0.393109530210495, + "learning_rate": 1.9759525503539936e-05, + "loss": 0.5933, + "step": 5189 + }, + { + "epoch": 0.14250411861614498, + "grad_norm": 0.35848256945610046, + "learning_rate": 1.9759431348985903e-05, + "loss": 0.5367, + "step": 5190 + }, + { + "epoch": 0.14253157605711148, + "grad_norm": 0.4073767364025116, + "learning_rate": 1.9759337176227417e-05, + "loss": 0.6019, + "step": 5191 + }, + { + "epoch": 0.14255903349807797, + "grad_norm": 0.3970816135406494, + "learning_rate": 1.9759242985264652e-05, + "loss": 0.5613, + "step": 5192 + }, + { + "epoch": 0.1425864909390445, + "grad_norm": 0.3618789315223694, + "learning_rate": 1.9759148776097783e-05, + "loss": 0.605, + "step": 5193 + }, + { + "epoch": 0.14261394838001099, + "grad_norm": 0.42001697421073914, + "learning_rate": 1.9759054548726987e-05, + "loss": 0.5526, + "step": 5194 + }, + { + "epoch": 0.14264140582097748, + "grad_norm": 0.3464806377887726, + "learning_rate": 1.975896030315244e-05, + "loss": 0.5651, + "step": 5195 + }, + { + "epoch": 0.14266886326194397, + "grad_norm": 0.37471550703048706, + "learning_rate": 1.9758866039374315e-05, + "loss": 0.6256, + "step": 5196 + }, + { + "epoch": 0.1426963207029105, + "grad_norm": 0.31307339668273926, + "learning_rate": 1.975877175739279e-05, + "loss": 0.5061, + "step": 5197 + }, + { + "epoch": 0.142723778143877, + "grad_norm": 0.39606142044067383, + "learning_rate": 1.9758677457208044e-05, + "loss": 0.6322, + "step": 5198 + }, + { + "epoch": 0.14275123558484348, + "grad_norm": 0.34814879298210144, + "learning_rate": 1.975858313882025e-05, + "loss": 0.4519, + "step": 5199 + }, + { + "epoch": 0.14277869302581, + "grad_norm": 0.3955276608467102, + "learning_rate": 1.9758488802229585e-05, + "loss": 0.5791, + "step": 5200 + }, + { + "epoch": 0.1428061504667765, + "grad_norm": 0.41657453775405884, + "learning_rate": 1.975839444743622e-05, + "loss": 0.6048, + "step": 5201 + }, + { + "epoch": 0.142833607907743, + "grad_norm": 0.39284393191337585, + "learning_rate": 1.975830007444034e-05, + "loss": 0.5717, + "step": 5202 + }, + { + "epoch": 0.1428610653487095, + "grad_norm": 0.36834201216697693, + "learning_rate": 1.9758205683242105e-05, + "loss": 0.5417, + "step": 5203 + }, + { + "epoch": 0.142888522789676, + "grad_norm": 0.34614330530166626, + "learning_rate": 1.975811127384171e-05, + "loss": 0.4698, + "step": 5204 + }, + { + "epoch": 0.1429159802306425, + "grad_norm": 0.36794713139533997, + "learning_rate": 1.9758016846239325e-05, + "loss": 0.5515, + "step": 5205 + }, + { + "epoch": 0.142943437671609, + "grad_norm": 0.42639344930648804, + "learning_rate": 1.975792240043512e-05, + "loss": 0.5981, + "step": 5206 + }, + { + "epoch": 0.14297089511257552, + "grad_norm": 0.4046439528465271, + "learning_rate": 1.975782793642928e-05, + "loss": 0.5896, + "step": 5207 + }, + { + "epoch": 0.142998352553542, + "grad_norm": 0.5351312160491943, + "learning_rate": 1.975773345422197e-05, + "loss": 0.4552, + "step": 5208 + }, + { + "epoch": 0.1430258099945085, + "grad_norm": 0.406574547290802, + "learning_rate": 1.975763895381338e-05, + "loss": 0.5978, + "step": 5209 + }, + { + "epoch": 0.143053267435475, + "grad_norm": 0.3574973940849304, + "learning_rate": 1.9757544435203674e-05, + "loss": 0.6184, + "step": 5210 + }, + { + "epoch": 0.14308072487644152, + "grad_norm": 0.4015454947948456, + "learning_rate": 1.9757449898393033e-05, + "loss": 0.5376, + "step": 5211 + }, + { + "epoch": 0.14310818231740802, + "grad_norm": 0.3470827043056488, + "learning_rate": 1.9757355343381636e-05, + "loss": 0.5299, + "step": 5212 + }, + { + "epoch": 0.1431356397583745, + "grad_norm": 0.37330299615859985, + "learning_rate": 1.9757260770169656e-05, + "loss": 0.569, + "step": 5213 + }, + { + "epoch": 0.14316309719934103, + "grad_norm": 0.2965843081474304, + "learning_rate": 1.975716617875727e-05, + "loss": 0.5072, + "step": 5214 + }, + { + "epoch": 0.14319055464030753, + "grad_norm": 0.385028600692749, + "learning_rate": 1.9757071569144658e-05, + "loss": 0.607, + "step": 5215 + }, + { + "epoch": 0.14321801208127402, + "grad_norm": 0.37850111722946167, + "learning_rate": 1.9756976941331992e-05, + "loss": 0.5178, + "step": 5216 + }, + { + "epoch": 0.14324546952224051, + "grad_norm": 0.3664033114910126, + "learning_rate": 1.9756882295319448e-05, + "loss": 0.5034, + "step": 5217 + }, + { + "epoch": 0.14327292696320704, + "grad_norm": 0.352734237909317, + "learning_rate": 1.9756787631107205e-05, + "loss": 0.5825, + "step": 5218 + }, + { + "epoch": 0.14330038440417353, + "grad_norm": 0.3812539875507355, + "learning_rate": 1.975669294869544e-05, + "loss": 0.5891, + "step": 5219 + }, + { + "epoch": 0.14332784184514002, + "grad_norm": 0.5099696516990662, + "learning_rate": 1.975659824808433e-05, + "loss": 0.6266, + "step": 5220 + }, + { + "epoch": 0.14335529928610655, + "grad_norm": 0.3631584942340851, + "learning_rate": 1.9756503529274047e-05, + "loss": 0.5621, + "step": 5221 + }, + { + "epoch": 0.14338275672707304, + "grad_norm": 0.4031742811203003, + "learning_rate": 1.9756408792264774e-05, + "loss": 0.4833, + "step": 5222 + }, + { + "epoch": 0.14341021416803953, + "grad_norm": 2.6287312507629395, + "learning_rate": 1.9756314037056686e-05, + "loss": 0.435, + "step": 5223 + }, + { + "epoch": 0.14343767160900603, + "grad_norm": 0.3566969633102417, + "learning_rate": 1.9756219263649953e-05, + "loss": 0.586, + "step": 5224 + }, + { + "epoch": 0.14346512904997255, + "grad_norm": 0.3356954753398895, + "learning_rate": 1.975612447204476e-05, + "loss": 0.5166, + "step": 5225 + }, + { + "epoch": 0.14349258649093904, + "grad_norm": 0.3739411532878876, + "learning_rate": 1.9756029662241283e-05, + "loss": 0.6034, + "step": 5226 + }, + { + "epoch": 0.14352004393190554, + "grad_norm": 0.3644901514053345, + "learning_rate": 1.9755934834239695e-05, + "loss": 0.5225, + "step": 5227 + }, + { + "epoch": 0.14354750137287206, + "grad_norm": 0.33727386593818665, + "learning_rate": 1.9755839988040177e-05, + "loss": 0.5029, + "step": 5228 + }, + { + "epoch": 0.14357495881383855, + "grad_norm": 0.3365439474582672, + "learning_rate": 1.97557451236429e-05, + "loss": 0.506, + "step": 5229 + }, + { + "epoch": 0.14360241625480505, + "grad_norm": 0.3865739107131958, + "learning_rate": 1.9755650241048044e-05, + "loss": 0.5803, + "step": 5230 + }, + { + "epoch": 0.14362987369577154, + "grad_norm": 0.35372963547706604, + "learning_rate": 1.975555534025579e-05, + "loss": 0.5267, + "step": 5231 + }, + { + "epoch": 0.14365733113673806, + "grad_norm": 0.33166149258613586, + "learning_rate": 1.9755460421266312e-05, + "loss": 0.5187, + "step": 5232 + }, + { + "epoch": 0.14368478857770456, + "grad_norm": 0.3612311780452728, + "learning_rate": 1.975536548407979e-05, + "loss": 0.5579, + "step": 5233 + }, + { + "epoch": 0.14371224601867105, + "grad_norm": 0.3520779609680176, + "learning_rate": 1.975527052869639e-05, + "loss": 0.5884, + "step": 5234 + }, + { + "epoch": 0.14373970345963757, + "grad_norm": 0.40141236782073975, + "learning_rate": 1.9755175555116298e-05, + "loss": 0.5518, + "step": 5235 + }, + { + "epoch": 0.14376716090060407, + "grad_norm": 0.40416714549064636, + "learning_rate": 1.9755080563339694e-05, + "loss": 0.4877, + "step": 5236 + }, + { + "epoch": 0.14379461834157056, + "grad_norm": 0.4022332429885864, + "learning_rate": 1.975498555336675e-05, + "loss": 0.5412, + "step": 5237 + }, + { + "epoch": 0.14382207578253706, + "grad_norm": 0.3340296745300293, + "learning_rate": 1.975489052519764e-05, + "loss": 0.5334, + "step": 5238 + }, + { + "epoch": 0.14384953322350358, + "grad_norm": 0.44120121002197266, + "learning_rate": 1.9754795478832552e-05, + "loss": 0.5343, + "step": 5239 + }, + { + "epoch": 0.14387699066447007, + "grad_norm": 0.3596265912055969, + "learning_rate": 1.9754700414271657e-05, + "loss": 0.5109, + "step": 5240 + }, + { + "epoch": 0.14390444810543657, + "grad_norm": 0.34817934036254883, + "learning_rate": 1.975460533151513e-05, + "loss": 0.4888, + "step": 5241 + }, + { + "epoch": 0.1439319055464031, + "grad_norm": 0.3679596781730652, + "learning_rate": 1.975451023056315e-05, + "loss": 0.6343, + "step": 5242 + }, + { + "epoch": 0.14395936298736958, + "grad_norm": 0.39662593603134155, + "learning_rate": 1.9754415111415898e-05, + "loss": 0.5984, + "step": 5243 + }, + { + "epoch": 0.14398682042833608, + "grad_norm": 0.3538244068622589, + "learning_rate": 1.9754319974073544e-05, + "loss": 0.5434, + "step": 5244 + }, + { + "epoch": 0.14401427786930257, + "grad_norm": 0.6787261366844177, + "learning_rate": 1.9754224818536272e-05, + "loss": 0.5315, + "step": 5245 + }, + { + "epoch": 0.1440417353102691, + "grad_norm": 0.352488249540329, + "learning_rate": 1.9754129644804262e-05, + "loss": 0.619, + "step": 5246 + }, + { + "epoch": 0.1440691927512356, + "grad_norm": 0.36517342925071716, + "learning_rate": 1.9754034452877686e-05, + "loss": 0.5934, + "step": 5247 + }, + { + "epoch": 0.14409665019220208, + "grad_norm": 0.385572224855423, + "learning_rate": 1.9753939242756716e-05, + "loss": 0.558, + "step": 5248 + }, + { + "epoch": 0.1441241076331686, + "grad_norm": 0.3565976023674011, + "learning_rate": 1.975384401444154e-05, + "loss": 0.4891, + "step": 5249 + }, + { + "epoch": 0.1441515650741351, + "grad_norm": 0.6695986986160278, + "learning_rate": 1.9753748767932333e-05, + "loss": 0.5972, + "step": 5250 + }, + { + "epoch": 0.1441790225151016, + "grad_norm": 0.39466309547424316, + "learning_rate": 1.9753653503229272e-05, + "loss": 0.5192, + "step": 5251 + }, + { + "epoch": 0.14420647995606808, + "grad_norm": 0.3316930830478668, + "learning_rate": 1.9753558220332533e-05, + "loss": 0.5443, + "step": 5252 + }, + { + "epoch": 0.1442339373970346, + "grad_norm": 0.34133392572402954, + "learning_rate": 1.9753462919242295e-05, + "loss": 0.5074, + "step": 5253 + }, + { + "epoch": 0.1442613948380011, + "grad_norm": 0.351509153842926, + "learning_rate": 1.9753367599958735e-05, + "loss": 0.5466, + "step": 5254 + }, + { + "epoch": 0.1442888522789676, + "grad_norm": 0.34685635566711426, + "learning_rate": 1.9753272262482032e-05, + "loss": 0.574, + "step": 5255 + }, + { + "epoch": 0.1443163097199341, + "grad_norm": 0.3003350794315338, + "learning_rate": 1.9753176906812362e-05, + "loss": 0.5031, + "step": 5256 + }, + { + "epoch": 0.1443437671609006, + "grad_norm": 0.3655555546283722, + "learning_rate": 1.975308153294991e-05, + "loss": 0.5941, + "step": 5257 + }, + { + "epoch": 0.1443712246018671, + "grad_norm": 0.3401818871498108, + "learning_rate": 1.975298614089484e-05, + "loss": 0.4819, + "step": 5258 + }, + { + "epoch": 0.1443986820428336, + "grad_norm": 0.4256969094276428, + "learning_rate": 1.9752890730647342e-05, + "loss": 0.6073, + "step": 5259 + }, + { + "epoch": 0.14442613948380012, + "grad_norm": 0.3619632124900818, + "learning_rate": 1.975279530220759e-05, + "loss": 0.5001, + "step": 5260 + }, + { + "epoch": 0.14445359692476661, + "grad_norm": 0.3467947840690613, + "learning_rate": 1.975269985557576e-05, + "loss": 0.5365, + "step": 5261 + }, + { + "epoch": 0.1444810543657331, + "grad_norm": 0.3316633701324463, + "learning_rate": 1.975260439075204e-05, + "loss": 0.5045, + "step": 5262 + }, + { + "epoch": 0.1445085118066996, + "grad_norm": 0.3911300301551819, + "learning_rate": 1.975250890773659e-05, + "loss": 0.6252, + "step": 5263 + }, + { + "epoch": 0.14453596924766612, + "grad_norm": 0.4673580825328827, + "learning_rate": 1.9752413406529604e-05, + "loss": 0.5916, + "step": 5264 + }, + { + "epoch": 0.14456342668863262, + "grad_norm": 0.45395970344543457, + "learning_rate": 1.975231788713125e-05, + "loss": 0.5286, + "step": 5265 + }, + { + "epoch": 0.1445908841295991, + "grad_norm": 0.33042073249816895, + "learning_rate": 1.9752222349541716e-05, + "loss": 0.5295, + "step": 5266 + }, + { + "epoch": 0.14461834157056563, + "grad_norm": 0.3314532935619354, + "learning_rate": 1.9752126793761173e-05, + "loss": 0.492, + "step": 5267 + }, + { + "epoch": 0.14464579901153213, + "grad_norm": 0.49460774660110474, + "learning_rate": 1.9752031219789798e-05, + "loss": 0.4964, + "step": 5268 + }, + { + "epoch": 0.14467325645249862, + "grad_norm": 0.3714655637741089, + "learning_rate": 1.9751935627627774e-05, + "loss": 0.4882, + "step": 5269 + }, + { + "epoch": 0.14470071389346512, + "grad_norm": 0.3792128264904022, + "learning_rate": 1.975184001727528e-05, + "loss": 0.5328, + "step": 5270 + }, + { + "epoch": 0.14472817133443164, + "grad_norm": 0.39238178730010986, + "learning_rate": 1.975174438873249e-05, + "loss": 0.5964, + "step": 5271 + }, + { + "epoch": 0.14475562877539813, + "grad_norm": 0.37898609042167664, + "learning_rate": 1.975164874199958e-05, + "loss": 0.6393, + "step": 5272 + }, + { + "epoch": 0.14478308621636463, + "grad_norm": 0.34487801790237427, + "learning_rate": 1.975155307707674e-05, + "loss": 0.487, + "step": 5273 + }, + { + "epoch": 0.14481054365733115, + "grad_norm": 0.37276890873908997, + "learning_rate": 1.975145739396414e-05, + "loss": 0.5379, + "step": 5274 + }, + { + "epoch": 0.14483800109829764, + "grad_norm": 0.42593756318092346, + "learning_rate": 1.9751361692661954e-05, + "loss": 0.4926, + "step": 5275 + }, + { + "epoch": 0.14486545853926414, + "grad_norm": 0.38760584592819214, + "learning_rate": 1.9751265973170372e-05, + "loss": 0.5328, + "step": 5276 + }, + { + "epoch": 0.14489291598023063, + "grad_norm": 0.3921535611152649, + "learning_rate": 1.9751170235489562e-05, + "loss": 0.5033, + "step": 5277 + }, + { + "epoch": 0.14492037342119715, + "grad_norm": 0.3716851472854614, + "learning_rate": 1.975107447961971e-05, + "loss": 0.5427, + "step": 5278 + }, + { + "epoch": 0.14494783086216365, + "grad_norm": 0.3654526174068451, + "learning_rate": 1.9750978705560994e-05, + "loss": 0.5302, + "step": 5279 + }, + { + "epoch": 0.14497528830313014, + "grad_norm": 0.3662642538547516, + "learning_rate": 1.975088291331359e-05, + "loss": 0.5099, + "step": 5280 + }, + { + "epoch": 0.14500274574409666, + "grad_norm": 0.3811503052711487, + "learning_rate": 1.9750787102877673e-05, + "loss": 0.617, + "step": 5281 + }, + { + "epoch": 0.14503020318506316, + "grad_norm": 0.35303762555122375, + "learning_rate": 1.9750691274253428e-05, + "loss": 0.514, + "step": 5282 + }, + { + "epoch": 0.14505766062602965, + "grad_norm": 0.4815235137939453, + "learning_rate": 1.9750595427441035e-05, + "loss": 0.5843, + "step": 5283 + }, + { + "epoch": 0.14508511806699614, + "grad_norm": 0.38327503204345703, + "learning_rate": 1.9750499562440665e-05, + "loss": 0.6647, + "step": 5284 + }, + { + "epoch": 0.14511257550796267, + "grad_norm": 0.373838871717453, + "learning_rate": 1.9750403679252503e-05, + "loss": 0.5464, + "step": 5285 + }, + { + "epoch": 0.14514003294892916, + "grad_norm": 0.3460679054260254, + "learning_rate": 1.9750307777876728e-05, + "loss": 0.5276, + "step": 5286 + }, + { + "epoch": 0.14516749038989565, + "grad_norm": 0.36705532670021057, + "learning_rate": 1.9750211858313515e-05, + "loss": 0.5588, + "step": 5287 + }, + { + "epoch": 0.14519494783086218, + "grad_norm": 0.39007768034935, + "learning_rate": 1.9750115920563044e-05, + "loss": 0.5722, + "step": 5288 + }, + { + "epoch": 0.14522240527182867, + "grad_norm": 0.34954696893692017, + "learning_rate": 1.97500199646255e-05, + "loss": 0.5083, + "step": 5289 + }, + { + "epoch": 0.14524986271279516, + "grad_norm": 0.38864612579345703, + "learning_rate": 1.974992399050105e-05, + "loss": 0.5455, + "step": 5290 + }, + { + "epoch": 0.14527732015376166, + "grad_norm": 0.32096025347709656, + "learning_rate": 1.9749827998189882e-05, + "loss": 0.5444, + "step": 5291 + }, + { + "epoch": 0.14530477759472818, + "grad_norm": 0.3865492045879364, + "learning_rate": 1.9749731987692177e-05, + "loss": 0.655, + "step": 5292 + }, + { + "epoch": 0.14533223503569467, + "grad_norm": 0.3834858238697052, + "learning_rate": 1.9749635959008108e-05, + "loss": 0.5836, + "step": 5293 + }, + { + "epoch": 0.14535969247666117, + "grad_norm": 0.3700025975704193, + "learning_rate": 1.9749539912137852e-05, + "loss": 0.5749, + "step": 5294 + }, + { + "epoch": 0.1453871499176277, + "grad_norm": 0.3751637041568756, + "learning_rate": 1.9749443847081596e-05, + "loss": 0.519, + "step": 5295 + }, + { + "epoch": 0.14541460735859418, + "grad_norm": 0.36881428956985474, + "learning_rate": 1.974934776383952e-05, + "loss": 0.5064, + "step": 5296 + }, + { + "epoch": 0.14544206479956068, + "grad_norm": 0.356995552778244, + "learning_rate": 1.974925166241179e-05, + "loss": 0.5934, + "step": 5297 + }, + { + "epoch": 0.14546952224052717, + "grad_norm": 0.5500452518463135, + "learning_rate": 1.97491555427986e-05, + "loss": 0.6416, + "step": 5298 + }, + { + "epoch": 0.1454969796814937, + "grad_norm": 0.44850045442581177, + "learning_rate": 1.974905940500012e-05, + "loss": 0.55, + "step": 5299 + }, + { + "epoch": 0.1455244371224602, + "grad_norm": 0.35959866642951965, + "learning_rate": 1.9748963249016535e-05, + "loss": 0.5386, + "step": 5300 + }, + { + "epoch": 0.14555189456342668, + "grad_norm": 0.3935684859752655, + "learning_rate": 1.9748867074848022e-05, + "loss": 0.5113, + "step": 5301 + }, + { + "epoch": 0.1455793520043932, + "grad_norm": 0.3863079845905304, + "learning_rate": 1.9748770882494762e-05, + "loss": 0.6613, + "step": 5302 + }, + { + "epoch": 0.1456068094453597, + "grad_norm": 0.3907838463783264, + "learning_rate": 1.9748674671956927e-05, + "loss": 0.4867, + "step": 5303 + }, + { + "epoch": 0.1456342668863262, + "grad_norm": 0.331279993057251, + "learning_rate": 1.974857844323471e-05, + "loss": 0.4832, + "step": 5304 + }, + { + "epoch": 0.14566172432729269, + "grad_norm": 0.3659103512763977, + "learning_rate": 1.9748482196328275e-05, + "loss": 0.5406, + "step": 5305 + }, + { + "epoch": 0.1456891817682592, + "grad_norm": 0.398784339427948, + "learning_rate": 1.9748385931237815e-05, + "loss": 0.5818, + "step": 5306 + }, + { + "epoch": 0.1457166392092257, + "grad_norm": 0.4081275761127472, + "learning_rate": 1.97482896479635e-05, + "loss": 0.5664, + "step": 5307 + }, + { + "epoch": 0.1457440966501922, + "grad_norm": 0.3598687946796417, + "learning_rate": 1.9748193346505515e-05, + "loss": 0.5345, + "step": 5308 + }, + { + "epoch": 0.14577155409115872, + "grad_norm": 0.366371750831604, + "learning_rate": 1.974809702686404e-05, + "loss": 0.5731, + "step": 5309 + }, + { + "epoch": 0.1457990115321252, + "grad_norm": 0.4857545495033264, + "learning_rate": 1.974800068903925e-05, + "loss": 0.6214, + "step": 5310 + }, + { + "epoch": 0.1458264689730917, + "grad_norm": 0.383028119802475, + "learning_rate": 1.974790433303133e-05, + "loss": 0.6039, + "step": 5311 + }, + { + "epoch": 0.1458539264140582, + "grad_norm": 0.35723817348480225, + "learning_rate": 1.9747807958840456e-05, + "loss": 0.4647, + "step": 5312 + }, + { + "epoch": 0.14588138385502472, + "grad_norm": 0.3220714330673218, + "learning_rate": 1.974771156646681e-05, + "loss": 0.4194, + "step": 5313 + }, + { + "epoch": 0.14590884129599122, + "grad_norm": 0.37543076276779175, + "learning_rate": 1.974761515591057e-05, + "loss": 0.5383, + "step": 5314 + }, + { + "epoch": 0.1459362987369577, + "grad_norm": 0.3483539819717407, + "learning_rate": 1.9747518727171916e-05, + "loss": 0.5228, + "step": 5315 + }, + { + "epoch": 0.14596375617792423, + "grad_norm": 0.41193923354148865, + "learning_rate": 1.974742228025103e-05, + "loss": 0.6158, + "step": 5316 + }, + { + "epoch": 0.14599121361889073, + "grad_norm": 0.43999892473220825, + "learning_rate": 1.9747325815148088e-05, + "loss": 0.5995, + "step": 5317 + }, + { + "epoch": 0.14601867105985722, + "grad_norm": 0.40057849884033203, + "learning_rate": 1.9747229331863276e-05, + "loss": 0.5819, + "step": 5318 + }, + { + "epoch": 0.1460461285008237, + "grad_norm": 0.3874513506889343, + "learning_rate": 1.9747132830396766e-05, + "loss": 0.5409, + "step": 5319 + }, + { + "epoch": 0.14607358594179023, + "grad_norm": 0.3776434361934662, + "learning_rate": 1.9747036310748746e-05, + "loss": 0.581, + "step": 5320 + }, + { + "epoch": 0.14610104338275673, + "grad_norm": 0.3883746266365051, + "learning_rate": 1.9746939772919393e-05, + "loss": 0.579, + "step": 5321 + }, + { + "epoch": 0.14612850082372322, + "grad_norm": 0.36094146966934204, + "learning_rate": 1.974684321690888e-05, + "loss": 0.4632, + "step": 5322 + }, + { + "epoch": 0.14615595826468972, + "grad_norm": 0.33588096499443054, + "learning_rate": 1.9746746642717404e-05, + "loss": 0.4656, + "step": 5323 + }, + { + "epoch": 0.14618341570565624, + "grad_norm": 0.3692617416381836, + "learning_rate": 1.9746650050345126e-05, + "loss": 0.5322, + "step": 5324 + }, + { + "epoch": 0.14621087314662273, + "grad_norm": 0.33624306321144104, + "learning_rate": 1.974655343979224e-05, + "loss": 0.572, + "step": 5325 + }, + { + "epoch": 0.14623833058758923, + "grad_norm": 0.40420469641685486, + "learning_rate": 1.9746456811058917e-05, + "loss": 0.5374, + "step": 5326 + }, + { + "epoch": 0.14626578802855575, + "grad_norm": 0.3463273048400879, + "learning_rate": 1.974636016414534e-05, + "loss": 0.4844, + "step": 5327 + }, + { + "epoch": 0.14629324546952224, + "grad_norm": 0.40737441182136536, + "learning_rate": 1.9746263499051697e-05, + "loss": 0.4701, + "step": 5328 + }, + { + "epoch": 0.14632070291048874, + "grad_norm": 0.39390110969543457, + "learning_rate": 1.9746166815778158e-05, + "loss": 0.63, + "step": 5329 + }, + { + "epoch": 0.14634816035145523, + "grad_norm": 0.44744783639907837, + "learning_rate": 1.974607011432491e-05, + "loss": 0.5225, + "step": 5330 + }, + { + "epoch": 0.14637561779242175, + "grad_norm": 0.3700573444366455, + "learning_rate": 1.9745973394692125e-05, + "loss": 0.6246, + "step": 5331 + }, + { + "epoch": 0.14640307523338825, + "grad_norm": 0.3573521673679352, + "learning_rate": 1.9745876656879998e-05, + "loss": 0.5922, + "step": 5332 + }, + { + "epoch": 0.14643053267435474, + "grad_norm": 0.37474939227104187, + "learning_rate": 1.9745779900888694e-05, + "loss": 0.4701, + "step": 5333 + }, + { + "epoch": 0.14645799011532126, + "grad_norm": 0.37685492634773254, + "learning_rate": 1.97456831267184e-05, + "loss": 0.5486, + "step": 5334 + }, + { + "epoch": 0.14648544755628776, + "grad_norm": 0.36680108308792114, + "learning_rate": 1.9745586334369296e-05, + "loss": 0.5099, + "step": 5335 + }, + { + "epoch": 0.14651290499725425, + "grad_norm": 0.32809144258499146, + "learning_rate": 1.9745489523841566e-05, + "loss": 0.5017, + "step": 5336 + }, + { + "epoch": 0.14654036243822074, + "grad_norm": 0.37393873929977417, + "learning_rate": 1.9745392695135387e-05, + "loss": 0.6129, + "step": 5337 + }, + { + "epoch": 0.14656781987918727, + "grad_norm": 0.3478635847568512, + "learning_rate": 1.974529584825094e-05, + "loss": 0.5659, + "step": 5338 + }, + { + "epoch": 0.14659527732015376, + "grad_norm": 0.3196670711040497, + "learning_rate": 1.9745198983188404e-05, + "loss": 0.4785, + "step": 5339 + }, + { + "epoch": 0.14662273476112025, + "grad_norm": 0.36025553941726685, + "learning_rate": 1.9745102099947964e-05, + "loss": 0.5346, + "step": 5340 + }, + { + "epoch": 0.14665019220208678, + "grad_norm": 0.3553926646709442, + "learning_rate": 1.97450051985298e-05, + "loss": 0.5227, + "step": 5341 + }, + { + "epoch": 0.14667764964305327, + "grad_norm": 0.37404894828796387, + "learning_rate": 1.9744908278934085e-05, + "loss": 0.5534, + "step": 5342 + }, + { + "epoch": 0.14670510708401976, + "grad_norm": 0.37603169679641724, + "learning_rate": 1.974481134116101e-05, + "loss": 0.5271, + "step": 5343 + }, + { + "epoch": 0.14673256452498626, + "grad_norm": 0.39141950011253357, + "learning_rate": 1.974471438521075e-05, + "loss": 0.5164, + "step": 5344 + }, + { + "epoch": 0.14676002196595278, + "grad_norm": 0.39432278275489807, + "learning_rate": 1.9744617411083487e-05, + "loss": 0.4167, + "step": 5345 + }, + { + "epoch": 0.14678747940691927, + "grad_norm": 0.4112486243247986, + "learning_rate": 1.9744520418779406e-05, + "loss": 0.5713, + "step": 5346 + }, + { + "epoch": 0.14681493684788577, + "grad_norm": 0.41011151671409607, + "learning_rate": 1.9744423408298683e-05, + "loss": 0.4953, + "step": 5347 + }, + { + "epoch": 0.1468423942888523, + "grad_norm": 0.5496740937232971, + "learning_rate": 1.97443263796415e-05, + "loss": 0.5736, + "step": 5348 + }, + { + "epoch": 0.14686985172981878, + "grad_norm": 0.41244834661483765, + "learning_rate": 1.974422933280804e-05, + "loss": 0.5883, + "step": 5349 + }, + { + "epoch": 0.14689730917078528, + "grad_norm": 0.36061879992485046, + "learning_rate": 1.9744132267798476e-05, + "loss": 0.581, + "step": 5350 + }, + { + "epoch": 0.14692476661175177, + "grad_norm": 0.39280039072036743, + "learning_rate": 1.9744035184613e-05, + "loss": 0.5917, + "step": 5351 + }, + { + "epoch": 0.1469522240527183, + "grad_norm": 0.335227370262146, + "learning_rate": 1.9743938083251792e-05, + "loss": 0.4464, + "step": 5352 + }, + { + "epoch": 0.1469796814936848, + "grad_norm": 0.3693096339702606, + "learning_rate": 1.974384096371502e-05, + "loss": 0.5371, + "step": 5353 + }, + { + "epoch": 0.14700713893465128, + "grad_norm": 0.3495848774909973, + "learning_rate": 1.9743743826002884e-05, + "loss": 0.4827, + "step": 5354 + }, + { + "epoch": 0.1470345963756178, + "grad_norm": 0.4408680200576782, + "learning_rate": 1.974364667011555e-05, + "loss": 0.5473, + "step": 5355 + }, + { + "epoch": 0.1470620538165843, + "grad_norm": 0.3825119137763977, + "learning_rate": 1.974354949605321e-05, + "loss": 0.553, + "step": 5356 + }, + { + "epoch": 0.1470895112575508, + "grad_norm": 0.33028921484947205, + "learning_rate": 1.9743452303816038e-05, + "loss": 0.5312, + "step": 5357 + }, + { + "epoch": 0.1471169686985173, + "grad_norm": 0.37295761704444885, + "learning_rate": 1.9743355093404215e-05, + "loss": 0.5906, + "step": 5358 + }, + { + "epoch": 0.1471444261394838, + "grad_norm": 0.32657378911972046, + "learning_rate": 1.974325786481793e-05, + "loss": 0.4269, + "step": 5359 + }, + { + "epoch": 0.1471718835804503, + "grad_norm": 0.3780403137207031, + "learning_rate": 1.9743160618057355e-05, + "loss": 0.4779, + "step": 5360 + }, + { + "epoch": 0.1471993410214168, + "grad_norm": 0.3739623427391052, + "learning_rate": 1.974306335312268e-05, + "loss": 0.493, + "step": 5361 + }, + { + "epoch": 0.14722679846238332, + "grad_norm": 0.3955196142196655, + "learning_rate": 1.9742966070014077e-05, + "loss": 0.5096, + "step": 5362 + }, + { + "epoch": 0.1472542559033498, + "grad_norm": 0.36828434467315674, + "learning_rate": 1.9742868768731737e-05, + "loss": 0.5018, + "step": 5363 + }, + { + "epoch": 0.1472817133443163, + "grad_norm": 0.3341670632362366, + "learning_rate": 1.9742771449275837e-05, + "loss": 0.5224, + "step": 5364 + }, + { + "epoch": 0.1473091707852828, + "grad_norm": 0.3714386522769928, + "learning_rate": 1.9742674111646554e-05, + "loss": 0.5212, + "step": 5365 + }, + { + "epoch": 0.14733662822624932, + "grad_norm": 0.37426215410232544, + "learning_rate": 1.9742576755844078e-05, + "loss": 0.4912, + "step": 5366 + }, + { + "epoch": 0.14736408566721582, + "grad_norm": 0.34451964497566223, + "learning_rate": 1.9742479381868587e-05, + "loss": 0.5936, + "step": 5367 + }, + { + "epoch": 0.1473915431081823, + "grad_norm": 0.3622000813484192, + "learning_rate": 1.9742381989720262e-05, + "loss": 0.5603, + "step": 5368 + }, + { + "epoch": 0.14741900054914883, + "grad_norm": 0.4219096899032593, + "learning_rate": 1.9742284579399286e-05, + "loss": 0.5926, + "step": 5369 + }, + { + "epoch": 0.14744645799011533, + "grad_norm": 0.32407671213150024, + "learning_rate": 1.974218715090584e-05, + "loss": 0.4682, + "step": 5370 + }, + { + "epoch": 0.14747391543108182, + "grad_norm": 0.3334480822086334, + "learning_rate": 1.9742089704240104e-05, + "loss": 0.4844, + "step": 5371 + }, + { + "epoch": 0.14750137287204831, + "grad_norm": 0.37841692566871643, + "learning_rate": 1.9741992239402265e-05, + "loss": 0.5434, + "step": 5372 + }, + { + "epoch": 0.14752883031301484, + "grad_norm": 0.3657386898994446, + "learning_rate": 1.9741894756392495e-05, + "loss": 0.4558, + "step": 5373 + }, + { + "epoch": 0.14755628775398133, + "grad_norm": 0.39582133293151855, + "learning_rate": 1.974179725521099e-05, + "loss": 0.5888, + "step": 5374 + }, + { + "epoch": 0.14758374519494782, + "grad_norm": 0.3713113069534302, + "learning_rate": 1.9741699735857916e-05, + "loss": 0.5564, + "step": 5375 + }, + { + "epoch": 0.14761120263591435, + "grad_norm": 0.35794365406036377, + "learning_rate": 1.9741602198333466e-05, + "loss": 0.508, + "step": 5376 + }, + { + "epoch": 0.14763866007688084, + "grad_norm": 0.38769251108169556, + "learning_rate": 1.974150464263782e-05, + "loss": 0.6128, + "step": 5377 + }, + { + "epoch": 0.14766611751784733, + "grad_norm": 0.44010472297668457, + "learning_rate": 1.9741407068771157e-05, + "loss": 0.6198, + "step": 5378 + }, + { + "epoch": 0.14769357495881383, + "grad_norm": 0.35897526144981384, + "learning_rate": 1.974130947673366e-05, + "loss": 0.4612, + "step": 5379 + }, + { + "epoch": 0.14772103239978035, + "grad_norm": 0.46248939633369446, + "learning_rate": 1.9741211866525517e-05, + "loss": 0.5973, + "step": 5380 + }, + { + "epoch": 0.14774848984074684, + "grad_norm": 0.4053727686405182, + "learning_rate": 1.97411142381469e-05, + "loss": 0.5631, + "step": 5381 + }, + { + "epoch": 0.14777594728171334, + "grad_norm": 0.3468163311481476, + "learning_rate": 1.9741016591597995e-05, + "loss": 0.5992, + "step": 5382 + }, + { + "epoch": 0.14780340472267986, + "grad_norm": 0.3356316387653351, + "learning_rate": 1.9740918926878988e-05, + "loss": 0.4298, + "step": 5383 + }, + { + "epoch": 0.14783086216364635, + "grad_norm": 0.3818214535713196, + "learning_rate": 1.9740821243990056e-05, + "loss": 0.6327, + "step": 5384 + }, + { + "epoch": 0.14785831960461285, + "grad_norm": 0.37206265330314636, + "learning_rate": 1.9740723542931387e-05, + "loss": 0.5447, + "step": 5385 + }, + { + "epoch": 0.14788577704557934, + "grad_norm": 0.3761729598045349, + "learning_rate": 1.9740625823703156e-05, + "loss": 0.552, + "step": 5386 + }, + { + "epoch": 0.14791323448654586, + "grad_norm": 0.32449936866760254, + "learning_rate": 1.974052808630555e-05, + "loss": 0.4601, + "step": 5387 + }, + { + "epoch": 0.14794069192751236, + "grad_norm": 0.4250233471393585, + "learning_rate": 1.974043033073875e-05, + "loss": 0.5163, + "step": 5388 + }, + { + "epoch": 0.14796814936847885, + "grad_norm": 0.3570390045642853, + "learning_rate": 1.974033255700294e-05, + "loss": 0.5659, + "step": 5389 + }, + { + "epoch": 0.14799560680944535, + "grad_norm": 0.3714039623737335, + "learning_rate": 1.97402347650983e-05, + "loss": 0.6185, + "step": 5390 + }, + { + "epoch": 0.14802306425041187, + "grad_norm": 0.4664088487625122, + "learning_rate": 1.9740136955025012e-05, + "loss": 0.5648, + "step": 5391 + }, + { + "epoch": 0.14805052169137836, + "grad_norm": 0.3889542520046234, + "learning_rate": 1.9740039126783262e-05, + "loss": 0.5461, + "step": 5392 + }, + { + "epoch": 0.14807797913234486, + "grad_norm": 0.39194533228874207, + "learning_rate": 1.973994128037323e-05, + "loss": 0.5357, + "step": 5393 + }, + { + "epoch": 0.14810543657331138, + "grad_norm": 0.35966774821281433, + "learning_rate": 1.9739843415795102e-05, + "loss": 0.5134, + "step": 5394 + }, + { + "epoch": 0.14813289401427787, + "grad_norm": 0.43552324175834656, + "learning_rate": 1.973974553304905e-05, + "loss": 0.6745, + "step": 5395 + }, + { + "epoch": 0.14816035145524437, + "grad_norm": 0.4363223910331726, + "learning_rate": 1.9739647632135273e-05, + "loss": 0.4888, + "step": 5396 + }, + { + "epoch": 0.14818780889621086, + "grad_norm": 0.3487359881401062, + "learning_rate": 1.973954971305394e-05, + "loss": 0.4945, + "step": 5397 + }, + { + "epoch": 0.14821526633717738, + "grad_norm": 0.3342610001564026, + "learning_rate": 1.9739451775805238e-05, + "loss": 0.4479, + "step": 5398 + }, + { + "epoch": 0.14824272377814388, + "grad_norm": 0.3421074450016022, + "learning_rate": 1.9739353820389347e-05, + "loss": 0.518, + "step": 5399 + }, + { + "epoch": 0.14827018121911037, + "grad_norm": 0.4065733253955841, + "learning_rate": 1.9739255846806457e-05, + "loss": 0.5219, + "step": 5400 + }, + { + "epoch": 0.1482976386600769, + "grad_norm": 0.3942786157131195, + "learning_rate": 1.9739157855056746e-05, + "loss": 0.5276, + "step": 5401 + }, + { + "epoch": 0.14832509610104339, + "grad_norm": 0.429015189409256, + "learning_rate": 1.9739059845140394e-05, + "loss": 0.5998, + "step": 5402 + }, + { + "epoch": 0.14835255354200988, + "grad_norm": 0.4054042398929596, + "learning_rate": 1.9738961817057595e-05, + "loss": 0.6032, + "step": 5403 + }, + { + "epoch": 0.14838001098297637, + "grad_norm": 0.3672468364238739, + "learning_rate": 1.9738863770808517e-05, + "loss": 0.5609, + "step": 5404 + }, + { + "epoch": 0.1484074684239429, + "grad_norm": 0.36872121691703796, + "learning_rate": 1.973876570639335e-05, + "loss": 0.5642, + "step": 5405 + }, + { + "epoch": 0.1484349258649094, + "grad_norm": 0.4199083149433136, + "learning_rate": 1.973866762381228e-05, + "loss": 0.5518, + "step": 5406 + }, + { + "epoch": 0.14846238330587588, + "grad_norm": 0.3219093680381775, + "learning_rate": 1.9738569523065484e-05, + "loss": 0.466, + "step": 5407 + }, + { + "epoch": 0.1484898407468424, + "grad_norm": 0.40219223499298096, + "learning_rate": 1.9738471404153146e-05, + "loss": 0.5625, + "step": 5408 + }, + { + "epoch": 0.1485172981878089, + "grad_norm": 0.34217825531959534, + "learning_rate": 1.9738373267075455e-05, + "loss": 0.4688, + "step": 5409 + }, + { + "epoch": 0.1485447556287754, + "grad_norm": 0.4533117413520813, + "learning_rate": 1.9738275111832586e-05, + "loss": 0.5593, + "step": 5410 + }, + { + "epoch": 0.1485722130697419, + "grad_norm": 0.4782189428806305, + "learning_rate": 1.973817693842473e-05, + "loss": 0.6073, + "step": 5411 + }, + { + "epoch": 0.1485996705107084, + "grad_norm": 0.4053649306297302, + "learning_rate": 1.9738078746852063e-05, + "loss": 0.5427, + "step": 5412 + }, + { + "epoch": 0.1486271279516749, + "grad_norm": 0.3140270411968231, + "learning_rate": 1.973798053711477e-05, + "loss": 0.4673, + "step": 5413 + }, + { + "epoch": 0.1486545853926414, + "grad_norm": 0.4638076722621918, + "learning_rate": 1.973788230921304e-05, + "loss": 0.5994, + "step": 5414 + }, + { + "epoch": 0.14868204283360792, + "grad_norm": 0.41284942626953125, + "learning_rate": 1.9737784063147047e-05, + "loss": 0.5398, + "step": 5415 + }, + { + "epoch": 0.1487095002745744, + "grad_norm": 0.4148675501346588, + "learning_rate": 1.973768579891698e-05, + "loss": 0.6447, + "step": 5416 + }, + { + "epoch": 0.1487369577155409, + "grad_norm": 0.3828205168247223, + "learning_rate": 1.973758751652302e-05, + "loss": 0.6837, + "step": 5417 + }, + { + "epoch": 0.1487644151565074, + "grad_norm": 0.32863932847976685, + "learning_rate": 1.9737489215965353e-05, + "loss": 0.5437, + "step": 5418 + }, + { + "epoch": 0.14879187259747392, + "grad_norm": 0.35620376467704773, + "learning_rate": 1.973739089724416e-05, + "loss": 0.6147, + "step": 5419 + }, + { + "epoch": 0.14881933003844042, + "grad_norm": 0.4132196009159088, + "learning_rate": 1.9737292560359626e-05, + "loss": 0.5861, + "step": 5420 + }, + { + "epoch": 0.1488467874794069, + "grad_norm": 0.33130204677581787, + "learning_rate": 1.9737194205311935e-05, + "loss": 0.515, + "step": 5421 + }, + { + "epoch": 0.14887424492037343, + "grad_norm": 0.4014488160610199, + "learning_rate": 1.9737095832101265e-05, + "loss": 0.6329, + "step": 5422 + }, + { + "epoch": 0.14890170236133993, + "grad_norm": 0.3727554976940155, + "learning_rate": 1.9736997440727806e-05, + "loss": 0.5733, + "step": 5423 + }, + { + "epoch": 0.14892915980230642, + "grad_norm": 0.47951358556747437, + "learning_rate": 1.9736899031191742e-05, + "loss": 0.5966, + "step": 5424 + }, + { + "epoch": 0.14895661724327292, + "grad_norm": 0.3686008155345917, + "learning_rate": 1.973680060349325e-05, + "loss": 0.5355, + "step": 5425 + }, + { + "epoch": 0.14898407468423944, + "grad_norm": 0.321787029504776, + "learning_rate": 1.9736702157632515e-05, + "loss": 0.5372, + "step": 5426 + }, + { + "epoch": 0.14901153212520593, + "grad_norm": 0.44427403807640076, + "learning_rate": 1.973660369360973e-05, + "loss": 0.5212, + "step": 5427 + }, + { + "epoch": 0.14903898956617243, + "grad_norm": 0.33534571528434753, + "learning_rate": 1.973650521142506e-05, + "loss": 0.5026, + "step": 5428 + }, + { + "epoch": 0.14906644700713895, + "grad_norm": 0.37985658645629883, + "learning_rate": 1.9736406711078714e-05, + "loss": 0.4968, + "step": 5429 + }, + { + "epoch": 0.14909390444810544, + "grad_norm": 0.48362603783607483, + "learning_rate": 1.9736308192570855e-05, + "loss": 0.4581, + "step": 5430 + }, + { + "epoch": 0.14912136188907194, + "grad_norm": 0.40638208389282227, + "learning_rate": 1.973620965590167e-05, + "loss": 0.6695, + "step": 5431 + }, + { + "epoch": 0.14914881933003843, + "grad_norm": 0.34846746921539307, + "learning_rate": 1.9736111101071355e-05, + "loss": 0.5015, + "step": 5432 + }, + { + "epoch": 0.14917627677100495, + "grad_norm": 0.3912896513938904, + "learning_rate": 1.973601252808008e-05, + "loss": 0.5661, + "step": 5433 + }, + { + "epoch": 0.14920373421197144, + "grad_norm": 0.379562646150589, + "learning_rate": 1.9735913936928036e-05, + "loss": 0.5895, + "step": 5434 + }, + { + "epoch": 0.14923119165293794, + "grad_norm": 0.35137906670570374, + "learning_rate": 1.9735815327615403e-05, + "loss": 0.5226, + "step": 5435 + }, + { + "epoch": 0.14925864909390446, + "grad_norm": 0.31851595640182495, + "learning_rate": 1.973571670014237e-05, + "loss": 0.5527, + "step": 5436 + }, + { + "epoch": 0.14928610653487095, + "grad_norm": 0.3791787028312683, + "learning_rate": 1.9735618054509115e-05, + "loss": 0.5498, + "step": 5437 + }, + { + "epoch": 0.14931356397583745, + "grad_norm": 0.3747737407684326, + "learning_rate": 1.973551939071583e-05, + "loss": 0.6288, + "step": 5438 + }, + { + "epoch": 0.14934102141680394, + "grad_norm": 0.37143221497535706, + "learning_rate": 1.9735420708762688e-05, + "loss": 0.5986, + "step": 5439 + }, + { + "epoch": 0.14936847885777046, + "grad_norm": 0.3512117564678192, + "learning_rate": 1.9735322008649884e-05, + "loss": 0.577, + "step": 5440 + }, + { + "epoch": 0.14939593629873696, + "grad_norm": 0.37085437774658203, + "learning_rate": 1.9735223290377595e-05, + "loss": 0.5922, + "step": 5441 + }, + { + "epoch": 0.14942339373970345, + "grad_norm": 0.3812800645828247, + "learning_rate": 1.9735124553946008e-05, + "loss": 0.5298, + "step": 5442 + }, + { + "epoch": 0.14945085118066997, + "grad_norm": 0.39554622769355774, + "learning_rate": 1.9735025799355307e-05, + "loss": 0.5809, + "step": 5443 + }, + { + "epoch": 0.14947830862163647, + "grad_norm": 0.36180579662323, + "learning_rate": 1.9734927026605674e-05, + "loss": 0.4868, + "step": 5444 + }, + { + "epoch": 0.14950576606260296, + "grad_norm": 0.36902672052383423, + "learning_rate": 1.9734828235697295e-05, + "loss": 0.5551, + "step": 5445 + }, + { + "epoch": 0.14953322350356946, + "grad_norm": 0.3490082621574402, + "learning_rate": 1.973472942663036e-05, + "loss": 0.6011, + "step": 5446 + }, + { + "epoch": 0.14956068094453598, + "grad_norm": 0.3847252428531647, + "learning_rate": 1.973463059940504e-05, + "loss": 0.5294, + "step": 5447 + }, + { + "epoch": 0.14958813838550247, + "grad_norm": 0.38596588373184204, + "learning_rate": 1.973453175402153e-05, + "loss": 0.5368, + "step": 5448 + }, + { + "epoch": 0.14961559582646897, + "grad_norm": 0.40037351846694946, + "learning_rate": 1.9734432890480014e-05, + "loss": 0.5508, + "step": 5449 + }, + { + "epoch": 0.1496430532674355, + "grad_norm": 0.5406760573387146, + "learning_rate": 1.973433400878067e-05, + "loss": 0.5212, + "step": 5450 + }, + { + "epoch": 0.14967051070840198, + "grad_norm": 0.3883829116821289, + "learning_rate": 1.973423510892369e-05, + "loss": 0.6091, + "step": 5451 + }, + { + "epoch": 0.14969796814936848, + "grad_norm": 0.35628417134284973, + "learning_rate": 1.9734136190909252e-05, + "loss": 0.5195, + "step": 5452 + }, + { + "epoch": 0.14972542559033497, + "grad_norm": 0.3494766354560852, + "learning_rate": 1.9734037254737545e-05, + "loss": 0.5857, + "step": 5453 + }, + { + "epoch": 0.1497528830313015, + "grad_norm": 0.4444551169872284, + "learning_rate": 1.9733938300408747e-05, + "loss": 0.5893, + "step": 5454 + }, + { + "epoch": 0.149780340472268, + "grad_norm": 0.40822646021842957, + "learning_rate": 1.9733839327923053e-05, + "loss": 0.5272, + "step": 5455 + }, + { + "epoch": 0.14980779791323448, + "grad_norm": 0.3623168468475342, + "learning_rate": 1.973374033728064e-05, + "loss": 0.5543, + "step": 5456 + }, + { + "epoch": 0.14983525535420097, + "grad_norm": 0.40058866143226624, + "learning_rate": 1.9733641328481693e-05, + "loss": 0.4685, + "step": 5457 + }, + { + "epoch": 0.1498627127951675, + "grad_norm": 0.3532308340072632, + "learning_rate": 1.97335423015264e-05, + "loss": 0.5587, + "step": 5458 + }, + { + "epoch": 0.149890170236134, + "grad_norm": 0.3472833037376404, + "learning_rate": 1.9733443256414943e-05, + "loss": 0.4933, + "step": 5459 + }, + { + "epoch": 0.14991762767710048, + "grad_norm": 0.32734501361846924, + "learning_rate": 1.973334419314751e-05, + "loss": 0.5674, + "step": 5460 + }, + { + "epoch": 0.149945085118067, + "grad_norm": 0.3325420320034027, + "learning_rate": 1.9733245111724282e-05, + "loss": 0.4844, + "step": 5461 + }, + { + "epoch": 0.1499725425590335, + "grad_norm": 0.35602131485939026, + "learning_rate": 1.9733146012145445e-05, + "loss": 0.5324, + "step": 5462 + }, + { + "epoch": 0.15, + "grad_norm": 0.38359367847442627, + "learning_rate": 1.9733046894411184e-05, + "loss": 0.5913, + "step": 5463 + }, + { + "epoch": 0.1500274574409665, + "grad_norm": 0.3943618834018707, + "learning_rate": 1.9732947758521684e-05, + "loss": 0.5638, + "step": 5464 + }, + { + "epoch": 0.150054914881933, + "grad_norm": 0.3892349600791931, + "learning_rate": 1.9732848604477134e-05, + "loss": 0.5858, + "step": 5465 + }, + { + "epoch": 0.1500823723228995, + "grad_norm": 0.38026872277259827, + "learning_rate": 1.973274943227771e-05, + "loss": 0.5516, + "step": 5466 + }, + { + "epoch": 0.150109829763866, + "grad_norm": 0.361870139837265, + "learning_rate": 1.9732650241923602e-05, + "loss": 0.5461, + "step": 5467 + }, + { + "epoch": 0.15013728720483252, + "grad_norm": 0.3927292227745056, + "learning_rate": 1.9732551033415e-05, + "loss": 0.5586, + "step": 5468 + }, + { + "epoch": 0.15016474464579901, + "grad_norm": 0.4120394289493561, + "learning_rate": 1.973245180675208e-05, + "loss": 0.597, + "step": 5469 + }, + { + "epoch": 0.1501922020867655, + "grad_norm": 0.357098251581192, + "learning_rate": 1.9732352561935033e-05, + "loss": 0.5158, + "step": 5470 + }, + { + "epoch": 0.150219659527732, + "grad_norm": 0.36201396584510803, + "learning_rate": 1.973225329896404e-05, + "loss": 0.5449, + "step": 5471 + }, + { + "epoch": 0.15024711696869852, + "grad_norm": 0.3254477381706238, + "learning_rate": 1.973215401783929e-05, + "loss": 0.436, + "step": 5472 + }, + { + "epoch": 0.15027457440966502, + "grad_norm": 0.35944077372550964, + "learning_rate": 1.9732054718560968e-05, + "loss": 0.5849, + "step": 5473 + }, + { + "epoch": 0.1503020318506315, + "grad_norm": 0.3625255525112152, + "learning_rate": 1.9731955401129254e-05, + "loss": 0.5405, + "step": 5474 + }, + { + "epoch": 0.15032948929159803, + "grad_norm": 0.3702065646648407, + "learning_rate": 1.973185606554434e-05, + "loss": 0.5636, + "step": 5475 + }, + { + "epoch": 0.15035694673256453, + "grad_norm": 0.33432573080062866, + "learning_rate": 1.973175671180641e-05, + "loss": 0.4879, + "step": 5476 + }, + { + "epoch": 0.15038440417353102, + "grad_norm": 0.37046605348587036, + "learning_rate": 1.9731657339915645e-05, + "loss": 0.5376, + "step": 5477 + }, + { + "epoch": 0.15041186161449752, + "grad_norm": 0.41332754492759705, + "learning_rate": 1.9731557949872232e-05, + "loss": 0.5696, + "step": 5478 + }, + { + "epoch": 0.15043931905546404, + "grad_norm": 0.3858721852302551, + "learning_rate": 1.973145854167636e-05, + "loss": 0.5648, + "step": 5479 + }, + { + "epoch": 0.15046677649643053, + "grad_norm": 0.3690875470638275, + "learning_rate": 1.973135911532821e-05, + "loss": 0.4709, + "step": 5480 + }, + { + "epoch": 0.15049423393739703, + "grad_norm": 0.35418081283569336, + "learning_rate": 1.9731259670827973e-05, + "loss": 0.6074, + "step": 5481 + }, + { + "epoch": 0.15052169137836355, + "grad_norm": 0.3447156250476837, + "learning_rate": 1.9731160208175825e-05, + "loss": 0.4247, + "step": 5482 + }, + { + "epoch": 0.15054914881933004, + "grad_norm": 0.3723205626010895, + "learning_rate": 1.973106072737196e-05, + "loss": 0.6548, + "step": 5483 + }, + { + "epoch": 0.15057660626029654, + "grad_norm": 0.4221436083316803, + "learning_rate": 1.973096122841656e-05, + "loss": 0.5544, + "step": 5484 + }, + { + "epoch": 0.15060406370126303, + "grad_norm": 0.3566027581691742, + "learning_rate": 1.9730861711309813e-05, + "loss": 0.5151, + "step": 5485 + }, + { + "epoch": 0.15063152114222955, + "grad_norm": 0.38376685976982117, + "learning_rate": 1.9730762176051902e-05, + "loss": 0.5585, + "step": 5486 + }, + { + "epoch": 0.15065897858319605, + "grad_norm": 0.3500271439552307, + "learning_rate": 1.9730662622643017e-05, + "loss": 0.563, + "step": 5487 + }, + { + "epoch": 0.15068643602416254, + "grad_norm": 0.38316860795021057, + "learning_rate": 1.973056305108334e-05, + "loss": 0.5512, + "step": 5488 + }, + { + "epoch": 0.15071389346512906, + "grad_norm": 0.32794636487960815, + "learning_rate": 1.9730463461373055e-05, + "loss": 0.5247, + "step": 5489 + }, + { + "epoch": 0.15074135090609556, + "grad_norm": 0.3793143928050995, + "learning_rate": 1.973036385351235e-05, + "loss": 0.5294, + "step": 5490 + }, + { + "epoch": 0.15076880834706205, + "grad_norm": 0.37142741680145264, + "learning_rate": 1.9730264227501413e-05, + "loss": 0.5825, + "step": 5491 + }, + { + "epoch": 0.15079626578802854, + "grad_norm": 0.3818708062171936, + "learning_rate": 1.9730164583340425e-05, + "loss": 0.5395, + "step": 5492 + }, + { + "epoch": 0.15082372322899507, + "grad_norm": 0.3467860817909241, + "learning_rate": 1.9730064921029577e-05, + "loss": 0.5504, + "step": 5493 + }, + { + "epoch": 0.15085118066996156, + "grad_norm": 0.37573227286338806, + "learning_rate": 1.972996524056905e-05, + "loss": 0.5533, + "step": 5494 + }, + { + "epoch": 0.15087863811092805, + "grad_norm": 0.41943517327308655, + "learning_rate": 1.9729865541959035e-05, + "loss": 0.6091, + "step": 5495 + }, + { + "epoch": 0.15090609555189458, + "grad_norm": 0.3596145510673523, + "learning_rate": 1.9729765825199714e-05, + "loss": 0.5461, + "step": 5496 + }, + { + "epoch": 0.15093355299286107, + "grad_norm": 0.44140323996543884, + "learning_rate": 1.972966609029127e-05, + "loss": 0.5648, + "step": 5497 + }, + { + "epoch": 0.15096101043382756, + "grad_norm": 0.36005473136901855, + "learning_rate": 1.97295663372339e-05, + "loss": 0.5603, + "step": 5498 + }, + { + "epoch": 0.15098846787479406, + "grad_norm": 0.3879510462284088, + "learning_rate": 1.9729466566027783e-05, + "loss": 0.6081, + "step": 5499 + }, + { + "epoch": 0.15101592531576058, + "grad_norm": 0.3433636724948883, + "learning_rate": 1.9729366776673103e-05, + "loss": 0.5526, + "step": 5500 + }, + { + "epoch": 0.15104338275672707, + "grad_norm": 0.34557685256004333, + "learning_rate": 1.9729266969170048e-05, + "loss": 0.5649, + "step": 5501 + }, + { + "epoch": 0.15107084019769357, + "grad_norm": 0.32958993315696716, + "learning_rate": 1.972916714351881e-05, + "loss": 0.4302, + "step": 5502 + }, + { + "epoch": 0.1510982976386601, + "grad_norm": 0.34973689913749695, + "learning_rate": 1.9729067299719563e-05, + "loss": 0.5256, + "step": 5503 + }, + { + "epoch": 0.15112575507962658, + "grad_norm": 0.3709667921066284, + "learning_rate": 1.9728967437772506e-05, + "loss": 0.5818, + "step": 5504 + }, + { + "epoch": 0.15115321252059308, + "grad_norm": 0.3390612006187439, + "learning_rate": 1.972886755767782e-05, + "loss": 0.5358, + "step": 5505 + }, + { + "epoch": 0.15118066996155957, + "grad_norm": 0.3532524108886719, + "learning_rate": 1.9728767659435685e-05, + "loss": 0.5602, + "step": 5506 + }, + { + "epoch": 0.1512081274025261, + "grad_norm": 0.3373757302761078, + "learning_rate": 1.9728667743046296e-05, + "loss": 0.4957, + "step": 5507 + }, + { + "epoch": 0.1512355848434926, + "grad_norm": 0.47588032484054565, + "learning_rate": 1.9728567808509837e-05, + "loss": 0.53, + "step": 5508 + }, + { + "epoch": 0.15126304228445908, + "grad_norm": 0.38193178176879883, + "learning_rate": 1.9728467855826497e-05, + "loss": 0.6181, + "step": 5509 + }, + { + "epoch": 0.1512904997254256, + "grad_norm": 0.3587850034236908, + "learning_rate": 1.9728367884996454e-05, + "loss": 0.5358, + "step": 5510 + }, + { + "epoch": 0.1513179571663921, + "grad_norm": 0.37715083360671997, + "learning_rate": 1.9728267896019907e-05, + "loss": 0.5014, + "step": 5511 + }, + { + "epoch": 0.1513454146073586, + "grad_norm": 0.32873043417930603, + "learning_rate": 1.972816788889703e-05, + "loss": 0.5294, + "step": 5512 + }, + { + "epoch": 0.15137287204832509, + "grad_norm": 0.3735576868057251, + "learning_rate": 1.9728067863628015e-05, + "loss": 0.504, + "step": 5513 + }, + { + "epoch": 0.1514003294892916, + "grad_norm": 0.3857606053352356, + "learning_rate": 1.972796782021305e-05, + "loss": 0.6554, + "step": 5514 + }, + { + "epoch": 0.1514277869302581, + "grad_norm": 0.38550594449043274, + "learning_rate": 1.9727867758652318e-05, + "loss": 0.454, + "step": 5515 + }, + { + "epoch": 0.1514552443712246, + "grad_norm": 0.34088799357414246, + "learning_rate": 1.972776767894601e-05, + "loss": 0.4998, + "step": 5516 + }, + { + "epoch": 0.15148270181219112, + "grad_norm": 0.37846365571022034, + "learning_rate": 1.9727667581094313e-05, + "loss": 0.5778, + "step": 5517 + }, + { + "epoch": 0.1515101592531576, + "grad_norm": 0.38862621784210205, + "learning_rate": 1.972756746509741e-05, + "loss": 0.619, + "step": 5518 + }, + { + "epoch": 0.1515376166941241, + "grad_norm": 0.3352360129356384, + "learning_rate": 1.9727467330955485e-05, + "loss": 0.5275, + "step": 5519 + }, + { + "epoch": 0.1515650741350906, + "grad_norm": 0.35910022258758545, + "learning_rate": 1.9727367178668732e-05, + "loss": 0.566, + "step": 5520 + }, + { + "epoch": 0.15159253157605712, + "grad_norm": 0.38554370403289795, + "learning_rate": 1.9727267008237334e-05, + "loss": 0.5136, + "step": 5521 + }, + { + "epoch": 0.15161998901702362, + "grad_norm": 0.343098908662796, + "learning_rate": 1.9727166819661477e-05, + "loss": 0.5412, + "step": 5522 + }, + { + "epoch": 0.1516474464579901, + "grad_norm": 0.37904587388038635, + "learning_rate": 1.9727066612941352e-05, + "loss": 0.5571, + "step": 5523 + }, + { + "epoch": 0.1516749038989566, + "grad_norm": 0.3893482983112335, + "learning_rate": 1.9726966388077143e-05, + "loss": 0.5691, + "step": 5524 + }, + { + "epoch": 0.15170236133992313, + "grad_norm": 0.4430007040500641, + "learning_rate": 1.9726866145069035e-05, + "loss": 0.6242, + "step": 5525 + }, + { + "epoch": 0.15172981878088962, + "grad_norm": 0.30272871255874634, + "learning_rate": 1.9726765883917217e-05, + "loss": 0.4733, + "step": 5526 + }, + { + "epoch": 0.1517572762218561, + "grad_norm": 0.3406831920146942, + "learning_rate": 1.972666560462188e-05, + "loss": 0.4935, + "step": 5527 + }, + { + "epoch": 0.15178473366282264, + "grad_norm": 0.3792722523212433, + "learning_rate": 1.9726565307183203e-05, + "loss": 0.5832, + "step": 5528 + }, + { + "epoch": 0.15181219110378913, + "grad_norm": 0.3837742209434509, + "learning_rate": 1.972646499160138e-05, + "loss": 0.5561, + "step": 5529 + }, + { + "epoch": 0.15183964854475562, + "grad_norm": 0.3891392648220062, + "learning_rate": 1.972636465787659e-05, + "loss": 0.5031, + "step": 5530 + }, + { + "epoch": 0.15186710598572212, + "grad_norm": 0.4031456410884857, + "learning_rate": 1.9726264306009036e-05, + "loss": 0.541, + "step": 5531 + }, + { + "epoch": 0.15189456342668864, + "grad_norm": 0.33621543645858765, + "learning_rate": 1.9726163935998888e-05, + "loss": 0.5663, + "step": 5532 + }, + { + "epoch": 0.15192202086765513, + "grad_norm": 0.36366885900497437, + "learning_rate": 1.972606354784634e-05, + "loss": 0.6321, + "step": 5533 + }, + { + "epoch": 0.15194947830862163, + "grad_norm": 0.3497453033924103, + "learning_rate": 1.972596314155158e-05, + "loss": 0.5924, + "step": 5534 + }, + { + "epoch": 0.15197693574958815, + "grad_norm": 0.3571999669075012, + "learning_rate": 1.9725862717114796e-05, + "loss": 0.5954, + "step": 5535 + }, + { + "epoch": 0.15200439319055464, + "grad_norm": 0.36435437202453613, + "learning_rate": 1.9725762274536175e-05, + "loss": 0.5434, + "step": 5536 + }, + { + "epoch": 0.15203185063152114, + "grad_norm": 0.3968261480331421, + "learning_rate": 1.97256618138159e-05, + "loss": 0.5937, + "step": 5537 + }, + { + "epoch": 0.15205930807248763, + "grad_norm": 0.3839634954929352, + "learning_rate": 1.972556133495416e-05, + "loss": 0.5538, + "step": 5538 + }, + { + "epoch": 0.15208676551345415, + "grad_norm": 0.6172838807106018, + "learning_rate": 1.9725460837951148e-05, + "loss": 0.5342, + "step": 5539 + }, + { + "epoch": 0.15211422295442065, + "grad_norm": 0.3880188465118408, + "learning_rate": 1.9725360322807048e-05, + "loss": 0.53, + "step": 5540 + }, + { + "epoch": 0.15214168039538714, + "grad_norm": 0.40512752532958984, + "learning_rate": 1.9725259789522043e-05, + "loss": 0.5645, + "step": 5541 + }, + { + "epoch": 0.15216913783635366, + "grad_norm": 0.3824261426925659, + "learning_rate": 1.972515923809633e-05, + "loss": 0.5065, + "step": 5542 + }, + { + "epoch": 0.15219659527732016, + "grad_norm": 0.36385175585746765, + "learning_rate": 1.9725058668530085e-05, + "loss": 0.5621, + "step": 5543 + }, + { + "epoch": 0.15222405271828665, + "grad_norm": 0.3566296100616455, + "learning_rate": 1.9724958080823508e-05, + "loss": 0.5892, + "step": 5544 + }, + { + "epoch": 0.15225151015925315, + "grad_norm": 0.41889527440071106, + "learning_rate": 1.9724857474976775e-05, + "loss": 0.5937, + "step": 5545 + }, + { + "epoch": 0.15227896760021967, + "grad_norm": 0.3974285125732422, + "learning_rate": 1.972475685099008e-05, + "loss": 0.5296, + "step": 5546 + }, + { + "epoch": 0.15230642504118616, + "grad_norm": 0.3210245370864868, + "learning_rate": 1.9724656208863614e-05, + "loss": 0.5419, + "step": 5547 + }, + { + "epoch": 0.15233388248215265, + "grad_norm": 0.3521832525730133, + "learning_rate": 1.9724555548597556e-05, + "loss": 0.5812, + "step": 5548 + }, + { + "epoch": 0.15236133992311918, + "grad_norm": 0.3567545413970947, + "learning_rate": 1.97244548701921e-05, + "loss": 0.5357, + "step": 5549 + }, + { + "epoch": 0.15238879736408567, + "grad_norm": 0.34361639618873596, + "learning_rate": 1.972435417364743e-05, + "loss": 0.5095, + "step": 5550 + }, + { + "epoch": 0.15241625480505216, + "grad_norm": 0.3825651705265045, + "learning_rate": 1.9724253458963737e-05, + "loss": 0.5615, + "step": 5551 + }, + { + "epoch": 0.15244371224601866, + "grad_norm": 0.3785964548587799, + "learning_rate": 1.972415272614121e-05, + "loss": 0.5819, + "step": 5552 + }, + { + "epoch": 0.15247116968698518, + "grad_norm": 0.39576196670532227, + "learning_rate": 1.9724051975180032e-05, + "loss": 0.6905, + "step": 5553 + }, + { + "epoch": 0.15249862712795167, + "grad_norm": 0.42916107177734375, + "learning_rate": 1.9723951206080394e-05, + "loss": 0.607, + "step": 5554 + }, + { + "epoch": 0.15252608456891817, + "grad_norm": 0.3302153944969177, + "learning_rate": 1.9723850418842484e-05, + "loss": 0.4805, + "step": 5555 + }, + { + "epoch": 0.1525535420098847, + "grad_norm": 0.3506641983985901, + "learning_rate": 1.9723749613466488e-05, + "loss": 0.5067, + "step": 5556 + }, + { + "epoch": 0.15258099945085118, + "grad_norm": 0.4178961515426636, + "learning_rate": 1.9723648789952596e-05, + "loss": 0.5738, + "step": 5557 + }, + { + "epoch": 0.15260845689181768, + "grad_norm": 0.36456209421157837, + "learning_rate": 1.9723547948300995e-05, + "loss": 0.6381, + "step": 5558 + }, + { + "epoch": 0.15263591433278417, + "grad_norm": 0.34125974774360657, + "learning_rate": 1.9723447088511877e-05, + "loss": 0.4905, + "step": 5559 + }, + { + "epoch": 0.1526633717737507, + "grad_norm": 0.41266191005706787, + "learning_rate": 1.9723346210585422e-05, + "loss": 0.5414, + "step": 5560 + }, + { + "epoch": 0.1526908292147172, + "grad_norm": 0.38236403465270996, + "learning_rate": 1.9723245314521827e-05, + "loss": 0.557, + "step": 5561 + }, + { + "epoch": 0.15271828665568368, + "grad_norm": 0.3475516140460968, + "learning_rate": 1.9723144400321274e-05, + "loss": 0.4895, + "step": 5562 + }, + { + "epoch": 0.1527457440966502, + "grad_norm": 0.38912731409072876, + "learning_rate": 1.9723043467983957e-05, + "loss": 0.5621, + "step": 5563 + }, + { + "epoch": 0.1527732015376167, + "grad_norm": 0.40371137857437134, + "learning_rate": 1.9722942517510055e-05, + "loss": 0.5787, + "step": 5564 + }, + { + "epoch": 0.1528006589785832, + "grad_norm": 0.3598966896533966, + "learning_rate": 1.9722841548899768e-05, + "loss": 0.6024, + "step": 5565 + }, + { + "epoch": 0.1528281164195497, + "grad_norm": 0.3343079090118408, + "learning_rate": 1.9722740562153273e-05, + "loss": 0.5271, + "step": 5566 + }, + { + "epoch": 0.1528555738605162, + "grad_norm": 0.376792848110199, + "learning_rate": 1.9722639557270765e-05, + "loss": 0.5958, + "step": 5567 + }, + { + "epoch": 0.1528830313014827, + "grad_norm": 0.3950939178466797, + "learning_rate": 1.9722538534252432e-05, + "loss": 0.5511, + "step": 5568 + }, + { + "epoch": 0.1529104887424492, + "grad_norm": 0.378921240568161, + "learning_rate": 1.9722437493098462e-05, + "loss": 0.6029, + "step": 5569 + }, + { + "epoch": 0.15293794618341572, + "grad_norm": 0.32411178946495056, + "learning_rate": 1.9722336433809043e-05, + "loss": 0.6283, + "step": 5570 + }, + { + "epoch": 0.1529654036243822, + "grad_norm": 0.3505442440509796, + "learning_rate": 1.9722235356384364e-05, + "loss": 0.4478, + "step": 5571 + }, + { + "epoch": 0.1529928610653487, + "grad_norm": 0.3823806047439575, + "learning_rate": 1.972213426082461e-05, + "loss": 0.5893, + "step": 5572 + }, + { + "epoch": 0.1530203185063152, + "grad_norm": 0.33084216713905334, + "learning_rate": 1.9722033147129974e-05, + "loss": 0.51, + "step": 5573 + }, + { + "epoch": 0.15304777594728172, + "grad_norm": 0.3912903964519501, + "learning_rate": 1.9721932015300647e-05, + "loss": 0.5463, + "step": 5574 + }, + { + "epoch": 0.15307523338824822, + "grad_norm": 0.35563844442367554, + "learning_rate": 1.972183086533681e-05, + "loss": 0.544, + "step": 5575 + }, + { + "epoch": 0.1531026908292147, + "grad_norm": 0.3753843903541565, + "learning_rate": 1.9721729697238658e-05, + "loss": 0.5364, + "step": 5576 + }, + { + "epoch": 0.15313014827018123, + "grad_norm": 0.3997265696525574, + "learning_rate": 1.9721628511006376e-05, + "loss": 0.6538, + "step": 5577 + }, + { + "epoch": 0.15315760571114773, + "grad_norm": 0.4251525402069092, + "learning_rate": 1.972152730664015e-05, + "loss": 0.5494, + "step": 5578 + }, + { + "epoch": 0.15318506315211422, + "grad_norm": 0.3555522561073303, + "learning_rate": 1.972142608414018e-05, + "loss": 0.5594, + "step": 5579 + }, + { + "epoch": 0.15321252059308071, + "grad_norm": 0.3251302242279053, + "learning_rate": 1.9721324843506643e-05, + "loss": 0.5197, + "step": 5580 + }, + { + "epoch": 0.15323997803404724, + "grad_norm": 0.41119250655174255, + "learning_rate": 1.9721223584739735e-05, + "loss": 0.5744, + "step": 5581 + }, + { + "epoch": 0.15326743547501373, + "grad_norm": 0.39105573296546936, + "learning_rate": 1.972112230783964e-05, + "loss": 0.5899, + "step": 5582 + }, + { + "epoch": 0.15329489291598022, + "grad_norm": 0.40060991048812866, + "learning_rate": 1.9721021012806553e-05, + "loss": 0.6122, + "step": 5583 + }, + { + "epoch": 0.15332235035694675, + "grad_norm": 0.3241768181324005, + "learning_rate": 1.9720919699640654e-05, + "loss": 0.5145, + "step": 5584 + }, + { + "epoch": 0.15334980779791324, + "grad_norm": 0.3501599133014679, + "learning_rate": 1.9720818368342142e-05, + "loss": 0.4828, + "step": 5585 + }, + { + "epoch": 0.15337726523887973, + "grad_norm": 0.32264870405197144, + "learning_rate": 1.97207170189112e-05, + "loss": 0.5323, + "step": 5586 + }, + { + "epoch": 0.15340472267984623, + "grad_norm": 0.38283437490463257, + "learning_rate": 1.9720615651348014e-05, + "loss": 0.5986, + "step": 5587 + }, + { + "epoch": 0.15343218012081275, + "grad_norm": 0.3666941523551941, + "learning_rate": 1.9720514265652785e-05, + "loss": 0.6173, + "step": 5588 + }, + { + "epoch": 0.15345963756177924, + "grad_norm": 0.8631507754325867, + "learning_rate": 1.9720412861825688e-05, + "loss": 0.5501, + "step": 5589 + }, + { + "epoch": 0.15348709500274574, + "grad_norm": 0.37115368247032166, + "learning_rate": 1.9720311439866918e-05, + "loss": 0.4932, + "step": 5590 + }, + { + "epoch": 0.15351455244371223, + "grad_norm": 0.3938896954059601, + "learning_rate": 1.972020999977667e-05, + "loss": 0.5539, + "step": 5591 + }, + { + "epoch": 0.15354200988467875, + "grad_norm": 0.9908981919288635, + "learning_rate": 1.9720108541555126e-05, + "loss": 0.5734, + "step": 5592 + }, + { + "epoch": 0.15356946732564525, + "grad_norm": 0.4437166750431061, + "learning_rate": 1.972000706520248e-05, + "loss": 0.4891, + "step": 5593 + }, + { + "epoch": 0.15359692476661174, + "grad_norm": 0.3536995053291321, + "learning_rate": 1.9719905570718912e-05, + "loss": 0.5118, + "step": 5594 + }, + { + "epoch": 0.15362438220757826, + "grad_norm": 0.31357476115226746, + "learning_rate": 1.9719804058104622e-05, + "loss": 0.529, + "step": 5595 + }, + { + "epoch": 0.15365183964854476, + "grad_norm": 0.3798641562461853, + "learning_rate": 1.9719702527359795e-05, + "loss": 0.5156, + "step": 5596 + }, + { + "epoch": 0.15367929708951125, + "grad_norm": 0.3632446825504303, + "learning_rate": 1.971960097848462e-05, + "loss": 0.4834, + "step": 5597 + }, + { + "epoch": 0.15370675453047775, + "grad_norm": 0.3052544891834259, + "learning_rate": 1.9719499411479285e-05, + "loss": 0.4785, + "step": 5598 + }, + { + "epoch": 0.15373421197144427, + "grad_norm": 0.363316148519516, + "learning_rate": 1.9719397826343986e-05, + "loss": 0.5161, + "step": 5599 + }, + { + "epoch": 0.15376166941241076, + "grad_norm": 0.3966797888278961, + "learning_rate": 1.9719296223078905e-05, + "loss": 0.5331, + "step": 5600 + }, + { + "epoch": 0.15378912685337726, + "grad_norm": 0.37271738052368164, + "learning_rate": 1.9719194601684236e-05, + "loss": 0.4774, + "step": 5601 + }, + { + "epoch": 0.15381658429434378, + "grad_norm": 0.36824944615364075, + "learning_rate": 1.9719092962160165e-05, + "loss": 0.5731, + "step": 5602 + }, + { + "epoch": 0.15384404173531027, + "grad_norm": 0.350877583026886, + "learning_rate": 1.971899130450689e-05, + "loss": 0.4993, + "step": 5603 + }, + { + "epoch": 0.15387149917627677, + "grad_norm": 0.3424382507801056, + "learning_rate": 1.9718889628724586e-05, + "loss": 0.5147, + "step": 5604 + }, + { + "epoch": 0.15389895661724326, + "grad_norm": 0.3630553185939789, + "learning_rate": 1.9718787934813454e-05, + "loss": 0.5399, + "step": 5605 + }, + { + "epoch": 0.15392641405820978, + "grad_norm": 0.3650819659233093, + "learning_rate": 1.971868622277368e-05, + "loss": 0.5648, + "step": 5606 + }, + { + "epoch": 0.15395387149917628, + "grad_norm": 0.612394392490387, + "learning_rate": 1.9718584492605455e-05, + "loss": 0.6312, + "step": 5607 + }, + { + "epoch": 0.15398132894014277, + "grad_norm": 0.3908075988292694, + "learning_rate": 1.9718482744308967e-05, + "loss": 0.5172, + "step": 5608 + }, + { + "epoch": 0.1540087863811093, + "grad_norm": 0.34633272886276245, + "learning_rate": 1.971838097788441e-05, + "loss": 0.484, + "step": 5609 + }, + { + "epoch": 0.15403624382207579, + "grad_norm": 0.342155396938324, + "learning_rate": 1.971827919333197e-05, + "loss": 0.5997, + "step": 5610 + }, + { + "epoch": 0.15406370126304228, + "grad_norm": 0.37762144207954407, + "learning_rate": 1.9718177390651837e-05, + "loss": 0.5277, + "step": 5611 + }, + { + "epoch": 0.15409115870400877, + "grad_norm": 0.35411328077316284, + "learning_rate": 1.97180755698442e-05, + "loss": 0.5291, + "step": 5612 + }, + { + "epoch": 0.1541186161449753, + "grad_norm": 0.3767492473125458, + "learning_rate": 1.971797373090925e-05, + "loss": 0.5582, + "step": 5613 + }, + { + "epoch": 0.1541460735859418, + "grad_norm": 0.35152357816696167, + "learning_rate": 1.971787187384718e-05, + "loss": 0.613, + "step": 5614 + }, + { + "epoch": 0.15417353102690828, + "grad_norm": 0.3290799558162689, + "learning_rate": 1.9717769998658175e-05, + "loss": 0.5699, + "step": 5615 + }, + { + "epoch": 0.1542009884678748, + "grad_norm": 0.3586711287498474, + "learning_rate": 1.971766810534243e-05, + "loss": 0.6129, + "step": 5616 + }, + { + "epoch": 0.1542284459088413, + "grad_norm": 0.35451260209083557, + "learning_rate": 1.971756619390013e-05, + "loss": 0.5973, + "step": 5617 + }, + { + "epoch": 0.1542559033498078, + "grad_norm": 0.40031322836875916, + "learning_rate": 1.9717464264331468e-05, + "loss": 0.6035, + "step": 5618 + }, + { + "epoch": 0.1542833607907743, + "grad_norm": 0.37324970960617065, + "learning_rate": 1.9717362316636635e-05, + "loss": 0.5302, + "step": 5619 + }, + { + "epoch": 0.1543108182317408, + "grad_norm": 0.5888229012489319, + "learning_rate": 1.9717260350815818e-05, + "loss": 0.5205, + "step": 5620 + }, + { + "epoch": 0.1543382756727073, + "grad_norm": 0.4044210910797119, + "learning_rate": 1.971715836686921e-05, + "loss": 0.5862, + "step": 5621 + }, + { + "epoch": 0.1543657331136738, + "grad_norm": 0.3922604024410248, + "learning_rate": 1.9717056364797003e-05, + "loss": 0.5699, + "step": 5622 + }, + { + "epoch": 0.15439319055464032, + "grad_norm": 0.37618306279182434, + "learning_rate": 1.9716954344599378e-05, + "loss": 0.4798, + "step": 5623 + }, + { + "epoch": 0.1544206479956068, + "grad_norm": 0.36229491233825684, + "learning_rate": 1.971685230627654e-05, + "loss": 0.5031, + "step": 5624 + }, + { + "epoch": 0.1544481054365733, + "grad_norm": 0.3969488739967346, + "learning_rate": 1.9716750249828665e-05, + "loss": 0.5701, + "step": 5625 + }, + { + "epoch": 0.1544755628775398, + "grad_norm": 0.4239669740200043, + "learning_rate": 1.971664817525595e-05, + "loss": 0.5679, + "step": 5626 + }, + { + "epoch": 0.15450302031850632, + "grad_norm": 0.38922828435897827, + "learning_rate": 1.9716546082558586e-05, + "loss": 0.4963, + "step": 5627 + }, + { + "epoch": 0.15453047775947282, + "grad_norm": 0.37125635147094727, + "learning_rate": 1.9716443971736764e-05, + "loss": 0.4945, + "step": 5628 + }, + { + "epoch": 0.1545579352004393, + "grad_norm": 0.360576331615448, + "learning_rate": 1.9716341842790673e-05, + "loss": 0.4935, + "step": 5629 + }, + { + "epoch": 0.15458539264140583, + "grad_norm": 0.3835815489292145, + "learning_rate": 1.9716239695720502e-05, + "loss": 0.5198, + "step": 5630 + }, + { + "epoch": 0.15461285008237233, + "grad_norm": 0.41859200596809387, + "learning_rate": 1.9716137530526446e-05, + "loss": 0.578, + "step": 5631 + }, + { + "epoch": 0.15464030752333882, + "grad_norm": 0.37173429131507874, + "learning_rate": 1.9716035347208686e-05, + "loss": 0.5734, + "step": 5632 + }, + { + "epoch": 0.15466776496430532, + "grad_norm": 0.3195829689502716, + "learning_rate": 1.9715933145767424e-05, + "loss": 0.504, + "step": 5633 + }, + { + "epoch": 0.15469522240527184, + "grad_norm": 0.3686690330505371, + "learning_rate": 1.9715830926202844e-05, + "loss": 0.7215, + "step": 5634 + }, + { + "epoch": 0.15472267984623833, + "grad_norm": 0.381635457277298, + "learning_rate": 1.9715728688515137e-05, + "loss": 0.4869, + "step": 5635 + }, + { + "epoch": 0.15475013728720483, + "grad_norm": 0.3400125205516815, + "learning_rate": 1.97156264327045e-05, + "loss": 0.4129, + "step": 5636 + }, + { + "epoch": 0.15477759472817135, + "grad_norm": 0.4204837679862976, + "learning_rate": 1.9715524158771115e-05, + "loss": 0.469, + "step": 5637 + }, + { + "epoch": 0.15480505216913784, + "grad_norm": 0.3621370792388916, + "learning_rate": 1.9715421866715176e-05, + "loss": 0.5086, + "step": 5638 + }, + { + "epoch": 0.15483250961010434, + "grad_norm": 0.3612726926803589, + "learning_rate": 1.9715319556536875e-05, + "loss": 0.5486, + "step": 5639 + }, + { + "epoch": 0.15485996705107083, + "grad_norm": 0.3584776818752289, + "learning_rate": 1.9715217228236403e-05, + "loss": 0.561, + "step": 5640 + }, + { + "epoch": 0.15488742449203735, + "grad_norm": 0.40012142062187195, + "learning_rate": 1.971511488181395e-05, + "loss": 0.6588, + "step": 5641 + }, + { + "epoch": 0.15491488193300385, + "grad_norm": 0.4113002121448517, + "learning_rate": 1.971501251726971e-05, + "loss": 0.5808, + "step": 5642 + }, + { + "epoch": 0.15494233937397034, + "grad_norm": 0.3712954521179199, + "learning_rate": 1.9714910134603866e-05, + "loss": 0.5207, + "step": 5643 + }, + { + "epoch": 0.15496979681493686, + "grad_norm": 0.37284165620803833, + "learning_rate": 1.9714807733816618e-05, + "loss": 0.5148, + "step": 5644 + }, + { + "epoch": 0.15499725425590336, + "grad_norm": 0.3777366578578949, + "learning_rate": 1.971470531490815e-05, + "loss": 0.6387, + "step": 5645 + }, + { + "epoch": 0.15502471169686985, + "grad_norm": 0.41760584712028503, + "learning_rate": 1.9714602877878656e-05, + "loss": 0.6482, + "step": 5646 + }, + { + "epoch": 0.15505216913783634, + "grad_norm": 0.3293091058731079, + "learning_rate": 1.971450042272833e-05, + "loss": 0.5473, + "step": 5647 + }, + { + "epoch": 0.15507962657880286, + "grad_norm": 0.3748513460159302, + "learning_rate": 1.9714397949457358e-05, + "loss": 0.5429, + "step": 5648 + }, + { + "epoch": 0.15510708401976936, + "grad_norm": 0.37803444266319275, + "learning_rate": 1.9714295458065933e-05, + "loss": 0.5869, + "step": 5649 + }, + { + "epoch": 0.15513454146073585, + "grad_norm": 0.37386223673820496, + "learning_rate": 1.9714192948554247e-05, + "loss": 0.5914, + "step": 5650 + }, + { + "epoch": 0.15516199890170237, + "grad_norm": 0.38799989223480225, + "learning_rate": 1.971409042092249e-05, + "loss": 0.5054, + "step": 5651 + }, + { + "epoch": 0.15518945634266887, + "grad_norm": 0.4151075482368469, + "learning_rate": 1.9713987875170856e-05, + "loss": 0.6192, + "step": 5652 + }, + { + "epoch": 0.15521691378363536, + "grad_norm": 0.3364473581314087, + "learning_rate": 1.9713885311299533e-05, + "loss": 0.5616, + "step": 5653 + }, + { + "epoch": 0.15524437122460186, + "grad_norm": 0.33981823921203613, + "learning_rate": 1.9713782729308715e-05, + "loss": 0.5037, + "step": 5654 + }, + { + "epoch": 0.15527182866556838, + "grad_norm": 0.33685415983200073, + "learning_rate": 1.9713680129198592e-05, + "loss": 0.5078, + "step": 5655 + }, + { + "epoch": 0.15529928610653487, + "grad_norm": 0.36643290519714355, + "learning_rate": 1.9713577510969353e-05, + "loss": 0.6075, + "step": 5656 + }, + { + "epoch": 0.15532674354750137, + "grad_norm": 0.4498896896839142, + "learning_rate": 1.971347487462119e-05, + "loss": 0.5344, + "step": 5657 + }, + { + "epoch": 0.15535420098846786, + "grad_norm": 0.3430514633655548, + "learning_rate": 1.9713372220154302e-05, + "loss": 0.5159, + "step": 5658 + }, + { + "epoch": 0.15538165842943438, + "grad_norm": 0.3936172127723694, + "learning_rate": 1.971326954756887e-05, + "loss": 0.6037, + "step": 5659 + }, + { + "epoch": 0.15540911587040088, + "grad_norm": 0.3689296543598175, + "learning_rate": 1.9713166856865092e-05, + "loss": 0.5211, + "step": 5660 + }, + { + "epoch": 0.15543657331136737, + "grad_norm": 0.42469772696495056, + "learning_rate": 1.9713064148043156e-05, + "loss": 0.6194, + "step": 5661 + }, + { + "epoch": 0.1554640307523339, + "grad_norm": 0.3666783571243286, + "learning_rate": 1.971296142110326e-05, + "loss": 0.5575, + "step": 5662 + }, + { + "epoch": 0.1554914881933004, + "grad_norm": 0.5318717956542969, + "learning_rate": 1.9712858676045585e-05, + "loss": 0.5137, + "step": 5663 + }, + { + "epoch": 0.15551894563426688, + "grad_norm": 0.3484901785850525, + "learning_rate": 1.971275591287033e-05, + "loss": 0.5125, + "step": 5664 + }, + { + "epoch": 0.15554640307523337, + "grad_norm": 0.36983150243759155, + "learning_rate": 1.971265313157769e-05, + "loss": 0.5397, + "step": 5665 + }, + { + "epoch": 0.1555738605161999, + "grad_norm": 0.34134283661842346, + "learning_rate": 1.9712550332167848e-05, + "loss": 0.5883, + "step": 5666 + }, + { + "epoch": 0.1556013179571664, + "grad_norm": 0.40571022033691406, + "learning_rate": 1.9712447514641e-05, + "loss": 0.5857, + "step": 5667 + }, + { + "epoch": 0.15562877539813288, + "grad_norm": 0.37248679995536804, + "learning_rate": 1.9712344678997336e-05, + "loss": 0.5755, + "step": 5668 + }, + { + "epoch": 0.1556562328390994, + "grad_norm": 0.5988563895225525, + "learning_rate": 1.9712241825237054e-05, + "loss": 0.5292, + "step": 5669 + }, + { + "epoch": 0.1556836902800659, + "grad_norm": 0.3268367350101471, + "learning_rate": 1.9712138953360337e-05, + "loss": 0.5259, + "step": 5670 + }, + { + "epoch": 0.1557111477210324, + "grad_norm": 0.368869811296463, + "learning_rate": 1.9712036063367384e-05, + "loss": 0.557, + "step": 5671 + }, + { + "epoch": 0.1557386051619989, + "grad_norm": 0.34634846448898315, + "learning_rate": 1.971193315525838e-05, + "loss": 0.5087, + "step": 5672 + }, + { + "epoch": 0.1557660626029654, + "grad_norm": 0.350031316280365, + "learning_rate": 1.9711830229033527e-05, + "loss": 0.4736, + "step": 5673 + }, + { + "epoch": 0.1557935200439319, + "grad_norm": 0.3859115540981293, + "learning_rate": 1.971172728469301e-05, + "loss": 0.6293, + "step": 5674 + }, + { + "epoch": 0.1558209774848984, + "grad_norm": 0.341254323720932, + "learning_rate": 1.9711624322237014e-05, + "loss": 0.4693, + "step": 5675 + }, + { + "epoch": 0.15584843492586492, + "grad_norm": 0.367657870054245, + "learning_rate": 1.9711521341665745e-05, + "loss": 0.5055, + "step": 5676 + }, + { + "epoch": 0.15587589236683141, + "grad_norm": 0.3540874421596527, + "learning_rate": 1.971141834297939e-05, + "loss": 0.632, + "step": 5677 + }, + { + "epoch": 0.1559033498077979, + "grad_norm": 0.8001943230628967, + "learning_rate": 1.9711315326178137e-05, + "loss": 0.5501, + "step": 5678 + }, + { + "epoch": 0.1559308072487644, + "grad_norm": 0.3829965591430664, + "learning_rate": 1.9711212291262184e-05, + "loss": 0.591, + "step": 5679 + }, + { + "epoch": 0.15595826468973092, + "grad_norm": 0.469049334526062, + "learning_rate": 1.9711109238231723e-05, + "loss": 0.6327, + "step": 5680 + }, + { + "epoch": 0.15598572213069742, + "grad_norm": 0.4662819504737854, + "learning_rate": 1.971100616708694e-05, + "loss": 0.5724, + "step": 5681 + }, + { + "epoch": 0.1560131795716639, + "grad_norm": 0.34809795022010803, + "learning_rate": 1.9710903077828033e-05, + "loss": 0.4946, + "step": 5682 + }, + { + "epoch": 0.15604063701263043, + "grad_norm": 0.42551249265670776, + "learning_rate": 1.9710799970455187e-05, + "loss": 0.5812, + "step": 5683 + }, + { + "epoch": 0.15606809445359693, + "grad_norm": 0.4487471580505371, + "learning_rate": 1.9710696844968606e-05, + "loss": 0.5986, + "step": 5684 + }, + { + "epoch": 0.15609555189456342, + "grad_norm": 0.350241094827652, + "learning_rate": 1.9710593701368476e-05, + "loss": 0.5003, + "step": 5685 + }, + { + "epoch": 0.15612300933552992, + "grad_norm": 0.36718353629112244, + "learning_rate": 1.9710490539654987e-05, + "loss": 0.5392, + "step": 5686 + }, + { + "epoch": 0.15615046677649644, + "grad_norm": 0.3456656336784363, + "learning_rate": 1.9710387359828332e-05, + "loss": 0.5351, + "step": 5687 + }, + { + "epoch": 0.15617792421746293, + "grad_norm": 0.38504382967948914, + "learning_rate": 1.971028416188871e-05, + "loss": 0.4954, + "step": 5688 + }, + { + "epoch": 0.15620538165842943, + "grad_norm": 0.3964003324508667, + "learning_rate": 1.9710180945836307e-05, + "loss": 0.5494, + "step": 5689 + }, + { + "epoch": 0.15623283909939595, + "grad_norm": 0.4076349139213562, + "learning_rate": 1.971007771167132e-05, + "loss": 0.56, + "step": 5690 + }, + { + "epoch": 0.15626029654036244, + "grad_norm": 0.3611622452735901, + "learning_rate": 1.970997445939393e-05, + "loss": 0.5346, + "step": 5691 + }, + { + "epoch": 0.15628775398132894, + "grad_norm": 0.3816754221916199, + "learning_rate": 1.9709871189004346e-05, + "loss": 0.5617, + "step": 5692 + }, + { + "epoch": 0.15631521142229543, + "grad_norm": 0.36257824301719666, + "learning_rate": 1.9709767900502754e-05, + "loss": 0.5404, + "step": 5693 + }, + { + "epoch": 0.15634266886326195, + "grad_norm": 0.35999318957328796, + "learning_rate": 1.9709664593889345e-05, + "loss": 0.5649, + "step": 5694 + }, + { + "epoch": 0.15637012630422845, + "grad_norm": 0.3931587338447571, + "learning_rate": 1.970956126916431e-05, + "loss": 0.6255, + "step": 5695 + }, + { + "epoch": 0.15639758374519494, + "grad_norm": 0.6847226023674011, + "learning_rate": 1.9709457926327842e-05, + "loss": 0.5495, + "step": 5696 + }, + { + "epoch": 0.15642504118616146, + "grad_norm": 0.39677295088768005, + "learning_rate": 1.9709354565380142e-05, + "loss": 0.5969, + "step": 5697 + }, + { + "epoch": 0.15645249862712796, + "grad_norm": 0.36721742153167725, + "learning_rate": 1.9709251186321397e-05, + "loss": 0.4868, + "step": 5698 + }, + { + "epoch": 0.15647995606809445, + "grad_norm": 0.38937443494796753, + "learning_rate": 1.9709147789151797e-05, + "loss": 0.6131, + "step": 5699 + }, + { + "epoch": 0.15650741350906094, + "grad_norm": 0.40215927362442017, + "learning_rate": 1.9709044373871538e-05, + "loss": 0.5846, + "step": 5700 + }, + { + "epoch": 0.15653487095002747, + "grad_norm": 0.3552262783050537, + "learning_rate": 1.9708940940480813e-05, + "loss": 0.5372, + "step": 5701 + }, + { + "epoch": 0.15656232839099396, + "grad_norm": 0.5246961712837219, + "learning_rate": 1.9708837488979813e-05, + "loss": 0.5628, + "step": 5702 + }, + { + "epoch": 0.15658978583196045, + "grad_norm": 0.3650798499584198, + "learning_rate": 1.9708734019368733e-05, + "loss": 0.5507, + "step": 5703 + }, + { + "epoch": 0.15661724327292698, + "grad_norm": 0.3553321361541748, + "learning_rate": 1.9708630531647763e-05, + "loss": 0.5193, + "step": 5704 + }, + { + "epoch": 0.15664470071389347, + "grad_norm": 0.4687361419200897, + "learning_rate": 1.97085270258171e-05, + "loss": 0.4204, + "step": 5705 + }, + { + "epoch": 0.15667215815485996, + "grad_norm": 0.3787229657173157, + "learning_rate": 1.9708423501876937e-05, + "loss": 0.5118, + "step": 5706 + }, + { + "epoch": 0.15669961559582646, + "grad_norm": 0.36400356888771057, + "learning_rate": 1.9708319959827467e-05, + "loss": 0.5052, + "step": 5707 + }, + { + "epoch": 0.15672707303679298, + "grad_norm": 0.34466567635536194, + "learning_rate": 1.970821639966888e-05, + "loss": 0.4726, + "step": 5708 + }, + { + "epoch": 0.15675453047775947, + "grad_norm": 0.34959614276885986, + "learning_rate": 1.970811282140137e-05, + "loss": 0.5026, + "step": 5709 + }, + { + "epoch": 0.15678198791872597, + "grad_norm": 0.747883141040802, + "learning_rate": 1.9708009225025134e-05, + "loss": 0.5719, + "step": 5710 + }, + { + "epoch": 0.1568094453596925, + "grad_norm": 0.3982827961444855, + "learning_rate": 1.9707905610540358e-05, + "loss": 0.5659, + "step": 5711 + }, + { + "epoch": 0.15683690280065898, + "grad_norm": 0.35307857394218445, + "learning_rate": 1.9707801977947243e-05, + "loss": 0.5307, + "step": 5712 + }, + { + "epoch": 0.15686436024162548, + "grad_norm": 0.5110244750976562, + "learning_rate": 1.9707698327245977e-05, + "loss": 0.5552, + "step": 5713 + }, + { + "epoch": 0.15689181768259197, + "grad_norm": 0.41765260696411133, + "learning_rate": 1.9707594658436756e-05, + "loss": 0.5412, + "step": 5714 + }, + { + "epoch": 0.1569192751235585, + "grad_norm": 0.36512166261672974, + "learning_rate": 1.9707490971519773e-05, + "loss": 0.564, + "step": 5715 + }, + { + "epoch": 0.156946732564525, + "grad_norm": 0.46435675024986267, + "learning_rate": 1.9707387266495222e-05, + "loss": 0.6512, + "step": 5716 + }, + { + "epoch": 0.15697419000549148, + "grad_norm": 0.3301486670970917, + "learning_rate": 1.9707283543363294e-05, + "loss": 0.5047, + "step": 5717 + }, + { + "epoch": 0.157001647446458, + "grad_norm": 0.42851969599723816, + "learning_rate": 1.9707179802124185e-05, + "loss": 0.5971, + "step": 5718 + }, + { + "epoch": 0.1570291048874245, + "grad_norm": 0.34520021080970764, + "learning_rate": 1.9707076042778085e-05, + "loss": 0.5403, + "step": 5719 + }, + { + "epoch": 0.157056562328391, + "grad_norm": 0.344036728143692, + "learning_rate": 1.9706972265325193e-05, + "loss": 0.5243, + "step": 5720 + }, + { + "epoch": 0.15708401976935749, + "grad_norm": 0.4191938042640686, + "learning_rate": 1.9706868469765697e-05, + "loss": 0.6, + "step": 5721 + }, + { + "epoch": 0.157111477210324, + "grad_norm": 0.34539875388145447, + "learning_rate": 1.97067646560998e-05, + "loss": 0.5308, + "step": 5722 + }, + { + "epoch": 0.1571389346512905, + "grad_norm": 0.34519219398498535, + "learning_rate": 1.970666082432768e-05, + "loss": 0.4671, + "step": 5723 + }, + { + "epoch": 0.157166392092257, + "grad_norm": 0.35154062509536743, + "learning_rate": 1.9706556974449544e-05, + "loss": 0.5274, + "step": 5724 + }, + { + "epoch": 0.1571938495332235, + "grad_norm": 0.42690640687942505, + "learning_rate": 1.9706453106465577e-05, + "loss": 0.5548, + "step": 5725 + }, + { + "epoch": 0.15722130697419, + "grad_norm": 0.3611331582069397, + "learning_rate": 1.9706349220375983e-05, + "loss": 0.4802, + "step": 5726 + }, + { + "epoch": 0.1572487644151565, + "grad_norm": 0.3587016463279724, + "learning_rate": 1.9706245316180944e-05, + "loss": 0.4781, + "step": 5727 + }, + { + "epoch": 0.157276221856123, + "grad_norm": 0.3511245548725128, + "learning_rate": 1.9706141393880664e-05, + "loss": 0.5232, + "step": 5728 + }, + { + "epoch": 0.15730367929708952, + "grad_norm": 0.32656770944595337, + "learning_rate": 1.9706037453475333e-05, + "loss": 0.473, + "step": 5729 + }, + { + "epoch": 0.15733113673805602, + "grad_norm": 0.3651609718799591, + "learning_rate": 1.970593349496514e-05, + "loss": 0.5553, + "step": 5730 + }, + { + "epoch": 0.1573585941790225, + "grad_norm": 0.3943035304546356, + "learning_rate": 1.970582951835029e-05, + "loss": 0.6375, + "step": 5731 + }, + { + "epoch": 0.157386051619989, + "grad_norm": 0.3842829167842865, + "learning_rate": 1.9705725523630965e-05, + "loss": 0.5426, + "step": 5732 + }, + { + "epoch": 0.15741350906095553, + "grad_norm": 0.374269962310791, + "learning_rate": 1.9705621510807363e-05, + "loss": 0.5657, + "step": 5733 + }, + { + "epoch": 0.15744096650192202, + "grad_norm": 0.351324200630188, + "learning_rate": 1.9705517479879682e-05, + "loss": 0.4921, + "step": 5734 + }, + { + "epoch": 0.1574684239428885, + "grad_norm": 0.4011829197406769, + "learning_rate": 1.970541343084811e-05, + "loss": 0.5277, + "step": 5735 + }, + { + "epoch": 0.15749588138385504, + "grad_norm": 0.3412875533103943, + "learning_rate": 1.970530936371285e-05, + "loss": 0.4528, + "step": 5736 + }, + { + "epoch": 0.15752333882482153, + "grad_norm": 0.33026960492134094, + "learning_rate": 1.9705205278474088e-05, + "loss": 0.4501, + "step": 5737 + }, + { + "epoch": 0.15755079626578802, + "grad_norm": 0.33283692598342896, + "learning_rate": 1.970510117513202e-05, + "loss": 0.4998, + "step": 5738 + }, + { + "epoch": 0.15757825370675452, + "grad_norm": 0.3229723870754242, + "learning_rate": 1.970499705368684e-05, + "loss": 0.5547, + "step": 5739 + }, + { + "epoch": 0.15760571114772104, + "grad_norm": 0.36741751432418823, + "learning_rate": 1.9704892914138746e-05, + "loss": 0.5698, + "step": 5740 + }, + { + "epoch": 0.15763316858868753, + "grad_norm": 0.3811276853084564, + "learning_rate": 1.9704788756487926e-05, + "loss": 0.5202, + "step": 5741 + }, + { + "epoch": 0.15766062602965403, + "grad_norm": 0.9053453803062439, + "learning_rate": 1.9704684580734578e-05, + "loss": 0.5738, + "step": 5742 + }, + { + "epoch": 0.15768808347062055, + "grad_norm": 0.339199036359787, + "learning_rate": 1.9704580386878897e-05, + "loss": 0.564, + "step": 5743 + }, + { + "epoch": 0.15771554091158704, + "grad_norm": 0.36432772874832153, + "learning_rate": 1.9704476174921077e-05, + "loss": 0.4559, + "step": 5744 + }, + { + "epoch": 0.15774299835255354, + "grad_norm": 0.31987616419792175, + "learning_rate": 1.970437194486131e-05, + "loss": 0.5067, + "step": 5745 + }, + { + "epoch": 0.15777045579352003, + "grad_norm": 0.33007457852363586, + "learning_rate": 1.970426769669979e-05, + "loss": 0.5477, + "step": 5746 + }, + { + "epoch": 0.15779791323448655, + "grad_norm": 0.39827388525009155, + "learning_rate": 1.970416343043672e-05, + "loss": 0.5498, + "step": 5747 + }, + { + "epoch": 0.15782537067545305, + "grad_norm": 0.3407354950904846, + "learning_rate": 1.9704059146072283e-05, + "loss": 0.5231, + "step": 5748 + }, + { + "epoch": 0.15785282811641954, + "grad_norm": 0.37217265367507935, + "learning_rate": 1.970395484360668e-05, + "loss": 0.6176, + "step": 5749 + }, + { + "epoch": 0.15788028555738606, + "grad_norm": 0.38865357637405396, + "learning_rate": 1.9703850523040103e-05, + "loss": 0.5176, + "step": 5750 + }, + { + "epoch": 0.15790774299835256, + "grad_norm": 0.49500975012779236, + "learning_rate": 1.970374618437275e-05, + "loss": 0.6275, + "step": 5751 + }, + { + "epoch": 0.15793520043931905, + "grad_norm": 0.3838292956352234, + "learning_rate": 1.970364182760481e-05, + "loss": 0.5881, + "step": 5752 + }, + { + "epoch": 0.15796265788028555, + "grad_norm": 0.40469980239868164, + "learning_rate": 1.970353745273648e-05, + "loss": 0.6751, + "step": 5753 + }, + { + "epoch": 0.15799011532125207, + "grad_norm": 0.35777559876441956, + "learning_rate": 1.970343305976796e-05, + "loss": 0.5185, + "step": 5754 + }, + { + "epoch": 0.15801757276221856, + "grad_norm": 0.3822026550769806, + "learning_rate": 1.970332864869944e-05, + "loss": 0.4835, + "step": 5755 + }, + { + "epoch": 0.15804503020318506, + "grad_norm": 0.37380337715148926, + "learning_rate": 1.9703224219531113e-05, + "loss": 0.606, + "step": 5756 + }, + { + "epoch": 0.15807248764415158, + "grad_norm": 0.3529263138771057, + "learning_rate": 1.9703119772263174e-05, + "loss": 0.4886, + "step": 5757 + }, + { + "epoch": 0.15809994508511807, + "grad_norm": 0.3444855511188507, + "learning_rate": 1.9703015306895824e-05, + "loss": 0.4678, + "step": 5758 + }, + { + "epoch": 0.15812740252608457, + "grad_norm": 0.3289617896080017, + "learning_rate": 1.9702910823429254e-05, + "loss": 0.5192, + "step": 5759 + }, + { + "epoch": 0.15815485996705106, + "grad_norm": 0.36807742714881897, + "learning_rate": 1.9702806321863653e-05, + "loss": 0.6219, + "step": 5760 + }, + { + "epoch": 0.15818231740801758, + "grad_norm": 0.47651106119155884, + "learning_rate": 1.970270180219923e-05, + "loss": 0.5432, + "step": 5761 + }, + { + "epoch": 0.15820977484898407, + "grad_norm": 0.4443008005619049, + "learning_rate": 1.9702597264436166e-05, + "loss": 0.5397, + "step": 5762 + }, + { + "epoch": 0.15823723228995057, + "grad_norm": 0.3903176486492157, + "learning_rate": 1.970249270857466e-05, + "loss": 0.626, + "step": 5763 + }, + { + "epoch": 0.1582646897309171, + "grad_norm": 0.36990347504615784, + "learning_rate": 1.9702388134614908e-05, + "loss": 0.5844, + "step": 5764 + }, + { + "epoch": 0.15829214717188358, + "grad_norm": 0.3933398425579071, + "learning_rate": 1.9702283542557108e-05, + "loss": 0.6212, + "step": 5765 + }, + { + "epoch": 0.15831960461285008, + "grad_norm": 0.37500086426734924, + "learning_rate": 1.9702178932401453e-05, + "loss": 0.5286, + "step": 5766 + }, + { + "epoch": 0.15834706205381657, + "grad_norm": 0.3650386333465576, + "learning_rate": 1.9702074304148137e-05, + "loss": 0.4469, + "step": 5767 + }, + { + "epoch": 0.1583745194947831, + "grad_norm": 0.3907754421234131, + "learning_rate": 1.9701969657797354e-05, + "loss": 0.4996, + "step": 5768 + }, + { + "epoch": 0.1584019769357496, + "grad_norm": 0.4064387381076813, + "learning_rate": 1.9701864993349303e-05, + "loss": 0.4686, + "step": 5769 + }, + { + "epoch": 0.15842943437671608, + "grad_norm": 0.3955513834953308, + "learning_rate": 1.970176031080418e-05, + "loss": 0.6584, + "step": 5770 + }, + { + "epoch": 0.1584568918176826, + "grad_norm": 0.35488778352737427, + "learning_rate": 1.9701655610162174e-05, + "loss": 0.5377, + "step": 5771 + }, + { + "epoch": 0.1584843492586491, + "grad_norm": 0.35780948400497437, + "learning_rate": 1.9701550891423485e-05, + "loss": 0.5219, + "step": 5772 + }, + { + "epoch": 0.1585118066996156, + "grad_norm": 0.3735301196575165, + "learning_rate": 1.9701446154588307e-05, + "loss": 0.4948, + "step": 5773 + }, + { + "epoch": 0.1585392641405821, + "grad_norm": 0.35856130719184875, + "learning_rate": 1.9701341399656836e-05, + "loss": 0.5293, + "step": 5774 + }, + { + "epoch": 0.1585667215815486, + "grad_norm": 0.37868866324424744, + "learning_rate": 1.9701236626629264e-05, + "loss": 0.5893, + "step": 5775 + }, + { + "epoch": 0.1585941790225151, + "grad_norm": 0.371233195066452, + "learning_rate": 1.9701131835505793e-05, + "loss": 0.5272, + "step": 5776 + }, + { + "epoch": 0.1586216364634816, + "grad_norm": 0.3807404339313507, + "learning_rate": 1.9701027026286614e-05, + "loss": 0.65, + "step": 5777 + }, + { + "epoch": 0.15864909390444812, + "grad_norm": 0.3668667674064636, + "learning_rate": 1.9700922198971922e-05, + "loss": 0.6067, + "step": 5778 + }, + { + "epoch": 0.1586765513454146, + "grad_norm": 0.36773329973220825, + "learning_rate": 1.9700817353561916e-05, + "loss": 0.539, + "step": 5779 + }, + { + "epoch": 0.1587040087863811, + "grad_norm": 0.33833056688308716, + "learning_rate": 1.970071249005679e-05, + "loss": 0.5507, + "step": 5780 + }, + { + "epoch": 0.1587314662273476, + "grad_norm": 0.36202672123908997, + "learning_rate": 1.9700607608456734e-05, + "loss": 0.6184, + "step": 5781 + }, + { + "epoch": 0.15875892366831412, + "grad_norm": 0.332453191280365, + "learning_rate": 1.9700502708761955e-05, + "loss": 0.5238, + "step": 5782 + }, + { + "epoch": 0.15878638110928062, + "grad_norm": 0.34454667568206787, + "learning_rate": 1.970039779097264e-05, + "loss": 0.4607, + "step": 5783 + }, + { + "epoch": 0.1588138385502471, + "grad_norm": 0.34077200293540955, + "learning_rate": 1.9700292855088986e-05, + "loss": 0.4803, + "step": 5784 + }, + { + "epoch": 0.15884129599121363, + "grad_norm": 0.39110666513442993, + "learning_rate": 1.970018790111119e-05, + "loss": 0.5755, + "step": 5785 + }, + { + "epoch": 0.15886875343218013, + "grad_norm": 0.33956167101860046, + "learning_rate": 1.9700082929039447e-05, + "loss": 0.489, + "step": 5786 + }, + { + "epoch": 0.15889621087314662, + "grad_norm": 0.33375829458236694, + "learning_rate": 1.9699977938873956e-05, + "loss": 0.5401, + "step": 5787 + }, + { + "epoch": 0.15892366831411311, + "grad_norm": 0.3255741596221924, + "learning_rate": 1.969987293061491e-05, + "loss": 0.4952, + "step": 5788 + }, + { + "epoch": 0.15895112575507964, + "grad_norm": 0.3726412057876587, + "learning_rate": 1.9699767904262502e-05, + "loss": 0.4313, + "step": 5789 + }, + { + "epoch": 0.15897858319604613, + "grad_norm": 0.39438995718955994, + "learning_rate": 1.9699662859816934e-05, + "loss": 0.6077, + "step": 5790 + }, + { + "epoch": 0.15900604063701262, + "grad_norm": 0.3882342278957367, + "learning_rate": 1.96995577972784e-05, + "loss": 0.5958, + "step": 5791 + }, + { + "epoch": 0.15903349807797912, + "grad_norm": 0.38058149814605713, + "learning_rate": 1.969945271664709e-05, + "loss": 0.4781, + "step": 5792 + }, + { + "epoch": 0.15906095551894564, + "grad_norm": 0.3860342800617218, + "learning_rate": 1.969934761792321e-05, + "loss": 0.5583, + "step": 5793 + }, + { + "epoch": 0.15908841295991213, + "grad_norm": 0.40498441457748413, + "learning_rate": 1.969924250110695e-05, + "loss": 0.559, + "step": 5794 + }, + { + "epoch": 0.15911587040087863, + "grad_norm": 0.35673463344573975, + "learning_rate": 1.9699137366198507e-05, + "loss": 0.4347, + "step": 5795 + }, + { + "epoch": 0.15914332784184515, + "grad_norm": 0.3768269717693329, + "learning_rate": 1.9699032213198074e-05, + "loss": 0.5653, + "step": 5796 + }, + { + "epoch": 0.15917078528281164, + "grad_norm": 0.35625141859054565, + "learning_rate": 1.9698927042105855e-05, + "loss": 0.607, + "step": 5797 + }, + { + "epoch": 0.15919824272377814, + "grad_norm": 0.3542579114437103, + "learning_rate": 1.9698821852922038e-05, + "loss": 0.46, + "step": 5798 + }, + { + "epoch": 0.15922570016474463, + "grad_norm": 0.4216062128543854, + "learning_rate": 1.9698716645646824e-05, + "loss": 0.579, + "step": 5799 + }, + { + "epoch": 0.15925315760571115, + "grad_norm": 0.3080407381057739, + "learning_rate": 1.969861142028041e-05, + "loss": 0.5334, + "step": 5800 + }, + { + "epoch": 0.15928061504667765, + "grad_norm": 0.34467074275016785, + "learning_rate": 1.969850617682299e-05, + "loss": 0.548, + "step": 5801 + }, + { + "epoch": 0.15930807248764414, + "grad_norm": 0.31846246123313904, + "learning_rate": 1.9698400915274757e-05, + "loss": 0.4931, + "step": 5802 + }, + { + "epoch": 0.15933552992861066, + "grad_norm": 0.33270037174224854, + "learning_rate": 1.9698295635635913e-05, + "loss": 0.5774, + "step": 5803 + }, + { + "epoch": 0.15936298736957716, + "grad_norm": 0.35140112042427063, + "learning_rate": 1.9698190337906654e-05, + "loss": 0.5147, + "step": 5804 + }, + { + "epoch": 0.15939044481054365, + "grad_norm": 0.333535760641098, + "learning_rate": 1.969808502208717e-05, + "loss": 0.5082, + "step": 5805 + }, + { + "epoch": 0.15941790225151015, + "grad_norm": 0.3509487807750702, + "learning_rate": 1.9697979688177664e-05, + "loss": 0.5157, + "step": 5806 + }, + { + "epoch": 0.15944535969247667, + "grad_norm": 0.3680865466594696, + "learning_rate": 1.9697874336178333e-05, + "loss": 0.5713, + "step": 5807 + }, + { + "epoch": 0.15947281713344316, + "grad_norm": 0.412695974111557, + "learning_rate": 1.969776896608937e-05, + "loss": 0.5717, + "step": 5808 + }, + { + "epoch": 0.15950027457440966, + "grad_norm": 0.3523961901664734, + "learning_rate": 1.969766357791097e-05, + "loss": 0.5422, + "step": 5809 + }, + { + "epoch": 0.15952773201537618, + "grad_norm": 0.35627445578575134, + "learning_rate": 1.9697558171643337e-05, + "loss": 0.5489, + "step": 5810 + }, + { + "epoch": 0.15955518945634267, + "grad_norm": 0.34650328755378723, + "learning_rate": 1.9697452747286664e-05, + "loss": 0.5273, + "step": 5811 + }, + { + "epoch": 0.15958264689730917, + "grad_norm": 0.4469851553440094, + "learning_rate": 1.969734730484114e-05, + "loss": 0.5952, + "step": 5812 + }, + { + "epoch": 0.15961010433827566, + "grad_norm": 0.3882896304130554, + "learning_rate": 1.9697241844306972e-05, + "loss": 0.5978, + "step": 5813 + }, + { + "epoch": 0.15963756177924218, + "grad_norm": 0.32871153950691223, + "learning_rate": 1.9697136365684354e-05, + "loss": 0.4977, + "step": 5814 + }, + { + "epoch": 0.15966501922020868, + "grad_norm": 0.36846068501472473, + "learning_rate": 1.969703086897348e-05, + "loss": 0.5118, + "step": 5815 + }, + { + "epoch": 0.15969247666117517, + "grad_norm": 0.3649718463420868, + "learning_rate": 1.9696925354174548e-05, + "loss": 0.6007, + "step": 5816 + }, + { + "epoch": 0.1597199341021417, + "grad_norm": 0.4566709101200104, + "learning_rate": 1.9696819821287756e-05, + "loss": 0.4894, + "step": 5817 + }, + { + "epoch": 0.15974739154310819, + "grad_norm": 0.3595947325229645, + "learning_rate": 1.9696714270313298e-05, + "loss": 0.5847, + "step": 5818 + }, + { + "epoch": 0.15977484898407468, + "grad_norm": 0.39677107334136963, + "learning_rate": 1.9696608701251376e-05, + "loss": 0.5167, + "step": 5819 + }, + { + "epoch": 0.15980230642504117, + "grad_norm": 0.34387025237083435, + "learning_rate": 1.9696503114102184e-05, + "loss": 0.5128, + "step": 5820 + }, + { + "epoch": 0.1598297638660077, + "grad_norm": 0.3796578347682953, + "learning_rate": 1.9696397508865917e-05, + "loss": 0.6254, + "step": 5821 + }, + { + "epoch": 0.1598572213069742, + "grad_norm": 0.3710852861404419, + "learning_rate": 1.9696291885542777e-05, + "loss": 0.5229, + "step": 5822 + }, + { + "epoch": 0.15988467874794068, + "grad_norm": 0.45253461599349976, + "learning_rate": 1.9696186244132955e-05, + "loss": 0.5564, + "step": 5823 + }, + { + "epoch": 0.1599121361889072, + "grad_norm": 0.37040168046951294, + "learning_rate": 1.9696080584636653e-05, + "loss": 0.5519, + "step": 5824 + }, + { + "epoch": 0.1599395936298737, + "grad_norm": 0.31432172656059265, + "learning_rate": 1.9695974907054066e-05, + "loss": 0.4253, + "step": 5825 + }, + { + "epoch": 0.1599670510708402, + "grad_norm": 0.4003652036190033, + "learning_rate": 1.969586921138539e-05, + "loss": 0.552, + "step": 5826 + }, + { + "epoch": 0.1599945085118067, + "grad_norm": 0.3812129497528076, + "learning_rate": 1.969576349763082e-05, + "loss": 0.5731, + "step": 5827 + }, + { + "epoch": 0.1600219659527732, + "grad_norm": 0.3795168399810791, + "learning_rate": 1.9695657765790562e-05, + "loss": 0.5125, + "step": 5828 + }, + { + "epoch": 0.1600494233937397, + "grad_norm": 0.37622833251953125, + "learning_rate": 1.969555201586481e-05, + "loss": 0.4433, + "step": 5829 + }, + { + "epoch": 0.1600768808347062, + "grad_norm": 0.36082667112350464, + "learning_rate": 1.969544624785375e-05, + "loss": 0.5781, + "step": 5830 + }, + { + "epoch": 0.16010433827567272, + "grad_norm": 0.3895476758480072, + "learning_rate": 1.9695340461757596e-05, + "loss": 0.5414, + "step": 5831 + }, + { + "epoch": 0.1601317957166392, + "grad_norm": 0.6734892725944519, + "learning_rate": 1.9695234657576533e-05, + "loss": 0.4872, + "step": 5832 + }, + { + "epoch": 0.1601592531576057, + "grad_norm": 0.34317663311958313, + "learning_rate": 1.9695128835310767e-05, + "loss": 0.6032, + "step": 5833 + }, + { + "epoch": 0.1601867105985722, + "grad_norm": 0.3094826936721802, + "learning_rate": 1.969502299496049e-05, + "loss": 0.5206, + "step": 5834 + }, + { + "epoch": 0.16021416803953872, + "grad_norm": 0.4411672055721283, + "learning_rate": 1.96949171365259e-05, + "loss": 0.6241, + "step": 5835 + }, + { + "epoch": 0.16024162548050522, + "grad_norm": 0.33684468269348145, + "learning_rate": 1.9694811260007195e-05, + "loss": 0.4768, + "step": 5836 + }, + { + "epoch": 0.1602690829214717, + "grad_norm": 0.40342652797698975, + "learning_rate": 1.9694705365404576e-05, + "loss": 0.5024, + "step": 5837 + }, + { + "epoch": 0.16029654036243823, + "grad_norm": 0.3723052442073822, + "learning_rate": 1.9694599452718233e-05, + "loss": 0.5601, + "step": 5838 + }, + { + "epoch": 0.16032399780340473, + "grad_norm": 0.38121238350868225, + "learning_rate": 1.9694493521948367e-05, + "loss": 0.5119, + "step": 5839 + }, + { + "epoch": 0.16035145524437122, + "grad_norm": 0.4798239767551422, + "learning_rate": 1.969438757309518e-05, + "loss": 0.4974, + "step": 5840 + }, + { + "epoch": 0.16037891268533772, + "grad_norm": 0.33632585406303406, + "learning_rate": 1.9694281606158864e-05, + "loss": 0.5377, + "step": 5841 + }, + { + "epoch": 0.16040637012630424, + "grad_norm": 0.4216699004173279, + "learning_rate": 1.969417562113962e-05, + "loss": 0.6273, + "step": 5842 + }, + { + "epoch": 0.16043382756727073, + "grad_norm": 0.3561878502368927, + "learning_rate": 1.9694069618037644e-05, + "loss": 0.4422, + "step": 5843 + }, + { + "epoch": 0.16046128500823723, + "grad_norm": 0.325895220041275, + "learning_rate": 1.9693963596853134e-05, + "loss": 0.4177, + "step": 5844 + }, + { + "epoch": 0.16048874244920375, + "grad_norm": 0.3450259268283844, + "learning_rate": 1.969385755758629e-05, + "loss": 0.488, + "step": 5845 + }, + { + "epoch": 0.16051619989017024, + "grad_norm": 0.349286288022995, + "learning_rate": 1.9693751500237303e-05, + "loss": 0.4784, + "step": 5846 + }, + { + "epoch": 0.16054365733113674, + "grad_norm": 0.357086718082428, + "learning_rate": 1.969364542480638e-05, + "loss": 0.554, + "step": 5847 + }, + { + "epoch": 0.16057111477210323, + "grad_norm": 0.4401469826698303, + "learning_rate": 1.9693539331293716e-05, + "loss": 0.5581, + "step": 5848 + }, + { + "epoch": 0.16059857221306975, + "grad_norm": 0.3392990827560425, + "learning_rate": 1.9693433219699503e-05, + "loss": 0.4647, + "step": 5849 + }, + { + "epoch": 0.16062602965403625, + "grad_norm": 0.47106432914733887, + "learning_rate": 1.9693327090023943e-05, + "loss": 0.5, + "step": 5850 + }, + { + "epoch": 0.16065348709500274, + "grad_norm": 0.38277336955070496, + "learning_rate": 1.9693220942267234e-05, + "loss": 0.5098, + "step": 5851 + }, + { + "epoch": 0.16068094453596926, + "grad_norm": 0.3795066773891449, + "learning_rate": 1.9693114776429578e-05, + "loss": 0.496, + "step": 5852 + }, + { + "epoch": 0.16070840197693576, + "grad_norm": 0.4115433096885681, + "learning_rate": 1.9693008592511166e-05, + "loss": 0.6135, + "step": 5853 + }, + { + "epoch": 0.16073585941790225, + "grad_norm": 0.3940642178058624, + "learning_rate": 1.9692902390512202e-05, + "loss": 0.5703, + "step": 5854 + }, + { + "epoch": 0.16076331685886874, + "grad_norm": 0.39864999055862427, + "learning_rate": 1.969279617043288e-05, + "loss": 0.6381, + "step": 5855 + }, + { + "epoch": 0.16079077429983527, + "grad_norm": 0.36092427372932434, + "learning_rate": 1.9692689932273402e-05, + "loss": 0.5574, + "step": 5856 + }, + { + "epoch": 0.16081823174080176, + "grad_norm": 0.3470171391963959, + "learning_rate": 1.9692583676033958e-05, + "loss": 0.4959, + "step": 5857 + }, + { + "epoch": 0.16084568918176825, + "grad_norm": 0.7340888977050781, + "learning_rate": 1.9692477401714756e-05, + "loss": 0.5689, + "step": 5858 + }, + { + "epoch": 0.16087314662273475, + "grad_norm": 0.31496119499206543, + "learning_rate": 1.9692371109315987e-05, + "loss": 0.466, + "step": 5859 + }, + { + "epoch": 0.16090060406370127, + "grad_norm": 0.35240063071250916, + "learning_rate": 1.9692264798837856e-05, + "loss": 0.5502, + "step": 5860 + }, + { + "epoch": 0.16092806150466776, + "grad_norm": 0.4119267463684082, + "learning_rate": 1.969215847028056e-05, + "loss": 0.5882, + "step": 5861 + }, + { + "epoch": 0.16095551894563426, + "grad_norm": 0.383576363325119, + "learning_rate": 1.969205212364429e-05, + "loss": 0.569, + "step": 5862 + }, + { + "epoch": 0.16098297638660078, + "grad_norm": 0.3734026849269867, + "learning_rate": 1.9691945758929252e-05, + "loss": 0.5759, + "step": 5863 + }, + { + "epoch": 0.16101043382756727, + "grad_norm": 0.4151698052883148, + "learning_rate": 1.969183937613564e-05, + "loss": 0.5599, + "step": 5864 + }, + { + "epoch": 0.16103789126853377, + "grad_norm": 0.3528323769569397, + "learning_rate": 1.9691732975263657e-05, + "loss": 0.5217, + "step": 5865 + }, + { + "epoch": 0.16106534870950026, + "grad_norm": 0.3257858455181122, + "learning_rate": 1.9691626556313497e-05, + "loss": 0.5366, + "step": 5866 + }, + { + "epoch": 0.16109280615046678, + "grad_norm": 0.3453126847743988, + "learning_rate": 1.969152011928536e-05, + "loss": 0.5172, + "step": 5867 + }, + { + "epoch": 0.16112026359143328, + "grad_norm": 0.3504716455936432, + "learning_rate": 1.9691413664179444e-05, + "loss": 0.478, + "step": 5868 + }, + { + "epoch": 0.16114772103239977, + "grad_norm": 0.45592445135116577, + "learning_rate": 1.969130719099595e-05, + "loss": 0.5515, + "step": 5869 + }, + { + "epoch": 0.1611751784733663, + "grad_norm": 0.422553688287735, + "learning_rate": 1.9691200699735072e-05, + "loss": 0.5849, + "step": 5870 + }, + { + "epoch": 0.1612026359143328, + "grad_norm": 0.37896567583084106, + "learning_rate": 1.9691094190397017e-05, + "loss": 0.6102, + "step": 5871 + }, + { + "epoch": 0.16123009335529928, + "grad_norm": 0.4469166100025177, + "learning_rate": 1.9690987662981974e-05, + "loss": 0.5411, + "step": 5872 + }, + { + "epoch": 0.16125755079626578, + "grad_norm": 0.33068227767944336, + "learning_rate": 1.9690881117490147e-05, + "loss": 0.4687, + "step": 5873 + }, + { + "epoch": 0.1612850082372323, + "grad_norm": 0.39757853746414185, + "learning_rate": 1.9690774553921733e-05, + "loss": 0.4867, + "step": 5874 + }, + { + "epoch": 0.1613124656781988, + "grad_norm": 0.37361449003219604, + "learning_rate": 1.9690667972276937e-05, + "loss": 0.601, + "step": 5875 + }, + { + "epoch": 0.16133992311916528, + "grad_norm": 0.33023834228515625, + "learning_rate": 1.9690561372555947e-05, + "loss": 0.4837, + "step": 5876 + }, + { + "epoch": 0.1613673805601318, + "grad_norm": 0.4379355311393738, + "learning_rate": 1.9690454754758965e-05, + "loss": 0.5206, + "step": 5877 + }, + { + "epoch": 0.1613948380010983, + "grad_norm": 0.38781869411468506, + "learning_rate": 1.96903481188862e-05, + "loss": 0.5551, + "step": 5878 + }, + { + "epoch": 0.1614222954420648, + "grad_norm": 0.36046484112739563, + "learning_rate": 1.9690241464937837e-05, + "loss": 0.6027, + "step": 5879 + }, + { + "epoch": 0.1614497528830313, + "grad_norm": 0.3675413727760315, + "learning_rate": 1.969013479291408e-05, + "loss": 0.6005, + "step": 5880 + }, + { + "epoch": 0.1614772103239978, + "grad_norm": 0.49054136872291565, + "learning_rate": 1.9690028102815132e-05, + "loss": 0.6094, + "step": 5881 + }, + { + "epoch": 0.1615046677649643, + "grad_norm": 0.3671773672103882, + "learning_rate": 1.968992139464119e-05, + "loss": 0.4651, + "step": 5882 + }, + { + "epoch": 0.1615321252059308, + "grad_norm": 0.3482096791267395, + "learning_rate": 1.9689814668392446e-05, + "loss": 0.5422, + "step": 5883 + }, + { + "epoch": 0.16155958264689732, + "grad_norm": 0.3815556764602661, + "learning_rate": 1.968970792406911e-05, + "loss": 0.5204, + "step": 5884 + }, + { + "epoch": 0.16158704008786381, + "grad_norm": 0.3987388610839844, + "learning_rate": 1.9689601161671374e-05, + "loss": 0.6281, + "step": 5885 + }, + { + "epoch": 0.1616144975288303, + "grad_norm": 0.3597581386566162, + "learning_rate": 1.968949438119944e-05, + "loss": 0.5691, + "step": 5886 + }, + { + "epoch": 0.1616419549697968, + "grad_norm": 0.37345170974731445, + "learning_rate": 1.9689387582653506e-05, + "loss": 0.5491, + "step": 5887 + }, + { + "epoch": 0.16166941241076332, + "grad_norm": 0.4356056749820709, + "learning_rate": 1.9689280766033775e-05, + "loss": 0.5302, + "step": 5888 + }, + { + "epoch": 0.16169686985172982, + "grad_norm": 0.3340286612510681, + "learning_rate": 1.968917393134044e-05, + "loss": 0.5846, + "step": 5889 + }, + { + "epoch": 0.1617243272926963, + "grad_norm": 0.4203908443450928, + "learning_rate": 1.9689067078573704e-05, + "loss": 0.5624, + "step": 5890 + }, + { + "epoch": 0.16175178473366283, + "grad_norm": 0.3836018741130829, + "learning_rate": 1.9688960207733764e-05, + "loss": 0.5782, + "step": 5891 + }, + { + "epoch": 0.16177924217462933, + "grad_norm": 0.37024611234664917, + "learning_rate": 1.968885331882082e-05, + "loss": 0.4771, + "step": 5892 + }, + { + "epoch": 0.16180669961559582, + "grad_norm": 0.34736236929893494, + "learning_rate": 1.9688746411835072e-05, + "loss": 0.5244, + "step": 5893 + }, + { + "epoch": 0.16183415705656232, + "grad_norm": 0.4740428924560547, + "learning_rate": 1.9688639486776723e-05, + "loss": 0.5385, + "step": 5894 + }, + { + "epoch": 0.16186161449752884, + "grad_norm": 0.37892478704452515, + "learning_rate": 1.968853254364597e-05, + "loss": 0.44, + "step": 5895 + }, + { + "epoch": 0.16188907193849533, + "grad_norm": 0.3521192967891693, + "learning_rate": 1.9688425582443007e-05, + "loss": 0.572, + "step": 5896 + }, + { + "epoch": 0.16191652937946183, + "grad_norm": 0.5501775145530701, + "learning_rate": 1.968831860316804e-05, + "loss": 0.5192, + "step": 5897 + }, + { + "epoch": 0.16194398682042835, + "grad_norm": 0.45205530524253845, + "learning_rate": 1.968821160582127e-05, + "loss": 0.5395, + "step": 5898 + }, + { + "epoch": 0.16197144426139484, + "grad_norm": 0.33704814314842224, + "learning_rate": 1.968810459040289e-05, + "loss": 0.564, + "step": 5899 + }, + { + "epoch": 0.16199890170236134, + "grad_norm": 0.33496204018592834, + "learning_rate": 1.9687997556913104e-05, + "loss": 0.4916, + "step": 5900 + }, + { + "epoch": 0.16202635914332783, + "grad_norm": 0.4107848107814789, + "learning_rate": 1.968789050535211e-05, + "loss": 0.6047, + "step": 5901 + }, + { + "epoch": 0.16205381658429435, + "grad_norm": 0.3443552553653717, + "learning_rate": 1.9687783435720106e-05, + "loss": 0.4893, + "step": 5902 + }, + { + "epoch": 0.16208127402526085, + "grad_norm": 0.355432391166687, + "learning_rate": 1.96876763480173e-05, + "loss": 0.5388, + "step": 5903 + }, + { + "epoch": 0.16210873146622734, + "grad_norm": 0.37068697810173035, + "learning_rate": 1.9687569242243877e-05, + "loss": 0.6622, + "step": 5904 + }, + { + "epoch": 0.16213618890719386, + "grad_norm": 0.4133038818836212, + "learning_rate": 1.968746211840005e-05, + "loss": 0.6187, + "step": 5905 + }, + { + "epoch": 0.16216364634816036, + "grad_norm": 0.47101274132728577, + "learning_rate": 1.9687354976486014e-05, + "loss": 0.5879, + "step": 5906 + }, + { + "epoch": 0.16219110378912685, + "grad_norm": 0.4012320041656494, + "learning_rate": 1.9687247816501972e-05, + "loss": 0.574, + "step": 5907 + }, + { + "epoch": 0.16221856123009334, + "grad_norm": 0.3571380078792572, + "learning_rate": 1.9687140638448115e-05, + "loss": 0.5659, + "step": 5908 + }, + { + "epoch": 0.16224601867105987, + "grad_norm": 0.3719521462917328, + "learning_rate": 1.9687033442324654e-05, + "loss": 0.5379, + "step": 5909 + }, + { + "epoch": 0.16227347611202636, + "grad_norm": 0.37979522347450256, + "learning_rate": 1.968692622813178e-05, + "loss": 0.4966, + "step": 5910 + }, + { + "epoch": 0.16230093355299285, + "grad_norm": 0.37114787101745605, + "learning_rate": 1.96868189958697e-05, + "loss": 0.5757, + "step": 5911 + }, + { + "epoch": 0.16232839099395938, + "grad_norm": 0.3583783209323883, + "learning_rate": 1.968671174553861e-05, + "loss": 0.5182, + "step": 5912 + }, + { + "epoch": 0.16235584843492587, + "grad_norm": 0.46979987621307373, + "learning_rate": 1.968660447713871e-05, + "loss": 0.5884, + "step": 5913 + }, + { + "epoch": 0.16238330587589236, + "grad_norm": 0.5307267308235168, + "learning_rate": 1.9686497190670203e-05, + "loss": 0.5149, + "step": 5914 + }, + { + "epoch": 0.16241076331685886, + "grad_norm": 0.33474233746528625, + "learning_rate": 1.9686389886133287e-05, + "loss": 0.5521, + "step": 5915 + }, + { + "epoch": 0.16243822075782538, + "grad_norm": 0.3897472620010376, + "learning_rate": 1.968628256352816e-05, + "loss": 0.5584, + "step": 5916 + }, + { + "epoch": 0.16246567819879187, + "grad_norm": 0.4173433184623718, + "learning_rate": 1.9686175222855025e-05, + "loss": 0.4708, + "step": 5917 + }, + { + "epoch": 0.16249313563975837, + "grad_norm": 0.3600271940231323, + "learning_rate": 1.9686067864114086e-05, + "loss": 0.4948, + "step": 5918 + }, + { + "epoch": 0.1625205930807249, + "grad_norm": 0.40454354882240295, + "learning_rate": 1.9685960487305534e-05, + "loss": 0.5384, + "step": 5919 + }, + { + "epoch": 0.16254805052169138, + "grad_norm": 0.3218441605567932, + "learning_rate": 1.9685853092429576e-05, + "loss": 0.5256, + "step": 5920 + }, + { + "epoch": 0.16257550796265788, + "grad_norm": 0.34667226672172546, + "learning_rate": 1.9685745679486408e-05, + "loss": 0.5208, + "step": 5921 + }, + { + "epoch": 0.16260296540362437, + "grad_norm": 0.3279931843280792, + "learning_rate": 1.9685638248476238e-05, + "loss": 0.5071, + "step": 5922 + }, + { + "epoch": 0.1626304228445909, + "grad_norm": 0.3636189103126526, + "learning_rate": 1.968553079939926e-05, + "loss": 0.588, + "step": 5923 + }, + { + "epoch": 0.1626578802855574, + "grad_norm": 0.376956969499588, + "learning_rate": 1.9685423332255673e-05, + "loss": 0.6114, + "step": 5924 + }, + { + "epoch": 0.16268533772652388, + "grad_norm": 0.40843212604522705, + "learning_rate": 1.9685315847045684e-05, + "loss": 0.5531, + "step": 5925 + }, + { + "epoch": 0.16271279516749038, + "grad_norm": 0.371143639087677, + "learning_rate": 1.9685208343769485e-05, + "loss": 0.5522, + "step": 5926 + }, + { + "epoch": 0.1627402526084569, + "grad_norm": 0.34312376379966736, + "learning_rate": 1.9685100822427286e-05, + "loss": 0.4394, + "step": 5927 + }, + { + "epoch": 0.1627677100494234, + "grad_norm": 0.353338360786438, + "learning_rate": 1.968499328301928e-05, + "loss": 0.5805, + "step": 5928 + }, + { + "epoch": 0.16279516749038989, + "grad_norm": 0.3604951500892639, + "learning_rate": 1.9684885725545672e-05, + "loss": 0.5784, + "step": 5929 + }, + { + "epoch": 0.1628226249313564, + "grad_norm": 0.42750948667526245, + "learning_rate": 1.9684778150006657e-05, + "loss": 0.6232, + "step": 5930 + }, + { + "epoch": 0.1628500823723229, + "grad_norm": 0.3578625023365021, + "learning_rate": 1.9684670556402444e-05, + "loss": 0.5072, + "step": 5931 + }, + { + "epoch": 0.1628775398132894, + "grad_norm": 0.351559042930603, + "learning_rate": 1.968456294473323e-05, + "loss": 0.477, + "step": 5932 + }, + { + "epoch": 0.1629049972542559, + "grad_norm": 0.39821723103523254, + "learning_rate": 1.9684455314999215e-05, + "loss": 0.5088, + "step": 5933 + }, + { + "epoch": 0.1629324546952224, + "grad_norm": 0.37422698736190796, + "learning_rate": 1.9684347667200598e-05, + "loss": 0.5423, + "step": 5934 + }, + { + "epoch": 0.1629599121361889, + "grad_norm": 0.36309877038002014, + "learning_rate": 1.9684240001337583e-05, + "loss": 0.5705, + "step": 5935 + }, + { + "epoch": 0.1629873695771554, + "grad_norm": 0.3645501136779785, + "learning_rate": 1.968413231741037e-05, + "loss": 0.4816, + "step": 5936 + }, + { + "epoch": 0.16301482701812192, + "grad_norm": 0.5422803163528442, + "learning_rate": 1.9684024615419156e-05, + "loss": 0.5034, + "step": 5937 + }, + { + "epoch": 0.16304228445908842, + "grad_norm": 0.346344918012619, + "learning_rate": 1.968391689536415e-05, + "loss": 0.4519, + "step": 5938 + }, + { + "epoch": 0.1630697419000549, + "grad_norm": 0.3669911026954651, + "learning_rate": 1.9683809157245545e-05, + "loss": 0.552, + "step": 5939 + }, + { + "epoch": 0.1630971993410214, + "grad_norm": 0.37110984325408936, + "learning_rate": 1.9683701401063543e-05, + "loss": 0.564, + "step": 5940 + }, + { + "epoch": 0.16312465678198793, + "grad_norm": 0.32724159955978394, + "learning_rate": 1.968359362681835e-05, + "loss": 0.5184, + "step": 5941 + }, + { + "epoch": 0.16315211422295442, + "grad_norm": 0.3542429506778717, + "learning_rate": 1.9683485834510166e-05, + "loss": 0.5383, + "step": 5942 + }, + { + "epoch": 0.1631795716639209, + "grad_norm": 0.36632803082466125, + "learning_rate": 1.9683378024139187e-05, + "loss": 0.5435, + "step": 5943 + }, + { + "epoch": 0.16320702910488744, + "grad_norm": 0.351973295211792, + "learning_rate": 1.968327019570562e-05, + "loss": 0.5528, + "step": 5944 + }, + { + "epoch": 0.16323448654585393, + "grad_norm": 0.4227403402328491, + "learning_rate": 1.9683162349209662e-05, + "loss": 0.5964, + "step": 5945 + }, + { + "epoch": 0.16326194398682042, + "grad_norm": 0.387090802192688, + "learning_rate": 1.968305448465152e-05, + "loss": 0.5289, + "step": 5946 + }, + { + "epoch": 0.16328940142778692, + "grad_norm": 0.37157219648361206, + "learning_rate": 1.9682946602031385e-05, + "loss": 0.6159, + "step": 5947 + }, + { + "epoch": 0.16331685886875344, + "grad_norm": 0.34170252084732056, + "learning_rate": 1.9682838701349465e-05, + "loss": 0.5396, + "step": 5948 + }, + { + "epoch": 0.16334431630971993, + "grad_norm": 0.3778645396232605, + "learning_rate": 1.9682730782605964e-05, + "loss": 0.6032, + "step": 5949 + }, + { + "epoch": 0.16337177375068643, + "grad_norm": 0.4253092110157013, + "learning_rate": 1.9682622845801077e-05, + "loss": 0.558, + "step": 5950 + }, + { + "epoch": 0.16339923119165295, + "grad_norm": 0.35710448026657104, + "learning_rate": 1.968251489093501e-05, + "loss": 0.5067, + "step": 5951 + }, + { + "epoch": 0.16342668863261944, + "grad_norm": 0.3436387777328491, + "learning_rate": 1.9682406918007962e-05, + "loss": 0.4816, + "step": 5952 + }, + { + "epoch": 0.16345414607358594, + "grad_norm": 0.3814050853252411, + "learning_rate": 1.9682298927020133e-05, + "loss": 0.4851, + "step": 5953 + }, + { + "epoch": 0.16348160351455243, + "grad_norm": 0.3393646776676178, + "learning_rate": 1.9682190917971725e-05, + "loss": 0.5341, + "step": 5954 + }, + { + "epoch": 0.16350906095551895, + "grad_norm": 0.3380119502544403, + "learning_rate": 1.9682082890862945e-05, + "loss": 0.492, + "step": 5955 + }, + { + "epoch": 0.16353651839648545, + "grad_norm": 0.39427319169044495, + "learning_rate": 1.9681974845693986e-05, + "loss": 0.6123, + "step": 5956 + }, + { + "epoch": 0.16356397583745194, + "grad_norm": 0.37993431091308594, + "learning_rate": 1.9681866782465055e-05, + "loss": 0.5023, + "step": 5957 + }, + { + "epoch": 0.16359143327841846, + "grad_norm": 0.36897385120391846, + "learning_rate": 1.9681758701176354e-05, + "loss": 0.5989, + "step": 5958 + }, + { + "epoch": 0.16361889071938496, + "grad_norm": 0.36335939168930054, + "learning_rate": 1.9681650601828085e-05, + "loss": 0.5201, + "step": 5959 + }, + { + "epoch": 0.16364634816035145, + "grad_norm": 0.34224992990493774, + "learning_rate": 1.9681542484420442e-05, + "loss": 0.482, + "step": 5960 + }, + { + "epoch": 0.16367380560131795, + "grad_norm": 0.37969404458999634, + "learning_rate": 1.9681434348953638e-05, + "loss": 0.5353, + "step": 5961 + }, + { + "epoch": 0.16370126304228447, + "grad_norm": 0.5661979913711548, + "learning_rate": 1.9681326195427864e-05, + "loss": 0.4596, + "step": 5962 + }, + { + "epoch": 0.16372872048325096, + "grad_norm": 0.39917105436325073, + "learning_rate": 1.968121802384333e-05, + "loss": 0.6122, + "step": 5963 + }, + { + "epoch": 0.16375617792421746, + "grad_norm": 0.356785386800766, + "learning_rate": 1.968110983420023e-05, + "loss": 0.5192, + "step": 5964 + }, + { + "epoch": 0.16378363536518398, + "grad_norm": 0.3861936330795288, + "learning_rate": 1.9681001626498778e-05, + "loss": 0.5599, + "step": 5965 + }, + { + "epoch": 0.16381109280615047, + "grad_norm": 0.3468678593635559, + "learning_rate": 1.9680893400739162e-05, + "loss": 0.5591, + "step": 5966 + }, + { + "epoch": 0.16383855024711697, + "grad_norm": 0.3300604224205017, + "learning_rate": 1.968078515692159e-05, + "loss": 0.4924, + "step": 5967 + }, + { + "epoch": 0.16386600768808346, + "grad_norm": 0.34586697816848755, + "learning_rate": 1.968067689504627e-05, + "loss": 0.5216, + "step": 5968 + }, + { + "epoch": 0.16389346512904998, + "grad_norm": 0.4419156312942505, + "learning_rate": 1.968056861511339e-05, + "loss": 0.5635, + "step": 5969 + }, + { + "epoch": 0.16392092257001648, + "grad_norm": 0.4222278594970703, + "learning_rate": 1.9680460317123165e-05, + "loss": 0.5015, + "step": 5970 + }, + { + "epoch": 0.16394838001098297, + "grad_norm": 0.37268081307411194, + "learning_rate": 1.968035200107579e-05, + "loss": 0.5531, + "step": 5971 + }, + { + "epoch": 0.1639758374519495, + "grad_norm": 0.4047996401786804, + "learning_rate": 1.968024366697147e-05, + "loss": 0.5629, + "step": 5972 + }, + { + "epoch": 0.16400329489291599, + "grad_norm": 0.3732962906360626, + "learning_rate": 1.9680135314810407e-05, + "loss": 0.4674, + "step": 5973 + }, + { + "epoch": 0.16403075233388248, + "grad_norm": 0.4580184817314148, + "learning_rate": 1.96800269445928e-05, + "loss": 0.5887, + "step": 5974 + }, + { + "epoch": 0.16405820977484897, + "grad_norm": 0.3527466654777527, + "learning_rate": 1.9679918556318853e-05, + "loss": 0.6166, + "step": 5975 + }, + { + "epoch": 0.1640856672158155, + "grad_norm": 0.39620742201805115, + "learning_rate": 1.967981014998877e-05, + "loss": 0.5702, + "step": 5976 + }, + { + "epoch": 0.164113124656782, + "grad_norm": 0.47037628293037415, + "learning_rate": 1.9679701725602748e-05, + "loss": 0.5622, + "step": 5977 + }, + { + "epoch": 0.16414058209774848, + "grad_norm": 0.37670788168907166, + "learning_rate": 1.9679593283160998e-05, + "loss": 0.4955, + "step": 5978 + }, + { + "epoch": 0.164168039538715, + "grad_norm": 0.43218541145324707, + "learning_rate": 1.967948482266371e-05, + "loss": 0.533, + "step": 5979 + }, + { + "epoch": 0.1641954969796815, + "grad_norm": 0.3815891444683075, + "learning_rate": 1.96793763441111e-05, + "loss": 0.4516, + "step": 5980 + }, + { + "epoch": 0.164222954420648, + "grad_norm": 0.3875274360179901, + "learning_rate": 1.9679267847503362e-05, + "loss": 0.5915, + "step": 5981 + }, + { + "epoch": 0.1642504118616145, + "grad_norm": 0.5156402587890625, + "learning_rate": 1.9679159332840703e-05, + "loss": 0.5609, + "step": 5982 + }, + { + "epoch": 0.164277869302581, + "grad_norm": 0.42766740918159485, + "learning_rate": 1.967905080012332e-05, + "loss": 0.4794, + "step": 5983 + }, + { + "epoch": 0.1643053267435475, + "grad_norm": 0.4484354257583618, + "learning_rate": 1.9678942249351416e-05, + "loss": 0.5642, + "step": 5984 + }, + { + "epoch": 0.164332784184514, + "grad_norm": 0.32909175753593445, + "learning_rate": 1.9678833680525198e-05, + "loss": 0.4652, + "step": 5985 + }, + { + "epoch": 0.16436024162548052, + "grad_norm": 0.36967480182647705, + "learning_rate": 1.9678725093644866e-05, + "loss": 0.5624, + "step": 5986 + }, + { + "epoch": 0.164387699066447, + "grad_norm": 0.4283381402492523, + "learning_rate": 1.9678616488710623e-05, + "loss": 0.5287, + "step": 5987 + }, + { + "epoch": 0.1644151565074135, + "grad_norm": 0.34213605523109436, + "learning_rate": 1.967850786572267e-05, + "loss": 0.4511, + "step": 5988 + }, + { + "epoch": 0.16444261394838, + "grad_norm": 0.35383349657058716, + "learning_rate": 1.967839922468121e-05, + "loss": 0.4902, + "step": 5989 + }, + { + "epoch": 0.16447007138934652, + "grad_norm": 0.44964611530303955, + "learning_rate": 1.9678290565586447e-05, + "loss": 0.5706, + "step": 5990 + }, + { + "epoch": 0.16449752883031302, + "grad_norm": 0.36725693941116333, + "learning_rate": 1.9678181888438583e-05, + "loss": 0.5489, + "step": 5991 + }, + { + "epoch": 0.1645249862712795, + "grad_norm": 0.3923743665218353, + "learning_rate": 1.9678073193237824e-05, + "loss": 0.6093, + "step": 5992 + }, + { + "epoch": 0.164552443712246, + "grad_norm": 0.34573930501937866, + "learning_rate": 1.9677964479984367e-05, + "loss": 0.6047, + "step": 5993 + }, + { + "epoch": 0.16457990115321253, + "grad_norm": 0.3507407307624817, + "learning_rate": 1.9677855748678416e-05, + "loss": 0.4967, + "step": 5994 + }, + { + "epoch": 0.16460735859417902, + "grad_norm": 0.3383448123931885, + "learning_rate": 1.9677746999320176e-05, + "loss": 0.5133, + "step": 5995 + }, + { + "epoch": 0.16463481603514551, + "grad_norm": 0.5422051548957825, + "learning_rate": 1.967763823190985e-05, + "loss": 0.5192, + "step": 5996 + }, + { + "epoch": 0.16466227347611204, + "grad_norm": 0.39850959181785583, + "learning_rate": 1.9677529446447643e-05, + "loss": 0.578, + "step": 5997 + }, + { + "epoch": 0.16468973091707853, + "grad_norm": 0.4056061804294586, + "learning_rate": 1.9677420642933752e-05, + "loss": 0.5727, + "step": 5998 + }, + { + "epoch": 0.16471718835804502, + "grad_norm": 0.35046660900115967, + "learning_rate": 1.967731182136838e-05, + "loss": 0.5965, + "step": 5999 + }, + { + "epoch": 0.16474464579901152, + "grad_norm": 0.42219918966293335, + "learning_rate": 1.9677202981751736e-05, + "loss": 0.5332, + "step": 6000 + }, + { + "epoch": 0.16477210323997804, + "grad_norm": 0.35297176241874695, + "learning_rate": 1.967709412408402e-05, + "loss": 0.5674, + "step": 6001 + }, + { + "epoch": 0.16479956068094453, + "grad_norm": 0.34210219979286194, + "learning_rate": 1.9676985248365433e-05, + "loss": 0.4778, + "step": 6002 + }, + { + "epoch": 0.16482701812191103, + "grad_norm": 0.40207263827323914, + "learning_rate": 1.9676876354596183e-05, + "loss": 0.4935, + "step": 6003 + }, + { + "epoch": 0.16485447556287755, + "grad_norm": 0.4113471806049347, + "learning_rate": 1.967676744277647e-05, + "loss": 0.6124, + "step": 6004 + }, + { + "epoch": 0.16488193300384404, + "grad_norm": 0.39527130126953125, + "learning_rate": 1.9676658512906492e-05, + "loss": 0.5735, + "step": 6005 + }, + { + "epoch": 0.16490939044481054, + "grad_norm": 0.3421378433704376, + "learning_rate": 1.967654956498646e-05, + "loss": 0.5129, + "step": 6006 + }, + { + "epoch": 0.16493684788577703, + "grad_norm": 0.36119911074638367, + "learning_rate": 1.9676440599016574e-05, + "loss": 0.5684, + "step": 6007 + }, + { + "epoch": 0.16496430532674355, + "grad_norm": 0.3911708891391754, + "learning_rate": 1.967633161499704e-05, + "loss": 0.5673, + "step": 6008 + }, + { + "epoch": 0.16499176276771005, + "grad_norm": 0.36507293581962585, + "learning_rate": 1.9676222612928064e-05, + "loss": 0.5548, + "step": 6009 + }, + { + "epoch": 0.16501922020867654, + "grad_norm": 0.3616604208946228, + "learning_rate": 1.9676113592809837e-05, + "loss": 0.4869, + "step": 6010 + }, + { + "epoch": 0.16504667764964306, + "grad_norm": 0.3893595039844513, + "learning_rate": 1.967600455464257e-05, + "loss": 0.6038, + "step": 6011 + }, + { + "epoch": 0.16507413509060956, + "grad_norm": 0.3508799970149994, + "learning_rate": 1.967589549842647e-05, + "loss": 0.5639, + "step": 6012 + }, + { + "epoch": 0.16510159253157605, + "grad_norm": 0.3853070139884949, + "learning_rate": 1.9675786424161734e-05, + "loss": 0.5146, + "step": 6013 + }, + { + "epoch": 0.16512904997254255, + "grad_norm": 0.3202778398990631, + "learning_rate": 1.9675677331848568e-05, + "loss": 0.4645, + "step": 6014 + }, + { + "epoch": 0.16515650741350907, + "grad_norm": 0.365339994430542, + "learning_rate": 1.9675568221487177e-05, + "loss": 0.5665, + "step": 6015 + }, + { + "epoch": 0.16518396485447556, + "grad_norm": 0.3838532865047455, + "learning_rate": 1.9675459093077763e-05, + "loss": 0.6005, + "step": 6016 + }, + { + "epoch": 0.16521142229544206, + "grad_norm": 0.3698323965072632, + "learning_rate": 1.967534994662053e-05, + "loss": 0.5837, + "step": 6017 + }, + { + "epoch": 0.16523887973640858, + "grad_norm": 0.3577888011932373, + "learning_rate": 1.9675240782115682e-05, + "loss": 0.4916, + "step": 6018 + }, + { + "epoch": 0.16526633717737507, + "grad_norm": 0.4284653067588806, + "learning_rate": 1.967513159956342e-05, + "loss": 0.57, + "step": 6019 + }, + { + "epoch": 0.16529379461834157, + "grad_norm": 0.37486064434051514, + "learning_rate": 1.9675022398963954e-05, + "loss": 0.6318, + "step": 6020 + }, + { + "epoch": 0.16532125205930806, + "grad_norm": 0.359077125787735, + "learning_rate": 1.9674913180317478e-05, + "loss": 0.5552, + "step": 6021 + }, + { + "epoch": 0.16534870950027458, + "grad_norm": 0.40331849455833435, + "learning_rate": 1.9674803943624202e-05, + "loss": 0.6487, + "step": 6022 + }, + { + "epoch": 0.16537616694124108, + "grad_norm": 0.3378467559814453, + "learning_rate": 1.967469468888433e-05, + "loss": 0.5305, + "step": 6023 + }, + { + "epoch": 0.16540362438220757, + "grad_norm": 0.352027952671051, + "learning_rate": 1.9674585416098066e-05, + "loss": 0.4094, + "step": 6024 + }, + { + "epoch": 0.1654310818231741, + "grad_norm": 0.3266749382019043, + "learning_rate": 1.9674476125265613e-05, + "loss": 0.4815, + "step": 6025 + }, + { + "epoch": 0.16545853926414059, + "grad_norm": 0.41697314381599426, + "learning_rate": 1.967436681638717e-05, + "loss": 0.6243, + "step": 6026 + }, + { + "epoch": 0.16548599670510708, + "grad_norm": 0.3512372672557831, + "learning_rate": 1.9674257489462947e-05, + "loss": 0.5425, + "step": 6027 + }, + { + "epoch": 0.16551345414607357, + "grad_norm": 0.41652485728263855, + "learning_rate": 1.9674148144493145e-05, + "loss": 0.578, + "step": 6028 + }, + { + "epoch": 0.1655409115870401, + "grad_norm": 0.3305037021636963, + "learning_rate": 1.9674038781477973e-05, + "loss": 0.4872, + "step": 6029 + }, + { + "epoch": 0.1655683690280066, + "grad_norm": 0.35801994800567627, + "learning_rate": 1.9673929400417625e-05, + "loss": 0.5714, + "step": 6030 + }, + { + "epoch": 0.16559582646897308, + "grad_norm": 0.3911196291446686, + "learning_rate": 1.9673820001312318e-05, + "loss": 0.6437, + "step": 6031 + }, + { + "epoch": 0.1656232839099396, + "grad_norm": 0.3658948242664337, + "learning_rate": 1.9673710584162244e-05, + "loss": 0.557, + "step": 6032 + }, + { + "epoch": 0.1656507413509061, + "grad_norm": 0.38355833292007446, + "learning_rate": 1.9673601148967617e-05, + "loss": 0.5708, + "step": 6033 + }, + { + "epoch": 0.1656781987918726, + "grad_norm": 0.3739334046840668, + "learning_rate": 1.9673491695728634e-05, + "loss": 0.5117, + "step": 6034 + }, + { + "epoch": 0.1657056562328391, + "grad_norm": 0.38835033774375916, + "learning_rate": 1.9673382224445497e-05, + "loss": 0.5351, + "step": 6035 + }, + { + "epoch": 0.1657331136738056, + "grad_norm": 0.4020492732524872, + "learning_rate": 1.967327273511842e-05, + "loss": 0.546, + "step": 6036 + }, + { + "epoch": 0.1657605711147721, + "grad_norm": 0.39028358459472656, + "learning_rate": 1.9673163227747602e-05, + "loss": 0.4968, + "step": 6037 + }, + { + "epoch": 0.1657880285557386, + "grad_norm": 0.3714902698993683, + "learning_rate": 1.9673053702333246e-05, + "loss": 0.5953, + "step": 6038 + }, + { + "epoch": 0.16581548599670512, + "grad_norm": 0.32104188203811646, + "learning_rate": 1.967294415887556e-05, + "loss": 0.5447, + "step": 6039 + }, + { + "epoch": 0.1658429434376716, + "grad_norm": 0.4229342043399811, + "learning_rate": 1.9672834597374742e-05, + "loss": 0.5742, + "step": 6040 + }, + { + "epoch": 0.1658704008786381, + "grad_norm": 0.33305221796035767, + "learning_rate": 1.9672725017831003e-05, + "loss": 0.5075, + "step": 6041 + }, + { + "epoch": 0.1658978583196046, + "grad_norm": 0.6092918515205383, + "learning_rate": 1.9672615420244544e-05, + "loss": 0.5673, + "step": 6042 + }, + { + "epoch": 0.16592531576057112, + "grad_norm": 0.3503728210926056, + "learning_rate": 1.967250580461557e-05, + "loss": 0.5192, + "step": 6043 + }, + { + "epoch": 0.16595277320153762, + "grad_norm": 0.34510666131973267, + "learning_rate": 1.9672396170944284e-05, + "loss": 0.5449, + "step": 6044 + }, + { + "epoch": 0.1659802306425041, + "grad_norm": 0.510109007358551, + "learning_rate": 1.9672286519230895e-05, + "loss": 0.6686, + "step": 6045 + }, + { + "epoch": 0.16600768808347063, + "grad_norm": 0.34774476289749146, + "learning_rate": 1.9672176849475603e-05, + "loss": 0.4627, + "step": 6046 + }, + { + "epoch": 0.16603514552443713, + "grad_norm": 0.3549306094646454, + "learning_rate": 1.967206716167861e-05, + "loss": 0.5547, + "step": 6047 + }, + { + "epoch": 0.16606260296540362, + "grad_norm": 0.3948396146297455, + "learning_rate": 1.967195745584013e-05, + "loss": 0.6329, + "step": 6048 + }, + { + "epoch": 0.16609006040637012, + "grad_norm": 0.30357012152671814, + "learning_rate": 1.9671847731960362e-05, + "loss": 0.4374, + "step": 6049 + }, + { + "epoch": 0.16611751784733664, + "grad_norm": 0.36608296632766724, + "learning_rate": 1.967173799003951e-05, + "loss": 0.6455, + "step": 6050 + }, + { + "epoch": 0.16614497528830313, + "grad_norm": 0.43936023116111755, + "learning_rate": 1.967162823007778e-05, + "loss": 0.5327, + "step": 6051 + }, + { + "epoch": 0.16617243272926963, + "grad_norm": 0.37706443667411804, + "learning_rate": 1.9671518452075378e-05, + "loss": 0.5166, + "step": 6052 + }, + { + "epoch": 0.16619989017023615, + "grad_norm": 0.390299916267395, + "learning_rate": 1.9671408656032508e-05, + "loss": 0.5725, + "step": 6053 + }, + { + "epoch": 0.16622734761120264, + "grad_norm": 0.3787047564983368, + "learning_rate": 1.9671298841949368e-05, + "loss": 0.5472, + "step": 6054 + }, + { + "epoch": 0.16625480505216914, + "grad_norm": 0.3114975094795227, + "learning_rate": 1.9671189009826174e-05, + "loss": 0.476, + "step": 6055 + }, + { + "epoch": 0.16628226249313563, + "grad_norm": 0.38148245215415955, + "learning_rate": 1.9671079159663127e-05, + "loss": 0.5246, + "step": 6056 + }, + { + "epoch": 0.16630971993410215, + "grad_norm": 0.3514304459095001, + "learning_rate": 1.9670969291460426e-05, + "loss": 0.5677, + "step": 6057 + }, + { + "epoch": 0.16633717737506865, + "grad_norm": 0.33231183886528015, + "learning_rate": 1.9670859405218285e-05, + "loss": 0.4606, + "step": 6058 + }, + { + "epoch": 0.16636463481603514, + "grad_norm": 0.32489141821861267, + "learning_rate": 1.9670749500936903e-05, + "loss": 0.5342, + "step": 6059 + }, + { + "epoch": 0.16639209225700163, + "grad_norm": 0.35079091787338257, + "learning_rate": 1.9670639578616485e-05, + "loss": 0.5881, + "step": 6060 + }, + { + "epoch": 0.16641954969796816, + "grad_norm": 0.36974725127220154, + "learning_rate": 1.9670529638257242e-05, + "loss": 0.4705, + "step": 6061 + }, + { + "epoch": 0.16644700713893465, + "grad_norm": 0.4431898593902588, + "learning_rate": 1.967041967985937e-05, + "loss": 0.5052, + "step": 6062 + }, + { + "epoch": 0.16647446457990114, + "grad_norm": 0.35807061195373535, + "learning_rate": 1.9670309703423083e-05, + "loss": 0.573, + "step": 6063 + }, + { + "epoch": 0.16650192202086767, + "grad_norm": 0.4240536689758301, + "learning_rate": 1.967019970894858e-05, + "loss": 0.6404, + "step": 6064 + }, + { + "epoch": 0.16652937946183416, + "grad_norm": 0.35580208897590637, + "learning_rate": 1.967008969643607e-05, + "loss": 0.5303, + "step": 6065 + }, + { + "epoch": 0.16655683690280065, + "grad_norm": 0.38924211263656616, + "learning_rate": 1.966997966588575e-05, + "loss": 0.6984, + "step": 6066 + }, + { + "epoch": 0.16658429434376715, + "grad_norm": 0.3990425765514374, + "learning_rate": 1.9669869617297837e-05, + "loss": 0.48, + "step": 6067 + }, + { + "epoch": 0.16661175178473367, + "grad_norm": 0.35663846135139465, + "learning_rate": 1.9669759550672528e-05, + "loss": 0.5654, + "step": 6068 + }, + { + "epoch": 0.16663920922570016, + "grad_norm": 0.549411952495575, + "learning_rate": 1.9669649466010036e-05, + "loss": 0.5292, + "step": 6069 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.469146192073822, + "learning_rate": 1.9669539363310556e-05, + "loss": 0.5455, + "step": 6070 + }, + { + "epoch": 0.16669412410763318, + "grad_norm": 0.4251728057861328, + "learning_rate": 1.9669429242574303e-05, + "loss": 0.5843, + "step": 6071 + }, + { + "epoch": 0.16672158154859967, + "grad_norm": 0.4439394772052765, + "learning_rate": 1.9669319103801476e-05, + "loss": 0.6297, + "step": 6072 + }, + { + "epoch": 0.16674903898956617, + "grad_norm": 0.373017817735672, + "learning_rate": 1.9669208946992284e-05, + "loss": 0.4998, + "step": 6073 + }, + { + "epoch": 0.16677649643053266, + "grad_norm": 0.4103248119354248, + "learning_rate": 1.9669098772146933e-05, + "loss": 0.5273, + "step": 6074 + }, + { + "epoch": 0.16680395387149918, + "grad_norm": 0.43823814392089844, + "learning_rate": 1.9668988579265624e-05, + "loss": 0.615, + "step": 6075 + }, + { + "epoch": 0.16683141131246568, + "grad_norm": 0.33302512764930725, + "learning_rate": 1.9668878368348564e-05, + "loss": 0.5368, + "step": 6076 + }, + { + "epoch": 0.16685886875343217, + "grad_norm": 0.3614601492881775, + "learning_rate": 1.9668768139395964e-05, + "loss": 0.5541, + "step": 6077 + }, + { + "epoch": 0.1668863261943987, + "grad_norm": 0.47163820266723633, + "learning_rate": 1.9668657892408024e-05, + "loss": 0.5357, + "step": 6078 + }, + { + "epoch": 0.1669137836353652, + "grad_norm": 0.36058926582336426, + "learning_rate": 1.966854762738495e-05, + "loss": 0.5523, + "step": 6079 + }, + { + "epoch": 0.16694124107633168, + "grad_norm": 0.46985626220703125, + "learning_rate": 1.9668437344326947e-05, + "loss": 0.6506, + "step": 6080 + }, + { + "epoch": 0.16696869851729818, + "grad_norm": 0.35088658332824707, + "learning_rate": 1.9668327043234225e-05, + "loss": 0.5378, + "step": 6081 + }, + { + "epoch": 0.1669961559582647, + "grad_norm": 0.3433704376220703, + "learning_rate": 1.966821672410699e-05, + "loss": 0.5781, + "step": 6082 + }, + { + "epoch": 0.1670236133992312, + "grad_norm": 0.40412694215774536, + "learning_rate": 1.9668106386945442e-05, + "loss": 0.5413, + "step": 6083 + }, + { + "epoch": 0.16705107084019769, + "grad_norm": 0.37778741121292114, + "learning_rate": 1.966799603174979e-05, + "loss": 0.5615, + "step": 6084 + }, + { + "epoch": 0.1670785282811642, + "grad_norm": 0.40664738416671753, + "learning_rate": 1.966788565852024e-05, + "loss": 0.4778, + "step": 6085 + }, + { + "epoch": 0.1671059857221307, + "grad_norm": 0.3747825622558594, + "learning_rate": 1.9667775267256998e-05, + "loss": 0.5911, + "step": 6086 + }, + { + "epoch": 0.1671334431630972, + "grad_norm": 0.33845314383506775, + "learning_rate": 1.9667664857960268e-05, + "loss": 0.5409, + "step": 6087 + }, + { + "epoch": 0.1671609006040637, + "grad_norm": 0.3360182046890259, + "learning_rate": 1.966755443063026e-05, + "loss": 0.5054, + "step": 6088 + }, + { + "epoch": 0.1671883580450302, + "grad_norm": 0.3897232711315155, + "learning_rate": 1.9667443985267174e-05, + "loss": 0.5546, + "step": 6089 + }, + { + "epoch": 0.1672158154859967, + "grad_norm": 0.40664052963256836, + "learning_rate": 1.9667333521871223e-05, + "loss": 0.6336, + "step": 6090 + }, + { + "epoch": 0.1672432729269632, + "grad_norm": 0.3653057813644409, + "learning_rate": 1.9667223040442608e-05, + "loss": 0.6095, + "step": 6091 + }, + { + "epoch": 0.16727073036792972, + "grad_norm": 0.3559306263923645, + "learning_rate": 1.9667112540981535e-05, + "loss": 0.4986, + "step": 6092 + }, + { + "epoch": 0.16729818780889621, + "grad_norm": 0.3687642812728882, + "learning_rate": 1.9667002023488212e-05, + "loss": 0.5231, + "step": 6093 + }, + { + "epoch": 0.1673256452498627, + "grad_norm": 1.9055484533309937, + "learning_rate": 1.966689148796285e-05, + "loss": 0.6183, + "step": 6094 + }, + { + "epoch": 0.1673531026908292, + "grad_norm": 0.3752286434173584, + "learning_rate": 1.9666780934405644e-05, + "loss": 0.5455, + "step": 6095 + }, + { + "epoch": 0.16738056013179572, + "grad_norm": 0.4538190960884094, + "learning_rate": 1.9666670362816812e-05, + "loss": 0.508, + "step": 6096 + }, + { + "epoch": 0.16740801757276222, + "grad_norm": 0.3904830813407898, + "learning_rate": 1.966655977319655e-05, + "loss": 0.5337, + "step": 6097 + }, + { + "epoch": 0.1674354750137287, + "grad_norm": 0.3765058219432831, + "learning_rate": 1.9666449165545067e-05, + "loss": 0.5847, + "step": 6098 + }, + { + "epoch": 0.16746293245469523, + "grad_norm": 0.4019477665424347, + "learning_rate": 1.9666338539862577e-05, + "loss": 0.6267, + "step": 6099 + }, + { + "epoch": 0.16749038989566173, + "grad_norm": 0.3540938198566437, + "learning_rate": 1.9666227896149276e-05, + "loss": 0.5022, + "step": 6100 + }, + { + "epoch": 0.16751784733662822, + "grad_norm": 0.36842676997184753, + "learning_rate": 1.9666117234405378e-05, + "loss": 0.5791, + "step": 6101 + }, + { + "epoch": 0.16754530477759472, + "grad_norm": 0.3152039349079132, + "learning_rate": 1.9666006554631083e-05, + "loss": 0.426, + "step": 6102 + }, + { + "epoch": 0.16757276221856124, + "grad_norm": 0.364555299282074, + "learning_rate": 1.9665895856826604e-05, + "loss": 0.5676, + "step": 6103 + }, + { + "epoch": 0.16760021965952773, + "grad_norm": 0.5145508646965027, + "learning_rate": 1.966578514099214e-05, + "loss": 0.5058, + "step": 6104 + }, + { + "epoch": 0.16762767710049423, + "grad_norm": 0.3821084797382355, + "learning_rate": 1.9665674407127907e-05, + "loss": 0.5779, + "step": 6105 + }, + { + "epoch": 0.16765513454146075, + "grad_norm": 0.324028342962265, + "learning_rate": 1.9665563655234102e-05, + "loss": 0.5476, + "step": 6106 + }, + { + "epoch": 0.16768259198242724, + "grad_norm": 0.3450806736946106, + "learning_rate": 1.9665452885310937e-05, + "loss": 0.5042, + "step": 6107 + }, + { + "epoch": 0.16771004942339374, + "grad_norm": 0.4623746871948242, + "learning_rate": 1.966534209735862e-05, + "loss": 0.5664, + "step": 6108 + }, + { + "epoch": 0.16773750686436023, + "grad_norm": 0.48113784193992615, + "learning_rate": 1.9665231291377353e-05, + "loss": 0.6389, + "step": 6109 + }, + { + "epoch": 0.16776496430532675, + "grad_norm": 0.4000304937362671, + "learning_rate": 1.9665120467367346e-05, + "loss": 0.6153, + "step": 6110 + }, + { + "epoch": 0.16779242174629325, + "grad_norm": 0.35962119698524475, + "learning_rate": 1.9665009625328802e-05, + "loss": 0.5036, + "step": 6111 + }, + { + "epoch": 0.16781987918725974, + "grad_norm": 0.34091895818710327, + "learning_rate": 1.966489876526193e-05, + "loss": 0.5801, + "step": 6112 + }, + { + "epoch": 0.16784733662822626, + "grad_norm": 0.4326756000518799, + "learning_rate": 1.9664787887166943e-05, + "loss": 0.5627, + "step": 6113 + }, + { + "epoch": 0.16787479406919276, + "grad_norm": 0.3511867821216583, + "learning_rate": 1.9664676991044035e-05, + "loss": 0.546, + "step": 6114 + }, + { + "epoch": 0.16790225151015925, + "grad_norm": 0.37634846568107605, + "learning_rate": 1.9664566076893426e-05, + "loss": 0.6147, + "step": 6115 + }, + { + "epoch": 0.16792970895112574, + "grad_norm": 0.38621097803115845, + "learning_rate": 1.9664455144715313e-05, + "loss": 0.6216, + "step": 6116 + }, + { + "epoch": 0.16795716639209227, + "grad_norm": 0.326101690530777, + "learning_rate": 1.9664344194509906e-05, + "loss": 0.5665, + "step": 6117 + }, + { + "epoch": 0.16798462383305876, + "grad_norm": 0.40964528918266296, + "learning_rate": 1.9664233226277416e-05, + "loss": 0.5851, + "step": 6118 + }, + { + "epoch": 0.16801208127402525, + "grad_norm": 0.33603858947753906, + "learning_rate": 1.9664122240018045e-05, + "loss": 0.4606, + "step": 6119 + }, + { + "epoch": 0.16803953871499178, + "grad_norm": 0.35454311966896057, + "learning_rate": 1.9664011235732e-05, + "loss": 0.6291, + "step": 6120 + }, + { + "epoch": 0.16806699615595827, + "grad_norm": 0.3686046004295349, + "learning_rate": 1.9663900213419492e-05, + "loss": 0.5673, + "step": 6121 + }, + { + "epoch": 0.16809445359692476, + "grad_norm": 0.38228264451026917, + "learning_rate": 1.9663789173080723e-05, + "loss": 0.6138, + "step": 6122 + }, + { + "epoch": 0.16812191103789126, + "grad_norm": 0.36588242650032043, + "learning_rate": 1.9663678114715904e-05, + "loss": 0.5195, + "step": 6123 + }, + { + "epoch": 0.16814936847885778, + "grad_norm": 0.34180575609207153, + "learning_rate": 1.966356703832524e-05, + "loss": 0.5052, + "step": 6124 + }, + { + "epoch": 0.16817682591982427, + "grad_norm": 0.36211350560188293, + "learning_rate": 1.966345594390894e-05, + "loss": 0.5239, + "step": 6125 + }, + { + "epoch": 0.16820428336079077, + "grad_norm": 0.3310061991214752, + "learning_rate": 1.966334483146721e-05, + "loss": 0.521, + "step": 6126 + }, + { + "epoch": 0.16823174080175726, + "grad_norm": 0.3522648215293884, + "learning_rate": 1.9663233701000264e-05, + "loss": 0.4746, + "step": 6127 + }, + { + "epoch": 0.16825919824272378, + "grad_norm": 0.3470238149166107, + "learning_rate": 1.9663122552508297e-05, + "loss": 0.551, + "step": 6128 + }, + { + "epoch": 0.16828665568369028, + "grad_norm": 0.516315221786499, + "learning_rate": 1.9663011385991523e-05, + "loss": 0.5962, + "step": 6129 + }, + { + "epoch": 0.16831411312465677, + "grad_norm": 0.33558204770088196, + "learning_rate": 1.966290020145015e-05, + "loss": 0.4921, + "step": 6130 + }, + { + "epoch": 0.1683415705656233, + "grad_norm": 0.3857131898403168, + "learning_rate": 1.966278899888438e-05, + "loss": 0.6062, + "step": 6131 + }, + { + "epoch": 0.1683690280065898, + "grad_norm": 0.4149656891822815, + "learning_rate": 1.966267777829443e-05, + "loss": 0.5913, + "step": 6132 + }, + { + "epoch": 0.16839648544755628, + "grad_norm": 0.36160099506378174, + "learning_rate": 1.9662566539680497e-05, + "loss": 0.6703, + "step": 6133 + }, + { + "epoch": 0.16842394288852278, + "grad_norm": 0.366005003452301, + "learning_rate": 1.9662455283042795e-05, + "loss": 0.5602, + "step": 6134 + }, + { + "epoch": 0.1684514003294893, + "grad_norm": 0.39626607298851013, + "learning_rate": 1.966234400838153e-05, + "loss": 0.5611, + "step": 6135 + }, + { + "epoch": 0.1684788577704558, + "grad_norm": 0.38875123858451843, + "learning_rate": 1.966223271569691e-05, + "loss": 0.5704, + "step": 6136 + }, + { + "epoch": 0.1685063152114223, + "grad_norm": 0.522930383682251, + "learning_rate": 1.9662121404989146e-05, + "loss": 0.5189, + "step": 6137 + }, + { + "epoch": 0.1685337726523888, + "grad_norm": 0.3427080512046814, + "learning_rate": 1.9662010076258437e-05, + "loss": 0.5328, + "step": 6138 + }, + { + "epoch": 0.1685612300933553, + "grad_norm": 0.3448258340358734, + "learning_rate": 1.9661898729504995e-05, + "loss": 0.5062, + "step": 6139 + }, + { + "epoch": 0.1685886875343218, + "grad_norm": 0.38568389415740967, + "learning_rate": 1.966178736472903e-05, + "loss": 0.5404, + "step": 6140 + }, + { + "epoch": 0.1686161449752883, + "grad_norm": 0.35096418857574463, + "learning_rate": 1.9661675981930747e-05, + "loss": 0.5367, + "step": 6141 + }, + { + "epoch": 0.1686436024162548, + "grad_norm": 0.32657188177108765, + "learning_rate": 1.966156458111036e-05, + "loss": 0.4732, + "step": 6142 + }, + { + "epoch": 0.1686710598572213, + "grad_norm": 0.40800753235816956, + "learning_rate": 1.9661453162268066e-05, + "loss": 0.5081, + "step": 6143 + }, + { + "epoch": 0.1686985172981878, + "grad_norm": 0.36590394377708435, + "learning_rate": 1.9661341725404078e-05, + "loss": 0.5049, + "step": 6144 + }, + { + "epoch": 0.16872597473915432, + "grad_norm": 0.3546043634414673, + "learning_rate": 1.9661230270518606e-05, + "loss": 0.4865, + "step": 6145 + }, + { + "epoch": 0.16875343218012082, + "grad_norm": 0.40301400423049927, + "learning_rate": 1.9661118797611854e-05, + "loss": 0.4854, + "step": 6146 + }, + { + "epoch": 0.1687808896210873, + "grad_norm": 0.412809282541275, + "learning_rate": 1.9661007306684033e-05, + "loss": 0.4533, + "step": 6147 + }, + { + "epoch": 0.1688083470620538, + "grad_norm": 0.3760605752468109, + "learning_rate": 1.966089579773535e-05, + "loss": 0.5589, + "step": 6148 + }, + { + "epoch": 0.16883580450302033, + "grad_norm": 0.3326733410358429, + "learning_rate": 1.9660784270766014e-05, + "loss": 0.5015, + "step": 6149 + }, + { + "epoch": 0.16886326194398682, + "grad_norm": 0.3640964925289154, + "learning_rate": 1.966067272577623e-05, + "loss": 0.5146, + "step": 6150 + }, + { + "epoch": 0.16889071938495331, + "grad_norm": 0.39652398228645325, + "learning_rate": 1.966056116276621e-05, + "loss": 0.5389, + "step": 6151 + }, + { + "epoch": 0.16891817682591984, + "grad_norm": 0.3585420846939087, + "learning_rate": 1.966044958173616e-05, + "loss": 0.4897, + "step": 6152 + }, + { + "epoch": 0.16894563426688633, + "grad_norm": 0.36327043175697327, + "learning_rate": 1.9660337982686287e-05, + "loss": 0.4914, + "step": 6153 + }, + { + "epoch": 0.16897309170785282, + "grad_norm": 0.38483381271362305, + "learning_rate": 1.96602263656168e-05, + "loss": 0.48, + "step": 6154 + }, + { + "epoch": 0.16900054914881932, + "grad_norm": 0.38219207525253296, + "learning_rate": 1.966011473052791e-05, + "loss": 0.5062, + "step": 6155 + }, + { + "epoch": 0.16902800658978584, + "grad_norm": 0.3424827456474304, + "learning_rate": 1.966000307741982e-05, + "loss": 0.5056, + "step": 6156 + }, + { + "epoch": 0.16905546403075233, + "grad_norm": 0.5753840804100037, + "learning_rate": 1.9659891406292743e-05, + "loss": 0.5576, + "step": 6157 + }, + { + "epoch": 0.16908292147171883, + "grad_norm": 0.6950632333755493, + "learning_rate": 1.9659779717146886e-05, + "loss": 0.5298, + "step": 6158 + }, + { + "epoch": 0.16911037891268535, + "grad_norm": 0.4094807803630829, + "learning_rate": 1.9659668009982456e-05, + "loss": 0.6332, + "step": 6159 + }, + { + "epoch": 0.16913783635365184, + "grad_norm": 0.3458978235721588, + "learning_rate": 1.965955628479966e-05, + "loss": 0.5028, + "step": 6160 + }, + { + "epoch": 0.16916529379461834, + "grad_norm": 0.35234832763671875, + "learning_rate": 1.965944454159871e-05, + "loss": 0.4816, + "step": 6161 + }, + { + "epoch": 0.16919275123558483, + "grad_norm": 0.3659443259239197, + "learning_rate": 1.9659332780379817e-05, + "loss": 0.5322, + "step": 6162 + }, + { + "epoch": 0.16922020867655135, + "grad_norm": 0.4937681555747986, + "learning_rate": 1.9659221001143183e-05, + "loss": 0.5467, + "step": 6163 + }, + { + "epoch": 0.16924766611751785, + "grad_norm": 0.39413192868232727, + "learning_rate": 1.9659109203889017e-05, + "loss": 0.5205, + "step": 6164 + }, + { + "epoch": 0.16927512355848434, + "grad_norm": 0.33611616492271423, + "learning_rate": 1.965899738861753e-05, + "loss": 0.4527, + "step": 6165 + }, + { + "epoch": 0.16930258099945086, + "grad_norm": 0.4305313229560852, + "learning_rate": 1.965888555532893e-05, + "loss": 0.6364, + "step": 6166 + }, + { + "epoch": 0.16933003844041736, + "grad_norm": 0.37334996461868286, + "learning_rate": 1.9658773704023426e-05, + "loss": 0.5692, + "step": 6167 + }, + { + "epoch": 0.16935749588138385, + "grad_norm": 0.551084041595459, + "learning_rate": 1.965866183470123e-05, + "loss": 0.5589, + "step": 6168 + }, + { + "epoch": 0.16938495332235035, + "grad_norm": 0.41478878259658813, + "learning_rate": 1.9658549947362542e-05, + "loss": 0.587, + "step": 6169 + }, + { + "epoch": 0.16941241076331687, + "grad_norm": 0.33250170946121216, + "learning_rate": 1.9658438042007578e-05, + "loss": 0.4483, + "step": 6170 + }, + { + "epoch": 0.16943986820428336, + "grad_norm": 0.4413265287876129, + "learning_rate": 1.9658326118636545e-05, + "loss": 0.5979, + "step": 6171 + }, + { + "epoch": 0.16946732564524986, + "grad_norm": 0.3406670093536377, + "learning_rate": 1.9658214177249646e-05, + "loss": 0.557, + "step": 6172 + }, + { + "epoch": 0.16949478308621638, + "grad_norm": 0.3202119767665863, + "learning_rate": 1.9658102217847102e-05, + "loss": 0.4595, + "step": 6173 + }, + { + "epoch": 0.16952224052718287, + "grad_norm": 0.347635418176651, + "learning_rate": 1.965799024042911e-05, + "loss": 0.5725, + "step": 6174 + }, + { + "epoch": 0.16954969796814937, + "grad_norm": 0.48045438528060913, + "learning_rate": 1.9657878244995884e-05, + "loss": 0.5639, + "step": 6175 + }, + { + "epoch": 0.16957715540911586, + "grad_norm": 0.35223814845085144, + "learning_rate": 1.9657766231547634e-05, + "loss": 0.563, + "step": 6176 + }, + { + "epoch": 0.16960461285008238, + "grad_norm": 0.35095635056495667, + "learning_rate": 1.965765420008457e-05, + "loss": 0.4613, + "step": 6177 + }, + { + "epoch": 0.16963207029104888, + "grad_norm": 0.44871848821640015, + "learning_rate": 1.9657542150606897e-05, + "loss": 0.507, + "step": 6178 + }, + { + "epoch": 0.16965952773201537, + "grad_norm": 0.34832823276519775, + "learning_rate": 1.9657430083114825e-05, + "loss": 0.5234, + "step": 6179 + }, + { + "epoch": 0.1696869851729819, + "grad_norm": 0.3673684000968933, + "learning_rate": 1.9657317997608563e-05, + "loss": 0.4582, + "step": 6180 + }, + { + "epoch": 0.16971444261394839, + "grad_norm": 0.3486049771308899, + "learning_rate": 1.965720589408832e-05, + "loss": 0.6024, + "step": 6181 + }, + { + "epoch": 0.16974190005491488, + "grad_norm": 0.3625336289405823, + "learning_rate": 1.965709377255431e-05, + "loss": 0.5002, + "step": 6182 + }, + { + "epoch": 0.16976935749588137, + "grad_norm": 0.4737614393234253, + "learning_rate": 1.9656981633006732e-05, + "loss": 0.5246, + "step": 6183 + }, + { + "epoch": 0.1697968149368479, + "grad_norm": 0.46187612414360046, + "learning_rate": 1.9656869475445807e-05, + "loss": 0.5963, + "step": 6184 + }, + { + "epoch": 0.1698242723778144, + "grad_norm": 0.37999269366264343, + "learning_rate": 1.965675729987173e-05, + "loss": 0.5828, + "step": 6185 + }, + { + "epoch": 0.16985172981878088, + "grad_norm": 0.3610737919807434, + "learning_rate": 1.9656645106284726e-05, + "loss": 0.5263, + "step": 6186 + }, + { + "epoch": 0.1698791872597474, + "grad_norm": 0.41093388199806213, + "learning_rate": 1.9656532894684996e-05, + "loss": 0.6023, + "step": 6187 + }, + { + "epoch": 0.1699066447007139, + "grad_norm": 0.36026233434677124, + "learning_rate": 1.965642066507275e-05, + "loss": 0.519, + "step": 6188 + }, + { + "epoch": 0.1699341021416804, + "grad_norm": 0.3770429790019989, + "learning_rate": 1.9656308417448193e-05, + "loss": 0.5038, + "step": 6189 + }, + { + "epoch": 0.1699615595826469, + "grad_norm": 0.45526590943336487, + "learning_rate": 1.9656196151811546e-05, + "loss": 0.5307, + "step": 6190 + }, + { + "epoch": 0.1699890170236134, + "grad_norm": 0.33381035923957825, + "learning_rate": 1.9656083868163004e-05, + "loss": 0.5565, + "step": 6191 + }, + { + "epoch": 0.1700164744645799, + "grad_norm": 0.3913244307041168, + "learning_rate": 1.965597156650279e-05, + "loss": 0.571, + "step": 6192 + }, + { + "epoch": 0.1700439319055464, + "grad_norm": 0.39559149742126465, + "learning_rate": 1.96558592468311e-05, + "loss": 0.5428, + "step": 6193 + }, + { + "epoch": 0.1700713893465129, + "grad_norm": 0.44421499967575073, + "learning_rate": 1.9655746909148158e-05, + "loss": 0.5955, + "step": 6194 + }, + { + "epoch": 0.1700988467874794, + "grad_norm": 0.36310213804244995, + "learning_rate": 1.9655634553454162e-05, + "loss": 0.5284, + "step": 6195 + }, + { + "epoch": 0.1701263042284459, + "grad_norm": 0.38929590582847595, + "learning_rate": 1.9655522179749328e-05, + "loss": 0.5386, + "step": 6196 + }, + { + "epoch": 0.1701537616694124, + "grad_norm": 0.46390002965927124, + "learning_rate": 1.9655409788033863e-05, + "loss": 0.5667, + "step": 6197 + }, + { + "epoch": 0.17018121911037892, + "grad_norm": 0.4154791831970215, + "learning_rate": 1.9655297378307977e-05, + "loss": 0.5805, + "step": 6198 + }, + { + "epoch": 0.17020867655134542, + "grad_norm": 0.3305093050003052, + "learning_rate": 1.9655184950571877e-05, + "loss": 0.4995, + "step": 6199 + }, + { + "epoch": 0.1702361339923119, + "grad_norm": 0.3512718081474304, + "learning_rate": 1.965507250482578e-05, + "loss": 0.4672, + "step": 6200 + }, + { + "epoch": 0.1702635914332784, + "grad_norm": 0.3637877106666565, + "learning_rate": 1.965496004106989e-05, + "loss": 0.6636, + "step": 6201 + }, + { + "epoch": 0.17029104887424493, + "grad_norm": 0.3550313115119934, + "learning_rate": 1.9654847559304416e-05, + "loss": 0.5127, + "step": 6202 + }, + { + "epoch": 0.17031850631521142, + "grad_norm": 0.3590618371963501, + "learning_rate": 1.9654735059529573e-05, + "loss": 0.5494, + "step": 6203 + }, + { + "epoch": 0.17034596375617791, + "grad_norm": 0.3455407917499542, + "learning_rate": 1.9654622541745563e-05, + "loss": 0.5462, + "step": 6204 + }, + { + "epoch": 0.17037342119714444, + "grad_norm": 0.34545716643333435, + "learning_rate": 1.9654510005952602e-05, + "loss": 0.5027, + "step": 6205 + }, + { + "epoch": 0.17040087863811093, + "grad_norm": 0.4074101448059082, + "learning_rate": 1.96543974521509e-05, + "loss": 0.5335, + "step": 6206 + }, + { + "epoch": 0.17042833607907742, + "grad_norm": 0.3968016505241394, + "learning_rate": 1.9654284880340667e-05, + "loss": 0.5599, + "step": 6207 + }, + { + "epoch": 0.17045579352004392, + "grad_norm": 0.35486242175102234, + "learning_rate": 1.9654172290522112e-05, + "loss": 0.5689, + "step": 6208 + }, + { + "epoch": 0.17048325096101044, + "grad_norm": 0.5093222856521606, + "learning_rate": 1.965405968269544e-05, + "loss": 0.5451, + "step": 6209 + }, + { + "epoch": 0.17051070840197693, + "grad_norm": 0.3446919620037079, + "learning_rate": 1.9653947056860868e-05, + "loss": 0.5744, + "step": 6210 + }, + { + "epoch": 0.17053816584294343, + "grad_norm": 0.37866225838661194, + "learning_rate": 1.9653834413018603e-05, + "loss": 0.5609, + "step": 6211 + }, + { + "epoch": 0.17056562328390995, + "grad_norm": 0.3534792959690094, + "learning_rate": 1.9653721751168854e-05, + "loss": 0.5789, + "step": 6212 + }, + { + "epoch": 0.17059308072487644, + "grad_norm": 0.32640522718429565, + "learning_rate": 1.9653609071311835e-05, + "loss": 0.5412, + "step": 6213 + }, + { + "epoch": 0.17062053816584294, + "grad_norm": 0.4173998534679413, + "learning_rate": 1.9653496373447756e-05, + "loss": 0.5957, + "step": 6214 + }, + { + "epoch": 0.17064799560680943, + "grad_norm": 0.3654249906539917, + "learning_rate": 1.965338365757682e-05, + "loss": 0.4975, + "step": 6215 + }, + { + "epoch": 0.17067545304777595, + "grad_norm": 0.36075863242149353, + "learning_rate": 1.9653270923699246e-05, + "loss": 0.5592, + "step": 6216 + }, + { + "epoch": 0.17070291048874245, + "grad_norm": 0.34971916675567627, + "learning_rate": 1.965315817181524e-05, + "loss": 0.5862, + "step": 6217 + }, + { + "epoch": 0.17073036792970894, + "grad_norm": 0.35275882482528687, + "learning_rate": 1.9653045401925014e-05, + "loss": 0.5106, + "step": 6218 + }, + { + "epoch": 0.17075782537067546, + "grad_norm": 0.41081342101097107, + "learning_rate": 1.9652932614028777e-05, + "loss": 0.6205, + "step": 6219 + }, + { + "epoch": 0.17078528281164196, + "grad_norm": 0.353102445602417, + "learning_rate": 1.965281980812674e-05, + "loss": 0.5025, + "step": 6220 + }, + { + "epoch": 0.17081274025260845, + "grad_norm": 0.36874812841415405, + "learning_rate": 1.9652706984219114e-05, + "loss": 0.5877, + "step": 6221 + }, + { + "epoch": 0.17084019769357495, + "grad_norm": 0.3708045780658722, + "learning_rate": 1.9652594142306105e-05, + "loss": 0.5235, + "step": 6222 + }, + { + "epoch": 0.17086765513454147, + "grad_norm": 0.3688451647758484, + "learning_rate": 1.965248128238793e-05, + "loss": 0.4493, + "step": 6223 + }, + { + "epoch": 0.17089511257550796, + "grad_norm": 0.354002445936203, + "learning_rate": 1.9652368404464796e-05, + "loss": 0.5689, + "step": 6224 + }, + { + "epoch": 0.17092257001647446, + "grad_norm": 0.3699571490287781, + "learning_rate": 1.9652255508536916e-05, + "loss": 0.5365, + "step": 6225 + }, + { + "epoch": 0.17095002745744098, + "grad_norm": 0.3374769687652588, + "learning_rate": 1.9652142594604494e-05, + "loss": 0.5045, + "step": 6226 + }, + { + "epoch": 0.17097748489840747, + "grad_norm": 0.3770489990711212, + "learning_rate": 1.965202966266775e-05, + "loss": 0.5904, + "step": 6227 + }, + { + "epoch": 0.17100494233937397, + "grad_norm": 0.47822093963623047, + "learning_rate": 1.965191671272689e-05, + "loss": 0.4312, + "step": 6228 + }, + { + "epoch": 0.17103239978034046, + "grad_norm": 0.33589649200439453, + "learning_rate": 1.9651803744782124e-05, + "loss": 0.5141, + "step": 6229 + }, + { + "epoch": 0.17105985722130698, + "grad_norm": 0.40495795011520386, + "learning_rate": 1.9651690758833662e-05, + "loss": 0.5202, + "step": 6230 + }, + { + "epoch": 0.17108731466227348, + "grad_norm": 0.39113038778305054, + "learning_rate": 1.9651577754881716e-05, + "loss": 0.5744, + "step": 6231 + }, + { + "epoch": 0.17111477210323997, + "grad_norm": 0.3650939166545868, + "learning_rate": 1.96514647329265e-05, + "loss": 0.5544, + "step": 6232 + }, + { + "epoch": 0.1711422295442065, + "grad_norm": 0.3657686114311218, + "learning_rate": 1.965135169296822e-05, + "loss": 0.5311, + "step": 6233 + }, + { + "epoch": 0.171169686985173, + "grad_norm": 0.3627791702747345, + "learning_rate": 1.965123863500709e-05, + "loss": 0.5818, + "step": 6234 + }, + { + "epoch": 0.17119714442613948, + "grad_norm": 0.3896605670452118, + "learning_rate": 1.9651125559043315e-05, + "loss": 0.5759, + "step": 6235 + }, + { + "epoch": 0.17122460186710597, + "grad_norm": 0.4310438632965088, + "learning_rate": 1.9651012465077116e-05, + "loss": 0.5319, + "step": 6236 + }, + { + "epoch": 0.1712520593080725, + "grad_norm": 0.37484776973724365, + "learning_rate": 1.9650899353108695e-05, + "loss": 0.5879, + "step": 6237 + }, + { + "epoch": 0.171279516749039, + "grad_norm": 0.4604226052761078, + "learning_rate": 1.965078622313827e-05, + "loss": 0.5692, + "step": 6238 + }, + { + "epoch": 0.17130697419000548, + "grad_norm": 0.43739521503448486, + "learning_rate": 1.9650673075166047e-05, + "loss": 0.5234, + "step": 6239 + }, + { + "epoch": 0.171334431630972, + "grad_norm": 0.3478800058364868, + "learning_rate": 1.965055990919224e-05, + "loss": 0.4916, + "step": 6240 + }, + { + "epoch": 0.1713618890719385, + "grad_norm": 0.3420684039592743, + "learning_rate": 1.9650446725217056e-05, + "loss": 0.582, + "step": 6241 + }, + { + "epoch": 0.171389346512905, + "grad_norm": 0.3777155578136444, + "learning_rate": 1.965033352324071e-05, + "loss": 0.537, + "step": 6242 + }, + { + "epoch": 0.1714168039538715, + "grad_norm": 0.3580748736858368, + "learning_rate": 1.965022030326341e-05, + "loss": 0.5458, + "step": 6243 + }, + { + "epoch": 0.171444261394838, + "grad_norm": 0.39381274580955505, + "learning_rate": 1.9650107065285372e-05, + "loss": 0.5343, + "step": 6244 + }, + { + "epoch": 0.1714717188358045, + "grad_norm": 0.3732830584049225, + "learning_rate": 1.9649993809306802e-05, + "loss": 0.5391, + "step": 6245 + }, + { + "epoch": 0.171499176276771, + "grad_norm": 0.3884325325489044, + "learning_rate": 1.9649880535327918e-05, + "loss": 0.6073, + "step": 6246 + }, + { + "epoch": 0.17152663371773752, + "grad_norm": 0.3908616006374359, + "learning_rate": 1.9649767243348923e-05, + "loss": 0.6546, + "step": 6247 + }, + { + "epoch": 0.17155409115870401, + "grad_norm": 0.42377960681915283, + "learning_rate": 1.9649653933370034e-05, + "loss": 0.6062, + "step": 6248 + }, + { + "epoch": 0.1715815485996705, + "grad_norm": 0.33391210436820984, + "learning_rate": 1.964954060539146e-05, + "loss": 0.5028, + "step": 6249 + }, + { + "epoch": 0.171609006040637, + "grad_norm": 0.45648688077926636, + "learning_rate": 1.964942725941341e-05, + "loss": 0.5591, + "step": 6250 + }, + { + "epoch": 0.17163646348160352, + "grad_norm": 0.3206802308559418, + "learning_rate": 1.96493138954361e-05, + "loss": 0.5494, + "step": 6251 + }, + { + "epoch": 0.17166392092257002, + "grad_norm": 0.40880995988845825, + "learning_rate": 1.9649200513459743e-05, + "loss": 0.6102, + "step": 6252 + }, + { + "epoch": 0.1716913783635365, + "grad_norm": 0.34767529368400574, + "learning_rate": 1.9649087113484545e-05, + "loss": 0.4461, + "step": 6253 + }, + { + "epoch": 0.17171883580450303, + "grad_norm": 0.3832607865333557, + "learning_rate": 1.964897369551072e-05, + "loss": 0.518, + "step": 6254 + }, + { + "epoch": 0.17174629324546953, + "grad_norm": 0.38373807072639465, + "learning_rate": 1.964886025953848e-05, + "loss": 0.6017, + "step": 6255 + }, + { + "epoch": 0.17177375068643602, + "grad_norm": 0.37328845262527466, + "learning_rate": 1.9648746805568035e-05, + "loss": 0.5061, + "step": 6256 + }, + { + "epoch": 0.17180120812740252, + "grad_norm": 0.3716537058353424, + "learning_rate": 1.96486333335996e-05, + "loss": 0.52, + "step": 6257 + }, + { + "epoch": 0.17182866556836904, + "grad_norm": 0.371990829706192, + "learning_rate": 1.9648519843633383e-05, + "loss": 0.565, + "step": 6258 + }, + { + "epoch": 0.17185612300933553, + "grad_norm": 0.3506389260292053, + "learning_rate": 1.9648406335669595e-05, + "loss": 0.5071, + "step": 6259 + }, + { + "epoch": 0.17188358045030203, + "grad_norm": 0.3233608603477478, + "learning_rate": 1.9648292809708455e-05, + "loss": 0.5579, + "step": 6260 + }, + { + "epoch": 0.17191103789126852, + "grad_norm": 0.36530137062072754, + "learning_rate": 1.9648179265750165e-05, + "loss": 0.5947, + "step": 6261 + }, + { + "epoch": 0.17193849533223504, + "grad_norm": 0.3990533649921417, + "learning_rate": 1.964806570379494e-05, + "loss": 0.5365, + "step": 6262 + }, + { + "epoch": 0.17196595277320154, + "grad_norm": 0.3801145851612091, + "learning_rate": 1.9647952123842998e-05, + "loss": 0.4898, + "step": 6263 + }, + { + "epoch": 0.17199341021416803, + "grad_norm": 0.4027240574359894, + "learning_rate": 1.9647838525894543e-05, + "loss": 0.5881, + "step": 6264 + }, + { + "epoch": 0.17202086765513455, + "grad_norm": 0.3803488314151764, + "learning_rate": 1.964772490994979e-05, + "loss": 0.5948, + "step": 6265 + }, + { + "epoch": 0.17204832509610105, + "grad_norm": 0.8180059790611267, + "learning_rate": 1.964761127600895e-05, + "loss": 0.5254, + "step": 6266 + }, + { + "epoch": 0.17207578253706754, + "grad_norm": 0.40044984221458435, + "learning_rate": 1.964749762407224e-05, + "loss": 0.6298, + "step": 6267 + }, + { + "epoch": 0.17210323997803403, + "grad_norm": 0.36538684368133545, + "learning_rate": 1.964738395413986e-05, + "loss": 0.5953, + "step": 6268 + }, + { + "epoch": 0.17213069741900056, + "grad_norm": 0.34932634234428406, + "learning_rate": 1.964727026621204e-05, + "loss": 0.5169, + "step": 6269 + }, + { + "epoch": 0.17215815485996705, + "grad_norm": 0.33127936720848083, + "learning_rate": 1.9647156560288974e-05, + "loss": 0.4665, + "step": 6270 + }, + { + "epoch": 0.17218561230093354, + "grad_norm": 0.4179372489452362, + "learning_rate": 1.9647042836370888e-05, + "loss": 0.67, + "step": 6271 + }, + { + "epoch": 0.17221306974190007, + "grad_norm": 0.3473834991455078, + "learning_rate": 1.9646929094457983e-05, + "loss": 0.5288, + "step": 6272 + }, + { + "epoch": 0.17224052718286656, + "grad_norm": 0.3818121552467346, + "learning_rate": 1.964681533455048e-05, + "loss": 0.5648, + "step": 6273 + }, + { + "epoch": 0.17226798462383305, + "grad_norm": 0.3827752470970154, + "learning_rate": 1.9646701556648585e-05, + "loss": 0.4941, + "step": 6274 + }, + { + "epoch": 0.17229544206479955, + "grad_norm": 0.3540970981121063, + "learning_rate": 1.9646587760752515e-05, + "loss": 0.5288, + "step": 6275 + }, + { + "epoch": 0.17232289950576607, + "grad_norm": 0.38107189536094666, + "learning_rate": 1.9646473946862477e-05, + "loss": 0.4309, + "step": 6276 + }, + { + "epoch": 0.17235035694673256, + "grad_norm": 0.3849301338195801, + "learning_rate": 1.9646360114978688e-05, + "loss": 0.5415, + "step": 6277 + }, + { + "epoch": 0.17237781438769906, + "grad_norm": 0.40841755270957947, + "learning_rate": 1.9646246265101357e-05, + "loss": 0.5745, + "step": 6278 + }, + { + "epoch": 0.17240527182866558, + "grad_norm": 0.4224247634410858, + "learning_rate": 1.96461323972307e-05, + "loss": 0.573, + "step": 6279 + }, + { + "epoch": 0.17243272926963207, + "grad_norm": 0.3294138014316559, + "learning_rate": 1.9646018511366923e-05, + "loss": 0.5042, + "step": 6280 + }, + { + "epoch": 0.17246018671059857, + "grad_norm": 0.3309253454208374, + "learning_rate": 1.964590460751025e-05, + "loss": 0.4453, + "step": 6281 + }, + { + "epoch": 0.17248764415156506, + "grad_norm": 0.41621649265289307, + "learning_rate": 1.9645790685660882e-05, + "loss": 0.5913, + "step": 6282 + }, + { + "epoch": 0.17251510159253158, + "grad_norm": 0.3749745190143585, + "learning_rate": 1.9645676745819037e-05, + "loss": 0.5453, + "step": 6283 + }, + { + "epoch": 0.17254255903349808, + "grad_norm": 0.3563118278980255, + "learning_rate": 1.9645562787984925e-05, + "loss": 0.4887, + "step": 6284 + }, + { + "epoch": 0.17257001647446457, + "grad_norm": 0.40276679396629333, + "learning_rate": 1.964544881215876e-05, + "loss": 0.5948, + "step": 6285 + }, + { + "epoch": 0.1725974739154311, + "grad_norm": 0.36393219232559204, + "learning_rate": 1.964533481834076e-05, + "loss": 0.5534, + "step": 6286 + }, + { + "epoch": 0.1726249313563976, + "grad_norm": 0.3739548325538635, + "learning_rate": 1.9645220806531126e-05, + "loss": 0.5336, + "step": 6287 + }, + { + "epoch": 0.17265238879736408, + "grad_norm": 0.3317618668079376, + "learning_rate": 1.9645106776730076e-05, + "loss": 0.432, + "step": 6288 + }, + { + "epoch": 0.17267984623833058, + "grad_norm": 0.34747225046157837, + "learning_rate": 1.9644992728937825e-05, + "loss": 0.5676, + "step": 6289 + }, + { + "epoch": 0.1727073036792971, + "grad_norm": 0.3953397572040558, + "learning_rate": 1.964487866315459e-05, + "loss": 0.6594, + "step": 6290 + }, + { + "epoch": 0.1727347611202636, + "grad_norm": 0.3388277292251587, + "learning_rate": 1.964476457938057e-05, + "loss": 0.4777, + "step": 6291 + }, + { + "epoch": 0.17276221856123009, + "grad_norm": 0.734465479850769, + "learning_rate": 1.964465047761599e-05, + "loss": 0.637, + "step": 6292 + }, + { + "epoch": 0.1727896760021966, + "grad_norm": 0.44092825055122375, + "learning_rate": 1.9644536357861056e-05, + "loss": 0.6321, + "step": 6293 + }, + { + "epoch": 0.1728171334431631, + "grad_norm": 0.477285772562027, + "learning_rate": 1.9644422220115983e-05, + "loss": 0.6205, + "step": 6294 + }, + { + "epoch": 0.1728445908841296, + "grad_norm": 0.3900772035121918, + "learning_rate": 1.964430806438099e-05, + "loss": 0.4534, + "step": 6295 + }, + { + "epoch": 0.1728720483250961, + "grad_norm": 0.434730589389801, + "learning_rate": 1.9644193890656277e-05, + "loss": 0.4242, + "step": 6296 + }, + { + "epoch": 0.1728995057660626, + "grad_norm": 0.3868487477302551, + "learning_rate": 1.964407969894207e-05, + "loss": 0.49, + "step": 6297 + }, + { + "epoch": 0.1729269632070291, + "grad_norm": 0.4060467481613159, + "learning_rate": 1.9643965489238574e-05, + "loss": 0.6593, + "step": 6298 + }, + { + "epoch": 0.1729544206479956, + "grad_norm": 0.36478641629219055, + "learning_rate": 1.9643851261546006e-05, + "loss": 0.5267, + "step": 6299 + }, + { + "epoch": 0.17298187808896212, + "grad_norm": 0.31271886825561523, + "learning_rate": 1.9643737015864576e-05, + "loss": 0.507, + "step": 6300 + }, + { + "epoch": 0.17300933552992862, + "grad_norm": 0.36355090141296387, + "learning_rate": 1.9643622752194496e-05, + "loss": 0.4224, + "step": 6301 + }, + { + "epoch": 0.1730367929708951, + "grad_norm": 0.4620617926120758, + "learning_rate": 1.9643508470535985e-05, + "loss": 0.6253, + "step": 6302 + }, + { + "epoch": 0.1730642504118616, + "grad_norm": 0.39504072070121765, + "learning_rate": 1.9643394170889255e-05, + "loss": 0.5086, + "step": 6303 + }, + { + "epoch": 0.17309170785282812, + "grad_norm": 0.3610641658306122, + "learning_rate": 1.9643279853254514e-05, + "loss": 0.5337, + "step": 6304 + }, + { + "epoch": 0.17311916529379462, + "grad_norm": 0.3229556977748871, + "learning_rate": 1.9643165517631978e-05, + "loss": 0.4702, + "step": 6305 + }, + { + "epoch": 0.1731466227347611, + "grad_norm": 0.34793514013290405, + "learning_rate": 1.964305116402186e-05, + "loss": 0.5029, + "step": 6306 + }, + { + "epoch": 0.17317408017572763, + "grad_norm": 0.34425070881843567, + "learning_rate": 1.9642936792424377e-05, + "loss": 0.5196, + "step": 6307 + }, + { + "epoch": 0.17320153761669413, + "grad_norm": 0.36562421917915344, + "learning_rate": 1.9642822402839737e-05, + "loss": 0.5748, + "step": 6308 + }, + { + "epoch": 0.17322899505766062, + "grad_norm": 0.37216031551361084, + "learning_rate": 1.9642707995268155e-05, + "loss": 0.526, + "step": 6309 + }, + { + "epoch": 0.17325645249862712, + "grad_norm": 0.33419615030288696, + "learning_rate": 1.9642593569709845e-05, + "loss": 0.5306, + "step": 6310 + }, + { + "epoch": 0.17328390993959364, + "grad_norm": 0.41445431113243103, + "learning_rate": 1.9642479126165022e-05, + "loss": 0.513, + "step": 6311 + }, + { + "epoch": 0.17331136738056013, + "grad_norm": 0.38927289843559265, + "learning_rate": 1.9642364664633897e-05, + "loss": 0.4936, + "step": 6312 + }, + { + "epoch": 0.17333882482152663, + "grad_norm": 0.4009464681148529, + "learning_rate": 1.9642250185116686e-05, + "loss": 0.5966, + "step": 6313 + }, + { + "epoch": 0.17336628226249315, + "grad_norm": 0.3917927145957947, + "learning_rate": 1.96421356876136e-05, + "loss": 0.5538, + "step": 6314 + }, + { + "epoch": 0.17339373970345964, + "grad_norm": 0.3748290240764618, + "learning_rate": 1.964202117212485e-05, + "loss": 0.5734, + "step": 6315 + }, + { + "epoch": 0.17342119714442614, + "grad_norm": 0.458927720785141, + "learning_rate": 1.9641906638650657e-05, + "loss": 0.6385, + "step": 6316 + }, + { + "epoch": 0.17344865458539263, + "grad_norm": 0.3693960905075073, + "learning_rate": 1.964179208719123e-05, + "loss": 0.5027, + "step": 6317 + }, + { + "epoch": 0.17347611202635915, + "grad_norm": 0.4389669895172119, + "learning_rate": 1.9641677517746784e-05, + "loss": 0.5542, + "step": 6318 + }, + { + "epoch": 0.17350356946732565, + "grad_norm": 0.3174303472042084, + "learning_rate": 1.9641562930317534e-05, + "loss": 0.43, + "step": 6319 + }, + { + "epoch": 0.17353102690829214, + "grad_norm": 0.39448902010917664, + "learning_rate": 1.9641448324903688e-05, + "loss": 0.5343, + "step": 6320 + }, + { + "epoch": 0.17355848434925866, + "grad_norm": 0.41735923290252686, + "learning_rate": 1.9641333701505465e-05, + "loss": 0.4641, + "step": 6321 + }, + { + "epoch": 0.17358594179022516, + "grad_norm": 0.4339692294597626, + "learning_rate": 1.9641219060123076e-05, + "loss": 0.542, + "step": 6322 + }, + { + "epoch": 0.17361339923119165, + "grad_norm": 0.39699801802635193, + "learning_rate": 1.964110440075674e-05, + "loss": 0.4867, + "step": 6323 + }, + { + "epoch": 0.17364085667215814, + "grad_norm": 0.3425326645374298, + "learning_rate": 1.9640989723406666e-05, + "loss": 0.5199, + "step": 6324 + }, + { + "epoch": 0.17366831411312467, + "grad_norm": 0.40067920088768005, + "learning_rate": 1.964087502807307e-05, + "loss": 0.508, + "step": 6325 + }, + { + "epoch": 0.17369577155409116, + "grad_norm": 0.34886229038238525, + "learning_rate": 1.964076031475616e-05, + "loss": 0.5465, + "step": 6326 + }, + { + "epoch": 0.17372322899505765, + "grad_norm": 0.32790178060531616, + "learning_rate": 1.9640645583456158e-05, + "loss": 0.4505, + "step": 6327 + }, + { + "epoch": 0.17375068643602415, + "grad_norm": 0.4178152084350586, + "learning_rate": 1.9640530834173278e-05, + "loss": 0.4506, + "step": 6328 + }, + { + "epoch": 0.17377814387699067, + "grad_norm": 0.3231436610221863, + "learning_rate": 1.964041606690773e-05, + "loss": 0.5097, + "step": 6329 + }, + { + "epoch": 0.17380560131795716, + "grad_norm": 0.34856337308883667, + "learning_rate": 1.9640301281659724e-05, + "loss": 0.5981, + "step": 6330 + }, + { + "epoch": 0.17383305875892366, + "grad_norm": 0.6018600463867188, + "learning_rate": 1.9640186478429485e-05, + "loss": 0.4509, + "step": 6331 + }, + { + "epoch": 0.17386051619989018, + "grad_norm": 0.33788609504699707, + "learning_rate": 1.964007165721722e-05, + "loss": 0.4822, + "step": 6332 + }, + { + "epoch": 0.17388797364085667, + "grad_norm": 0.3100905418395996, + "learning_rate": 1.9639956818023144e-05, + "loss": 0.4975, + "step": 6333 + }, + { + "epoch": 0.17391543108182317, + "grad_norm": 0.3466983735561371, + "learning_rate": 1.963984196084747e-05, + "loss": 0.5216, + "step": 6334 + }, + { + "epoch": 0.17394288852278966, + "grad_norm": 0.3879547119140625, + "learning_rate": 1.9639727085690415e-05, + "loss": 0.5636, + "step": 6335 + }, + { + "epoch": 0.17397034596375618, + "grad_norm": 0.35947099328041077, + "learning_rate": 1.9639612192552192e-05, + "loss": 0.5285, + "step": 6336 + }, + { + "epoch": 0.17399780340472268, + "grad_norm": 0.4750586450099945, + "learning_rate": 1.963949728143302e-05, + "loss": 0.6105, + "step": 6337 + }, + { + "epoch": 0.17402526084568917, + "grad_norm": 0.4179497957229614, + "learning_rate": 1.9639382352333107e-05, + "loss": 0.5225, + "step": 6338 + }, + { + "epoch": 0.1740527182866557, + "grad_norm": 0.4021699130535126, + "learning_rate": 1.9639267405252668e-05, + "loss": 0.6459, + "step": 6339 + }, + { + "epoch": 0.1740801757276222, + "grad_norm": 0.3746078312397003, + "learning_rate": 1.963915244019192e-05, + "loss": 0.4356, + "step": 6340 + }, + { + "epoch": 0.17410763316858868, + "grad_norm": 0.3728315532207489, + "learning_rate": 1.9639037457151072e-05, + "loss": 0.5758, + "step": 6341 + }, + { + "epoch": 0.17413509060955518, + "grad_norm": 0.327722430229187, + "learning_rate": 1.963892245613035e-05, + "loss": 0.4156, + "step": 6342 + }, + { + "epoch": 0.1741625480505217, + "grad_norm": 0.4068165719509125, + "learning_rate": 1.9638807437129955e-05, + "loss": 0.5343, + "step": 6343 + }, + { + "epoch": 0.1741900054914882, + "grad_norm": 0.35643288493156433, + "learning_rate": 1.963869240015011e-05, + "loss": 0.5954, + "step": 6344 + }, + { + "epoch": 0.1742174629324547, + "grad_norm": 0.3351595401763916, + "learning_rate": 1.9638577345191028e-05, + "loss": 0.4953, + "step": 6345 + }, + { + "epoch": 0.1742449203734212, + "grad_norm": 0.34982210397720337, + "learning_rate": 1.963846227225292e-05, + "loss": 0.5311, + "step": 6346 + }, + { + "epoch": 0.1742723778143877, + "grad_norm": 0.36710089445114136, + "learning_rate": 1.9638347181336008e-05, + "loss": 0.5392, + "step": 6347 + }, + { + "epoch": 0.1742998352553542, + "grad_norm": 0.34796142578125, + "learning_rate": 1.9638232072440503e-05, + "loss": 0.5725, + "step": 6348 + }, + { + "epoch": 0.1743272926963207, + "grad_norm": 0.36021822690963745, + "learning_rate": 1.9638116945566614e-05, + "loss": 0.553, + "step": 6349 + }, + { + "epoch": 0.1743547501372872, + "grad_norm": 0.36497893929481506, + "learning_rate": 1.9638001800714563e-05, + "loss": 0.5909, + "step": 6350 + }, + { + "epoch": 0.1743822075782537, + "grad_norm": 0.3209497332572937, + "learning_rate": 1.9637886637884563e-05, + "loss": 0.4572, + "step": 6351 + }, + { + "epoch": 0.1744096650192202, + "grad_norm": 0.3253692090511322, + "learning_rate": 1.9637771457076828e-05, + "loss": 0.5359, + "step": 6352 + }, + { + "epoch": 0.17443712246018672, + "grad_norm": 0.34648871421813965, + "learning_rate": 1.9637656258291574e-05, + "loss": 0.4315, + "step": 6353 + }, + { + "epoch": 0.17446457990115322, + "grad_norm": 0.3678458333015442, + "learning_rate": 1.9637541041529014e-05, + "loss": 0.5235, + "step": 6354 + }, + { + "epoch": 0.1744920373421197, + "grad_norm": 0.342326819896698, + "learning_rate": 1.9637425806789366e-05, + "loss": 0.5383, + "step": 6355 + }, + { + "epoch": 0.1745194947830862, + "grad_norm": 0.3421344459056854, + "learning_rate": 1.963731055407284e-05, + "loss": 0.5501, + "step": 6356 + }, + { + "epoch": 0.17454695222405273, + "grad_norm": 0.3886779844760895, + "learning_rate": 1.9637195283379652e-05, + "loss": 0.5708, + "step": 6357 + }, + { + "epoch": 0.17457440966501922, + "grad_norm": 0.360370397567749, + "learning_rate": 1.9637079994710026e-05, + "loss": 0.484, + "step": 6358 + }, + { + "epoch": 0.17460186710598571, + "grad_norm": 0.40196460485458374, + "learning_rate": 1.9636964688064165e-05, + "loss": 0.5759, + "step": 6359 + }, + { + "epoch": 0.17462932454695224, + "grad_norm": 0.4451005458831787, + "learning_rate": 1.963684936344229e-05, + "loss": 0.5913, + "step": 6360 + }, + { + "epoch": 0.17465678198791873, + "grad_norm": 0.3576388955116272, + "learning_rate": 1.9636734020844614e-05, + "loss": 0.56, + "step": 6361 + }, + { + "epoch": 0.17468423942888522, + "grad_norm": 0.37611058354377747, + "learning_rate": 1.9636618660271354e-05, + "loss": 0.6004, + "step": 6362 + }, + { + "epoch": 0.17471169686985172, + "grad_norm": 0.5051586627960205, + "learning_rate": 1.963650328172273e-05, + "loss": 0.5222, + "step": 6363 + }, + { + "epoch": 0.17473915431081824, + "grad_norm": 0.3770478665828705, + "learning_rate": 1.9636387885198946e-05, + "loss": 0.6314, + "step": 6364 + }, + { + "epoch": 0.17476661175178473, + "grad_norm": 0.3351290225982666, + "learning_rate": 1.9636272470700224e-05, + "loss": 0.5561, + "step": 6365 + }, + { + "epoch": 0.17479406919275123, + "grad_norm": 0.3918443024158478, + "learning_rate": 1.963615703822678e-05, + "loss": 0.5063, + "step": 6366 + }, + { + "epoch": 0.17482152663371775, + "grad_norm": 0.3676370084285736, + "learning_rate": 1.9636041587778824e-05, + "loss": 0.5228, + "step": 6367 + }, + { + "epoch": 0.17484898407468424, + "grad_norm": 0.3402837812900543, + "learning_rate": 1.9635926119356575e-05, + "loss": 0.5562, + "step": 6368 + }, + { + "epoch": 0.17487644151565074, + "grad_norm": 0.39500725269317627, + "learning_rate": 1.9635810632960254e-05, + "loss": 0.5944, + "step": 6369 + }, + { + "epoch": 0.17490389895661723, + "grad_norm": 0.3503170311450958, + "learning_rate": 1.9635695128590066e-05, + "loss": 0.495, + "step": 6370 + }, + { + "epoch": 0.17493135639758375, + "grad_norm": 0.35058295726776123, + "learning_rate": 1.9635579606246232e-05, + "loss": 0.5029, + "step": 6371 + }, + { + "epoch": 0.17495881383855025, + "grad_norm": 0.40651723742485046, + "learning_rate": 1.963546406592897e-05, + "loss": 0.5571, + "step": 6372 + }, + { + "epoch": 0.17498627127951674, + "grad_norm": 0.3694835603237152, + "learning_rate": 1.9635348507638486e-05, + "loss": 0.5008, + "step": 6373 + }, + { + "epoch": 0.17501372872048326, + "grad_norm": 0.3399786949157715, + "learning_rate": 1.9635232931375005e-05, + "loss": 0.5716, + "step": 6374 + }, + { + "epoch": 0.17504118616144976, + "grad_norm": 0.39695432782173157, + "learning_rate": 1.963511733713874e-05, + "loss": 0.627, + "step": 6375 + }, + { + "epoch": 0.17506864360241625, + "grad_norm": 0.365743488073349, + "learning_rate": 1.9635001724929906e-05, + "loss": 0.6279, + "step": 6376 + }, + { + "epoch": 0.17509610104338275, + "grad_norm": 0.3500584363937378, + "learning_rate": 1.9634886094748718e-05, + "loss": 0.5325, + "step": 6377 + }, + { + "epoch": 0.17512355848434927, + "grad_norm": 0.43675854802131653, + "learning_rate": 1.9634770446595396e-05, + "loss": 0.5701, + "step": 6378 + }, + { + "epoch": 0.17515101592531576, + "grad_norm": 0.3514039218425751, + "learning_rate": 1.9634654780470148e-05, + "loss": 0.5671, + "step": 6379 + }, + { + "epoch": 0.17517847336628226, + "grad_norm": 0.3806209862232208, + "learning_rate": 1.9634539096373193e-05, + "loss": 0.5211, + "step": 6380 + }, + { + "epoch": 0.17520593080724878, + "grad_norm": 0.4500492513179779, + "learning_rate": 1.963442339430475e-05, + "loss": 0.5731, + "step": 6381 + }, + { + "epoch": 0.17523338824821527, + "grad_norm": 0.3718188405036926, + "learning_rate": 1.963430767426503e-05, + "loss": 0.5612, + "step": 6382 + }, + { + "epoch": 0.17526084568918177, + "grad_norm": 0.4168740510940552, + "learning_rate": 1.9634191936254253e-05, + "loss": 0.5218, + "step": 6383 + }, + { + "epoch": 0.17528830313014826, + "grad_norm": 0.34360915422439575, + "learning_rate": 1.9634076180272633e-05, + "loss": 0.5029, + "step": 6384 + }, + { + "epoch": 0.17531576057111478, + "grad_norm": 0.359855055809021, + "learning_rate": 1.9633960406320386e-05, + "loss": 0.5945, + "step": 6385 + }, + { + "epoch": 0.17534321801208128, + "grad_norm": 0.35300198197364807, + "learning_rate": 1.963384461439773e-05, + "loss": 0.5319, + "step": 6386 + }, + { + "epoch": 0.17537067545304777, + "grad_norm": 0.3802572190761566, + "learning_rate": 1.9633728804504874e-05, + "loss": 0.5856, + "step": 6387 + }, + { + "epoch": 0.1753981328940143, + "grad_norm": 0.39630258083343506, + "learning_rate": 1.9633612976642043e-05, + "loss": 0.5146, + "step": 6388 + }, + { + "epoch": 0.17542559033498079, + "grad_norm": 0.3693968951702118, + "learning_rate": 1.963349713080945e-05, + "loss": 0.55, + "step": 6389 + }, + { + "epoch": 0.17545304777594728, + "grad_norm": 0.4086429178714752, + "learning_rate": 1.9633381267007308e-05, + "loss": 0.578, + "step": 6390 + }, + { + "epoch": 0.17548050521691377, + "grad_norm": 0.4700150489807129, + "learning_rate": 1.9633265385235834e-05, + "loss": 0.488, + "step": 6391 + }, + { + "epoch": 0.1755079626578803, + "grad_norm": 0.3728618025779724, + "learning_rate": 1.9633149485495247e-05, + "loss": 0.5163, + "step": 6392 + }, + { + "epoch": 0.1755354200988468, + "grad_norm": 0.35823559761047363, + "learning_rate": 1.9633033567785765e-05, + "loss": 0.5552, + "step": 6393 + }, + { + "epoch": 0.17556287753981328, + "grad_norm": 0.3725329637527466, + "learning_rate": 1.9632917632107596e-05, + "loss": 0.6232, + "step": 6394 + }, + { + "epoch": 0.17559033498077978, + "grad_norm": 0.35217419266700745, + "learning_rate": 1.9632801678460965e-05, + "loss": 0.608, + "step": 6395 + }, + { + "epoch": 0.1756177924217463, + "grad_norm": 0.3340470790863037, + "learning_rate": 1.963268570684608e-05, + "loss": 0.616, + "step": 6396 + }, + { + "epoch": 0.1756452498627128, + "grad_norm": 0.32194504141807556, + "learning_rate": 1.9632569717263164e-05, + "loss": 0.4188, + "step": 6397 + }, + { + "epoch": 0.1756727073036793, + "grad_norm": 0.386080801486969, + "learning_rate": 1.9632453709712434e-05, + "loss": 0.597, + "step": 6398 + }, + { + "epoch": 0.1757001647446458, + "grad_norm": 0.38633960485458374, + "learning_rate": 1.96323376841941e-05, + "loss": 0.5767, + "step": 6399 + }, + { + "epoch": 0.1757276221856123, + "grad_norm": 0.40756142139434814, + "learning_rate": 1.9632221640708378e-05, + "loss": 0.5511, + "step": 6400 + }, + { + "epoch": 0.1757550796265788, + "grad_norm": 0.4353269934654236, + "learning_rate": 1.9632105579255497e-05, + "loss": 0.6244, + "step": 6401 + }, + { + "epoch": 0.1757825370675453, + "grad_norm": 0.4366411864757538, + "learning_rate": 1.963198949983566e-05, + "loss": 0.5699, + "step": 6402 + }, + { + "epoch": 0.1758099945085118, + "grad_norm": 0.37848639488220215, + "learning_rate": 1.9631873402449087e-05, + "loss": 0.5527, + "step": 6403 + }, + { + "epoch": 0.1758374519494783, + "grad_norm": 0.33644554018974304, + "learning_rate": 1.9631757287096e-05, + "loss": 0.539, + "step": 6404 + }, + { + "epoch": 0.1758649093904448, + "grad_norm": 0.37744849920272827, + "learning_rate": 1.963164115377661e-05, + "loss": 0.5777, + "step": 6405 + }, + { + "epoch": 0.17589236683141132, + "grad_norm": 0.40562304854393005, + "learning_rate": 1.9631525002491136e-05, + "loss": 0.5727, + "step": 6406 + }, + { + "epoch": 0.17591982427237782, + "grad_norm": 0.37823766469955444, + "learning_rate": 1.9631408833239793e-05, + "loss": 0.6612, + "step": 6407 + }, + { + "epoch": 0.1759472817133443, + "grad_norm": 0.3302900791168213, + "learning_rate": 1.9631292646022797e-05, + "loss": 0.4957, + "step": 6408 + }, + { + "epoch": 0.1759747391543108, + "grad_norm": 0.3334633409976959, + "learning_rate": 1.9631176440840368e-05, + "loss": 0.4909, + "step": 6409 + }, + { + "epoch": 0.17600219659527733, + "grad_norm": 0.38149914145469666, + "learning_rate": 1.9631060217692722e-05, + "loss": 0.6034, + "step": 6410 + }, + { + "epoch": 0.17602965403624382, + "grad_norm": 0.3516790270805359, + "learning_rate": 1.9630943976580073e-05, + "loss": 0.5003, + "step": 6411 + }, + { + "epoch": 0.17605711147721032, + "grad_norm": 0.3697787821292877, + "learning_rate": 1.9630827717502642e-05, + "loss": 0.5082, + "step": 6412 + }, + { + "epoch": 0.17608456891817684, + "grad_norm": 0.3624064028263092, + "learning_rate": 1.9630711440460638e-05, + "loss": 0.6165, + "step": 6413 + }, + { + "epoch": 0.17611202635914333, + "grad_norm": 0.3812905251979828, + "learning_rate": 1.963059514545429e-05, + "loss": 0.6276, + "step": 6414 + }, + { + "epoch": 0.17613948380010983, + "grad_norm": 0.35011351108551025, + "learning_rate": 1.9630478832483802e-05, + "loss": 0.5937, + "step": 6415 + }, + { + "epoch": 0.17616694124107632, + "grad_norm": 0.3502063751220703, + "learning_rate": 1.96303625015494e-05, + "loss": 0.5393, + "step": 6416 + }, + { + "epoch": 0.17619439868204284, + "grad_norm": 0.3670531213283539, + "learning_rate": 1.96302461526513e-05, + "loss": 0.5753, + "step": 6417 + }, + { + "epoch": 0.17622185612300933, + "grad_norm": 0.35940277576446533, + "learning_rate": 1.9630129785789717e-05, + "loss": 0.4311, + "step": 6418 + }, + { + "epoch": 0.17624931356397583, + "grad_norm": 0.3208222985267639, + "learning_rate": 1.9630013400964868e-05, + "loss": 0.5306, + "step": 6419 + }, + { + "epoch": 0.17627677100494235, + "grad_norm": 0.3396967053413391, + "learning_rate": 1.962989699817697e-05, + "loss": 0.5182, + "step": 6420 + }, + { + "epoch": 0.17630422844590884, + "grad_norm": 0.3762543797492981, + "learning_rate": 1.962978057742624e-05, + "loss": 0.471, + "step": 6421 + }, + { + "epoch": 0.17633168588687534, + "grad_norm": 0.40506893396377563, + "learning_rate": 1.9629664138712898e-05, + "loss": 0.5752, + "step": 6422 + }, + { + "epoch": 0.17635914332784183, + "grad_norm": 0.374995619058609, + "learning_rate": 1.9629547682037157e-05, + "loss": 0.6179, + "step": 6423 + }, + { + "epoch": 0.17638660076880835, + "grad_norm": 0.3484170734882355, + "learning_rate": 1.9629431207399236e-05, + "loss": 0.4731, + "step": 6424 + }, + { + "epoch": 0.17641405820977485, + "grad_norm": 0.3957699239253998, + "learning_rate": 1.9629314714799354e-05, + "loss": 0.628, + "step": 6425 + }, + { + "epoch": 0.17644151565074134, + "grad_norm": 0.36423155665397644, + "learning_rate": 1.9629198204237726e-05, + "loss": 0.5406, + "step": 6426 + }, + { + "epoch": 0.17646897309170786, + "grad_norm": 0.39159882068634033, + "learning_rate": 1.962908167571457e-05, + "loss": 0.6011, + "step": 6427 + }, + { + "epoch": 0.17649643053267436, + "grad_norm": 0.37953871488571167, + "learning_rate": 1.96289651292301e-05, + "loss": 0.6316, + "step": 6428 + }, + { + "epoch": 0.17652388797364085, + "grad_norm": 0.38535475730895996, + "learning_rate": 1.9628848564784543e-05, + "loss": 0.5554, + "step": 6429 + }, + { + "epoch": 0.17655134541460735, + "grad_norm": 0.37640494108200073, + "learning_rate": 1.9628731982378108e-05, + "loss": 0.615, + "step": 6430 + }, + { + "epoch": 0.17657880285557387, + "grad_norm": 0.41096341609954834, + "learning_rate": 1.9628615382011014e-05, + "loss": 0.4458, + "step": 6431 + }, + { + "epoch": 0.17660626029654036, + "grad_norm": 1.0131949186325073, + "learning_rate": 1.962849876368348e-05, + "loss": 0.5573, + "step": 6432 + }, + { + "epoch": 0.17663371773750686, + "grad_norm": 0.4212898910045624, + "learning_rate": 1.962838212739572e-05, + "loss": 0.5766, + "step": 6433 + }, + { + "epoch": 0.17666117517847338, + "grad_norm": 0.37767308950424194, + "learning_rate": 1.962826547314796e-05, + "loss": 0.5755, + "step": 6434 + }, + { + "epoch": 0.17668863261943987, + "grad_norm": 0.3814872205257416, + "learning_rate": 1.9628148800940407e-05, + "loss": 0.6103, + "step": 6435 + }, + { + "epoch": 0.17671609006040637, + "grad_norm": 0.3215424418449402, + "learning_rate": 1.962803211077328e-05, + "loss": 0.4802, + "step": 6436 + }, + { + "epoch": 0.17674354750137286, + "grad_norm": 0.36425426602363586, + "learning_rate": 1.9627915402646808e-05, + "loss": 0.5513, + "step": 6437 + }, + { + "epoch": 0.17677100494233938, + "grad_norm": 0.36910152435302734, + "learning_rate": 1.9627798676561197e-05, + "loss": 0.5033, + "step": 6438 + }, + { + "epoch": 0.17679846238330588, + "grad_norm": 0.5513092875480652, + "learning_rate": 1.962768193251667e-05, + "loss": 0.5381, + "step": 6439 + }, + { + "epoch": 0.17682591982427237, + "grad_norm": 0.3624698519706726, + "learning_rate": 1.9627565170513444e-05, + "loss": 0.5308, + "step": 6440 + }, + { + "epoch": 0.1768533772652389, + "grad_norm": 0.37560200691223145, + "learning_rate": 1.9627448390551736e-05, + "loss": 0.4949, + "step": 6441 + }, + { + "epoch": 0.1768808347062054, + "grad_norm": 0.3342743217945099, + "learning_rate": 1.962733159263176e-05, + "loss": 0.521, + "step": 6442 + }, + { + "epoch": 0.17690829214717188, + "grad_norm": 0.4421239495277405, + "learning_rate": 1.9627214776753742e-05, + "loss": 0.5194, + "step": 6443 + }, + { + "epoch": 0.17693574958813837, + "grad_norm": 0.37814947962760925, + "learning_rate": 1.9627097942917896e-05, + "loss": 0.5377, + "step": 6444 + }, + { + "epoch": 0.1769632070291049, + "grad_norm": 0.608311116695404, + "learning_rate": 1.9626981091124436e-05, + "loss": 0.561, + "step": 6445 + }, + { + "epoch": 0.1769906644700714, + "grad_norm": 0.33120518922805786, + "learning_rate": 1.962686422137359e-05, + "loss": 0.4672, + "step": 6446 + }, + { + "epoch": 0.17701812191103788, + "grad_norm": 0.5176143050193787, + "learning_rate": 1.9626747333665565e-05, + "loss": 0.5516, + "step": 6447 + }, + { + "epoch": 0.1770455793520044, + "grad_norm": 0.42105114459991455, + "learning_rate": 1.9626630428000583e-05, + "loss": 0.5555, + "step": 6448 + }, + { + "epoch": 0.1770730367929709, + "grad_norm": 0.3341221213340759, + "learning_rate": 1.9626513504378865e-05, + "loss": 0.5196, + "step": 6449 + }, + { + "epoch": 0.1771004942339374, + "grad_norm": 0.37322095036506653, + "learning_rate": 1.9626396562800628e-05, + "loss": 0.5776, + "step": 6450 + }, + { + "epoch": 0.1771279516749039, + "grad_norm": 0.36091160774230957, + "learning_rate": 1.9626279603266085e-05, + "loss": 0.5891, + "step": 6451 + }, + { + "epoch": 0.1771554091158704, + "grad_norm": 0.34840884804725647, + "learning_rate": 1.962616262577546e-05, + "loss": 0.5028, + "step": 6452 + }, + { + "epoch": 0.1771828665568369, + "grad_norm": 0.39045479893684387, + "learning_rate": 1.962604563032897e-05, + "loss": 0.5611, + "step": 6453 + }, + { + "epoch": 0.1772103239978034, + "grad_norm": 0.45066890120506287, + "learning_rate": 1.9625928616926836e-05, + "loss": 0.4467, + "step": 6454 + }, + { + "epoch": 0.17723778143876992, + "grad_norm": 0.3778124153614044, + "learning_rate": 1.9625811585569266e-05, + "loss": 0.5346, + "step": 6455 + }, + { + "epoch": 0.17726523887973641, + "grad_norm": 0.381182461977005, + "learning_rate": 1.962569453625649e-05, + "loss": 0.4932, + "step": 6456 + }, + { + "epoch": 0.1772926963207029, + "grad_norm": 0.3399115204811096, + "learning_rate": 1.962557746898872e-05, + "loss": 0.5037, + "step": 6457 + }, + { + "epoch": 0.1773201537616694, + "grad_norm": 0.32503873109817505, + "learning_rate": 1.9625460383766177e-05, + "loss": 0.514, + "step": 6458 + }, + { + "epoch": 0.17734761120263592, + "grad_norm": 0.358661949634552, + "learning_rate": 1.9625343280589077e-05, + "loss": 0.4876, + "step": 6459 + }, + { + "epoch": 0.17737506864360242, + "grad_norm": 0.43296414613723755, + "learning_rate": 1.962522615945764e-05, + "loss": 0.5611, + "step": 6460 + }, + { + "epoch": 0.1774025260845689, + "grad_norm": 0.4234931468963623, + "learning_rate": 1.9625109020372085e-05, + "loss": 0.5488, + "step": 6461 + }, + { + "epoch": 0.1774299835255354, + "grad_norm": 0.37605953216552734, + "learning_rate": 1.962499186333263e-05, + "loss": 0.4267, + "step": 6462 + }, + { + "epoch": 0.17745744096650193, + "grad_norm": 0.319707989692688, + "learning_rate": 1.962487468833949e-05, + "loss": 0.4843, + "step": 6463 + }, + { + "epoch": 0.17748489840746842, + "grad_norm": 0.3673328161239624, + "learning_rate": 1.962475749539289e-05, + "loss": 0.5378, + "step": 6464 + }, + { + "epoch": 0.17751235584843492, + "grad_norm": 0.3181294798851013, + "learning_rate": 1.9624640284493045e-05, + "loss": 0.4759, + "step": 6465 + }, + { + "epoch": 0.17753981328940144, + "grad_norm": 0.3667600750923157, + "learning_rate": 1.9624523055640172e-05, + "loss": 0.4326, + "step": 6466 + }, + { + "epoch": 0.17756727073036793, + "grad_norm": 0.42284926772117615, + "learning_rate": 1.9624405808834494e-05, + "loss": 0.5874, + "step": 6467 + }, + { + "epoch": 0.17759472817133443, + "grad_norm": 0.3395390212535858, + "learning_rate": 1.9624288544076226e-05, + "loss": 0.4754, + "step": 6468 + }, + { + "epoch": 0.17762218561230092, + "grad_norm": 0.3464043140411377, + "learning_rate": 1.962417126136559e-05, + "loss": 0.5304, + "step": 6469 + }, + { + "epoch": 0.17764964305326744, + "grad_norm": 0.345477819442749, + "learning_rate": 1.96240539607028e-05, + "loss": 0.5944, + "step": 6470 + }, + { + "epoch": 0.17767710049423394, + "grad_norm": 0.3609393537044525, + "learning_rate": 1.962393664208808e-05, + "loss": 0.4542, + "step": 6471 + }, + { + "epoch": 0.17770455793520043, + "grad_norm": 0.37982383370399475, + "learning_rate": 1.9623819305521645e-05, + "loss": 0.5055, + "step": 6472 + }, + { + "epoch": 0.17773201537616695, + "grad_norm": 0.341413289308548, + "learning_rate": 1.9623701951003717e-05, + "loss": 0.5869, + "step": 6473 + }, + { + "epoch": 0.17775947281713345, + "grad_norm": 0.34792789816856384, + "learning_rate": 1.9623584578534514e-05, + "loss": 0.5154, + "step": 6474 + }, + { + "epoch": 0.17778693025809994, + "grad_norm": 0.3972724378108978, + "learning_rate": 1.962346718811425e-05, + "loss": 0.5417, + "step": 6475 + }, + { + "epoch": 0.17781438769906643, + "grad_norm": 0.4011193811893463, + "learning_rate": 1.9623349779743152e-05, + "loss": 0.5466, + "step": 6476 + }, + { + "epoch": 0.17784184514003296, + "grad_norm": 0.34570643305778503, + "learning_rate": 1.9623232353421434e-05, + "loss": 0.5971, + "step": 6477 + }, + { + "epoch": 0.17786930258099945, + "grad_norm": 0.361625999212265, + "learning_rate": 1.9623114909149316e-05, + "loss": 0.5217, + "step": 6478 + }, + { + "epoch": 0.17789676002196594, + "grad_norm": 0.4410037398338318, + "learning_rate": 1.962299744692702e-05, + "loss": 0.5999, + "step": 6479 + }, + { + "epoch": 0.17792421746293247, + "grad_norm": 0.3798205256462097, + "learning_rate": 1.962287996675476e-05, + "loss": 0.5198, + "step": 6480 + }, + { + "epoch": 0.17795167490389896, + "grad_norm": 0.4130730628967285, + "learning_rate": 1.962276246863276e-05, + "loss": 0.5762, + "step": 6481 + }, + { + "epoch": 0.17797913234486545, + "grad_norm": 0.37875789403915405, + "learning_rate": 1.9622644952561233e-05, + "loss": 0.5641, + "step": 6482 + }, + { + "epoch": 0.17800658978583195, + "grad_norm": 0.4934975802898407, + "learning_rate": 1.9622527418540404e-05, + "loss": 0.5819, + "step": 6483 + }, + { + "epoch": 0.17803404722679847, + "grad_norm": 0.36091047525405884, + "learning_rate": 1.9622409866570492e-05, + "loss": 0.5, + "step": 6484 + }, + { + "epoch": 0.17806150466776496, + "grad_norm": 0.33989667892456055, + "learning_rate": 1.962229229665171e-05, + "loss": 0.5643, + "step": 6485 + }, + { + "epoch": 0.17808896210873146, + "grad_norm": 0.3599645793437958, + "learning_rate": 1.9622174708784284e-05, + "loss": 0.5775, + "step": 6486 + }, + { + "epoch": 0.17811641954969798, + "grad_norm": 0.7547571063041687, + "learning_rate": 1.9622057102968434e-05, + "loss": 0.5833, + "step": 6487 + }, + { + "epoch": 0.17814387699066447, + "grad_norm": 0.3046639561653137, + "learning_rate": 1.9621939479204373e-05, + "loss": 0.549, + "step": 6488 + }, + { + "epoch": 0.17817133443163097, + "grad_norm": 0.32650133967399597, + "learning_rate": 1.9621821837492324e-05, + "loss": 0.4548, + "step": 6489 + }, + { + "epoch": 0.17819879187259746, + "grad_norm": 0.3586108088493347, + "learning_rate": 1.9621704177832507e-05, + "loss": 0.5622, + "step": 6490 + }, + { + "epoch": 0.17822624931356398, + "grad_norm": 0.36318275332450867, + "learning_rate": 1.9621586500225143e-05, + "loss": 0.4764, + "step": 6491 + }, + { + "epoch": 0.17825370675453048, + "grad_norm": 0.4827343225479126, + "learning_rate": 1.9621468804670445e-05, + "loss": 0.5939, + "step": 6492 + }, + { + "epoch": 0.17828116419549697, + "grad_norm": 0.39023643732070923, + "learning_rate": 1.962135109116864e-05, + "loss": 0.512, + "step": 6493 + }, + { + "epoch": 0.1783086216364635, + "grad_norm": 0.3939274549484253, + "learning_rate": 1.9621233359719946e-05, + "loss": 0.5771, + "step": 6494 + }, + { + "epoch": 0.17833607907743, + "grad_norm": 0.36213183403015137, + "learning_rate": 1.9621115610324578e-05, + "loss": 0.4758, + "step": 6495 + }, + { + "epoch": 0.17836353651839648, + "grad_norm": 0.3655412495136261, + "learning_rate": 1.9620997842982757e-05, + "loss": 0.5309, + "step": 6496 + }, + { + "epoch": 0.17839099395936298, + "grad_norm": 0.36725351214408875, + "learning_rate": 1.962088005769471e-05, + "loss": 0.5387, + "step": 6497 + }, + { + "epoch": 0.1784184514003295, + "grad_norm": 0.3786783516407013, + "learning_rate": 1.9620762254460647e-05, + "loss": 0.4109, + "step": 6498 + }, + { + "epoch": 0.178445908841296, + "grad_norm": 0.3747915029525757, + "learning_rate": 1.9620644433280792e-05, + "loss": 0.5941, + "step": 6499 + }, + { + "epoch": 0.17847336628226249, + "grad_norm": 0.3261842131614685, + "learning_rate": 1.9620526594155365e-05, + "loss": 0.5169, + "step": 6500 + }, + { + "epoch": 0.178500823723229, + "grad_norm": 0.37986063957214355, + "learning_rate": 1.9620408737084586e-05, + "loss": 0.6116, + "step": 6501 + }, + { + "epoch": 0.1785282811641955, + "grad_norm": 0.4959363341331482, + "learning_rate": 1.9620290862068675e-05, + "loss": 0.5989, + "step": 6502 + }, + { + "epoch": 0.178555738605162, + "grad_norm": 0.32684022188186646, + "learning_rate": 1.962017296910785e-05, + "loss": 0.4652, + "step": 6503 + }, + { + "epoch": 0.1785831960461285, + "grad_norm": 0.2913042902946472, + "learning_rate": 1.962005505820233e-05, + "loss": 0.4077, + "step": 6504 + }, + { + "epoch": 0.178610653487095, + "grad_norm": 0.34130430221557617, + "learning_rate": 1.961993712935234e-05, + "loss": 0.4612, + "step": 6505 + }, + { + "epoch": 0.1786381109280615, + "grad_norm": 0.3453182876110077, + "learning_rate": 1.9619819182558094e-05, + "loss": 0.4867, + "step": 6506 + }, + { + "epoch": 0.178665568369028, + "grad_norm": 0.3485899567604065, + "learning_rate": 1.9619701217819815e-05, + "loss": 0.4725, + "step": 6507 + }, + { + "epoch": 0.17869302580999452, + "grad_norm": 0.3775183856487274, + "learning_rate": 1.9619583235137726e-05, + "loss": 0.5559, + "step": 6508 + }, + { + "epoch": 0.17872048325096102, + "grad_norm": 0.37497255206108093, + "learning_rate": 1.9619465234512044e-05, + "loss": 0.5232, + "step": 6509 + }, + { + "epoch": 0.1787479406919275, + "grad_norm": 0.3948402404785156, + "learning_rate": 1.9619347215942985e-05, + "loss": 0.5916, + "step": 6510 + }, + { + "epoch": 0.178775398132894, + "grad_norm": 0.37442898750305176, + "learning_rate": 1.9619229179430777e-05, + "loss": 0.4675, + "step": 6511 + }, + { + "epoch": 0.17880285557386053, + "grad_norm": 0.34123048186302185, + "learning_rate": 1.9619111124975635e-05, + "loss": 0.5515, + "step": 6512 + }, + { + "epoch": 0.17883031301482702, + "grad_norm": 0.3985118567943573, + "learning_rate": 1.9618993052577782e-05, + "loss": 0.5369, + "step": 6513 + }, + { + "epoch": 0.1788577704557935, + "grad_norm": 0.33410462737083435, + "learning_rate": 1.9618874962237436e-05, + "loss": 0.4894, + "step": 6514 + }, + { + "epoch": 0.17888522789676004, + "grad_norm": 0.3095585107803345, + "learning_rate": 1.9618756853954818e-05, + "loss": 0.4385, + "step": 6515 + }, + { + "epoch": 0.17891268533772653, + "grad_norm": 0.3615621030330658, + "learning_rate": 1.9618638727730152e-05, + "loss": 0.5585, + "step": 6516 + }, + { + "epoch": 0.17894014277869302, + "grad_norm": 0.3658291697502136, + "learning_rate": 1.961852058356365e-05, + "loss": 0.4886, + "step": 6517 + }, + { + "epoch": 0.17896760021965952, + "grad_norm": 0.38632744550704956, + "learning_rate": 1.961840242145554e-05, + "loss": 0.5957, + "step": 6518 + }, + { + "epoch": 0.17899505766062604, + "grad_norm": 0.34505167603492737, + "learning_rate": 1.9618284241406038e-05, + "loss": 0.507, + "step": 6519 + }, + { + "epoch": 0.17902251510159253, + "grad_norm": 0.47317299246788025, + "learning_rate": 1.9618166043415367e-05, + "loss": 0.5056, + "step": 6520 + }, + { + "epoch": 0.17904997254255903, + "grad_norm": 0.5056530833244324, + "learning_rate": 1.9618047827483745e-05, + "loss": 0.5116, + "step": 6521 + }, + { + "epoch": 0.17907742998352555, + "grad_norm": 0.41631048917770386, + "learning_rate": 1.96179295936114e-05, + "loss": 0.4989, + "step": 6522 + }, + { + "epoch": 0.17910488742449204, + "grad_norm": 0.3865056037902832, + "learning_rate": 1.9617811341798537e-05, + "loss": 0.4802, + "step": 6523 + }, + { + "epoch": 0.17913234486545854, + "grad_norm": 0.42819109559059143, + "learning_rate": 1.9617693072045395e-05, + "loss": 0.5655, + "step": 6524 + }, + { + "epoch": 0.17915980230642503, + "grad_norm": 0.38726502656936646, + "learning_rate": 1.9617574784352183e-05, + "loss": 0.5293, + "step": 6525 + }, + { + "epoch": 0.17918725974739155, + "grad_norm": 0.3588806390762329, + "learning_rate": 1.961745647871912e-05, + "loss": 0.5258, + "step": 6526 + }, + { + "epoch": 0.17921471718835805, + "grad_norm": 0.3579312562942505, + "learning_rate": 1.9617338155146437e-05, + "loss": 0.5274, + "step": 6527 + }, + { + "epoch": 0.17924217462932454, + "grad_norm": 0.4317275583744049, + "learning_rate": 1.9617219813634344e-05, + "loss": 0.5147, + "step": 6528 + }, + { + "epoch": 0.17926963207029104, + "grad_norm": 0.41484734416007996, + "learning_rate": 1.961710145418307e-05, + "loss": 0.5511, + "step": 6529 + }, + { + "epoch": 0.17929708951125756, + "grad_norm": 0.32726767659187317, + "learning_rate": 1.9616983076792833e-05, + "loss": 0.5408, + "step": 6530 + }, + { + "epoch": 0.17932454695222405, + "grad_norm": 0.4116527736186981, + "learning_rate": 1.961686468146385e-05, + "loss": 0.5183, + "step": 6531 + }, + { + "epoch": 0.17935200439319054, + "grad_norm": 0.33590108156204224, + "learning_rate": 1.961674626819635e-05, + "loss": 0.5396, + "step": 6532 + }, + { + "epoch": 0.17937946183415707, + "grad_norm": 0.32384952902793884, + "learning_rate": 1.9616627836990543e-05, + "loss": 0.496, + "step": 6533 + }, + { + "epoch": 0.17940691927512356, + "grad_norm": 0.3532908856868744, + "learning_rate": 1.9616509387846658e-05, + "loss": 0.4062, + "step": 6534 + }, + { + "epoch": 0.17943437671609005, + "grad_norm": 0.3925130069255829, + "learning_rate": 1.9616390920764912e-05, + "loss": 0.5593, + "step": 6535 + }, + { + "epoch": 0.17946183415705655, + "grad_norm": 0.4175868332386017, + "learning_rate": 1.9616272435745533e-05, + "loss": 0.6246, + "step": 6536 + }, + { + "epoch": 0.17948929159802307, + "grad_norm": 0.34742704033851624, + "learning_rate": 1.9616153932788733e-05, + "loss": 0.4864, + "step": 6537 + }, + { + "epoch": 0.17951674903898956, + "grad_norm": 0.3763229250907898, + "learning_rate": 1.9616035411894737e-05, + "loss": 0.5038, + "step": 6538 + }, + { + "epoch": 0.17954420647995606, + "grad_norm": 0.37009817361831665, + "learning_rate": 1.9615916873063767e-05, + "loss": 0.5427, + "step": 6539 + }, + { + "epoch": 0.17957166392092258, + "grad_norm": 0.3447690010070801, + "learning_rate": 1.9615798316296045e-05, + "loss": 0.5838, + "step": 6540 + }, + { + "epoch": 0.17959912136188907, + "grad_norm": 0.46372726559638977, + "learning_rate": 1.9615679741591784e-05, + "loss": 0.5603, + "step": 6541 + }, + { + "epoch": 0.17962657880285557, + "grad_norm": 0.3795663118362427, + "learning_rate": 1.9615561148951214e-05, + "loss": 0.5842, + "step": 6542 + }, + { + "epoch": 0.17965403624382206, + "grad_norm": 0.3541017472743988, + "learning_rate": 1.9615442538374555e-05, + "loss": 0.5125, + "step": 6543 + }, + { + "epoch": 0.17968149368478858, + "grad_norm": 0.4066811203956604, + "learning_rate": 1.9615323909862027e-05, + "loss": 0.5624, + "step": 6544 + }, + { + "epoch": 0.17970895112575508, + "grad_norm": 0.3183562457561493, + "learning_rate": 1.9615205263413852e-05, + "loss": 0.5093, + "step": 6545 + }, + { + "epoch": 0.17973640856672157, + "grad_norm": 0.41035863757133484, + "learning_rate": 1.9615086599030245e-05, + "loss": 0.5609, + "step": 6546 + }, + { + "epoch": 0.1797638660076881, + "grad_norm": 0.46291038393974304, + "learning_rate": 1.9614967916711437e-05, + "loss": 0.5101, + "step": 6547 + }, + { + "epoch": 0.1797913234486546, + "grad_norm": 0.4031343460083008, + "learning_rate": 1.9614849216457645e-05, + "loss": 0.5251, + "step": 6548 + }, + { + "epoch": 0.17981878088962108, + "grad_norm": 0.37913015484809875, + "learning_rate": 1.961473049826909e-05, + "loss": 0.6316, + "step": 6549 + }, + { + "epoch": 0.17984623833058758, + "grad_norm": 0.36164912581443787, + "learning_rate": 1.9614611762145996e-05, + "loss": 0.5409, + "step": 6550 + }, + { + "epoch": 0.1798736957715541, + "grad_norm": 0.4459293484687805, + "learning_rate": 1.9614493008088576e-05, + "loss": 0.4943, + "step": 6551 + }, + { + "epoch": 0.1799011532125206, + "grad_norm": 0.39832809567451477, + "learning_rate": 1.9614374236097063e-05, + "loss": 0.6195, + "step": 6552 + }, + { + "epoch": 0.1799286106534871, + "grad_norm": 0.37687790393829346, + "learning_rate": 1.9614255446171674e-05, + "loss": 0.5681, + "step": 6553 + }, + { + "epoch": 0.1799560680944536, + "grad_norm": 0.4239349365234375, + "learning_rate": 1.961413663831263e-05, + "loss": 0.6174, + "step": 6554 + }, + { + "epoch": 0.1799835255354201, + "grad_norm": 0.38683775067329407, + "learning_rate": 1.9614017812520148e-05, + "loss": 0.5497, + "step": 6555 + }, + { + "epoch": 0.1800109829763866, + "grad_norm": 0.3462676703929901, + "learning_rate": 1.9613898968794462e-05, + "loss": 0.5197, + "step": 6556 + }, + { + "epoch": 0.1800384404173531, + "grad_norm": 0.36432063579559326, + "learning_rate": 1.961378010713578e-05, + "loss": 0.5348, + "step": 6557 + }, + { + "epoch": 0.1800658978583196, + "grad_norm": 0.35698938369750977, + "learning_rate": 1.961366122754433e-05, + "loss": 0.5294, + "step": 6558 + }, + { + "epoch": 0.1800933552992861, + "grad_norm": 0.3782150149345398, + "learning_rate": 1.9613542330020336e-05, + "loss": 0.4948, + "step": 6559 + }, + { + "epoch": 0.1801208127402526, + "grad_norm": 0.36652326583862305, + "learning_rate": 1.9613423414564017e-05, + "loss": 0.5846, + "step": 6560 + }, + { + "epoch": 0.18014827018121912, + "grad_norm": 0.4745424687862396, + "learning_rate": 1.9613304481175594e-05, + "loss": 0.6733, + "step": 6561 + }, + { + "epoch": 0.18017572762218562, + "grad_norm": 0.49931764602661133, + "learning_rate": 1.961318552985529e-05, + "loss": 0.5284, + "step": 6562 + }, + { + "epoch": 0.1802031850631521, + "grad_norm": 0.37175244092941284, + "learning_rate": 1.9613066560603326e-05, + "loss": 0.535, + "step": 6563 + }, + { + "epoch": 0.1802306425041186, + "grad_norm": 0.36032170057296753, + "learning_rate": 1.9612947573419925e-05, + "loss": 0.5767, + "step": 6564 + }, + { + "epoch": 0.18025809994508513, + "grad_norm": 0.37396153807640076, + "learning_rate": 1.961282856830531e-05, + "loss": 0.4551, + "step": 6565 + }, + { + "epoch": 0.18028555738605162, + "grad_norm": 0.40200379490852356, + "learning_rate": 1.9612709545259703e-05, + "loss": 0.5484, + "step": 6566 + }, + { + "epoch": 0.18031301482701811, + "grad_norm": 0.44111600518226624, + "learning_rate": 1.9612590504283324e-05, + "loss": 0.4964, + "step": 6567 + }, + { + "epoch": 0.18034047226798464, + "grad_norm": 0.42541831731796265, + "learning_rate": 1.9612471445376395e-05, + "loss": 0.5271, + "step": 6568 + }, + { + "epoch": 0.18036792970895113, + "grad_norm": 0.4291318655014038, + "learning_rate": 1.9612352368539137e-05, + "loss": 0.5871, + "step": 6569 + }, + { + "epoch": 0.18039538714991762, + "grad_norm": 0.336080402135849, + "learning_rate": 1.9612233273771774e-05, + "loss": 0.4928, + "step": 6570 + }, + { + "epoch": 0.18042284459088412, + "grad_norm": 0.3522418141365051, + "learning_rate": 1.9612114161074533e-05, + "loss": 0.4903, + "step": 6571 + }, + { + "epoch": 0.18045030203185064, + "grad_norm": 0.37974119186401367, + "learning_rate": 1.9611995030447627e-05, + "loss": 0.4994, + "step": 6572 + }, + { + "epoch": 0.18047775947281713, + "grad_norm": 0.833219587802887, + "learning_rate": 1.961187588189128e-05, + "loss": 0.573, + "step": 6573 + }, + { + "epoch": 0.18050521691378363, + "grad_norm": 0.4648749530315399, + "learning_rate": 1.9611756715405722e-05, + "loss": 0.5346, + "step": 6574 + }, + { + "epoch": 0.18053267435475015, + "grad_norm": 0.31090807914733887, + "learning_rate": 1.9611637530991167e-05, + "loss": 0.541, + "step": 6575 + }, + { + "epoch": 0.18056013179571664, + "grad_norm": 0.6276656985282898, + "learning_rate": 1.961151832864784e-05, + "loss": 0.559, + "step": 6576 + }, + { + "epoch": 0.18058758923668314, + "grad_norm": 0.3842560350894928, + "learning_rate": 1.9611399108375962e-05, + "loss": 0.5556, + "step": 6577 + }, + { + "epoch": 0.18061504667764963, + "grad_norm": 0.35249221324920654, + "learning_rate": 1.9611279870175763e-05, + "loss": 0.5701, + "step": 6578 + }, + { + "epoch": 0.18064250411861615, + "grad_norm": 0.3619592785835266, + "learning_rate": 1.9611160614047455e-05, + "loss": 0.4796, + "step": 6579 + }, + { + "epoch": 0.18066996155958265, + "grad_norm": 0.37015992403030396, + "learning_rate": 1.9611041339991267e-05, + "loss": 0.5812, + "step": 6580 + }, + { + "epoch": 0.18069741900054914, + "grad_norm": 0.44630226492881775, + "learning_rate": 1.9610922048007418e-05, + "loss": 0.5477, + "step": 6581 + }, + { + "epoch": 0.18072487644151566, + "grad_norm": 0.4043484032154083, + "learning_rate": 1.9610802738096132e-05, + "loss": 0.5643, + "step": 6582 + }, + { + "epoch": 0.18075233388248216, + "grad_norm": 0.3628946840763092, + "learning_rate": 1.9610683410257635e-05, + "loss": 0.5816, + "step": 6583 + }, + { + "epoch": 0.18077979132344865, + "grad_norm": 0.4177703261375427, + "learning_rate": 1.961056406449214e-05, + "loss": 0.5619, + "step": 6584 + }, + { + "epoch": 0.18080724876441515, + "grad_norm": 0.47375360131263733, + "learning_rate": 1.9610444700799883e-05, + "loss": 0.562, + "step": 6585 + }, + { + "epoch": 0.18083470620538167, + "grad_norm": 0.40963634848594666, + "learning_rate": 1.9610325319181075e-05, + "loss": 0.5467, + "step": 6586 + }, + { + "epoch": 0.18086216364634816, + "grad_norm": 0.3774314522743225, + "learning_rate": 1.9610205919635944e-05, + "loss": 0.5998, + "step": 6587 + }, + { + "epoch": 0.18088962108731466, + "grad_norm": 0.4255989193916321, + "learning_rate": 1.9610086502164708e-05, + "loss": 0.5866, + "step": 6588 + }, + { + "epoch": 0.18091707852828118, + "grad_norm": 0.38105064630508423, + "learning_rate": 1.9609967066767596e-05, + "loss": 0.5948, + "step": 6589 + }, + { + "epoch": 0.18094453596924767, + "grad_norm": 0.3226192593574524, + "learning_rate": 1.960984761344483e-05, + "loss": 0.5672, + "step": 6590 + }, + { + "epoch": 0.18097199341021417, + "grad_norm": 0.33413857221603394, + "learning_rate": 1.9609728142196628e-05, + "loss": 0.5268, + "step": 6591 + }, + { + "epoch": 0.18099945085118066, + "grad_norm": 0.3308675289154053, + "learning_rate": 1.960960865302322e-05, + "loss": 0.5193, + "step": 6592 + }, + { + "epoch": 0.18102690829214718, + "grad_norm": 0.3591359555721283, + "learning_rate": 1.9609489145924823e-05, + "loss": 0.5873, + "step": 6593 + }, + { + "epoch": 0.18105436573311368, + "grad_norm": 0.35315337777137756, + "learning_rate": 1.960936962090166e-05, + "loss": 0.5802, + "step": 6594 + }, + { + "epoch": 0.18108182317408017, + "grad_norm": 0.380507230758667, + "learning_rate": 1.9609250077953955e-05, + "loss": 0.5422, + "step": 6595 + }, + { + "epoch": 0.18110928061504666, + "grad_norm": 0.3569798171520233, + "learning_rate": 1.9609130517081936e-05, + "loss": 0.4997, + "step": 6596 + }, + { + "epoch": 0.18113673805601319, + "grad_norm": 0.32641392946243286, + "learning_rate": 1.9609010938285818e-05, + "loss": 0.5357, + "step": 6597 + }, + { + "epoch": 0.18116419549697968, + "grad_norm": 0.4071410894393921, + "learning_rate": 1.9608891341565833e-05, + "loss": 0.5302, + "step": 6598 + }, + { + "epoch": 0.18119165293794617, + "grad_norm": 0.4328916072845459, + "learning_rate": 1.9608771726922193e-05, + "loss": 0.5877, + "step": 6599 + }, + { + "epoch": 0.1812191103789127, + "grad_norm": 0.3360719382762909, + "learning_rate": 1.960865209435513e-05, + "loss": 0.6043, + "step": 6600 + }, + { + "epoch": 0.1812465678198792, + "grad_norm": 0.4246176779270172, + "learning_rate": 1.960853244386486e-05, + "loss": 0.5568, + "step": 6601 + }, + { + "epoch": 0.18127402526084568, + "grad_norm": 0.3815915286540985, + "learning_rate": 1.9608412775451616e-05, + "loss": 0.5772, + "step": 6602 + }, + { + "epoch": 0.18130148270181218, + "grad_norm": 0.3796115219593048, + "learning_rate": 1.9608293089115612e-05, + "loss": 0.4624, + "step": 6603 + }, + { + "epoch": 0.1813289401427787, + "grad_norm": 0.408968448638916, + "learning_rate": 1.9608173384857076e-05, + "loss": 0.4709, + "step": 6604 + }, + { + "epoch": 0.1813563975837452, + "grad_norm": 0.3403010070323944, + "learning_rate": 1.960805366267623e-05, + "loss": 0.517, + "step": 6605 + }, + { + "epoch": 0.1813838550247117, + "grad_norm": 0.3846514821052551, + "learning_rate": 1.9607933922573296e-05, + "loss": 0.562, + "step": 6606 + }, + { + "epoch": 0.1814113124656782, + "grad_norm": 0.33959582448005676, + "learning_rate": 1.9607814164548504e-05, + "loss": 0.4731, + "step": 6607 + }, + { + "epoch": 0.1814387699066447, + "grad_norm": 0.32449421286582947, + "learning_rate": 1.9607694388602066e-05, + "loss": 0.5145, + "step": 6608 + }, + { + "epoch": 0.1814662273476112, + "grad_norm": 0.7601893544197083, + "learning_rate": 1.9607574594734213e-05, + "loss": 0.5012, + "step": 6609 + }, + { + "epoch": 0.1814936847885777, + "grad_norm": 0.3440118134021759, + "learning_rate": 1.9607454782945166e-05, + "loss": 0.4708, + "step": 6610 + }, + { + "epoch": 0.1815211422295442, + "grad_norm": 0.36644262075424194, + "learning_rate": 1.9607334953235152e-05, + "loss": 0.387, + "step": 6611 + }, + { + "epoch": 0.1815485996705107, + "grad_norm": 0.368236243724823, + "learning_rate": 1.960721510560439e-05, + "loss": 0.547, + "step": 6612 + }, + { + "epoch": 0.1815760571114772, + "grad_norm": 0.3474389910697937, + "learning_rate": 1.960709524005311e-05, + "loss": 0.5262, + "step": 6613 + }, + { + "epoch": 0.18160351455244372, + "grad_norm": 0.37778881192207336, + "learning_rate": 1.9606975356581527e-05, + "loss": 0.4798, + "step": 6614 + }, + { + "epoch": 0.18163097199341022, + "grad_norm": 0.34977850317955017, + "learning_rate": 1.960685545518987e-05, + "loss": 0.5012, + "step": 6615 + }, + { + "epoch": 0.1816584294343767, + "grad_norm": 0.3307528495788574, + "learning_rate": 1.960673553587836e-05, + "loss": 0.4949, + "step": 6616 + }, + { + "epoch": 0.1816858868753432, + "grad_norm": 0.42286619544029236, + "learning_rate": 1.9606615598647224e-05, + "loss": 0.467, + "step": 6617 + }, + { + "epoch": 0.18171334431630973, + "grad_norm": 0.37802568078041077, + "learning_rate": 1.9606495643496682e-05, + "loss": 0.5148, + "step": 6618 + }, + { + "epoch": 0.18174080175727622, + "grad_norm": 0.33488020300865173, + "learning_rate": 1.960637567042696e-05, + "loss": 0.5194, + "step": 6619 + }, + { + "epoch": 0.18176825919824272, + "grad_norm": 0.3429882526397705, + "learning_rate": 1.9606255679438282e-05, + "loss": 0.5369, + "step": 6620 + }, + { + "epoch": 0.18179571663920924, + "grad_norm": 0.43393421173095703, + "learning_rate": 1.9606135670530872e-05, + "loss": 0.5289, + "step": 6621 + }, + { + "epoch": 0.18182317408017573, + "grad_norm": 0.3814091682434082, + "learning_rate": 1.960601564370495e-05, + "loss": 0.5861, + "step": 6622 + }, + { + "epoch": 0.18185063152114223, + "grad_norm": 0.3789820671081543, + "learning_rate": 1.9605895598960746e-05, + "loss": 0.5173, + "step": 6623 + }, + { + "epoch": 0.18187808896210872, + "grad_norm": 0.3469187915325165, + "learning_rate": 1.960577553629848e-05, + "loss": 0.4075, + "step": 6624 + }, + { + "epoch": 0.18190554640307524, + "grad_norm": 0.3718814253807068, + "learning_rate": 1.9605655455718377e-05, + "loss": 0.4723, + "step": 6625 + }, + { + "epoch": 0.18193300384404174, + "grad_norm": 0.38630571961402893, + "learning_rate": 1.960553535722066e-05, + "loss": 0.5965, + "step": 6626 + }, + { + "epoch": 0.18196046128500823, + "grad_norm": 0.4062061607837677, + "learning_rate": 1.9605415240805556e-05, + "loss": 0.4945, + "step": 6627 + }, + { + "epoch": 0.18198791872597475, + "grad_norm": 0.3063819110393524, + "learning_rate": 1.9605295106473285e-05, + "loss": 0.467, + "step": 6628 + }, + { + "epoch": 0.18201537616694125, + "grad_norm": 0.37494996190071106, + "learning_rate": 1.9605174954224074e-05, + "loss": 0.6977, + "step": 6629 + }, + { + "epoch": 0.18204283360790774, + "grad_norm": 0.3210318088531494, + "learning_rate": 1.9605054784058145e-05, + "loss": 0.4574, + "step": 6630 + }, + { + "epoch": 0.18207029104887423, + "grad_norm": 0.4004106819629669, + "learning_rate": 1.960493459597572e-05, + "loss": 0.5671, + "step": 6631 + }, + { + "epoch": 0.18209774848984076, + "grad_norm": 0.4476136565208435, + "learning_rate": 1.9604814389977032e-05, + "loss": 0.5584, + "step": 6632 + }, + { + "epoch": 0.18212520593080725, + "grad_norm": 0.34398287534713745, + "learning_rate": 1.96046941660623e-05, + "loss": 0.4851, + "step": 6633 + }, + { + "epoch": 0.18215266337177374, + "grad_norm": 0.4284767508506775, + "learning_rate": 1.9604573924231745e-05, + "loss": 0.5355, + "step": 6634 + }, + { + "epoch": 0.18218012081274026, + "grad_norm": 0.34190985560417175, + "learning_rate": 1.9604453664485594e-05, + "loss": 0.5136, + "step": 6635 + }, + { + "epoch": 0.18220757825370676, + "grad_norm": 0.3798222839832306, + "learning_rate": 1.9604333386824072e-05, + "loss": 0.4169, + "step": 6636 + }, + { + "epoch": 0.18223503569467325, + "grad_norm": 0.3595619201660156, + "learning_rate": 1.9604213091247404e-05, + "loss": 0.598, + "step": 6637 + }, + { + "epoch": 0.18226249313563975, + "grad_norm": 0.4001055061817169, + "learning_rate": 1.9604092777755812e-05, + "loss": 0.6254, + "step": 6638 + }, + { + "epoch": 0.18228995057660627, + "grad_norm": 0.3701055645942688, + "learning_rate": 1.9603972446349525e-05, + "loss": 0.5338, + "step": 6639 + }, + { + "epoch": 0.18231740801757276, + "grad_norm": 0.4521777033805847, + "learning_rate": 1.9603852097028758e-05, + "loss": 0.4755, + "step": 6640 + }, + { + "epoch": 0.18234486545853926, + "grad_norm": 0.3986315131187439, + "learning_rate": 1.9603731729793746e-05, + "loss": 0.5989, + "step": 6641 + }, + { + "epoch": 0.18237232289950578, + "grad_norm": 0.39256805181503296, + "learning_rate": 1.960361134464471e-05, + "loss": 0.5795, + "step": 6642 + }, + { + "epoch": 0.18239978034047227, + "grad_norm": 0.586524248123169, + "learning_rate": 1.960349094158187e-05, + "loss": 0.5527, + "step": 6643 + }, + { + "epoch": 0.18242723778143877, + "grad_norm": 0.3523845374584198, + "learning_rate": 1.9603370520605457e-05, + "loss": 0.4464, + "step": 6644 + }, + { + "epoch": 0.18245469522240526, + "grad_norm": 0.35071879625320435, + "learning_rate": 1.960325008171569e-05, + "loss": 0.533, + "step": 6645 + }, + { + "epoch": 0.18248215266337178, + "grad_norm": 0.3633764088153839, + "learning_rate": 1.96031296249128e-05, + "loss": 0.5729, + "step": 6646 + }, + { + "epoch": 0.18250961010433828, + "grad_norm": 0.37920546531677246, + "learning_rate": 1.9603009150197007e-05, + "loss": 0.5671, + "step": 6647 + }, + { + "epoch": 0.18253706754530477, + "grad_norm": 0.3546657860279083, + "learning_rate": 1.960288865756854e-05, + "loss": 0.5268, + "step": 6648 + }, + { + "epoch": 0.1825645249862713, + "grad_norm": 0.3585810363292694, + "learning_rate": 1.9602768147027617e-05, + "loss": 0.5103, + "step": 6649 + }, + { + "epoch": 0.1825919824272378, + "grad_norm": 0.50678950548172, + "learning_rate": 1.9602647618574466e-05, + "loss": 0.6073, + "step": 6650 + }, + { + "epoch": 0.18261943986820428, + "grad_norm": 0.37133702635765076, + "learning_rate": 1.9602527072209313e-05, + "loss": 0.5358, + "step": 6651 + }, + { + "epoch": 0.18264689730917077, + "grad_norm": 0.6753482222557068, + "learning_rate": 1.9602406507932384e-05, + "loss": 0.4233, + "step": 6652 + }, + { + "epoch": 0.1826743547501373, + "grad_norm": 0.3662383556365967, + "learning_rate": 1.96022859257439e-05, + "loss": 0.6022, + "step": 6653 + }, + { + "epoch": 0.1827018121911038, + "grad_norm": 0.37322214245796204, + "learning_rate": 1.960216532564409e-05, + "loss": 0.5604, + "step": 6654 + }, + { + "epoch": 0.18272926963207028, + "grad_norm": 0.3577675223350525, + "learning_rate": 1.9602044707633174e-05, + "loss": 0.5433, + "step": 6655 + }, + { + "epoch": 0.1827567270730368, + "grad_norm": 0.3609428107738495, + "learning_rate": 1.9601924071711383e-05, + "loss": 0.5977, + "step": 6656 + }, + { + "epoch": 0.1827841845140033, + "grad_norm": 0.31481990218162537, + "learning_rate": 1.9601803417878938e-05, + "loss": 0.4566, + "step": 6657 + }, + { + "epoch": 0.1828116419549698, + "grad_norm": 0.4299263656139374, + "learning_rate": 1.9601682746136066e-05, + "loss": 0.6007, + "step": 6658 + }, + { + "epoch": 0.1828390993959363, + "grad_norm": 0.37897035479545593, + "learning_rate": 1.960156205648299e-05, + "loss": 0.4972, + "step": 6659 + }, + { + "epoch": 0.1828665568369028, + "grad_norm": 0.3300919234752655, + "learning_rate": 1.960144134891994e-05, + "loss": 0.514, + "step": 6660 + }, + { + "epoch": 0.1828940142778693, + "grad_norm": 0.40170082449913025, + "learning_rate": 1.960132062344713e-05, + "loss": 0.4872, + "step": 6661 + }, + { + "epoch": 0.1829214717188358, + "grad_norm": 0.3318944275379181, + "learning_rate": 1.96011998800648e-05, + "loss": 0.5076, + "step": 6662 + }, + { + "epoch": 0.1829489291598023, + "grad_norm": 0.38876667618751526, + "learning_rate": 1.9601079118773162e-05, + "loss": 0.4879, + "step": 6663 + }, + { + "epoch": 0.18297638660076881, + "grad_norm": 0.37409788370132446, + "learning_rate": 1.9600958339572452e-05, + "loss": 0.5259, + "step": 6664 + }, + { + "epoch": 0.1830038440417353, + "grad_norm": 0.3315151333808899, + "learning_rate": 1.9600837542462886e-05, + "loss": 0.5197, + "step": 6665 + }, + { + "epoch": 0.1830313014827018, + "grad_norm": 0.6452762484550476, + "learning_rate": 1.9600716727444696e-05, + "loss": 0.598, + "step": 6666 + }, + { + "epoch": 0.18305875892366832, + "grad_norm": 0.3477502167224884, + "learning_rate": 1.960059589451811e-05, + "loss": 0.5288, + "step": 6667 + }, + { + "epoch": 0.18308621636463482, + "grad_norm": 0.3696436285972595, + "learning_rate": 1.9600475043683344e-05, + "loss": 0.5245, + "step": 6668 + }, + { + "epoch": 0.1831136738056013, + "grad_norm": 0.37194985151290894, + "learning_rate": 1.9600354174940625e-05, + "loss": 0.451, + "step": 6669 + }, + { + "epoch": 0.1831411312465678, + "grad_norm": 0.40646710991859436, + "learning_rate": 1.9600233288290184e-05, + "loss": 0.527, + "step": 6670 + }, + { + "epoch": 0.18316858868753433, + "grad_norm": 0.3552835285663605, + "learning_rate": 1.9600112383732243e-05, + "loss": 0.5376, + "step": 6671 + }, + { + "epoch": 0.18319604612850082, + "grad_norm": 0.3644622564315796, + "learning_rate": 1.9599991461267032e-05, + "loss": 0.5348, + "step": 6672 + }, + { + "epoch": 0.18322350356946732, + "grad_norm": 0.35038691759109497, + "learning_rate": 1.959987052089477e-05, + "loss": 0.5235, + "step": 6673 + }, + { + "epoch": 0.18325096101043384, + "grad_norm": 0.36963406205177307, + "learning_rate": 1.9599749562615684e-05, + "loss": 0.6213, + "step": 6674 + }, + { + "epoch": 0.18327841845140033, + "grad_norm": 0.39860793948173523, + "learning_rate": 1.9599628586430004e-05, + "loss": 0.4727, + "step": 6675 + }, + { + "epoch": 0.18330587589236683, + "grad_norm": 0.36047565937042236, + "learning_rate": 1.9599507592337953e-05, + "loss": 0.5209, + "step": 6676 + }, + { + "epoch": 0.18333333333333332, + "grad_norm": 0.3773106336593628, + "learning_rate": 1.9599386580339754e-05, + "loss": 0.5616, + "step": 6677 + }, + { + "epoch": 0.18336079077429984, + "grad_norm": 0.4006684422492981, + "learning_rate": 1.9599265550435634e-05, + "loss": 0.58, + "step": 6678 + }, + { + "epoch": 0.18338824821526634, + "grad_norm": 0.37613150477409363, + "learning_rate": 1.9599144502625824e-05, + "loss": 0.5614, + "step": 6679 + }, + { + "epoch": 0.18341570565623283, + "grad_norm": 0.46269986033439636, + "learning_rate": 1.9599023436910544e-05, + "loss": 0.5156, + "step": 6680 + }, + { + "epoch": 0.18344316309719935, + "grad_norm": 0.5378171801567078, + "learning_rate": 1.9598902353290024e-05, + "loss": 0.4752, + "step": 6681 + }, + { + "epoch": 0.18347062053816585, + "grad_norm": 0.3583143651485443, + "learning_rate": 1.9598781251764485e-05, + "loss": 0.5165, + "step": 6682 + }, + { + "epoch": 0.18349807797913234, + "grad_norm": 0.34920769929885864, + "learning_rate": 1.9598660132334156e-05, + "loss": 0.4413, + "step": 6683 + }, + { + "epoch": 0.18352553542009883, + "grad_norm": 0.3512398302555084, + "learning_rate": 1.9598538994999262e-05, + "loss": 0.5468, + "step": 6684 + }, + { + "epoch": 0.18355299286106536, + "grad_norm": 0.37508997321128845, + "learning_rate": 1.959841783976003e-05, + "loss": 0.5553, + "step": 6685 + }, + { + "epoch": 0.18358045030203185, + "grad_norm": 0.441540390253067, + "learning_rate": 1.9598296666616685e-05, + "loss": 0.4951, + "step": 6686 + }, + { + "epoch": 0.18360790774299834, + "grad_norm": 0.32495248317718506, + "learning_rate": 1.9598175475569453e-05, + "loss": 0.5471, + "step": 6687 + }, + { + "epoch": 0.18363536518396487, + "grad_norm": 0.3588579595088959, + "learning_rate": 1.959805426661856e-05, + "loss": 0.5766, + "step": 6688 + }, + { + "epoch": 0.18366282262493136, + "grad_norm": 0.30517950654029846, + "learning_rate": 1.9597933039764234e-05, + "loss": 0.4042, + "step": 6689 + }, + { + "epoch": 0.18369028006589785, + "grad_norm": 0.35640642046928406, + "learning_rate": 1.9597811795006697e-05, + "loss": 0.5535, + "step": 6690 + }, + { + "epoch": 0.18371773750686435, + "grad_norm": 0.3426697850227356, + "learning_rate": 1.959769053234618e-05, + "loss": 0.5005, + "step": 6691 + }, + { + "epoch": 0.18374519494783087, + "grad_norm": 0.3562275767326355, + "learning_rate": 1.9597569251782907e-05, + "loss": 0.5053, + "step": 6692 + }, + { + "epoch": 0.18377265238879736, + "grad_norm": 0.3417372703552246, + "learning_rate": 1.9597447953317105e-05, + "loss": 0.5253, + "step": 6693 + }, + { + "epoch": 0.18380010982976386, + "grad_norm": 0.35134291648864746, + "learning_rate": 1.9597326636948994e-05, + "loss": 0.5072, + "step": 6694 + }, + { + "epoch": 0.18382756727073038, + "grad_norm": 0.40764763951301575, + "learning_rate": 1.959720530267881e-05, + "loss": 0.4605, + "step": 6695 + }, + { + "epoch": 0.18385502471169687, + "grad_norm": 0.4107544720172882, + "learning_rate": 1.9597083950506776e-05, + "loss": 0.5272, + "step": 6696 + }, + { + "epoch": 0.18388248215266337, + "grad_norm": 0.3437783718109131, + "learning_rate": 1.9596962580433113e-05, + "loss": 0.4842, + "step": 6697 + }, + { + "epoch": 0.18390993959362986, + "grad_norm": 0.3789130449295044, + "learning_rate": 1.9596841192458056e-05, + "loss": 0.5946, + "step": 6698 + }, + { + "epoch": 0.18393739703459638, + "grad_norm": 0.3519393801689148, + "learning_rate": 1.959671978658183e-05, + "loss": 0.5054, + "step": 6699 + }, + { + "epoch": 0.18396485447556288, + "grad_norm": 0.3724072277545929, + "learning_rate": 1.9596598362804655e-05, + "loss": 0.511, + "step": 6700 + }, + { + "epoch": 0.18399231191652937, + "grad_norm": 0.3829008638858795, + "learning_rate": 1.9596476921126757e-05, + "loss": 0.5145, + "step": 6701 + }, + { + "epoch": 0.1840197693574959, + "grad_norm": 0.3328899145126343, + "learning_rate": 1.959635546154837e-05, + "loss": 0.5134, + "step": 6702 + }, + { + "epoch": 0.1840472267984624, + "grad_norm": 0.37000131607055664, + "learning_rate": 1.959623398406972e-05, + "loss": 0.5689, + "step": 6703 + }, + { + "epoch": 0.18407468423942888, + "grad_norm": 0.39848434925079346, + "learning_rate": 1.9596112488691027e-05, + "loss": 0.5255, + "step": 6704 + }, + { + "epoch": 0.18410214168039538, + "grad_norm": 0.39122459292411804, + "learning_rate": 1.9595990975412523e-05, + "loss": 0.6085, + "step": 6705 + }, + { + "epoch": 0.1841295991213619, + "grad_norm": 0.37932607531547546, + "learning_rate": 1.9595869444234434e-05, + "loss": 0.5976, + "step": 6706 + }, + { + "epoch": 0.1841570565623284, + "grad_norm": 0.39840081334114075, + "learning_rate": 1.9595747895156987e-05, + "loss": 0.5048, + "step": 6707 + }, + { + "epoch": 0.18418451400329489, + "grad_norm": 0.4509585201740265, + "learning_rate": 1.9595626328180405e-05, + "loss": 0.5943, + "step": 6708 + }, + { + "epoch": 0.1842119714442614, + "grad_norm": 0.3566094934940338, + "learning_rate": 1.9595504743304916e-05, + "loss": 0.5614, + "step": 6709 + }, + { + "epoch": 0.1842394288852279, + "grad_norm": 0.38042911887168884, + "learning_rate": 1.959538314053075e-05, + "loss": 0.4996, + "step": 6710 + }, + { + "epoch": 0.1842668863261944, + "grad_norm": 0.5622090101242065, + "learning_rate": 1.9595261519858132e-05, + "loss": 0.5194, + "step": 6711 + }, + { + "epoch": 0.1842943437671609, + "grad_norm": 0.33112847805023193, + "learning_rate": 1.959513988128729e-05, + "loss": 0.5218, + "step": 6712 + }, + { + "epoch": 0.1843218012081274, + "grad_norm": 0.3660420775413513, + "learning_rate": 1.959501822481845e-05, + "loss": 0.493, + "step": 6713 + }, + { + "epoch": 0.1843492586490939, + "grad_norm": 0.35983988642692566, + "learning_rate": 1.9594896550451835e-05, + "loss": 0.5816, + "step": 6714 + }, + { + "epoch": 0.1843767160900604, + "grad_norm": 0.33008211851119995, + "learning_rate": 1.9594774858187676e-05, + "loss": 0.5586, + "step": 6715 + }, + { + "epoch": 0.18440417353102692, + "grad_norm": 0.3390534818172455, + "learning_rate": 1.9594653148026203e-05, + "loss": 0.4777, + "step": 6716 + }, + { + "epoch": 0.18443163097199342, + "grad_norm": 0.360431969165802, + "learning_rate": 1.959453141996764e-05, + "loss": 0.5251, + "step": 6717 + }, + { + "epoch": 0.1844590884129599, + "grad_norm": 0.3713293671607971, + "learning_rate": 1.959440967401221e-05, + "loss": 0.5239, + "step": 6718 + }, + { + "epoch": 0.1844865458539264, + "grad_norm": 0.3547488749027252, + "learning_rate": 1.9594287910160145e-05, + "loss": 0.5391, + "step": 6719 + }, + { + "epoch": 0.18451400329489293, + "grad_norm": 0.3685436248779297, + "learning_rate": 1.9594166128411672e-05, + "loss": 0.5793, + "step": 6720 + }, + { + "epoch": 0.18454146073585942, + "grad_norm": 0.3591940402984619, + "learning_rate": 1.9594044328767015e-05, + "loss": 0.5288, + "step": 6721 + }, + { + "epoch": 0.1845689181768259, + "grad_norm": 0.30893898010253906, + "learning_rate": 1.9593922511226406e-05, + "loss": 0.4966, + "step": 6722 + }, + { + "epoch": 0.18459637561779244, + "grad_norm": 0.3758653402328491, + "learning_rate": 1.959380067579007e-05, + "loss": 0.5593, + "step": 6723 + }, + { + "epoch": 0.18462383305875893, + "grad_norm": 0.3378475606441498, + "learning_rate": 1.959367882245823e-05, + "loss": 0.5111, + "step": 6724 + }, + { + "epoch": 0.18465129049972542, + "grad_norm": 0.35130152106285095, + "learning_rate": 1.959355695123112e-05, + "loss": 0.523, + "step": 6725 + }, + { + "epoch": 0.18467874794069192, + "grad_norm": 0.32864147424697876, + "learning_rate": 1.9593435062108962e-05, + "loss": 0.5466, + "step": 6726 + }, + { + "epoch": 0.18470620538165844, + "grad_norm": 0.37267106771469116, + "learning_rate": 1.959331315509199e-05, + "loss": 0.537, + "step": 6727 + }, + { + "epoch": 0.18473366282262493, + "grad_norm": 0.38670453429222107, + "learning_rate": 1.9593191230180424e-05, + "loss": 0.5722, + "step": 6728 + }, + { + "epoch": 0.18476112026359143, + "grad_norm": 0.3192785978317261, + "learning_rate": 1.9593069287374497e-05, + "loss": 0.4945, + "step": 6729 + }, + { + "epoch": 0.18478857770455792, + "grad_norm": 0.3513008952140808, + "learning_rate": 1.9592947326674426e-05, + "loss": 0.5073, + "step": 6730 + }, + { + "epoch": 0.18481603514552444, + "grad_norm": 0.38058778643608093, + "learning_rate": 1.9592825348080456e-05, + "loss": 0.5119, + "step": 6731 + }, + { + "epoch": 0.18484349258649094, + "grad_norm": 0.3522785007953644, + "learning_rate": 1.95927033515928e-05, + "loss": 0.5045, + "step": 6732 + }, + { + "epoch": 0.18487095002745743, + "grad_norm": 0.40654295682907104, + "learning_rate": 1.959258133721169e-05, + "loss": 0.5941, + "step": 6733 + }, + { + "epoch": 0.18489840746842395, + "grad_norm": 0.4667496085166931, + "learning_rate": 1.959245930493736e-05, + "loss": 0.5367, + "step": 6734 + }, + { + "epoch": 0.18492586490939045, + "grad_norm": 0.4088931679725647, + "learning_rate": 1.9592337254770028e-05, + "loss": 0.5446, + "step": 6735 + }, + { + "epoch": 0.18495332235035694, + "grad_norm": 0.3413527309894562, + "learning_rate": 1.9592215186709924e-05, + "loss": 0.543, + "step": 6736 + }, + { + "epoch": 0.18498077979132344, + "grad_norm": 0.411981463432312, + "learning_rate": 1.959209310075728e-05, + "loss": 0.6398, + "step": 6737 + }, + { + "epoch": 0.18500823723228996, + "grad_norm": 0.4007175862789154, + "learning_rate": 1.959197099691232e-05, + "loss": 0.5316, + "step": 6738 + }, + { + "epoch": 0.18503569467325645, + "grad_norm": 1.1644816398620605, + "learning_rate": 1.9591848875175274e-05, + "loss": 0.5249, + "step": 6739 + }, + { + "epoch": 0.18506315211422295, + "grad_norm": 0.41457629203796387, + "learning_rate": 1.9591726735546364e-05, + "loss": 0.5607, + "step": 6740 + }, + { + "epoch": 0.18509060955518947, + "grad_norm": 0.37312108278274536, + "learning_rate": 1.9591604578025825e-05, + "loss": 0.581, + "step": 6741 + }, + { + "epoch": 0.18511806699615596, + "grad_norm": 0.37413543462753296, + "learning_rate": 1.959148240261388e-05, + "loss": 0.5594, + "step": 6742 + }, + { + "epoch": 0.18514552443712246, + "grad_norm": 0.34128063917160034, + "learning_rate": 1.9591360209310766e-05, + "loss": 0.5457, + "step": 6743 + }, + { + "epoch": 0.18517298187808895, + "grad_norm": 0.37640324234962463, + "learning_rate": 1.95912379981167e-05, + "loss": 0.5524, + "step": 6744 + }, + { + "epoch": 0.18520043931905547, + "grad_norm": 0.39246833324432373, + "learning_rate": 1.9591115769031913e-05, + "loss": 0.5822, + "step": 6745 + }, + { + "epoch": 0.18522789676002197, + "grad_norm": 0.4212758541107178, + "learning_rate": 1.959099352205663e-05, + "loss": 0.5571, + "step": 6746 + }, + { + "epoch": 0.18525535420098846, + "grad_norm": 0.33912530541419983, + "learning_rate": 1.9590871257191088e-05, + "loss": 0.4844, + "step": 6747 + }, + { + "epoch": 0.18528281164195498, + "grad_norm": 0.3508831858634949, + "learning_rate": 1.959074897443551e-05, + "loss": 0.4508, + "step": 6748 + }, + { + "epoch": 0.18531026908292147, + "grad_norm": 0.3268592953681946, + "learning_rate": 1.9590626673790125e-05, + "loss": 0.5058, + "step": 6749 + }, + { + "epoch": 0.18533772652388797, + "grad_norm": 0.33366236090660095, + "learning_rate": 1.9590504355255158e-05, + "loss": 0.4969, + "step": 6750 + }, + { + "epoch": 0.18536518396485446, + "grad_norm": 0.3933591842651367, + "learning_rate": 1.9590382018830842e-05, + "loss": 0.5265, + "step": 6751 + }, + { + "epoch": 0.18539264140582098, + "grad_norm": 0.3208042085170746, + "learning_rate": 1.95902596645174e-05, + "loss": 0.3922, + "step": 6752 + }, + { + "epoch": 0.18542009884678748, + "grad_norm": 0.35445287823677063, + "learning_rate": 1.9590137292315067e-05, + "loss": 0.5696, + "step": 6753 + }, + { + "epoch": 0.18544755628775397, + "grad_norm": 0.38244447112083435, + "learning_rate": 1.959001490222406e-05, + "loss": 0.5438, + "step": 6754 + }, + { + "epoch": 0.1854750137287205, + "grad_norm": 0.3867049217224121, + "learning_rate": 1.958989249424462e-05, + "loss": 0.5239, + "step": 6755 + }, + { + "epoch": 0.185502471169687, + "grad_norm": 0.3641013503074646, + "learning_rate": 1.9589770068376966e-05, + "loss": 0.4816, + "step": 6756 + }, + { + "epoch": 0.18552992861065348, + "grad_norm": 0.385949045419693, + "learning_rate": 1.9589647624621335e-05, + "loss": 0.5934, + "step": 6757 + }, + { + "epoch": 0.18555738605161998, + "grad_norm": 0.3570936620235443, + "learning_rate": 1.9589525162977946e-05, + "loss": 0.5603, + "step": 6758 + }, + { + "epoch": 0.1855848434925865, + "grad_norm": 0.42774534225463867, + "learning_rate": 1.9589402683447034e-05, + "loss": 0.5201, + "step": 6759 + }, + { + "epoch": 0.185612300933553, + "grad_norm": 0.3746121823787689, + "learning_rate": 1.9589280186028826e-05, + "loss": 0.4315, + "step": 6760 + }, + { + "epoch": 0.1856397583745195, + "grad_norm": 0.3705534040927887, + "learning_rate": 1.958915767072355e-05, + "loss": 0.5596, + "step": 6761 + }, + { + "epoch": 0.185667215815486, + "grad_norm": 0.43801939487457275, + "learning_rate": 1.9589035137531433e-05, + "loss": 0.6298, + "step": 6762 + }, + { + "epoch": 0.1856946732564525, + "grad_norm": 0.3556654751300812, + "learning_rate": 1.9588912586452705e-05, + "loss": 0.5103, + "step": 6763 + }, + { + "epoch": 0.185722130697419, + "grad_norm": 0.36112627387046814, + "learning_rate": 1.9588790017487595e-05, + "loss": 0.5946, + "step": 6764 + }, + { + "epoch": 0.1857495881383855, + "grad_norm": 0.351998507976532, + "learning_rate": 1.958866743063633e-05, + "loss": 0.506, + "step": 6765 + }, + { + "epoch": 0.185777045579352, + "grad_norm": 0.3649598956108093, + "learning_rate": 1.9588544825899144e-05, + "loss": 0.5496, + "step": 6766 + }, + { + "epoch": 0.1858045030203185, + "grad_norm": 0.349015474319458, + "learning_rate": 1.958842220327626e-05, + "loss": 0.5857, + "step": 6767 + }, + { + "epoch": 0.185831960461285, + "grad_norm": 0.555534303188324, + "learning_rate": 1.9588299562767905e-05, + "loss": 0.5371, + "step": 6768 + }, + { + "epoch": 0.18585941790225152, + "grad_norm": 0.40861591696739197, + "learning_rate": 1.958817690437431e-05, + "loss": 0.5323, + "step": 6769 + }, + { + "epoch": 0.18588687534321802, + "grad_norm": 0.37565916776657104, + "learning_rate": 1.958805422809571e-05, + "loss": 0.5789, + "step": 6770 + }, + { + "epoch": 0.1859143327841845, + "grad_norm": 0.32951074838638306, + "learning_rate": 1.958793153393233e-05, + "loss": 0.527, + "step": 6771 + }, + { + "epoch": 0.185941790225151, + "grad_norm": 0.3565272092819214, + "learning_rate": 1.9587808821884394e-05, + "loss": 0.5697, + "step": 6772 + }, + { + "epoch": 0.18596924766611753, + "grad_norm": 0.7992799878120422, + "learning_rate": 1.9587686091952132e-05, + "loss": 0.522, + "step": 6773 + }, + { + "epoch": 0.18599670510708402, + "grad_norm": 0.3551509380340576, + "learning_rate": 1.958756334413578e-05, + "loss": 0.4987, + "step": 6774 + }, + { + "epoch": 0.18602416254805051, + "grad_norm": 0.3657396137714386, + "learning_rate": 1.958744057843556e-05, + "loss": 0.5902, + "step": 6775 + }, + { + "epoch": 0.18605161998901704, + "grad_norm": 0.3613986670970917, + "learning_rate": 1.9587317794851704e-05, + "loss": 0.5304, + "step": 6776 + }, + { + "epoch": 0.18607907742998353, + "grad_norm": 0.7609866261482239, + "learning_rate": 1.958719499338444e-05, + "loss": 0.5423, + "step": 6777 + }, + { + "epoch": 0.18610653487095002, + "grad_norm": 0.43186846375465393, + "learning_rate": 1.9587072174034e-05, + "loss": 0.5101, + "step": 6778 + }, + { + "epoch": 0.18613399231191652, + "grad_norm": 0.3713214099407196, + "learning_rate": 1.9586949336800606e-05, + "loss": 0.5974, + "step": 6779 + }, + { + "epoch": 0.18616144975288304, + "grad_norm": 0.7383443713188171, + "learning_rate": 1.9586826481684494e-05, + "loss": 0.5713, + "step": 6780 + }, + { + "epoch": 0.18618890719384953, + "grad_norm": 0.527539849281311, + "learning_rate": 1.958670360868589e-05, + "loss": 0.6145, + "step": 6781 + }, + { + "epoch": 0.18621636463481603, + "grad_norm": 0.3888605237007141, + "learning_rate": 1.958658071780503e-05, + "loss": 0.6001, + "step": 6782 + }, + { + "epoch": 0.18624382207578255, + "grad_norm": 0.34986022114753723, + "learning_rate": 1.958645780904213e-05, + "loss": 0.5309, + "step": 6783 + }, + { + "epoch": 0.18627127951674904, + "grad_norm": 0.44435322284698486, + "learning_rate": 1.958633488239743e-05, + "loss": 0.5877, + "step": 6784 + }, + { + "epoch": 0.18629873695771554, + "grad_norm": 0.3781454861164093, + "learning_rate": 1.9586211937871155e-05, + "loss": 0.5375, + "step": 6785 + }, + { + "epoch": 0.18632619439868203, + "grad_norm": 0.3649424612522125, + "learning_rate": 1.9586088975463534e-05, + "loss": 0.5073, + "step": 6786 + }, + { + "epoch": 0.18635365183964855, + "grad_norm": 0.34935101866722107, + "learning_rate": 1.9585965995174798e-05, + "loss": 0.4495, + "step": 6787 + }, + { + "epoch": 0.18638110928061505, + "grad_norm": 0.3906448185443878, + "learning_rate": 1.9585842997005175e-05, + "loss": 0.565, + "step": 6788 + }, + { + "epoch": 0.18640856672158154, + "grad_norm": 0.3720589280128479, + "learning_rate": 1.9585719980954897e-05, + "loss": 0.5435, + "step": 6789 + }, + { + "epoch": 0.18643602416254806, + "grad_norm": 0.36351558566093445, + "learning_rate": 1.958559694702419e-05, + "loss": 0.5594, + "step": 6790 + }, + { + "epoch": 0.18646348160351456, + "grad_norm": 0.31544938683509827, + "learning_rate": 1.958547389521329e-05, + "loss": 0.4473, + "step": 6791 + }, + { + "epoch": 0.18649093904448105, + "grad_norm": 0.39252379536628723, + "learning_rate": 1.9585350825522417e-05, + "loss": 0.5522, + "step": 6792 + }, + { + "epoch": 0.18651839648544755, + "grad_norm": 0.3611988425254822, + "learning_rate": 1.9585227737951808e-05, + "loss": 0.5863, + "step": 6793 + }, + { + "epoch": 0.18654585392641407, + "grad_norm": 0.36890915036201477, + "learning_rate": 1.9585104632501688e-05, + "loss": 0.5302, + "step": 6794 + }, + { + "epoch": 0.18657331136738056, + "grad_norm": 0.38087841868400574, + "learning_rate": 1.9584981509172292e-05, + "loss": 0.5253, + "step": 6795 + }, + { + "epoch": 0.18660076880834706, + "grad_norm": 0.3283732533454895, + "learning_rate": 1.9584858367963843e-05, + "loss": 0.5147, + "step": 6796 + }, + { + "epoch": 0.18662822624931355, + "grad_norm": 0.399641215801239, + "learning_rate": 1.9584735208876575e-05, + "loss": 0.4525, + "step": 6797 + }, + { + "epoch": 0.18665568369028007, + "grad_norm": 0.3760416805744171, + "learning_rate": 1.9584612031910714e-05, + "loss": 0.4979, + "step": 6798 + }, + { + "epoch": 0.18668314113124657, + "grad_norm": 0.4187765121459961, + "learning_rate": 1.95844888370665e-05, + "loss": 0.538, + "step": 6799 + }, + { + "epoch": 0.18671059857221306, + "grad_norm": 0.4055293798446655, + "learning_rate": 1.958436562434415e-05, + "loss": 0.6165, + "step": 6800 + }, + { + "epoch": 0.18673805601317958, + "grad_norm": 0.3894589841365814, + "learning_rate": 1.95842423937439e-05, + "loss": 0.5605, + "step": 6801 + }, + { + "epoch": 0.18676551345414608, + "grad_norm": 0.3786599338054657, + "learning_rate": 1.9584119145265977e-05, + "loss": 0.5537, + "step": 6802 + }, + { + "epoch": 0.18679297089511257, + "grad_norm": 0.42943623661994934, + "learning_rate": 1.9583995878910613e-05, + "loss": 0.5497, + "step": 6803 + }, + { + "epoch": 0.18682042833607906, + "grad_norm": 0.40102970600128174, + "learning_rate": 1.958387259467804e-05, + "loss": 0.5631, + "step": 6804 + }, + { + "epoch": 0.18684788577704559, + "grad_norm": 0.36541756987571716, + "learning_rate": 1.9583749292568484e-05, + "loss": 0.5318, + "step": 6805 + }, + { + "epoch": 0.18687534321801208, + "grad_norm": 0.3666214644908905, + "learning_rate": 1.958362597258218e-05, + "loss": 0.553, + "step": 6806 + }, + { + "epoch": 0.18690280065897857, + "grad_norm": 0.5189576148986816, + "learning_rate": 1.9583502634719354e-05, + "loss": 0.5501, + "step": 6807 + }, + { + "epoch": 0.1869302580999451, + "grad_norm": 0.34714779257774353, + "learning_rate": 1.9583379278980237e-05, + "loss": 0.4784, + "step": 6808 + }, + { + "epoch": 0.1869577155409116, + "grad_norm": 0.38772812485694885, + "learning_rate": 1.9583255905365057e-05, + "loss": 0.5522, + "step": 6809 + }, + { + "epoch": 0.18698517298187808, + "grad_norm": 0.39513394236564636, + "learning_rate": 1.9583132513874045e-05, + "loss": 0.545, + "step": 6810 + }, + { + "epoch": 0.18701263042284458, + "grad_norm": 0.3575737774372101, + "learning_rate": 1.9583009104507437e-05, + "loss": 0.539, + "step": 6811 + }, + { + "epoch": 0.1870400878638111, + "grad_norm": 0.4082486927509308, + "learning_rate": 1.9582885677265455e-05, + "loss": 0.5866, + "step": 6812 + }, + { + "epoch": 0.1870675453047776, + "grad_norm": 0.38610196113586426, + "learning_rate": 1.958276223214833e-05, + "loss": 0.6093, + "step": 6813 + }, + { + "epoch": 0.1870950027457441, + "grad_norm": 0.4153306484222412, + "learning_rate": 1.9582638769156303e-05, + "loss": 0.6159, + "step": 6814 + }, + { + "epoch": 0.1871224601867106, + "grad_norm": 0.3909022808074951, + "learning_rate": 1.958251528828959e-05, + "loss": 0.5829, + "step": 6815 + }, + { + "epoch": 0.1871499176276771, + "grad_norm": 0.3837563097476959, + "learning_rate": 1.958239178954843e-05, + "loss": 0.5611, + "step": 6816 + }, + { + "epoch": 0.1871773750686436, + "grad_norm": 0.38818663358688354, + "learning_rate": 1.958226827293305e-05, + "loss": 0.5532, + "step": 6817 + }, + { + "epoch": 0.1872048325096101, + "grad_norm": 0.3570185601711273, + "learning_rate": 1.958214473844368e-05, + "loss": 0.519, + "step": 6818 + }, + { + "epoch": 0.1872322899505766, + "grad_norm": 0.39876121282577515, + "learning_rate": 1.9582021186080556e-05, + "loss": 0.5541, + "step": 6819 + }, + { + "epoch": 0.1872597473915431, + "grad_norm": 0.3844282627105713, + "learning_rate": 1.9581897615843898e-05, + "loss": 0.5838, + "step": 6820 + }, + { + "epoch": 0.1872872048325096, + "grad_norm": 0.40826576948165894, + "learning_rate": 1.9581774027733947e-05, + "loss": 0.5071, + "step": 6821 + }, + { + "epoch": 0.18731466227347612, + "grad_norm": 0.3945906162261963, + "learning_rate": 1.958165042175093e-05, + "loss": 0.4977, + "step": 6822 + }, + { + "epoch": 0.18734211971444262, + "grad_norm": 0.3626886010169983, + "learning_rate": 1.9581526797895075e-05, + "loss": 0.4982, + "step": 6823 + }, + { + "epoch": 0.1873695771554091, + "grad_norm": 0.441259503364563, + "learning_rate": 1.9581403156166618e-05, + "loss": 0.5975, + "step": 6824 + }, + { + "epoch": 0.1873970345963756, + "grad_norm": 0.3822338581085205, + "learning_rate": 1.9581279496565782e-05, + "loss": 0.5875, + "step": 6825 + }, + { + "epoch": 0.18742449203734213, + "grad_norm": 0.36095020174980164, + "learning_rate": 1.95811558190928e-05, + "loss": 0.6033, + "step": 6826 + }, + { + "epoch": 0.18745194947830862, + "grad_norm": 0.4864550232887268, + "learning_rate": 1.9581032123747907e-05, + "loss": 0.5338, + "step": 6827 + }, + { + "epoch": 0.18747940691927512, + "grad_norm": 0.3608875870704651, + "learning_rate": 1.9580908410531333e-05, + "loss": 0.5929, + "step": 6828 + }, + { + "epoch": 0.18750686436024164, + "grad_norm": 0.5104290246963501, + "learning_rate": 1.9580784679443305e-05, + "loss": 0.4842, + "step": 6829 + }, + { + "epoch": 0.18753432180120813, + "grad_norm": 0.3639203608036041, + "learning_rate": 1.9580660930484057e-05, + "loss": 0.5468, + "step": 6830 + }, + { + "epoch": 0.18756177924217463, + "grad_norm": 0.4148023724555969, + "learning_rate": 1.9580537163653817e-05, + "loss": 0.5649, + "step": 6831 + }, + { + "epoch": 0.18758923668314112, + "grad_norm": 0.3456474542617798, + "learning_rate": 1.958041337895282e-05, + "loss": 0.5843, + "step": 6832 + }, + { + "epoch": 0.18761669412410764, + "grad_norm": 0.3674614429473877, + "learning_rate": 1.9580289576381292e-05, + "loss": 0.5368, + "step": 6833 + }, + { + "epoch": 0.18764415156507414, + "grad_norm": 0.38668695092201233, + "learning_rate": 1.9580165755939467e-05, + "loss": 0.5621, + "step": 6834 + }, + { + "epoch": 0.18767160900604063, + "grad_norm": 0.3668268322944641, + "learning_rate": 1.958004191762757e-05, + "loss": 0.4718, + "step": 6835 + }, + { + "epoch": 0.18769906644700715, + "grad_norm": 0.36033424735069275, + "learning_rate": 1.9579918061445845e-05, + "loss": 0.5545, + "step": 6836 + }, + { + "epoch": 0.18772652388797365, + "grad_norm": 0.48583075404167175, + "learning_rate": 1.9579794187394512e-05, + "loss": 0.6826, + "step": 6837 + }, + { + "epoch": 0.18775398132894014, + "grad_norm": 0.3291285037994385, + "learning_rate": 1.9579670295473806e-05, + "loss": 0.5118, + "step": 6838 + }, + { + "epoch": 0.18778143876990663, + "grad_norm": 0.3703378736972809, + "learning_rate": 1.9579546385683957e-05, + "loss": 0.6616, + "step": 6839 + }, + { + "epoch": 0.18780889621087316, + "grad_norm": 0.38657477498054504, + "learning_rate": 1.95794224580252e-05, + "loss": 0.5587, + "step": 6840 + }, + { + "epoch": 0.18783635365183965, + "grad_norm": 0.36306384205818176, + "learning_rate": 1.9579298512497758e-05, + "loss": 0.5605, + "step": 6841 + }, + { + "epoch": 0.18786381109280614, + "grad_norm": 0.35069623589515686, + "learning_rate": 1.9579174549101867e-05, + "loss": 0.5088, + "step": 6842 + }, + { + "epoch": 0.18789126853377267, + "grad_norm": 0.40265753865242004, + "learning_rate": 1.957905056783776e-05, + "loss": 0.5499, + "step": 6843 + }, + { + "epoch": 0.18791872597473916, + "grad_norm": 0.29761531949043274, + "learning_rate": 1.9578926568705667e-05, + "loss": 0.4424, + "step": 6844 + }, + { + "epoch": 0.18794618341570565, + "grad_norm": 0.4590238928794861, + "learning_rate": 1.9578802551705818e-05, + "loss": 0.5865, + "step": 6845 + }, + { + "epoch": 0.18797364085667215, + "grad_norm": 0.3460122346878052, + "learning_rate": 1.9578678516838443e-05, + "loss": 0.4932, + "step": 6846 + }, + { + "epoch": 0.18800109829763867, + "grad_norm": 0.34033727645874023, + "learning_rate": 1.957855446410378e-05, + "loss": 0.4905, + "step": 6847 + }, + { + "epoch": 0.18802855573860516, + "grad_norm": 0.34941911697387695, + "learning_rate": 1.9578430393502052e-05, + "loss": 0.5207, + "step": 6848 + }, + { + "epoch": 0.18805601317957166, + "grad_norm": 0.35217875242233276, + "learning_rate": 1.9578306305033493e-05, + "loss": 0.4919, + "step": 6849 + }, + { + "epoch": 0.18808347062053818, + "grad_norm": 0.3916667699813843, + "learning_rate": 1.957818219869834e-05, + "loss": 0.5571, + "step": 6850 + }, + { + "epoch": 0.18811092806150467, + "grad_norm": 0.3776398301124573, + "learning_rate": 1.957805807449682e-05, + "loss": 0.5623, + "step": 6851 + }, + { + "epoch": 0.18813838550247117, + "grad_norm": 0.38278159499168396, + "learning_rate": 1.957793393242916e-05, + "loss": 0.474, + "step": 6852 + }, + { + "epoch": 0.18816584294343766, + "grad_norm": 0.3364105224609375, + "learning_rate": 1.95778097724956e-05, + "loss": 0.5273, + "step": 6853 + }, + { + "epoch": 0.18819330038440418, + "grad_norm": 0.37100768089294434, + "learning_rate": 1.9577685594696367e-05, + "loss": 0.5498, + "step": 6854 + }, + { + "epoch": 0.18822075782537068, + "grad_norm": 0.34563401341438293, + "learning_rate": 1.9577561399031692e-05, + "loss": 0.5326, + "step": 6855 + }, + { + "epoch": 0.18824821526633717, + "grad_norm": 0.43228229880332947, + "learning_rate": 1.9577437185501812e-05, + "loss": 0.5766, + "step": 6856 + }, + { + "epoch": 0.1882756727073037, + "grad_norm": 0.3489593267440796, + "learning_rate": 1.957731295410695e-05, + "loss": 0.5441, + "step": 6857 + }, + { + "epoch": 0.1883031301482702, + "grad_norm": 0.35675302147865295, + "learning_rate": 1.9577188704847344e-05, + "loss": 0.5205, + "step": 6858 + }, + { + "epoch": 0.18833058758923668, + "grad_norm": 0.3516134023666382, + "learning_rate": 1.9577064437723226e-05, + "loss": 0.4966, + "step": 6859 + }, + { + "epoch": 0.18835804503020318, + "grad_norm": 0.39622431993484497, + "learning_rate": 1.9576940152734828e-05, + "loss": 0.6286, + "step": 6860 + }, + { + "epoch": 0.1883855024711697, + "grad_norm": 0.4257540702819824, + "learning_rate": 1.957681584988238e-05, + "loss": 0.5941, + "step": 6861 + }, + { + "epoch": 0.1884129599121362, + "grad_norm": 0.36149847507476807, + "learning_rate": 1.957669152916611e-05, + "loss": 0.5653, + "step": 6862 + }, + { + "epoch": 0.18844041735310268, + "grad_norm": 0.36363133788108826, + "learning_rate": 1.9576567190586257e-05, + "loss": 0.4977, + "step": 6863 + }, + { + "epoch": 0.18846787479406918, + "grad_norm": 0.36831215023994446, + "learning_rate": 1.9576442834143047e-05, + "loss": 0.5491, + "step": 6864 + }, + { + "epoch": 0.1884953322350357, + "grad_norm": 0.40143856406211853, + "learning_rate": 1.9576318459836715e-05, + "loss": 0.62, + "step": 6865 + }, + { + "epoch": 0.1885227896760022, + "grad_norm": 0.39163464307785034, + "learning_rate": 1.9576194067667495e-05, + "loss": 0.6031, + "step": 6866 + }, + { + "epoch": 0.1885502471169687, + "grad_norm": 0.3671136498451233, + "learning_rate": 1.9576069657635614e-05, + "loss": 0.5444, + "step": 6867 + }, + { + "epoch": 0.1885777045579352, + "grad_norm": 0.4096602499485016, + "learning_rate": 1.9575945229741305e-05, + "loss": 0.5598, + "step": 6868 + }, + { + "epoch": 0.1886051619989017, + "grad_norm": 0.4224018454551697, + "learning_rate": 1.9575820783984807e-05, + "loss": 0.5479, + "step": 6869 + }, + { + "epoch": 0.1886326194398682, + "grad_norm": 0.506557822227478, + "learning_rate": 1.9575696320366345e-05, + "loss": 0.5493, + "step": 6870 + }, + { + "epoch": 0.1886600768808347, + "grad_norm": 0.36617833375930786, + "learning_rate": 1.957557183888615e-05, + "loss": 0.581, + "step": 6871 + }, + { + "epoch": 0.18868753432180121, + "grad_norm": 0.3464500606060028, + "learning_rate": 1.957544733954446e-05, + "loss": 0.4837, + "step": 6872 + }, + { + "epoch": 0.1887149917627677, + "grad_norm": 0.3941195607185364, + "learning_rate": 1.957532282234151e-05, + "loss": 0.5904, + "step": 6873 + }, + { + "epoch": 0.1887424492037342, + "grad_norm": 0.3146474063396454, + "learning_rate": 1.957519828727752e-05, + "loss": 0.4368, + "step": 6874 + }, + { + "epoch": 0.18876990664470072, + "grad_norm": 0.3891701400279999, + "learning_rate": 1.957507373435273e-05, + "loss": 0.602, + "step": 6875 + }, + { + "epoch": 0.18879736408566722, + "grad_norm": 0.3965761661529541, + "learning_rate": 1.9574949163567373e-05, + "loss": 0.4592, + "step": 6876 + }, + { + "epoch": 0.1888248215266337, + "grad_norm": 0.353738397359848, + "learning_rate": 1.9574824574921682e-05, + "loss": 0.5322, + "step": 6877 + }, + { + "epoch": 0.1888522789676002, + "grad_norm": 0.4667591452598572, + "learning_rate": 1.9574699968415884e-05, + "loss": 0.5746, + "step": 6878 + }, + { + "epoch": 0.18887973640856673, + "grad_norm": 0.3589186668395996, + "learning_rate": 1.9574575344050213e-05, + "loss": 0.5249, + "step": 6879 + }, + { + "epoch": 0.18890719384953322, + "grad_norm": 0.42578983306884766, + "learning_rate": 1.9574450701824907e-05, + "loss": 0.6439, + "step": 6880 + }, + { + "epoch": 0.18893465129049972, + "grad_norm": 0.34998226165771484, + "learning_rate": 1.957432604174019e-05, + "loss": 0.5326, + "step": 6881 + }, + { + "epoch": 0.18896210873146624, + "grad_norm": 0.35209110379219055, + "learning_rate": 1.9574201363796303e-05, + "loss": 0.4835, + "step": 6882 + }, + { + "epoch": 0.18898956617243273, + "grad_norm": 0.37573570013046265, + "learning_rate": 1.9574076667993473e-05, + "loss": 0.5019, + "step": 6883 + }, + { + "epoch": 0.18901702361339923, + "grad_norm": 0.48956596851348877, + "learning_rate": 1.9573951954331935e-05, + "loss": 0.5878, + "step": 6884 + }, + { + "epoch": 0.18904448105436572, + "grad_norm": 0.3590899109840393, + "learning_rate": 1.9573827222811925e-05, + "loss": 0.5709, + "step": 6885 + }, + { + "epoch": 0.18907193849533224, + "grad_norm": 0.32039064168930054, + "learning_rate": 1.9573702473433664e-05, + "loss": 0.4467, + "step": 6886 + }, + { + "epoch": 0.18909939593629874, + "grad_norm": 0.3242812752723694, + "learning_rate": 1.9573577706197398e-05, + "loss": 0.5435, + "step": 6887 + }, + { + "epoch": 0.18912685337726523, + "grad_norm": 0.34487634897232056, + "learning_rate": 1.957345292110335e-05, + "loss": 0.5011, + "step": 6888 + }, + { + "epoch": 0.18915431081823175, + "grad_norm": 0.419750839471817, + "learning_rate": 1.9573328118151756e-05, + "loss": 0.6627, + "step": 6889 + }, + { + "epoch": 0.18918176825919825, + "grad_norm": 0.3542127013206482, + "learning_rate": 1.9573203297342855e-05, + "loss": 0.5175, + "step": 6890 + }, + { + "epoch": 0.18920922570016474, + "grad_norm": 0.7070826888084412, + "learning_rate": 1.9573078458676873e-05, + "loss": 0.4743, + "step": 6891 + }, + { + "epoch": 0.18923668314113123, + "grad_norm": 0.35835543274879456, + "learning_rate": 1.957295360215404e-05, + "loss": 0.55, + "step": 6892 + }, + { + "epoch": 0.18926414058209776, + "grad_norm": 0.3788932263851166, + "learning_rate": 1.95728287277746e-05, + "loss": 0.5016, + "step": 6893 + }, + { + "epoch": 0.18929159802306425, + "grad_norm": 0.373090535402298, + "learning_rate": 1.9572703835538776e-05, + "loss": 0.4741, + "step": 6894 + }, + { + "epoch": 0.18931905546403074, + "grad_norm": 0.3625165522098541, + "learning_rate": 1.9572578925446805e-05, + "loss": 0.4836, + "step": 6895 + }, + { + "epoch": 0.18934651290499727, + "grad_norm": 0.3365139365196228, + "learning_rate": 1.9572453997498915e-05, + "loss": 0.5459, + "step": 6896 + }, + { + "epoch": 0.18937397034596376, + "grad_norm": 0.3525559604167938, + "learning_rate": 1.9572329051695344e-05, + "loss": 0.5182, + "step": 6897 + }, + { + "epoch": 0.18940142778693025, + "grad_norm": 0.3484532833099365, + "learning_rate": 1.957220408803633e-05, + "loss": 0.499, + "step": 6898 + }, + { + "epoch": 0.18942888522789675, + "grad_norm": 0.3612794280052185, + "learning_rate": 1.9572079106522092e-05, + "loss": 0.4309, + "step": 6899 + }, + { + "epoch": 0.18945634266886327, + "grad_norm": 0.4073112905025482, + "learning_rate": 1.957195410715288e-05, + "loss": 0.5053, + "step": 6900 + }, + { + "epoch": 0.18948380010982976, + "grad_norm": 0.4449286460876465, + "learning_rate": 1.9571829089928913e-05, + "loss": 0.4792, + "step": 6901 + }, + { + "epoch": 0.18951125755079626, + "grad_norm": 0.4298200011253357, + "learning_rate": 1.957170405485043e-05, + "loss": 0.5624, + "step": 6902 + }, + { + "epoch": 0.18953871499176278, + "grad_norm": 0.4122362434864044, + "learning_rate": 1.9571579001917666e-05, + "loss": 0.5347, + "step": 6903 + }, + { + "epoch": 0.18956617243272927, + "grad_norm": 0.4166927933692932, + "learning_rate": 1.957145393113085e-05, + "loss": 0.5836, + "step": 6904 + }, + { + "epoch": 0.18959362987369577, + "grad_norm": 0.6806356906890869, + "learning_rate": 1.9571328842490218e-05, + "loss": 0.4892, + "step": 6905 + }, + { + "epoch": 0.18962108731466226, + "grad_norm": 0.47306039929389954, + "learning_rate": 1.9571203735996006e-05, + "loss": 0.5874, + "step": 6906 + }, + { + "epoch": 0.18964854475562878, + "grad_norm": 0.40287163853645325, + "learning_rate": 1.957107861164844e-05, + "loss": 0.675, + "step": 6907 + }, + { + "epoch": 0.18967600219659528, + "grad_norm": 0.3965892791748047, + "learning_rate": 1.957095346944776e-05, + "loss": 0.5873, + "step": 6908 + }, + { + "epoch": 0.18970345963756177, + "grad_norm": 0.36189836263656616, + "learning_rate": 1.95708283093942e-05, + "loss": 0.5078, + "step": 6909 + }, + { + "epoch": 0.1897309170785283, + "grad_norm": 0.3958370089530945, + "learning_rate": 1.9570703131487984e-05, + "loss": 0.5232, + "step": 6910 + }, + { + "epoch": 0.1897583745194948, + "grad_norm": 0.34994015097618103, + "learning_rate": 1.9570577935729354e-05, + "loss": 0.5146, + "step": 6911 + }, + { + "epoch": 0.18978583196046128, + "grad_norm": 0.3360840976238251, + "learning_rate": 1.9570452722118546e-05, + "loss": 0.5117, + "step": 6912 + }, + { + "epoch": 0.18981328940142778, + "grad_norm": 0.3502565622329712, + "learning_rate": 1.9570327490655787e-05, + "loss": 0.4983, + "step": 6913 + }, + { + "epoch": 0.1898407468423943, + "grad_norm": 0.34775620698928833, + "learning_rate": 1.957020224134131e-05, + "loss": 0.5462, + "step": 6914 + }, + { + "epoch": 0.1898682042833608, + "grad_norm": 0.32616209983825684, + "learning_rate": 1.9570076974175352e-05, + "loss": 0.524, + "step": 6915 + }, + { + "epoch": 0.18989566172432729, + "grad_norm": 0.622235119342804, + "learning_rate": 1.956995168915815e-05, + "loss": 0.5386, + "step": 6916 + }, + { + "epoch": 0.1899231191652938, + "grad_norm": 0.36962130665779114, + "learning_rate": 1.956982638628993e-05, + "loss": 0.6172, + "step": 6917 + }, + { + "epoch": 0.1899505766062603, + "grad_norm": 0.356423556804657, + "learning_rate": 1.956970106557093e-05, + "loss": 0.5311, + "step": 6918 + }, + { + "epoch": 0.1899780340472268, + "grad_norm": 0.43130388855934143, + "learning_rate": 1.9569575727001384e-05, + "loss": 0.5775, + "step": 6919 + }, + { + "epoch": 0.1900054914881933, + "grad_norm": 0.38675811886787415, + "learning_rate": 1.9569450370581525e-05, + "loss": 0.546, + "step": 6920 + }, + { + "epoch": 0.1900329489291598, + "grad_norm": 0.34962788224220276, + "learning_rate": 1.9569324996311586e-05, + "loss": 0.5709, + "step": 6921 + }, + { + "epoch": 0.1900604063701263, + "grad_norm": 0.3248359262943268, + "learning_rate": 1.95691996041918e-05, + "loss": 0.5863, + "step": 6922 + }, + { + "epoch": 0.1900878638110928, + "grad_norm": 0.3379427492618561, + "learning_rate": 1.9569074194222406e-05, + "loss": 0.5491, + "step": 6923 + }, + { + "epoch": 0.19011532125205932, + "grad_norm": 0.40340539813041687, + "learning_rate": 1.9568948766403633e-05, + "loss": 0.5619, + "step": 6924 + }, + { + "epoch": 0.19014277869302582, + "grad_norm": 0.4092773497104645, + "learning_rate": 1.9568823320735718e-05, + "loss": 0.5902, + "step": 6925 + }, + { + "epoch": 0.1901702361339923, + "grad_norm": 0.33309510350227356, + "learning_rate": 1.956869785721889e-05, + "loss": 0.5837, + "step": 6926 + }, + { + "epoch": 0.1901976935749588, + "grad_norm": 0.3484421670436859, + "learning_rate": 1.956857237585339e-05, + "loss": 0.5071, + "step": 6927 + }, + { + "epoch": 0.19022515101592533, + "grad_norm": 0.364927738904953, + "learning_rate": 1.956844687663945e-05, + "loss": 0.4728, + "step": 6928 + }, + { + "epoch": 0.19025260845689182, + "grad_norm": 0.3537469506263733, + "learning_rate": 1.9568321359577298e-05, + "loss": 0.5719, + "step": 6929 + }, + { + "epoch": 0.1902800658978583, + "grad_norm": 0.3491513133049011, + "learning_rate": 1.9568195824667174e-05, + "loss": 0.5196, + "step": 6930 + }, + { + "epoch": 0.1903075233388248, + "grad_norm": 0.4068663716316223, + "learning_rate": 1.9568070271909314e-05, + "loss": 0.5905, + "step": 6931 + }, + { + "epoch": 0.19033498077979133, + "grad_norm": 0.3350856304168701, + "learning_rate": 1.9567944701303947e-05, + "loss": 0.5652, + "step": 6932 + }, + { + "epoch": 0.19036243822075782, + "grad_norm": 0.3768492639064789, + "learning_rate": 1.956781911285131e-05, + "loss": 0.579, + "step": 6933 + }, + { + "epoch": 0.19038989566172432, + "grad_norm": 0.34556999802589417, + "learning_rate": 1.9567693506551632e-05, + "loss": 0.4936, + "step": 6934 + }, + { + "epoch": 0.19041735310269084, + "grad_norm": 0.37416672706604004, + "learning_rate": 1.9567567882405157e-05, + "loss": 0.567, + "step": 6935 + }, + { + "epoch": 0.19044481054365733, + "grad_norm": 0.40244197845458984, + "learning_rate": 1.9567442240412115e-05, + "loss": 0.563, + "step": 6936 + }, + { + "epoch": 0.19047226798462383, + "grad_norm": 0.3270292580127716, + "learning_rate": 1.956731658057274e-05, + "loss": 0.4459, + "step": 6937 + }, + { + "epoch": 0.19049972542559032, + "grad_norm": 0.3339061737060547, + "learning_rate": 1.956719090288726e-05, + "loss": 0.5165, + "step": 6938 + }, + { + "epoch": 0.19052718286655684, + "grad_norm": 0.3981926441192627, + "learning_rate": 1.956706520735592e-05, + "loss": 0.4263, + "step": 6939 + }, + { + "epoch": 0.19055464030752334, + "grad_norm": 0.40066128969192505, + "learning_rate": 1.9566939493978953e-05, + "loss": 0.4971, + "step": 6940 + }, + { + "epoch": 0.19058209774848983, + "grad_norm": 0.3356865644454956, + "learning_rate": 1.9566813762756584e-05, + "loss": 0.5175, + "step": 6941 + }, + { + "epoch": 0.19060955518945635, + "grad_norm": 0.39213845133781433, + "learning_rate": 1.9566688013689056e-05, + "loss": 0.4426, + "step": 6942 + }, + { + "epoch": 0.19063701263042285, + "grad_norm": 0.3634313941001892, + "learning_rate": 1.9566562246776604e-05, + "loss": 0.616, + "step": 6943 + }, + { + "epoch": 0.19066447007138934, + "grad_norm": 0.39019617438316345, + "learning_rate": 1.956643646201946e-05, + "loss": 0.5721, + "step": 6944 + }, + { + "epoch": 0.19069192751235584, + "grad_norm": 0.4079192280769348, + "learning_rate": 1.9566310659417857e-05, + "loss": 0.4959, + "step": 6945 + }, + { + "epoch": 0.19071938495332236, + "grad_norm": 0.3635096848011017, + "learning_rate": 1.956618483897203e-05, + "loss": 0.4588, + "step": 6946 + }, + { + "epoch": 0.19074684239428885, + "grad_norm": 0.4046266973018646, + "learning_rate": 1.9566059000682217e-05, + "loss": 0.542, + "step": 6947 + }, + { + "epoch": 0.19077429983525535, + "grad_norm": 0.3891264498233795, + "learning_rate": 1.956593314454865e-05, + "loss": 0.52, + "step": 6948 + }, + { + "epoch": 0.19080175727622187, + "grad_norm": 0.39161258935928345, + "learning_rate": 1.9565807270571567e-05, + "loss": 0.5359, + "step": 6949 + }, + { + "epoch": 0.19082921471718836, + "grad_norm": 0.43789616227149963, + "learning_rate": 1.95656813787512e-05, + "loss": 0.5174, + "step": 6950 + }, + { + "epoch": 0.19085667215815486, + "grad_norm": 0.37501177191734314, + "learning_rate": 1.956555546908778e-05, + "loss": 0.6246, + "step": 6951 + }, + { + "epoch": 0.19088412959912135, + "grad_norm": 0.3913848400115967, + "learning_rate": 1.956542954158155e-05, + "loss": 0.5828, + "step": 6952 + }, + { + "epoch": 0.19091158704008787, + "grad_norm": 0.4014701843261719, + "learning_rate": 1.9565303596232738e-05, + "loss": 0.5447, + "step": 6953 + }, + { + "epoch": 0.19093904448105437, + "grad_norm": 0.3197363018989563, + "learning_rate": 1.9565177633041587e-05, + "loss": 0.4677, + "step": 6954 + }, + { + "epoch": 0.19096650192202086, + "grad_norm": 0.34348398447036743, + "learning_rate": 1.9565051652008323e-05, + "loss": 0.5528, + "step": 6955 + }, + { + "epoch": 0.19099395936298738, + "grad_norm": 0.34984290599823, + "learning_rate": 1.9564925653133185e-05, + "loss": 0.4813, + "step": 6956 + }, + { + "epoch": 0.19102141680395388, + "grad_norm": 0.3776915371417999, + "learning_rate": 1.9564799636416406e-05, + "loss": 0.509, + "step": 6957 + }, + { + "epoch": 0.19104887424492037, + "grad_norm": 0.3569590449333191, + "learning_rate": 1.9564673601858226e-05, + "loss": 0.5147, + "step": 6958 + }, + { + "epoch": 0.19107633168588686, + "grad_norm": 0.3705546259880066, + "learning_rate": 1.9564547549458876e-05, + "loss": 0.5665, + "step": 6959 + }, + { + "epoch": 0.19110378912685339, + "grad_norm": 0.3040412962436676, + "learning_rate": 1.9564421479218593e-05, + "loss": 0.5245, + "step": 6960 + }, + { + "epoch": 0.19113124656781988, + "grad_norm": 0.36125874519348145, + "learning_rate": 1.956429539113761e-05, + "loss": 0.5273, + "step": 6961 + }, + { + "epoch": 0.19115870400878637, + "grad_norm": 0.39060819149017334, + "learning_rate": 1.9564169285216166e-05, + "loss": 0.5504, + "step": 6962 + }, + { + "epoch": 0.1911861614497529, + "grad_norm": 0.38899141550064087, + "learning_rate": 1.956404316145449e-05, + "loss": 0.5614, + "step": 6963 + }, + { + "epoch": 0.1912136188907194, + "grad_norm": 0.3639388680458069, + "learning_rate": 1.9563917019852827e-05, + "loss": 0.5602, + "step": 6964 + }, + { + "epoch": 0.19124107633168588, + "grad_norm": 0.3669624328613281, + "learning_rate": 1.95637908604114e-05, + "loss": 0.5448, + "step": 6965 + }, + { + "epoch": 0.19126853377265238, + "grad_norm": 0.39606618881225586, + "learning_rate": 1.956366468313045e-05, + "loss": 0.5014, + "step": 6966 + }, + { + "epoch": 0.1912959912136189, + "grad_norm": 0.3647729456424713, + "learning_rate": 1.956353848801022e-05, + "loss": 0.536, + "step": 6967 + }, + { + "epoch": 0.1913234486545854, + "grad_norm": 0.4668254256248474, + "learning_rate": 1.9563412275050933e-05, + "loss": 0.5897, + "step": 6968 + }, + { + "epoch": 0.1913509060955519, + "grad_norm": 0.45616406202316284, + "learning_rate": 1.956328604425283e-05, + "loss": 0.5883, + "step": 6969 + }, + { + "epoch": 0.1913783635365184, + "grad_norm": 0.3435705006122589, + "learning_rate": 1.956315979561615e-05, + "loss": 0.4694, + "step": 6970 + }, + { + "epoch": 0.1914058209774849, + "grad_norm": 0.40309232473373413, + "learning_rate": 1.956303352914112e-05, + "loss": 0.524, + "step": 6971 + }, + { + "epoch": 0.1914332784184514, + "grad_norm": 0.3847871422767639, + "learning_rate": 1.9562907244827983e-05, + "loss": 0.5878, + "step": 6972 + }, + { + "epoch": 0.1914607358594179, + "grad_norm": 0.37726402282714844, + "learning_rate": 1.956278094267697e-05, + "loss": 0.4876, + "step": 6973 + }, + { + "epoch": 0.1914881933003844, + "grad_norm": 0.3669898509979248, + "learning_rate": 1.9562654622688325e-05, + "loss": 0.5974, + "step": 6974 + }, + { + "epoch": 0.1915156507413509, + "grad_norm": 0.3792892396450043, + "learning_rate": 1.956252828486227e-05, + "loss": 0.4739, + "step": 6975 + }, + { + "epoch": 0.1915431081823174, + "grad_norm": 0.4032416045665741, + "learning_rate": 1.956240192919905e-05, + "loss": 0.5378, + "step": 6976 + }, + { + "epoch": 0.19157056562328392, + "grad_norm": 0.3781232237815857, + "learning_rate": 1.9562275555698898e-05, + "loss": 0.5348, + "step": 6977 + }, + { + "epoch": 0.19159802306425042, + "grad_norm": 0.3404254913330078, + "learning_rate": 1.956214916436205e-05, + "loss": 0.4797, + "step": 6978 + }, + { + "epoch": 0.1916254805052169, + "grad_norm": 0.37766364216804504, + "learning_rate": 1.956202275518874e-05, + "loss": 0.6121, + "step": 6979 + }, + { + "epoch": 0.1916529379461834, + "grad_norm": 0.32225149869918823, + "learning_rate": 1.956189632817921e-05, + "loss": 0.5117, + "step": 6980 + }, + { + "epoch": 0.19168039538714993, + "grad_norm": 0.45331841707229614, + "learning_rate": 1.956176988333369e-05, + "loss": 0.5688, + "step": 6981 + }, + { + "epoch": 0.19170785282811642, + "grad_norm": 0.4000934064388275, + "learning_rate": 1.9561643420652415e-05, + "loss": 0.4954, + "step": 6982 + }, + { + "epoch": 0.19173531026908291, + "grad_norm": 0.35431960225105286, + "learning_rate": 1.9561516940135627e-05, + "loss": 0.4943, + "step": 6983 + }, + { + "epoch": 0.19176276771004944, + "grad_norm": 0.39247363805770874, + "learning_rate": 1.9561390441783557e-05, + "loss": 0.5184, + "step": 6984 + }, + { + "epoch": 0.19179022515101593, + "grad_norm": 0.6845055818557739, + "learning_rate": 1.956126392559644e-05, + "loss": 0.513, + "step": 6985 + }, + { + "epoch": 0.19181768259198242, + "grad_norm": 0.4050014615058899, + "learning_rate": 1.9561137391574515e-05, + "loss": 0.5543, + "step": 6986 + }, + { + "epoch": 0.19184514003294892, + "grad_norm": 0.4189422130584717, + "learning_rate": 1.956101083971802e-05, + "loss": 0.6427, + "step": 6987 + }, + { + "epoch": 0.19187259747391544, + "grad_norm": 0.3994913697242737, + "learning_rate": 1.9560884270027186e-05, + "loss": 0.5599, + "step": 6988 + }, + { + "epoch": 0.19190005491488193, + "grad_norm": 0.3419458866119385, + "learning_rate": 1.9560757682502252e-05, + "loss": 0.5459, + "step": 6989 + }, + { + "epoch": 0.19192751235584843, + "grad_norm": 0.37614187598228455, + "learning_rate": 1.9560631077143456e-05, + "loss": 0.6211, + "step": 6990 + }, + { + "epoch": 0.19195496979681495, + "grad_norm": 0.4427298605442047, + "learning_rate": 1.9560504453951026e-05, + "loss": 0.5894, + "step": 6991 + }, + { + "epoch": 0.19198242723778144, + "grad_norm": 0.3692069351673126, + "learning_rate": 1.9560377812925208e-05, + "loss": 0.4909, + "step": 6992 + }, + { + "epoch": 0.19200988467874794, + "grad_norm": 0.35063228011131287, + "learning_rate": 1.9560251154066232e-05, + "loss": 0.5294, + "step": 6993 + }, + { + "epoch": 0.19203734211971443, + "grad_norm": 0.46396633982658386, + "learning_rate": 1.9560124477374337e-05, + "loss": 0.5342, + "step": 6994 + }, + { + "epoch": 0.19206479956068095, + "grad_norm": 0.40436574816703796, + "learning_rate": 1.955999778284976e-05, + "loss": 0.5713, + "step": 6995 + }, + { + "epoch": 0.19209225700164745, + "grad_norm": 0.34070324897766113, + "learning_rate": 1.9559871070492734e-05, + "loss": 0.4958, + "step": 6996 + }, + { + "epoch": 0.19211971444261394, + "grad_norm": 0.38829436898231506, + "learning_rate": 1.9559744340303498e-05, + "loss": 0.5949, + "step": 6997 + }, + { + "epoch": 0.19214717188358044, + "grad_norm": 0.36199212074279785, + "learning_rate": 1.9559617592282287e-05, + "loss": 0.4665, + "step": 6998 + }, + { + "epoch": 0.19217462932454696, + "grad_norm": 0.4077532887458801, + "learning_rate": 1.955949082642934e-05, + "loss": 0.6246, + "step": 6999 + }, + { + "epoch": 0.19220208676551345, + "grad_norm": 0.8044859766960144, + "learning_rate": 1.9559364042744887e-05, + "loss": 0.6253, + "step": 7000 + }, + { + "epoch": 0.19222954420647995, + "grad_norm": 0.37125468254089355, + "learning_rate": 1.9559237241229175e-05, + "loss": 0.5385, + "step": 7001 + }, + { + "epoch": 0.19225700164744647, + "grad_norm": 0.3773503601551056, + "learning_rate": 1.955911042188243e-05, + "loss": 0.5636, + "step": 7002 + }, + { + "epoch": 0.19228445908841296, + "grad_norm": 0.3741331696510315, + "learning_rate": 1.9558983584704896e-05, + "loss": 0.505, + "step": 7003 + }, + { + "epoch": 0.19231191652937946, + "grad_norm": 0.3699350953102112, + "learning_rate": 1.9558856729696803e-05, + "loss": 0.5747, + "step": 7004 + }, + { + "epoch": 0.19233937397034595, + "grad_norm": 0.3491131365299225, + "learning_rate": 1.9558729856858396e-05, + "loss": 0.5475, + "step": 7005 + }, + { + "epoch": 0.19236683141131247, + "grad_norm": 0.3777410387992859, + "learning_rate": 1.9558602966189905e-05, + "loss": 0.5264, + "step": 7006 + }, + { + "epoch": 0.19239428885227897, + "grad_norm": 0.3943873345851898, + "learning_rate": 1.955847605769157e-05, + "loss": 0.5645, + "step": 7007 + }, + { + "epoch": 0.19242174629324546, + "grad_norm": 0.4020509123802185, + "learning_rate": 1.9558349131363625e-05, + "loss": 0.497, + "step": 7008 + }, + { + "epoch": 0.19244920373421198, + "grad_norm": 0.3494105935096741, + "learning_rate": 1.9558222187206308e-05, + "loss": 0.5004, + "step": 7009 + }, + { + "epoch": 0.19247666117517848, + "grad_norm": 0.35440894961357117, + "learning_rate": 1.955809522521986e-05, + "loss": 0.5305, + "step": 7010 + }, + { + "epoch": 0.19250411861614497, + "grad_norm": 0.38787394762039185, + "learning_rate": 1.9557968245404507e-05, + "loss": 0.4617, + "step": 7011 + }, + { + "epoch": 0.19253157605711146, + "grad_norm": 0.39531490206718445, + "learning_rate": 1.9557841247760495e-05, + "loss": 0.5392, + "step": 7012 + }, + { + "epoch": 0.19255903349807799, + "grad_norm": 0.3669515550136566, + "learning_rate": 1.955771423228806e-05, + "loss": 0.587, + "step": 7013 + }, + { + "epoch": 0.19258649093904448, + "grad_norm": 0.4034994840621948, + "learning_rate": 1.9557587198987438e-05, + "loss": 0.5638, + "step": 7014 + }, + { + "epoch": 0.19261394838001097, + "grad_norm": 0.3339743912220001, + "learning_rate": 1.9557460147858865e-05, + "loss": 0.504, + "step": 7015 + }, + { + "epoch": 0.1926414058209775, + "grad_norm": 0.3385424315929413, + "learning_rate": 1.955733307890258e-05, + "loss": 0.5177, + "step": 7016 + }, + { + "epoch": 0.192668863261944, + "grad_norm": 0.344289094209671, + "learning_rate": 1.9557205992118815e-05, + "loss": 0.5282, + "step": 7017 + }, + { + "epoch": 0.19269632070291048, + "grad_norm": 0.3443506956100464, + "learning_rate": 1.955707888750781e-05, + "loss": 0.4742, + "step": 7018 + }, + { + "epoch": 0.19272377814387698, + "grad_norm": 0.3377014398574829, + "learning_rate": 1.9556951765069806e-05, + "loss": 0.4955, + "step": 7019 + }, + { + "epoch": 0.1927512355848435, + "grad_norm": 0.3553220331668854, + "learning_rate": 1.9556824624805035e-05, + "loss": 0.5678, + "step": 7020 + }, + { + "epoch": 0.19277869302581, + "grad_norm": 0.32762786746025085, + "learning_rate": 1.9556697466713738e-05, + "loss": 0.518, + "step": 7021 + }, + { + "epoch": 0.1928061504667765, + "grad_norm": 0.3683374524116516, + "learning_rate": 1.9556570290796148e-05, + "loss": 0.4317, + "step": 7022 + }, + { + "epoch": 0.192833607907743, + "grad_norm": 0.35216841101646423, + "learning_rate": 1.9556443097052505e-05, + "loss": 0.4583, + "step": 7023 + }, + { + "epoch": 0.1928610653487095, + "grad_norm": 0.37241801619529724, + "learning_rate": 1.9556315885483044e-05, + "loss": 0.5315, + "step": 7024 + }, + { + "epoch": 0.192888522789676, + "grad_norm": 0.34667956829071045, + "learning_rate": 1.9556188656088005e-05, + "loss": 0.5932, + "step": 7025 + }, + { + "epoch": 0.1929159802306425, + "grad_norm": 0.3337510824203491, + "learning_rate": 1.9556061408867622e-05, + "loss": 0.5288, + "step": 7026 + }, + { + "epoch": 0.192943437671609, + "grad_norm": 0.3353162407875061, + "learning_rate": 1.955593414382214e-05, + "loss": 0.5025, + "step": 7027 + }, + { + "epoch": 0.1929708951125755, + "grad_norm": 0.3487168550491333, + "learning_rate": 1.9555806860951785e-05, + "loss": 0.5534, + "step": 7028 + }, + { + "epoch": 0.192998352553542, + "grad_norm": 0.36860448122024536, + "learning_rate": 1.9555679560256803e-05, + "loss": 0.5171, + "step": 7029 + }, + { + "epoch": 0.19302580999450852, + "grad_norm": 0.3501944839954376, + "learning_rate": 1.9555552241737428e-05, + "loss": 0.5378, + "step": 7030 + }, + { + "epoch": 0.19305326743547502, + "grad_norm": 0.5548388957977295, + "learning_rate": 1.95554249053939e-05, + "loss": 0.675, + "step": 7031 + }, + { + "epoch": 0.1930807248764415, + "grad_norm": 0.32186439633369446, + "learning_rate": 1.9555297551226454e-05, + "loss": 0.483, + "step": 7032 + }, + { + "epoch": 0.193108182317408, + "grad_norm": 0.35725170373916626, + "learning_rate": 1.9555170179235326e-05, + "loss": 0.5159, + "step": 7033 + }, + { + "epoch": 0.19313563975837453, + "grad_norm": 0.9094733595848083, + "learning_rate": 1.955504278942076e-05, + "loss": 0.4956, + "step": 7034 + }, + { + "epoch": 0.19316309719934102, + "grad_norm": 0.30891624093055725, + "learning_rate": 1.955491538178299e-05, + "loss": 0.4183, + "step": 7035 + }, + { + "epoch": 0.19319055464030752, + "grad_norm": 0.32969704270362854, + "learning_rate": 1.955478795632225e-05, + "loss": 0.5475, + "step": 7036 + }, + { + "epoch": 0.19321801208127404, + "grad_norm": 0.35502755641937256, + "learning_rate": 1.955466051303878e-05, + "loss": 0.4936, + "step": 7037 + }, + { + "epoch": 0.19324546952224053, + "grad_norm": 0.34554553031921387, + "learning_rate": 1.9554533051932824e-05, + "loss": 0.5049, + "step": 7038 + }, + { + "epoch": 0.19327292696320703, + "grad_norm": 0.37126660346984863, + "learning_rate": 1.955440557300461e-05, + "loss": 0.479, + "step": 7039 + }, + { + "epoch": 0.19330038440417352, + "grad_norm": 0.4616793096065521, + "learning_rate": 1.955427807625438e-05, + "loss": 0.4941, + "step": 7040 + }, + { + "epoch": 0.19332784184514004, + "grad_norm": 0.35193684697151184, + "learning_rate": 1.9554150561682374e-05, + "loss": 0.587, + "step": 7041 + }, + { + "epoch": 0.19335529928610654, + "grad_norm": 0.36055055260658264, + "learning_rate": 1.9554023029288823e-05, + "loss": 0.5468, + "step": 7042 + }, + { + "epoch": 0.19338275672707303, + "grad_norm": 0.36816155910491943, + "learning_rate": 1.9553895479073977e-05, + "loss": 0.6131, + "step": 7043 + }, + { + "epoch": 0.19341021416803955, + "grad_norm": 0.3417634665966034, + "learning_rate": 1.9553767911038066e-05, + "loss": 0.5746, + "step": 7044 + }, + { + "epoch": 0.19343767160900605, + "grad_norm": 0.3691891133785248, + "learning_rate": 1.9553640325181323e-05, + "loss": 0.5051, + "step": 7045 + }, + { + "epoch": 0.19346512904997254, + "grad_norm": 0.34039413928985596, + "learning_rate": 1.9553512721503994e-05, + "loss": 0.5046, + "step": 7046 + }, + { + "epoch": 0.19349258649093903, + "grad_norm": 0.361599862575531, + "learning_rate": 1.9553385100006318e-05, + "loss": 0.45, + "step": 7047 + }, + { + "epoch": 0.19352004393190556, + "grad_norm": 0.3631707429885864, + "learning_rate": 1.9553257460688525e-05, + "loss": 0.5249, + "step": 7048 + }, + { + "epoch": 0.19354750137287205, + "grad_norm": 0.736594557762146, + "learning_rate": 1.955312980355086e-05, + "loss": 0.5581, + "step": 7049 + }, + { + "epoch": 0.19357495881383854, + "grad_norm": 0.4240773320198059, + "learning_rate": 1.955300212859356e-05, + "loss": 0.6069, + "step": 7050 + }, + { + "epoch": 0.19360241625480507, + "grad_norm": 0.35737890005111694, + "learning_rate": 1.955287443581686e-05, + "loss": 0.5218, + "step": 7051 + }, + { + "epoch": 0.19362987369577156, + "grad_norm": 0.38303494453430176, + "learning_rate": 1.9552746725221003e-05, + "loss": 0.6008, + "step": 7052 + }, + { + "epoch": 0.19365733113673805, + "grad_norm": 0.37311851978302, + "learning_rate": 1.955261899680622e-05, + "loss": 0.6081, + "step": 7053 + }, + { + "epoch": 0.19368478857770455, + "grad_norm": 0.3554532825946808, + "learning_rate": 1.9552491250572758e-05, + "loss": 0.6025, + "step": 7054 + }, + { + "epoch": 0.19371224601867107, + "grad_norm": 0.33497458696365356, + "learning_rate": 1.955236348652085e-05, + "loss": 0.5215, + "step": 7055 + }, + { + "epoch": 0.19373970345963756, + "grad_norm": 0.3799905478954315, + "learning_rate": 1.9552235704650732e-05, + "loss": 0.5028, + "step": 7056 + }, + { + "epoch": 0.19376716090060406, + "grad_norm": 0.4394189119338989, + "learning_rate": 1.9552107904962653e-05, + "loss": 0.5432, + "step": 7057 + }, + { + "epoch": 0.19379461834157058, + "grad_norm": 0.3874707818031311, + "learning_rate": 1.9551980087456837e-05, + "loss": 0.6236, + "step": 7058 + }, + { + "epoch": 0.19382207578253707, + "grad_norm": 0.3800000250339508, + "learning_rate": 1.9551852252133533e-05, + "loss": 0.5927, + "step": 7059 + }, + { + "epoch": 0.19384953322350357, + "grad_norm": 0.3664473593235016, + "learning_rate": 1.9551724398992973e-05, + "loss": 0.5339, + "step": 7060 + }, + { + "epoch": 0.19387699066447006, + "grad_norm": 0.3939146399497986, + "learning_rate": 1.95515965280354e-05, + "loss": 0.6566, + "step": 7061 + }, + { + "epoch": 0.19390444810543658, + "grad_norm": 0.4141204059123993, + "learning_rate": 1.955146863926105e-05, + "loss": 0.6651, + "step": 7062 + }, + { + "epoch": 0.19393190554640308, + "grad_norm": 0.3623238205909729, + "learning_rate": 1.9551340732670167e-05, + "loss": 0.5329, + "step": 7063 + }, + { + "epoch": 0.19395936298736957, + "grad_norm": 0.35530078411102295, + "learning_rate": 1.955121280826298e-05, + "loss": 0.5935, + "step": 7064 + }, + { + "epoch": 0.19398682042833607, + "grad_norm": 0.3710092008113861, + "learning_rate": 1.9551084866039736e-05, + "loss": 0.5622, + "step": 7065 + }, + { + "epoch": 0.1940142778693026, + "grad_norm": 0.3602275252342224, + "learning_rate": 1.955095690600067e-05, + "loss": 0.6223, + "step": 7066 + }, + { + "epoch": 0.19404173531026908, + "grad_norm": 0.3741648495197296, + "learning_rate": 1.955082892814602e-05, + "loss": 0.5677, + "step": 7067 + }, + { + "epoch": 0.19406919275123558, + "grad_norm": 0.3664383590221405, + "learning_rate": 1.9550700932476023e-05, + "loss": 0.5994, + "step": 7068 + }, + { + "epoch": 0.1940966501922021, + "grad_norm": 0.4249798357486725, + "learning_rate": 1.9550572918990926e-05, + "loss": 0.5886, + "step": 7069 + }, + { + "epoch": 0.1941241076331686, + "grad_norm": 0.36835092306137085, + "learning_rate": 1.9550444887690958e-05, + "loss": 0.5749, + "step": 7070 + }, + { + "epoch": 0.19415156507413509, + "grad_norm": 0.9168388247489929, + "learning_rate": 1.9550316838576365e-05, + "loss": 0.4887, + "step": 7071 + }, + { + "epoch": 0.19417902251510158, + "grad_norm": 0.3361561894416809, + "learning_rate": 1.9550188771647383e-05, + "loss": 0.4964, + "step": 7072 + }, + { + "epoch": 0.1942064799560681, + "grad_norm": 0.38099122047424316, + "learning_rate": 1.9550060686904247e-05, + "loss": 0.5402, + "step": 7073 + }, + { + "epoch": 0.1942339373970346, + "grad_norm": 0.38914793729782104, + "learning_rate": 1.9549932584347205e-05, + "loss": 0.5691, + "step": 7074 + }, + { + "epoch": 0.1942613948380011, + "grad_norm": 0.3655528128147125, + "learning_rate": 1.9549804463976488e-05, + "loss": 0.562, + "step": 7075 + }, + { + "epoch": 0.1942888522789676, + "grad_norm": 0.34185680747032166, + "learning_rate": 1.9549676325792336e-05, + "loss": 0.5355, + "step": 7076 + }, + { + "epoch": 0.1943163097199341, + "grad_norm": 0.44784706830978394, + "learning_rate": 1.9549548169794992e-05, + "loss": 0.5154, + "step": 7077 + }, + { + "epoch": 0.1943437671609006, + "grad_norm": 0.4111733138561249, + "learning_rate": 1.9549419995984692e-05, + "loss": 0.5346, + "step": 7078 + }, + { + "epoch": 0.1943712246018671, + "grad_norm": 0.4037534296512604, + "learning_rate": 1.954929180436168e-05, + "loss": 0.6513, + "step": 7079 + }, + { + "epoch": 0.19439868204283361, + "grad_norm": 0.3821345269680023, + "learning_rate": 1.9549163594926185e-05, + "loss": 0.5449, + "step": 7080 + }, + { + "epoch": 0.1944261394838001, + "grad_norm": 0.40433260798454285, + "learning_rate": 1.9549035367678453e-05, + "loss": 0.5432, + "step": 7081 + }, + { + "epoch": 0.1944535969247666, + "grad_norm": 0.40730172395706177, + "learning_rate": 1.9548907122618727e-05, + "loss": 0.5429, + "step": 7082 + }, + { + "epoch": 0.19448105436573312, + "grad_norm": 0.3996466398239136, + "learning_rate": 1.954877885974724e-05, + "loss": 0.5716, + "step": 7083 + }, + { + "epoch": 0.19450851180669962, + "grad_norm": 0.4058230519294739, + "learning_rate": 1.9548650579064226e-05, + "loss": 0.5286, + "step": 7084 + }, + { + "epoch": 0.1945359692476661, + "grad_norm": 0.37513747811317444, + "learning_rate": 1.9548522280569938e-05, + "loss": 0.5185, + "step": 7085 + }, + { + "epoch": 0.1945634266886326, + "grad_norm": 0.362341046333313, + "learning_rate": 1.9548393964264606e-05, + "loss": 0.547, + "step": 7086 + }, + { + "epoch": 0.19459088412959913, + "grad_norm": 0.44074681401252747, + "learning_rate": 1.9548265630148472e-05, + "loss": 0.5489, + "step": 7087 + }, + { + "epoch": 0.19461834157056562, + "grad_norm": 0.38960081338882446, + "learning_rate": 1.9548137278221776e-05, + "loss": 0.5169, + "step": 7088 + }, + { + "epoch": 0.19464579901153212, + "grad_norm": 0.3788408935070038, + "learning_rate": 1.9548008908484756e-05, + "loss": 0.533, + "step": 7089 + }, + { + "epoch": 0.19467325645249864, + "grad_norm": 0.37933051586151123, + "learning_rate": 1.9547880520937655e-05, + "loss": 0.4978, + "step": 7090 + }, + { + "epoch": 0.19470071389346513, + "grad_norm": 0.3765076994895935, + "learning_rate": 1.9547752115580703e-05, + "loss": 0.5499, + "step": 7091 + }, + { + "epoch": 0.19472817133443163, + "grad_norm": 0.3727859556674957, + "learning_rate": 1.9547623692414152e-05, + "loss": 0.4743, + "step": 7092 + }, + { + "epoch": 0.19475562877539812, + "grad_norm": 0.36264777183532715, + "learning_rate": 1.9547495251438232e-05, + "loss": 0.4938, + "step": 7093 + }, + { + "epoch": 0.19478308621636464, + "grad_norm": 0.595095694065094, + "learning_rate": 1.9547366792653187e-05, + "loss": 0.5597, + "step": 7094 + }, + { + "epoch": 0.19481054365733114, + "grad_norm": 0.33324581384658813, + "learning_rate": 1.9547238316059257e-05, + "loss": 0.5454, + "step": 7095 + }, + { + "epoch": 0.19483800109829763, + "grad_norm": 0.3286076784133911, + "learning_rate": 1.954710982165668e-05, + "loss": 0.5155, + "step": 7096 + }, + { + "epoch": 0.19486545853926415, + "grad_norm": 0.39441075921058655, + "learning_rate": 1.9546981309445695e-05, + "loss": 0.5928, + "step": 7097 + }, + { + "epoch": 0.19489291598023065, + "grad_norm": 0.3677467107772827, + "learning_rate": 1.9546852779426545e-05, + "loss": 0.5252, + "step": 7098 + }, + { + "epoch": 0.19492037342119714, + "grad_norm": 0.3494504988193512, + "learning_rate": 1.9546724231599468e-05, + "loss": 0.5204, + "step": 7099 + }, + { + "epoch": 0.19494783086216363, + "grad_norm": 0.43648621439933777, + "learning_rate": 1.95465956659647e-05, + "loss": 0.526, + "step": 7100 + }, + { + "epoch": 0.19497528830313016, + "grad_norm": 0.34320247173309326, + "learning_rate": 1.9546467082522486e-05, + "loss": 0.536, + "step": 7101 + }, + { + "epoch": 0.19500274574409665, + "grad_norm": 0.32266420125961304, + "learning_rate": 1.9546338481273065e-05, + "loss": 0.4496, + "step": 7102 + }, + { + "epoch": 0.19503020318506314, + "grad_norm": 0.3357849419116974, + "learning_rate": 1.9546209862216674e-05, + "loss": 0.575, + "step": 7103 + }, + { + "epoch": 0.19505766062602967, + "grad_norm": 0.372760146856308, + "learning_rate": 1.9546081225353553e-05, + "loss": 0.4671, + "step": 7104 + }, + { + "epoch": 0.19508511806699616, + "grad_norm": 0.38168057799339294, + "learning_rate": 1.954595257068395e-05, + "loss": 0.5125, + "step": 7105 + }, + { + "epoch": 0.19511257550796265, + "grad_norm": 0.3345735967159271, + "learning_rate": 1.9545823898208094e-05, + "loss": 0.5129, + "step": 7106 + }, + { + "epoch": 0.19514003294892915, + "grad_norm": 0.3664141297340393, + "learning_rate": 1.954569520792623e-05, + "loss": 0.5156, + "step": 7107 + }, + { + "epoch": 0.19516749038989567, + "grad_norm": 0.37676674127578735, + "learning_rate": 1.9545566499838596e-05, + "loss": 0.5049, + "step": 7108 + }, + { + "epoch": 0.19519494783086216, + "grad_norm": 0.422270804643631, + "learning_rate": 1.9545437773945438e-05, + "loss": 0.578, + "step": 7109 + }, + { + "epoch": 0.19522240527182866, + "grad_norm": 0.473947674036026, + "learning_rate": 1.954530903024699e-05, + "loss": 0.5399, + "step": 7110 + }, + { + "epoch": 0.19524986271279518, + "grad_norm": 0.37826886773109436, + "learning_rate": 1.9545180268743492e-05, + "loss": 0.6155, + "step": 7111 + }, + { + "epoch": 0.19527732015376167, + "grad_norm": 0.35873594880104065, + "learning_rate": 1.9545051489435193e-05, + "loss": 0.4885, + "step": 7112 + }, + { + "epoch": 0.19530477759472817, + "grad_norm": 0.369585782289505, + "learning_rate": 1.9544922692322322e-05, + "loss": 0.5725, + "step": 7113 + }, + { + "epoch": 0.19533223503569466, + "grad_norm": 0.3691712021827698, + "learning_rate": 1.9544793877405123e-05, + "loss": 0.5744, + "step": 7114 + }, + { + "epoch": 0.19535969247666118, + "grad_norm": 0.39016222953796387, + "learning_rate": 1.9544665044683838e-05, + "loss": 0.5968, + "step": 7115 + }, + { + "epoch": 0.19538714991762768, + "grad_norm": 0.3748838007450104, + "learning_rate": 1.954453619415871e-05, + "loss": 0.598, + "step": 7116 + }, + { + "epoch": 0.19541460735859417, + "grad_norm": 0.3429998457431793, + "learning_rate": 1.954440732582997e-05, + "loss": 0.5423, + "step": 7117 + }, + { + "epoch": 0.1954420647995607, + "grad_norm": 0.3752644658088684, + "learning_rate": 1.9544278439697864e-05, + "loss": 0.5473, + "step": 7118 + }, + { + "epoch": 0.1954695222405272, + "grad_norm": 0.39227351546287537, + "learning_rate": 1.954414953576264e-05, + "loss": 0.5222, + "step": 7119 + }, + { + "epoch": 0.19549697968149368, + "grad_norm": 0.3240601420402527, + "learning_rate": 1.954402061402452e-05, + "loss": 0.529, + "step": 7120 + }, + { + "epoch": 0.19552443712246018, + "grad_norm": 0.39040884375572205, + "learning_rate": 1.9543891674483767e-05, + "loss": 0.49, + "step": 7121 + }, + { + "epoch": 0.1955518945634267, + "grad_norm": 0.3687610924243927, + "learning_rate": 1.95437627171406e-05, + "loss": 0.4753, + "step": 7122 + }, + { + "epoch": 0.1955793520043932, + "grad_norm": 0.37360528111457825, + "learning_rate": 1.9543633741995275e-05, + "loss": 0.5386, + "step": 7123 + }, + { + "epoch": 0.1956068094453597, + "grad_norm": 0.36997178196907043, + "learning_rate": 1.9543504749048025e-05, + "loss": 0.5374, + "step": 7124 + }, + { + "epoch": 0.1956342668863262, + "grad_norm": 0.3902798295021057, + "learning_rate": 1.9543375738299096e-05, + "loss": 0.5533, + "step": 7125 + }, + { + "epoch": 0.1956617243272927, + "grad_norm": 0.3777187764644623, + "learning_rate": 1.9543246709748722e-05, + "loss": 0.5266, + "step": 7126 + }, + { + "epoch": 0.1956891817682592, + "grad_norm": 0.324217826128006, + "learning_rate": 1.954311766339715e-05, + "loss": 0.525, + "step": 7127 + }, + { + "epoch": 0.1957166392092257, + "grad_norm": 0.4059572219848633, + "learning_rate": 1.9542988599244615e-05, + "loss": 0.4792, + "step": 7128 + }, + { + "epoch": 0.1957440966501922, + "grad_norm": 0.34937992691993713, + "learning_rate": 1.9542859517291363e-05, + "loss": 0.476, + "step": 7129 + }, + { + "epoch": 0.1957715540911587, + "grad_norm": 0.46754807233810425, + "learning_rate": 1.954273041753763e-05, + "loss": 0.4902, + "step": 7130 + }, + { + "epoch": 0.1957990115321252, + "grad_norm": 0.3189769685268402, + "learning_rate": 1.9542601299983658e-05, + "loss": 0.5168, + "step": 7131 + }, + { + "epoch": 0.1958264689730917, + "grad_norm": 0.3236069083213806, + "learning_rate": 1.954247216462969e-05, + "loss": 0.486, + "step": 7132 + }, + { + "epoch": 0.19585392641405822, + "grad_norm": 0.3994380235671997, + "learning_rate": 1.9542343011475967e-05, + "loss": 0.4839, + "step": 7133 + }, + { + "epoch": 0.1958813838550247, + "grad_norm": 0.3506726920604706, + "learning_rate": 1.954221384052273e-05, + "loss": 0.5285, + "step": 7134 + }, + { + "epoch": 0.1959088412959912, + "grad_norm": 0.3980531394481659, + "learning_rate": 1.9542084651770213e-05, + "loss": 0.5184, + "step": 7135 + }, + { + "epoch": 0.19593629873695773, + "grad_norm": 0.35364046692848206, + "learning_rate": 1.9541955445218668e-05, + "loss": 0.4596, + "step": 7136 + }, + { + "epoch": 0.19596375617792422, + "grad_norm": 0.3507658541202545, + "learning_rate": 1.954182622086833e-05, + "loss": 0.5934, + "step": 7137 + }, + { + "epoch": 0.19599121361889071, + "grad_norm": 0.39827755093574524, + "learning_rate": 1.954169697871944e-05, + "loss": 0.5393, + "step": 7138 + }, + { + "epoch": 0.1960186710598572, + "grad_norm": 0.41258329153060913, + "learning_rate": 1.954156771877224e-05, + "loss": 0.5903, + "step": 7139 + }, + { + "epoch": 0.19604612850082373, + "grad_norm": 0.33346524834632874, + "learning_rate": 1.954143844102697e-05, + "loss": 0.5391, + "step": 7140 + }, + { + "epoch": 0.19607358594179022, + "grad_norm": 0.36685311794281006, + "learning_rate": 1.9541309145483872e-05, + "loss": 0.5959, + "step": 7141 + }, + { + "epoch": 0.19610104338275672, + "grad_norm": 0.4047291874885559, + "learning_rate": 1.9541179832143187e-05, + "loss": 0.6385, + "step": 7142 + }, + { + "epoch": 0.19612850082372324, + "grad_norm": 0.34613046050071716, + "learning_rate": 1.954105050100516e-05, + "loss": 0.519, + "step": 7143 + }, + { + "epoch": 0.19615595826468973, + "grad_norm": 0.34517884254455566, + "learning_rate": 1.9540921152070026e-05, + "loss": 0.493, + "step": 7144 + }, + { + "epoch": 0.19618341570565623, + "grad_norm": 0.3584359288215637, + "learning_rate": 1.954079178533803e-05, + "loss": 0.4932, + "step": 7145 + }, + { + "epoch": 0.19621087314662272, + "grad_norm": 0.4661368131637573, + "learning_rate": 1.9540662400809408e-05, + "loss": 0.5303, + "step": 7146 + }, + { + "epoch": 0.19623833058758924, + "grad_norm": 0.37627050280570984, + "learning_rate": 1.9540532998484408e-05, + "loss": 0.5054, + "step": 7147 + }, + { + "epoch": 0.19626578802855574, + "grad_norm": 0.35841119289398193, + "learning_rate": 1.954040357836327e-05, + "loss": 0.5564, + "step": 7148 + }, + { + "epoch": 0.19629324546952223, + "grad_norm": 0.4122997224330902, + "learning_rate": 1.9540274140446237e-05, + "loss": 0.5458, + "step": 7149 + }, + { + "epoch": 0.19632070291048875, + "grad_norm": 0.35828328132629395, + "learning_rate": 1.9540144684733543e-05, + "loss": 0.5018, + "step": 7150 + }, + { + "epoch": 0.19634816035145525, + "grad_norm": 0.3851050138473511, + "learning_rate": 1.9540015211225436e-05, + "loss": 0.4864, + "step": 7151 + }, + { + "epoch": 0.19637561779242174, + "grad_norm": 0.36378082633018494, + "learning_rate": 1.9539885719922153e-05, + "loss": 0.4787, + "step": 7152 + }, + { + "epoch": 0.19640307523338824, + "grad_norm": 0.39105677604675293, + "learning_rate": 1.9539756210823944e-05, + "loss": 0.5556, + "step": 7153 + }, + { + "epoch": 0.19643053267435476, + "grad_norm": 0.3436042070388794, + "learning_rate": 1.9539626683931044e-05, + "loss": 0.562, + "step": 7154 + }, + { + "epoch": 0.19645799011532125, + "grad_norm": 0.3398236334323883, + "learning_rate": 1.9539497139243692e-05, + "loss": 0.4782, + "step": 7155 + }, + { + "epoch": 0.19648544755628775, + "grad_norm": 0.3495853543281555, + "learning_rate": 1.9539367576762136e-05, + "loss": 0.4932, + "step": 7156 + }, + { + "epoch": 0.19651290499725427, + "grad_norm": 0.44746723771095276, + "learning_rate": 1.9539237996486614e-05, + "loss": 0.6078, + "step": 7157 + }, + { + "epoch": 0.19654036243822076, + "grad_norm": 0.39059901237487793, + "learning_rate": 1.953910839841737e-05, + "loss": 0.4787, + "step": 7158 + }, + { + "epoch": 0.19656781987918726, + "grad_norm": 0.3783426582813263, + "learning_rate": 1.953897878255464e-05, + "loss": 0.552, + "step": 7159 + }, + { + "epoch": 0.19659527732015375, + "grad_norm": 0.38346585631370544, + "learning_rate": 1.9538849148898672e-05, + "loss": 0.535, + "step": 7160 + }, + { + "epoch": 0.19662273476112027, + "grad_norm": 0.30041077733039856, + "learning_rate": 1.953871949744971e-05, + "loss": 0.4641, + "step": 7161 + }, + { + "epoch": 0.19665019220208677, + "grad_norm": 0.36442095041275024, + "learning_rate": 1.9538589828207985e-05, + "loss": 0.5903, + "step": 7162 + }, + { + "epoch": 0.19667764964305326, + "grad_norm": 2.016639471054077, + "learning_rate": 1.9538460141173748e-05, + "loss": 0.641, + "step": 7163 + }, + { + "epoch": 0.19670510708401978, + "grad_norm": 0.5272617936134338, + "learning_rate": 1.9538330436347238e-05, + "loss": 0.5308, + "step": 7164 + }, + { + "epoch": 0.19673256452498628, + "grad_norm": 0.3539971709251404, + "learning_rate": 1.9538200713728703e-05, + "loss": 0.5316, + "step": 7165 + }, + { + "epoch": 0.19676002196595277, + "grad_norm": 0.3413262963294983, + "learning_rate": 1.9538070973318373e-05, + "loss": 0.5218, + "step": 7166 + }, + { + "epoch": 0.19678747940691926, + "grad_norm": 0.32233503460884094, + "learning_rate": 1.95379412151165e-05, + "loss": 0.4572, + "step": 7167 + }, + { + "epoch": 0.19681493684788579, + "grad_norm": 0.49622124433517456, + "learning_rate": 1.953781143912332e-05, + "loss": 0.5181, + "step": 7168 + }, + { + "epoch": 0.19684239428885228, + "grad_norm": 0.3376036286354065, + "learning_rate": 1.953768164533908e-05, + "loss": 0.5897, + "step": 7169 + }, + { + "epoch": 0.19686985172981877, + "grad_norm": 0.6925176978111267, + "learning_rate": 1.953755183376402e-05, + "loss": 0.5564, + "step": 7170 + }, + { + "epoch": 0.1968973091707853, + "grad_norm": 0.6746611595153809, + "learning_rate": 1.953742200439838e-05, + "loss": 0.487, + "step": 7171 + }, + { + "epoch": 0.1969247666117518, + "grad_norm": 0.3919059634208679, + "learning_rate": 1.9537292157242405e-05, + "loss": 0.6005, + "step": 7172 + }, + { + "epoch": 0.19695222405271828, + "grad_norm": 0.3767068684101105, + "learning_rate": 1.953716229229633e-05, + "loss": 0.4442, + "step": 7173 + }, + { + "epoch": 0.19697968149368478, + "grad_norm": 0.37462007999420166, + "learning_rate": 1.9537032409560414e-05, + "loss": 0.6061, + "step": 7174 + }, + { + "epoch": 0.1970071389346513, + "grad_norm": 0.4104886054992676, + "learning_rate": 1.9536902509034883e-05, + "loss": 0.5115, + "step": 7175 + }, + { + "epoch": 0.1970345963756178, + "grad_norm": 0.36642777919769287, + "learning_rate": 1.9536772590719987e-05, + "loss": 0.6339, + "step": 7176 + }, + { + "epoch": 0.1970620538165843, + "grad_norm": 0.4304368793964386, + "learning_rate": 1.9536642654615963e-05, + "loss": 0.5584, + "step": 7177 + }, + { + "epoch": 0.1970895112575508, + "grad_norm": 0.3695346415042877, + "learning_rate": 1.9536512700723057e-05, + "loss": 0.5586, + "step": 7178 + }, + { + "epoch": 0.1971169686985173, + "grad_norm": 0.4176194369792938, + "learning_rate": 1.953638272904151e-05, + "loss": 0.5453, + "step": 7179 + }, + { + "epoch": 0.1971444261394838, + "grad_norm": 0.357776015996933, + "learning_rate": 1.953625273957157e-05, + "loss": 0.5602, + "step": 7180 + }, + { + "epoch": 0.1971718835804503, + "grad_norm": 0.39299970865249634, + "learning_rate": 1.9536122732313476e-05, + "loss": 0.4265, + "step": 7181 + }, + { + "epoch": 0.1971993410214168, + "grad_norm": 0.38582319021224976, + "learning_rate": 1.9535992707267465e-05, + "loss": 0.5775, + "step": 7182 + }, + { + "epoch": 0.1972267984623833, + "grad_norm": 0.33422353863716125, + "learning_rate": 1.9535862664433786e-05, + "loss": 0.4638, + "step": 7183 + }, + { + "epoch": 0.1972542559033498, + "grad_norm": 0.3745768368244171, + "learning_rate": 1.953573260381268e-05, + "loss": 0.6488, + "step": 7184 + }, + { + "epoch": 0.19728171334431632, + "grad_norm": 0.3630523681640625, + "learning_rate": 1.9535602525404388e-05, + "loss": 0.4787, + "step": 7185 + }, + { + "epoch": 0.19730917078528282, + "grad_norm": 0.39216434955596924, + "learning_rate": 1.9535472429209155e-05, + "loss": 0.517, + "step": 7186 + }, + { + "epoch": 0.1973366282262493, + "grad_norm": 0.33977261185646057, + "learning_rate": 1.9535342315227225e-05, + "loss": 0.4784, + "step": 7187 + }, + { + "epoch": 0.1973640856672158, + "grad_norm": 0.3722745180130005, + "learning_rate": 1.953521218345883e-05, + "loss": 0.4741, + "step": 7188 + }, + { + "epoch": 0.19739154310818233, + "grad_norm": 0.40505266189575195, + "learning_rate": 1.9535082033904228e-05, + "loss": 0.6026, + "step": 7189 + }, + { + "epoch": 0.19741900054914882, + "grad_norm": 0.3552279770374298, + "learning_rate": 1.9534951866563655e-05, + "loss": 0.5548, + "step": 7190 + }, + { + "epoch": 0.19744645799011531, + "grad_norm": 0.6264123916625977, + "learning_rate": 1.953482168143735e-05, + "loss": 0.4778, + "step": 7191 + }, + { + "epoch": 0.19747391543108184, + "grad_norm": 0.3956158459186554, + "learning_rate": 1.953469147852556e-05, + "loss": 0.5932, + "step": 7192 + }, + { + "epoch": 0.19750137287204833, + "grad_norm": 0.39580288529396057, + "learning_rate": 1.9534561257828533e-05, + "loss": 0.5117, + "step": 7193 + }, + { + "epoch": 0.19752883031301482, + "grad_norm": 0.3647053837776184, + "learning_rate": 1.95344310193465e-05, + "loss": 0.5876, + "step": 7194 + }, + { + "epoch": 0.19755628775398132, + "grad_norm": 0.369294673204422, + "learning_rate": 1.953430076307971e-05, + "loss": 0.4724, + "step": 7195 + }, + { + "epoch": 0.19758374519494784, + "grad_norm": 0.3545277416706085, + "learning_rate": 1.953417048902841e-05, + "loss": 0.4994, + "step": 7196 + }, + { + "epoch": 0.19761120263591433, + "grad_norm": 0.3297555148601532, + "learning_rate": 1.9534040197192837e-05, + "loss": 0.5086, + "step": 7197 + }, + { + "epoch": 0.19763866007688083, + "grad_norm": 0.35315993428230286, + "learning_rate": 1.9533909887573236e-05, + "loss": 0.4819, + "step": 7198 + }, + { + "epoch": 0.19766611751784732, + "grad_norm": 0.36271387338638306, + "learning_rate": 1.953377956016985e-05, + "loss": 0.488, + "step": 7199 + }, + { + "epoch": 0.19769357495881384, + "grad_norm": 0.3455018103122711, + "learning_rate": 1.953364921498292e-05, + "loss": 0.49, + "step": 7200 + }, + { + "epoch": 0.19772103239978034, + "grad_norm": 0.41672009229660034, + "learning_rate": 1.9533518852012692e-05, + "loss": 0.5471, + "step": 7201 + }, + { + "epoch": 0.19774848984074683, + "grad_norm": 0.4056854844093323, + "learning_rate": 1.9533388471259413e-05, + "loss": 0.4609, + "step": 7202 + }, + { + "epoch": 0.19777594728171335, + "grad_norm": 0.3947330713272095, + "learning_rate": 1.953325807272332e-05, + "loss": 0.546, + "step": 7203 + }, + { + "epoch": 0.19780340472267985, + "grad_norm": 0.35936641693115234, + "learning_rate": 1.9533127656404657e-05, + "loss": 0.5069, + "step": 7204 + }, + { + "epoch": 0.19783086216364634, + "grad_norm": 0.3275044858455658, + "learning_rate": 1.9532997222303668e-05, + "loss": 0.4886, + "step": 7205 + }, + { + "epoch": 0.19785831960461284, + "grad_norm": 0.41177043318748474, + "learning_rate": 1.9532866770420597e-05, + "loss": 0.4495, + "step": 7206 + }, + { + "epoch": 0.19788577704557936, + "grad_norm": 0.3257928490638733, + "learning_rate": 1.953273630075569e-05, + "loss": 0.5104, + "step": 7207 + }, + { + "epoch": 0.19791323448654585, + "grad_norm": 0.41101768612861633, + "learning_rate": 1.9532605813309185e-05, + "loss": 0.5408, + "step": 7208 + }, + { + "epoch": 0.19794069192751235, + "grad_norm": 0.3737339973449707, + "learning_rate": 1.9532475308081324e-05, + "loss": 0.5636, + "step": 7209 + }, + { + "epoch": 0.19796814936847887, + "grad_norm": 0.3762427568435669, + "learning_rate": 1.9532344785072358e-05, + "loss": 0.5922, + "step": 7210 + }, + { + "epoch": 0.19799560680944536, + "grad_norm": 0.459736168384552, + "learning_rate": 1.9532214244282527e-05, + "loss": 0.5316, + "step": 7211 + }, + { + "epoch": 0.19802306425041186, + "grad_norm": 0.3514285981655121, + "learning_rate": 1.9532083685712072e-05, + "loss": 0.4972, + "step": 7212 + }, + { + "epoch": 0.19805052169137835, + "grad_norm": 0.43646520376205444, + "learning_rate": 1.953195310936124e-05, + "loss": 0.535, + "step": 7213 + }, + { + "epoch": 0.19807797913234487, + "grad_norm": 0.34438374638557434, + "learning_rate": 1.9531822515230276e-05, + "loss": 0.4892, + "step": 7214 + }, + { + "epoch": 0.19810543657331137, + "grad_norm": 0.36947011947631836, + "learning_rate": 1.9531691903319415e-05, + "loss": 0.5635, + "step": 7215 + }, + { + "epoch": 0.19813289401427786, + "grad_norm": 0.34953573346138, + "learning_rate": 1.953156127362891e-05, + "loss": 0.5776, + "step": 7216 + }, + { + "epoch": 0.19816035145524438, + "grad_norm": 0.31937530636787415, + "learning_rate": 1.9531430626159e-05, + "loss": 0.4799, + "step": 7217 + }, + { + "epoch": 0.19818780889621088, + "grad_norm": 0.3912700116634369, + "learning_rate": 1.9531299960909928e-05, + "loss": 0.6182, + "step": 7218 + }, + { + "epoch": 0.19821526633717737, + "grad_norm": 0.38298895955085754, + "learning_rate": 1.953116927788194e-05, + "loss": 0.5679, + "step": 7219 + }, + { + "epoch": 0.19824272377814386, + "grad_norm": 0.34369972348213196, + "learning_rate": 1.9531038577075284e-05, + "loss": 0.4537, + "step": 7220 + }, + { + "epoch": 0.1982701812191104, + "grad_norm": 0.3912615180015564, + "learning_rate": 1.9530907858490195e-05, + "loss": 0.5986, + "step": 7221 + }, + { + "epoch": 0.19829763866007688, + "grad_norm": 0.3950369656085968, + "learning_rate": 1.953077712212692e-05, + "loss": 0.5873, + "step": 7222 + }, + { + "epoch": 0.19832509610104337, + "grad_norm": 0.3334648013114929, + "learning_rate": 1.953064636798571e-05, + "loss": 0.4981, + "step": 7223 + }, + { + "epoch": 0.1983525535420099, + "grad_norm": 0.380347341299057, + "learning_rate": 1.95305155960668e-05, + "loss": 0.5684, + "step": 7224 + }, + { + "epoch": 0.1983800109829764, + "grad_norm": 0.3821478486061096, + "learning_rate": 1.953038480637043e-05, + "loss": 0.5324, + "step": 7225 + }, + { + "epoch": 0.19840746842394288, + "grad_norm": 0.3779415488243103, + "learning_rate": 1.9530253998896857e-05, + "loss": 0.4894, + "step": 7226 + }, + { + "epoch": 0.19843492586490938, + "grad_norm": 0.4097645580768585, + "learning_rate": 1.953012317364632e-05, + "loss": 0.6155, + "step": 7227 + }, + { + "epoch": 0.1984623833058759, + "grad_norm": 0.407749205827713, + "learning_rate": 1.9529992330619056e-05, + "loss": 0.5623, + "step": 7228 + }, + { + "epoch": 0.1984898407468424, + "grad_norm": 0.453078955411911, + "learning_rate": 1.9529861469815316e-05, + "loss": 0.4845, + "step": 7229 + }, + { + "epoch": 0.1985172981878089, + "grad_norm": 0.3514716923236847, + "learning_rate": 1.9529730591235346e-05, + "loss": 0.5976, + "step": 7230 + }, + { + "epoch": 0.1985447556287754, + "grad_norm": 0.3212401866912842, + "learning_rate": 1.9529599694879383e-05, + "loss": 0.4459, + "step": 7231 + }, + { + "epoch": 0.1985722130697419, + "grad_norm": 0.41688039898872375, + "learning_rate": 1.952946878074768e-05, + "loss": 0.4856, + "step": 7232 + }, + { + "epoch": 0.1985996705107084, + "grad_norm": 0.3676583468914032, + "learning_rate": 1.952933784884047e-05, + "loss": 0.5004, + "step": 7233 + }, + { + "epoch": 0.1986271279516749, + "grad_norm": 0.3243754506111145, + "learning_rate": 1.952920689915801e-05, + "loss": 0.4736, + "step": 7234 + }, + { + "epoch": 0.19865458539264141, + "grad_norm": 0.3447073996067047, + "learning_rate": 1.9529075931700535e-05, + "loss": 0.4623, + "step": 7235 + }, + { + "epoch": 0.1986820428336079, + "grad_norm": 0.36044448614120483, + "learning_rate": 1.9528944946468292e-05, + "loss": 0.5093, + "step": 7236 + }, + { + "epoch": 0.1987095002745744, + "grad_norm": 0.3483608067035675, + "learning_rate": 1.9528813943461523e-05, + "loss": 0.5598, + "step": 7237 + }, + { + "epoch": 0.19873695771554092, + "grad_norm": 0.38116654753685, + "learning_rate": 1.9528682922680476e-05, + "loss": 0.5669, + "step": 7238 + }, + { + "epoch": 0.19876441515650742, + "grad_norm": 0.3651668131351471, + "learning_rate": 1.9528551884125395e-05, + "loss": 0.4976, + "step": 7239 + }, + { + "epoch": 0.1987918725974739, + "grad_norm": 0.32412225008010864, + "learning_rate": 1.9528420827796526e-05, + "loss": 0.5661, + "step": 7240 + }, + { + "epoch": 0.1988193300384404, + "grad_norm": 0.355745792388916, + "learning_rate": 1.9528289753694108e-05, + "loss": 0.5558, + "step": 7241 + }, + { + "epoch": 0.19884678747940693, + "grad_norm": 0.3472450077533722, + "learning_rate": 1.952815866181839e-05, + "loss": 0.5157, + "step": 7242 + }, + { + "epoch": 0.19887424492037342, + "grad_norm": 0.33475399017333984, + "learning_rate": 1.952802755216961e-05, + "loss": 0.4584, + "step": 7243 + }, + { + "epoch": 0.19890170236133992, + "grad_norm": 0.3952518105506897, + "learning_rate": 1.9527896424748025e-05, + "loss": 0.5297, + "step": 7244 + }, + { + "epoch": 0.19892915980230644, + "grad_norm": 0.38270044326782227, + "learning_rate": 1.952776527955387e-05, + "loss": 0.5259, + "step": 7245 + }, + { + "epoch": 0.19895661724327293, + "grad_norm": 0.4353136420249939, + "learning_rate": 1.952763411658739e-05, + "loss": 0.5541, + "step": 7246 + }, + { + "epoch": 0.19898407468423943, + "grad_norm": 0.3596036434173584, + "learning_rate": 1.9527502935848832e-05, + "loss": 0.492, + "step": 7247 + }, + { + "epoch": 0.19901153212520592, + "grad_norm": 0.39277714490890503, + "learning_rate": 1.9527371737338443e-05, + "loss": 0.5848, + "step": 7248 + }, + { + "epoch": 0.19903898956617244, + "grad_norm": 0.3556603789329529, + "learning_rate": 1.9527240521056462e-05, + "loss": 0.5664, + "step": 7249 + }, + { + "epoch": 0.19906644700713894, + "grad_norm": 0.40850144624710083, + "learning_rate": 1.9527109287003138e-05, + "loss": 0.5624, + "step": 7250 + }, + { + "epoch": 0.19909390444810543, + "grad_norm": 0.34166961908340454, + "learning_rate": 1.9526978035178713e-05, + "loss": 0.4499, + "step": 7251 + }, + { + "epoch": 0.19912136188907195, + "grad_norm": 0.32886263728141785, + "learning_rate": 1.9526846765583435e-05, + "loss": 0.4667, + "step": 7252 + }, + { + "epoch": 0.19914881933003845, + "grad_norm": 0.3720371127128601, + "learning_rate": 1.9526715478217545e-05, + "loss": 0.549, + "step": 7253 + }, + { + "epoch": 0.19917627677100494, + "grad_norm": 0.3854215443134308, + "learning_rate": 1.9526584173081293e-05, + "loss": 0.5358, + "step": 7254 + }, + { + "epoch": 0.19920373421197143, + "grad_norm": 0.4011201560497284, + "learning_rate": 1.9526452850174917e-05, + "loss": 0.5591, + "step": 7255 + }, + { + "epoch": 0.19923119165293796, + "grad_norm": 0.33162906765937805, + "learning_rate": 1.952632150949867e-05, + "loss": 0.5235, + "step": 7256 + }, + { + "epoch": 0.19925864909390445, + "grad_norm": 0.34256017208099365, + "learning_rate": 1.9526190151052787e-05, + "loss": 0.5689, + "step": 7257 + }, + { + "epoch": 0.19928610653487094, + "grad_norm": 0.4785984456539154, + "learning_rate": 1.9526058774837525e-05, + "loss": 0.5547, + "step": 7258 + }, + { + "epoch": 0.19931356397583747, + "grad_norm": 0.3598606288433075, + "learning_rate": 1.952592738085312e-05, + "loss": 0.4014, + "step": 7259 + }, + { + "epoch": 0.19934102141680396, + "grad_norm": 0.35917675495147705, + "learning_rate": 1.9525795969099822e-05, + "loss": 0.511, + "step": 7260 + }, + { + "epoch": 0.19936847885777045, + "grad_norm": 0.3319828510284424, + "learning_rate": 1.9525664539577877e-05, + "loss": 0.5112, + "step": 7261 + }, + { + "epoch": 0.19939593629873695, + "grad_norm": 0.4017603397369385, + "learning_rate": 1.952553309228752e-05, + "loss": 0.6258, + "step": 7262 + }, + { + "epoch": 0.19942339373970347, + "grad_norm": 0.37855392694473267, + "learning_rate": 1.952540162722901e-05, + "loss": 0.5565, + "step": 7263 + }, + { + "epoch": 0.19945085118066996, + "grad_norm": 0.3728278875350952, + "learning_rate": 1.9525270144402582e-05, + "loss": 0.5611, + "step": 7264 + }, + { + "epoch": 0.19947830862163646, + "grad_norm": 0.42211565375328064, + "learning_rate": 1.9525138643808487e-05, + "loss": 0.5032, + "step": 7265 + }, + { + "epoch": 0.19950576606260295, + "grad_norm": 0.3804474174976349, + "learning_rate": 1.9525007125446968e-05, + "loss": 0.5952, + "step": 7266 + }, + { + "epoch": 0.19953322350356947, + "grad_norm": 0.36546412110328674, + "learning_rate": 1.952487558931827e-05, + "loss": 0.6175, + "step": 7267 + }, + { + "epoch": 0.19956068094453597, + "grad_norm": 0.35153576731681824, + "learning_rate": 1.9524744035422637e-05, + "loss": 0.5121, + "step": 7268 + }, + { + "epoch": 0.19958813838550246, + "grad_norm": 0.385099321603775, + "learning_rate": 1.9524612463760322e-05, + "loss": 0.5623, + "step": 7269 + }, + { + "epoch": 0.19961559582646898, + "grad_norm": 0.3399818539619446, + "learning_rate": 1.952448087433156e-05, + "loss": 0.5185, + "step": 7270 + }, + { + "epoch": 0.19964305326743548, + "grad_norm": 0.4528478682041168, + "learning_rate": 1.9524349267136603e-05, + "loss": 0.5886, + "step": 7271 + }, + { + "epoch": 0.19967051070840197, + "grad_norm": 0.36903467774391174, + "learning_rate": 1.9524217642175696e-05, + "loss": 0.6044, + "step": 7272 + }, + { + "epoch": 0.19969796814936847, + "grad_norm": 0.3784032166004181, + "learning_rate": 1.9524085999449083e-05, + "loss": 0.6529, + "step": 7273 + }, + { + "epoch": 0.199725425590335, + "grad_norm": 0.35832545161247253, + "learning_rate": 1.9523954338957007e-05, + "loss": 0.5627, + "step": 7274 + }, + { + "epoch": 0.19975288303130148, + "grad_norm": 0.4117189049720764, + "learning_rate": 1.952382266069972e-05, + "loss": 0.5711, + "step": 7275 + }, + { + "epoch": 0.19978034047226798, + "grad_norm": 0.4110107719898224, + "learning_rate": 1.9523690964677462e-05, + "loss": 0.5575, + "step": 7276 + }, + { + "epoch": 0.1998077979132345, + "grad_norm": 0.3601077198982239, + "learning_rate": 1.952355925089048e-05, + "loss": 0.5116, + "step": 7277 + }, + { + "epoch": 0.199835255354201, + "grad_norm": 0.36038461327552795, + "learning_rate": 1.952342751933902e-05, + "loss": 0.5262, + "step": 7278 + }, + { + "epoch": 0.19986271279516749, + "grad_norm": 0.4073856472969055, + "learning_rate": 1.9523295770023334e-05, + "loss": 0.6271, + "step": 7279 + }, + { + "epoch": 0.19989017023613398, + "grad_norm": 0.3714544177055359, + "learning_rate": 1.9523164002943654e-05, + "loss": 0.5467, + "step": 7280 + }, + { + "epoch": 0.1999176276771005, + "grad_norm": 0.35000598430633545, + "learning_rate": 1.952303221810024e-05, + "loss": 0.5675, + "step": 7281 + }, + { + "epoch": 0.199945085118067, + "grad_norm": 1.7407808303833008, + "learning_rate": 1.952290041549333e-05, + "loss": 0.5475, + "step": 7282 + }, + { + "epoch": 0.1999725425590335, + "grad_norm": 0.3054966330528259, + "learning_rate": 1.9522768595123168e-05, + "loss": 0.4296, + "step": 7283 + }, + { + "epoch": 0.2, + "grad_norm": 0.33471494913101196, + "learning_rate": 1.9522636756990008e-05, + "loss": 0.4848, + "step": 7284 + }, + { + "epoch": 0.2000274574409665, + "grad_norm": 0.37647995352745056, + "learning_rate": 1.952250490109409e-05, + "loss": 0.5895, + "step": 7285 + }, + { + "epoch": 0.200054914881933, + "grad_norm": 0.34489938616752625, + "learning_rate": 1.9522373027435655e-05, + "loss": 0.5708, + "step": 7286 + }, + { + "epoch": 0.2000823723228995, + "grad_norm": 0.45866742730140686, + "learning_rate": 1.9522241136014965e-05, + "loss": 0.482, + "step": 7287 + }, + { + "epoch": 0.20010982976386602, + "grad_norm": 0.3989732563495636, + "learning_rate": 1.9522109226832247e-05, + "loss": 0.5117, + "step": 7288 + }, + { + "epoch": 0.2001372872048325, + "grad_norm": 0.32786762714385986, + "learning_rate": 1.9521977299887764e-05, + "loss": 0.5022, + "step": 7289 + }, + { + "epoch": 0.200164744645799, + "grad_norm": 0.3497236371040344, + "learning_rate": 1.9521845355181748e-05, + "loss": 0.5104, + "step": 7290 + }, + { + "epoch": 0.20019220208676552, + "grad_norm": 0.4128434360027313, + "learning_rate": 1.9521713392714458e-05, + "loss": 0.4664, + "step": 7291 + }, + { + "epoch": 0.20021965952773202, + "grad_norm": 0.332742840051651, + "learning_rate": 1.9521581412486127e-05, + "loss": 0.5013, + "step": 7292 + }, + { + "epoch": 0.2002471169686985, + "grad_norm": 0.4773743152618408, + "learning_rate": 1.9521449414497013e-05, + "loss": 0.549, + "step": 7293 + }, + { + "epoch": 0.200274574409665, + "grad_norm": 0.4046773612499237, + "learning_rate": 1.9521317398747352e-05, + "loss": 0.6262, + "step": 7294 + }, + { + "epoch": 0.20030203185063153, + "grad_norm": 0.3849119544029236, + "learning_rate": 1.9521185365237396e-05, + "loss": 0.5346, + "step": 7295 + }, + { + "epoch": 0.20032948929159802, + "grad_norm": 0.35975560545921326, + "learning_rate": 1.9521053313967392e-05, + "loss": 0.5089, + "step": 7296 + }, + { + "epoch": 0.20035694673256452, + "grad_norm": 0.36791449785232544, + "learning_rate": 1.9520921244937583e-05, + "loss": 0.5369, + "step": 7297 + }, + { + "epoch": 0.20038440417353104, + "grad_norm": 0.38655492663383484, + "learning_rate": 1.952078915814822e-05, + "loss": 0.6763, + "step": 7298 + }, + { + "epoch": 0.20041186161449753, + "grad_norm": 0.3122752010822296, + "learning_rate": 1.9520657053599547e-05, + "loss": 0.4254, + "step": 7299 + }, + { + "epoch": 0.20043931905546403, + "grad_norm": 0.37392082810401917, + "learning_rate": 1.952052493129181e-05, + "loss": 0.5373, + "step": 7300 + }, + { + "epoch": 0.20046677649643052, + "grad_norm": 0.35789406299591064, + "learning_rate": 1.9520392791225255e-05, + "loss": 0.6196, + "step": 7301 + }, + { + "epoch": 0.20049423393739704, + "grad_norm": 0.5537394881248474, + "learning_rate": 1.9520260633400126e-05, + "loss": 0.5176, + "step": 7302 + }, + { + "epoch": 0.20052169137836354, + "grad_norm": 0.3785388469696045, + "learning_rate": 1.9520128457816673e-05, + "loss": 0.507, + "step": 7303 + }, + { + "epoch": 0.20054914881933003, + "grad_norm": 0.3671973645687103, + "learning_rate": 1.9519996264475143e-05, + "loss": 0.5078, + "step": 7304 + }, + { + "epoch": 0.20057660626029655, + "grad_norm": 0.38311681151390076, + "learning_rate": 1.9519864053375782e-05, + "loss": 0.5446, + "step": 7305 + }, + { + "epoch": 0.20060406370126305, + "grad_norm": 0.38404810428619385, + "learning_rate": 1.9519731824518836e-05, + "loss": 0.4427, + "step": 7306 + }, + { + "epoch": 0.20063152114222954, + "grad_norm": 0.4808088541030884, + "learning_rate": 1.951959957790455e-05, + "loss": 0.5144, + "step": 7307 + }, + { + "epoch": 0.20065897858319603, + "grad_norm": 0.47841259837150574, + "learning_rate": 1.9519467313533177e-05, + "loss": 0.5221, + "step": 7308 + }, + { + "epoch": 0.20068643602416256, + "grad_norm": 0.3672653138637543, + "learning_rate": 1.9519335031404953e-05, + "loss": 0.4775, + "step": 7309 + }, + { + "epoch": 0.20071389346512905, + "grad_norm": 0.42646127939224243, + "learning_rate": 1.9519202731520133e-05, + "loss": 0.6006, + "step": 7310 + }, + { + "epoch": 0.20074135090609554, + "grad_norm": 0.35583093762397766, + "learning_rate": 1.9519070413878965e-05, + "loss": 0.4687, + "step": 7311 + }, + { + "epoch": 0.20076880834706207, + "grad_norm": 0.5379402041435242, + "learning_rate": 1.951893807848169e-05, + "loss": 0.6122, + "step": 7312 + }, + { + "epoch": 0.20079626578802856, + "grad_norm": 0.4611837863922119, + "learning_rate": 1.9518805725328557e-05, + "loss": 0.5346, + "step": 7313 + }, + { + "epoch": 0.20082372322899505, + "grad_norm": 0.3678635358810425, + "learning_rate": 1.9518673354419815e-05, + "loss": 0.4801, + "step": 7314 + }, + { + "epoch": 0.20085118066996155, + "grad_norm": 0.3484295904636383, + "learning_rate": 1.9518540965755707e-05, + "loss": 0.5877, + "step": 7315 + }, + { + "epoch": 0.20087863811092807, + "grad_norm": 0.34005439281463623, + "learning_rate": 1.9518408559336483e-05, + "loss": 0.5507, + "step": 7316 + }, + { + "epoch": 0.20090609555189456, + "grad_norm": 0.35982435941696167, + "learning_rate": 1.951827613516239e-05, + "loss": 0.5501, + "step": 7317 + }, + { + "epoch": 0.20093355299286106, + "grad_norm": 0.3501453995704651, + "learning_rate": 1.9518143693233675e-05, + "loss": 0.5305, + "step": 7318 + }, + { + "epoch": 0.20096101043382758, + "grad_norm": 0.3547631800174713, + "learning_rate": 1.9518011233550584e-05, + "loss": 0.5318, + "step": 7319 + }, + { + "epoch": 0.20098846787479407, + "grad_norm": 0.41490888595581055, + "learning_rate": 1.951787875611336e-05, + "loss": 0.5947, + "step": 7320 + }, + { + "epoch": 0.20101592531576057, + "grad_norm": 0.3756621479988098, + "learning_rate": 1.951774626092226e-05, + "loss": 0.5938, + "step": 7321 + }, + { + "epoch": 0.20104338275672706, + "grad_norm": 0.38661646842956543, + "learning_rate": 1.9517613747977523e-05, + "loss": 0.5915, + "step": 7322 + }, + { + "epoch": 0.20107084019769358, + "grad_norm": 0.3998299837112427, + "learning_rate": 1.9517481217279396e-05, + "loss": 0.4543, + "step": 7323 + }, + { + "epoch": 0.20109829763866008, + "grad_norm": 0.32125037908554077, + "learning_rate": 1.9517348668828133e-05, + "loss": 0.5313, + "step": 7324 + }, + { + "epoch": 0.20112575507962657, + "grad_norm": 0.3847087025642395, + "learning_rate": 1.9517216102623978e-05, + "loss": 0.5969, + "step": 7325 + }, + { + "epoch": 0.2011532125205931, + "grad_norm": 0.3381783366203308, + "learning_rate": 1.951708351866717e-05, + "loss": 0.5447, + "step": 7326 + }, + { + "epoch": 0.2011806699615596, + "grad_norm": 0.37442320585250854, + "learning_rate": 1.9516950916957973e-05, + "loss": 0.5833, + "step": 7327 + }, + { + "epoch": 0.20120812740252608, + "grad_norm": 0.43819111585617065, + "learning_rate": 1.951681829749662e-05, + "loss": 0.5307, + "step": 7328 + }, + { + "epoch": 0.20123558484349258, + "grad_norm": 0.36493250727653503, + "learning_rate": 1.9516685660283366e-05, + "loss": 0.5221, + "step": 7329 + }, + { + "epoch": 0.2012630422844591, + "grad_norm": 0.4357490539550781, + "learning_rate": 1.9516553005318452e-05, + "loss": 0.5254, + "step": 7330 + }, + { + "epoch": 0.2012904997254256, + "grad_norm": 0.383034884929657, + "learning_rate": 1.951642033260213e-05, + "loss": 0.546, + "step": 7331 + }, + { + "epoch": 0.2013179571663921, + "grad_norm": 0.3640297055244446, + "learning_rate": 1.951628764213465e-05, + "loss": 0.5413, + "step": 7332 + }, + { + "epoch": 0.20134541460735858, + "grad_norm": 0.34441515803337097, + "learning_rate": 1.9516154933916253e-05, + "loss": 0.5343, + "step": 7333 + }, + { + "epoch": 0.2013728720483251, + "grad_norm": 0.3524656593799591, + "learning_rate": 1.9516022207947193e-05, + "loss": 0.5556, + "step": 7334 + }, + { + "epoch": 0.2014003294892916, + "grad_norm": 0.36219149827957153, + "learning_rate": 1.9515889464227717e-05, + "loss": 0.555, + "step": 7335 + }, + { + "epoch": 0.2014277869302581, + "grad_norm": 0.34537792205810547, + "learning_rate": 1.9515756702758063e-05, + "loss": 0.4698, + "step": 7336 + }, + { + "epoch": 0.2014552443712246, + "grad_norm": 0.4025348126888275, + "learning_rate": 1.9515623923538487e-05, + "loss": 0.5352, + "step": 7337 + }, + { + "epoch": 0.2014827018121911, + "grad_norm": 0.3460020422935486, + "learning_rate": 1.9515491126569238e-05, + "loss": 0.5559, + "step": 7338 + }, + { + "epoch": 0.2015101592531576, + "grad_norm": 0.3857753574848175, + "learning_rate": 1.951535831185056e-05, + "loss": 0.4493, + "step": 7339 + }, + { + "epoch": 0.2015376166941241, + "grad_norm": 0.39023688435554504, + "learning_rate": 1.9515225479382702e-05, + "loss": 0.5817, + "step": 7340 + }, + { + "epoch": 0.20156507413509062, + "grad_norm": 0.36615413427352905, + "learning_rate": 1.951509262916591e-05, + "loss": 0.5104, + "step": 7341 + }, + { + "epoch": 0.2015925315760571, + "grad_norm": 0.3541872203350067, + "learning_rate": 1.9514959761200435e-05, + "loss": 0.5908, + "step": 7342 + }, + { + "epoch": 0.2016199890170236, + "grad_norm": 0.3829677104949951, + "learning_rate": 1.9514826875486525e-05, + "loss": 0.568, + "step": 7343 + }, + { + "epoch": 0.20164744645799013, + "grad_norm": 0.4017241597175598, + "learning_rate": 1.9514693972024424e-05, + "loss": 0.5495, + "step": 7344 + }, + { + "epoch": 0.20167490389895662, + "grad_norm": 0.36972206830978394, + "learning_rate": 1.9514561050814382e-05, + "loss": 0.5559, + "step": 7345 + }, + { + "epoch": 0.20170236133992311, + "grad_norm": 0.34825921058654785, + "learning_rate": 1.9514428111856648e-05, + "loss": 0.4808, + "step": 7346 + }, + { + "epoch": 0.2017298187808896, + "grad_norm": 0.32623592019081116, + "learning_rate": 1.951429515515147e-05, + "loss": 0.5539, + "step": 7347 + }, + { + "epoch": 0.20175727622185613, + "grad_norm": 0.3953423500061035, + "learning_rate": 1.951416218069909e-05, + "loss": 0.4878, + "step": 7348 + }, + { + "epoch": 0.20178473366282262, + "grad_norm": 0.37235966324806213, + "learning_rate": 1.9514029188499765e-05, + "loss": 0.5559, + "step": 7349 + }, + { + "epoch": 0.20181219110378912, + "grad_norm": 0.3385111093521118, + "learning_rate": 1.9513896178553734e-05, + "loss": 0.4926, + "step": 7350 + }, + { + "epoch": 0.20183964854475564, + "grad_norm": 0.354172945022583, + "learning_rate": 1.9513763150861255e-05, + "loss": 0.4642, + "step": 7351 + }, + { + "epoch": 0.20186710598572213, + "grad_norm": 0.34014031291007996, + "learning_rate": 1.9513630105422572e-05, + "loss": 0.5624, + "step": 7352 + }, + { + "epoch": 0.20189456342668863, + "grad_norm": 0.3470444679260254, + "learning_rate": 1.951349704223793e-05, + "loss": 0.4593, + "step": 7353 + }, + { + "epoch": 0.20192202086765512, + "grad_norm": 0.31937283277511597, + "learning_rate": 1.9513363961307582e-05, + "loss": 0.5544, + "step": 7354 + }, + { + "epoch": 0.20194947830862164, + "grad_norm": 0.41197383403778076, + "learning_rate": 1.951323086263177e-05, + "loss": 0.5179, + "step": 7355 + }, + { + "epoch": 0.20197693574958814, + "grad_norm": 0.48682957887649536, + "learning_rate": 1.951309774621075e-05, + "loss": 0.6141, + "step": 7356 + }, + { + "epoch": 0.20200439319055463, + "grad_norm": 0.32521429657936096, + "learning_rate": 1.9512964612044763e-05, + "loss": 0.5302, + "step": 7357 + }, + { + "epoch": 0.20203185063152115, + "grad_norm": 0.37742552161216736, + "learning_rate": 1.9512831460134066e-05, + "loss": 0.6019, + "step": 7358 + }, + { + "epoch": 0.20205930807248765, + "grad_norm": 0.3743424713611603, + "learning_rate": 1.95126982904789e-05, + "loss": 0.551, + "step": 7359 + }, + { + "epoch": 0.20208676551345414, + "grad_norm": 0.3655437231063843, + "learning_rate": 1.9512565103079513e-05, + "loss": 0.4784, + "step": 7360 + }, + { + "epoch": 0.20211422295442064, + "grad_norm": 0.35205432772636414, + "learning_rate": 1.9512431897936156e-05, + "loss": 0.4429, + "step": 7361 + }, + { + "epoch": 0.20214168039538716, + "grad_norm": 0.36893174052238464, + "learning_rate": 1.951229867504908e-05, + "loss": 0.4682, + "step": 7362 + }, + { + "epoch": 0.20216913783635365, + "grad_norm": 0.36047354340553284, + "learning_rate": 1.951216543441853e-05, + "loss": 0.5339, + "step": 7363 + }, + { + "epoch": 0.20219659527732015, + "grad_norm": 0.3535478711128235, + "learning_rate": 1.9512032176044756e-05, + "loss": 0.4651, + "step": 7364 + }, + { + "epoch": 0.20222405271828667, + "grad_norm": 0.3515976667404175, + "learning_rate": 1.9511898899928007e-05, + "loss": 0.5185, + "step": 7365 + }, + { + "epoch": 0.20225151015925316, + "grad_norm": 0.3961832821369171, + "learning_rate": 1.951176560606853e-05, + "loss": 0.5227, + "step": 7366 + }, + { + "epoch": 0.20227896760021966, + "grad_norm": 0.33784645795822144, + "learning_rate": 1.951163229446657e-05, + "loss": 0.5414, + "step": 7367 + }, + { + "epoch": 0.20230642504118615, + "grad_norm": 0.3836570978164673, + "learning_rate": 1.9511498965122383e-05, + "loss": 0.6672, + "step": 7368 + }, + { + "epoch": 0.20233388248215267, + "grad_norm": 0.3401603400707245, + "learning_rate": 1.9511365618036216e-05, + "loss": 0.4424, + "step": 7369 + }, + { + "epoch": 0.20236133992311917, + "grad_norm": 0.4185061454772949, + "learning_rate": 1.9511232253208317e-05, + "loss": 0.5626, + "step": 7370 + }, + { + "epoch": 0.20238879736408566, + "grad_norm": 0.38302484154701233, + "learning_rate": 1.9511098870638934e-05, + "loss": 0.6222, + "step": 7371 + }, + { + "epoch": 0.20241625480505218, + "grad_norm": 0.3807424008846283, + "learning_rate": 1.9510965470328316e-05, + "loss": 0.5937, + "step": 7372 + }, + { + "epoch": 0.20244371224601868, + "grad_norm": 0.36280813813209534, + "learning_rate": 1.9510832052276712e-05, + "loss": 0.5736, + "step": 7373 + }, + { + "epoch": 0.20247116968698517, + "grad_norm": 0.30268439650535583, + "learning_rate": 1.9510698616484366e-05, + "loss": 0.4508, + "step": 7374 + }, + { + "epoch": 0.20249862712795166, + "grad_norm": 0.3752821981906891, + "learning_rate": 1.9510565162951538e-05, + "loss": 0.523, + "step": 7375 + }, + { + "epoch": 0.20252608456891819, + "grad_norm": 0.4249717891216278, + "learning_rate": 1.951043169167847e-05, + "loss": 0.4806, + "step": 7376 + }, + { + "epoch": 0.20255354200988468, + "grad_norm": 0.39721015095710754, + "learning_rate": 1.951029820266541e-05, + "loss": 0.5368, + "step": 7377 + }, + { + "epoch": 0.20258099945085117, + "grad_norm": 0.38381874561309814, + "learning_rate": 1.9510164695912603e-05, + "loss": 0.5875, + "step": 7378 + }, + { + "epoch": 0.2026084568918177, + "grad_norm": 0.3454338610172272, + "learning_rate": 1.9510031171420308e-05, + "loss": 0.5221, + "step": 7379 + }, + { + "epoch": 0.2026359143327842, + "grad_norm": 0.3470473885536194, + "learning_rate": 1.950989762918877e-05, + "loss": 0.4671, + "step": 7380 + }, + { + "epoch": 0.20266337177375068, + "grad_norm": 0.3788517117500305, + "learning_rate": 1.950976406921824e-05, + "loss": 0.5021, + "step": 7381 + }, + { + "epoch": 0.20269082921471718, + "grad_norm": 0.3841521441936493, + "learning_rate": 1.9509630491508963e-05, + "loss": 0.562, + "step": 7382 + }, + { + "epoch": 0.2027182866556837, + "grad_norm": 0.3664693236351013, + "learning_rate": 1.9509496896061192e-05, + "loss": 0.4937, + "step": 7383 + }, + { + "epoch": 0.2027457440966502, + "grad_norm": 0.3343413174152374, + "learning_rate": 1.950936328287517e-05, + "loss": 0.5455, + "step": 7384 + }, + { + "epoch": 0.2027732015376167, + "grad_norm": 0.4551730751991272, + "learning_rate": 1.950922965195115e-05, + "loss": 0.5455, + "step": 7385 + }, + { + "epoch": 0.2028006589785832, + "grad_norm": 0.4085313081741333, + "learning_rate": 1.9509096003289386e-05, + "loss": 0.555, + "step": 7386 + }, + { + "epoch": 0.2028281164195497, + "grad_norm": 0.36422106623649597, + "learning_rate": 1.950896233689012e-05, + "loss": 0.5937, + "step": 7387 + }, + { + "epoch": 0.2028555738605162, + "grad_norm": 0.3654335141181946, + "learning_rate": 1.9508828652753607e-05, + "loss": 0.5371, + "step": 7388 + }, + { + "epoch": 0.2028830313014827, + "grad_norm": 0.34930387139320374, + "learning_rate": 1.9508694950880093e-05, + "loss": 0.4635, + "step": 7389 + }, + { + "epoch": 0.2029104887424492, + "grad_norm": 0.346162885427475, + "learning_rate": 1.9508561231269825e-05, + "loss": 0.5455, + "step": 7390 + }, + { + "epoch": 0.2029379461834157, + "grad_norm": 0.38914257287979126, + "learning_rate": 1.950842749392306e-05, + "loss": 0.5214, + "step": 7391 + }, + { + "epoch": 0.2029654036243822, + "grad_norm": 0.40267452597618103, + "learning_rate": 1.9508293738840038e-05, + "loss": 0.5633, + "step": 7392 + }, + { + "epoch": 0.20299286106534872, + "grad_norm": 0.33594340085983276, + "learning_rate": 1.9508159966021017e-05, + "loss": 0.4994, + "step": 7393 + }, + { + "epoch": 0.20302031850631522, + "grad_norm": 0.3360845446586609, + "learning_rate": 1.950802617546624e-05, + "loss": 0.4815, + "step": 7394 + }, + { + "epoch": 0.2030477759472817, + "grad_norm": 0.36471429467201233, + "learning_rate": 1.9507892367175963e-05, + "loss": 0.5172, + "step": 7395 + }, + { + "epoch": 0.2030752333882482, + "grad_norm": 0.40203022956848145, + "learning_rate": 1.9507758541150432e-05, + "loss": 0.6034, + "step": 7396 + }, + { + "epoch": 0.20310269082921473, + "grad_norm": 0.37335720658302307, + "learning_rate": 1.9507624697389895e-05, + "loss": 0.5717, + "step": 7397 + }, + { + "epoch": 0.20313014827018122, + "grad_norm": 0.942424476146698, + "learning_rate": 1.9507490835894605e-05, + "loss": 0.5968, + "step": 7398 + }, + { + "epoch": 0.20315760571114772, + "grad_norm": 0.3348281979560852, + "learning_rate": 1.950735695666481e-05, + "loss": 0.6025, + "step": 7399 + }, + { + "epoch": 0.2031850631521142, + "grad_norm": 0.3216911852359772, + "learning_rate": 1.950722305970076e-05, + "loss": 0.4775, + "step": 7400 + }, + { + "epoch": 0.20321252059308073, + "grad_norm": 0.3893691599369049, + "learning_rate": 1.95070891450027e-05, + "loss": 0.5993, + "step": 7401 + }, + { + "epoch": 0.20323997803404723, + "grad_norm": 0.3540497124195099, + "learning_rate": 1.9506955212570892e-05, + "loss": 0.6153, + "step": 7402 + }, + { + "epoch": 0.20326743547501372, + "grad_norm": 0.4128074645996094, + "learning_rate": 1.9506821262405574e-05, + "loss": 0.556, + "step": 7403 + }, + { + "epoch": 0.20329489291598024, + "grad_norm": 0.3790435492992401, + "learning_rate": 1.9506687294506998e-05, + "loss": 0.5678, + "step": 7404 + }, + { + "epoch": 0.20332235035694673, + "grad_norm": 4.062503814697266, + "learning_rate": 1.950655330887542e-05, + "loss": 0.6926, + "step": 7405 + }, + { + "epoch": 0.20334980779791323, + "grad_norm": 0.40454739332199097, + "learning_rate": 1.9506419305511085e-05, + "loss": 0.5258, + "step": 7406 + }, + { + "epoch": 0.20337726523887972, + "grad_norm": 0.3350772559642792, + "learning_rate": 1.9506285284414245e-05, + "loss": 0.5694, + "step": 7407 + }, + { + "epoch": 0.20340472267984624, + "grad_norm": 0.3232493996620178, + "learning_rate": 1.9506151245585147e-05, + "loss": 0.394, + "step": 7408 + }, + { + "epoch": 0.20343218012081274, + "grad_norm": 0.3331725001335144, + "learning_rate": 1.950601718902404e-05, + "loss": 0.4905, + "step": 7409 + }, + { + "epoch": 0.20345963756177923, + "grad_norm": 0.36789557337760925, + "learning_rate": 1.9505883114731184e-05, + "loss": 0.49, + "step": 7410 + }, + { + "epoch": 0.20348709500274575, + "grad_norm": 0.35910680890083313, + "learning_rate": 1.9505749022706818e-05, + "loss": 0.4764, + "step": 7411 + }, + { + "epoch": 0.20351455244371225, + "grad_norm": 0.3915785849094391, + "learning_rate": 1.9505614912951193e-05, + "loss": 0.5085, + "step": 7412 + }, + { + "epoch": 0.20354200988467874, + "grad_norm": 0.37323465943336487, + "learning_rate": 1.950548078546457e-05, + "loss": 0.6104, + "step": 7413 + }, + { + "epoch": 0.20356946732564524, + "grad_norm": 0.3650323152542114, + "learning_rate": 1.9505346640247187e-05, + "loss": 0.4727, + "step": 7414 + }, + { + "epoch": 0.20359692476661176, + "grad_norm": 0.4937528073787689, + "learning_rate": 1.95052124772993e-05, + "loss": 0.4801, + "step": 7415 + }, + { + "epoch": 0.20362438220757825, + "grad_norm": 0.35039907693862915, + "learning_rate": 1.9505078296621157e-05, + "loss": 0.5747, + "step": 7416 + }, + { + "epoch": 0.20365183964854475, + "grad_norm": 0.34304288029670715, + "learning_rate": 1.9504944098213007e-05, + "loss": 0.5953, + "step": 7417 + }, + { + "epoch": 0.20367929708951127, + "grad_norm": 0.3442210853099823, + "learning_rate": 1.9504809882075105e-05, + "loss": 0.5233, + "step": 7418 + }, + { + "epoch": 0.20370675453047776, + "grad_norm": 0.35135722160339355, + "learning_rate": 1.95046756482077e-05, + "loss": 0.4981, + "step": 7419 + }, + { + "epoch": 0.20373421197144426, + "grad_norm": 0.3934701085090637, + "learning_rate": 1.950454139661104e-05, + "loss": 0.5674, + "step": 7420 + }, + { + "epoch": 0.20376166941241075, + "grad_norm": 0.3908854126930237, + "learning_rate": 1.9504407127285377e-05, + "loss": 0.5464, + "step": 7421 + }, + { + "epoch": 0.20378912685337727, + "grad_norm": 0.3426850736141205, + "learning_rate": 1.9504272840230963e-05, + "loss": 0.484, + "step": 7422 + }, + { + "epoch": 0.20381658429434377, + "grad_norm": 0.4569788873195648, + "learning_rate": 1.9504138535448045e-05, + "loss": 0.5434, + "step": 7423 + }, + { + "epoch": 0.20384404173531026, + "grad_norm": 0.36274266242980957, + "learning_rate": 1.9504004212936872e-05, + "loss": 0.5862, + "step": 7424 + }, + { + "epoch": 0.20387149917627678, + "grad_norm": 0.3782878518104553, + "learning_rate": 1.9503869872697703e-05, + "loss": 0.5124, + "step": 7425 + }, + { + "epoch": 0.20389895661724328, + "grad_norm": 0.36932921409606934, + "learning_rate": 1.9503735514730785e-05, + "loss": 0.5111, + "step": 7426 + }, + { + "epoch": 0.20392641405820977, + "grad_norm": 0.3320193290710449, + "learning_rate": 1.9503601139036362e-05, + "loss": 0.609, + "step": 7427 + }, + { + "epoch": 0.20395387149917626, + "grad_norm": 0.47825369238853455, + "learning_rate": 1.950346674561469e-05, + "loss": 0.5782, + "step": 7428 + }, + { + "epoch": 0.2039813289401428, + "grad_norm": 0.3573855459690094, + "learning_rate": 1.9503332334466024e-05, + "loss": 0.5557, + "step": 7429 + }, + { + "epoch": 0.20400878638110928, + "grad_norm": 0.3699520528316498, + "learning_rate": 1.9503197905590607e-05, + "loss": 0.5409, + "step": 7430 + }, + { + "epoch": 0.20403624382207577, + "grad_norm": 0.3708404004573822, + "learning_rate": 1.950306345898869e-05, + "loss": 0.4878, + "step": 7431 + }, + { + "epoch": 0.2040637012630423, + "grad_norm": 0.37477177381515503, + "learning_rate": 1.950292899466053e-05, + "loss": 0.6121, + "step": 7432 + }, + { + "epoch": 0.2040911587040088, + "grad_norm": 0.3681904375553131, + "learning_rate": 1.9502794512606375e-05, + "loss": 0.4563, + "step": 7433 + }, + { + "epoch": 0.20411861614497528, + "grad_norm": 0.38901153206825256, + "learning_rate": 1.9502660012826474e-05, + "loss": 0.5073, + "step": 7434 + }, + { + "epoch": 0.20414607358594178, + "grad_norm": 0.4143115282058716, + "learning_rate": 1.950252549532108e-05, + "loss": 0.5616, + "step": 7435 + }, + { + "epoch": 0.2041735310269083, + "grad_norm": 0.34499984979629517, + "learning_rate": 1.950239096009044e-05, + "loss": 0.5479, + "step": 7436 + }, + { + "epoch": 0.2042009884678748, + "grad_norm": 0.45897531509399414, + "learning_rate": 1.9502256407134813e-05, + "loss": 0.526, + "step": 7437 + }, + { + "epoch": 0.2042284459088413, + "grad_norm": 0.3583773672580719, + "learning_rate": 1.950212183645444e-05, + "loss": 0.5565, + "step": 7438 + }, + { + "epoch": 0.2042559033498078, + "grad_norm": 0.42982640862464905, + "learning_rate": 1.950198724804958e-05, + "loss": 0.5528, + "step": 7439 + }, + { + "epoch": 0.2042833607907743, + "grad_norm": 0.37345951795578003, + "learning_rate": 1.9501852641920483e-05, + "loss": 0.5203, + "step": 7440 + }, + { + "epoch": 0.2043108182317408, + "grad_norm": 0.32849639654159546, + "learning_rate": 1.9501718018067395e-05, + "loss": 0.5254, + "step": 7441 + }, + { + "epoch": 0.2043382756727073, + "grad_norm": 0.3645012378692627, + "learning_rate": 1.950158337649057e-05, + "loss": 0.5449, + "step": 7442 + }, + { + "epoch": 0.20436573311367381, + "grad_norm": 0.37232595682144165, + "learning_rate": 1.9501448717190258e-05, + "loss": 0.5909, + "step": 7443 + }, + { + "epoch": 0.2043931905546403, + "grad_norm": 0.32305845618247986, + "learning_rate": 1.9501314040166716e-05, + "loss": 0.5585, + "step": 7444 + }, + { + "epoch": 0.2044206479956068, + "grad_norm": 0.9522542953491211, + "learning_rate": 1.950117934542019e-05, + "loss": 0.4839, + "step": 7445 + }, + { + "epoch": 0.20444810543657332, + "grad_norm": 0.3621273934841156, + "learning_rate": 1.9501044632950932e-05, + "loss": 0.5928, + "step": 7446 + }, + { + "epoch": 0.20447556287753982, + "grad_norm": 0.3878495991230011, + "learning_rate": 1.950090990275919e-05, + "loss": 0.5525, + "step": 7447 + }, + { + "epoch": 0.2045030203185063, + "grad_norm": 0.36488908529281616, + "learning_rate": 1.9500775154845222e-05, + "loss": 0.6066, + "step": 7448 + }, + { + "epoch": 0.2045304777594728, + "grad_norm": 0.345989465713501, + "learning_rate": 1.9500640389209275e-05, + "loss": 0.5049, + "step": 7449 + }, + { + "epoch": 0.20455793520043933, + "grad_norm": 0.34257805347442627, + "learning_rate": 1.9500505605851602e-05, + "loss": 0.5215, + "step": 7450 + }, + { + "epoch": 0.20458539264140582, + "grad_norm": 0.34737899899482727, + "learning_rate": 1.9500370804772456e-05, + "loss": 0.5714, + "step": 7451 + }, + { + "epoch": 0.20461285008237232, + "grad_norm": 0.34886297583580017, + "learning_rate": 1.9500235985972083e-05, + "loss": 0.5165, + "step": 7452 + }, + { + "epoch": 0.20464030752333884, + "grad_norm": 0.37414979934692383, + "learning_rate": 1.950010114945074e-05, + "loss": 0.519, + "step": 7453 + }, + { + "epoch": 0.20466776496430533, + "grad_norm": 0.36315059661865234, + "learning_rate": 1.949996629520867e-05, + "loss": 0.6013, + "step": 7454 + }, + { + "epoch": 0.20469522240527183, + "grad_norm": 0.5032866597175598, + "learning_rate": 1.949983142324614e-05, + "loss": 0.5549, + "step": 7455 + }, + { + "epoch": 0.20472267984623832, + "grad_norm": 0.3336421251296997, + "learning_rate": 1.9499696533563385e-05, + "loss": 0.4733, + "step": 7456 + }, + { + "epoch": 0.20475013728720484, + "grad_norm": 0.34455493092536926, + "learning_rate": 1.949956162616067e-05, + "loss": 0.5516, + "step": 7457 + }, + { + "epoch": 0.20477759472817134, + "grad_norm": 0.37362754344940186, + "learning_rate": 1.9499426701038236e-05, + "loss": 0.5631, + "step": 7458 + }, + { + "epoch": 0.20480505216913783, + "grad_norm": 0.34454748034477234, + "learning_rate": 1.9499291758196342e-05, + "loss": 0.4897, + "step": 7459 + }, + { + "epoch": 0.20483250961010435, + "grad_norm": 0.34226006269454956, + "learning_rate": 1.9499156797635234e-05, + "loss": 0.5312, + "step": 7460 + }, + { + "epoch": 0.20485996705107085, + "grad_norm": 0.3489041030406952, + "learning_rate": 1.9499021819355168e-05, + "loss": 0.5001, + "step": 7461 + }, + { + "epoch": 0.20488742449203734, + "grad_norm": 0.3512673079967499, + "learning_rate": 1.9498886823356397e-05, + "loss": 0.514, + "step": 7462 + }, + { + "epoch": 0.20491488193300383, + "grad_norm": 0.4462568461894989, + "learning_rate": 1.949875180963917e-05, + "loss": 0.5459, + "step": 7463 + }, + { + "epoch": 0.20494233937397036, + "grad_norm": 0.3507233262062073, + "learning_rate": 1.9498616778203735e-05, + "loss": 0.5351, + "step": 7464 + }, + { + "epoch": 0.20496979681493685, + "grad_norm": 0.36092373728752136, + "learning_rate": 1.9498481729050353e-05, + "loss": 0.5782, + "step": 7465 + }, + { + "epoch": 0.20499725425590334, + "grad_norm": 0.3713226914405823, + "learning_rate": 1.9498346662179267e-05, + "loss": 0.5547, + "step": 7466 + }, + { + "epoch": 0.20502471169686984, + "grad_norm": 0.3731040060520172, + "learning_rate": 1.949821157759074e-05, + "loss": 0.604, + "step": 7467 + }, + { + "epoch": 0.20505216913783636, + "grad_norm": 0.36185961961746216, + "learning_rate": 1.949807647528501e-05, + "loss": 0.5346, + "step": 7468 + }, + { + "epoch": 0.20507962657880285, + "grad_norm": 0.45466071367263794, + "learning_rate": 1.949794135526234e-05, + "loss": 0.5899, + "step": 7469 + }, + { + "epoch": 0.20510708401976935, + "grad_norm": 0.3466683626174927, + "learning_rate": 1.9497806217522975e-05, + "loss": 0.5138, + "step": 7470 + }, + { + "epoch": 0.20513454146073587, + "grad_norm": 0.3795687258243561, + "learning_rate": 1.9497671062067168e-05, + "loss": 0.4833, + "step": 7471 + }, + { + "epoch": 0.20516199890170236, + "grad_norm": 0.3838491141796112, + "learning_rate": 1.949753588889518e-05, + "loss": 0.5565, + "step": 7472 + }, + { + "epoch": 0.20518945634266886, + "grad_norm": 0.35425469279289246, + "learning_rate": 1.9497400698007252e-05, + "loss": 0.5556, + "step": 7473 + }, + { + "epoch": 0.20521691378363535, + "grad_norm": 0.3542131185531616, + "learning_rate": 1.949726548940364e-05, + "loss": 0.5686, + "step": 7474 + }, + { + "epoch": 0.20524437122460187, + "grad_norm": 0.37023600935935974, + "learning_rate": 1.9497130263084597e-05, + "loss": 0.5366, + "step": 7475 + }, + { + "epoch": 0.20527182866556837, + "grad_norm": 0.40987759828567505, + "learning_rate": 1.9496995019050377e-05, + "loss": 0.5729, + "step": 7476 + }, + { + "epoch": 0.20529928610653486, + "grad_norm": 0.35138148069381714, + "learning_rate": 1.949685975730123e-05, + "loss": 0.5626, + "step": 7477 + }, + { + "epoch": 0.20532674354750138, + "grad_norm": 0.32947590947151184, + "learning_rate": 1.9496724477837406e-05, + "loss": 0.4729, + "step": 7478 + }, + { + "epoch": 0.20535420098846788, + "grad_norm": 0.39107081294059753, + "learning_rate": 1.949658918065916e-05, + "loss": 0.6181, + "step": 7479 + }, + { + "epoch": 0.20538165842943437, + "grad_norm": 0.4130382239818573, + "learning_rate": 1.9496453865766748e-05, + "loss": 0.531, + "step": 7480 + }, + { + "epoch": 0.20540911587040087, + "grad_norm": 0.36393144726753235, + "learning_rate": 1.949631853316041e-05, + "loss": 0.558, + "step": 7481 + }, + { + "epoch": 0.2054365733113674, + "grad_norm": 0.3440810441970825, + "learning_rate": 1.9496183182840415e-05, + "loss": 0.443, + "step": 7482 + }, + { + "epoch": 0.20546403075233388, + "grad_norm": 0.9952693581581116, + "learning_rate": 1.9496047814807006e-05, + "loss": 0.5398, + "step": 7483 + }, + { + "epoch": 0.20549148819330038, + "grad_norm": 0.35108742117881775, + "learning_rate": 1.9495912429060437e-05, + "loss": 0.5284, + "step": 7484 + }, + { + "epoch": 0.2055189456342669, + "grad_norm": 0.5456353425979614, + "learning_rate": 1.9495777025600962e-05, + "loss": 0.3654, + "step": 7485 + }, + { + "epoch": 0.2055464030752334, + "grad_norm": 0.378020316362381, + "learning_rate": 1.949564160442883e-05, + "loss": 0.5853, + "step": 7486 + }, + { + "epoch": 0.20557386051619989, + "grad_norm": 0.3525356948375702, + "learning_rate": 1.9495506165544295e-05, + "loss": 0.5693, + "step": 7487 + }, + { + "epoch": 0.20560131795716638, + "grad_norm": 0.3635256290435791, + "learning_rate": 1.949537070894761e-05, + "loss": 0.6041, + "step": 7488 + }, + { + "epoch": 0.2056287753981329, + "grad_norm": 0.3216346204280853, + "learning_rate": 1.949523523463903e-05, + "loss": 0.4887, + "step": 7489 + }, + { + "epoch": 0.2056562328390994, + "grad_norm": 0.35648542642593384, + "learning_rate": 1.9495099742618806e-05, + "loss": 0.5051, + "step": 7490 + }, + { + "epoch": 0.2056836902800659, + "grad_norm": 0.3705017864704132, + "learning_rate": 1.9494964232887193e-05, + "loss": 0.4549, + "step": 7491 + }, + { + "epoch": 0.2057111477210324, + "grad_norm": 0.35166114568710327, + "learning_rate": 1.9494828705444436e-05, + "loss": 0.5827, + "step": 7492 + }, + { + "epoch": 0.2057386051619989, + "grad_norm": 0.3724901080131531, + "learning_rate": 1.9494693160290796e-05, + "loss": 0.4704, + "step": 7493 + }, + { + "epoch": 0.2057660626029654, + "grad_norm": 0.3477155268192291, + "learning_rate": 1.949455759742652e-05, + "loss": 0.5008, + "step": 7494 + }, + { + "epoch": 0.2057935200439319, + "grad_norm": 0.3310941457748413, + "learning_rate": 1.9494422016851867e-05, + "loss": 0.4863, + "step": 7495 + }, + { + "epoch": 0.20582097748489842, + "grad_norm": 0.364769846200943, + "learning_rate": 1.9494286418567086e-05, + "loss": 0.514, + "step": 7496 + }, + { + "epoch": 0.2058484349258649, + "grad_norm": 0.36607009172439575, + "learning_rate": 1.949415080257243e-05, + "loss": 0.4901, + "step": 7497 + }, + { + "epoch": 0.2058758923668314, + "grad_norm": 0.37459835410118103, + "learning_rate": 1.9494015168868152e-05, + "loss": 0.5698, + "step": 7498 + }, + { + "epoch": 0.20590334980779793, + "grad_norm": 0.7836223244667053, + "learning_rate": 1.9493879517454507e-05, + "loss": 0.5407, + "step": 7499 + }, + { + "epoch": 0.20593080724876442, + "grad_norm": 0.3678928315639496, + "learning_rate": 1.9493743848331744e-05, + "loss": 0.5654, + "step": 7500 + }, + { + "epoch": 0.2059582646897309, + "grad_norm": 0.32342708110809326, + "learning_rate": 1.949360816150012e-05, + "loss": 0.4634, + "step": 7501 + }, + { + "epoch": 0.2059857221306974, + "grad_norm": 0.3260608911514282, + "learning_rate": 1.949347245695989e-05, + "loss": 0.5718, + "step": 7502 + }, + { + "epoch": 0.20601317957166393, + "grad_norm": 0.40840449929237366, + "learning_rate": 1.94933367347113e-05, + "loss": 0.5584, + "step": 7503 + }, + { + "epoch": 0.20604063701263042, + "grad_norm": 0.3426133692264557, + "learning_rate": 1.949320099475461e-05, + "loss": 0.4608, + "step": 7504 + }, + { + "epoch": 0.20606809445359692, + "grad_norm": 0.37744447588920593, + "learning_rate": 1.9493065237090067e-05, + "loss": 0.5405, + "step": 7505 + }, + { + "epoch": 0.20609555189456344, + "grad_norm": 0.3186626732349396, + "learning_rate": 1.9492929461717928e-05, + "loss": 0.4646, + "step": 7506 + }, + { + "epoch": 0.20612300933552993, + "grad_norm": 0.40855786204338074, + "learning_rate": 1.949279366863845e-05, + "loss": 0.5015, + "step": 7507 + }, + { + "epoch": 0.20615046677649643, + "grad_norm": 0.39541903138160706, + "learning_rate": 1.949265785785188e-05, + "loss": 0.5277, + "step": 7508 + }, + { + "epoch": 0.20617792421746292, + "grad_norm": 0.3273670971393585, + "learning_rate": 1.949252202935847e-05, + "loss": 0.5505, + "step": 7509 + }, + { + "epoch": 0.20620538165842944, + "grad_norm": 0.39661481976509094, + "learning_rate": 1.949238618315848e-05, + "loss": 0.5713, + "step": 7510 + }, + { + "epoch": 0.20623283909939594, + "grad_norm": 0.3211910128593445, + "learning_rate": 1.949225031925216e-05, + "loss": 0.4672, + "step": 7511 + }, + { + "epoch": 0.20626029654036243, + "grad_norm": 0.3473476469516754, + "learning_rate": 1.949211443763976e-05, + "loss": 0.4468, + "step": 7512 + }, + { + "epoch": 0.20628775398132895, + "grad_norm": 0.4172080457210541, + "learning_rate": 1.949197853832154e-05, + "loss": 0.5541, + "step": 7513 + }, + { + "epoch": 0.20631521142229545, + "grad_norm": 0.35693198442459106, + "learning_rate": 1.9491842621297752e-05, + "loss": 0.5727, + "step": 7514 + }, + { + "epoch": 0.20634266886326194, + "grad_norm": 0.3937622606754303, + "learning_rate": 1.9491706686568645e-05, + "loss": 0.5893, + "step": 7515 + }, + { + "epoch": 0.20637012630422844, + "grad_norm": 0.4228106439113617, + "learning_rate": 1.9491570734134476e-05, + "loss": 0.5457, + "step": 7516 + }, + { + "epoch": 0.20639758374519496, + "grad_norm": 0.3606700897216797, + "learning_rate": 1.94914347639955e-05, + "loss": 0.5494, + "step": 7517 + }, + { + "epoch": 0.20642504118616145, + "grad_norm": 0.3906656503677368, + "learning_rate": 1.949129877615197e-05, + "loss": 0.5727, + "step": 7518 + }, + { + "epoch": 0.20645249862712794, + "grad_norm": 0.3916078209877014, + "learning_rate": 1.9491162770604134e-05, + "loss": 0.5683, + "step": 7519 + }, + { + "epoch": 0.20647995606809447, + "grad_norm": 0.3519538640975952, + "learning_rate": 1.9491026747352255e-05, + "loss": 0.5653, + "step": 7520 + }, + { + "epoch": 0.20650741350906096, + "grad_norm": 0.3471977114677429, + "learning_rate": 1.9490890706396577e-05, + "loss": 0.504, + "step": 7521 + }, + { + "epoch": 0.20653487095002745, + "grad_norm": 0.39811068773269653, + "learning_rate": 1.949075464773736e-05, + "loss": 0.537, + "step": 7522 + }, + { + "epoch": 0.20656232839099395, + "grad_norm": 0.414059579372406, + "learning_rate": 1.949061857137486e-05, + "loss": 0.6011, + "step": 7523 + }, + { + "epoch": 0.20658978583196047, + "grad_norm": 0.40160319209098816, + "learning_rate": 1.9490482477309327e-05, + "loss": 0.5724, + "step": 7524 + }, + { + "epoch": 0.20661724327292696, + "grad_norm": 0.34640300273895264, + "learning_rate": 1.9490346365541013e-05, + "loss": 0.5071, + "step": 7525 + }, + { + "epoch": 0.20664470071389346, + "grad_norm": 0.41131648421287537, + "learning_rate": 1.9490210236070175e-05, + "loss": 0.5574, + "step": 7526 + }, + { + "epoch": 0.20667215815485998, + "grad_norm": 0.337809681892395, + "learning_rate": 1.9490074088897064e-05, + "loss": 0.4609, + "step": 7527 + }, + { + "epoch": 0.20669961559582647, + "grad_norm": 0.3415927588939667, + "learning_rate": 1.948993792402194e-05, + "loss": 0.535, + "step": 7528 + }, + { + "epoch": 0.20672707303679297, + "grad_norm": 0.5304617285728455, + "learning_rate": 1.948980174144505e-05, + "loss": 0.4696, + "step": 7529 + }, + { + "epoch": 0.20675453047775946, + "grad_norm": 0.3284441828727722, + "learning_rate": 1.9489665541166648e-05, + "loss": 0.4358, + "step": 7530 + }, + { + "epoch": 0.20678198791872598, + "grad_norm": 0.36018145084381104, + "learning_rate": 1.9489529323186996e-05, + "loss": 0.4855, + "step": 7531 + }, + { + "epoch": 0.20680944535969248, + "grad_norm": 0.36659812927246094, + "learning_rate": 1.948939308750634e-05, + "loss": 0.4312, + "step": 7532 + }, + { + "epoch": 0.20683690280065897, + "grad_norm": 0.3751612901687622, + "learning_rate": 1.9489256834124937e-05, + "loss": 0.5172, + "step": 7533 + }, + { + "epoch": 0.20686436024162547, + "grad_norm": 0.40616172552108765, + "learning_rate": 1.9489120563043045e-05, + "loss": 0.545, + "step": 7534 + }, + { + "epoch": 0.206891817682592, + "grad_norm": 0.37372636795043945, + "learning_rate": 1.9488984274260912e-05, + "loss": 0.5708, + "step": 7535 + }, + { + "epoch": 0.20691927512355848, + "grad_norm": 0.45384055376052856, + "learning_rate": 1.9488847967778796e-05, + "loss": 0.5404, + "step": 7536 + }, + { + "epoch": 0.20694673256452498, + "grad_norm": 0.385635644197464, + "learning_rate": 1.948871164359695e-05, + "loss": 0.4954, + "step": 7537 + }, + { + "epoch": 0.2069741900054915, + "grad_norm": 0.37352365255355835, + "learning_rate": 1.9488575301715626e-05, + "loss": 0.6172, + "step": 7538 + }, + { + "epoch": 0.207001647446458, + "grad_norm": 0.3299863338470459, + "learning_rate": 1.9488438942135084e-05, + "loss": 0.5538, + "step": 7539 + }, + { + "epoch": 0.2070291048874245, + "grad_norm": 0.5008835196495056, + "learning_rate": 1.9488302564855576e-05, + "loss": 0.4997, + "step": 7540 + }, + { + "epoch": 0.20705656232839098, + "grad_norm": 0.7937750816345215, + "learning_rate": 1.948816616987735e-05, + "loss": 0.5392, + "step": 7541 + }, + { + "epoch": 0.2070840197693575, + "grad_norm": 0.5317788124084473, + "learning_rate": 1.9488029757200668e-05, + "loss": 0.4696, + "step": 7542 + }, + { + "epoch": 0.207111477210324, + "grad_norm": 0.39890098571777344, + "learning_rate": 1.9487893326825783e-05, + "loss": 0.5104, + "step": 7543 + }, + { + "epoch": 0.2071389346512905, + "grad_norm": 0.39517533779144287, + "learning_rate": 1.9487756878752952e-05, + "loss": 0.47, + "step": 7544 + }, + { + "epoch": 0.207166392092257, + "grad_norm": 0.3346083462238312, + "learning_rate": 1.948762041298242e-05, + "loss": 0.5186, + "step": 7545 + }, + { + "epoch": 0.2071938495332235, + "grad_norm": 0.37469974160194397, + "learning_rate": 1.948748392951445e-05, + "loss": 0.6002, + "step": 7546 + }, + { + "epoch": 0.20722130697419, + "grad_norm": 0.5224581956863403, + "learning_rate": 1.9487347428349295e-05, + "loss": 0.4765, + "step": 7547 + }, + { + "epoch": 0.2072487644151565, + "grad_norm": 0.3902285695075989, + "learning_rate": 1.948721090948721e-05, + "loss": 0.5348, + "step": 7548 + }, + { + "epoch": 0.20727622185612302, + "grad_norm": 0.8881040215492249, + "learning_rate": 1.9487074372928448e-05, + "loss": 0.4758, + "step": 7549 + }, + { + "epoch": 0.2073036792970895, + "grad_norm": 0.38446030020713806, + "learning_rate": 1.9486937818673267e-05, + "loss": 0.5388, + "step": 7550 + }, + { + "epoch": 0.207331136738056, + "grad_norm": 0.35867998003959656, + "learning_rate": 1.9486801246721914e-05, + "loss": 0.5149, + "step": 7551 + }, + { + "epoch": 0.20735859417902253, + "grad_norm": 0.39368197321891785, + "learning_rate": 1.948666465707465e-05, + "loss": 0.5402, + "step": 7552 + }, + { + "epoch": 0.20738605161998902, + "grad_norm": 0.36504340171813965, + "learning_rate": 1.948652804973173e-05, + "loss": 0.5504, + "step": 7553 + }, + { + "epoch": 0.20741350906095551, + "grad_norm": 0.3094841539859772, + "learning_rate": 1.9486391424693408e-05, + "loss": 0.493, + "step": 7554 + }, + { + "epoch": 0.207440966501922, + "grad_norm": 0.3840715289115906, + "learning_rate": 1.948625478195994e-05, + "loss": 0.5289, + "step": 7555 + }, + { + "epoch": 0.20746842394288853, + "grad_norm": 0.40064117312431335, + "learning_rate": 1.9486118121531575e-05, + "loss": 0.4444, + "step": 7556 + }, + { + "epoch": 0.20749588138385502, + "grad_norm": 0.3870463967323303, + "learning_rate": 1.9485981443408576e-05, + "loss": 0.6025, + "step": 7557 + }, + { + "epoch": 0.20752333882482152, + "grad_norm": 0.34878480434417725, + "learning_rate": 1.948584474759119e-05, + "loss": 0.5113, + "step": 7558 + }, + { + "epoch": 0.20755079626578804, + "grad_norm": 0.34857890009880066, + "learning_rate": 1.948570803407968e-05, + "loss": 0.492, + "step": 7559 + }, + { + "epoch": 0.20757825370675453, + "grad_norm": 0.35261139273643494, + "learning_rate": 1.9485571302874292e-05, + "loss": 0.525, + "step": 7560 + }, + { + "epoch": 0.20760571114772103, + "grad_norm": 0.38633492588996887, + "learning_rate": 1.948543455397529e-05, + "loss": 0.534, + "step": 7561 + }, + { + "epoch": 0.20763316858868752, + "grad_norm": 0.7279139161109924, + "learning_rate": 1.9485297787382925e-05, + "loss": 0.576, + "step": 7562 + }, + { + "epoch": 0.20766062602965404, + "grad_norm": 0.3838392198085785, + "learning_rate": 1.948516100309745e-05, + "loss": 0.477, + "step": 7563 + }, + { + "epoch": 0.20768808347062054, + "grad_norm": 0.35583823919296265, + "learning_rate": 1.9485024201119127e-05, + "loss": 0.5427, + "step": 7564 + }, + { + "epoch": 0.20771554091158703, + "grad_norm": 0.3963923156261444, + "learning_rate": 1.94848873814482e-05, + "loss": 0.6007, + "step": 7565 + }, + { + "epoch": 0.20774299835255355, + "grad_norm": 0.34324321150779724, + "learning_rate": 1.9484750544084936e-05, + "loss": 0.4685, + "step": 7566 + }, + { + "epoch": 0.20777045579352005, + "grad_norm": 0.42678889632225037, + "learning_rate": 1.9484613689029585e-05, + "loss": 0.603, + "step": 7567 + }, + { + "epoch": 0.20779791323448654, + "grad_norm": 0.3567548990249634, + "learning_rate": 1.94844768162824e-05, + "loss": 0.5712, + "step": 7568 + }, + { + "epoch": 0.20782537067545304, + "grad_norm": 0.43005841970443726, + "learning_rate": 1.948433992584364e-05, + "loss": 0.5862, + "step": 7569 + }, + { + "epoch": 0.20785282811641956, + "grad_norm": 1.48933744430542, + "learning_rate": 1.9484203017713557e-05, + "loss": 0.4989, + "step": 7570 + }, + { + "epoch": 0.20788028555738605, + "grad_norm": 0.3668833076953888, + "learning_rate": 1.9484066091892408e-05, + "loss": 0.5629, + "step": 7571 + }, + { + "epoch": 0.20790774299835255, + "grad_norm": 0.48281916975975037, + "learning_rate": 1.948392914838045e-05, + "loss": 0.5414, + "step": 7572 + }, + { + "epoch": 0.20793520043931907, + "grad_norm": 0.38930875062942505, + "learning_rate": 1.948379218717794e-05, + "loss": 0.5588, + "step": 7573 + }, + { + "epoch": 0.20796265788028556, + "grad_norm": 0.3609628677368164, + "learning_rate": 1.948365520828513e-05, + "loss": 0.5194, + "step": 7574 + }, + { + "epoch": 0.20799011532125206, + "grad_norm": 0.4016500413417816, + "learning_rate": 1.948351821170227e-05, + "loss": 0.4534, + "step": 7575 + }, + { + "epoch": 0.20801757276221855, + "grad_norm": 0.35170140862464905, + "learning_rate": 1.948338119742963e-05, + "loss": 0.5715, + "step": 7576 + }, + { + "epoch": 0.20804503020318507, + "grad_norm": 0.3507955074310303, + "learning_rate": 1.9483244165467453e-05, + "loss": 0.4482, + "step": 7577 + }, + { + "epoch": 0.20807248764415157, + "grad_norm": 0.3529643416404724, + "learning_rate": 1.9483107115815996e-05, + "loss": 0.5237, + "step": 7578 + }, + { + "epoch": 0.20809994508511806, + "grad_norm": 0.5187897682189941, + "learning_rate": 1.9482970048475524e-05, + "loss": 0.527, + "step": 7579 + }, + { + "epoch": 0.20812740252608458, + "grad_norm": 0.35961034893989563, + "learning_rate": 1.9482832963446282e-05, + "loss": 0.5209, + "step": 7580 + }, + { + "epoch": 0.20815485996705108, + "grad_norm": 0.3582625091075897, + "learning_rate": 1.9482695860728534e-05, + "loss": 0.536, + "step": 7581 + }, + { + "epoch": 0.20818231740801757, + "grad_norm": 0.3997671604156494, + "learning_rate": 1.9482558740322524e-05, + "loss": 0.534, + "step": 7582 + }, + { + "epoch": 0.20820977484898406, + "grad_norm": 0.3752930760383606, + "learning_rate": 1.9482421602228525e-05, + "loss": 0.5547, + "step": 7583 + }, + { + "epoch": 0.20823723228995059, + "grad_norm": 0.4523254334926605, + "learning_rate": 1.9482284446446778e-05, + "loss": 0.6777, + "step": 7584 + }, + { + "epoch": 0.20826468973091708, + "grad_norm": 0.38860979676246643, + "learning_rate": 1.9482147272977543e-05, + "loss": 0.5459, + "step": 7585 + }, + { + "epoch": 0.20829214717188357, + "grad_norm": 0.3776243329048157, + "learning_rate": 1.948201008182108e-05, + "loss": 0.5005, + "step": 7586 + }, + { + "epoch": 0.2083196046128501, + "grad_norm": 0.48295915126800537, + "learning_rate": 1.948187287297764e-05, + "loss": 0.5243, + "step": 7587 + }, + { + "epoch": 0.2083470620538166, + "grad_norm": 0.3734515309333801, + "learning_rate": 1.948173564644748e-05, + "loss": 0.5674, + "step": 7588 + }, + { + "epoch": 0.20837451949478308, + "grad_norm": 0.3722502887248993, + "learning_rate": 1.9481598402230862e-05, + "loss": 0.5113, + "step": 7589 + }, + { + "epoch": 0.20840197693574958, + "grad_norm": 0.36111605167388916, + "learning_rate": 1.9481461140328033e-05, + "loss": 0.5037, + "step": 7590 + }, + { + "epoch": 0.2084294343767161, + "grad_norm": 0.40063241124153137, + "learning_rate": 1.948132386073925e-05, + "loss": 0.5119, + "step": 7591 + }, + { + "epoch": 0.2084568918176826, + "grad_norm": 0.407366544008255, + "learning_rate": 1.948118656346478e-05, + "loss": 0.566, + "step": 7592 + }, + { + "epoch": 0.2084843492586491, + "grad_norm": 0.4047910273075104, + "learning_rate": 1.9481049248504863e-05, + "loss": 0.6004, + "step": 7593 + }, + { + "epoch": 0.2085118066996156, + "grad_norm": 0.3414928615093231, + "learning_rate": 1.9480911915859767e-05, + "loss": 0.4888, + "step": 7594 + }, + { + "epoch": 0.2085392641405821, + "grad_norm": 0.5145168900489807, + "learning_rate": 1.9480774565529744e-05, + "loss": 0.5782, + "step": 7595 + }, + { + "epoch": 0.2085667215815486, + "grad_norm": 0.3232507109642029, + "learning_rate": 1.9480637197515048e-05, + "loss": 0.4727, + "step": 7596 + }, + { + "epoch": 0.2085941790225151, + "grad_norm": 0.375011146068573, + "learning_rate": 1.948049981181594e-05, + "loss": 0.4958, + "step": 7597 + }, + { + "epoch": 0.2086216364634816, + "grad_norm": 0.4271661937236786, + "learning_rate": 1.9480362408432674e-05, + "loss": 0.595, + "step": 7598 + }, + { + "epoch": 0.2086490939044481, + "grad_norm": 0.3530728816986084, + "learning_rate": 1.9480224987365506e-05, + "loss": 0.5761, + "step": 7599 + }, + { + "epoch": 0.2086765513454146, + "grad_norm": 0.42961248755455017, + "learning_rate": 1.9480087548614693e-05, + "loss": 0.5569, + "step": 7600 + }, + { + "epoch": 0.2087040087863811, + "grad_norm": 0.35871684551239014, + "learning_rate": 1.947995009218049e-05, + "loss": 0.5674, + "step": 7601 + }, + { + "epoch": 0.20873146622734762, + "grad_norm": 0.4190363883972168, + "learning_rate": 1.9479812618063156e-05, + "loss": 0.5029, + "step": 7602 + }, + { + "epoch": 0.2087589236683141, + "grad_norm": 0.38788285851478577, + "learning_rate": 1.9479675126262943e-05, + "loss": 0.5421, + "step": 7603 + }, + { + "epoch": 0.2087863811092806, + "grad_norm": 0.36662906408309937, + "learning_rate": 1.9479537616780115e-05, + "loss": 0.5245, + "step": 7604 + }, + { + "epoch": 0.20881383855024713, + "grad_norm": 0.3465394675731659, + "learning_rate": 1.9479400089614922e-05, + "loss": 0.4834, + "step": 7605 + }, + { + "epoch": 0.20884129599121362, + "grad_norm": 0.34957262873649597, + "learning_rate": 1.9479262544767618e-05, + "loss": 0.4832, + "step": 7606 + }, + { + "epoch": 0.20886875343218012, + "grad_norm": 0.3421435058116913, + "learning_rate": 1.9479124982238467e-05, + "loss": 0.4614, + "step": 7607 + }, + { + "epoch": 0.2088962108731466, + "grad_norm": 0.38553664088249207, + "learning_rate": 1.9478987402027724e-05, + "loss": 0.5922, + "step": 7608 + }, + { + "epoch": 0.20892366831411313, + "grad_norm": 0.6419899463653564, + "learning_rate": 1.9478849804135646e-05, + "loss": 0.573, + "step": 7609 + }, + { + "epoch": 0.20895112575507963, + "grad_norm": 0.40198925137519836, + "learning_rate": 1.9478712188562487e-05, + "loss": 0.5646, + "step": 7610 + }, + { + "epoch": 0.20897858319604612, + "grad_norm": 0.35192954540252686, + "learning_rate": 1.94785745553085e-05, + "loss": 0.5508, + "step": 7611 + }, + { + "epoch": 0.20900604063701264, + "grad_norm": 0.4219098389148712, + "learning_rate": 1.947843690437395e-05, + "loss": 0.6408, + "step": 7612 + }, + { + "epoch": 0.20903349807797914, + "grad_norm": 0.4437122344970703, + "learning_rate": 1.9478299235759088e-05, + "loss": 0.5916, + "step": 7613 + }, + { + "epoch": 0.20906095551894563, + "grad_norm": 0.3997351825237274, + "learning_rate": 1.9478161549464174e-05, + "loss": 0.5318, + "step": 7614 + }, + { + "epoch": 0.20908841295991212, + "grad_norm": 0.5187093615531921, + "learning_rate": 1.9478023845489462e-05, + "loss": 0.6048, + "step": 7615 + }, + { + "epoch": 0.20911587040087865, + "grad_norm": 0.3538030982017517, + "learning_rate": 1.947788612383521e-05, + "loss": 0.4998, + "step": 7616 + }, + { + "epoch": 0.20914332784184514, + "grad_norm": 0.3669971525669098, + "learning_rate": 1.9477748384501678e-05, + "loss": 0.5451, + "step": 7617 + }, + { + "epoch": 0.20917078528281163, + "grad_norm": 0.3813173472881317, + "learning_rate": 1.9477610627489118e-05, + "loss": 0.5209, + "step": 7618 + }, + { + "epoch": 0.20919824272377815, + "grad_norm": 0.3493728041648865, + "learning_rate": 1.947747285279779e-05, + "loss": 0.471, + "step": 7619 + }, + { + "epoch": 0.20922570016474465, + "grad_norm": 0.363414466381073, + "learning_rate": 1.9477335060427954e-05, + "loss": 0.4192, + "step": 7620 + }, + { + "epoch": 0.20925315760571114, + "grad_norm": 0.39078351855278015, + "learning_rate": 1.9477197250379862e-05, + "loss": 0.5647, + "step": 7621 + }, + { + "epoch": 0.20928061504667764, + "grad_norm": 0.35664811730384827, + "learning_rate": 1.947705942265377e-05, + "loss": 0.5332, + "step": 7622 + }, + { + "epoch": 0.20930807248764416, + "grad_norm": 0.3787766098976135, + "learning_rate": 1.9476921577249935e-05, + "loss": 0.6096, + "step": 7623 + }, + { + "epoch": 0.20933552992861065, + "grad_norm": 0.37204113602638245, + "learning_rate": 1.9476783714168623e-05, + "loss": 0.57, + "step": 7624 + }, + { + "epoch": 0.20936298736957715, + "grad_norm": 0.4101165533065796, + "learning_rate": 1.9476645833410078e-05, + "loss": 0.5939, + "step": 7625 + }, + { + "epoch": 0.20939044481054367, + "grad_norm": 0.3835039734840393, + "learning_rate": 1.947650793497457e-05, + "loss": 0.5811, + "step": 7626 + }, + { + "epoch": 0.20941790225151016, + "grad_norm": 0.35198166966438293, + "learning_rate": 1.9476370018862346e-05, + "loss": 0.5427, + "step": 7627 + }, + { + "epoch": 0.20944535969247666, + "grad_norm": 0.3503159284591675, + "learning_rate": 1.9476232085073668e-05, + "loss": 0.5157, + "step": 7628 + }, + { + "epoch": 0.20947281713344315, + "grad_norm": 0.3532663583755493, + "learning_rate": 1.9476094133608793e-05, + "loss": 0.5397, + "step": 7629 + }, + { + "epoch": 0.20950027457440967, + "grad_norm": 0.3778485655784607, + "learning_rate": 1.9475956164467978e-05, + "loss": 0.5557, + "step": 7630 + }, + { + "epoch": 0.20952773201537617, + "grad_norm": 0.3534981608390808, + "learning_rate": 1.9475818177651483e-05, + "loss": 0.5216, + "step": 7631 + }, + { + "epoch": 0.20955518945634266, + "grad_norm": 0.37158554792404175, + "learning_rate": 1.9475680173159558e-05, + "loss": 0.6198, + "step": 7632 + }, + { + "epoch": 0.20958264689730918, + "grad_norm": 0.33717790246009827, + "learning_rate": 1.947554215099247e-05, + "loss": 0.5407, + "step": 7633 + }, + { + "epoch": 0.20961010433827568, + "grad_norm": 0.4234338104724884, + "learning_rate": 1.947540411115047e-05, + "loss": 0.5494, + "step": 7634 + }, + { + "epoch": 0.20963756177924217, + "grad_norm": 0.3379095196723938, + "learning_rate": 1.9475266053633818e-05, + "loss": 0.4766, + "step": 7635 + }, + { + "epoch": 0.20966501922020866, + "grad_norm": 0.3719825744628906, + "learning_rate": 1.9475127978442767e-05, + "loss": 0.6212, + "step": 7636 + }, + { + "epoch": 0.2096924766611752, + "grad_norm": 0.37886571884155273, + "learning_rate": 1.9474989885577584e-05, + "loss": 0.5207, + "step": 7637 + }, + { + "epoch": 0.20971993410214168, + "grad_norm": 0.37721848487854004, + "learning_rate": 1.9474851775038516e-05, + "loss": 0.5293, + "step": 7638 + }, + { + "epoch": 0.20974739154310817, + "grad_norm": 0.38806986808776855, + "learning_rate": 1.9474713646825828e-05, + "loss": 0.5895, + "step": 7639 + }, + { + "epoch": 0.2097748489840747, + "grad_norm": 0.3770739436149597, + "learning_rate": 1.947457550093977e-05, + "loss": 0.588, + "step": 7640 + }, + { + "epoch": 0.2098023064250412, + "grad_norm": 0.3505558371543884, + "learning_rate": 1.9474437337380608e-05, + "loss": 0.5031, + "step": 7641 + }, + { + "epoch": 0.20982976386600768, + "grad_norm": 0.3607693016529083, + "learning_rate": 1.94742991561486e-05, + "loss": 0.505, + "step": 7642 + }, + { + "epoch": 0.20985722130697418, + "grad_norm": 0.38945865631103516, + "learning_rate": 1.9474160957243997e-05, + "loss": 0.574, + "step": 7643 + }, + { + "epoch": 0.2098846787479407, + "grad_norm": 0.3453112840652466, + "learning_rate": 1.947402274066706e-05, + "loss": 0.4886, + "step": 7644 + }, + { + "epoch": 0.2099121361889072, + "grad_norm": 0.37124085426330566, + "learning_rate": 1.9473884506418048e-05, + "loss": 0.5024, + "step": 7645 + }, + { + "epoch": 0.2099395936298737, + "grad_norm": 0.3769884705543518, + "learning_rate": 1.9473746254497214e-05, + "loss": 0.5225, + "step": 7646 + }, + { + "epoch": 0.2099670510708402, + "grad_norm": 0.34181246161460876, + "learning_rate": 1.9473607984904824e-05, + "loss": 0.5634, + "step": 7647 + }, + { + "epoch": 0.2099945085118067, + "grad_norm": 0.3925192356109619, + "learning_rate": 1.947346969764113e-05, + "loss": 0.4693, + "step": 7648 + }, + { + "epoch": 0.2100219659527732, + "grad_norm": 0.4676647186279297, + "learning_rate": 1.9473331392706392e-05, + "loss": 0.5835, + "step": 7649 + }, + { + "epoch": 0.2100494233937397, + "grad_norm": 0.3543460965156555, + "learning_rate": 1.9473193070100866e-05, + "loss": 0.5609, + "step": 7650 + }, + { + "epoch": 0.21007688083470621, + "grad_norm": 0.33256974816322327, + "learning_rate": 1.9473054729824813e-05, + "loss": 0.4807, + "step": 7651 + }, + { + "epoch": 0.2101043382756727, + "grad_norm": 0.3847549259662628, + "learning_rate": 1.9472916371878487e-05, + "loss": 0.5844, + "step": 7652 + }, + { + "epoch": 0.2101317957166392, + "grad_norm": 0.38168856501579285, + "learning_rate": 1.947277799626215e-05, + "loss": 0.5506, + "step": 7653 + }, + { + "epoch": 0.21015925315760572, + "grad_norm": 0.40839627385139465, + "learning_rate": 1.947263960297606e-05, + "loss": 0.5083, + "step": 7654 + }, + { + "epoch": 0.21018671059857222, + "grad_norm": 0.41970112919807434, + "learning_rate": 1.947250119202047e-05, + "loss": 0.5679, + "step": 7655 + }, + { + "epoch": 0.2102141680395387, + "grad_norm": 0.3623751103878021, + "learning_rate": 1.9472362763395646e-05, + "loss": 0.5218, + "step": 7656 + }, + { + "epoch": 0.2102416254805052, + "grad_norm": 0.3482477068901062, + "learning_rate": 1.9472224317101843e-05, + "loss": 0.5485, + "step": 7657 + }, + { + "epoch": 0.21026908292147173, + "grad_norm": 0.4256426990032196, + "learning_rate": 1.9472085853139315e-05, + "loss": 0.5505, + "step": 7658 + }, + { + "epoch": 0.21029654036243822, + "grad_norm": 0.38097429275512695, + "learning_rate": 1.9471947371508326e-05, + "loss": 0.571, + "step": 7659 + }, + { + "epoch": 0.21032399780340472, + "grad_norm": 0.41050899028778076, + "learning_rate": 1.9471808872209132e-05, + "loss": 0.5228, + "step": 7660 + }, + { + "epoch": 0.21035145524437124, + "grad_norm": 0.36769217252731323, + "learning_rate": 1.947167035524199e-05, + "loss": 0.5461, + "step": 7661 + }, + { + "epoch": 0.21037891268533773, + "grad_norm": 0.40046578645706177, + "learning_rate": 1.9471531820607162e-05, + "loss": 0.539, + "step": 7662 + }, + { + "epoch": 0.21040637012630423, + "grad_norm": 0.4364890456199646, + "learning_rate": 1.9471393268304898e-05, + "loss": 0.4454, + "step": 7663 + }, + { + "epoch": 0.21043382756727072, + "grad_norm": 0.3513913154602051, + "learning_rate": 1.947125469833547e-05, + "loss": 0.5173, + "step": 7664 + }, + { + "epoch": 0.21046128500823724, + "grad_norm": 0.3839256167411804, + "learning_rate": 1.9471116110699127e-05, + "loss": 0.5594, + "step": 7665 + }, + { + "epoch": 0.21048874244920374, + "grad_norm": 0.3834983706474304, + "learning_rate": 1.9470977505396127e-05, + "loss": 0.6565, + "step": 7666 + }, + { + "epoch": 0.21051619989017023, + "grad_norm": 0.35657617449760437, + "learning_rate": 1.9470838882426735e-05, + "loss": 0.5306, + "step": 7667 + }, + { + "epoch": 0.21054365733113672, + "grad_norm": 0.33958151936531067, + "learning_rate": 1.9470700241791203e-05, + "loss": 0.5854, + "step": 7668 + }, + { + "epoch": 0.21057111477210325, + "grad_norm": 0.42710384726524353, + "learning_rate": 1.9470561583489792e-05, + "loss": 0.5282, + "step": 7669 + }, + { + "epoch": 0.21059857221306974, + "grad_norm": 0.37558797001838684, + "learning_rate": 1.9470422907522764e-05, + "loss": 0.5626, + "step": 7670 + }, + { + "epoch": 0.21062602965403623, + "grad_norm": 0.35391250252723694, + "learning_rate": 1.9470284213890373e-05, + "loss": 0.5831, + "step": 7671 + }, + { + "epoch": 0.21065348709500276, + "grad_norm": 0.3747095763683319, + "learning_rate": 1.947014550259288e-05, + "loss": 0.5323, + "step": 7672 + }, + { + "epoch": 0.21068094453596925, + "grad_norm": 0.3469755947589874, + "learning_rate": 1.947000677363054e-05, + "loss": 0.4097, + "step": 7673 + }, + { + "epoch": 0.21070840197693574, + "grad_norm": 0.3303808271884918, + "learning_rate": 1.9469868027003618e-05, + "loss": 0.5152, + "step": 7674 + }, + { + "epoch": 0.21073585941790224, + "grad_norm": 0.3417487144470215, + "learning_rate": 1.9469729262712368e-05, + "loss": 0.5053, + "step": 7675 + }, + { + "epoch": 0.21076331685886876, + "grad_norm": 0.3354649245738983, + "learning_rate": 1.9469590480757053e-05, + "loss": 0.4341, + "step": 7676 + }, + { + "epoch": 0.21079077429983525, + "grad_norm": 0.44170454144477844, + "learning_rate": 1.9469451681137926e-05, + "loss": 0.5935, + "step": 7677 + }, + { + "epoch": 0.21081823174080175, + "grad_norm": 0.35220542550086975, + "learning_rate": 1.9469312863855255e-05, + "loss": 0.4873, + "step": 7678 + }, + { + "epoch": 0.21084568918176827, + "grad_norm": 0.38703009486198425, + "learning_rate": 1.9469174028909287e-05, + "loss": 0.5801, + "step": 7679 + }, + { + "epoch": 0.21087314662273476, + "grad_norm": 0.32918980717658997, + "learning_rate": 1.946903517630029e-05, + "loss": 0.5066, + "step": 7680 + }, + { + "epoch": 0.21090060406370126, + "grad_norm": 0.4387950599193573, + "learning_rate": 1.946889630602852e-05, + "loss": 0.5686, + "step": 7681 + }, + { + "epoch": 0.21092806150466775, + "grad_norm": 0.4177253246307373, + "learning_rate": 1.9468757418094238e-05, + "loss": 0.4867, + "step": 7682 + }, + { + "epoch": 0.21095551894563427, + "grad_norm": 0.35858961939811707, + "learning_rate": 1.9468618512497696e-05, + "loss": 0.5274, + "step": 7683 + }, + { + "epoch": 0.21098297638660077, + "grad_norm": 0.4139188230037689, + "learning_rate": 1.9468479589239165e-05, + "loss": 0.5649, + "step": 7684 + }, + { + "epoch": 0.21101043382756726, + "grad_norm": 0.3729940354824066, + "learning_rate": 1.9468340648318893e-05, + "loss": 0.5132, + "step": 7685 + }, + { + "epoch": 0.21103789126853378, + "grad_norm": 0.43315044045448303, + "learning_rate": 1.9468201689737144e-05, + "loss": 0.5743, + "step": 7686 + }, + { + "epoch": 0.21106534870950028, + "grad_norm": 0.3565322458744049, + "learning_rate": 1.946806271349418e-05, + "loss": 0.5038, + "step": 7687 + }, + { + "epoch": 0.21109280615046677, + "grad_norm": 0.36181968450546265, + "learning_rate": 1.9467923719590254e-05, + "loss": 0.5503, + "step": 7688 + }, + { + "epoch": 0.21112026359143327, + "grad_norm": 0.30538374185562134, + "learning_rate": 1.946778470802563e-05, + "loss": 0.545, + "step": 7689 + }, + { + "epoch": 0.2111477210323998, + "grad_norm": 0.37401947379112244, + "learning_rate": 1.9467645678800565e-05, + "loss": 0.4703, + "step": 7690 + }, + { + "epoch": 0.21117517847336628, + "grad_norm": 0.5387868285179138, + "learning_rate": 1.9467506631915323e-05, + "loss": 0.5836, + "step": 7691 + }, + { + "epoch": 0.21120263591433278, + "grad_norm": 0.37364670634269714, + "learning_rate": 1.9467367567370155e-05, + "loss": 0.5191, + "step": 7692 + }, + { + "epoch": 0.2112300933552993, + "grad_norm": 0.4309769570827484, + "learning_rate": 1.9467228485165326e-05, + "loss": 0.6035, + "step": 7693 + }, + { + "epoch": 0.2112575507962658, + "grad_norm": 0.656316339969635, + "learning_rate": 1.9467089385301093e-05, + "loss": 0.637, + "step": 7694 + }, + { + "epoch": 0.21128500823723229, + "grad_norm": 0.3901107609272003, + "learning_rate": 1.9466950267777718e-05, + "loss": 0.5611, + "step": 7695 + }, + { + "epoch": 0.21131246567819878, + "grad_norm": 0.34908527135849, + "learning_rate": 1.946681113259546e-05, + "loss": 0.4274, + "step": 7696 + }, + { + "epoch": 0.2113399231191653, + "grad_norm": 0.3590952754020691, + "learning_rate": 1.9466671979754575e-05, + "loss": 0.3912, + "step": 7697 + }, + { + "epoch": 0.2113673805601318, + "grad_norm": 0.37690168619155884, + "learning_rate": 1.946653280925533e-05, + "loss": 0.5621, + "step": 7698 + }, + { + "epoch": 0.2113948380010983, + "grad_norm": 0.3536081314086914, + "learning_rate": 1.9466393621097976e-05, + "loss": 0.4488, + "step": 7699 + }, + { + "epoch": 0.2114222954420648, + "grad_norm": 0.36269640922546387, + "learning_rate": 1.946625441528278e-05, + "loss": 0.4956, + "step": 7700 + }, + { + "epoch": 0.2114497528830313, + "grad_norm": 0.36642515659332275, + "learning_rate": 1.9466115191809995e-05, + "loss": 0.5487, + "step": 7701 + }, + { + "epoch": 0.2114772103239978, + "grad_norm": 0.37331146001815796, + "learning_rate": 1.9465975950679884e-05, + "loss": 0.5501, + "step": 7702 + }, + { + "epoch": 0.2115046677649643, + "grad_norm": 0.3674823045730591, + "learning_rate": 1.946583669189271e-05, + "loss": 0.5283, + "step": 7703 + }, + { + "epoch": 0.21153212520593082, + "grad_norm": 0.3422505557537079, + "learning_rate": 1.9465697415448726e-05, + "loss": 0.5448, + "step": 7704 + }, + { + "epoch": 0.2115595826468973, + "grad_norm": 0.37669679522514343, + "learning_rate": 1.9465558121348197e-05, + "loss": 0.6219, + "step": 7705 + }, + { + "epoch": 0.2115870400878638, + "grad_norm": 0.32592153549194336, + "learning_rate": 1.946541880959138e-05, + "loss": 0.5223, + "step": 7706 + }, + { + "epoch": 0.21161449752883033, + "grad_norm": 0.36265498399734497, + "learning_rate": 1.9465279480178537e-05, + "loss": 0.536, + "step": 7707 + }, + { + "epoch": 0.21164195496979682, + "grad_norm": 0.39926716685295105, + "learning_rate": 1.9465140133109927e-05, + "loss": 0.5005, + "step": 7708 + }, + { + "epoch": 0.2116694124107633, + "grad_norm": 0.33597373962402344, + "learning_rate": 1.946500076838581e-05, + "loss": 0.495, + "step": 7709 + }, + { + "epoch": 0.2116968698517298, + "grad_norm": 0.3775760531425476, + "learning_rate": 1.946486138600644e-05, + "loss": 0.5253, + "step": 7710 + }, + { + "epoch": 0.21172432729269633, + "grad_norm": 0.36405226588249207, + "learning_rate": 1.946472198597209e-05, + "loss": 0.5675, + "step": 7711 + }, + { + "epoch": 0.21175178473366282, + "grad_norm": 0.3907422721385956, + "learning_rate": 1.946458256828301e-05, + "loss": 0.5013, + "step": 7712 + }, + { + "epoch": 0.21177924217462932, + "grad_norm": 0.3653850257396698, + "learning_rate": 1.9464443132939463e-05, + "loss": 0.539, + "step": 7713 + }, + { + "epoch": 0.21180669961559584, + "grad_norm": 0.33872750401496887, + "learning_rate": 1.9464303679941706e-05, + "loss": 0.4829, + "step": 7714 + }, + { + "epoch": 0.21183415705656233, + "grad_norm": 0.3511808514595032, + "learning_rate": 1.9464164209290007e-05, + "loss": 0.5203, + "step": 7715 + }, + { + "epoch": 0.21186161449752883, + "grad_norm": 0.35137391090393066, + "learning_rate": 1.9464024720984617e-05, + "loss": 0.542, + "step": 7716 + }, + { + "epoch": 0.21188907193849532, + "grad_norm": 0.33773982524871826, + "learning_rate": 1.9463885215025803e-05, + "loss": 0.5342, + "step": 7717 + }, + { + "epoch": 0.21191652937946184, + "grad_norm": 0.34832075238227844, + "learning_rate": 1.9463745691413823e-05, + "loss": 0.5604, + "step": 7718 + }, + { + "epoch": 0.21194398682042834, + "grad_norm": 0.33251723647117615, + "learning_rate": 1.9463606150148937e-05, + "loss": 0.5422, + "step": 7719 + }, + { + "epoch": 0.21197144426139483, + "grad_norm": 0.3877292275428772, + "learning_rate": 1.9463466591231404e-05, + "loss": 0.593, + "step": 7720 + }, + { + "epoch": 0.21199890170236135, + "grad_norm": 0.32499247789382935, + "learning_rate": 1.9463327014661484e-05, + "loss": 0.4878, + "step": 7721 + }, + { + "epoch": 0.21202635914332785, + "grad_norm": 0.45507898926734924, + "learning_rate": 1.946318742043944e-05, + "loss": 0.5905, + "step": 7722 + }, + { + "epoch": 0.21205381658429434, + "grad_norm": 0.3592537045478821, + "learning_rate": 1.946304780856553e-05, + "loss": 0.5617, + "step": 7723 + }, + { + "epoch": 0.21208127402526084, + "grad_norm": 0.36364272236824036, + "learning_rate": 1.9462908179040015e-05, + "loss": 0.4983, + "step": 7724 + }, + { + "epoch": 0.21210873146622736, + "grad_norm": 0.3402278423309326, + "learning_rate": 1.946276853186316e-05, + "loss": 0.6051, + "step": 7725 + }, + { + "epoch": 0.21213618890719385, + "grad_norm": 1.2872556447982788, + "learning_rate": 1.946262886703522e-05, + "loss": 0.579, + "step": 7726 + }, + { + "epoch": 0.21216364634816035, + "grad_norm": 0.3633585274219513, + "learning_rate": 1.9462489184556453e-05, + "loss": 0.5368, + "step": 7727 + }, + { + "epoch": 0.21219110378912687, + "grad_norm": 0.3664308488368988, + "learning_rate": 1.9462349484427127e-05, + "loss": 0.5376, + "step": 7728 + }, + { + "epoch": 0.21221856123009336, + "grad_norm": 0.43939539790153503, + "learning_rate": 1.94622097666475e-05, + "loss": 0.4712, + "step": 7729 + }, + { + "epoch": 0.21224601867105986, + "grad_norm": 0.4222705066204071, + "learning_rate": 1.946207003121783e-05, + "loss": 0.4837, + "step": 7730 + }, + { + "epoch": 0.21227347611202635, + "grad_norm": 0.4053070545196533, + "learning_rate": 1.9461930278138382e-05, + "loss": 0.5362, + "step": 7731 + }, + { + "epoch": 0.21230093355299287, + "grad_norm": 0.35824474692344666, + "learning_rate": 1.9461790507409412e-05, + "loss": 0.5348, + "step": 7732 + }, + { + "epoch": 0.21232839099395936, + "grad_norm": 2.450716733932495, + "learning_rate": 1.9461650719031182e-05, + "loss": 0.4265, + "step": 7733 + }, + { + "epoch": 0.21235584843492586, + "grad_norm": 0.47630947828292847, + "learning_rate": 1.9461510913003953e-05, + "loss": 0.6345, + "step": 7734 + }, + { + "epoch": 0.21238330587589235, + "grad_norm": 0.38426467776298523, + "learning_rate": 1.946137108932799e-05, + "loss": 0.5179, + "step": 7735 + }, + { + "epoch": 0.21241076331685887, + "grad_norm": 0.34811273217201233, + "learning_rate": 1.9461231248003546e-05, + "loss": 0.5383, + "step": 7736 + }, + { + "epoch": 0.21243822075782537, + "grad_norm": 0.3579137325286865, + "learning_rate": 1.946109138903089e-05, + "loss": 0.5503, + "step": 7737 + }, + { + "epoch": 0.21246567819879186, + "grad_norm": 0.35739371180534363, + "learning_rate": 1.946095151241028e-05, + "loss": 0.5221, + "step": 7738 + }, + { + "epoch": 0.21249313563975838, + "grad_norm": 0.41838210821151733, + "learning_rate": 1.946081161814197e-05, + "loss": 0.5313, + "step": 7739 + }, + { + "epoch": 0.21252059308072488, + "grad_norm": 0.3637886047363281, + "learning_rate": 1.946067170622623e-05, + "loss": 0.5063, + "step": 7740 + }, + { + "epoch": 0.21254805052169137, + "grad_norm": 0.3488861322402954, + "learning_rate": 1.946053177666332e-05, + "loss": 0.5999, + "step": 7741 + }, + { + "epoch": 0.21257550796265787, + "grad_norm": 0.3828712999820709, + "learning_rate": 1.9460391829453496e-05, + "loss": 0.5904, + "step": 7742 + }, + { + "epoch": 0.2126029654036244, + "grad_norm": 0.40868139266967773, + "learning_rate": 1.9460251864597022e-05, + "loss": 0.5636, + "step": 7743 + }, + { + "epoch": 0.21263042284459088, + "grad_norm": 0.33385640382766724, + "learning_rate": 1.946011188209416e-05, + "loss": 0.5471, + "step": 7744 + }, + { + "epoch": 0.21265788028555738, + "grad_norm": 0.38346022367477417, + "learning_rate": 1.945997188194517e-05, + "loss": 0.5311, + "step": 7745 + }, + { + "epoch": 0.2126853377265239, + "grad_norm": 0.3262152671813965, + "learning_rate": 1.945983186415031e-05, + "loss": 0.471, + "step": 7746 + }, + { + "epoch": 0.2127127951674904, + "grad_norm": 0.410839319229126, + "learning_rate": 1.9459691828709847e-05, + "loss": 0.5733, + "step": 7747 + }, + { + "epoch": 0.2127402526084569, + "grad_norm": 0.3856702744960785, + "learning_rate": 1.9459551775624036e-05, + "loss": 0.5099, + "step": 7748 + }, + { + "epoch": 0.21276771004942338, + "grad_norm": 0.48778197169303894, + "learning_rate": 1.9459411704893147e-05, + "loss": 0.4901, + "step": 7749 + }, + { + "epoch": 0.2127951674903899, + "grad_norm": 0.34644731879234314, + "learning_rate": 1.9459271616517436e-05, + "loss": 0.4881, + "step": 7750 + }, + { + "epoch": 0.2128226249313564, + "grad_norm": 0.3588396906852722, + "learning_rate": 1.945913151049716e-05, + "loss": 0.4686, + "step": 7751 + }, + { + "epoch": 0.2128500823723229, + "grad_norm": 0.39102795720100403, + "learning_rate": 1.9458991386832588e-05, + "loss": 0.5387, + "step": 7752 + }, + { + "epoch": 0.2128775398132894, + "grad_norm": 0.33956971764564514, + "learning_rate": 1.945885124552398e-05, + "loss": 0.4511, + "step": 7753 + }, + { + "epoch": 0.2129049972542559, + "grad_norm": 0.3745773136615753, + "learning_rate": 1.9458711086571593e-05, + "loss": 0.552, + "step": 7754 + }, + { + "epoch": 0.2129324546952224, + "grad_norm": 0.3400760889053345, + "learning_rate": 1.945857090997569e-05, + "loss": 0.5615, + "step": 7755 + }, + { + "epoch": 0.2129599121361889, + "grad_norm": 0.38846296072006226, + "learning_rate": 1.9458430715736534e-05, + "loss": 0.6239, + "step": 7756 + }, + { + "epoch": 0.21298736957715542, + "grad_norm": 0.41156572103500366, + "learning_rate": 1.9458290503854383e-05, + "loss": 0.6212, + "step": 7757 + }, + { + "epoch": 0.2130148270181219, + "grad_norm": 0.32920360565185547, + "learning_rate": 1.945815027432951e-05, + "loss": 0.5645, + "step": 7758 + }, + { + "epoch": 0.2130422844590884, + "grad_norm": 0.35983172059059143, + "learning_rate": 1.945801002716216e-05, + "loss": 0.4652, + "step": 7759 + }, + { + "epoch": 0.21306974190005493, + "grad_norm": 0.3461645245552063, + "learning_rate": 1.9457869762352604e-05, + "loss": 0.5258, + "step": 7760 + }, + { + "epoch": 0.21309719934102142, + "grad_norm": 0.327502965927124, + "learning_rate": 1.9457729479901103e-05, + "loss": 0.5173, + "step": 7761 + }, + { + "epoch": 0.21312465678198791, + "grad_norm": 0.33881354331970215, + "learning_rate": 1.9457589179807922e-05, + "loss": 0.4745, + "step": 7762 + }, + { + "epoch": 0.2131521142229544, + "grad_norm": 0.38264113664627075, + "learning_rate": 1.9457448862073314e-05, + "loss": 0.5875, + "step": 7763 + }, + { + "epoch": 0.21317957166392093, + "grad_norm": 0.34533512592315674, + "learning_rate": 1.9457308526697548e-05, + "loss": 0.5339, + "step": 7764 + }, + { + "epoch": 0.21320702910488742, + "grad_norm": 0.43047794699668884, + "learning_rate": 1.945716817368088e-05, + "loss": 0.5035, + "step": 7765 + }, + { + "epoch": 0.21323448654585392, + "grad_norm": 0.3788689970970154, + "learning_rate": 1.945702780302358e-05, + "loss": 0.503, + "step": 7766 + }, + { + "epoch": 0.21326194398682044, + "grad_norm": 0.38941720128059387, + "learning_rate": 1.94568874147259e-05, + "loss": 0.6281, + "step": 7767 + }, + { + "epoch": 0.21328940142778693, + "grad_norm": 0.3785932660102844, + "learning_rate": 1.9456747008788107e-05, + "loss": 0.4516, + "step": 7768 + }, + { + "epoch": 0.21331685886875343, + "grad_norm": 0.31275665760040283, + "learning_rate": 1.9456606585210465e-05, + "loss": 0.4924, + "step": 7769 + }, + { + "epoch": 0.21334431630971992, + "grad_norm": 0.42561084032058716, + "learning_rate": 1.9456466143993232e-05, + "loss": 0.5882, + "step": 7770 + }, + { + "epoch": 0.21337177375068644, + "grad_norm": 0.3582158386707306, + "learning_rate": 1.945632568513667e-05, + "loss": 0.508, + "step": 7771 + }, + { + "epoch": 0.21339923119165294, + "grad_norm": 0.3860912621021271, + "learning_rate": 1.9456185208641045e-05, + "loss": 0.5579, + "step": 7772 + }, + { + "epoch": 0.21342668863261943, + "grad_norm": 0.35531359910964966, + "learning_rate": 1.9456044714506618e-05, + "loss": 0.5235, + "step": 7773 + }, + { + "epoch": 0.21345414607358595, + "grad_norm": 0.3715299069881439, + "learning_rate": 1.9455904202733645e-05, + "loss": 0.5249, + "step": 7774 + }, + { + "epoch": 0.21348160351455245, + "grad_norm": 0.36395663022994995, + "learning_rate": 1.94557636733224e-05, + "loss": 0.4949, + "step": 7775 + }, + { + "epoch": 0.21350906095551894, + "grad_norm": 0.3640040159225464, + "learning_rate": 1.9455623126273127e-05, + "loss": 0.5214, + "step": 7776 + }, + { + "epoch": 0.21353651839648544, + "grad_norm": 0.37844139337539673, + "learning_rate": 1.945548256158611e-05, + "loss": 0.5375, + "step": 7777 + }, + { + "epoch": 0.21356397583745196, + "grad_norm": 0.35938823223114014, + "learning_rate": 1.9455341979261596e-05, + "loss": 0.5216, + "step": 7778 + }, + { + "epoch": 0.21359143327841845, + "grad_norm": 0.38652148842811584, + "learning_rate": 1.945520137929985e-05, + "loss": 0.4868, + "step": 7779 + }, + { + "epoch": 0.21361889071938495, + "grad_norm": 0.3520498275756836, + "learning_rate": 1.9455060761701134e-05, + "loss": 0.5502, + "step": 7780 + }, + { + "epoch": 0.21364634816035147, + "grad_norm": 0.3801784813404083, + "learning_rate": 1.9454920126465716e-05, + "loss": 0.4967, + "step": 7781 + }, + { + "epoch": 0.21367380560131796, + "grad_norm": 0.3093474209308624, + "learning_rate": 1.9454779473593852e-05, + "loss": 0.4482, + "step": 7782 + }, + { + "epoch": 0.21370126304228446, + "grad_norm": 0.5183829665184021, + "learning_rate": 1.9454638803085807e-05, + "loss": 0.5829, + "step": 7783 + }, + { + "epoch": 0.21372872048325095, + "grad_norm": 0.3645957112312317, + "learning_rate": 1.9454498114941845e-05, + "loss": 0.4891, + "step": 7784 + }, + { + "epoch": 0.21375617792421747, + "grad_norm": 0.3807961046695709, + "learning_rate": 1.945435740916222e-05, + "loss": 0.5381, + "step": 7785 + }, + { + "epoch": 0.21378363536518397, + "grad_norm": 0.4211444854736328, + "learning_rate": 1.9454216685747206e-05, + "loss": 0.5974, + "step": 7786 + }, + { + "epoch": 0.21381109280615046, + "grad_norm": 0.3753012716770172, + "learning_rate": 1.9454075944697065e-05, + "loss": 0.6361, + "step": 7787 + }, + { + "epoch": 0.21383855024711698, + "grad_norm": 0.37476488947868347, + "learning_rate": 1.945393518601205e-05, + "loss": 0.5664, + "step": 7788 + }, + { + "epoch": 0.21386600768808348, + "grad_norm": 0.38222450017929077, + "learning_rate": 1.9453794409692425e-05, + "loss": 0.533, + "step": 7789 + }, + { + "epoch": 0.21389346512904997, + "grad_norm": 0.370902955532074, + "learning_rate": 1.9453653615738463e-05, + "loss": 0.5158, + "step": 7790 + }, + { + "epoch": 0.21392092257001646, + "grad_norm": 0.35353022813796997, + "learning_rate": 1.9453512804150416e-05, + "loss": 0.6049, + "step": 7791 + }, + { + "epoch": 0.21394838001098299, + "grad_norm": 0.320016086101532, + "learning_rate": 1.945337197492855e-05, + "loss": 0.4474, + "step": 7792 + }, + { + "epoch": 0.21397583745194948, + "grad_norm": 0.34728676080703735, + "learning_rate": 1.945323112807313e-05, + "loss": 0.4769, + "step": 7793 + }, + { + "epoch": 0.21400329489291597, + "grad_norm": 0.40206435322761536, + "learning_rate": 1.9453090263584416e-05, + "loss": 0.4866, + "step": 7794 + }, + { + "epoch": 0.2140307523338825, + "grad_norm": 0.3609091639518738, + "learning_rate": 1.9452949381462672e-05, + "loss": 0.5287, + "step": 7795 + }, + { + "epoch": 0.214058209774849, + "grad_norm": 0.34392276406288147, + "learning_rate": 1.945280848170816e-05, + "loss": 0.4632, + "step": 7796 + }, + { + "epoch": 0.21408566721581548, + "grad_norm": 0.4118764400482178, + "learning_rate": 1.9452667564321143e-05, + "loss": 0.5209, + "step": 7797 + }, + { + "epoch": 0.21411312465678198, + "grad_norm": 0.3453790545463562, + "learning_rate": 1.9452526629301883e-05, + "loss": 0.5549, + "step": 7798 + }, + { + "epoch": 0.2141405820977485, + "grad_norm": 0.35427603125572205, + "learning_rate": 1.9452385676650646e-05, + "loss": 0.5463, + "step": 7799 + }, + { + "epoch": 0.214168039538715, + "grad_norm": 0.3975958228111267, + "learning_rate": 1.945224470636769e-05, + "loss": 0.6021, + "step": 7800 + }, + { + "epoch": 0.2141954969796815, + "grad_norm": 0.3883931636810303, + "learning_rate": 1.9452103718453283e-05, + "loss": 0.5291, + "step": 7801 + }, + { + "epoch": 0.21422295442064798, + "grad_norm": 0.35460758209228516, + "learning_rate": 1.9451962712907684e-05, + "loss": 0.4754, + "step": 7802 + }, + { + "epoch": 0.2142504118616145, + "grad_norm": 0.3371031880378723, + "learning_rate": 1.945182168973116e-05, + "loss": 0.5006, + "step": 7803 + }, + { + "epoch": 0.214277869302581, + "grad_norm": 0.37003234028816223, + "learning_rate": 1.9451680648923973e-05, + "loss": 0.5842, + "step": 7804 + }, + { + "epoch": 0.2143053267435475, + "grad_norm": 0.3449018895626068, + "learning_rate": 1.9451539590486385e-05, + "loss": 0.5423, + "step": 7805 + }, + { + "epoch": 0.214332784184514, + "grad_norm": 0.3767179548740387, + "learning_rate": 1.9451398514418656e-05, + "loss": 0.5095, + "step": 7806 + }, + { + "epoch": 0.2143602416254805, + "grad_norm": 0.38707834482192993, + "learning_rate": 1.9451257420721052e-05, + "loss": 0.6203, + "step": 7807 + }, + { + "epoch": 0.214387699066447, + "grad_norm": 0.37045225501060486, + "learning_rate": 1.945111630939384e-05, + "loss": 0.5496, + "step": 7808 + }, + { + "epoch": 0.2144151565074135, + "grad_norm": 0.3707679510116577, + "learning_rate": 1.9450975180437278e-05, + "loss": 0.5286, + "step": 7809 + }, + { + "epoch": 0.21444261394838002, + "grad_norm": 0.4984795153141022, + "learning_rate": 1.9450834033851628e-05, + "loss": 0.5781, + "step": 7810 + }, + { + "epoch": 0.2144700713893465, + "grad_norm": 0.410521924495697, + "learning_rate": 1.9450692869637158e-05, + "loss": 0.6318, + "step": 7811 + }, + { + "epoch": 0.214497528830313, + "grad_norm": 0.38461872935295105, + "learning_rate": 1.9450551687794134e-05, + "loss": 0.5601, + "step": 7812 + }, + { + "epoch": 0.21452498627127953, + "grad_norm": 0.4145320951938629, + "learning_rate": 1.945041048832281e-05, + "loss": 0.5345, + "step": 7813 + }, + { + "epoch": 0.21455244371224602, + "grad_norm": 0.4532339572906494, + "learning_rate": 1.945026927122345e-05, + "loss": 0.4754, + "step": 7814 + }, + { + "epoch": 0.21457990115321252, + "grad_norm": 0.38856783509254456, + "learning_rate": 1.945012803649633e-05, + "loss": 0.506, + "step": 7815 + }, + { + "epoch": 0.214607358594179, + "grad_norm": 0.37192535400390625, + "learning_rate": 1.9449986784141703e-05, + "loss": 0.5293, + "step": 7816 + }, + { + "epoch": 0.21463481603514553, + "grad_norm": 0.3196938633918762, + "learning_rate": 1.9449845514159832e-05, + "loss": 0.4074, + "step": 7817 + }, + { + "epoch": 0.21466227347611203, + "grad_norm": 0.3958622217178345, + "learning_rate": 1.9449704226550985e-05, + "loss": 0.4945, + "step": 7818 + }, + { + "epoch": 0.21468973091707852, + "grad_norm": 0.3842329978942871, + "learning_rate": 1.9449562921315423e-05, + "loss": 0.5904, + "step": 7819 + }, + { + "epoch": 0.21471718835804504, + "grad_norm": 0.4619813561439514, + "learning_rate": 1.944942159845341e-05, + "loss": 0.5369, + "step": 7820 + }, + { + "epoch": 0.21474464579901154, + "grad_norm": 0.43263882398605347, + "learning_rate": 1.944928025796521e-05, + "loss": 0.4265, + "step": 7821 + }, + { + "epoch": 0.21477210323997803, + "grad_norm": 0.33089905977249146, + "learning_rate": 1.9449138899851086e-05, + "loss": 0.5698, + "step": 7822 + }, + { + "epoch": 0.21479956068094452, + "grad_norm": 0.397299200296402, + "learning_rate": 1.9448997524111304e-05, + "loss": 0.5658, + "step": 7823 + }, + { + "epoch": 0.21482701812191105, + "grad_norm": 0.35092252492904663, + "learning_rate": 1.9448856130746124e-05, + "loss": 0.5345, + "step": 7824 + }, + { + "epoch": 0.21485447556287754, + "grad_norm": 0.37571823596954346, + "learning_rate": 1.9448714719755815e-05, + "loss": 0.4825, + "step": 7825 + }, + { + "epoch": 0.21488193300384403, + "grad_norm": 0.39365723729133606, + "learning_rate": 1.9448573291140635e-05, + "loss": 0.4763, + "step": 7826 + }, + { + "epoch": 0.21490939044481056, + "grad_norm": 0.3824669122695923, + "learning_rate": 1.9448431844900847e-05, + "loss": 0.4753, + "step": 7827 + }, + { + "epoch": 0.21493684788577705, + "grad_norm": 0.3541501462459564, + "learning_rate": 1.9448290381036722e-05, + "loss": 0.4938, + "step": 7828 + }, + { + "epoch": 0.21496430532674354, + "grad_norm": 0.3657459616661072, + "learning_rate": 1.9448148899548518e-05, + "loss": 0.5147, + "step": 7829 + }, + { + "epoch": 0.21499176276771004, + "grad_norm": 0.3974591791629791, + "learning_rate": 1.9448007400436503e-05, + "loss": 0.5172, + "step": 7830 + }, + { + "epoch": 0.21501922020867656, + "grad_norm": 0.38809505105018616, + "learning_rate": 1.9447865883700937e-05, + "loss": 0.5293, + "step": 7831 + }, + { + "epoch": 0.21504667764964305, + "grad_norm": 0.36427539587020874, + "learning_rate": 1.9447724349342086e-05, + "loss": 0.5458, + "step": 7832 + }, + { + "epoch": 0.21507413509060955, + "grad_norm": 0.3454245924949646, + "learning_rate": 1.9447582797360215e-05, + "loss": 0.4544, + "step": 7833 + }, + { + "epoch": 0.21510159253157607, + "grad_norm": 0.35093578696250916, + "learning_rate": 1.9447441227755584e-05, + "loss": 0.4833, + "step": 7834 + }, + { + "epoch": 0.21512904997254256, + "grad_norm": 0.3398698568344116, + "learning_rate": 1.944729964052846e-05, + "loss": 0.5151, + "step": 7835 + }, + { + "epoch": 0.21515650741350906, + "grad_norm": 0.3350887596607208, + "learning_rate": 1.944715803567911e-05, + "loss": 0.5495, + "step": 7836 + }, + { + "epoch": 0.21518396485447555, + "grad_norm": 0.34562286734580994, + "learning_rate": 1.944701641320779e-05, + "loss": 0.5365, + "step": 7837 + }, + { + "epoch": 0.21521142229544207, + "grad_norm": 0.3703630268573761, + "learning_rate": 1.9446874773114775e-05, + "loss": 0.5576, + "step": 7838 + }, + { + "epoch": 0.21523887973640857, + "grad_norm": 0.3719813823699951, + "learning_rate": 1.944673311540032e-05, + "loss": 0.5251, + "step": 7839 + }, + { + "epoch": 0.21526633717737506, + "grad_norm": 0.38200628757476807, + "learning_rate": 1.9446591440064692e-05, + "loss": 0.5187, + "step": 7840 + }, + { + "epoch": 0.21529379461834158, + "grad_norm": 0.38458025455474854, + "learning_rate": 1.944644974710816e-05, + "loss": 0.5332, + "step": 7841 + }, + { + "epoch": 0.21532125205930808, + "grad_norm": 0.3553345501422882, + "learning_rate": 1.944630803653098e-05, + "loss": 0.5498, + "step": 7842 + }, + { + "epoch": 0.21534870950027457, + "grad_norm": 0.41316306591033936, + "learning_rate": 1.9446166308333424e-05, + "loss": 0.4549, + "step": 7843 + }, + { + "epoch": 0.21537616694124107, + "grad_norm": 0.3650861084461212, + "learning_rate": 1.944602456251575e-05, + "loss": 0.5081, + "step": 7844 + }, + { + "epoch": 0.2154036243822076, + "grad_norm": 0.36110904812812805, + "learning_rate": 1.9445882799078228e-05, + "loss": 0.5488, + "step": 7845 + }, + { + "epoch": 0.21543108182317408, + "grad_norm": 0.3443514406681061, + "learning_rate": 1.9445741018021122e-05, + "loss": 0.5205, + "step": 7846 + }, + { + "epoch": 0.21545853926414057, + "grad_norm": 0.38647717237472534, + "learning_rate": 1.9445599219344687e-05, + "loss": 0.4975, + "step": 7847 + }, + { + "epoch": 0.2154859967051071, + "grad_norm": 0.3483750522136688, + "learning_rate": 1.94454574030492e-05, + "loss": 0.4812, + "step": 7848 + }, + { + "epoch": 0.2155134541460736, + "grad_norm": 0.35276123881340027, + "learning_rate": 1.9445315569134918e-05, + "loss": 0.5544, + "step": 7849 + }, + { + "epoch": 0.21554091158704008, + "grad_norm": 0.43551766872406006, + "learning_rate": 1.944517371760211e-05, + "loss": 0.5955, + "step": 7850 + }, + { + "epoch": 0.21556836902800658, + "grad_norm": 0.6050980687141418, + "learning_rate": 1.9445031848451035e-05, + "loss": 0.473, + "step": 7851 + }, + { + "epoch": 0.2155958264689731, + "grad_norm": 0.37531015276908875, + "learning_rate": 1.9444889961681964e-05, + "loss": 0.5373, + "step": 7852 + }, + { + "epoch": 0.2156232839099396, + "grad_norm": 0.4050746262073517, + "learning_rate": 1.944474805729516e-05, + "loss": 0.5934, + "step": 7853 + }, + { + "epoch": 0.2156507413509061, + "grad_norm": 0.3454146087169647, + "learning_rate": 1.9444606135290884e-05, + "loss": 0.5084, + "step": 7854 + }, + { + "epoch": 0.2156781987918726, + "grad_norm": 0.41142481565475464, + "learning_rate": 1.9444464195669403e-05, + "loss": 0.527, + "step": 7855 + }, + { + "epoch": 0.2157056562328391, + "grad_norm": 0.4680810272693634, + "learning_rate": 1.9444322238430982e-05, + "loss": 0.4867, + "step": 7856 + }, + { + "epoch": 0.2157331136738056, + "grad_norm": 0.3439626395702362, + "learning_rate": 1.944418026357589e-05, + "loss": 0.4977, + "step": 7857 + }, + { + "epoch": 0.2157605711147721, + "grad_norm": 0.368764728307724, + "learning_rate": 1.9444038271104383e-05, + "loss": 0.5187, + "step": 7858 + }, + { + "epoch": 0.21578802855573861, + "grad_norm": 0.32216280698776245, + "learning_rate": 1.944389626101673e-05, + "loss": 0.5532, + "step": 7859 + }, + { + "epoch": 0.2158154859967051, + "grad_norm": 0.3852599263191223, + "learning_rate": 1.94437542333132e-05, + "loss": 0.4917, + "step": 7860 + }, + { + "epoch": 0.2158429434376716, + "grad_norm": 0.38828572630882263, + "learning_rate": 1.944361218799405e-05, + "loss": 0.6249, + "step": 7861 + }, + { + "epoch": 0.21587040087863812, + "grad_norm": 0.3365877568721771, + "learning_rate": 1.9443470125059552e-05, + "loss": 0.4793, + "step": 7862 + }, + { + "epoch": 0.21589785831960462, + "grad_norm": 0.3742566406726837, + "learning_rate": 1.944332804450997e-05, + "loss": 0.5, + "step": 7863 + }, + { + "epoch": 0.2159253157605711, + "grad_norm": 0.33727753162384033, + "learning_rate": 1.9443185946345564e-05, + "loss": 0.4964, + "step": 7864 + }, + { + "epoch": 0.2159527732015376, + "grad_norm": 0.341051310300827, + "learning_rate": 1.94430438305666e-05, + "loss": 0.4436, + "step": 7865 + }, + { + "epoch": 0.21598023064250413, + "grad_norm": 0.3972136378288269, + "learning_rate": 1.944290169717335e-05, + "loss": 0.5906, + "step": 7866 + }, + { + "epoch": 0.21600768808347062, + "grad_norm": 0.34802401065826416, + "learning_rate": 1.9442759546166076e-05, + "loss": 0.5184, + "step": 7867 + }, + { + "epoch": 0.21603514552443712, + "grad_norm": 0.3707738518714905, + "learning_rate": 1.9442617377545037e-05, + "loss": 0.55, + "step": 7868 + }, + { + "epoch": 0.2160626029654036, + "grad_norm": 0.3427625894546509, + "learning_rate": 1.9442475191310503e-05, + "loss": 0.5346, + "step": 7869 + }, + { + "epoch": 0.21609006040637013, + "grad_norm": 0.380463570356369, + "learning_rate": 1.9442332987462742e-05, + "loss": 0.5556, + "step": 7870 + }, + { + "epoch": 0.21611751784733663, + "grad_norm": 0.3904848098754883, + "learning_rate": 1.9442190766002012e-05, + "loss": 0.6003, + "step": 7871 + }, + { + "epoch": 0.21614497528830312, + "grad_norm": 0.34269508719444275, + "learning_rate": 1.9442048526928587e-05, + "loss": 0.4615, + "step": 7872 + }, + { + "epoch": 0.21617243272926964, + "grad_norm": 0.35646456480026245, + "learning_rate": 1.944190627024273e-05, + "loss": 0.56, + "step": 7873 + }, + { + "epoch": 0.21619989017023614, + "grad_norm": 0.3351840674877167, + "learning_rate": 1.9441763995944697e-05, + "loss": 0.5237, + "step": 7874 + }, + { + "epoch": 0.21622734761120263, + "grad_norm": 0.3447469174861908, + "learning_rate": 1.9441621704034766e-05, + "loss": 0.537, + "step": 7875 + }, + { + "epoch": 0.21625480505216912, + "grad_norm": 0.36922597885131836, + "learning_rate": 1.9441479394513197e-05, + "loss": 0.5575, + "step": 7876 + }, + { + "epoch": 0.21628226249313565, + "grad_norm": 0.32156461477279663, + "learning_rate": 1.9441337067380252e-05, + "loss": 0.5062, + "step": 7877 + }, + { + "epoch": 0.21630971993410214, + "grad_norm": 0.3258417248725891, + "learning_rate": 1.9441194722636204e-05, + "loss": 0.4724, + "step": 7878 + }, + { + "epoch": 0.21633717737506863, + "grad_norm": 0.3396294116973877, + "learning_rate": 1.944105236028131e-05, + "loss": 0.4424, + "step": 7879 + }, + { + "epoch": 0.21636463481603516, + "grad_norm": 0.3740018308162689, + "learning_rate": 1.944090998031584e-05, + "loss": 0.6026, + "step": 7880 + }, + { + "epoch": 0.21639209225700165, + "grad_norm": 0.44629162549972534, + "learning_rate": 1.944076758274007e-05, + "loss": 0.5874, + "step": 7881 + }, + { + "epoch": 0.21641954969796814, + "grad_norm": 0.4096331000328064, + "learning_rate": 1.9440625167554247e-05, + "loss": 0.6143, + "step": 7882 + }, + { + "epoch": 0.21644700713893464, + "grad_norm": 0.35968008637428284, + "learning_rate": 1.9440482734758644e-05, + "loss": 0.4896, + "step": 7883 + }, + { + "epoch": 0.21647446457990116, + "grad_norm": 0.34813886880874634, + "learning_rate": 1.9440340284353528e-05, + "loss": 0.5317, + "step": 7884 + }, + { + "epoch": 0.21650192202086765, + "grad_norm": 0.34971025586128235, + "learning_rate": 1.944019781633917e-05, + "loss": 0.5402, + "step": 7885 + }, + { + "epoch": 0.21652937946183415, + "grad_norm": 0.371437668800354, + "learning_rate": 1.9440055330715822e-05, + "loss": 0.5017, + "step": 7886 + }, + { + "epoch": 0.21655683690280067, + "grad_norm": 1.4274760484695435, + "learning_rate": 1.9439912827483765e-05, + "loss": 0.4852, + "step": 7887 + }, + { + "epoch": 0.21658429434376716, + "grad_norm": 0.3596683740615845, + "learning_rate": 1.943977030664325e-05, + "loss": 0.5525, + "step": 7888 + }, + { + "epoch": 0.21661175178473366, + "grad_norm": 0.3330322206020355, + "learning_rate": 1.943962776819456e-05, + "loss": 0.4811, + "step": 7889 + }, + { + "epoch": 0.21663920922570015, + "grad_norm": 0.3559999465942383, + "learning_rate": 1.9439485212137943e-05, + "loss": 0.6012, + "step": 7890 + }, + { + "epoch": 0.21666666666666667, + "grad_norm": 0.355718195438385, + "learning_rate": 1.9439342638473676e-05, + "loss": 0.5839, + "step": 7891 + }, + { + "epoch": 0.21669412410763317, + "grad_norm": 0.35481736063957214, + "learning_rate": 1.9439200047202024e-05, + "loss": 0.5444, + "step": 7892 + }, + { + "epoch": 0.21672158154859966, + "grad_norm": 0.370109885931015, + "learning_rate": 1.943905743832325e-05, + "loss": 0.5166, + "step": 7893 + }, + { + "epoch": 0.21674903898956618, + "grad_norm": 0.31574854254722595, + "learning_rate": 1.9438914811837623e-05, + "loss": 0.4847, + "step": 7894 + }, + { + "epoch": 0.21677649643053268, + "grad_norm": 0.3373795747756958, + "learning_rate": 1.9438772167745406e-05, + "loss": 0.5453, + "step": 7895 + }, + { + "epoch": 0.21680395387149917, + "grad_norm": 0.34027326107025146, + "learning_rate": 1.9438629506046868e-05, + "loss": 0.5282, + "step": 7896 + }, + { + "epoch": 0.21683141131246567, + "grad_norm": 0.3506055772304535, + "learning_rate": 1.943848682674227e-05, + "loss": 0.5414, + "step": 7897 + }, + { + "epoch": 0.2168588687534322, + "grad_norm": 0.37641438841819763, + "learning_rate": 1.9438344129831886e-05, + "loss": 0.4736, + "step": 7898 + }, + { + "epoch": 0.21688632619439868, + "grad_norm": 0.3529817461967468, + "learning_rate": 1.9438201415315974e-05, + "loss": 0.528, + "step": 7899 + }, + { + "epoch": 0.21691378363536518, + "grad_norm": 0.37758827209472656, + "learning_rate": 1.9438058683194808e-05, + "loss": 0.5531, + "step": 7900 + }, + { + "epoch": 0.2169412410763317, + "grad_norm": 0.3631792366504669, + "learning_rate": 1.9437915933468648e-05, + "loss": 0.4541, + "step": 7901 + }, + { + "epoch": 0.2169686985172982, + "grad_norm": 0.3837461471557617, + "learning_rate": 1.9437773166137762e-05, + "loss": 0.4999, + "step": 7902 + }, + { + "epoch": 0.21699615595826469, + "grad_norm": 0.3627293109893799, + "learning_rate": 1.9437630381202415e-05, + "loss": 0.5277, + "step": 7903 + }, + { + "epoch": 0.21702361339923118, + "grad_norm": 0.3513796329498291, + "learning_rate": 1.943748757866288e-05, + "loss": 0.5393, + "step": 7904 + }, + { + "epoch": 0.2170510708401977, + "grad_norm": 0.3478952944278717, + "learning_rate": 1.9437344758519415e-05, + "loss": 0.5572, + "step": 7905 + }, + { + "epoch": 0.2170785282811642, + "grad_norm": 0.36101892590522766, + "learning_rate": 1.943720192077229e-05, + "loss": 0.4934, + "step": 7906 + }, + { + "epoch": 0.2171059857221307, + "grad_norm": 0.3258741796016693, + "learning_rate": 1.943705906542177e-05, + "loss": 0.5146, + "step": 7907 + }, + { + "epoch": 0.2171334431630972, + "grad_norm": 0.3595561385154724, + "learning_rate": 1.9436916192468126e-05, + "loss": 0.5377, + "step": 7908 + }, + { + "epoch": 0.2171609006040637, + "grad_norm": 0.49214884638786316, + "learning_rate": 1.9436773301911622e-05, + "loss": 0.6015, + "step": 7909 + }, + { + "epoch": 0.2171883580450302, + "grad_norm": 0.3790237605571747, + "learning_rate": 1.9436630393752524e-05, + "loss": 0.4787, + "step": 7910 + }, + { + "epoch": 0.2172158154859967, + "grad_norm": 0.36321330070495605, + "learning_rate": 1.9436487467991094e-05, + "loss": 0.53, + "step": 7911 + }, + { + "epoch": 0.21724327292696322, + "grad_norm": 0.3699352741241455, + "learning_rate": 1.9436344524627606e-05, + "loss": 0.4051, + "step": 7912 + }, + { + "epoch": 0.2172707303679297, + "grad_norm": 0.3265607953071594, + "learning_rate": 1.9436201563662328e-05, + "loss": 0.4663, + "step": 7913 + }, + { + "epoch": 0.2172981878088962, + "grad_norm": 0.38447192311286926, + "learning_rate": 1.9436058585095513e-05, + "loss": 0.6017, + "step": 7914 + }, + { + "epoch": 0.21732564524986273, + "grad_norm": 0.3323126435279846, + "learning_rate": 1.9435915588927445e-05, + "loss": 0.5288, + "step": 7915 + }, + { + "epoch": 0.21735310269082922, + "grad_norm": 0.4829493463039398, + "learning_rate": 1.943577257515838e-05, + "loss": 0.6018, + "step": 7916 + }, + { + "epoch": 0.2173805601317957, + "grad_norm": 0.32431560754776, + "learning_rate": 1.943562954378859e-05, + "loss": 0.4971, + "step": 7917 + }, + { + "epoch": 0.2174080175727622, + "grad_norm": 0.3597950041294098, + "learning_rate": 1.9435486494818337e-05, + "loss": 0.6569, + "step": 7918 + }, + { + "epoch": 0.21743547501372873, + "grad_norm": 0.34114962816238403, + "learning_rate": 1.943534342824789e-05, + "loss": 0.5559, + "step": 7919 + }, + { + "epoch": 0.21746293245469522, + "grad_norm": 0.3888196051120758, + "learning_rate": 1.9435200344077512e-05, + "loss": 0.5553, + "step": 7920 + }, + { + "epoch": 0.21749038989566172, + "grad_norm": 0.34974297881126404, + "learning_rate": 1.943505724230748e-05, + "loss": 0.5862, + "step": 7921 + }, + { + "epoch": 0.21751784733662824, + "grad_norm": 0.41655224561691284, + "learning_rate": 1.9434914122938055e-05, + "loss": 0.5174, + "step": 7922 + }, + { + "epoch": 0.21754530477759473, + "grad_norm": 0.36801937222480774, + "learning_rate": 1.9434770985969502e-05, + "loss": 0.5648, + "step": 7923 + }, + { + "epoch": 0.21757276221856123, + "grad_norm": 0.3897433578968048, + "learning_rate": 1.9434627831402092e-05, + "loss": 0.4758, + "step": 7924 + }, + { + "epoch": 0.21760021965952772, + "grad_norm": 0.36516886949539185, + "learning_rate": 1.9434484659236088e-05, + "loss": 0.5588, + "step": 7925 + }, + { + "epoch": 0.21762767710049424, + "grad_norm": 0.3792569041252136, + "learning_rate": 1.943434146947176e-05, + "loss": 0.5564, + "step": 7926 + }, + { + "epoch": 0.21765513454146074, + "grad_norm": 0.40502235293388367, + "learning_rate": 1.943419826210937e-05, + "loss": 0.6092, + "step": 7927 + }, + { + "epoch": 0.21768259198242723, + "grad_norm": 0.43261590600013733, + "learning_rate": 1.9434055037149192e-05, + "loss": 0.4696, + "step": 7928 + }, + { + "epoch": 0.21771004942339375, + "grad_norm": 0.36214739084243774, + "learning_rate": 1.943391179459149e-05, + "loss": 0.518, + "step": 7929 + }, + { + "epoch": 0.21773750686436025, + "grad_norm": 0.36184144020080566, + "learning_rate": 1.9433768534436535e-05, + "loss": 0.5994, + "step": 7930 + }, + { + "epoch": 0.21776496430532674, + "grad_norm": 0.41555866599082947, + "learning_rate": 1.9433625256684588e-05, + "loss": 0.4974, + "step": 7931 + }, + { + "epoch": 0.21779242174629324, + "grad_norm": 0.3799848258495331, + "learning_rate": 1.9433481961335918e-05, + "loss": 0.5232, + "step": 7932 + }, + { + "epoch": 0.21781987918725976, + "grad_norm": 0.34598562121391296, + "learning_rate": 1.9433338648390795e-05, + "loss": 0.5145, + "step": 7933 + }, + { + "epoch": 0.21784733662822625, + "grad_norm": 0.3671753704547882, + "learning_rate": 1.9433195317849485e-05, + "loss": 0.6042, + "step": 7934 + }, + { + "epoch": 0.21787479406919275, + "grad_norm": 0.3436928689479828, + "learning_rate": 1.9433051969712254e-05, + "loss": 0.5384, + "step": 7935 + }, + { + "epoch": 0.21790225151015924, + "grad_norm": 0.3844831585884094, + "learning_rate": 1.9432908603979367e-05, + "loss": 0.6113, + "step": 7936 + }, + { + "epoch": 0.21792970895112576, + "grad_norm": 0.3374062776565552, + "learning_rate": 1.94327652206511e-05, + "loss": 0.5226, + "step": 7937 + }, + { + "epoch": 0.21795716639209226, + "grad_norm": 0.3655458092689514, + "learning_rate": 1.9432621819727714e-05, + "loss": 0.6314, + "step": 7938 + }, + { + "epoch": 0.21798462383305875, + "grad_norm": 0.34000229835510254, + "learning_rate": 1.9432478401209475e-05, + "loss": 0.3956, + "step": 7939 + }, + { + "epoch": 0.21801208127402527, + "grad_norm": 0.36685314774513245, + "learning_rate": 1.9432334965096658e-05, + "loss": 0.5144, + "step": 7940 + }, + { + "epoch": 0.21803953871499177, + "grad_norm": 0.3515413999557495, + "learning_rate": 1.9432191511389524e-05, + "loss": 0.4726, + "step": 7941 + }, + { + "epoch": 0.21806699615595826, + "grad_norm": 0.35673612356185913, + "learning_rate": 1.943204804008834e-05, + "loss": 0.5079, + "step": 7942 + }, + { + "epoch": 0.21809445359692475, + "grad_norm": 0.39238718152046204, + "learning_rate": 1.943190455119338e-05, + "loss": 0.5358, + "step": 7943 + }, + { + "epoch": 0.21812191103789128, + "grad_norm": 0.3443711996078491, + "learning_rate": 1.9431761044704904e-05, + "loss": 0.5929, + "step": 7944 + }, + { + "epoch": 0.21814936847885777, + "grad_norm": 0.35231325030326843, + "learning_rate": 1.9431617520623185e-05, + "loss": 0.4959, + "step": 7945 + }, + { + "epoch": 0.21817682591982426, + "grad_norm": 0.39489567279815674, + "learning_rate": 1.943147397894849e-05, + "loss": 0.6586, + "step": 7946 + }, + { + "epoch": 0.21820428336079079, + "grad_norm": 0.3550316095352173, + "learning_rate": 1.943133041968108e-05, + "loss": 0.5165, + "step": 7947 + }, + { + "epoch": 0.21823174080175728, + "grad_norm": 0.43636754155158997, + "learning_rate": 1.9431186842821238e-05, + "loss": 0.6744, + "step": 7948 + }, + { + "epoch": 0.21825919824272377, + "grad_norm": 0.3834889829158783, + "learning_rate": 1.943104324836922e-05, + "loss": 0.5526, + "step": 7949 + }, + { + "epoch": 0.21828665568369027, + "grad_norm": 0.7978171110153198, + "learning_rate": 1.943089963632529e-05, + "loss": 0.6308, + "step": 7950 + }, + { + "epoch": 0.2183141131246568, + "grad_norm": 0.43618106842041016, + "learning_rate": 1.9430756006689727e-05, + "loss": 0.5239, + "step": 7951 + }, + { + "epoch": 0.21834157056562328, + "grad_norm": 0.34848111867904663, + "learning_rate": 1.9430612359462795e-05, + "loss": 0.546, + "step": 7952 + }, + { + "epoch": 0.21836902800658978, + "grad_norm": 0.3319282829761505, + "learning_rate": 1.943046869464476e-05, + "loss": 0.5227, + "step": 7953 + }, + { + "epoch": 0.2183964854475563, + "grad_norm": 0.38722464442253113, + "learning_rate": 1.943032501223589e-05, + "loss": 0.4816, + "step": 7954 + }, + { + "epoch": 0.2184239428885228, + "grad_norm": 0.3183745741844177, + "learning_rate": 1.9430181312236453e-05, + "loss": 0.4417, + "step": 7955 + }, + { + "epoch": 0.2184514003294893, + "grad_norm": 0.3960619866847992, + "learning_rate": 1.943003759464672e-05, + "loss": 0.507, + "step": 7956 + }, + { + "epoch": 0.21847885777045578, + "grad_norm": 0.3683792054653168, + "learning_rate": 1.942989385946696e-05, + "loss": 0.5329, + "step": 7957 + }, + { + "epoch": 0.2185063152114223, + "grad_norm": 0.35868459939956665, + "learning_rate": 1.9429750106697435e-05, + "loss": 0.5143, + "step": 7958 + }, + { + "epoch": 0.2185337726523888, + "grad_norm": 0.45949098467826843, + "learning_rate": 1.9429606336338417e-05, + "loss": 0.4886, + "step": 7959 + }, + { + "epoch": 0.2185612300933553, + "grad_norm": 0.3489017188549042, + "learning_rate": 1.942946254839017e-05, + "loss": 0.4686, + "step": 7960 + }, + { + "epoch": 0.2185886875343218, + "grad_norm": 0.38500669598579407, + "learning_rate": 1.942931874285297e-05, + "loss": 0.5871, + "step": 7961 + }, + { + "epoch": 0.2186161449752883, + "grad_norm": 0.3436225950717926, + "learning_rate": 1.942917491972708e-05, + "loss": 0.5024, + "step": 7962 + }, + { + "epoch": 0.2186436024162548, + "grad_norm": 0.4188995659351349, + "learning_rate": 1.942903107901277e-05, + "loss": 0.5545, + "step": 7963 + }, + { + "epoch": 0.2186710598572213, + "grad_norm": 0.35615140199661255, + "learning_rate": 1.9428887220710305e-05, + "loss": 0.5169, + "step": 7964 + }, + { + "epoch": 0.21869851729818782, + "grad_norm": 0.37379491329193115, + "learning_rate": 1.942874334481996e-05, + "loss": 0.522, + "step": 7965 + }, + { + "epoch": 0.2187259747391543, + "grad_norm": 0.3643708825111389, + "learning_rate": 1.9428599451341997e-05, + "loss": 0.6121, + "step": 7966 + }, + { + "epoch": 0.2187534321801208, + "grad_norm": 0.38752424716949463, + "learning_rate": 1.9428455540276688e-05, + "loss": 0.5857, + "step": 7967 + }, + { + "epoch": 0.21878088962108733, + "grad_norm": 0.35472923517227173, + "learning_rate": 1.94283116116243e-05, + "loss": 0.4401, + "step": 7968 + }, + { + "epoch": 0.21880834706205382, + "grad_norm": 0.38868609070777893, + "learning_rate": 1.94281676653851e-05, + "loss": 0.6214, + "step": 7969 + }, + { + "epoch": 0.21883580450302031, + "grad_norm": 0.41988715529441833, + "learning_rate": 1.942802370155936e-05, + "loss": 0.615, + "step": 7970 + }, + { + "epoch": 0.2188632619439868, + "grad_norm": 0.38401252031326294, + "learning_rate": 1.9427879720147347e-05, + "loss": 0.5053, + "step": 7971 + }, + { + "epoch": 0.21889071938495333, + "grad_norm": 0.36533430218696594, + "learning_rate": 1.942773572114933e-05, + "loss": 0.5848, + "step": 7972 + }, + { + "epoch": 0.21891817682591982, + "grad_norm": 0.39511051774024963, + "learning_rate": 1.9427591704565572e-05, + "loss": 0.4584, + "step": 7973 + }, + { + "epoch": 0.21894563426688632, + "grad_norm": 0.40671393275260925, + "learning_rate": 1.942744767039635e-05, + "loss": 0.5543, + "step": 7974 + }, + { + "epoch": 0.21897309170785284, + "grad_norm": 0.349764347076416, + "learning_rate": 1.942730361864193e-05, + "loss": 0.5454, + "step": 7975 + }, + { + "epoch": 0.21900054914881933, + "grad_norm": 0.3443093001842499, + "learning_rate": 1.9427159549302578e-05, + "loss": 0.5439, + "step": 7976 + }, + { + "epoch": 0.21902800658978583, + "grad_norm": 0.3339177966117859, + "learning_rate": 1.9427015462378565e-05, + "loss": 0.6097, + "step": 7977 + }, + { + "epoch": 0.21905546403075232, + "grad_norm": 0.44275394082069397, + "learning_rate": 1.942687135787016e-05, + "loss": 0.5317, + "step": 7978 + }, + { + "epoch": 0.21908292147171884, + "grad_norm": 0.4820781648159027, + "learning_rate": 1.942672723577763e-05, + "loss": 0.6381, + "step": 7979 + }, + { + "epoch": 0.21911037891268534, + "grad_norm": 0.34550103545188904, + "learning_rate": 1.9426583096101246e-05, + "loss": 0.5536, + "step": 7980 + }, + { + "epoch": 0.21913783635365183, + "grad_norm": 0.4455774426460266, + "learning_rate": 1.9426438938841278e-05, + "loss": 0.4805, + "step": 7981 + }, + { + "epoch": 0.21916529379461835, + "grad_norm": 0.4327555000782013, + "learning_rate": 1.942629476399799e-05, + "loss": 0.6124, + "step": 7982 + }, + { + "epoch": 0.21919275123558485, + "grad_norm": 0.3599054217338562, + "learning_rate": 1.9426150571571653e-05, + "loss": 0.669, + "step": 7983 + }, + { + "epoch": 0.21922020867655134, + "grad_norm": 0.3785346448421478, + "learning_rate": 1.942600636156254e-05, + "loss": 0.5789, + "step": 7984 + }, + { + "epoch": 0.21924766611751784, + "grad_norm": 0.382394939661026, + "learning_rate": 1.9425862133970914e-05, + "loss": 0.5672, + "step": 7985 + }, + { + "epoch": 0.21927512355848436, + "grad_norm": 0.3413185775279999, + "learning_rate": 1.9425717888797048e-05, + "loss": 0.4851, + "step": 7986 + }, + { + "epoch": 0.21930258099945085, + "grad_norm": 0.3566800653934479, + "learning_rate": 1.942557362604121e-05, + "loss": 0.4765, + "step": 7987 + }, + { + "epoch": 0.21933003844041735, + "grad_norm": 0.39806798100471497, + "learning_rate": 1.9425429345703668e-05, + "loss": 0.5156, + "step": 7988 + }, + { + "epoch": 0.21935749588138387, + "grad_norm": 0.35243725776672363, + "learning_rate": 1.9425285047784694e-05, + "loss": 0.5506, + "step": 7989 + }, + { + "epoch": 0.21938495332235036, + "grad_norm": 0.356689453125, + "learning_rate": 1.942514073228455e-05, + "loss": 0.5237, + "step": 7990 + }, + { + "epoch": 0.21941241076331686, + "grad_norm": 0.3501352071762085, + "learning_rate": 1.9424996399203518e-05, + "loss": 0.5511, + "step": 7991 + }, + { + "epoch": 0.21943986820428335, + "grad_norm": 0.3923778235912323, + "learning_rate": 1.9424852048541852e-05, + "loss": 0.5639, + "step": 7992 + }, + { + "epoch": 0.21946732564524987, + "grad_norm": 0.36888450384140015, + "learning_rate": 1.9424707680299834e-05, + "loss": 0.4783, + "step": 7993 + }, + { + "epoch": 0.21949478308621637, + "grad_norm": 0.36135852336883545, + "learning_rate": 1.942456329447773e-05, + "loss": 0.558, + "step": 7994 + }, + { + "epoch": 0.21952224052718286, + "grad_norm": 0.3470737934112549, + "learning_rate": 1.9424418891075803e-05, + "loss": 0.5454, + "step": 7995 + }, + { + "epoch": 0.21954969796814938, + "grad_norm": 0.45633184909820557, + "learning_rate": 1.9424274470094326e-05, + "loss": 0.5677, + "step": 7996 + }, + { + "epoch": 0.21957715540911588, + "grad_norm": 0.3937956392765045, + "learning_rate": 1.942413003153357e-05, + "loss": 0.6229, + "step": 7997 + }, + { + "epoch": 0.21960461285008237, + "grad_norm": 0.39656075835227966, + "learning_rate": 1.9423985575393806e-05, + "loss": 0.4988, + "step": 7998 + }, + { + "epoch": 0.21963207029104886, + "grad_norm": 0.4055325984954834, + "learning_rate": 1.94238411016753e-05, + "loss": 0.5542, + "step": 7999 + }, + { + "epoch": 0.21965952773201539, + "grad_norm": 0.34276989102363586, + "learning_rate": 1.9423696610378322e-05, + "loss": 0.4681, + "step": 8000 + }, + { + "epoch": 0.21968698517298188, + "grad_norm": 0.36218729615211487, + "learning_rate": 1.9423552101503143e-05, + "loss": 0.5736, + "step": 8001 + }, + { + "epoch": 0.21971444261394837, + "grad_norm": 0.3875357210636139, + "learning_rate": 1.942340757505003e-05, + "loss": 0.5509, + "step": 8002 + }, + { + "epoch": 0.21974190005491487, + "grad_norm": 0.33614692091941833, + "learning_rate": 1.9423263031019258e-05, + "loss": 0.4746, + "step": 8003 + }, + { + "epoch": 0.2197693574958814, + "grad_norm": 0.3934895396232605, + "learning_rate": 1.942311846941109e-05, + "loss": 0.5298, + "step": 8004 + }, + { + "epoch": 0.21979681493684788, + "grad_norm": 0.3472811281681061, + "learning_rate": 1.94229738902258e-05, + "loss": 0.484, + "step": 8005 + }, + { + "epoch": 0.21982427237781438, + "grad_norm": 0.3462182879447937, + "learning_rate": 1.9422829293463654e-05, + "loss": 0.4763, + "step": 8006 + }, + { + "epoch": 0.2198517298187809, + "grad_norm": 0.37922170758247375, + "learning_rate": 1.9422684679124924e-05, + "loss": 0.6223, + "step": 8007 + }, + { + "epoch": 0.2198791872597474, + "grad_norm": 0.37897342443466187, + "learning_rate": 1.9422540047209885e-05, + "loss": 0.507, + "step": 8008 + }, + { + "epoch": 0.2199066447007139, + "grad_norm": 0.4368216395378113, + "learning_rate": 1.9422395397718796e-05, + "loss": 0.6185, + "step": 8009 + }, + { + "epoch": 0.21993410214168038, + "grad_norm": 0.3697028160095215, + "learning_rate": 1.9422250730651933e-05, + "loss": 0.5036, + "step": 8010 + }, + { + "epoch": 0.2199615595826469, + "grad_norm": 0.4216284453868866, + "learning_rate": 1.9422106046009567e-05, + "loss": 0.542, + "step": 8011 + }, + { + "epoch": 0.2199890170236134, + "grad_norm": 0.4419436454772949, + "learning_rate": 1.9421961343791965e-05, + "loss": 0.511, + "step": 8012 + }, + { + "epoch": 0.2200164744645799, + "grad_norm": 0.39433273673057556, + "learning_rate": 1.9421816623999396e-05, + "loss": 0.5903, + "step": 8013 + }, + { + "epoch": 0.2200439319055464, + "grad_norm": 0.35068029165267944, + "learning_rate": 1.9421671886632135e-05, + "loss": 0.5622, + "step": 8014 + }, + { + "epoch": 0.2200713893465129, + "grad_norm": 0.35394105315208435, + "learning_rate": 1.9421527131690446e-05, + "loss": 0.5761, + "step": 8015 + }, + { + "epoch": 0.2200988467874794, + "grad_norm": 0.3729163408279419, + "learning_rate": 1.94213823591746e-05, + "loss": 0.4959, + "step": 8016 + }, + { + "epoch": 0.2201263042284459, + "grad_norm": 0.38093140721321106, + "learning_rate": 1.9421237569084877e-05, + "loss": 0.463, + "step": 8017 + }, + { + "epoch": 0.22015376166941242, + "grad_norm": 0.34641382098197937, + "learning_rate": 1.9421092761421532e-05, + "loss": 0.4343, + "step": 8018 + }, + { + "epoch": 0.2201812191103789, + "grad_norm": 0.38729119300842285, + "learning_rate": 1.9420947936184846e-05, + "loss": 0.6339, + "step": 8019 + }, + { + "epoch": 0.2202086765513454, + "grad_norm": 0.37823647260665894, + "learning_rate": 1.9420803093375086e-05, + "loss": 0.5138, + "step": 8020 + }, + { + "epoch": 0.22023613399231193, + "grad_norm": 0.3326253890991211, + "learning_rate": 1.942065823299252e-05, + "loss": 0.4414, + "step": 8021 + }, + { + "epoch": 0.22026359143327842, + "grad_norm": 0.3608744144439697, + "learning_rate": 1.9420513355037414e-05, + "loss": 0.5158, + "step": 8022 + }, + { + "epoch": 0.22029104887424492, + "grad_norm": 0.3428604006767273, + "learning_rate": 1.942036845951005e-05, + "loss": 0.4724, + "step": 8023 + }, + { + "epoch": 0.2203185063152114, + "grad_norm": 0.35999351739883423, + "learning_rate": 1.9420223546410692e-05, + "loss": 0.5302, + "step": 8024 + }, + { + "epoch": 0.22034596375617793, + "grad_norm": 0.3220391869544983, + "learning_rate": 1.942007861573961e-05, + "loss": 0.4553, + "step": 8025 + }, + { + "epoch": 0.22037342119714443, + "grad_norm": 0.3695027828216553, + "learning_rate": 1.9419933667497074e-05, + "loss": 0.5158, + "step": 8026 + }, + { + "epoch": 0.22040087863811092, + "grad_norm": 0.3768795430660248, + "learning_rate": 1.9419788701683358e-05, + "loss": 0.6201, + "step": 8027 + }, + { + "epoch": 0.22042833607907744, + "grad_norm": 0.3422100245952606, + "learning_rate": 1.9419643718298726e-05, + "loss": 0.531, + "step": 8028 + }, + { + "epoch": 0.22045579352004394, + "grad_norm": 0.3567357361316681, + "learning_rate": 1.9419498717343455e-05, + "loss": 0.5153, + "step": 8029 + }, + { + "epoch": 0.22048325096101043, + "grad_norm": 0.3580523431301117, + "learning_rate": 1.9419353698817808e-05, + "loss": 0.5734, + "step": 8030 + }, + { + "epoch": 0.22051070840197692, + "grad_norm": 0.3664233982563019, + "learning_rate": 1.9419208662722065e-05, + "loss": 0.5356, + "step": 8031 + }, + { + "epoch": 0.22053816584294345, + "grad_norm": 0.33101823925971985, + "learning_rate": 1.941906360905649e-05, + "loss": 0.4896, + "step": 8032 + }, + { + "epoch": 0.22056562328390994, + "grad_norm": 0.3464110791683197, + "learning_rate": 1.9418918537821355e-05, + "loss": 0.4816, + "step": 8033 + }, + { + "epoch": 0.22059308072487643, + "grad_norm": 0.35937443375587463, + "learning_rate": 1.941877344901693e-05, + "loss": 0.5487, + "step": 8034 + }, + { + "epoch": 0.22062053816584296, + "grad_norm": 0.3409962058067322, + "learning_rate": 1.9418628342643486e-05, + "loss": 0.5046, + "step": 8035 + }, + { + "epoch": 0.22064799560680945, + "grad_norm": 0.3625980615615845, + "learning_rate": 1.9418483218701295e-05, + "loss": 0.5077, + "step": 8036 + }, + { + "epoch": 0.22067545304777594, + "grad_norm": 0.39992594718933105, + "learning_rate": 1.9418338077190628e-05, + "loss": 0.5232, + "step": 8037 + }, + { + "epoch": 0.22070291048874244, + "grad_norm": 0.41398885846138, + "learning_rate": 1.9418192918111752e-05, + "loss": 0.6121, + "step": 8038 + }, + { + "epoch": 0.22073036792970896, + "grad_norm": 0.40345078706741333, + "learning_rate": 1.9418047741464942e-05, + "loss": 0.6386, + "step": 8039 + }, + { + "epoch": 0.22075782537067545, + "grad_norm": 0.3615337908267975, + "learning_rate": 1.9417902547250465e-05, + "loss": 0.4559, + "step": 8040 + }, + { + "epoch": 0.22078528281164195, + "grad_norm": 0.364635169506073, + "learning_rate": 1.9417757335468596e-05, + "loss": 0.5159, + "step": 8041 + }, + { + "epoch": 0.22081274025260847, + "grad_norm": 0.43943163752555847, + "learning_rate": 1.9417612106119606e-05, + "loss": 0.5454, + "step": 8042 + }, + { + "epoch": 0.22084019769357496, + "grad_norm": 0.3705514967441559, + "learning_rate": 1.941746685920376e-05, + "loss": 0.5734, + "step": 8043 + }, + { + "epoch": 0.22086765513454146, + "grad_norm": 0.37002331018447876, + "learning_rate": 1.9417321594721333e-05, + "loss": 0.5527, + "step": 8044 + }, + { + "epoch": 0.22089511257550795, + "grad_norm": 0.3963075280189514, + "learning_rate": 1.9417176312672596e-05, + "loss": 0.5722, + "step": 8045 + }, + { + "epoch": 0.22092257001647447, + "grad_norm": 0.3318404257297516, + "learning_rate": 1.941703101305782e-05, + "loss": 0.4843, + "step": 8046 + }, + { + "epoch": 0.22095002745744097, + "grad_norm": 1.0186270475387573, + "learning_rate": 1.9416885695877274e-05, + "loss": 0.5053, + "step": 8047 + }, + { + "epoch": 0.22097748489840746, + "grad_norm": 0.4562040865421295, + "learning_rate": 1.9416740361131233e-05, + "loss": 0.5391, + "step": 8048 + }, + { + "epoch": 0.22100494233937398, + "grad_norm": 0.41872820258140564, + "learning_rate": 1.9416595008819964e-05, + "loss": 0.5594, + "step": 8049 + }, + { + "epoch": 0.22103239978034048, + "grad_norm": 0.34873166680336, + "learning_rate": 1.941644963894374e-05, + "loss": 0.528, + "step": 8050 + }, + { + "epoch": 0.22105985722130697, + "grad_norm": 0.5339556932449341, + "learning_rate": 1.9416304251502832e-05, + "loss": 0.6856, + "step": 8051 + }, + { + "epoch": 0.22108731466227347, + "grad_norm": 0.39145246148109436, + "learning_rate": 1.9416158846497515e-05, + "loss": 0.6462, + "step": 8052 + }, + { + "epoch": 0.22111477210324, + "grad_norm": 0.3652844727039337, + "learning_rate": 1.9416013423928054e-05, + "loss": 0.5627, + "step": 8053 + }, + { + "epoch": 0.22114222954420648, + "grad_norm": 0.33739838004112244, + "learning_rate": 1.9415867983794722e-05, + "loss": 0.4182, + "step": 8054 + }, + { + "epoch": 0.22116968698517298, + "grad_norm": 0.3650475740432739, + "learning_rate": 1.941572252609779e-05, + "loss": 0.529, + "step": 8055 + }, + { + "epoch": 0.2211971444261395, + "grad_norm": 0.3355696499347687, + "learning_rate": 1.941557705083753e-05, + "loss": 0.5541, + "step": 8056 + }, + { + "epoch": 0.221224601867106, + "grad_norm": 0.32872310280799866, + "learning_rate": 1.9415431558014215e-05, + "loss": 0.5699, + "step": 8057 + }, + { + "epoch": 0.22125205930807249, + "grad_norm": 0.36793744564056396, + "learning_rate": 1.9415286047628116e-05, + "loss": 0.5362, + "step": 8058 + }, + { + "epoch": 0.22127951674903898, + "grad_norm": 0.358155757188797, + "learning_rate": 1.9415140519679503e-05, + "loss": 0.5759, + "step": 8059 + }, + { + "epoch": 0.2213069741900055, + "grad_norm": 0.32244130969047546, + "learning_rate": 1.9414994974168645e-05, + "loss": 0.4608, + "step": 8060 + }, + { + "epoch": 0.221334431630972, + "grad_norm": 0.3774900734424591, + "learning_rate": 1.941484941109582e-05, + "loss": 0.5871, + "step": 8061 + }, + { + "epoch": 0.2213618890719385, + "grad_norm": 0.3488394618034363, + "learning_rate": 1.9414703830461294e-05, + "loss": 0.5162, + "step": 8062 + }, + { + "epoch": 0.221389346512905, + "grad_norm": 0.37092679738998413, + "learning_rate": 1.9414558232265342e-05, + "loss": 0.524, + "step": 8063 + }, + { + "epoch": 0.2214168039538715, + "grad_norm": 0.356804221868515, + "learning_rate": 1.9414412616508236e-05, + "loss": 0.5263, + "step": 8064 + }, + { + "epoch": 0.221444261394838, + "grad_norm": 0.39769163727760315, + "learning_rate": 1.9414266983190243e-05, + "loss": 0.5771, + "step": 8065 + }, + { + "epoch": 0.2214717188358045, + "grad_norm": 0.41833820939064026, + "learning_rate": 1.9414121332311637e-05, + "loss": 0.6255, + "step": 8066 + }, + { + "epoch": 0.22149917627677101, + "grad_norm": 0.3350055515766144, + "learning_rate": 1.941397566387269e-05, + "loss": 0.5677, + "step": 8067 + }, + { + "epoch": 0.2215266337177375, + "grad_norm": 0.3791565001010895, + "learning_rate": 1.9413829977873676e-05, + "loss": 0.5524, + "step": 8068 + }, + { + "epoch": 0.221554091158704, + "grad_norm": 0.33183881640434265, + "learning_rate": 1.9413684274314862e-05, + "loss": 0.5177, + "step": 8069 + }, + { + "epoch": 0.2215815485996705, + "grad_norm": 0.4252963066101074, + "learning_rate": 1.9413538553196524e-05, + "loss": 0.5096, + "step": 8070 + }, + { + "epoch": 0.22160900604063702, + "grad_norm": 0.384536474943161, + "learning_rate": 1.941339281451893e-05, + "loss": 0.5441, + "step": 8071 + }, + { + "epoch": 0.2216364634816035, + "grad_norm": 0.35665157437324524, + "learning_rate": 1.941324705828236e-05, + "loss": 0.5445, + "step": 8072 + }, + { + "epoch": 0.22166392092257, + "grad_norm": 0.35787519812583923, + "learning_rate": 1.9413101284487072e-05, + "loss": 0.5839, + "step": 8073 + }, + { + "epoch": 0.22169137836353653, + "grad_norm": 0.40504369139671326, + "learning_rate": 1.9412955493133348e-05, + "loss": 0.4853, + "step": 8074 + }, + { + "epoch": 0.22171883580450302, + "grad_norm": 0.34215256571769714, + "learning_rate": 1.941280968422146e-05, + "loss": 0.5166, + "step": 8075 + }, + { + "epoch": 0.22174629324546952, + "grad_norm": 0.34900760650634766, + "learning_rate": 1.941266385775168e-05, + "loss": 0.5137, + "step": 8076 + }, + { + "epoch": 0.221773750686436, + "grad_norm": 0.33578115701675415, + "learning_rate": 1.941251801372427e-05, + "loss": 0.5322, + "step": 8077 + }, + { + "epoch": 0.22180120812740253, + "grad_norm": 0.379616379737854, + "learning_rate": 1.941237215213952e-05, + "loss": 0.4597, + "step": 8078 + }, + { + "epoch": 0.22182866556836903, + "grad_norm": 0.34200215339660645, + "learning_rate": 1.9412226272997684e-05, + "loss": 0.5655, + "step": 8079 + }, + { + "epoch": 0.22185612300933552, + "grad_norm": 0.372405081987381, + "learning_rate": 1.9412080376299044e-05, + "loss": 0.5579, + "step": 8080 + }, + { + "epoch": 0.22188358045030204, + "grad_norm": 0.3557495176792145, + "learning_rate": 1.941193446204387e-05, + "loss": 0.5228, + "step": 8081 + }, + { + "epoch": 0.22191103789126854, + "grad_norm": 0.3343425989151001, + "learning_rate": 1.9411788530232433e-05, + "loss": 0.4215, + "step": 8082 + }, + { + "epoch": 0.22193849533223503, + "grad_norm": 0.40734240412712097, + "learning_rate": 1.9411642580865007e-05, + "loss": 0.4984, + "step": 8083 + }, + { + "epoch": 0.22196595277320152, + "grad_norm": 0.333194375038147, + "learning_rate": 1.941149661394187e-05, + "loss": 0.5733, + "step": 8084 + }, + { + "epoch": 0.22199341021416805, + "grad_norm": 0.3769598603248596, + "learning_rate": 1.941135062946328e-05, + "loss": 0.4221, + "step": 8085 + }, + { + "epoch": 0.22202086765513454, + "grad_norm": 0.3758370876312256, + "learning_rate": 1.9411204627429517e-05, + "loss": 0.4936, + "step": 8086 + }, + { + "epoch": 0.22204832509610103, + "grad_norm": 0.3733516037464142, + "learning_rate": 1.9411058607840858e-05, + "loss": 0.5278, + "step": 8087 + }, + { + "epoch": 0.22207578253706756, + "grad_norm": 0.44331783056259155, + "learning_rate": 1.941091257069757e-05, + "loss": 0.561, + "step": 8088 + }, + { + "epoch": 0.22210323997803405, + "grad_norm": 0.3653470575809479, + "learning_rate": 1.941076651599993e-05, + "loss": 0.5397, + "step": 8089 + }, + { + "epoch": 0.22213069741900054, + "grad_norm": 0.44359949231147766, + "learning_rate": 1.94106204437482e-05, + "loss": 0.4615, + "step": 8090 + }, + { + "epoch": 0.22215815485996704, + "grad_norm": 0.35146793723106384, + "learning_rate": 1.9410474353942663e-05, + "loss": 0.5512, + "step": 8091 + }, + { + "epoch": 0.22218561230093356, + "grad_norm": 0.3815726637840271, + "learning_rate": 1.9410328246583586e-05, + "loss": 0.5272, + "step": 8092 + }, + { + "epoch": 0.22221306974190005, + "grad_norm": 0.3618924617767334, + "learning_rate": 1.9410182121671245e-05, + "loss": 0.5908, + "step": 8093 + }, + { + "epoch": 0.22224052718286655, + "grad_norm": 0.3695196211338043, + "learning_rate": 1.941003597920591e-05, + "loss": 0.611, + "step": 8094 + }, + { + "epoch": 0.22226798462383307, + "grad_norm": 0.4186045527458191, + "learning_rate": 1.9409889819187856e-05, + "loss": 0.5999, + "step": 8095 + }, + { + "epoch": 0.22229544206479956, + "grad_norm": 0.3763331174850464, + "learning_rate": 1.9409743641617353e-05, + "loss": 0.5191, + "step": 8096 + }, + { + "epoch": 0.22232289950576606, + "grad_norm": 0.3983600437641144, + "learning_rate": 1.9409597446494678e-05, + "loss": 0.5703, + "step": 8097 + }, + { + "epoch": 0.22235035694673255, + "grad_norm": 0.39477670192718506, + "learning_rate": 1.9409451233820098e-05, + "loss": 0.5549, + "step": 8098 + }, + { + "epoch": 0.22237781438769907, + "grad_norm": 0.369409441947937, + "learning_rate": 1.9409305003593887e-05, + "loss": 0.4906, + "step": 8099 + }, + { + "epoch": 0.22240527182866557, + "grad_norm": 0.3776870369911194, + "learning_rate": 1.940915875581632e-05, + "loss": 0.5514, + "step": 8100 + }, + { + "epoch": 0.22243272926963206, + "grad_norm": 0.5322780609130859, + "learning_rate": 1.940901249048767e-05, + "loss": 0.5302, + "step": 8101 + }, + { + "epoch": 0.22246018671059858, + "grad_norm": 1.0888235569000244, + "learning_rate": 1.9408866207608206e-05, + "loss": 0.5463, + "step": 8102 + }, + { + "epoch": 0.22248764415156508, + "grad_norm": 0.37611550092697144, + "learning_rate": 1.9408719907178207e-05, + "loss": 0.5496, + "step": 8103 + }, + { + "epoch": 0.22251510159253157, + "grad_norm": 0.41753020882606506, + "learning_rate": 1.940857358919794e-05, + "loss": 0.5997, + "step": 8104 + }, + { + "epoch": 0.22254255903349807, + "grad_norm": 0.3216085135936737, + "learning_rate": 1.940842725366768e-05, + "loss": 0.4694, + "step": 8105 + }, + { + "epoch": 0.2225700164744646, + "grad_norm": 0.37054505944252014, + "learning_rate": 1.9408280900587704e-05, + "loss": 0.4918, + "step": 8106 + }, + { + "epoch": 0.22259747391543108, + "grad_norm": 0.34465891122817993, + "learning_rate": 1.9408134529958277e-05, + "loss": 0.466, + "step": 8107 + }, + { + "epoch": 0.22262493135639758, + "grad_norm": 0.37950631976127625, + "learning_rate": 1.940798814177968e-05, + "loss": 0.5828, + "step": 8108 + }, + { + "epoch": 0.2226523887973641, + "grad_norm": 0.34023621678352356, + "learning_rate": 1.9407841736052183e-05, + "loss": 0.4421, + "step": 8109 + }, + { + "epoch": 0.2226798462383306, + "grad_norm": 0.4022599160671234, + "learning_rate": 1.9407695312776054e-05, + "loss": 0.5185, + "step": 8110 + }, + { + "epoch": 0.2227073036792971, + "grad_norm": 0.3752758800983429, + "learning_rate": 1.940754887195157e-05, + "loss": 0.5481, + "step": 8111 + }, + { + "epoch": 0.22273476112026358, + "grad_norm": 0.3862849473953247, + "learning_rate": 1.940740241357901e-05, + "loss": 0.4948, + "step": 8112 + }, + { + "epoch": 0.2227622185612301, + "grad_norm": 0.3702285885810852, + "learning_rate": 1.940725593765864e-05, + "loss": 0.5327, + "step": 8113 + }, + { + "epoch": 0.2227896760021966, + "grad_norm": 0.35002318024635315, + "learning_rate": 1.9407109444190736e-05, + "loss": 0.5123, + "step": 8114 + }, + { + "epoch": 0.2228171334431631, + "grad_norm": 0.34253624081611633, + "learning_rate": 1.940696293317557e-05, + "loss": 0.5392, + "step": 8115 + }, + { + "epoch": 0.2228445908841296, + "grad_norm": 0.42128393054008484, + "learning_rate": 1.9406816404613415e-05, + "loss": 0.5638, + "step": 8116 + }, + { + "epoch": 0.2228720483250961, + "grad_norm": 0.40481773018836975, + "learning_rate": 1.9406669858504542e-05, + "loss": 0.5295, + "step": 8117 + }, + { + "epoch": 0.2228995057660626, + "grad_norm": 0.3671500086784363, + "learning_rate": 1.9406523294849232e-05, + "loss": 0.5792, + "step": 8118 + }, + { + "epoch": 0.2229269632070291, + "grad_norm": 0.4110405445098877, + "learning_rate": 1.940637671364775e-05, + "loss": 0.5985, + "step": 8119 + }, + { + "epoch": 0.22295442064799562, + "grad_norm": 0.3592608869075775, + "learning_rate": 1.940623011490038e-05, + "loss": 0.4866, + "step": 8120 + }, + { + "epoch": 0.2229818780889621, + "grad_norm": 0.3975278437137604, + "learning_rate": 1.9406083498607385e-05, + "loss": 0.5696, + "step": 8121 + }, + { + "epoch": 0.2230093355299286, + "grad_norm": 0.8165764212608337, + "learning_rate": 1.9405936864769043e-05, + "loss": 0.5169, + "step": 8122 + }, + { + "epoch": 0.22303679297089513, + "grad_norm": 0.3947144150733948, + "learning_rate": 1.9405790213385623e-05, + "loss": 0.4833, + "step": 8123 + }, + { + "epoch": 0.22306425041186162, + "grad_norm": 0.331704705953598, + "learning_rate": 1.9405643544457403e-05, + "loss": 0.4864, + "step": 8124 + }, + { + "epoch": 0.22309170785282811, + "grad_norm": 0.3743974566459656, + "learning_rate": 1.940549685798466e-05, + "loss": 0.6104, + "step": 8125 + }, + { + "epoch": 0.2231191652937946, + "grad_norm": 0.39051494002342224, + "learning_rate": 1.9405350153967658e-05, + "loss": 0.5959, + "step": 8126 + }, + { + "epoch": 0.22314662273476113, + "grad_norm": 0.34079813957214355, + "learning_rate": 1.9405203432406684e-05, + "loss": 0.4791, + "step": 8127 + }, + { + "epoch": 0.22317408017572762, + "grad_norm": 0.3683178126811981, + "learning_rate": 1.9405056693301998e-05, + "loss": 0.5776, + "step": 8128 + }, + { + "epoch": 0.22320153761669412, + "grad_norm": 0.307147353887558, + "learning_rate": 1.940490993665388e-05, + "loss": 0.4954, + "step": 8129 + }, + { + "epoch": 0.22322899505766064, + "grad_norm": 0.3666066527366638, + "learning_rate": 1.9404763162462603e-05, + "loss": 0.5417, + "step": 8130 + }, + { + "epoch": 0.22325645249862713, + "grad_norm": 0.3381190598011017, + "learning_rate": 1.9404616370728443e-05, + "loss": 0.4771, + "step": 8131 + }, + { + "epoch": 0.22328390993959363, + "grad_norm": 0.33160486817359924, + "learning_rate": 1.9404469561451668e-05, + "loss": 0.4289, + "step": 8132 + }, + { + "epoch": 0.22331136738056012, + "grad_norm": 0.4484253525733948, + "learning_rate": 1.9404322734632563e-05, + "loss": 0.4942, + "step": 8133 + }, + { + "epoch": 0.22333882482152664, + "grad_norm": 0.3430327773094177, + "learning_rate": 1.940417589027139e-05, + "loss": 0.4425, + "step": 8134 + }, + { + "epoch": 0.22336628226249314, + "grad_norm": 0.38501861691474915, + "learning_rate": 1.9404029028368426e-05, + "loss": 0.4384, + "step": 8135 + }, + { + "epoch": 0.22339373970345963, + "grad_norm": 0.3726552426815033, + "learning_rate": 1.9403882148923947e-05, + "loss": 0.5079, + "step": 8136 + }, + { + "epoch": 0.22342119714442613, + "grad_norm": 0.39679890871047974, + "learning_rate": 1.9403735251938228e-05, + "loss": 0.5707, + "step": 8137 + }, + { + "epoch": 0.22344865458539265, + "grad_norm": 0.33097171783447266, + "learning_rate": 1.9403588337411538e-05, + "loss": 0.4561, + "step": 8138 + }, + { + "epoch": 0.22347611202635914, + "grad_norm": 0.370498389005661, + "learning_rate": 1.9403441405344158e-05, + "loss": 0.559, + "step": 8139 + }, + { + "epoch": 0.22350356946732564, + "grad_norm": 0.3464675545692444, + "learning_rate": 1.9403294455736358e-05, + "loss": 0.5593, + "step": 8140 + }, + { + "epoch": 0.22353102690829216, + "grad_norm": 0.3214951455593109, + "learning_rate": 1.9403147488588414e-05, + "loss": 0.455, + "step": 8141 + }, + { + "epoch": 0.22355848434925865, + "grad_norm": 0.3698200583457947, + "learning_rate": 1.94030005039006e-05, + "loss": 0.4109, + "step": 8142 + }, + { + "epoch": 0.22358594179022515, + "grad_norm": 0.37049373984336853, + "learning_rate": 1.940285350167318e-05, + "loss": 0.5085, + "step": 8143 + }, + { + "epoch": 0.22361339923119164, + "grad_norm": 0.3983760178089142, + "learning_rate": 1.940270648190645e-05, + "loss": 0.5381, + "step": 8144 + }, + { + "epoch": 0.22364085667215816, + "grad_norm": 0.38429495692253113, + "learning_rate": 1.9402559444600663e-05, + "loss": 0.6073, + "step": 8145 + }, + { + "epoch": 0.22366831411312466, + "grad_norm": 0.3701890707015991, + "learning_rate": 1.9402412389756104e-05, + "loss": 0.5944, + "step": 8146 + }, + { + "epoch": 0.22369577155409115, + "grad_norm": 0.4375746548175812, + "learning_rate": 1.940226531737304e-05, + "loss": 0.6594, + "step": 8147 + }, + { + "epoch": 0.22372322899505767, + "grad_norm": 0.3941718339920044, + "learning_rate": 1.940211822745176e-05, + "loss": 0.6006, + "step": 8148 + }, + { + "epoch": 0.22375068643602417, + "grad_norm": 0.34504422545433044, + "learning_rate": 1.9401971119992523e-05, + "loss": 0.4602, + "step": 8149 + }, + { + "epoch": 0.22377814387699066, + "grad_norm": 0.3628522455692291, + "learning_rate": 1.9401823994995608e-05, + "loss": 0.4945, + "step": 8150 + }, + { + "epoch": 0.22380560131795715, + "grad_norm": 0.6254026293754578, + "learning_rate": 1.9401676852461292e-05, + "loss": 0.5244, + "step": 8151 + }, + { + "epoch": 0.22383305875892368, + "grad_norm": 0.3853122889995575, + "learning_rate": 1.940152969238985e-05, + "loss": 0.5559, + "step": 8152 + }, + { + "epoch": 0.22386051619989017, + "grad_norm": 0.40778622031211853, + "learning_rate": 1.940138251478155e-05, + "loss": 0.5079, + "step": 8153 + }, + { + "epoch": 0.22388797364085666, + "grad_norm": 0.3807878792285919, + "learning_rate": 1.9401235319636674e-05, + "loss": 0.5529, + "step": 8154 + }, + { + "epoch": 0.22391543108182319, + "grad_norm": 0.4939849078655243, + "learning_rate": 1.9401088106955494e-05, + "loss": 0.6086, + "step": 8155 + }, + { + "epoch": 0.22394288852278968, + "grad_norm": 0.5908517241477966, + "learning_rate": 1.9400940876738283e-05, + "loss": 0.5496, + "step": 8156 + }, + { + "epoch": 0.22397034596375617, + "grad_norm": 0.35418739914894104, + "learning_rate": 1.9400793628985318e-05, + "loss": 0.5532, + "step": 8157 + }, + { + "epoch": 0.22399780340472267, + "grad_norm": 0.8612314462661743, + "learning_rate": 1.9400646363696873e-05, + "loss": 0.5032, + "step": 8158 + }, + { + "epoch": 0.2240252608456892, + "grad_norm": 0.36516737937927246, + "learning_rate": 1.940049908087322e-05, + "loss": 0.4895, + "step": 8159 + }, + { + "epoch": 0.22405271828665568, + "grad_norm": 0.35089412331581116, + "learning_rate": 1.9400351780514638e-05, + "loss": 0.5366, + "step": 8160 + }, + { + "epoch": 0.22408017572762218, + "grad_norm": 0.37289080023765564, + "learning_rate": 1.9400204462621398e-05, + "loss": 0.5755, + "step": 8161 + }, + { + "epoch": 0.2241076331685887, + "grad_norm": 0.3228849470615387, + "learning_rate": 1.9400057127193777e-05, + "loss": 0.4443, + "step": 8162 + }, + { + "epoch": 0.2241350906095552, + "grad_norm": 0.35565316677093506, + "learning_rate": 1.939990977423205e-05, + "loss": 0.5044, + "step": 8163 + }, + { + "epoch": 0.2241625480505217, + "grad_norm": 0.3674179017543793, + "learning_rate": 1.9399762403736492e-05, + "loss": 0.5079, + "step": 8164 + }, + { + "epoch": 0.22419000549148818, + "grad_norm": 0.38229286670684814, + "learning_rate": 1.9399615015707373e-05, + "loss": 0.5112, + "step": 8165 + }, + { + "epoch": 0.2242174629324547, + "grad_norm": 0.4192439615726471, + "learning_rate": 1.9399467610144977e-05, + "loss": 0.5181, + "step": 8166 + }, + { + "epoch": 0.2242449203734212, + "grad_norm": 0.3487095534801483, + "learning_rate": 1.939932018704957e-05, + "loss": 0.5, + "step": 8167 + }, + { + "epoch": 0.2242723778143877, + "grad_norm": 0.3799746036529541, + "learning_rate": 1.9399172746421434e-05, + "loss": 0.5262, + "step": 8168 + }, + { + "epoch": 0.2242998352553542, + "grad_norm": 0.34884634613990784, + "learning_rate": 1.939902528826084e-05, + "loss": 0.4214, + "step": 8169 + }, + { + "epoch": 0.2243272926963207, + "grad_norm": 0.3416674733161926, + "learning_rate": 1.9398877812568063e-05, + "loss": 0.512, + "step": 8170 + }, + { + "epoch": 0.2243547501372872, + "grad_norm": 0.34479090571403503, + "learning_rate": 1.939873031934338e-05, + "loss": 0.577, + "step": 8171 + }, + { + "epoch": 0.2243822075782537, + "grad_norm": 0.3860377371311188, + "learning_rate": 1.9398582808587062e-05, + "loss": 0.6078, + "step": 8172 + }, + { + "epoch": 0.22440966501922022, + "grad_norm": 0.35651302337646484, + "learning_rate": 1.9398435280299394e-05, + "loss": 0.5435, + "step": 8173 + }, + { + "epoch": 0.2244371224601867, + "grad_norm": 0.33736810088157654, + "learning_rate": 1.9398287734480637e-05, + "loss": 0.4772, + "step": 8174 + }, + { + "epoch": 0.2244645799011532, + "grad_norm": 0.389241099357605, + "learning_rate": 1.939814017113108e-05, + "loss": 0.6196, + "step": 8175 + }, + { + "epoch": 0.22449203734211973, + "grad_norm": 0.3848176896572113, + "learning_rate": 1.939799259025099e-05, + "loss": 0.5439, + "step": 8176 + }, + { + "epoch": 0.22451949478308622, + "grad_norm": 0.3971244990825653, + "learning_rate": 1.9397844991840646e-05, + "loss": 0.6287, + "step": 8177 + }, + { + "epoch": 0.22454695222405271, + "grad_norm": 0.43334460258483887, + "learning_rate": 1.9397697375900314e-05, + "loss": 0.6153, + "step": 8178 + }, + { + "epoch": 0.2245744096650192, + "grad_norm": 0.35335347056388855, + "learning_rate": 1.9397549742430283e-05, + "loss": 0.5235, + "step": 8179 + }, + { + "epoch": 0.22460186710598573, + "grad_norm": 0.3384202718734741, + "learning_rate": 1.9397402091430827e-05, + "loss": 0.4728, + "step": 8180 + }, + { + "epoch": 0.22462932454695222, + "grad_norm": 0.3337043225765228, + "learning_rate": 1.9397254422902208e-05, + "loss": 0.4552, + "step": 8181 + }, + { + "epoch": 0.22465678198791872, + "grad_norm": 0.3568907678127289, + "learning_rate": 1.9397106736844716e-05, + "loss": 0.5303, + "step": 8182 + }, + { + "epoch": 0.22468423942888524, + "grad_norm": 0.32075825333595276, + "learning_rate": 1.939695903325862e-05, + "loss": 0.4616, + "step": 8183 + }, + { + "epoch": 0.22471169686985173, + "grad_norm": 0.3909532427787781, + "learning_rate": 1.939681131214419e-05, + "loss": 0.5744, + "step": 8184 + }, + { + "epoch": 0.22473915431081823, + "grad_norm": 0.4133884906768799, + "learning_rate": 1.9396663573501718e-05, + "loss": 0.5627, + "step": 8185 + }, + { + "epoch": 0.22476661175178472, + "grad_norm": 0.3536604344844818, + "learning_rate": 1.9396515817331463e-05, + "loss": 0.5931, + "step": 8186 + }, + { + "epoch": 0.22479406919275124, + "grad_norm": 0.35507336258888245, + "learning_rate": 1.9396368043633708e-05, + "loss": 0.5154, + "step": 8187 + }, + { + "epoch": 0.22482152663371774, + "grad_norm": 0.3671905994415283, + "learning_rate": 1.9396220252408727e-05, + "loss": 0.5213, + "step": 8188 + }, + { + "epoch": 0.22484898407468423, + "grad_norm": 0.36801913380622864, + "learning_rate": 1.93960724436568e-05, + "loss": 0.5455, + "step": 8189 + }, + { + "epoch": 0.22487644151565075, + "grad_norm": 0.3268343210220337, + "learning_rate": 1.9395924617378196e-05, + "loss": 0.4839, + "step": 8190 + }, + { + "epoch": 0.22490389895661725, + "grad_norm": 0.33030569553375244, + "learning_rate": 1.939577677357319e-05, + "loss": 0.4389, + "step": 8191 + }, + { + "epoch": 0.22493135639758374, + "grad_norm": 0.3602979779243469, + "learning_rate": 1.9395628912242064e-05, + "loss": 0.5704, + "step": 8192 + }, + { + "epoch": 0.22495881383855024, + "grad_norm": 0.3248290419578552, + "learning_rate": 1.9395481033385094e-05, + "loss": 0.489, + "step": 8193 + }, + { + "epoch": 0.22498627127951676, + "grad_norm": 0.40111038088798523, + "learning_rate": 1.9395333137002552e-05, + "loss": 0.6126, + "step": 8194 + }, + { + "epoch": 0.22501372872048325, + "grad_norm": 0.3910524249076843, + "learning_rate": 1.9395185223094718e-05, + "loss": 0.548, + "step": 8195 + }, + { + "epoch": 0.22504118616144975, + "grad_norm": 0.3715423047542572, + "learning_rate": 1.939503729166186e-05, + "loss": 0.5686, + "step": 8196 + }, + { + "epoch": 0.22506864360241627, + "grad_norm": 0.36621761322021484, + "learning_rate": 1.939488934270426e-05, + "loss": 0.6206, + "step": 8197 + }, + { + "epoch": 0.22509610104338276, + "grad_norm": 0.3830116093158722, + "learning_rate": 1.9394741376222194e-05, + "loss": 0.5879, + "step": 8198 + }, + { + "epoch": 0.22512355848434926, + "grad_norm": 0.3857790231704712, + "learning_rate": 1.939459339221594e-05, + "loss": 0.4498, + "step": 8199 + }, + { + "epoch": 0.22515101592531575, + "grad_norm": 0.3503985106945038, + "learning_rate": 1.9394445390685766e-05, + "loss": 0.4833, + "step": 8200 + }, + { + "epoch": 0.22517847336628227, + "grad_norm": 0.40891578793525696, + "learning_rate": 1.9394297371631955e-05, + "loss": 0.4714, + "step": 8201 + }, + { + "epoch": 0.22520593080724877, + "grad_norm": 0.3622020483016968, + "learning_rate": 1.939414933505478e-05, + "loss": 0.5441, + "step": 8202 + }, + { + "epoch": 0.22523338824821526, + "grad_norm": 0.4088347256183624, + "learning_rate": 1.9394001280954517e-05, + "loss": 0.6546, + "step": 8203 + }, + { + "epoch": 0.22526084568918175, + "grad_norm": 0.35266146063804626, + "learning_rate": 1.939385320933145e-05, + "loss": 0.5449, + "step": 8204 + }, + { + "epoch": 0.22528830313014828, + "grad_norm": 0.38363903760910034, + "learning_rate": 1.939370512018584e-05, + "loss": 0.4649, + "step": 8205 + }, + { + "epoch": 0.22531576057111477, + "grad_norm": 0.32043397426605225, + "learning_rate": 1.9393557013517978e-05, + "loss": 0.4311, + "step": 8206 + }, + { + "epoch": 0.22534321801208126, + "grad_norm": 0.35859671235084534, + "learning_rate": 1.939340888932813e-05, + "loss": 0.5176, + "step": 8207 + }, + { + "epoch": 0.2253706754530478, + "grad_norm": 0.37504032254219055, + "learning_rate": 1.939326074761658e-05, + "loss": 0.5584, + "step": 8208 + }, + { + "epoch": 0.22539813289401428, + "grad_norm": 0.39444440603256226, + "learning_rate": 1.93931125883836e-05, + "loss": 0.4843, + "step": 8209 + }, + { + "epoch": 0.22542559033498077, + "grad_norm": 0.3432862460613251, + "learning_rate": 1.939296441162946e-05, + "loss": 0.5092, + "step": 8210 + }, + { + "epoch": 0.22545304777594727, + "grad_norm": 0.4030492901802063, + "learning_rate": 1.9392816217354454e-05, + "loss": 0.5344, + "step": 8211 + }, + { + "epoch": 0.2254805052169138, + "grad_norm": 0.3438984155654907, + "learning_rate": 1.939266800555884e-05, + "loss": 0.5418, + "step": 8212 + }, + { + "epoch": 0.22550796265788028, + "grad_norm": 0.4286535084247589, + "learning_rate": 1.9392519776242907e-05, + "loss": 0.4657, + "step": 8213 + }, + { + "epoch": 0.22553542009884678, + "grad_norm": 0.3363277018070221, + "learning_rate": 1.9392371529406927e-05, + "loss": 0.4964, + "step": 8214 + }, + { + "epoch": 0.2255628775398133, + "grad_norm": 0.3975432813167572, + "learning_rate": 1.9392223265051175e-05, + "loss": 0.5285, + "step": 8215 + }, + { + "epoch": 0.2255903349807798, + "grad_norm": 0.3355986773967743, + "learning_rate": 1.939207498317593e-05, + "loss": 0.5071, + "step": 8216 + }, + { + "epoch": 0.2256177924217463, + "grad_norm": 0.3327268958091736, + "learning_rate": 1.9391926683781466e-05, + "loss": 0.4665, + "step": 8217 + }, + { + "epoch": 0.22564524986271278, + "grad_norm": 0.3756018877029419, + "learning_rate": 1.939177836686806e-05, + "loss": 0.6186, + "step": 8218 + }, + { + "epoch": 0.2256727073036793, + "grad_norm": 0.371738463640213, + "learning_rate": 1.939163003243599e-05, + "loss": 0.632, + "step": 8219 + }, + { + "epoch": 0.2257001647446458, + "grad_norm": 0.3623785078525543, + "learning_rate": 1.9391481680485535e-05, + "loss": 0.5015, + "step": 8220 + }, + { + "epoch": 0.2257276221856123, + "grad_norm": 0.35255128145217896, + "learning_rate": 1.9391333311016968e-05, + "loss": 0.5564, + "step": 8221 + }, + { + "epoch": 0.22575507962657881, + "grad_norm": 0.34015733003616333, + "learning_rate": 1.9391184924030568e-05, + "loss": 0.5311, + "step": 8222 + }, + { + "epoch": 0.2257825370675453, + "grad_norm": 0.33660411834716797, + "learning_rate": 1.939103651952661e-05, + "loss": 0.3982, + "step": 8223 + }, + { + "epoch": 0.2258099945085118, + "grad_norm": 0.425221711397171, + "learning_rate": 1.9390888097505373e-05, + "loss": 0.5843, + "step": 8224 + }, + { + "epoch": 0.2258374519494783, + "grad_norm": 0.37836596369743347, + "learning_rate": 1.939073965796713e-05, + "loss": 0.5103, + "step": 8225 + }, + { + "epoch": 0.22586490939044482, + "grad_norm": 0.3715420961380005, + "learning_rate": 1.9390591200912162e-05, + "loss": 0.5559, + "step": 8226 + }, + { + "epoch": 0.2258923668314113, + "grad_norm": 0.49609532952308655, + "learning_rate": 1.9390442726340742e-05, + "loss": 0.601, + "step": 8227 + }, + { + "epoch": 0.2259198242723778, + "grad_norm": 0.3451451361179352, + "learning_rate": 1.939029423425315e-05, + "loss": 0.5227, + "step": 8228 + }, + { + "epoch": 0.22594728171334433, + "grad_norm": 0.33748912811279297, + "learning_rate": 1.9390145724649665e-05, + "loss": 0.5151, + "step": 8229 + }, + { + "epoch": 0.22597473915431082, + "grad_norm": 0.3437998294830322, + "learning_rate": 1.938999719753056e-05, + "loss": 0.4683, + "step": 8230 + }, + { + "epoch": 0.22600219659527732, + "grad_norm": 0.40850162506103516, + "learning_rate": 1.9389848652896113e-05, + "loss": 0.5356, + "step": 8231 + }, + { + "epoch": 0.2260296540362438, + "grad_norm": 1.3040398359298706, + "learning_rate": 1.9389700090746602e-05, + "loss": 0.5616, + "step": 8232 + }, + { + "epoch": 0.22605711147721033, + "grad_norm": 0.42423245310783386, + "learning_rate": 1.9389551511082303e-05, + "loss": 0.5635, + "step": 8233 + }, + { + "epoch": 0.22608456891817683, + "grad_norm": 0.3468954563140869, + "learning_rate": 1.9389402913903495e-05, + "loss": 0.54, + "step": 8234 + }, + { + "epoch": 0.22611202635914332, + "grad_norm": 0.3918916881084442, + "learning_rate": 1.938925429921045e-05, + "loss": 0.4775, + "step": 8235 + }, + { + "epoch": 0.22613948380010984, + "grad_norm": 0.4532139301300049, + "learning_rate": 1.938910566700345e-05, + "loss": 0.6267, + "step": 8236 + }, + { + "epoch": 0.22616694124107634, + "grad_norm": 0.3539528250694275, + "learning_rate": 1.9388957017282775e-05, + "loss": 0.4387, + "step": 8237 + }, + { + "epoch": 0.22619439868204283, + "grad_norm": 0.36046403646469116, + "learning_rate": 1.9388808350048697e-05, + "loss": 0.5509, + "step": 8238 + }, + { + "epoch": 0.22622185612300932, + "grad_norm": 0.3788404166698456, + "learning_rate": 1.9388659665301494e-05, + "loss": 0.5497, + "step": 8239 + }, + { + "epoch": 0.22624931356397585, + "grad_norm": 0.3916245400905609, + "learning_rate": 1.938851096304144e-05, + "loss": 0.5745, + "step": 8240 + }, + { + "epoch": 0.22627677100494234, + "grad_norm": 0.3724147379398346, + "learning_rate": 1.9388362243268823e-05, + "loss": 0.5605, + "step": 8241 + }, + { + "epoch": 0.22630422844590883, + "grad_norm": 0.3624235987663269, + "learning_rate": 1.9388213505983914e-05, + "loss": 0.5241, + "step": 8242 + }, + { + "epoch": 0.22633168588687536, + "grad_norm": 0.5791997909545898, + "learning_rate": 1.9388064751186985e-05, + "loss": 0.5601, + "step": 8243 + }, + { + "epoch": 0.22635914332784185, + "grad_norm": 0.4999411702156067, + "learning_rate": 1.9387915978878324e-05, + "loss": 0.5273, + "step": 8244 + }, + { + "epoch": 0.22638660076880834, + "grad_norm": 0.4118257761001587, + "learning_rate": 1.93877671890582e-05, + "loss": 0.5306, + "step": 8245 + }, + { + "epoch": 0.22641405820977484, + "grad_norm": 0.3871108293533325, + "learning_rate": 1.9387618381726897e-05, + "loss": 0.5492, + "step": 8246 + }, + { + "epoch": 0.22644151565074136, + "grad_norm": 0.37442949414253235, + "learning_rate": 1.9387469556884687e-05, + "loss": 0.5591, + "step": 8247 + }, + { + "epoch": 0.22646897309170785, + "grad_norm": 0.37944644689559937, + "learning_rate": 1.938732071453185e-05, + "loss": 0.5953, + "step": 8248 + }, + { + "epoch": 0.22649643053267435, + "grad_norm": 0.4290885329246521, + "learning_rate": 1.9387171854668665e-05, + "loss": 0.579, + "step": 8249 + }, + { + "epoch": 0.22652388797364087, + "grad_norm": 0.3293841481208801, + "learning_rate": 1.938702297729541e-05, + "loss": 0.4879, + "step": 8250 + }, + { + "epoch": 0.22655134541460736, + "grad_norm": 0.44695019721984863, + "learning_rate": 1.938687408241236e-05, + "loss": 0.5447, + "step": 8251 + }, + { + "epoch": 0.22657880285557386, + "grad_norm": 0.36266106367111206, + "learning_rate": 1.938672517001979e-05, + "loss": 0.5833, + "step": 8252 + }, + { + "epoch": 0.22660626029654035, + "grad_norm": 0.38067343831062317, + "learning_rate": 1.9386576240117987e-05, + "loss": 0.4788, + "step": 8253 + }, + { + "epoch": 0.22663371773750687, + "grad_norm": 0.3255939483642578, + "learning_rate": 1.938642729270722e-05, + "loss": 0.4983, + "step": 8254 + }, + { + "epoch": 0.22666117517847337, + "grad_norm": 0.3380301892757416, + "learning_rate": 1.9386278327787772e-05, + "loss": 0.3782, + "step": 8255 + }, + { + "epoch": 0.22668863261943986, + "grad_norm": 0.3777279257774353, + "learning_rate": 1.938612934535992e-05, + "loss": 0.525, + "step": 8256 + }, + { + "epoch": 0.22671609006040638, + "grad_norm": 0.3482208847999573, + "learning_rate": 1.9385980345423938e-05, + "loss": 0.5632, + "step": 8257 + }, + { + "epoch": 0.22674354750137288, + "grad_norm": 0.3751506507396698, + "learning_rate": 1.938583132798011e-05, + "loss": 0.4955, + "step": 8258 + }, + { + "epoch": 0.22677100494233937, + "grad_norm": 0.48257002234458923, + "learning_rate": 1.938568229302871e-05, + "loss": 0.642, + "step": 8259 + }, + { + "epoch": 0.22679846238330587, + "grad_norm": 0.36828184127807617, + "learning_rate": 1.9385533240570018e-05, + "loss": 0.5496, + "step": 8260 + }, + { + "epoch": 0.2268259198242724, + "grad_norm": 0.36173102259635925, + "learning_rate": 1.938538417060431e-05, + "loss": 0.5731, + "step": 8261 + }, + { + "epoch": 0.22685337726523888, + "grad_norm": 0.404882550239563, + "learning_rate": 1.9385235083131863e-05, + "loss": 0.5991, + "step": 8262 + }, + { + "epoch": 0.22688083470620538, + "grad_norm": 0.42122867703437805, + "learning_rate": 1.938508597815296e-05, + "loss": 0.5175, + "step": 8263 + }, + { + "epoch": 0.2269082921471719, + "grad_norm": 0.41757726669311523, + "learning_rate": 1.938493685566788e-05, + "loss": 0.5439, + "step": 8264 + }, + { + "epoch": 0.2269357495881384, + "grad_norm": 0.3838428854942322, + "learning_rate": 1.938478771567689e-05, + "loss": 0.5365, + "step": 8265 + }, + { + "epoch": 0.22696320702910489, + "grad_norm": 0.36478307843208313, + "learning_rate": 1.9384638558180277e-05, + "loss": 0.4969, + "step": 8266 + }, + { + "epoch": 0.22699066447007138, + "grad_norm": 0.3522917628288269, + "learning_rate": 1.9384489383178322e-05, + "loss": 0.4406, + "step": 8267 + }, + { + "epoch": 0.2270181219110379, + "grad_norm": 0.3297838568687439, + "learning_rate": 1.9384340190671293e-05, + "loss": 0.4684, + "step": 8268 + }, + { + "epoch": 0.2270455793520044, + "grad_norm": 0.36850079894065857, + "learning_rate": 1.9384190980659482e-05, + "loss": 0.5012, + "step": 8269 + }, + { + "epoch": 0.2270730367929709, + "grad_norm": 0.3305157721042633, + "learning_rate": 1.9384041753143155e-05, + "loss": 0.5177, + "step": 8270 + }, + { + "epoch": 0.22710049423393738, + "grad_norm": 0.38315239548683167, + "learning_rate": 1.9383892508122594e-05, + "loss": 0.5439, + "step": 8271 + }, + { + "epoch": 0.2271279516749039, + "grad_norm": 0.3304203450679779, + "learning_rate": 1.938374324559808e-05, + "loss": 0.4765, + "step": 8272 + }, + { + "epoch": 0.2271554091158704, + "grad_norm": 0.3577759265899658, + "learning_rate": 1.938359396556989e-05, + "loss": 0.4821, + "step": 8273 + }, + { + "epoch": 0.2271828665568369, + "grad_norm": 0.43631523847579956, + "learning_rate": 1.9383444668038302e-05, + "loss": 0.4951, + "step": 8274 + }, + { + "epoch": 0.22721032399780342, + "grad_norm": 0.3672175407409668, + "learning_rate": 1.9383295353003597e-05, + "loss": 0.4348, + "step": 8275 + }, + { + "epoch": 0.2272377814387699, + "grad_norm": 0.3975318968296051, + "learning_rate": 1.9383146020466047e-05, + "loss": 0.5396, + "step": 8276 + }, + { + "epoch": 0.2272652388797364, + "grad_norm": 0.3787493109703064, + "learning_rate": 1.938299667042594e-05, + "loss": 0.4477, + "step": 8277 + }, + { + "epoch": 0.2272926963207029, + "grad_norm": 0.3545624911785126, + "learning_rate": 1.9382847302883548e-05, + "loss": 0.5539, + "step": 8278 + }, + { + "epoch": 0.22732015376166942, + "grad_norm": 0.34871143102645874, + "learning_rate": 1.938269791783915e-05, + "loss": 0.4938, + "step": 8279 + }, + { + "epoch": 0.2273476112026359, + "grad_norm": 0.37139931321144104, + "learning_rate": 1.9382548515293026e-05, + "loss": 0.5272, + "step": 8280 + }, + { + "epoch": 0.2273750686436024, + "grad_norm": 0.3813760578632355, + "learning_rate": 1.9382399095245453e-05, + "loss": 0.4886, + "step": 8281 + }, + { + "epoch": 0.22740252608456893, + "grad_norm": 0.3447664678096771, + "learning_rate": 1.9382249657696716e-05, + "loss": 0.586, + "step": 8282 + }, + { + "epoch": 0.22742998352553542, + "grad_norm": 0.3689160645008087, + "learning_rate": 1.938210020264708e-05, + "loss": 0.633, + "step": 8283 + }, + { + "epoch": 0.22745744096650192, + "grad_norm": 0.3481333553791046, + "learning_rate": 1.9381950730096842e-05, + "loss": 0.5802, + "step": 8284 + }, + { + "epoch": 0.2274848984074684, + "grad_norm": 0.32448044419288635, + "learning_rate": 1.9381801240046268e-05, + "loss": 0.4434, + "step": 8285 + }, + { + "epoch": 0.22751235584843493, + "grad_norm": 0.3732805848121643, + "learning_rate": 1.9381651732495638e-05, + "loss": 0.5638, + "step": 8286 + }, + { + "epoch": 0.22753981328940143, + "grad_norm": 0.3274799883365631, + "learning_rate": 1.9381502207445236e-05, + "loss": 0.5158, + "step": 8287 + }, + { + "epoch": 0.22756727073036792, + "grad_norm": 0.41455018520355225, + "learning_rate": 1.9381352664895337e-05, + "loss": 0.4542, + "step": 8288 + }, + { + "epoch": 0.22759472817133444, + "grad_norm": 0.4866873323917389, + "learning_rate": 1.938120310484622e-05, + "loss": 0.5796, + "step": 8289 + }, + { + "epoch": 0.22762218561230094, + "grad_norm": 0.33844077587127686, + "learning_rate": 1.9381053527298167e-05, + "loss": 0.5087, + "step": 8290 + }, + { + "epoch": 0.22764964305326743, + "grad_norm": 0.3990241289138794, + "learning_rate": 1.938090393225146e-05, + "loss": 0.5487, + "step": 8291 + }, + { + "epoch": 0.22767710049423392, + "grad_norm": 0.394076406955719, + "learning_rate": 1.9380754319706365e-05, + "loss": 0.4936, + "step": 8292 + }, + { + "epoch": 0.22770455793520045, + "grad_norm": 0.37080851197242737, + "learning_rate": 1.938060468966317e-05, + "loss": 0.5943, + "step": 8293 + }, + { + "epoch": 0.22773201537616694, + "grad_norm": 0.33536213636398315, + "learning_rate": 1.9380455042122158e-05, + "loss": 0.5476, + "step": 8294 + }, + { + "epoch": 0.22775947281713343, + "grad_norm": 0.37987062335014343, + "learning_rate": 1.93803053770836e-05, + "loss": 0.534, + "step": 8295 + }, + { + "epoch": 0.22778693025809996, + "grad_norm": 0.3935850262641907, + "learning_rate": 1.938015569454778e-05, + "loss": 0.5242, + "step": 8296 + }, + { + "epoch": 0.22781438769906645, + "grad_norm": 0.3473701775074005, + "learning_rate": 1.9380005994514978e-05, + "loss": 0.4842, + "step": 8297 + }, + { + "epoch": 0.22784184514003294, + "grad_norm": 0.46271824836730957, + "learning_rate": 1.9379856276985466e-05, + "loss": 0.5722, + "step": 8298 + }, + { + "epoch": 0.22786930258099944, + "grad_norm": 0.4648285210132599, + "learning_rate": 1.937970654195953e-05, + "loss": 0.5331, + "step": 8299 + }, + { + "epoch": 0.22789676002196596, + "grad_norm": 0.3610915243625641, + "learning_rate": 1.937955678943745e-05, + "loss": 0.5108, + "step": 8300 + }, + { + "epoch": 0.22792421746293245, + "grad_norm": 0.7849158048629761, + "learning_rate": 1.93794070194195e-05, + "loss": 0.6301, + "step": 8301 + }, + { + "epoch": 0.22795167490389895, + "grad_norm": 0.3651820123195648, + "learning_rate": 1.9379257231905964e-05, + "loss": 0.5084, + "step": 8302 + }, + { + "epoch": 0.22797913234486547, + "grad_norm": 0.35178637504577637, + "learning_rate": 1.9379107426897122e-05, + "loss": 0.5794, + "step": 8303 + }, + { + "epoch": 0.22800658978583196, + "grad_norm": 0.41504713892936707, + "learning_rate": 1.9378957604393245e-05, + "loss": 0.541, + "step": 8304 + }, + { + "epoch": 0.22803404722679846, + "grad_norm": 0.35744708776474, + "learning_rate": 1.9378807764394625e-05, + "loss": 0.4818, + "step": 8305 + }, + { + "epoch": 0.22806150466776495, + "grad_norm": 0.36151817440986633, + "learning_rate": 1.937865790690153e-05, + "loss": 0.5922, + "step": 8306 + }, + { + "epoch": 0.22808896210873147, + "grad_norm": 0.3591896891593933, + "learning_rate": 1.937850803191425e-05, + "loss": 0.5875, + "step": 8307 + }, + { + "epoch": 0.22811641954969797, + "grad_norm": 0.40182918310165405, + "learning_rate": 1.9378358139433057e-05, + "loss": 0.5161, + "step": 8308 + }, + { + "epoch": 0.22814387699066446, + "grad_norm": 0.5497351288795471, + "learning_rate": 1.9378208229458232e-05, + "loss": 0.5613, + "step": 8309 + }, + { + "epoch": 0.22817133443163098, + "grad_norm": 0.3608291447162628, + "learning_rate": 1.9378058301990056e-05, + "loss": 0.5138, + "step": 8310 + }, + { + "epoch": 0.22819879187259748, + "grad_norm": 0.349816232919693, + "learning_rate": 1.9377908357028803e-05, + "loss": 0.5403, + "step": 8311 + }, + { + "epoch": 0.22822624931356397, + "grad_norm": 0.400680273771286, + "learning_rate": 1.9377758394574765e-05, + "loss": 0.5153, + "step": 8312 + }, + { + "epoch": 0.22825370675453047, + "grad_norm": 0.3917526304721832, + "learning_rate": 1.9377608414628212e-05, + "loss": 0.5556, + "step": 8313 + }, + { + "epoch": 0.228281164195497, + "grad_norm": 0.34170255064964294, + "learning_rate": 1.9377458417189426e-05, + "loss": 0.5511, + "step": 8314 + }, + { + "epoch": 0.22830862163646348, + "grad_norm": 0.40340352058410645, + "learning_rate": 1.937730840225869e-05, + "loss": 0.5398, + "step": 8315 + }, + { + "epoch": 0.22833607907742998, + "grad_norm": 0.3228435218334198, + "learning_rate": 1.937715836983628e-05, + "loss": 0.5185, + "step": 8316 + }, + { + "epoch": 0.2283635365183965, + "grad_norm": 0.3587694764137268, + "learning_rate": 1.9377008319922473e-05, + "loss": 0.5958, + "step": 8317 + }, + { + "epoch": 0.228390993959363, + "grad_norm": 0.5970098376274109, + "learning_rate": 1.9376858252517556e-05, + "loss": 0.4964, + "step": 8318 + }, + { + "epoch": 0.2284184514003295, + "grad_norm": 0.3690517842769623, + "learning_rate": 1.9376708167621805e-05, + "loss": 0.4465, + "step": 8319 + }, + { + "epoch": 0.22844590884129598, + "grad_norm": 0.37074998021125793, + "learning_rate": 1.93765580652355e-05, + "loss": 0.5765, + "step": 8320 + }, + { + "epoch": 0.2284733662822625, + "grad_norm": 0.3895581066608429, + "learning_rate": 1.9376407945358923e-05, + "loss": 0.5424, + "step": 8321 + }, + { + "epoch": 0.228500823723229, + "grad_norm": 0.43124139308929443, + "learning_rate": 1.937625780799235e-05, + "loss": 0.5628, + "step": 8322 + }, + { + "epoch": 0.2285282811641955, + "grad_norm": 0.40431898832321167, + "learning_rate": 1.937610765313607e-05, + "loss": 0.5221, + "step": 8323 + }, + { + "epoch": 0.228555738605162, + "grad_norm": 0.43194764852523804, + "learning_rate": 1.937595748079035e-05, + "loss": 0.6126, + "step": 8324 + }, + { + "epoch": 0.2285831960461285, + "grad_norm": 0.35418859124183655, + "learning_rate": 1.9375807290955476e-05, + "loss": 0.4963, + "step": 8325 + }, + { + "epoch": 0.228610653487095, + "grad_norm": 0.37133777141571045, + "learning_rate": 1.937565708363173e-05, + "loss": 0.5561, + "step": 8326 + }, + { + "epoch": 0.2286381109280615, + "grad_norm": 0.36189162731170654, + "learning_rate": 1.9375506858819395e-05, + "loss": 0.5532, + "step": 8327 + }, + { + "epoch": 0.22866556836902802, + "grad_norm": 0.4030328094959259, + "learning_rate": 1.9375356616518742e-05, + "loss": 0.47, + "step": 8328 + }, + { + "epoch": 0.2286930258099945, + "grad_norm": 0.3655211627483368, + "learning_rate": 1.9375206356730064e-05, + "loss": 0.5113, + "step": 8329 + }, + { + "epoch": 0.228720483250961, + "grad_norm": 0.39692947268486023, + "learning_rate": 1.937505607945363e-05, + "loss": 0.5941, + "step": 8330 + }, + { + "epoch": 0.22874794069192753, + "grad_norm": 0.3223343789577484, + "learning_rate": 1.9374905784689723e-05, + "loss": 0.4245, + "step": 8331 + }, + { + "epoch": 0.22877539813289402, + "grad_norm": 0.37122848629951477, + "learning_rate": 1.9374755472438623e-05, + "loss": 0.6966, + "step": 8332 + }, + { + "epoch": 0.22880285557386051, + "grad_norm": 0.30354607105255127, + "learning_rate": 1.9374605142700616e-05, + "loss": 0.4847, + "step": 8333 + }, + { + "epoch": 0.228830313014827, + "grad_norm": 0.34248772263526917, + "learning_rate": 1.9374454795475976e-05, + "loss": 0.5452, + "step": 8334 + }, + { + "epoch": 0.22885777045579353, + "grad_norm": 0.47256413102149963, + "learning_rate": 1.9374304430764988e-05, + "loss": 0.551, + "step": 8335 + }, + { + "epoch": 0.22888522789676002, + "grad_norm": 0.3832658529281616, + "learning_rate": 1.937415404856793e-05, + "loss": 0.5174, + "step": 8336 + }, + { + "epoch": 0.22891268533772652, + "grad_norm": 0.35880351066589355, + "learning_rate": 1.9374003648885083e-05, + "loss": 0.5184, + "step": 8337 + }, + { + "epoch": 0.228940142778693, + "grad_norm": 0.36333250999450684, + "learning_rate": 1.9373853231716728e-05, + "loss": 0.5274, + "step": 8338 + }, + { + "epoch": 0.22896760021965953, + "grad_norm": 0.4805937111377716, + "learning_rate": 1.937370279706314e-05, + "loss": 0.4883, + "step": 8339 + }, + { + "epoch": 0.22899505766062603, + "grad_norm": 0.5494170784950256, + "learning_rate": 1.937355234492461e-05, + "loss": 0.5185, + "step": 8340 + }, + { + "epoch": 0.22902251510159252, + "grad_norm": 0.3449389636516571, + "learning_rate": 1.9373401875301407e-05, + "loss": 0.4778, + "step": 8341 + }, + { + "epoch": 0.22904997254255904, + "grad_norm": 0.33531445264816284, + "learning_rate": 1.9373251388193823e-05, + "loss": 0.5024, + "step": 8342 + }, + { + "epoch": 0.22907742998352554, + "grad_norm": 0.41718944907188416, + "learning_rate": 1.937310088360213e-05, + "loss": 0.5451, + "step": 8343 + }, + { + "epoch": 0.22910488742449203, + "grad_norm": 0.3556210398674011, + "learning_rate": 1.9372950361526617e-05, + "loss": 0.5563, + "step": 8344 + }, + { + "epoch": 0.22913234486545853, + "grad_norm": 0.3673871159553528, + "learning_rate": 1.9372799821967558e-05, + "loss": 0.4622, + "step": 8345 + }, + { + "epoch": 0.22915980230642505, + "grad_norm": 0.34238117933273315, + "learning_rate": 1.9372649264925237e-05, + "loss": 0.5268, + "step": 8346 + }, + { + "epoch": 0.22918725974739154, + "grad_norm": 0.42789629101753235, + "learning_rate": 1.937249869039993e-05, + "loss": 0.5243, + "step": 8347 + }, + { + "epoch": 0.22921471718835804, + "grad_norm": 0.39402592182159424, + "learning_rate": 1.9372348098391925e-05, + "loss": 0.4856, + "step": 8348 + }, + { + "epoch": 0.22924217462932456, + "grad_norm": 0.3448143005371094, + "learning_rate": 1.9372197488901497e-05, + "loss": 0.5623, + "step": 8349 + }, + { + "epoch": 0.22926963207029105, + "grad_norm": 0.39573538303375244, + "learning_rate": 1.9372046861928932e-05, + "loss": 0.5098, + "step": 8350 + }, + { + "epoch": 0.22929708951125755, + "grad_norm": 0.360020250082016, + "learning_rate": 1.937189621747451e-05, + "loss": 0.5175, + "step": 8351 + }, + { + "epoch": 0.22932454695222404, + "grad_norm": 0.33992478251457214, + "learning_rate": 1.9371745555538504e-05, + "loss": 0.4483, + "step": 8352 + }, + { + "epoch": 0.22935200439319056, + "grad_norm": 0.36409255862236023, + "learning_rate": 1.9371594876121207e-05, + "loss": 0.5202, + "step": 8353 + }, + { + "epoch": 0.22937946183415706, + "grad_norm": 0.3408110737800598, + "learning_rate": 1.9371444179222894e-05, + "loss": 0.4872, + "step": 8354 + }, + { + "epoch": 0.22940691927512355, + "grad_norm": 0.4453859329223633, + "learning_rate": 1.9371293464843844e-05, + "loss": 0.436, + "step": 8355 + }, + { + "epoch": 0.22943437671609007, + "grad_norm": 0.3627023994922638, + "learning_rate": 1.937114273298434e-05, + "loss": 0.5081, + "step": 8356 + }, + { + "epoch": 0.22946183415705657, + "grad_norm": 0.35361307859420776, + "learning_rate": 1.9370991983644667e-05, + "loss": 0.5822, + "step": 8357 + }, + { + "epoch": 0.22948929159802306, + "grad_norm": 0.3736143708229065, + "learning_rate": 1.9370841216825103e-05, + "loss": 0.5692, + "step": 8358 + }, + { + "epoch": 0.22951674903898955, + "grad_norm": 0.4052828550338745, + "learning_rate": 1.9370690432525926e-05, + "loss": 0.5243, + "step": 8359 + }, + { + "epoch": 0.22954420647995608, + "grad_norm": 0.3726356327533722, + "learning_rate": 1.9370539630747425e-05, + "loss": 0.5305, + "step": 8360 + }, + { + "epoch": 0.22957166392092257, + "grad_norm": 0.3779815137386322, + "learning_rate": 1.9370388811489873e-05, + "loss": 0.5178, + "step": 8361 + }, + { + "epoch": 0.22959912136188906, + "grad_norm": 0.4221555292606354, + "learning_rate": 1.9370237974753556e-05, + "loss": 0.548, + "step": 8362 + }, + { + "epoch": 0.22962657880285559, + "grad_norm": 0.3809456527233124, + "learning_rate": 1.9370087120538755e-05, + "loss": 0.4832, + "step": 8363 + }, + { + "epoch": 0.22965403624382208, + "grad_norm": 0.44110214710235596, + "learning_rate": 1.9369936248845755e-05, + "loss": 0.5036, + "step": 8364 + }, + { + "epoch": 0.22968149368478857, + "grad_norm": 0.360573410987854, + "learning_rate": 1.936978535967483e-05, + "loss": 0.4149, + "step": 8365 + }, + { + "epoch": 0.22970895112575507, + "grad_norm": 0.3249720335006714, + "learning_rate": 1.936963445302626e-05, + "loss": 0.524, + "step": 8366 + }, + { + "epoch": 0.2297364085667216, + "grad_norm": 0.3680763840675354, + "learning_rate": 1.936948352890034e-05, + "loss": 0.5011, + "step": 8367 + }, + { + "epoch": 0.22976386600768808, + "grad_norm": 0.31878310441970825, + "learning_rate": 1.9369332587297336e-05, + "loss": 0.4144, + "step": 8368 + }, + { + "epoch": 0.22979132344865458, + "grad_norm": 0.38002678751945496, + "learning_rate": 1.936918162821754e-05, + "loss": 0.5736, + "step": 8369 + }, + { + "epoch": 0.2298187808896211, + "grad_norm": 0.3544250428676605, + "learning_rate": 1.936903065166123e-05, + "loss": 0.5988, + "step": 8370 + }, + { + "epoch": 0.2298462383305876, + "grad_norm": 0.4053061902523041, + "learning_rate": 1.9368879657628685e-05, + "loss": 0.4748, + "step": 8371 + }, + { + "epoch": 0.2298736957715541, + "grad_norm": 0.36695608496665955, + "learning_rate": 1.936872864612019e-05, + "loss": 0.5766, + "step": 8372 + }, + { + "epoch": 0.22990115321252058, + "grad_norm": 0.3581576943397522, + "learning_rate": 1.9368577617136025e-05, + "loss": 0.5771, + "step": 8373 + }, + { + "epoch": 0.2299286106534871, + "grad_norm": 0.8321191072463989, + "learning_rate": 1.9368426570676475e-05, + "loss": 0.4991, + "step": 8374 + }, + { + "epoch": 0.2299560680944536, + "grad_norm": 0.37169933319091797, + "learning_rate": 1.9368275506741822e-05, + "loss": 0.3949, + "step": 8375 + }, + { + "epoch": 0.2299835255354201, + "grad_norm": 0.4076373875141144, + "learning_rate": 1.936812442533234e-05, + "loss": 0.4871, + "step": 8376 + }, + { + "epoch": 0.2300109829763866, + "grad_norm": 0.4650156497955322, + "learning_rate": 1.936797332644832e-05, + "loss": 0.4494, + "step": 8377 + }, + { + "epoch": 0.2300384404173531, + "grad_norm": 0.38125163316726685, + "learning_rate": 1.9367822210090034e-05, + "loss": 0.5277, + "step": 8378 + }, + { + "epoch": 0.2300658978583196, + "grad_norm": 0.39368772506713867, + "learning_rate": 1.9367671076257775e-05, + "loss": 0.5699, + "step": 8379 + }, + { + "epoch": 0.2300933552992861, + "grad_norm": 0.3228253722190857, + "learning_rate": 1.936751992495182e-05, + "loss": 0.4354, + "step": 8380 + }, + { + "epoch": 0.23012081274025262, + "grad_norm": 0.35213181376457214, + "learning_rate": 1.9367368756172444e-05, + "loss": 0.5301, + "step": 8381 + }, + { + "epoch": 0.2301482701812191, + "grad_norm": 0.4002475440502167, + "learning_rate": 1.936721756991994e-05, + "loss": 0.5426, + "step": 8382 + }, + { + "epoch": 0.2301757276221856, + "grad_norm": 0.3616049885749817, + "learning_rate": 1.9367066366194586e-05, + "loss": 0.4724, + "step": 8383 + }, + { + "epoch": 0.23020318506315213, + "grad_norm": 0.3268059194087982, + "learning_rate": 1.9366915144996665e-05, + "loss": 0.4045, + "step": 8384 + }, + { + "epoch": 0.23023064250411862, + "grad_norm": 0.3999198079109192, + "learning_rate": 1.9366763906326454e-05, + "loss": 0.5548, + "step": 8385 + }, + { + "epoch": 0.23025809994508512, + "grad_norm": 0.3608124256134033, + "learning_rate": 1.9366612650184243e-05, + "loss": 0.5682, + "step": 8386 + }, + { + "epoch": 0.2302855573860516, + "grad_norm": 0.420049786567688, + "learning_rate": 1.9366461376570308e-05, + "loss": 0.5517, + "step": 8387 + }, + { + "epoch": 0.23031301482701813, + "grad_norm": 0.3834000527858734, + "learning_rate": 1.9366310085484935e-05, + "loss": 0.5566, + "step": 8388 + }, + { + "epoch": 0.23034047226798463, + "grad_norm": 0.3702384829521179, + "learning_rate": 1.93661587769284e-05, + "loss": 0.5881, + "step": 8389 + }, + { + "epoch": 0.23036792970895112, + "grad_norm": 0.42525824904441833, + "learning_rate": 1.936600745090099e-05, + "loss": 0.5553, + "step": 8390 + }, + { + "epoch": 0.23039538714991764, + "grad_norm": 0.3670484721660614, + "learning_rate": 1.9365856107402988e-05, + "loss": 0.5382, + "step": 8391 + }, + { + "epoch": 0.23042284459088413, + "grad_norm": 0.3988783061504364, + "learning_rate": 1.9365704746434677e-05, + "loss": 0.5892, + "step": 8392 + }, + { + "epoch": 0.23045030203185063, + "grad_norm": 0.41409626603126526, + "learning_rate": 1.9365553367996335e-05, + "loss": 0.5549, + "step": 8393 + }, + { + "epoch": 0.23047775947281712, + "grad_norm": 0.3989344537258148, + "learning_rate": 1.936540197208825e-05, + "loss": 0.5479, + "step": 8394 + }, + { + "epoch": 0.23050521691378364, + "grad_norm": 0.348515123128891, + "learning_rate": 1.9365250558710697e-05, + "loss": 0.5679, + "step": 8395 + }, + { + "epoch": 0.23053267435475014, + "grad_norm": 0.369470477104187, + "learning_rate": 1.9365099127863966e-05, + "loss": 0.6099, + "step": 8396 + }, + { + "epoch": 0.23056013179571663, + "grad_norm": 0.35255682468414307, + "learning_rate": 1.9364947679548333e-05, + "loss": 0.5293, + "step": 8397 + }, + { + "epoch": 0.23058758923668315, + "grad_norm": 0.3967011272907257, + "learning_rate": 1.936479621376409e-05, + "loss": 0.4981, + "step": 8398 + }, + { + "epoch": 0.23061504667764965, + "grad_norm": 0.37821337580680847, + "learning_rate": 1.9364644730511505e-05, + "loss": 0.6683, + "step": 8399 + }, + { + "epoch": 0.23064250411861614, + "grad_norm": 0.5256261229515076, + "learning_rate": 1.9364493229790872e-05, + "loss": 0.5095, + "step": 8400 + }, + { + "epoch": 0.23066996155958264, + "grad_norm": 0.4660869538784027, + "learning_rate": 1.936434171160247e-05, + "loss": 0.5405, + "step": 8401 + }, + { + "epoch": 0.23069741900054916, + "grad_norm": 0.32417139410972595, + "learning_rate": 1.9364190175946588e-05, + "loss": 0.6054, + "step": 8402 + }, + { + "epoch": 0.23072487644151565, + "grad_norm": 0.36102524399757385, + "learning_rate": 1.9364038622823496e-05, + "loss": 0.5149, + "step": 8403 + }, + { + "epoch": 0.23075233388248215, + "grad_norm": 0.35940611362457275, + "learning_rate": 1.9363887052233483e-05, + "loss": 0.518, + "step": 8404 + }, + { + "epoch": 0.23077979132344864, + "grad_norm": 0.36217042803764343, + "learning_rate": 1.9363735464176835e-05, + "loss": 0.534, + "step": 8405 + }, + { + "epoch": 0.23080724876441516, + "grad_norm": 0.5451474785804749, + "learning_rate": 1.936358385865383e-05, + "loss": 0.5027, + "step": 8406 + }, + { + "epoch": 0.23083470620538166, + "grad_norm": 0.36515772342681885, + "learning_rate": 1.936343223566475e-05, + "loss": 0.5295, + "step": 8407 + }, + { + "epoch": 0.23086216364634815, + "grad_norm": 0.4011467397212982, + "learning_rate": 1.936328059520989e-05, + "loss": 0.6563, + "step": 8408 + }, + { + "epoch": 0.23088962108731467, + "grad_norm": 0.4163006544113159, + "learning_rate": 1.9363128937289516e-05, + "loss": 0.5445, + "step": 8409 + }, + { + "epoch": 0.23091707852828117, + "grad_norm": 0.3622475564479828, + "learning_rate": 1.9362977261903917e-05, + "loss": 0.4908, + "step": 8410 + }, + { + "epoch": 0.23094453596924766, + "grad_norm": 0.323786199092865, + "learning_rate": 1.936282556905338e-05, + "loss": 0.4534, + "step": 8411 + }, + { + "epoch": 0.23097199341021415, + "grad_norm": 0.41697025299072266, + "learning_rate": 1.9362673858738183e-05, + "loss": 0.6305, + "step": 8412 + }, + { + "epoch": 0.23099945085118068, + "grad_norm": 0.34213608503341675, + "learning_rate": 1.9362522130958612e-05, + "loss": 0.521, + "step": 8413 + }, + { + "epoch": 0.23102690829214717, + "grad_norm": 0.3749963343143463, + "learning_rate": 1.936237038571495e-05, + "loss": 0.5196, + "step": 8414 + }, + { + "epoch": 0.23105436573311366, + "grad_norm": 0.4436042904853821, + "learning_rate": 1.9362218623007476e-05, + "loss": 0.582, + "step": 8415 + }, + { + "epoch": 0.2310818231740802, + "grad_norm": 0.45022717118263245, + "learning_rate": 1.936206684283648e-05, + "loss": 0.63, + "step": 8416 + }, + { + "epoch": 0.23110928061504668, + "grad_norm": 0.37875375151634216, + "learning_rate": 1.936191504520224e-05, + "loss": 0.5056, + "step": 8417 + }, + { + "epoch": 0.23113673805601317, + "grad_norm": 0.34428831934928894, + "learning_rate": 1.936176323010504e-05, + "loss": 0.5388, + "step": 8418 + }, + { + "epoch": 0.23116419549697967, + "grad_norm": 0.42678502202033997, + "learning_rate": 1.9361611397545164e-05, + "loss": 0.5922, + "step": 8419 + }, + { + "epoch": 0.2311916529379462, + "grad_norm": 0.40828803181648254, + "learning_rate": 1.9361459547522895e-05, + "loss": 0.5532, + "step": 8420 + }, + { + "epoch": 0.23121911037891268, + "grad_norm": 0.40800923109054565, + "learning_rate": 1.9361307680038517e-05, + "loss": 0.5837, + "step": 8421 + }, + { + "epoch": 0.23124656781987918, + "grad_norm": 0.3530968427658081, + "learning_rate": 1.936115579509231e-05, + "loss": 0.5601, + "step": 8422 + }, + { + "epoch": 0.2312740252608457, + "grad_norm": 0.36083531379699707, + "learning_rate": 1.936100389268456e-05, + "loss": 0.5005, + "step": 8423 + }, + { + "epoch": 0.2313014827018122, + "grad_norm": 0.3541795015335083, + "learning_rate": 1.9360851972815553e-05, + "loss": 0.5592, + "step": 8424 + }, + { + "epoch": 0.2313289401427787, + "grad_norm": 0.37679022550582886, + "learning_rate": 1.9360700035485567e-05, + "loss": 0.5128, + "step": 8425 + }, + { + "epoch": 0.23135639758374518, + "grad_norm": 0.4327252507209778, + "learning_rate": 1.9360548080694888e-05, + "loss": 0.6306, + "step": 8426 + }, + { + "epoch": 0.2313838550247117, + "grad_norm": 0.48539143800735474, + "learning_rate": 1.93603961084438e-05, + "loss": 0.5395, + "step": 8427 + }, + { + "epoch": 0.2314113124656782, + "grad_norm": 0.3696403205394745, + "learning_rate": 1.9360244118732584e-05, + "loss": 0.5279, + "step": 8428 + }, + { + "epoch": 0.2314387699066447, + "grad_norm": 0.390292227268219, + "learning_rate": 1.936009211156153e-05, + "loss": 0.5573, + "step": 8429 + }, + { + "epoch": 0.23146622734761121, + "grad_norm": 0.3671209216117859, + "learning_rate": 1.9359940086930914e-05, + "loss": 0.5187, + "step": 8430 + }, + { + "epoch": 0.2314936847885777, + "grad_norm": 0.32470062375068665, + "learning_rate": 1.935978804484102e-05, + "loss": 0.517, + "step": 8431 + }, + { + "epoch": 0.2315211422295442, + "grad_norm": 0.3870636820793152, + "learning_rate": 1.9359635985292135e-05, + "loss": 0.5423, + "step": 8432 + }, + { + "epoch": 0.2315485996705107, + "grad_norm": 0.33690956234931946, + "learning_rate": 1.9359483908284546e-05, + "loss": 0.4664, + "step": 8433 + }, + { + "epoch": 0.23157605711147722, + "grad_norm": 0.3568219542503357, + "learning_rate": 1.9359331813818526e-05, + "loss": 0.5389, + "step": 8434 + }, + { + "epoch": 0.2316035145524437, + "grad_norm": 0.33700230717658997, + "learning_rate": 1.935917970189437e-05, + "loss": 0.4946, + "step": 8435 + }, + { + "epoch": 0.2316309719934102, + "grad_norm": 0.41333431005477905, + "learning_rate": 1.9359027572512353e-05, + "loss": 0.5301, + "step": 8436 + }, + { + "epoch": 0.23165842943437673, + "grad_norm": 0.4045794606208801, + "learning_rate": 1.9358875425672762e-05, + "loss": 0.5401, + "step": 8437 + }, + { + "epoch": 0.23168588687534322, + "grad_norm": 0.3702659606933594, + "learning_rate": 1.9358723261375883e-05, + "loss": 0.522, + "step": 8438 + }, + { + "epoch": 0.23171334431630972, + "grad_norm": 0.36181724071502686, + "learning_rate": 1.9358571079622e-05, + "loss": 0.521, + "step": 8439 + }, + { + "epoch": 0.2317408017572762, + "grad_norm": 0.3695557713508606, + "learning_rate": 1.9358418880411395e-05, + "loss": 0.4926, + "step": 8440 + }, + { + "epoch": 0.23176825919824273, + "grad_norm": 0.40611955523490906, + "learning_rate": 1.935826666374435e-05, + "loss": 0.5905, + "step": 8441 + }, + { + "epoch": 0.23179571663920923, + "grad_norm": 0.5479353666305542, + "learning_rate": 1.935811442962115e-05, + "loss": 0.6161, + "step": 8442 + }, + { + "epoch": 0.23182317408017572, + "grad_norm": 0.4138200283050537, + "learning_rate": 1.9357962178042083e-05, + "loss": 0.5386, + "step": 8443 + }, + { + "epoch": 0.23185063152114224, + "grad_norm": 0.36864376068115234, + "learning_rate": 1.9357809909007428e-05, + "loss": 0.5716, + "step": 8444 + }, + { + "epoch": 0.23187808896210874, + "grad_norm": 0.4255823493003845, + "learning_rate": 1.935765762251747e-05, + "loss": 0.6117, + "step": 8445 + }, + { + "epoch": 0.23190554640307523, + "grad_norm": 0.4143540859222412, + "learning_rate": 1.9357505318572496e-05, + "loss": 0.5111, + "step": 8446 + }, + { + "epoch": 0.23193300384404172, + "grad_norm": 0.8079360127449036, + "learning_rate": 1.9357352997172784e-05, + "loss": 0.6535, + "step": 8447 + }, + { + "epoch": 0.23196046128500825, + "grad_norm": 0.38722383975982666, + "learning_rate": 1.9357200658318626e-05, + "loss": 0.5619, + "step": 8448 + }, + { + "epoch": 0.23198791872597474, + "grad_norm": 0.30461180210113525, + "learning_rate": 1.93570483020103e-05, + "loss": 0.4615, + "step": 8449 + }, + { + "epoch": 0.23201537616694123, + "grad_norm": 0.38834893703460693, + "learning_rate": 1.9356895928248097e-05, + "loss": 0.5416, + "step": 8450 + }, + { + "epoch": 0.23204283360790776, + "grad_norm": 0.4207307994365692, + "learning_rate": 1.9356743537032294e-05, + "loss": 0.6292, + "step": 8451 + }, + { + "epoch": 0.23207029104887425, + "grad_norm": 0.3035129904747009, + "learning_rate": 1.9356591128363177e-05, + "loss": 0.4341, + "step": 8452 + }, + { + "epoch": 0.23209774848984074, + "grad_norm": 0.3611961603164673, + "learning_rate": 1.935643870224103e-05, + "loss": 0.584, + "step": 8453 + }, + { + "epoch": 0.23212520593080724, + "grad_norm": 0.4158090651035309, + "learning_rate": 1.935628625866614e-05, + "loss": 0.492, + "step": 8454 + }, + { + "epoch": 0.23215266337177376, + "grad_norm": 0.5046626329421997, + "learning_rate": 1.9356133797638793e-05, + "loss": 0.5711, + "step": 8455 + }, + { + "epoch": 0.23218012081274025, + "grad_norm": 0.4193890690803528, + "learning_rate": 1.9355981319159267e-05, + "loss": 0.4891, + "step": 8456 + }, + { + "epoch": 0.23220757825370675, + "grad_norm": 0.33180728554725647, + "learning_rate": 1.935582882322785e-05, + "loss": 0.6084, + "step": 8457 + }, + { + "epoch": 0.23223503569467327, + "grad_norm": 0.40174567699432373, + "learning_rate": 1.9355676309844828e-05, + "loss": 0.6026, + "step": 8458 + }, + { + "epoch": 0.23226249313563976, + "grad_norm": 0.38191354274749756, + "learning_rate": 1.935552377901048e-05, + "loss": 0.55, + "step": 8459 + }, + { + "epoch": 0.23228995057660626, + "grad_norm": 0.3343692123889923, + "learning_rate": 1.93553712307251e-05, + "loss": 0.4343, + "step": 8460 + }, + { + "epoch": 0.23231740801757275, + "grad_norm": 0.36166682839393616, + "learning_rate": 1.9355218664988958e-05, + "loss": 0.5238, + "step": 8461 + }, + { + "epoch": 0.23234486545853927, + "grad_norm": 0.426285058259964, + "learning_rate": 1.9355066081802353e-05, + "loss": 0.5877, + "step": 8462 + }, + { + "epoch": 0.23237232289950577, + "grad_norm": 0.359115332365036, + "learning_rate": 1.9354913481165565e-05, + "loss": 0.5131, + "step": 8463 + }, + { + "epoch": 0.23239978034047226, + "grad_norm": 0.3586997091770172, + "learning_rate": 1.9354760863078875e-05, + "loss": 0.4571, + "step": 8464 + }, + { + "epoch": 0.23242723778143878, + "grad_norm": 0.40124601125717163, + "learning_rate": 1.935460822754257e-05, + "loss": 0.5754, + "step": 8465 + }, + { + "epoch": 0.23245469522240528, + "grad_norm": 0.350619912147522, + "learning_rate": 1.9354455574556942e-05, + "loss": 0.5586, + "step": 8466 + }, + { + "epoch": 0.23248215266337177, + "grad_norm": 0.466869056224823, + "learning_rate": 1.935430290412226e-05, + "loss": 0.6146, + "step": 8467 + }, + { + "epoch": 0.23250961010433827, + "grad_norm": 0.5188923478126526, + "learning_rate": 1.9354150216238823e-05, + "loss": 0.5365, + "step": 8468 + }, + { + "epoch": 0.2325370675453048, + "grad_norm": 0.37030351161956787, + "learning_rate": 1.935399751090691e-05, + "loss": 0.53, + "step": 8469 + }, + { + "epoch": 0.23256452498627128, + "grad_norm": 0.38124024868011475, + "learning_rate": 1.9353844788126805e-05, + "loss": 0.5574, + "step": 8470 + }, + { + "epoch": 0.23259198242723778, + "grad_norm": 0.3539254069328308, + "learning_rate": 1.935369204789879e-05, + "loss": 0.5557, + "step": 8471 + }, + { + "epoch": 0.23261943986820427, + "grad_norm": 0.32976844906806946, + "learning_rate": 1.9353539290223158e-05, + "loss": 0.5195, + "step": 8472 + }, + { + "epoch": 0.2326468973091708, + "grad_norm": 0.36041536927223206, + "learning_rate": 1.9353386515100188e-05, + "loss": 0.5452, + "step": 8473 + }, + { + "epoch": 0.23267435475013729, + "grad_norm": 0.3967084288597107, + "learning_rate": 1.935323372253017e-05, + "loss": 0.5089, + "step": 8474 + }, + { + "epoch": 0.23270181219110378, + "grad_norm": 0.3400379419326782, + "learning_rate": 1.9353080912513383e-05, + "loss": 0.5392, + "step": 8475 + }, + { + "epoch": 0.2327292696320703, + "grad_norm": 0.3672572672367096, + "learning_rate": 1.9352928085050116e-05, + "loss": 0.5357, + "step": 8476 + }, + { + "epoch": 0.2327567270730368, + "grad_norm": 0.3795188069343567, + "learning_rate": 1.9352775240140654e-05, + "loss": 0.5341, + "step": 8477 + }, + { + "epoch": 0.2327841845140033, + "grad_norm": 0.35329607129096985, + "learning_rate": 1.935262237778528e-05, + "loss": 0.4511, + "step": 8478 + }, + { + "epoch": 0.23281164195496978, + "grad_norm": 0.3488442003726959, + "learning_rate": 1.935246949798428e-05, + "loss": 0.498, + "step": 8479 + }, + { + "epoch": 0.2328390993959363, + "grad_norm": 0.36890390515327454, + "learning_rate": 1.935231660073794e-05, + "loss": 0.5839, + "step": 8480 + }, + { + "epoch": 0.2328665568369028, + "grad_norm": 0.38353490829467773, + "learning_rate": 1.9352163686046546e-05, + "loss": 0.5816, + "step": 8481 + }, + { + "epoch": 0.2328940142778693, + "grad_norm": 0.40030544996261597, + "learning_rate": 1.935201075391038e-05, + "loss": 0.5792, + "step": 8482 + }, + { + "epoch": 0.23292147171883582, + "grad_norm": 0.31404563784599304, + "learning_rate": 1.9351857804329726e-05, + "loss": 0.4443, + "step": 8483 + }, + { + "epoch": 0.2329489291598023, + "grad_norm": 0.39186206459999084, + "learning_rate": 1.935170483730488e-05, + "loss": 0.5634, + "step": 8484 + }, + { + "epoch": 0.2329763866007688, + "grad_norm": 0.4767107367515564, + "learning_rate": 1.9351551852836114e-05, + "loss": 0.5729, + "step": 8485 + }, + { + "epoch": 0.2330038440417353, + "grad_norm": 0.40498772263526917, + "learning_rate": 1.9351398850923722e-05, + "loss": 0.4944, + "step": 8486 + }, + { + "epoch": 0.23303130148270182, + "grad_norm": 0.33704230189323425, + "learning_rate": 1.9351245831567984e-05, + "loss": 0.5257, + "step": 8487 + }, + { + "epoch": 0.2330587589236683, + "grad_norm": 0.39260613918304443, + "learning_rate": 1.9351092794769188e-05, + "loss": 0.48, + "step": 8488 + }, + { + "epoch": 0.2330862163646348, + "grad_norm": 0.3843478858470917, + "learning_rate": 1.9350939740527622e-05, + "loss": 0.5323, + "step": 8489 + }, + { + "epoch": 0.23311367380560133, + "grad_norm": 0.40935003757476807, + "learning_rate": 1.9350786668843568e-05, + "loss": 0.6, + "step": 8490 + }, + { + "epoch": 0.23314113124656782, + "grad_norm": 0.31980761885643005, + "learning_rate": 1.9350633579717312e-05, + "loss": 0.4532, + "step": 8491 + }, + { + "epoch": 0.23316858868753432, + "grad_norm": 0.3711133301258087, + "learning_rate": 1.935048047314914e-05, + "loss": 0.5505, + "step": 8492 + }, + { + "epoch": 0.2331960461285008, + "grad_norm": 0.34983018040657043, + "learning_rate": 1.935032734913934e-05, + "loss": 0.5025, + "step": 8493 + }, + { + "epoch": 0.23322350356946733, + "grad_norm": 0.38475221395492554, + "learning_rate": 1.935017420768819e-05, + "loss": 0.4905, + "step": 8494 + }, + { + "epoch": 0.23325096101043383, + "grad_norm": 0.33185693621635437, + "learning_rate": 1.9350021048795983e-05, + "loss": 0.4465, + "step": 8495 + }, + { + "epoch": 0.23327841845140032, + "grad_norm": 0.32868966460227966, + "learning_rate": 1.9349867872463006e-05, + "loss": 0.3982, + "step": 8496 + }, + { + "epoch": 0.23330587589236684, + "grad_norm": 0.35981109738349915, + "learning_rate": 1.9349714678689537e-05, + "loss": 0.5208, + "step": 8497 + }, + { + "epoch": 0.23333333333333334, + "grad_norm": 0.3587247431278229, + "learning_rate": 1.934956146747587e-05, + "loss": 0.4582, + "step": 8498 + }, + { + "epoch": 0.23336079077429983, + "grad_norm": 0.37437930703163147, + "learning_rate": 1.934940823882228e-05, + "loss": 0.5096, + "step": 8499 + }, + { + "epoch": 0.23338824821526633, + "grad_norm": 0.43049880862236023, + "learning_rate": 1.9349254992729064e-05, + "loss": 0.5483, + "step": 8500 + }, + { + "epoch": 0.23341570565623285, + "grad_norm": 0.3522612452507019, + "learning_rate": 1.9349101729196505e-05, + "loss": 0.5217, + "step": 8501 + }, + { + "epoch": 0.23344316309719934, + "grad_norm": 0.3774700164794922, + "learning_rate": 1.934894844822489e-05, + "loss": 0.5569, + "step": 8502 + }, + { + "epoch": 0.23347062053816584, + "grad_norm": 0.45812085270881653, + "learning_rate": 1.9348795149814497e-05, + "loss": 0.5448, + "step": 8503 + }, + { + "epoch": 0.23349807797913236, + "grad_norm": 0.3713299632072449, + "learning_rate": 1.934864183396562e-05, + "loss": 0.5349, + "step": 8504 + }, + { + "epoch": 0.23352553542009885, + "grad_norm": 0.40539610385894775, + "learning_rate": 1.9348488500678536e-05, + "loss": 0.5124, + "step": 8505 + }, + { + "epoch": 0.23355299286106534, + "grad_norm": 0.3698674440383911, + "learning_rate": 1.9348335149953544e-05, + "loss": 0.5315, + "step": 8506 + }, + { + "epoch": 0.23358045030203184, + "grad_norm": 0.3760128617286682, + "learning_rate": 1.934818178179092e-05, + "loss": 0.5914, + "step": 8507 + }, + { + "epoch": 0.23360790774299836, + "grad_norm": 0.3199860751628876, + "learning_rate": 1.9348028396190955e-05, + "loss": 0.4828, + "step": 8508 + }, + { + "epoch": 0.23363536518396485, + "grad_norm": 0.456095814704895, + "learning_rate": 1.9347874993153934e-05, + "loss": 0.4599, + "step": 8509 + }, + { + "epoch": 0.23366282262493135, + "grad_norm": 0.3267587423324585, + "learning_rate": 1.9347721572680143e-05, + "loss": 0.5056, + "step": 8510 + }, + { + "epoch": 0.23369028006589787, + "grad_norm": 0.36335471272468567, + "learning_rate": 1.9347568134769864e-05, + "loss": 0.4779, + "step": 8511 + }, + { + "epoch": 0.23371773750686436, + "grad_norm": 0.34659016132354736, + "learning_rate": 1.934741467942339e-05, + "loss": 0.5216, + "step": 8512 + }, + { + "epoch": 0.23374519494783086, + "grad_norm": 0.3411429524421692, + "learning_rate": 1.9347261206641006e-05, + "loss": 0.4959, + "step": 8513 + }, + { + "epoch": 0.23377265238879735, + "grad_norm": 0.4224499464035034, + "learning_rate": 1.9347107716422993e-05, + "loss": 0.5419, + "step": 8514 + }, + { + "epoch": 0.23380010982976387, + "grad_norm": 0.3756140470504761, + "learning_rate": 1.9346954208769644e-05, + "loss": 0.532, + "step": 8515 + }, + { + "epoch": 0.23382756727073037, + "grad_norm": 0.3705260455608368, + "learning_rate": 1.934680068368124e-05, + "loss": 0.6022, + "step": 8516 + }, + { + "epoch": 0.23385502471169686, + "grad_norm": 0.36000990867614746, + "learning_rate": 1.9346647141158072e-05, + "loss": 0.4101, + "step": 8517 + }, + { + "epoch": 0.23388248215266338, + "grad_norm": 0.3387793302536011, + "learning_rate": 1.934649358120042e-05, + "loss": 0.5209, + "step": 8518 + }, + { + "epoch": 0.23390993959362988, + "grad_norm": 0.3457236588001251, + "learning_rate": 1.9346340003808576e-05, + "loss": 0.5341, + "step": 8519 + }, + { + "epoch": 0.23393739703459637, + "grad_norm": 0.37173834443092346, + "learning_rate": 1.9346186408982824e-05, + "loss": 0.5326, + "step": 8520 + }, + { + "epoch": 0.23396485447556287, + "grad_norm": 0.33690914511680603, + "learning_rate": 1.9346032796723454e-05, + "loss": 0.5025, + "step": 8521 + }, + { + "epoch": 0.2339923119165294, + "grad_norm": 0.36679011583328247, + "learning_rate": 1.934587916703075e-05, + "loss": 0.5139, + "step": 8522 + }, + { + "epoch": 0.23401976935749588, + "grad_norm": 0.3778705298900604, + "learning_rate": 1.9345725519905e-05, + "loss": 0.524, + "step": 8523 + }, + { + "epoch": 0.23404722679846238, + "grad_norm": 0.3421902358531952, + "learning_rate": 1.9345571855346485e-05, + "loss": 0.5231, + "step": 8524 + }, + { + "epoch": 0.2340746842394289, + "grad_norm": 0.34022143483161926, + "learning_rate": 1.9345418173355493e-05, + "loss": 0.4637, + "step": 8525 + }, + { + "epoch": 0.2341021416803954, + "grad_norm": 0.41050609946250916, + "learning_rate": 1.934526447393232e-05, + "loss": 0.6575, + "step": 8526 + }, + { + "epoch": 0.2341295991213619, + "grad_norm": 0.32567453384399414, + "learning_rate": 1.9345110757077245e-05, + "loss": 0.4852, + "step": 8527 + }, + { + "epoch": 0.23415705656232838, + "grad_norm": 0.31403446197509766, + "learning_rate": 1.9344957022790554e-05, + "loss": 0.5109, + "step": 8528 + }, + { + "epoch": 0.2341845140032949, + "grad_norm": 0.3811957836151123, + "learning_rate": 1.9344803271072535e-05, + "loss": 0.5722, + "step": 8529 + }, + { + "epoch": 0.2342119714442614, + "grad_norm": 0.33398640155792236, + "learning_rate": 1.934464950192348e-05, + "loss": 0.4123, + "step": 8530 + }, + { + "epoch": 0.2342394288852279, + "grad_norm": 0.3418843150138855, + "learning_rate": 1.9344495715343665e-05, + "loss": 0.5059, + "step": 8531 + }, + { + "epoch": 0.2342668863261944, + "grad_norm": 0.37118762731552124, + "learning_rate": 1.9344341911333386e-05, + "loss": 0.5615, + "step": 8532 + }, + { + "epoch": 0.2342943437671609, + "grad_norm": 0.32620885968208313, + "learning_rate": 1.9344188089892927e-05, + "loss": 0.5386, + "step": 8533 + }, + { + "epoch": 0.2343218012081274, + "grad_norm": 0.33162638545036316, + "learning_rate": 1.934403425102257e-05, + "loss": 0.5294, + "step": 8534 + }, + { + "epoch": 0.2343492586490939, + "grad_norm": 0.3626667559146881, + "learning_rate": 1.9343880394722613e-05, + "loss": 0.4637, + "step": 8535 + }, + { + "epoch": 0.23437671609006042, + "grad_norm": 0.3506741225719452, + "learning_rate": 1.9343726520993337e-05, + "loss": 0.5251, + "step": 8536 + }, + { + "epoch": 0.2344041735310269, + "grad_norm": 0.39587530493736267, + "learning_rate": 1.9343572629835027e-05, + "loss": 0.5515, + "step": 8537 + }, + { + "epoch": 0.2344316309719934, + "grad_norm": 0.39913907647132874, + "learning_rate": 1.9343418721247973e-05, + "loss": 0.562, + "step": 8538 + }, + { + "epoch": 0.2344590884129599, + "grad_norm": 0.39188653230667114, + "learning_rate": 1.934326479523246e-05, + "loss": 0.5122, + "step": 8539 + }, + { + "epoch": 0.23448654585392642, + "grad_norm": 0.4758952558040619, + "learning_rate": 1.9343110851788775e-05, + "loss": 0.5019, + "step": 8540 + }, + { + "epoch": 0.23451400329489291, + "grad_norm": 0.343119740486145, + "learning_rate": 1.934295689091721e-05, + "loss": 0.5287, + "step": 8541 + }, + { + "epoch": 0.2345414607358594, + "grad_norm": 0.38913148641586304, + "learning_rate": 1.9342802912618044e-05, + "loss": 0.5387, + "step": 8542 + }, + { + "epoch": 0.23456891817682593, + "grad_norm": 0.3456226885318756, + "learning_rate": 1.934264891689157e-05, + "loss": 0.5832, + "step": 8543 + }, + { + "epoch": 0.23459637561779242, + "grad_norm": 0.5934251546859741, + "learning_rate": 1.9342494903738073e-05, + "loss": 0.5993, + "step": 8544 + }, + { + "epoch": 0.23462383305875892, + "grad_norm": 0.3693557381629944, + "learning_rate": 1.9342340873157844e-05, + "loss": 0.4517, + "step": 8545 + }, + { + "epoch": 0.2346512904997254, + "grad_norm": 0.37915730476379395, + "learning_rate": 1.9342186825151167e-05, + "loss": 0.5811, + "step": 8546 + }, + { + "epoch": 0.23467874794069193, + "grad_norm": 0.3659084141254425, + "learning_rate": 1.934203275971833e-05, + "loss": 0.5591, + "step": 8547 + }, + { + "epoch": 0.23470620538165843, + "grad_norm": 0.42994675040245056, + "learning_rate": 1.934187867685962e-05, + "loss": 0.5376, + "step": 8548 + }, + { + "epoch": 0.23473366282262492, + "grad_norm": 0.3818734884262085, + "learning_rate": 1.934172457657532e-05, + "loss": 0.565, + "step": 8549 + }, + { + "epoch": 0.23476112026359144, + "grad_norm": 0.38331976532936096, + "learning_rate": 1.934157045886573e-05, + "loss": 0.5526, + "step": 8550 + }, + { + "epoch": 0.23478857770455794, + "grad_norm": 0.36273208260536194, + "learning_rate": 1.9341416323731125e-05, + "loss": 0.5598, + "step": 8551 + }, + { + "epoch": 0.23481603514552443, + "grad_norm": 0.4888930022716522, + "learning_rate": 1.93412621711718e-05, + "loss": 0.5482, + "step": 8552 + }, + { + "epoch": 0.23484349258649093, + "grad_norm": 0.3193429708480835, + "learning_rate": 1.9341108001188037e-05, + "loss": 0.4724, + "step": 8553 + }, + { + "epoch": 0.23487095002745745, + "grad_norm": 0.3565026819705963, + "learning_rate": 1.9340953813780126e-05, + "loss": 0.4839, + "step": 8554 + }, + { + "epoch": 0.23489840746842394, + "grad_norm": 0.34403276443481445, + "learning_rate": 1.9340799608948354e-05, + "loss": 0.554, + "step": 8555 + }, + { + "epoch": 0.23492586490939044, + "grad_norm": 0.3737739622592926, + "learning_rate": 1.9340645386693013e-05, + "loss": 0.6271, + "step": 8556 + }, + { + "epoch": 0.23495332235035696, + "grad_norm": 0.36897990107536316, + "learning_rate": 1.9340491147014387e-05, + "loss": 0.4926, + "step": 8557 + }, + { + "epoch": 0.23498077979132345, + "grad_norm": 0.3662615418434143, + "learning_rate": 1.934033688991276e-05, + "loss": 0.5011, + "step": 8558 + }, + { + "epoch": 0.23500823723228995, + "grad_norm": 0.4747711420059204, + "learning_rate": 1.934018261538843e-05, + "loss": 0.5553, + "step": 8559 + }, + { + "epoch": 0.23503569467325644, + "grad_norm": 0.34206700325012207, + "learning_rate": 1.934002832344167e-05, + "loss": 0.5311, + "step": 8560 + }, + { + "epoch": 0.23506315211422296, + "grad_norm": 0.33285316824913025, + "learning_rate": 1.9339874014072785e-05, + "loss": 0.5162, + "step": 8561 + }, + { + "epoch": 0.23509060955518946, + "grad_norm": 0.43468138575553894, + "learning_rate": 1.9339719687282045e-05, + "loss": 0.5606, + "step": 8562 + }, + { + "epoch": 0.23511806699615595, + "grad_norm": 0.4893453121185303, + "learning_rate": 1.933956534306975e-05, + "loss": 0.5115, + "step": 8563 + }, + { + "epoch": 0.23514552443712247, + "grad_norm": 0.6238056421279907, + "learning_rate": 1.9339410981436192e-05, + "loss": 0.5326, + "step": 8564 + }, + { + "epoch": 0.23517298187808897, + "grad_norm": 0.3759390413761139, + "learning_rate": 1.9339256602381642e-05, + "loss": 0.5065, + "step": 8565 + }, + { + "epoch": 0.23520043931905546, + "grad_norm": 0.3776780664920807, + "learning_rate": 1.9339102205906403e-05, + "loss": 0.5139, + "step": 8566 + }, + { + "epoch": 0.23522789676002195, + "grad_norm": 0.36807072162628174, + "learning_rate": 1.9338947792010755e-05, + "loss": 0.5033, + "step": 8567 + }, + { + "epoch": 0.23525535420098848, + "grad_norm": 0.39904293417930603, + "learning_rate": 1.933879336069499e-05, + "loss": 0.4902, + "step": 8568 + }, + { + "epoch": 0.23528281164195497, + "grad_norm": 0.3751380741596222, + "learning_rate": 1.9338638911959397e-05, + "loss": 0.5457, + "step": 8569 + }, + { + "epoch": 0.23531026908292146, + "grad_norm": 0.3144533336162567, + "learning_rate": 1.9338484445804256e-05, + "loss": 0.489, + "step": 8570 + }, + { + "epoch": 0.23533772652388799, + "grad_norm": 0.39271610975265503, + "learning_rate": 1.933832996222987e-05, + "loss": 0.5953, + "step": 8571 + }, + { + "epoch": 0.23536518396485448, + "grad_norm": 0.3701516389846802, + "learning_rate": 1.933817546123651e-05, + "loss": 0.6106, + "step": 8572 + }, + { + "epoch": 0.23539264140582097, + "grad_norm": 0.319723516702652, + "learning_rate": 1.9338020942824477e-05, + "loss": 0.4863, + "step": 8573 + }, + { + "epoch": 0.23542009884678747, + "grad_norm": 0.45861396193504333, + "learning_rate": 1.9337866406994053e-05, + "loss": 0.5314, + "step": 8574 + }, + { + "epoch": 0.235447556287754, + "grad_norm": 0.3413717448711395, + "learning_rate": 1.9337711853745527e-05, + "loss": 0.5968, + "step": 8575 + }, + { + "epoch": 0.23547501372872048, + "grad_norm": 0.35395121574401855, + "learning_rate": 1.933755728307919e-05, + "loss": 0.5707, + "step": 8576 + }, + { + "epoch": 0.23550247116968698, + "grad_norm": 0.3424719274044037, + "learning_rate": 1.9337402694995325e-05, + "loss": 0.5367, + "step": 8577 + }, + { + "epoch": 0.2355299286106535, + "grad_norm": 0.34722939133644104, + "learning_rate": 1.9337248089494225e-05, + "loss": 0.4912, + "step": 8578 + }, + { + "epoch": 0.23555738605162, + "grad_norm": 0.36852777004241943, + "learning_rate": 1.933709346657618e-05, + "loss": 0.5822, + "step": 8579 + }, + { + "epoch": 0.2355848434925865, + "grad_norm": 0.36839136481285095, + "learning_rate": 1.9336938826241475e-05, + "loss": 0.5439, + "step": 8580 + }, + { + "epoch": 0.23561230093355298, + "grad_norm": 0.3577512800693512, + "learning_rate": 1.9336784168490395e-05, + "loss": 0.46, + "step": 8581 + }, + { + "epoch": 0.2356397583745195, + "grad_norm": 0.3609701991081238, + "learning_rate": 1.9336629493323237e-05, + "loss": 0.4488, + "step": 8582 + }, + { + "epoch": 0.235667215815486, + "grad_norm": 0.3604409098625183, + "learning_rate": 1.9336474800740285e-05, + "loss": 0.4946, + "step": 8583 + }, + { + "epoch": 0.2356946732564525, + "grad_norm": 0.3667648434638977, + "learning_rate": 1.9336320090741828e-05, + "loss": 0.4684, + "step": 8584 + }, + { + "epoch": 0.235722130697419, + "grad_norm": 0.3735356032848358, + "learning_rate": 1.933616536332815e-05, + "loss": 0.4953, + "step": 8585 + }, + { + "epoch": 0.2357495881383855, + "grad_norm": 0.4693150222301483, + "learning_rate": 1.9336010618499545e-05, + "loss": 0.6141, + "step": 8586 + }, + { + "epoch": 0.235777045579352, + "grad_norm": 0.3749711513519287, + "learning_rate": 1.9335855856256302e-05, + "loss": 0.5125, + "step": 8587 + }, + { + "epoch": 0.2358045030203185, + "grad_norm": 0.4062865078449249, + "learning_rate": 1.933570107659871e-05, + "loss": 0.5863, + "step": 8588 + }, + { + "epoch": 0.23583196046128502, + "grad_norm": 0.3656039237976074, + "learning_rate": 1.9335546279527053e-05, + "loss": 0.5084, + "step": 8589 + }, + { + "epoch": 0.2358594179022515, + "grad_norm": 0.430649071931839, + "learning_rate": 1.9335391465041623e-05, + "loss": 0.5555, + "step": 8590 + }, + { + "epoch": 0.235886875343218, + "grad_norm": 0.37617939710617065, + "learning_rate": 1.9335236633142707e-05, + "loss": 0.5231, + "step": 8591 + }, + { + "epoch": 0.23591433278418453, + "grad_norm": 0.381004273891449, + "learning_rate": 1.93350817838306e-05, + "loss": 0.5093, + "step": 8592 + }, + { + "epoch": 0.23594179022515102, + "grad_norm": 0.41566330194473267, + "learning_rate": 1.9334926917105582e-05, + "loss": 0.5188, + "step": 8593 + }, + { + "epoch": 0.23596924766611752, + "grad_norm": 0.34515780210494995, + "learning_rate": 1.933477203296795e-05, + "loss": 0.4878, + "step": 8594 + }, + { + "epoch": 0.235996705107084, + "grad_norm": 0.3470410704612732, + "learning_rate": 1.9334617131417984e-05, + "loss": 0.562, + "step": 8595 + }, + { + "epoch": 0.23602416254805053, + "grad_norm": 0.38418757915496826, + "learning_rate": 1.933446221245598e-05, + "loss": 0.478, + "step": 8596 + }, + { + "epoch": 0.23605161998901703, + "grad_norm": 0.36442896723747253, + "learning_rate": 1.9334307276082225e-05, + "loss": 0.4602, + "step": 8597 + }, + { + "epoch": 0.23607907742998352, + "grad_norm": 0.3872376084327698, + "learning_rate": 1.9334152322297006e-05, + "loss": 0.5611, + "step": 8598 + }, + { + "epoch": 0.23610653487095004, + "grad_norm": 0.360520601272583, + "learning_rate": 1.933399735110062e-05, + "loss": 0.5742, + "step": 8599 + }, + { + "epoch": 0.23613399231191654, + "grad_norm": 0.36925315856933594, + "learning_rate": 1.9333842362493343e-05, + "loss": 0.4882, + "step": 8600 + }, + { + "epoch": 0.23616144975288303, + "grad_norm": 0.38667699694633484, + "learning_rate": 1.933368735647547e-05, + "loss": 0.4368, + "step": 8601 + }, + { + "epoch": 0.23618890719384952, + "grad_norm": 0.32521966099739075, + "learning_rate": 1.9333532333047295e-05, + "loss": 0.4535, + "step": 8602 + }, + { + "epoch": 0.23621636463481605, + "grad_norm": 0.3459606468677521, + "learning_rate": 1.9333377292209105e-05, + "loss": 0.5399, + "step": 8603 + }, + { + "epoch": 0.23624382207578254, + "grad_norm": 0.3724338710308075, + "learning_rate": 1.9333222233961183e-05, + "loss": 0.4513, + "step": 8604 + }, + { + "epoch": 0.23627127951674903, + "grad_norm": 0.38877207040786743, + "learning_rate": 1.9333067158303823e-05, + "loss": 0.513, + "step": 8605 + }, + { + "epoch": 0.23629873695771553, + "grad_norm": 0.36853349208831787, + "learning_rate": 1.9332912065237318e-05, + "loss": 0.6043, + "step": 8606 + }, + { + "epoch": 0.23632619439868205, + "grad_norm": 0.3518364131450653, + "learning_rate": 1.933275695476195e-05, + "loss": 0.5441, + "step": 8607 + }, + { + "epoch": 0.23635365183964854, + "grad_norm": 0.49680325388908386, + "learning_rate": 1.9332601826878013e-05, + "loss": 0.5386, + "step": 8608 + }, + { + "epoch": 0.23638110928061504, + "grad_norm": 0.4230257570743561, + "learning_rate": 1.9332446681585794e-05, + "loss": 0.5936, + "step": 8609 + }, + { + "epoch": 0.23640856672158156, + "grad_norm": 0.34685561060905457, + "learning_rate": 1.9332291518885583e-05, + "loss": 0.5675, + "step": 8610 + }, + { + "epoch": 0.23643602416254805, + "grad_norm": 0.355356365442276, + "learning_rate": 1.933213633877767e-05, + "loss": 0.5668, + "step": 8611 + }, + { + "epoch": 0.23646348160351455, + "grad_norm": 0.3643311858177185, + "learning_rate": 1.933198114126235e-05, + "loss": 0.5343, + "step": 8612 + }, + { + "epoch": 0.23649093904448104, + "grad_norm": 0.4126100242137909, + "learning_rate": 1.93318259263399e-05, + "loss": 0.6064, + "step": 8613 + }, + { + "epoch": 0.23651839648544756, + "grad_norm": 0.3455587327480316, + "learning_rate": 1.9331670694010616e-05, + "loss": 0.4876, + "step": 8614 + }, + { + "epoch": 0.23654585392641406, + "grad_norm": 0.36429563164711, + "learning_rate": 1.933151544427479e-05, + "loss": 0.4638, + "step": 8615 + }, + { + "epoch": 0.23657331136738055, + "grad_norm": 0.37373897433280945, + "learning_rate": 1.933136017713271e-05, + "loss": 0.4388, + "step": 8616 + }, + { + "epoch": 0.23660076880834707, + "grad_norm": 0.35950934886932373, + "learning_rate": 1.9331204892584665e-05, + "loss": 0.5498, + "step": 8617 + }, + { + "epoch": 0.23662822624931357, + "grad_norm": 0.4094233214855194, + "learning_rate": 1.933104959063094e-05, + "loss": 0.5879, + "step": 8618 + }, + { + "epoch": 0.23665568369028006, + "grad_norm": 0.3661895990371704, + "learning_rate": 1.9330894271271834e-05, + "loss": 0.6273, + "step": 8619 + }, + { + "epoch": 0.23668314113124655, + "grad_norm": 0.4045199155807495, + "learning_rate": 1.933073893450763e-05, + "loss": 0.561, + "step": 8620 + }, + { + "epoch": 0.23671059857221308, + "grad_norm": 0.41713860630989075, + "learning_rate": 1.9330583580338624e-05, + "loss": 0.5908, + "step": 8621 + }, + { + "epoch": 0.23673805601317957, + "grad_norm": 0.3813289403915405, + "learning_rate": 1.93304282087651e-05, + "loss": 0.5269, + "step": 8622 + }, + { + "epoch": 0.23676551345414606, + "grad_norm": 0.3825836181640625, + "learning_rate": 1.9330272819787347e-05, + "loss": 0.4669, + "step": 8623 + }, + { + "epoch": 0.2367929708951126, + "grad_norm": 1.2098512649536133, + "learning_rate": 1.933011741340566e-05, + "loss": 0.4349, + "step": 8624 + }, + { + "epoch": 0.23682042833607908, + "grad_norm": 0.3369964361190796, + "learning_rate": 1.9329961989620325e-05, + "loss": 0.5635, + "step": 8625 + }, + { + "epoch": 0.23684788577704557, + "grad_norm": 0.3384723365306854, + "learning_rate": 1.932980654843163e-05, + "loss": 0.445, + "step": 8626 + }, + { + "epoch": 0.23687534321801207, + "grad_norm": 0.418923944234848, + "learning_rate": 1.932965108983987e-05, + "loss": 0.5836, + "step": 8627 + }, + { + "epoch": 0.2369028006589786, + "grad_norm": 0.30650240182876587, + "learning_rate": 1.9329495613845337e-05, + "loss": 0.4898, + "step": 8628 + }, + { + "epoch": 0.23693025809994508, + "grad_norm": 0.4277849793434143, + "learning_rate": 1.9329340120448313e-05, + "loss": 0.576, + "step": 8629 + }, + { + "epoch": 0.23695771554091158, + "grad_norm": 0.3599676787853241, + "learning_rate": 1.9329184609649095e-05, + "loss": 0.5354, + "step": 8630 + }, + { + "epoch": 0.2369851729818781, + "grad_norm": 0.40852537751197815, + "learning_rate": 1.9329029081447968e-05, + "loss": 0.6004, + "step": 8631 + }, + { + "epoch": 0.2370126304228446, + "grad_norm": 0.3445485234260559, + "learning_rate": 1.9328873535845223e-05, + "loss": 0.559, + "step": 8632 + }, + { + "epoch": 0.2370400878638111, + "grad_norm": 0.35131844878196716, + "learning_rate": 1.9328717972841155e-05, + "loss": 0.4857, + "step": 8633 + }, + { + "epoch": 0.23706754530477758, + "grad_norm": 0.39153027534484863, + "learning_rate": 1.932856239243605e-05, + "loss": 0.5592, + "step": 8634 + }, + { + "epoch": 0.2370950027457441, + "grad_norm": 0.37044039368629456, + "learning_rate": 1.9328406794630196e-05, + "loss": 0.4759, + "step": 8635 + }, + { + "epoch": 0.2371224601867106, + "grad_norm": 0.36120831966400146, + "learning_rate": 1.9328251179423888e-05, + "loss": 0.5587, + "step": 8636 + }, + { + "epoch": 0.2371499176276771, + "grad_norm": 0.41824790835380554, + "learning_rate": 1.9328095546817415e-05, + "loss": 0.4619, + "step": 8637 + }, + { + "epoch": 0.23717737506864361, + "grad_norm": 0.47333577275276184, + "learning_rate": 1.9327939896811063e-05, + "loss": 0.6522, + "step": 8638 + }, + { + "epoch": 0.2372048325096101, + "grad_norm": 0.36484938859939575, + "learning_rate": 1.932778422940513e-05, + "loss": 0.5696, + "step": 8639 + }, + { + "epoch": 0.2372322899505766, + "grad_norm": 0.3691408634185791, + "learning_rate": 1.93276285445999e-05, + "loss": 0.5048, + "step": 8640 + }, + { + "epoch": 0.2372597473915431, + "grad_norm": 0.38754594326019287, + "learning_rate": 1.9327472842395666e-05, + "loss": 0.5647, + "step": 8641 + }, + { + "epoch": 0.23728720483250962, + "grad_norm": 0.33950772881507874, + "learning_rate": 1.932731712279272e-05, + "loss": 0.4531, + "step": 8642 + }, + { + "epoch": 0.2373146622734761, + "grad_norm": 0.3555997908115387, + "learning_rate": 1.9327161385791348e-05, + "loss": 0.5162, + "step": 8643 + }, + { + "epoch": 0.2373421197144426, + "grad_norm": 0.34732234477996826, + "learning_rate": 1.9327005631391844e-05, + "loss": 0.4566, + "step": 8644 + }, + { + "epoch": 0.23736957715540913, + "grad_norm": 0.3446820378303528, + "learning_rate": 1.93268498595945e-05, + "loss": 0.4989, + "step": 8645 + }, + { + "epoch": 0.23739703459637562, + "grad_norm": 0.3615168035030365, + "learning_rate": 1.93266940703996e-05, + "loss": 0.5684, + "step": 8646 + }, + { + "epoch": 0.23742449203734212, + "grad_norm": 0.3305380642414093, + "learning_rate": 1.9326538263807442e-05, + "loss": 0.5011, + "step": 8647 + }, + { + "epoch": 0.2374519494783086, + "grad_norm": 0.3770204484462738, + "learning_rate": 1.9326382439818313e-05, + "loss": 0.524, + "step": 8648 + }, + { + "epoch": 0.23747940691927513, + "grad_norm": 0.344301700592041, + "learning_rate": 1.9326226598432503e-05, + "loss": 0.5387, + "step": 8649 + }, + { + "epoch": 0.23750686436024163, + "grad_norm": 0.3580450415611267, + "learning_rate": 1.9326070739650307e-05, + "loss": 0.4837, + "step": 8650 + }, + { + "epoch": 0.23753432180120812, + "grad_norm": 0.3390201926231384, + "learning_rate": 1.932591486347201e-05, + "loss": 0.4695, + "step": 8651 + }, + { + "epoch": 0.23756177924217464, + "grad_norm": 0.38818982243537903, + "learning_rate": 1.9325758969897904e-05, + "loss": 0.551, + "step": 8652 + }, + { + "epoch": 0.23758923668314114, + "grad_norm": 0.39187362790107727, + "learning_rate": 1.932560305892828e-05, + "loss": 0.5617, + "step": 8653 + }, + { + "epoch": 0.23761669412410763, + "grad_norm": 0.7717456221580505, + "learning_rate": 1.9325447130563435e-05, + "loss": 0.5353, + "step": 8654 + }, + { + "epoch": 0.23764415156507412, + "grad_norm": 0.3545801341533661, + "learning_rate": 1.932529118480365e-05, + "loss": 0.5838, + "step": 8655 + }, + { + "epoch": 0.23767160900604065, + "grad_norm": 0.3680892288684845, + "learning_rate": 1.932513522164922e-05, + "loss": 0.5548, + "step": 8656 + }, + { + "epoch": 0.23769906644700714, + "grad_norm": 0.546219527721405, + "learning_rate": 1.9324979241100442e-05, + "loss": 0.5219, + "step": 8657 + }, + { + "epoch": 0.23772652388797363, + "grad_norm": 0.3776657283306122, + "learning_rate": 1.93248232431576e-05, + "loss": 0.4143, + "step": 8658 + }, + { + "epoch": 0.23775398132894016, + "grad_norm": 0.3670983612537384, + "learning_rate": 1.9324667227820985e-05, + "loss": 0.4836, + "step": 8659 + }, + { + "epoch": 0.23778143876990665, + "grad_norm": 0.3388442397117615, + "learning_rate": 1.932451119509089e-05, + "loss": 0.4669, + "step": 8660 + }, + { + "epoch": 0.23780889621087314, + "grad_norm": 0.5087418556213379, + "learning_rate": 1.9324355144967606e-05, + "loss": 0.5226, + "step": 8661 + }, + { + "epoch": 0.23783635365183964, + "grad_norm": 0.3978942632675171, + "learning_rate": 1.9324199077451423e-05, + "loss": 0.6166, + "step": 8662 + }, + { + "epoch": 0.23786381109280616, + "grad_norm": 0.3360753059387207, + "learning_rate": 1.9324042992542635e-05, + "loss": 0.4061, + "step": 8663 + }, + { + "epoch": 0.23789126853377265, + "grad_norm": 0.3519030213356018, + "learning_rate": 1.9323886890241526e-05, + "loss": 0.5506, + "step": 8664 + }, + { + "epoch": 0.23791872597473915, + "grad_norm": 0.43568044900894165, + "learning_rate": 1.9323730770548398e-05, + "loss": 0.4782, + "step": 8665 + }, + { + "epoch": 0.23794618341570567, + "grad_norm": 0.3485465943813324, + "learning_rate": 1.9323574633463532e-05, + "loss": 0.6398, + "step": 8666 + }, + { + "epoch": 0.23797364085667216, + "grad_norm": 0.37835636734962463, + "learning_rate": 1.9323418478987226e-05, + "loss": 0.5909, + "step": 8667 + }, + { + "epoch": 0.23800109829763866, + "grad_norm": 0.3837113380432129, + "learning_rate": 1.9323262307119765e-05, + "loss": 0.5251, + "step": 8668 + }, + { + "epoch": 0.23802855573860515, + "grad_norm": 0.3711813986301422, + "learning_rate": 1.9323106117861448e-05, + "loss": 0.5265, + "step": 8669 + }, + { + "epoch": 0.23805601317957167, + "grad_norm": 0.4221333861351013, + "learning_rate": 1.932294991121256e-05, + "loss": 0.5585, + "step": 8670 + }, + { + "epoch": 0.23808347062053817, + "grad_norm": 0.3598327338695526, + "learning_rate": 1.9322793687173397e-05, + "loss": 0.5587, + "step": 8671 + }, + { + "epoch": 0.23811092806150466, + "grad_norm": 0.36905649304389954, + "learning_rate": 1.932263744574425e-05, + "loss": 0.5679, + "step": 8672 + }, + { + "epoch": 0.23813838550247116, + "grad_norm": 0.4212762117385864, + "learning_rate": 1.9322481186925404e-05, + "loss": 0.6078, + "step": 8673 + }, + { + "epoch": 0.23816584294343768, + "grad_norm": 0.3412187993526459, + "learning_rate": 1.932232491071716e-05, + "loss": 0.5184, + "step": 8674 + }, + { + "epoch": 0.23819330038440417, + "grad_norm": 0.6421278119087219, + "learning_rate": 1.93221686171198e-05, + "loss": 0.5635, + "step": 8675 + }, + { + "epoch": 0.23822075782537067, + "grad_norm": 0.38099056482315063, + "learning_rate": 1.9322012306133623e-05, + "loss": 0.6067, + "step": 8676 + }, + { + "epoch": 0.2382482152663372, + "grad_norm": 0.39919885993003845, + "learning_rate": 1.9321855977758916e-05, + "loss": 0.4628, + "step": 8677 + }, + { + "epoch": 0.23827567270730368, + "grad_norm": 0.3473449647426605, + "learning_rate": 1.9321699631995974e-05, + "loss": 0.4701, + "step": 8678 + }, + { + "epoch": 0.23830313014827018, + "grad_norm": 0.48190218210220337, + "learning_rate": 1.9321543268845085e-05, + "loss": 0.5611, + "step": 8679 + }, + { + "epoch": 0.23833058758923667, + "grad_norm": 0.3256576657295227, + "learning_rate": 1.9321386888306545e-05, + "loss": 0.5129, + "step": 8680 + }, + { + "epoch": 0.2383580450302032, + "grad_norm": 0.3790212571620941, + "learning_rate": 1.9321230490380644e-05, + "loss": 0.5258, + "step": 8681 + }, + { + "epoch": 0.23838550247116969, + "grad_norm": 0.37949302792549133, + "learning_rate": 1.932107407506767e-05, + "loss": 0.5427, + "step": 8682 + }, + { + "epoch": 0.23841295991213618, + "grad_norm": 0.35003527998924255, + "learning_rate": 1.932091764236792e-05, + "loss": 0.4264, + "step": 8683 + }, + { + "epoch": 0.2384404173531027, + "grad_norm": 0.3364484906196594, + "learning_rate": 1.9320761192281686e-05, + "loss": 0.5557, + "step": 8684 + }, + { + "epoch": 0.2384678747940692, + "grad_norm": 0.3961993157863617, + "learning_rate": 1.9320604724809254e-05, + "loss": 0.5091, + "step": 8685 + }, + { + "epoch": 0.2384953322350357, + "grad_norm": 0.4192810356616974, + "learning_rate": 1.9320448239950922e-05, + "loss": 0.5523, + "step": 8686 + }, + { + "epoch": 0.23852278967600218, + "grad_norm": 0.3533271551132202, + "learning_rate": 1.9320291737706975e-05, + "loss": 0.5796, + "step": 8687 + }, + { + "epoch": 0.2385502471169687, + "grad_norm": 0.31446099281311035, + "learning_rate": 1.9320135218077713e-05, + "loss": 0.5019, + "step": 8688 + }, + { + "epoch": 0.2385777045579352, + "grad_norm": 0.36499038338661194, + "learning_rate": 1.9319978681063426e-05, + "loss": 0.4543, + "step": 8689 + }, + { + "epoch": 0.2386051619989017, + "grad_norm": 0.3847835063934326, + "learning_rate": 1.9319822126664397e-05, + "loss": 0.5299, + "step": 8690 + }, + { + "epoch": 0.23863261943986822, + "grad_norm": 0.30964967608451843, + "learning_rate": 1.931966555488093e-05, + "loss": 0.4225, + "step": 8691 + }, + { + "epoch": 0.2386600768808347, + "grad_norm": 0.3615293800830841, + "learning_rate": 1.931950896571331e-05, + "loss": 0.512, + "step": 8692 + }, + { + "epoch": 0.2386875343218012, + "grad_norm": 0.4327225685119629, + "learning_rate": 1.9319352359161836e-05, + "loss": 0.5558, + "step": 8693 + }, + { + "epoch": 0.2387149917627677, + "grad_norm": 0.39535006880760193, + "learning_rate": 1.9319195735226793e-05, + "loss": 0.595, + "step": 8694 + }, + { + "epoch": 0.23874244920373422, + "grad_norm": 0.33427637815475464, + "learning_rate": 1.9319039093908475e-05, + "loss": 0.4256, + "step": 8695 + }, + { + "epoch": 0.2387699066447007, + "grad_norm": 0.37088125944137573, + "learning_rate": 1.9318882435207175e-05, + "loss": 0.5316, + "step": 8696 + }, + { + "epoch": 0.2387973640856672, + "grad_norm": 0.3383795917034149, + "learning_rate": 1.9318725759123186e-05, + "loss": 0.4943, + "step": 8697 + }, + { + "epoch": 0.23882482152663373, + "grad_norm": 0.34637120366096497, + "learning_rate": 1.9318569065656797e-05, + "loss": 0.4782, + "step": 8698 + }, + { + "epoch": 0.23885227896760022, + "grad_norm": 0.36923933029174805, + "learning_rate": 1.9318412354808306e-05, + "loss": 0.485, + "step": 8699 + }, + { + "epoch": 0.23887973640856672, + "grad_norm": 0.37204357981681824, + "learning_rate": 1.9318255626577997e-05, + "loss": 0.5142, + "step": 8700 + }, + { + "epoch": 0.2389071938495332, + "grad_norm": 0.3451897203922272, + "learning_rate": 1.9318098880966173e-05, + "loss": 0.535, + "step": 8701 + }, + { + "epoch": 0.23893465129049973, + "grad_norm": 0.3818557858467102, + "learning_rate": 1.9317942117973114e-05, + "loss": 0.5331, + "step": 8702 + }, + { + "epoch": 0.23896210873146623, + "grad_norm": 0.4101245105266571, + "learning_rate": 1.9317785337599125e-05, + "loss": 0.4711, + "step": 8703 + }, + { + "epoch": 0.23898956617243272, + "grad_norm": 0.36616313457489014, + "learning_rate": 1.9317628539844488e-05, + "loss": 0.5482, + "step": 8704 + }, + { + "epoch": 0.23901702361339924, + "grad_norm": 0.5665686726570129, + "learning_rate": 1.93174717247095e-05, + "loss": 0.4904, + "step": 8705 + }, + { + "epoch": 0.23904448105436574, + "grad_norm": 0.4397428631782532, + "learning_rate": 1.9317314892194458e-05, + "loss": 0.576, + "step": 8706 + }, + { + "epoch": 0.23907193849533223, + "grad_norm": 0.3776445984840393, + "learning_rate": 1.9317158042299647e-05, + "loss": 0.5142, + "step": 8707 + }, + { + "epoch": 0.23909939593629873, + "grad_norm": 0.4118364751338959, + "learning_rate": 1.9317001175025363e-05, + "loss": 0.5683, + "step": 8708 + }, + { + "epoch": 0.23912685337726525, + "grad_norm": 0.382700115442276, + "learning_rate": 1.9316844290371898e-05, + "loss": 0.5419, + "step": 8709 + }, + { + "epoch": 0.23915431081823174, + "grad_norm": 0.3689383864402771, + "learning_rate": 1.9316687388339543e-05, + "loss": 0.5332, + "step": 8710 + }, + { + "epoch": 0.23918176825919824, + "grad_norm": 0.3800449073314667, + "learning_rate": 1.9316530468928596e-05, + "loss": 0.5174, + "step": 8711 + }, + { + "epoch": 0.23920922570016476, + "grad_norm": 0.365041583776474, + "learning_rate": 1.9316373532139345e-05, + "loss": 0.4734, + "step": 8712 + }, + { + "epoch": 0.23923668314113125, + "grad_norm": 0.32409483194351196, + "learning_rate": 1.931621657797208e-05, + "loss": 0.5256, + "step": 8713 + }, + { + "epoch": 0.23926414058209775, + "grad_norm": 0.3906497061252594, + "learning_rate": 1.9316059606427103e-05, + "loss": 0.5504, + "step": 8714 + }, + { + "epoch": 0.23929159802306424, + "grad_norm": 0.3529214560985565, + "learning_rate": 1.93159026175047e-05, + "loss": 0.5371, + "step": 8715 + }, + { + "epoch": 0.23931905546403076, + "grad_norm": 0.37346965074539185, + "learning_rate": 1.9315745611205167e-05, + "loss": 0.5235, + "step": 8716 + }, + { + "epoch": 0.23934651290499726, + "grad_norm": 0.3317355513572693, + "learning_rate": 1.931558858752879e-05, + "loss": 0.4945, + "step": 8717 + }, + { + "epoch": 0.23937397034596375, + "grad_norm": 0.34470245242118835, + "learning_rate": 1.9315431546475872e-05, + "loss": 0.4978, + "step": 8718 + }, + { + "epoch": 0.23940142778693027, + "grad_norm": 0.39471694827079773, + "learning_rate": 1.93152744880467e-05, + "loss": 0.5327, + "step": 8719 + }, + { + "epoch": 0.23942888522789676, + "grad_norm": 0.3433377146720886, + "learning_rate": 1.9315117412241568e-05, + "loss": 0.5221, + "step": 8720 + }, + { + "epoch": 0.23945634266886326, + "grad_norm": 0.39297083020210266, + "learning_rate": 1.9314960319060768e-05, + "loss": 0.52, + "step": 8721 + }, + { + "epoch": 0.23948380010982975, + "grad_norm": 0.3467468321323395, + "learning_rate": 1.9314803208504592e-05, + "loss": 0.4845, + "step": 8722 + }, + { + "epoch": 0.23951125755079627, + "grad_norm": 0.4292147159576416, + "learning_rate": 1.9314646080573337e-05, + "loss": 0.5611, + "step": 8723 + }, + { + "epoch": 0.23953871499176277, + "grad_norm": 0.3624248802661896, + "learning_rate": 1.93144889352673e-05, + "loss": 0.5289, + "step": 8724 + }, + { + "epoch": 0.23956617243272926, + "grad_norm": 0.3910753130912781, + "learning_rate": 1.9314331772586758e-05, + "loss": 0.5386, + "step": 8725 + }, + { + "epoch": 0.23959362987369578, + "grad_norm": 0.33419308066368103, + "learning_rate": 1.931417459253202e-05, + "loss": 0.522, + "step": 8726 + }, + { + "epoch": 0.23962108731466228, + "grad_norm": 0.37947797775268555, + "learning_rate": 1.931401739510337e-05, + "loss": 0.4804, + "step": 8727 + }, + { + "epoch": 0.23964854475562877, + "grad_norm": 0.3607715964317322, + "learning_rate": 1.9313860180301112e-05, + "loss": 0.5428, + "step": 8728 + }, + { + "epoch": 0.23967600219659527, + "grad_norm": 0.33891913294792175, + "learning_rate": 1.9313702948125526e-05, + "loss": 0.5156, + "step": 8729 + }, + { + "epoch": 0.2397034596375618, + "grad_norm": 0.4001785218715668, + "learning_rate": 1.9313545698576912e-05, + "loss": 0.5244, + "step": 8730 + }, + { + "epoch": 0.23973091707852828, + "grad_norm": 0.4216938018798828, + "learning_rate": 1.9313388431655563e-05, + "loss": 0.5651, + "step": 8731 + }, + { + "epoch": 0.23975837451949478, + "grad_norm": 0.35378292202949524, + "learning_rate": 1.9313231147361776e-05, + "loss": 0.569, + "step": 8732 + }, + { + "epoch": 0.2397858319604613, + "grad_norm": 0.3383517861366272, + "learning_rate": 1.9313073845695837e-05, + "loss": 0.4619, + "step": 8733 + }, + { + "epoch": 0.2398132894014278, + "grad_norm": 0.3643558621406555, + "learning_rate": 1.9312916526658042e-05, + "loss": 0.5067, + "step": 8734 + }, + { + "epoch": 0.2398407468423943, + "grad_norm": 0.33568063378334045, + "learning_rate": 1.9312759190248686e-05, + "loss": 0.445, + "step": 8735 + }, + { + "epoch": 0.23986820428336078, + "grad_norm": 0.3454386293888092, + "learning_rate": 1.9312601836468063e-05, + "loss": 0.5796, + "step": 8736 + }, + { + "epoch": 0.2398956617243273, + "grad_norm": 0.36241310834884644, + "learning_rate": 1.9312444465316467e-05, + "loss": 0.5299, + "step": 8737 + }, + { + "epoch": 0.2399231191652938, + "grad_norm": 0.4499903619289398, + "learning_rate": 1.9312287076794188e-05, + "loss": 0.6538, + "step": 8738 + }, + { + "epoch": 0.2399505766062603, + "grad_norm": 0.44785594940185547, + "learning_rate": 1.931212967090152e-05, + "loss": 0.5807, + "step": 8739 + }, + { + "epoch": 0.23997803404722678, + "grad_norm": 0.5055844187736511, + "learning_rate": 1.931197224763876e-05, + "loss": 0.4739, + "step": 8740 + }, + { + "epoch": 0.2400054914881933, + "grad_norm": 0.3270832598209381, + "learning_rate": 1.93118148070062e-05, + "loss": 0.5022, + "step": 8741 + }, + { + "epoch": 0.2400329489291598, + "grad_norm": 0.35608938336372375, + "learning_rate": 1.9311657349004133e-05, + "loss": 0.5874, + "step": 8742 + }, + { + "epoch": 0.2400604063701263, + "grad_norm": 0.3691215217113495, + "learning_rate": 1.9311499873632852e-05, + "loss": 0.5293, + "step": 8743 + }, + { + "epoch": 0.24008786381109282, + "grad_norm": 0.49517306685447693, + "learning_rate": 1.931134238089265e-05, + "loss": 0.5066, + "step": 8744 + }, + { + "epoch": 0.2401153212520593, + "grad_norm": 0.3925166428089142, + "learning_rate": 1.9311184870783828e-05, + "loss": 0.5097, + "step": 8745 + }, + { + "epoch": 0.2401427786930258, + "grad_norm": 0.3840397298336029, + "learning_rate": 1.931102734330667e-05, + "loss": 0.5847, + "step": 8746 + }, + { + "epoch": 0.2401702361339923, + "grad_norm": 0.31489264965057373, + "learning_rate": 1.9310869798461476e-05, + "loss": 0.4619, + "step": 8747 + }, + { + "epoch": 0.24019769357495882, + "grad_norm": 0.376049667596817, + "learning_rate": 1.9310712236248535e-05, + "loss": 0.478, + "step": 8748 + }, + { + "epoch": 0.24022515101592531, + "grad_norm": 0.3488622307777405, + "learning_rate": 1.931055465666815e-05, + "loss": 0.5954, + "step": 8749 + }, + { + "epoch": 0.2402526084568918, + "grad_norm": 0.35344359278678894, + "learning_rate": 1.9310397059720606e-05, + "loss": 0.5639, + "step": 8750 + }, + { + "epoch": 0.24028006589785833, + "grad_norm": 0.4201376140117645, + "learning_rate": 1.93102394454062e-05, + "loss": 0.5325, + "step": 8751 + }, + { + "epoch": 0.24030752333882482, + "grad_norm": 0.3509422242641449, + "learning_rate": 1.9310081813725225e-05, + "loss": 0.5505, + "step": 8752 + }, + { + "epoch": 0.24033498077979132, + "grad_norm": 0.3810165226459503, + "learning_rate": 1.930992416467798e-05, + "loss": 0.4481, + "step": 8753 + }, + { + "epoch": 0.2403624382207578, + "grad_norm": 0.42471763491630554, + "learning_rate": 1.930976649826475e-05, + "loss": 0.6068, + "step": 8754 + }, + { + "epoch": 0.24038989566172433, + "grad_norm": 0.388739675283432, + "learning_rate": 1.9309608814485836e-05, + "loss": 0.5523, + "step": 8755 + }, + { + "epoch": 0.24041735310269083, + "grad_norm": 0.352566659450531, + "learning_rate": 1.930945111334153e-05, + "loss": 0.5423, + "step": 8756 + }, + { + "epoch": 0.24044481054365732, + "grad_norm": 0.4109717905521393, + "learning_rate": 1.9309293394832125e-05, + "loss": 0.5499, + "step": 8757 + }, + { + "epoch": 0.24047226798462384, + "grad_norm": 1.2370877265930176, + "learning_rate": 1.930913565895792e-05, + "loss": 0.594, + "step": 8758 + }, + { + "epoch": 0.24049972542559034, + "grad_norm": 0.4043298363685608, + "learning_rate": 1.9308977905719203e-05, + "loss": 0.604, + "step": 8759 + }, + { + "epoch": 0.24052718286655683, + "grad_norm": 0.3620893657207489, + "learning_rate": 1.9308820135116275e-05, + "loss": 0.517, + "step": 8760 + }, + { + "epoch": 0.24055464030752333, + "grad_norm": 0.38919132947921753, + "learning_rate": 1.9308662347149423e-05, + "loss": 0.568, + "step": 8761 + }, + { + "epoch": 0.24058209774848985, + "grad_norm": 0.3775987923145294, + "learning_rate": 1.9308504541818944e-05, + "loss": 0.5283, + "step": 8762 + }, + { + "epoch": 0.24060955518945634, + "grad_norm": 0.3933328688144684, + "learning_rate": 1.9308346719125136e-05, + "loss": 0.5502, + "step": 8763 + }, + { + "epoch": 0.24063701263042284, + "grad_norm": 0.3943025469779968, + "learning_rate": 1.930818887906829e-05, + "loss": 0.4934, + "step": 8764 + }, + { + "epoch": 0.24066447007138936, + "grad_norm": 0.32819873094558716, + "learning_rate": 1.93080310216487e-05, + "loss": 0.487, + "step": 8765 + }, + { + "epoch": 0.24069192751235585, + "grad_norm": 0.3633822500705719, + "learning_rate": 1.930787314686666e-05, + "loss": 0.5238, + "step": 8766 + }, + { + "epoch": 0.24071938495332235, + "grad_norm": 0.45222803950309753, + "learning_rate": 1.9307715254722468e-05, + "loss": 0.6686, + "step": 8767 + }, + { + "epoch": 0.24074684239428884, + "grad_norm": 0.3346841633319855, + "learning_rate": 1.9307557345216413e-05, + "loss": 0.5024, + "step": 8768 + }, + { + "epoch": 0.24077429983525536, + "grad_norm": 0.37977755069732666, + "learning_rate": 1.9307399418348798e-05, + "loss": 0.4808, + "step": 8769 + }, + { + "epoch": 0.24080175727622186, + "grad_norm": 0.374072402715683, + "learning_rate": 1.930724147411991e-05, + "loss": 0.447, + "step": 8770 + }, + { + "epoch": 0.24082921471718835, + "grad_norm": 0.3875643014907837, + "learning_rate": 1.930708351253005e-05, + "loss": 0.6378, + "step": 8771 + }, + { + "epoch": 0.24085667215815487, + "grad_norm": 0.3488048017024994, + "learning_rate": 1.9306925533579503e-05, + "loss": 0.5223, + "step": 8772 + }, + { + "epoch": 0.24088412959912137, + "grad_norm": 0.33946987986564636, + "learning_rate": 1.930676753726857e-05, + "loss": 0.4766, + "step": 8773 + }, + { + "epoch": 0.24091158704008786, + "grad_norm": 0.3605501055717468, + "learning_rate": 1.9306609523597547e-05, + "loss": 0.5711, + "step": 8774 + }, + { + "epoch": 0.24093904448105435, + "grad_norm": 0.41460269689559937, + "learning_rate": 1.9306451492566726e-05, + "loss": 0.5989, + "step": 8775 + }, + { + "epoch": 0.24096650192202088, + "grad_norm": 0.3465343415737152, + "learning_rate": 1.9306293444176403e-05, + "loss": 0.4464, + "step": 8776 + }, + { + "epoch": 0.24099395936298737, + "grad_norm": 0.33681365847587585, + "learning_rate": 1.930613537842687e-05, + "loss": 0.5007, + "step": 8777 + }, + { + "epoch": 0.24102141680395386, + "grad_norm": 0.33834904432296753, + "learning_rate": 1.9305977295318428e-05, + "loss": 0.5151, + "step": 8778 + }, + { + "epoch": 0.24104887424492039, + "grad_norm": 0.3941114842891693, + "learning_rate": 1.9305819194851367e-05, + "loss": 0.5245, + "step": 8779 + }, + { + "epoch": 0.24107633168588688, + "grad_norm": 0.3722449541091919, + "learning_rate": 1.9305661077025982e-05, + "loss": 0.4733, + "step": 8780 + }, + { + "epoch": 0.24110378912685337, + "grad_norm": 0.37828347086906433, + "learning_rate": 1.9305502941842574e-05, + "loss": 0.5233, + "step": 8781 + }, + { + "epoch": 0.24113124656781987, + "grad_norm": 0.39776018261909485, + "learning_rate": 1.930534478930143e-05, + "loss": 0.3782, + "step": 8782 + }, + { + "epoch": 0.2411587040087864, + "grad_norm": 0.3979213535785675, + "learning_rate": 1.9305186619402846e-05, + "loss": 0.5411, + "step": 8783 + }, + { + "epoch": 0.24118616144975288, + "grad_norm": 0.387920081615448, + "learning_rate": 1.930502843214712e-05, + "loss": 0.5494, + "step": 8784 + }, + { + "epoch": 0.24121361889071938, + "grad_norm": 0.3695265054702759, + "learning_rate": 1.9304870227534547e-05, + "loss": 0.5182, + "step": 8785 + }, + { + "epoch": 0.2412410763316859, + "grad_norm": 0.33115866780281067, + "learning_rate": 1.930471200556542e-05, + "loss": 0.4944, + "step": 8786 + }, + { + "epoch": 0.2412685337726524, + "grad_norm": 0.38284412026405334, + "learning_rate": 1.930455376624004e-05, + "loss": 0.6048, + "step": 8787 + }, + { + "epoch": 0.2412959912136189, + "grad_norm": 0.3611135482788086, + "learning_rate": 1.9304395509558694e-05, + "loss": 0.4594, + "step": 8788 + }, + { + "epoch": 0.24132344865458538, + "grad_norm": 0.3107925355434418, + "learning_rate": 1.930423723552168e-05, + "loss": 0.4426, + "step": 8789 + }, + { + "epoch": 0.2413509060955519, + "grad_norm": 0.37070703506469727, + "learning_rate": 1.9304078944129293e-05, + "loss": 0.5816, + "step": 8790 + }, + { + "epoch": 0.2413783635365184, + "grad_norm": 0.4081724286079407, + "learning_rate": 1.930392063538183e-05, + "loss": 0.565, + "step": 8791 + }, + { + "epoch": 0.2414058209774849, + "grad_norm": 0.36235496401786804, + "learning_rate": 1.930376230927959e-05, + "loss": 0.4946, + "step": 8792 + }, + { + "epoch": 0.2414332784184514, + "grad_norm": 0.36039504408836365, + "learning_rate": 1.9303603965822858e-05, + "loss": 0.5361, + "step": 8793 + }, + { + "epoch": 0.2414607358594179, + "grad_norm": 0.4050566256046295, + "learning_rate": 1.930344560501194e-05, + "loss": 0.4838, + "step": 8794 + }, + { + "epoch": 0.2414881933003844, + "grad_norm": 0.36476171016693115, + "learning_rate": 1.9303287226847122e-05, + "loss": 0.5862, + "step": 8795 + }, + { + "epoch": 0.2415156507413509, + "grad_norm": 0.4001944065093994, + "learning_rate": 1.9303128831328707e-05, + "loss": 0.4375, + "step": 8796 + }, + { + "epoch": 0.24154310818231742, + "grad_norm": 0.3582918047904968, + "learning_rate": 1.9302970418456986e-05, + "loss": 0.5176, + "step": 8797 + }, + { + "epoch": 0.2415705656232839, + "grad_norm": 0.3351721465587616, + "learning_rate": 1.9302811988232256e-05, + "loss": 0.5227, + "step": 8798 + }, + { + "epoch": 0.2415980230642504, + "grad_norm": 0.3836292624473572, + "learning_rate": 1.9302653540654814e-05, + "loss": 0.5974, + "step": 8799 + }, + { + "epoch": 0.24162548050521693, + "grad_norm": 0.3444044888019562, + "learning_rate": 1.9302495075724955e-05, + "loss": 0.4979, + "step": 8800 + }, + { + "epoch": 0.24165293794618342, + "grad_norm": 0.4008808732032776, + "learning_rate": 1.930233659344297e-05, + "loss": 0.5521, + "step": 8801 + }, + { + "epoch": 0.24168039538714992, + "grad_norm": 0.392223984003067, + "learning_rate": 1.930217809380916e-05, + "loss": 0.5106, + "step": 8802 + }, + { + "epoch": 0.2417078528281164, + "grad_norm": 0.31517598032951355, + "learning_rate": 1.9302019576823824e-05, + "loss": 0.4558, + "step": 8803 + }, + { + "epoch": 0.24173531026908293, + "grad_norm": 0.33643049001693726, + "learning_rate": 1.9301861042487243e-05, + "loss": 0.4479, + "step": 8804 + }, + { + "epoch": 0.24176276771004943, + "grad_norm": 0.4739621579647064, + "learning_rate": 1.930170249079973e-05, + "loss": 0.5035, + "step": 8805 + }, + { + "epoch": 0.24179022515101592, + "grad_norm": 0.39752477407455444, + "learning_rate": 1.930154392176157e-05, + "loss": 0.5977, + "step": 8806 + }, + { + "epoch": 0.2418176825919824, + "grad_norm": 0.37203168869018555, + "learning_rate": 1.9301385335373063e-05, + "loss": 0.5173, + "step": 8807 + }, + { + "epoch": 0.24184514003294894, + "grad_norm": 0.36752045154571533, + "learning_rate": 1.9301226731634505e-05, + "loss": 0.5604, + "step": 8808 + }, + { + "epoch": 0.24187259747391543, + "grad_norm": 0.37713170051574707, + "learning_rate": 1.9301068110546185e-05, + "loss": 0.5305, + "step": 8809 + }, + { + "epoch": 0.24190005491488192, + "grad_norm": 0.360727995634079, + "learning_rate": 1.9300909472108407e-05, + "loss": 0.4903, + "step": 8810 + }, + { + "epoch": 0.24192751235584845, + "grad_norm": 0.46317192912101746, + "learning_rate": 1.9300750816321467e-05, + "loss": 0.6315, + "step": 8811 + }, + { + "epoch": 0.24195496979681494, + "grad_norm": 0.3427072763442993, + "learning_rate": 1.9300592143185656e-05, + "loss": 0.5287, + "step": 8812 + }, + { + "epoch": 0.24198242723778143, + "grad_norm": 0.38186538219451904, + "learning_rate": 1.930043345270127e-05, + "loss": 0.5232, + "step": 8813 + }, + { + "epoch": 0.24200988467874793, + "grad_norm": 0.33103299140930176, + "learning_rate": 1.9300274744868615e-05, + "loss": 0.4996, + "step": 8814 + }, + { + "epoch": 0.24203734211971445, + "grad_norm": 0.4639434814453125, + "learning_rate": 1.9300116019687972e-05, + "loss": 0.6244, + "step": 8815 + }, + { + "epoch": 0.24206479956068094, + "grad_norm": 0.372749388217926, + "learning_rate": 1.929995727715965e-05, + "loss": 0.5585, + "step": 8816 + }, + { + "epoch": 0.24209225700164744, + "grad_norm": 0.36547544598579407, + "learning_rate": 1.9299798517283935e-05, + "loss": 0.4899, + "step": 8817 + }, + { + "epoch": 0.24211971444261396, + "grad_norm": 0.3483883738517761, + "learning_rate": 1.9299639740061127e-05, + "loss": 0.461, + "step": 8818 + }, + { + "epoch": 0.24214717188358045, + "grad_norm": 0.3713272213935852, + "learning_rate": 1.9299480945491528e-05, + "loss": 0.5495, + "step": 8819 + }, + { + "epoch": 0.24217462932454695, + "grad_norm": 0.3524238169193268, + "learning_rate": 1.9299322133575426e-05, + "loss": 0.5915, + "step": 8820 + }, + { + "epoch": 0.24220208676551344, + "grad_norm": 0.37169042229652405, + "learning_rate": 1.929916330431312e-05, + "loss": 0.5572, + "step": 8821 + }, + { + "epoch": 0.24222954420647996, + "grad_norm": 0.3970213830471039, + "learning_rate": 1.9299004457704906e-05, + "loss": 0.5227, + "step": 8822 + }, + { + "epoch": 0.24225700164744646, + "grad_norm": 0.3722146153450012, + "learning_rate": 1.9298845593751077e-05, + "loss": 0.5587, + "step": 8823 + }, + { + "epoch": 0.24228445908841295, + "grad_norm": 0.37391263246536255, + "learning_rate": 1.9298686712451938e-05, + "loss": 0.4914, + "step": 8824 + }, + { + "epoch": 0.24231191652937947, + "grad_norm": 0.37321266531944275, + "learning_rate": 1.929852781380778e-05, + "loss": 0.5748, + "step": 8825 + }, + { + "epoch": 0.24233937397034597, + "grad_norm": 0.3978263735771179, + "learning_rate": 1.92983688978189e-05, + "loss": 0.6019, + "step": 8826 + }, + { + "epoch": 0.24236683141131246, + "grad_norm": 0.3377845883369446, + "learning_rate": 1.9298209964485594e-05, + "loss": 0.5967, + "step": 8827 + }, + { + "epoch": 0.24239428885227896, + "grad_norm": 0.3157040476799011, + "learning_rate": 1.929805101380816e-05, + "loss": 0.5186, + "step": 8828 + }, + { + "epoch": 0.24242174629324548, + "grad_norm": 0.370583176612854, + "learning_rate": 1.929789204578689e-05, + "loss": 0.547, + "step": 8829 + }, + { + "epoch": 0.24244920373421197, + "grad_norm": 0.36539730429649353, + "learning_rate": 1.9297733060422086e-05, + "loss": 0.4814, + "step": 8830 + }, + { + "epoch": 0.24247666117517847, + "grad_norm": 0.3976539671421051, + "learning_rate": 1.9297574057714042e-05, + "loss": 0.5703, + "step": 8831 + }, + { + "epoch": 0.242504118616145, + "grad_norm": 0.417807012796402, + "learning_rate": 1.9297415037663056e-05, + "loss": 0.5014, + "step": 8832 + }, + { + "epoch": 0.24253157605711148, + "grad_norm": 0.4118380546569824, + "learning_rate": 1.9297256000269423e-05, + "loss": 0.5271, + "step": 8833 + }, + { + "epoch": 0.24255903349807797, + "grad_norm": 0.414305180311203, + "learning_rate": 1.9297096945533437e-05, + "loss": 0.5311, + "step": 8834 + }, + { + "epoch": 0.24258649093904447, + "grad_norm": 0.4518028795719147, + "learning_rate": 1.9296937873455404e-05, + "loss": 0.588, + "step": 8835 + }, + { + "epoch": 0.242613948380011, + "grad_norm": 0.36559179425239563, + "learning_rate": 1.929677878403561e-05, + "loss": 0.515, + "step": 8836 + }, + { + "epoch": 0.24264140582097748, + "grad_norm": 0.32477259635925293, + "learning_rate": 1.9296619677274358e-05, + "loss": 0.4318, + "step": 8837 + }, + { + "epoch": 0.24266886326194398, + "grad_norm": 0.3472536504268646, + "learning_rate": 1.9296460553171944e-05, + "loss": 0.448, + "step": 8838 + }, + { + "epoch": 0.2426963207029105, + "grad_norm": 0.3629855811595917, + "learning_rate": 1.929630141172866e-05, + "loss": 0.5885, + "step": 8839 + }, + { + "epoch": 0.242723778143877, + "grad_norm": 0.37134453654289246, + "learning_rate": 1.929614225294481e-05, + "loss": 0.5195, + "step": 8840 + }, + { + "epoch": 0.2427512355848435, + "grad_norm": 0.35730916261672974, + "learning_rate": 1.9295983076820687e-05, + "loss": 0.5588, + "step": 8841 + }, + { + "epoch": 0.24277869302580998, + "grad_norm": 0.31360548734664917, + "learning_rate": 1.9295823883356592e-05, + "loss": 0.3532, + "step": 8842 + }, + { + "epoch": 0.2428061504667765, + "grad_norm": 0.3425596058368683, + "learning_rate": 1.9295664672552814e-05, + "loss": 0.5087, + "step": 8843 + }, + { + "epoch": 0.242833607907743, + "grad_norm": 0.3623373508453369, + "learning_rate": 1.9295505444409658e-05, + "loss": 0.5199, + "step": 8844 + }, + { + "epoch": 0.2428610653487095, + "grad_norm": 0.38768088817596436, + "learning_rate": 1.9295346198927413e-05, + "loss": 0.4664, + "step": 8845 + }, + { + "epoch": 0.24288852278967601, + "grad_norm": 0.3735514283180237, + "learning_rate": 1.9295186936106386e-05, + "loss": 0.4974, + "step": 8846 + }, + { + "epoch": 0.2429159802306425, + "grad_norm": 0.3841317594051361, + "learning_rate": 1.9295027655946863e-05, + "loss": 0.5432, + "step": 8847 + }, + { + "epoch": 0.242943437671609, + "grad_norm": 0.3236297369003296, + "learning_rate": 1.929486835844915e-05, + "loss": 0.4912, + "step": 8848 + }, + { + "epoch": 0.2429708951125755, + "grad_norm": 0.40409380197525024, + "learning_rate": 1.929470904361354e-05, + "loss": 0.5848, + "step": 8849 + }, + { + "epoch": 0.24299835255354202, + "grad_norm": 0.395097941160202, + "learning_rate": 1.9294549711440334e-05, + "loss": 0.553, + "step": 8850 + }, + { + "epoch": 0.2430258099945085, + "grad_norm": 0.35927122831344604, + "learning_rate": 1.9294390361929825e-05, + "loss": 0.4683, + "step": 8851 + }, + { + "epoch": 0.243053267435475, + "grad_norm": 0.405039519071579, + "learning_rate": 1.929423099508231e-05, + "loss": 0.5249, + "step": 8852 + }, + { + "epoch": 0.24308072487644153, + "grad_norm": 0.39939430356025696, + "learning_rate": 1.9294071610898088e-05, + "loss": 0.5465, + "step": 8853 + }, + { + "epoch": 0.24310818231740802, + "grad_norm": 0.389818012714386, + "learning_rate": 1.9293912209377455e-05, + "loss": 0.6072, + "step": 8854 + }, + { + "epoch": 0.24313563975837452, + "grad_norm": 0.32416054606437683, + "learning_rate": 1.9293752790520712e-05, + "loss": 0.4839, + "step": 8855 + }, + { + "epoch": 0.243163097199341, + "grad_norm": 0.45535510778427124, + "learning_rate": 1.929359335432815e-05, + "loss": 0.4554, + "step": 8856 + }, + { + "epoch": 0.24319055464030753, + "grad_norm": 0.3382176458835602, + "learning_rate": 1.929343390080008e-05, + "loss": 0.5426, + "step": 8857 + }, + { + "epoch": 0.24321801208127403, + "grad_norm": 0.38820210099220276, + "learning_rate": 1.9293274429936783e-05, + "loss": 0.5334, + "step": 8858 + }, + { + "epoch": 0.24324546952224052, + "grad_norm": 0.35064661502838135, + "learning_rate": 1.929311494173856e-05, + "loss": 0.4797, + "step": 8859 + }, + { + "epoch": 0.24327292696320704, + "grad_norm": 0.3924979567527771, + "learning_rate": 1.9292955436205715e-05, + "loss": 0.4988, + "step": 8860 + }, + { + "epoch": 0.24330038440417354, + "grad_norm": 0.3647165894508362, + "learning_rate": 1.9292795913338543e-05, + "loss": 0.5226, + "step": 8861 + }, + { + "epoch": 0.24332784184514003, + "grad_norm": 0.330352783203125, + "learning_rate": 1.9292636373137337e-05, + "loss": 0.494, + "step": 8862 + }, + { + "epoch": 0.24335529928610652, + "grad_norm": 0.37149685621261597, + "learning_rate": 1.92924768156024e-05, + "loss": 0.5742, + "step": 8863 + }, + { + "epoch": 0.24338275672707305, + "grad_norm": 0.34584543108940125, + "learning_rate": 1.929231724073403e-05, + "loss": 0.5643, + "step": 8864 + }, + { + "epoch": 0.24341021416803954, + "grad_norm": 0.3584834933280945, + "learning_rate": 1.929215764853252e-05, + "loss": 0.5031, + "step": 8865 + }, + { + "epoch": 0.24343767160900603, + "grad_norm": 0.40060994029045105, + "learning_rate": 1.929199803899817e-05, + "loss": 0.5779, + "step": 8866 + }, + { + "epoch": 0.24346512904997256, + "grad_norm": 0.41579535603523254, + "learning_rate": 1.929183841213128e-05, + "loss": 0.5293, + "step": 8867 + }, + { + "epoch": 0.24349258649093905, + "grad_norm": 0.3597356379032135, + "learning_rate": 1.929167876793215e-05, + "loss": 0.5284, + "step": 8868 + }, + { + "epoch": 0.24352004393190554, + "grad_norm": 0.36807945370674133, + "learning_rate": 1.9291519106401065e-05, + "loss": 0.5301, + "step": 8869 + }, + { + "epoch": 0.24354750137287204, + "grad_norm": 0.3535303771495819, + "learning_rate": 1.9291359427538336e-05, + "loss": 0.5599, + "step": 8870 + }, + { + "epoch": 0.24357495881383856, + "grad_norm": 0.38852062821388245, + "learning_rate": 1.9291199731344255e-05, + "loss": 0.4815, + "step": 8871 + }, + { + "epoch": 0.24360241625480505, + "grad_norm": 0.4124987721443176, + "learning_rate": 1.9291040017819122e-05, + "loss": 0.5497, + "step": 8872 + }, + { + "epoch": 0.24362987369577155, + "grad_norm": 0.35583651065826416, + "learning_rate": 1.9290880286963233e-05, + "loss": 0.5856, + "step": 8873 + }, + { + "epoch": 0.24365733113673804, + "grad_norm": 0.41346269845962524, + "learning_rate": 1.9290720538776888e-05, + "loss": 0.5309, + "step": 8874 + }, + { + "epoch": 0.24368478857770456, + "grad_norm": 0.3889893591403961, + "learning_rate": 1.9290560773260384e-05, + "loss": 0.5298, + "step": 8875 + }, + { + "epoch": 0.24371224601867106, + "grad_norm": 0.8197377920150757, + "learning_rate": 1.9290400990414017e-05, + "loss": 0.6535, + "step": 8876 + }, + { + "epoch": 0.24373970345963755, + "grad_norm": 0.3761925995349884, + "learning_rate": 1.9290241190238087e-05, + "loss": 0.568, + "step": 8877 + }, + { + "epoch": 0.24376716090060407, + "grad_norm": 0.3553387224674225, + "learning_rate": 1.9290081372732893e-05, + "loss": 0.4877, + "step": 8878 + }, + { + "epoch": 0.24379461834157057, + "grad_norm": 0.39596959948539734, + "learning_rate": 1.9289921537898736e-05, + "loss": 0.5323, + "step": 8879 + }, + { + "epoch": 0.24382207578253706, + "grad_norm": 0.456145316362381, + "learning_rate": 1.9289761685735905e-05, + "loss": 0.475, + "step": 8880 + }, + { + "epoch": 0.24384953322350356, + "grad_norm": 0.45268484950065613, + "learning_rate": 1.9289601816244708e-05, + "loss": 0.5306, + "step": 8881 + }, + { + "epoch": 0.24387699066447008, + "grad_norm": 0.33775749802589417, + "learning_rate": 1.9289441929425435e-05, + "loss": 0.492, + "step": 8882 + }, + { + "epoch": 0.24390444810543657, + "grad_norm": 0.39855024218559265, + "learning_rate": 1.928928202527839e-05, + "loss": 0.5747, + "step": 8883 + }, + { + "epoch": 0.24393190554640307, + "grad_norm": 0.36737215518951416, + "learning_rate": 1.9289122103803866e-05, + "loss": 0.5899, + "step": 8884 + }, + { + "epoch": 0.2439593629873696, + "grad_norm": 0.4010552763938904, + "learning_rate": 1.9288962165002168e-05, + "loss": 0.5359, + "step": 8885 + }, + { + "epoch": 0.24398682042833608, + "grad_norm": 0.34777408838272095, + "learning_rate": 1.9288802208873588e-05, + "loss": 0.5474, + "step": 8886 + }, + { + "epoch": 0.24401427786930258, + "grad_norm": 0.3867762088775635, + "learning_rate": 1.928864223541843e-05, + "loss": 0.4944, + "step": 8887 + }, + { + "epoch": 0.24404173531026907, + "grad_norm": 0.5653756260871887, + "learning_rate": 1.9288482244636987e-05, + "loss": 0.5047, + "step": 8888 + }, + { + "epoch": 0.2440691927512356, + "grad_norm": 0.37851250171661377, + "learning_rate": 1.9288322236529563e-05, + "loss": 0.6185, + "step": 8889 + }, + { + "epoch": 0.24409665019220209, + "grad_norm": 0.3492375314235687, + "learning_rate": 1.9288162211096452e-05, + "loss": 0.5129, + "step": 8890 + }, + { + "epoch": 0.24412410763316858, + "grad_norm": 0.6131927371025085, + "learning_rate": 1.9288002168337953e-05, + "loss": 0.5597, + "step": 8891 + }, + { + "epoch": 0.2441515650741351, + "grad_norm": 0.39827340841293335, + "learning_rate": 1.928784210825437e-05, + "loss": 0.5758, + "step": 8892 + }, + { + "epoch": 0.2441790225151016, + "grad_norm": 0.36514872312545776, + "learning_rate": 1.9287682030845995e-05, + "loss": 0.5919, + "step": 8893 + }, + { + "epoch": 0.2442064799560681, + "grad_norm": 0.35158678889274597, + "learning_rate": 1.9287521936113124e-05, + "loss": 0.4632, + "step": 8894 + }, + { + "epoch": 0.24423393739703458, + "grad_norm": 0.3559192717075348, + "learning_rate": 1.9287361824056065e-05, + "loss": 0.5547, + "step": 8895 + }, + { + "epoch": 0.2442613948380011, + "grad_norm": 0.3226472735404968, + "learning_rate": 1.9287201694675112e-05, + "loss": 0.5593, + "step": 8896 + }, + { + "epoch": 0.2442888522789676, + "grad_norm": 0.33776840567588806, + "learning_rate": 1.9287041547970563e-05, + "loss": 0.524, + "step": 8897 + }, + { + "epoch": 0.2443163097199341, + "grad_norm": 0.38217613101005554, + "learning_rate": 1.9286881383942716e-05, + "loss": 0.5466, + "step": 8898 + }, + { + "epoch": 0.24434376716090062, + "grad_norm": 0.33925744891166687, + "learning_rate": 1.9286721202591874e-05, + "loss": 0.5294, + "step": 8899 + }, + { + "epoch": 0.2443712246018671, + "grad_norm": 0.3478332757949829, + "learning_rate": 1.928656100391833e-05, + "loss": 0.616, + "step": 8900 + }, + { + "epoch": 0.2443986820428336, + "grad_norm": 0.35395899415016174, + "learning_rate": 1.928640078792239e-05, + "loss": 0.431, + "step": 8901 + }, + { + "epoch": 0.2444261394838001, + "grad_norm": 0.6209436058998108, + "learning_rate": 1.9286240554604344e-05, + "loss": 0.5106, + "step": 8902 + }, + { + "epoch": 0.24445359692476662, + "grad_norm": 0.3773331344127655, + "learning_rate": 1.9286080303964495e-05, + "loss": 0.4877, + "step": 8903 + }, + { + "epoch": 0.2444810543657331, + "grad_norm": 0.38577529788017273, + "learning_rate": 1.9285920036003145e-05, + "loss": 0.5772, + "step": 8904 + }, + { + "epoch": 0.2445085118066996, + "grad_norm": 0.3526794910430908, + "learning_rate": 1.928575975072059e-05, + "loss": 0.4405, + "step": 8905 + }, + { + "epoch": 0.24453596924766613, + "grad_norm": 0.36106443405151367, + "learning_rate": 1.928559944811713e-05, + "loss": 0.5256, + "step": 8906 + }, + { + "epoch": 0.24456342668863262, + "grad_norm": 0.36406657099723816, + "learning_rate": 1.9285439128193063e-05, + "loss": 0.5531, + "step": 8907 + }, + { + "epoch": 0.24459088412959912, + "grad_norm": 0.38600608706474304, + "learning_rate": 1.928527879094869e-05, + "loss": 0.5544, + "step": 8908 + }, + { + "epoch": 0.2446183415705656, + "grad_norm": 0.4203549325466156, + "learning_rate": 1.9285118436384305e-05, + "loss": 0.5535, + "step": 8909 + }, + { + "epoch": 0.24464579901153213, + "grad_norm": 0.33707916736602783, + "learning_rate": 1.9284958064500215e-05, + "loss": 0.5341, + "step": 8910 + }, + { + "epoch": 0.24467325645249863, + "grad_norm": 0.3737312853336334, + "learning_rate": 1.9284797675296713e-05, + "loss": 0.5123, + "step": 8911 + }, + { + "epoch": 0.24470071389346512, + "grad_norm": 0.3659346401691437, + "learning_rate": 1.9284637268774098e-05, + "loss": 0.578, + "step": 8912 + }, + { + "epoch": 0.24472817133443164, + "grad_norm": 0.3864384889602661, + "learning_rate": 1.9284476844932674e-05, + "loss": 0.4758, + "step": 8913 + }, + { + "epoch": 0.24475562877539814, + "grad_norm": 0.3191125988960266, + "learning_rate": 1.9284316403772733e-05, + "loss": 0.4794, + "step": 8914 + }, + { + "epoch": 0.24478308621636463, + "grad_norm": 0.3681592345237732, + "learning_rate": 1.9284155945294584e-05, + "loss": 0.5158, + "step": 8915 + }, + { + "epoch": 0.24481054365733113, + "grad_norm": 0.33160915970802307, + "learning_rate": 1.928399546949852e-05, + "loss": 0.4838, + "step": 8916 + }, + { + "epoch": 0.24483800109829765, + "grad_norm": 0.3539070785045624, + "learning_rate": 1.928383497638484e-05, + "loss": 0.5238, + "step": 8917 + }, + { + "epoch": 0.24486545853926414, + "grad_norm": 0.3628561794757843, + "learning_rate": 1.9283674465953843e-05, + "loss": 0.5326, + "step": 8918 + }, + { + "epoch": 0.24489291598023064, + "grad_norm": 0.3470414876937866, + "learning_rate": 1.928351393820583e-05, + "loss": 0.4669, + "step": 8919 + }, + { + "epoch": 0.24492037342119716, + "grad_norm": 0.3995512127876282, + "learning_rate": 1.9283353393141104e-05, + "loss": 0.5369, + "step": 8920 + }, + { + "epoch": 0.24494783086216365, + "grad_norm": 0.4245094656944275, + "learning_rate": 1.928319283075996e-05, + "loss": 0.5865, + "step": 8921 + }, + { + "epoch": 0.24497528830313015, + "grad_norm": 0.4900933802127838, + "learning_rate": 1.92830322510627e-05, + "loss": 0.51, + "step": 8922 + }, + { + "epoch": 0.24500274574409664, + "grad_norm": 0.3695622682571411, + "learning_rate": 1.928287165404962e-05, + "loss": 0.6239, + "step": 8923 + }, + { + "epoch": 0.24503020318506316, + "grad_norm": 0.44630488753318787, + "learning_rate": 1.928271103972102e-05, + "loss": 0.5141, + "step": 8924 + }, + { + "epoch": 0.24505766062602966, + "grad_norm": 0.4476328492164612, + "learning_rate": 1.92825504080772e-05, + "loss": 0.5495, + "step": 8925 + }, + { + "epoch": 0.24508511806699615, + "grad_norm": 0.3979041576385498, + "learning_rate": 1.9282389759118466e-05, + "loss": 0.4479, + "step": 8926 + }, + { + "epoch": 0.24511257550796267, + "grad_norm": 0.361213356256485, + "learning_rate": 1.9282229092845113e-05, + "loss": 0.606, + "step": 8927 + }, + { + "epoch": 0.24514003294892917, + "grad_norm": 0.36312371492385864, + "learning_rate": 1.928206840925744e-05, + "loss": 0.5019, + "step": 8928 + }, + { + "epoch": 0.24516749038989566, + "grad_norm": 0.4203580319881439, + "learning_rate": 1.928190770835574e-05, + "loss": 0.6118, + "step": 8929 + }, + { + "epoch": 0.24519494783086215, + "grad_norm": 0.7821816205978394, + "learning_rate": 1.9281746990140324e-05, + "loss": 0.4961, + "step": 8930 + }, + { + "epoch": 0.24522240527182868, + "grad_norm": 0.3967643678188324, + "learning_rate": 1.9281586254611487e-05, + "loss": 0.5739, + "step": 8931 + }, + { + "epoch": 0.24524986271279517, + "grad_norm": 0.3846569061279297, + "learning_rate": 1.928142550176953e-05, + "loss": 0.5654, + "step": 8932 + }, + { + "epoch": 0.24527732015376166, + "grad_norm": 0.49462729692459106, + "learning_rate": 1.9281264731614753e-05, + "loss": 0.5365, + "step": 8933 + }, + { + "epoch": 0.24530477759472818, + "grad_norm": 0.41566550731658936, + "learning_rate": 1.9281103944147452e-05, + "loss": 0.5698, + "step": 8934 + }, + { + "epoch": 0.24533223503569468, + "grad_norm": 0.3757728934288025, + "learning_rate": 1.9280943139367933e-05, + "loss": 0.5646, + "step": 8935 + }, + { + "epoch": 0.24535969247666117, + "grad_norm": 0.3690943419933319, + "learning_rate": 1.9280782317276493e-05, + "loss": 0.4971, + "step": 8936 + }, + { + "epoch": 0.24538714991762767, + "grad_norm": 0.46816757321357727, + "learning_rate": 1.928062147787343e-05, + "loss": 0.5058, + "step": 8937 + }, + { + "epoch": 0.2454146073585942, + "grad_norm": 0.3572867512702942, + "learning_rate": 1.928046062115905e-05, + "loss": 0.5684, + "step": 8938 + }, + { + "epoch": 0.24544206479956068, + "grad_norm": 0.3656623363494873, + "learning_rate": 1.9280299747133644e-05, + "loss": 0.5626, + "step": 8939 + }, + { + "epoch": 0.24546952224052718, + "grad_norm": 0.34860438108444214, + "learning_rate": 1.9280138855797518e-05, + "loss": 0.5718, + "step": 8940 + }, + { + "epoch": 0.24549697968149367, + "grad_norm": 0.35981255769729614, + "learning_rate": 1.927997794715097e-05, + "loss": 0.4679, + "step": 8941 + }, + { + "epoch": 0.2455244371224602, + "grad_norm": 0.39586910605430603, + "learning_rate": 1.9279817021194304e-05, + "loss": 0.6404, + "step": 8942 + }, + { + "epoch": 0.2455518945634267, + "grad_norm": 0.3561554551124573, + "learning_rate": 1.9279656077927815e-05, + "loss": 0.5013, + "step": 8943 + }, + { + "epoch": 0.24557935200439318, + "grad_norm": 0.37397658824920654, + "learning_rate": 1.927949511735181e-05, + "loss": 0.5021, + "step": 8944 + }, + { + "epoch": 0.2456068094453597, + "grad_norm": 0.34936854243278503, + "learning_rate": 1.927933413946658e-05, + "loss": 0.4787, + "step": 8945 + }, + { + "epoch": 0.2456342668863262, + "grad_norm": 0.3514274060726166, + "learning_rate": 1.9279173144272435e-05, + "loss": 0.4472, + "step": 8946 + }, + { + "epoch": 0.2456617243272927, + "grad_norm": 0.3639184236526489, + "learning_rate": 1.927901213176967e-05, + "loss": 0.5288, + "step": 8947 + }, + { + "epoch": 0.24568918176825918, + "grad_norm": 0.36766672134399414, + "learning_rate": 1.927885110195858e-05, + "loss": 0.5239, + "step": 8948 + }, + { + "epoch": 0.2457166392092257, + "grad_norm": 0.39966461062431335, + "learning_rate": 1.9278690054839476e-05, + "loss": 0.5708, + "step": 8949 + }, + { + "epoch": 0.2457440966501922, + "grad_norm": 0.5669352412223816, + "learning_rate": 1.9278528990412652e-05, + "loss": 0.6326, + "step": 8950 + }, + { + "epoch": 0.2457715540911587, + "grad_norm": 0.7845436930656433, + "learning_rate": 1.9278367908678414e-05, + "loss": 0.5685, + "step": 8951 + }, + { + "epoch": 0.24579901153212522, + "grad_norm": 0.38865017890930176, + "learning_rate": 1.9278206809637053e-05, + "loss": 0.6086, + "step": 8952 + }, + { + "epoch": 0.2458264689730917, + "grad_norm": 0.3581632673740387, + "learning_rate": 1.927804569328888e-05, + "loss": 0.5535, + "step": 8953 + }, + { + "epoch": 0.2458539264140582, + "grad_norm": 0.3529069721698761, + "learning_rate": 1.9277884559634184e-05, + "loss": 0.4727, + "step": 8954 + }, + { + "epoch": 0.2458813838550247, + "grad_norm": 0.40914320945739746, + "learning_rate": 1.9277723408673278e-05, + "loss": 0.6066, + "step": 8955 + }, + { + "epoch": 0.24590884129599122, + "grad_norm": 0.3754177689552307, + "learning_rate": 1.9277562240406452e-05, + "loss": 0.4961, + "step": 8956 + }, + { + "epoch": 0.24593629873695771, + "grad_norm": 0.3791096806526184, + "learning_rate": 1.927740105483402e-05, + "loss": 0.532, + "step": 8957 + }, + { + "epoch": 0.2459637561779242, + "grad_norm": 0.3577145040035248, + "learning_rate": 1.9277239851956264e-05, + "loss": 0.5199, + "step": 8958 + }, + { + "epoch": 0.24599121361889073, + "grad_norm": 0.3663995862007141, + "learning_rate": 1.9277078631773502e-05, + "loss": 0.5314, + "step": 8959 + }, + { + "epoch": 0.24601867105985722, + "grad_norm": 0.38312116265296936, + "learning_rate": 1.9276917394286022e-05, + "loss": 0.5449, + "step": 8960 + }, + { + "epoch": 0.24604612850082372, + "grad_norm": 0.36814674735069275, + "learning_rate": 1.9276756139494134e-05, + "loss": 0.5114, + "step": 8961 + }, + { + "epoch": 0.2460735859417902, + "grad_norm": 0.33360207080841064, + "learning_rate": 1.927659486739813e-05, + "loss": 0.5612, + "step": 8962 + }, + { + "epoch": 0.24610104338275673, + "grad_norm": 0.34528860449790955, + "learning_rate": 1.927643357799832e-05, + "loss": 0.5092, + "step": 8963 + }, + { + "epoch": 0.24612850082372323, + "grad_norm": 0.4145629405975342, + "learning_rate": 1.9276272271295e-05, + "loss": 0.5454, + "step": 8964 + }, + { + "epoch": 0.24615595826468972, + "grad_norm": 0.3989708423614502, + "learning_rate": 1.9276110947288472e-05, + "loss": 0.5243, + "step": 8965 + }, + { + "epoch": 0.24618341570565624, + "grad_norm": 0.38464537262916565, + "learning_rate": 1.9275949605979036e-05, + "loss": 0.5368, + "step": 8966 + }, + { + "epoch": 0.24621087314662274, + "grad_norm": 0.34941402077674866, + "learning_rate": 1.9275788247366997e-05, + "loss": 0.4657, + "step": 8967 + }, + { + "epoch": 0.24623833058758923, + "grad_norm": 0.3891195058822632, + "learning_rate": 1.927562687145265e-05, + "loss": 0.5631, + "step": 8968 + }, + { + "epoch": 0.24626578802855573, + "grad_norm": 0.426717072725296, + "learning_rate": 1.9275465478236296e-05, + "loss": 0.6311, + "step": 8969 + }, + { + "epoch": 0.24629324546952225, + "grad_norm": 0.3893952965736389, + "learning_rate": 1.927530406771824e-05, + "loss": 0.5565, + "step": 8970 + }, + { + "epoch": 0.24632070291048874, + "grad_norm": 0.3718827962875366, + "learning_rate": 1.927514263989878e-05, + "loss": 0.5062, + "step": 8971 + }, + { + "epoch": 0.24634816035145524, + "grad_norm": 0.3970291316509247, + "learning_rate": 1.927498119477822e-05, + "loss": 0.4903, + "step": 8972 + }, + { + "epoch": 0.24637561779242176, + "grad_norm": 0.3338545262813568, + "learning_rate": 1.9274819732356862e-05, + "loss": 0.4879, + "step": 8973 + }, + { + "epoch": 0.24640307523338825, + "grad_norm": 0.3473609685897827, + "learning_rate": 1.9274658252635002e-05, + "loss": 0.551, + "step": 8974 + }, + { + "epoch": 0.24643053267435475, + "grad_norm": 0.43231162428855896, + "learning_rate": 1.9274496755612944e-05, + "loss": 0.5739, + "step": 8975 + }, + { + "epoch": 0.24645799011532124, + "grad_norm": 0.3601270318031311, + "learning_rate": 1.927433524129099e-05, + "loss": 0.5445, + "step": 8976 + }, + { + "epoch": 0.24648544755628776, + "grad_norm": 0.3769501745700836, + "learning_rate": 1.9274173709669443e-05, + "loss": 0.4978, + "step": 8977 + }, + { + "epoch": 0.24651290499725426, + "grad_norm": 0.36306682229042053, + "learning_rate": 1.92740121607486e-05, + "loss": 0.5059, + "step": 8978 + }, + { + "epoch": 0.24654036243822075, + "grad_norm": 0.3761597275733948, + "learning_rate": 1.927385059452877e-05, + "loss": 0.5094, + "step": 8979 + }, + { + "epoch": 0.24656781987918727, + "grad_norm": 0.3937368094921112, + "learning_rate": 1.927368901101024e-05, + "loss": 0.5333, + "step": 8980 + }, + { + "epoch": 0.24659527732015377, + "grad_norm": 0.3871310353279114, + "learning_rate": 1.9273527410193325e-05, + "loss": 0.4599, + "step": 8981 + }, + { + "epoch": 0.24662273476112026, + "grad_norm": 0.40378662943840027, + "learning_rate": 1.927336579207832e-05, + "loss": 0.5339, + "step": 8982 + }, + { + "epoch": 0.24665019220208675, + "grad_norm": 0.4014345705509186, + "learning_rate": 1.9273204156665528e-05, + "loss": 0.5092, + "step": 8983 + }, + { + "epoch": 0.24667764964305328, + "grad_norm": 0.76580810546875, + "learning_rate": 1.9273042503955254e-05, + "loss": 0.4853, + "step": 8984 + }, + { + "epoch": 0.24670510708401977, + "grad_norm": 0.4220210909843445, + "learning_rate": 1.927288083394779e-05, + "loss": 0.5448, + "step": 8985 + }, + { + "epoch": 0.24673256452498626, + "grad_norm": 0.39838936924934387, + "learning_rate": 1.9272719146643448e-05, + "loss": 0.5401, + "step": 8986 + }, + { + "epoch": 0.24676002196595279, + "grad_norm": 0.583463191986084, + "learning_rate": 1.9272557442042522e-05, + "loss": 0.3948, + "step": 8987 + }, + { + "epoch": 0.24678747940691928, + "grad_norm": 0.4246950149536133, + "learning_rate": 1.927239572014532e-05, + "loss": 0.5896, + "step": 8988 + }, + { + "epoch": 0.24681493684788577, + "grad_norm": 0.3343854546546936, + "learning_rate": 1.927223398095214e-05, + "loss": 0.5284, + "step": 8989 + }, + { + "epoch": 0.24684239428885227, + "grad_norm": 0.37356263399124146, + "learning_rate": 1.9272072224463285e-05, + "loss": 0.5533, + "step": 8990 + }, + { + "epoch": 0.2468698517298188, + "grad_norm": 0.385189414024353, + "learning_rate": 1.9271910450679057e-05, + "loss": 0.57, + "step": 8991 + }, + { + "epoch": 0.24689730917078528, + "grad_norm": 0.36856862902641296, + "learning_rate": 1.9271748659599754e-05, + "loss": 0.5755, + "step": 8992 + }, + { + "epoch": 0.24692476661175178, + "grad_norm": 0.37300851941108704, + "learning_rate": 1.927158685122568e-05, + "loss": 0.617, + "step": 8993 + }, + { + "epoch": 0.2469522240527183, + "grad_norm": 0.34357428550720215, + "learning_rate": 1.927142502555714e-05, + "loss": 0.4788, + "step": 8994 + }, + { + "epoch": 0.2469796814936848, + "grad_norm": 0.31360888481140137, + "learning_rate": 1.9271263182594433e-05, + "loss": 0.5382, + "step": 8995 + }, + { + "epoch": 0.2470071389346513, + "grad_norm": 0.4268900454044342, + "learning_rate": 1.9271101322337858e-05, + "loss": 0.5587, + "step": 8996 + }, + { + "epoch": 0.24703459637561778, + "grad_norm": 0.3806796073913574, + "learning_rate": 1.9270939444787723e-05, + "loss": 0.631, + "step": 8997 + }, + { + "epoch": 0.2470620538165843, + "grad_norm": 0.39507320523262024, + "learning_rate": 1.9270777549944326e-05, + "loss": 0.5204, + "step": 8998 + }, + { + "epoch": 0.2470895112575508, + "grad_norm": 0.42569592595100403, + "learning_rate": 1.927061563780797e-05, + "loss": 0.459, + "step": 8999 + }, + { + "epoch": 0.2471169686985173, + "grad_norm": 0.3510018289089203, + "learning_rate": 1.927045370837896e-05, + "loss": 0.499, + "step": 9000 + }, + { + "epoch": 0.2471444261394838, + "grad_norm": 0.3838255703449249, + "learning_rate": 1.9270291761657592e-05, + "loss": 0.592, + "step": 9001 + }, + { + "epoch": 0.2471718835804503, + "grad_norm": 0.4676567316055298, + "learning_rate": 1.9270129797644167e-05, + "loss": 0.5353, + "step": 9002 + }, + { + "epoch": 0.2471993410214168, + "grad_norm": 0.33687251806259155, + "learning_rate": 1.9269967816339e-05, + "loss": 0.4403, + "step": 9003 + }, + { + "epoch": 0.2472267984623833, + "grad_norm": 0.3682221472263336, + "learning_rate": 1.926980581774238e-05, + "loss": 0.5887, + "step": 9004 + }, + { + "epoch": 0.24725425590334982, + "grad_norm": 0.4365924596786499, + "learning_rate": 1.9269643801854612e-05, + "loss": 0.5392, + "step": 9005 + }, + { + "epoch": 0.2472817133443163, + "grad_norm": 0.36387577652931213, + "learning_rate": 1.9269481768676003e-05, + "loss": 0.6194, + "step": 9006 + }, + { + "epoch": 0.2473091707852828, + "grad_norm": 0.33847740292549133, + "learning_rate": 1.926931971820685e-05, + "loss": 0.4756, + "step": 9007 + }, + { + "epoch": 0.2473366282262493, + "grad_norm": 0.3706493675708771, + "learning_rate": 1.9269157650447457e-05, + "loss": 0.5391, + "step": 9008 + }, + { + "epoch": 0.24736408566721582, + "grad_norm": 0.3875819742679596, + "learning_rate": 1.926899556539813e-05, + "loss": 0.547, + "step": 9009 + }, + { + "epoch": 0.24739154310818232, + "grad_norm": 0.43154144287109375, + "learning_rate": 1.926883346305916e-05, + "loss": 0.5863, + "step": 9010 + }, + { + "epoch": 0.2474190005491488, + "grad_norm": 0.39727526903152466, + "learning_rate": 1.9268671343430865e-05, + "loss": 0.5918, + "step": 9011 + }, + { + "epoch": 0.24744645799011533, + "grad_norm": 0.40581876039505005, + "learning_rate": 1.9268509206513536e-05, + "loss": 0.5159, + "step": 9012 + }, + { + "epoch": 0.24747391543108183, + "grad_norm": 0.3444885015487671, + "learning_rate": 1.926834705230748e-05, + "loss": 0.5097, + "step": 9013 + }, + { + "epoch": 0.24750137287204832, + "grad_norm": 0.44204992055892944, + "learning_rate": 1.9268184880813e-05, + "loss": 0.5792, + "step": 9014 + }, + { + "epoch": 0.2475288303130148, + "grad_norm": 0.3651072084903717, + "learning_rate": 1.9268022692030396e-05, + "loss": 0.57, + "step": 9015 + }, + { + "epoch": 0.24755628775398134, + "grad_norm": 0.3853370249271393, + "learning_rate": 1.9267860485959972e-05, + "loss": 0.6042, + "step": 9016 + }, + { + "epoch": 0.24758374519494783, + "grad_norm": 0.4136281907558441, + "learning_rate": 1.926769826260203e-05, + "loss": 0.5159, + "step": 9017 + }, + { + "epoch": 0.24761120263591432, + "grad_norm": 0.3485356569290161, + "learning_rate": 1.9267536021956873e-05, + "loss": 0.5276, + "step": 9018 + }, + { + "epoch": 0.24763866007688085, + "grad_norm": 0.38352078199386597, + "learning_rate": 1.9267373764024803e-05, + "loss": 0.5688, + "step": 9019 + }, + { + "epoch": 0.24766611751784734, + "grad_norm": 0.4677684009075165, + "learning_rate": 1.9267211488806126e-05, + "loss": 0.4172, + "step": 9020 + }, + { + "epoch": 0.24769357495881383, + "grad_norm": 0.40758442878723145, + "learning_rate": 1.9267049196301137e-05, + "loss": 0.4897, + "step": 9021 + }, + { + "epoch": 0.24772103239978033, + "grad_norm": 0.3817583918571472, + "learning_rate": 1.9266886886510146e-05, + "loss": 0.4406, + "step": 9022 + }, + { + "epoch": 0.24774848984074685, + "grad_norm": 0.4267972409725189, + "learning_rate": 1.9266724559433453e-05, + "loss": 0.5551, + "step": 9023 + }, + { + "epoch": 0.24777594728171334, + "grad_norm": 0.37184616923332214, + "learning_rate": 1.9266562215071364e-05, + "loss": 0.5566, + "step": 9024 + }, + { + "epoch": 0.24780340472267984, + "grad_norm": 0.33697089552879333, + "learning_rate": 1.9266399853424173e-05, + "loss": 0.4411, + "step": 9025 + }, + { + "epoch": 0.24783086216364636, + "grad_norm": 0.4017350375652313, + "learning_rate": 1.9266237474492194e-05, + "loss": 0.5092, + "step": 9026 + }, + { + "epoch": 0.24785831960461285, + "grad_norm": 0.337526798248291, + "learning_rate": 1.9266075078275724e-05, + "loss": 0.5091, + "step": 9027 + }, + { + "epoch": 0.24788577704557935, + "grad_norm": 0.35568729043006897, + "learning_rate": 1.9265912664775063e-05, + "loss": 0.5595, + "step": 9028 + }, + { + "epoch": 0.24791323448654584, + "grad_norm": 0.39410164952278137, + "learning_rate": 1.926575023399052e-05, + "loss": 0.5127, + "step": 9029 + }, + { + "epoch": 0.24794069192751236, + "grad_norm": 0.4824616312980652, + "learning_rate": 1.92655877859224e-05, + "loss": 0.5496, + "step": 9030 + }, + { + "epoch": 0.24796814936847886, + "grad_norm": 0.37118831276893616, + "learning_rate": 1.9265425320570995e-05, + "loss": 0.3518, + "step": 9031 + }, + { + "epoch": 0.24799560680944535, + "grad_norm": 0.36401259899139404, + "learning_rate": 1.9265262837936616e-05, + "loss": 0.5597, + "step": 9032 + }, + { + "epoch": 0.24802306425041187, + "grad_norm": 0.376029908657074, + "learning_rate": 1.9265100338019563e-05, + "loss": 0.6042, + "step": 9033 + }, + { + "epoch": 0.24805052169137837, + "grad_norm": 0.3486684262752533, + "learning_rate": 1.926493782082014e-05, + "loss": 0.4698, + "step": 9034 + }, + { + "epoch": 0.24807797913234486, + "grad_norm": 0.3893037438392639, + "learning_rate": 1.9264775286338658e-05, + "loss": 0.5891, + "step": 9035 + }, + { + "epoch": 0.24810543657331136, + "grad_norm": 0.3350045084953308, + "learning_rate": 1.926461273457541e-05, + "loss": 0.4763, + "step": 9036 + }, + { + "epoch": 0.24813289401427788, + "grad_norm": 0.3900049924850464, + "learning_rate": 1.92644501655307e-05, + "loss": 0.4506, + "step": 9037 + }, + { + "epoch": 0.24816035145524437, + "grad_norm": 0.3707972764968872, + "learning_rate": 1.9264287579204835e-05, + "loss": 0.514, + "step": 9038 + }, + { + "epoch": 0.24818780889621087, + "grad_norm": 0.3202662765979767, + "learning_rate": 1.926412497559812e-05, + "loss": 0.551, + "step": 9039 + }, + { + "epoch": 0.2482152663371774, + "grad_norm": 0.3974660038948059, + "learning_rate": 1.926396235471085e-05, + "loss": 0.6303, + "step": 9040 + }, + { + "epoch": 0.24824272377814388, + "grad_norm": 0.33950406312942505, + "learning_rate": 1.9263799716543335e-05, + "loss": 0.5168, + "step": 9041 + }, + { + "epoch": 0.24827018121911038, + "grad_norm": 0.42187732458114624, + "learning_rate": 1.9263637061095878e-05, + "loss": 0.5361, + "step": 9042 + }, + { + "epoch": 0.24829763866007687, + "grad_norm": 0.36966729164123535, + "learning_rate": 1.926347438836878e-05, + "loss": 0.5227, + "step": 9043 + }, + { + "epoch": 0.2483250961010434, + "grad_norm": 0.40615132451057434, + "learning_rate": 1.9263311698362347e-05, + "loss": 0.5481, + "step": 9044 + }, + { + "epoch": 0.24835255354200989, + "grad_norm": 0.3797815442085266, + "learning_rate": 1.926314899107688e-05, + "loss": 0.5094, + "step": 9045 + }, + { + "epoch": 0.24838001098297638, + "grad_norm": 0.35387295484542847, + "learning_rate": 1.9262986266512683e-05, + "loss": 0.6453, + "step": 9046 + }, + { + "epoch": 0.2484074684239429, + "grad_norm": 0.3629722595214844, + "learning_rate": 1.926282352467006e-05, + "loss": 0.551, + "step": 9047 + }, + { + "epoch": 0.2484349258649094, + "grad_norm": 0.363050639629364, + "learning_rate": 1.9262660765549318e-05, + "loss": 0.5453, + "step": 9048 + }, + { + "epoch": 0.2484623833058759, + "grad_norm": 0.386135995388031, + "learning_rate": 1.9262497989150753e-05, + "loss": 0.5826, + "step": 9049 + }, + { + "epoch": 0.24848984074684238, + "grad_norm": 0.3985317647457123, + "learning_rate": 1.9262335195474678e-05, + "loss": 0.5664, + "step": 9050 + }, + { + "epoch": 0.2485172981878089, + "grad_norm": 0.33118829131126404, + "learning_rate": 1.926217238452139e-05, + "loss": 0.5278, + "step": 9051 + }, + { + "epoch": 0.2485447556287754, + "grad_norm": 0.3617837727069855, + "learning_rate": 1.9262009556291193e-05, + "loss": 0.4041, + "step": 9052 + }, + { + "epoch": 0.2485722130697419, + "grad_norm": 0.3425613045692444, + "learning_rate": 1.9261846710784393e-05, + "loss": 0.5787, + "step": 9053 + }, + { + "epoch": 0.24859967051070841, + "grad_norm": 0.38179564476013184, + "learning_rate": 1.926168384800129e-05, + "loss": 0.527, + "step": 9054 + }, + { + "epoch": 0.2486271279516749, + "grad_norm": 0.35539910197257996, + "learning_rate": 1.9261520967942194e-05, + "loss": 0.4829, + "step": 9055 + }, + { + "epoch": 0.2486545853926414, + "grad_norm": 0.390514612197876, + "learning_rate": 1.9261358070607406e-05, + "loss": 0.4971, + "step": 9056 + }, + { + "epoch": 0.2486820428336079, + "grad_norm": 0.37187421321868896, + "learning_rate": 1.926119515599723e-05, + "loss": 0.5342, + "step": 9057 + }, + { + "epoch": 0.24870950027457442, + "grad_norm": 0.36795029044151306, + "learning_rate": 1.9261032224111963e-05, + "loss": 0.5298, + "step": 9058 + }, + { + "epoch": 0.2487369577155409, + "grad_norm": 0.38844868540763855, + "learning_rate": 1.9260869274951922e-05, + "loss": 0.5267, + "step": 9059 + }, + { + "epoch": 0.2487644151565074, + "grad_norm": 0.3975600600242615, + "learning_rate": 1.9260706308517402e-05, + "loss": 0.527, + "step": 9060 + }, + { + "epoch": 0.24879187259747393, + "grad_norm": 0.4517129361629486, + "learning_rate": 1.9260543324808706e-05, + "loss": 0.4864, + "step": 9061 + }, + { + "epoch": 0.24881933003844042, + "grad_norm": 0.3742983341217041, + "learning_rate": 1.9260380323826145e-05, + "loss": 0.6081, + "step": 9062 + }, + { + "epoch": 0.24884678747940692, + "grad_norm": 0.35753780603408813, + "learning_rate": 1.9260217305570018e-05, + "loss": 0.5292, + "step": 9063 + }, + { + "epoch": 0.2488742449203734, + "grad_norm": 0.3447599709033966, + "learning_rate": 1.926005427004063e-05, + "loss": 0.5304, + "step": 9064 + }, + { + "epoch": 0.24890170236133993, + "grad_norm": 0.3454020023345947, + "learning_rate": 1.9259891217238283e-05, + "loss": 0.4844, + "step": 9065 + }, + { + "epoch": 0.24892915980230643, + "grad_norm": 0.4065542221069336, + "learning_rate": 1.9259728147163285e-05, + "loss": 0.5627, + "step": 9066 + }, + { + "epoch": 0.24895661724327292, + "grad_norm": 0.3810174763202667, + "learning_rate": 1.9259565059815937e-05, + "loss": 0.5858, + "step": 9067 + }, + { + "epoch": 0.24898407468423944, + "grad_norm": 0.35013991594314575, + "learning_rate": 1.925940195519655e-05, + "loss": 0.4389, + "step": 9068 + }, + { + "epoch": 0.24901153212520594, + "grad_norm": 0.34617388248443604, + "learning_rate": 1.925923883330542e-05, + "loss": 0.4939, + "step": 9069 + }, + { + "epoch": 0.24903898956617243, + "grad_norm": 0.3777387738227844, + "learning_rate": 1.9259075694142855e-05, + "loss": 0.5673, + "step": 9070 + }, + { + "epoch": 0.24906644700713892, + "grad_norm": 0.5668032169342041, + "learning_rate": 1.925891253770916e-05, + "loss": 0.5416, + "step": 9071 + }, + { + "epoch": 0.24909390444810545, + "grad_norm": 0.33152204751968384, + "learning_rate": 1.9258749364004633e-05, + "loss": 0.5333, + "step": 9072 + }, + { + "epoch": 0.24912136188907194, + "grad_norm": 0.3562043011188507, + "learning_rate": 1.9258586173029587e-05, + "loss": 0.5659, + "step": 9073 + }, + { + "epoch": 0.24914881933003843, + "grad_norm": 0.36649617552757263, + "learning_rate": 1.9258422964784325e-05, + "loss": 0.5329, + "step": 9074 + }, + { + "epoch": 0.24917627677100493, + "grad_norm": 0.35291269421577454, + "learning_rate": 1.9258259739269146e-05, + "loss": 0.4792, + "step": 9075 + }, + { + "epoch": 0.24920373421197145, + "grad_norm": 0.3677527904510498, + "learning_rate": 1.9258096496484357e-05, + "loss": 0.5322, + "step": 9076 + }, + { + "epoch": 0.24923119165293794, + "grad_norm": 0.3584049642086029, + "learning_rate": 1.9257933236430265e-05, + "loss": 0.44, + "step": 9077 + }, + { + "epoch": 0.24925864909390444, + "grad_norm": 0.36686909198760986, + "learning_rate": 1.9257769959107172e-05, + "loss": 0.5843, + "step": 9078 + }, + { + "epoch": 0.24928610653487096, + "grad_norm": 0.3654935657978058, + "learning_rate": 1.9257606664515385e-05, + "loss": 0.5871, + "step": 9079 + }, + { + "epoch": 0.24931356397583745, + "grad_norm": 0.361832857131958, + "learning_rate": 1.9257443352655202e-05, + "loss": 0.5555, + "step": 9080 + }, + { + "epoch": 0.24934102141680395, + "grad_norm": 0.346492201089859, + "learning_rate": 1.9257280023526938e-05, + "loss": 0.5168, + "step": 9081 + }, + { + "epoch": 0.24936847885777044, + "grad_norm": 0.4902135729789734, + "learning_rate": 1.925711667713089e-05, + "loss": 0.5912, + "step": 9082 + }, + { + "epoch": 0.24939593629873696, + "grad_norm": 0.37343859672546387, + "learning_rate": 1.9256953313467365e-05, + "loss": 0.4454, + "step": 9083 + }, + { + "epoch": 0.24942339373970346, + "grad_norm": 0.3479512929916382, + "learning_rate": 1.9256789932536666e-05, + "loss": 0.5548, + "step": 9084 + }, + { + "epoch": 0.24945085118066995, + "grad_norm": 0.3330829441547394, + "learning_rate": 1.9256626534339102e-05, + "loss": 0.4285, + "step": 9085 + }, + { + "epoch": 0.24947830862163647, + "grad_norm": 0.3404856324195862, + "learning_rate": 1.9256463118874972e-05, + "loss": 0.5112, + "step": 9086 + }, + { + "epoch": 0.24950576606260297, + "grad_norm": 0.3757345378398895, + "learning_rate": 1.925629968614459e-05, + "loss": 0.4858, + "step": 9087 + }, + { + "epoch": 0.24953322350356946, + "grad_norm": 0.3925851583480835, + "learning_rate": 1.9256136236148248e-05, + "loss": 0.5326, + "step": 9088 + }, + { + "epoch": 0.24956068094453596, + "grad_norm": 0.3609011769294739, + "learning_rate": 1.9255972768886264e-05, + "loss": 0.5085, + "step": 9089 + }, + { + "epoch": 0.24958813838550248, + "grad_norm": 0.36159613728523254, + "learning_rate": 1.925580928435893e-05, + "loss": 0.4564, + "step": 9090 + }, + { + "epoch": 0.24961559582646897, + "grad_norm": 0.40588635206222534, + "learning_rate": 1.925564578256656e-05, + "loss": 0.5392, + "step": 9091 + }, + { + "epoch": 0.24964305326743547, + "grad_norm": 0.37397056818008423, + "learning_rate": 1.9255482263509456e-05, + "loss": 0.5158, + "step": 9092 + }, + { + "epoch": 0.249670510708402, + "grad_norm": 0.33622536063194275, + "learning_rate": 1.9255318727187927e-05, + "loss": 0.4883, + "step": 9093 + }, + { + "epoch": 0.24969796814936848, + "grad_norm": 0.3653191924095154, + "learning_rate": 1.925515517360227e-05, + "loss": 0.5091, + "step": 9094 + }, + { + "epoch": 0.24972542559033498, + "grad_norm": 0.4347001612186432, + "learning_rate": 1.9254991602752802e-05, + "loss": 0.5127, + "step": 9095 + }, + { + "epoch": 0.24975288303130147, + "grad_norm": 0.3373603820800781, + "learning_rate": 1.9254828014639813e-05, + "loss": 0.5666, + "step": 9096 + }, + { + "epoch": 0.249780340472268, + "grad_norm": 0.35183706879615784, + "learning_rate": 1.9254664409263618e-05, + "loss": 0.5874, + "step": 9097 + }, + { + "epoch": 0.2498077979132345, + "grad_norm": 0.3919627368450165, + "learning_rate": 1.9254500786624523e-05, + "loss": 0.5808, + "step": 9098 + }, + { + "epoch": 0.24983525535420098, + "grad_norm": 0.3264128863811493, + "learning_rate": 1.9254337146722828e-05, + "loss": 0.5347, + "step": 9099 + }, + { + "epoch": 0.2498627127951675, + "grad_norm": 0.35240277647972107, + "learning_rate": 1.9254173489558842e-05, + "loss": 0.5619, + "step": 9100 + }, + { + "epoch": 0.249890170236134, + "grad_norm": 0.3960556387901306, + "learning_rate": 1.9254009815132867e-05, + "loss": 0.5311, + "step": 9101 + }, + { + "epoch": 0.2499176276771005, + "grad_norm": 0.3834902048110962, + "learning_rate": 1.9253846123445214e-05, + "loss": 0.5991, + "step": 9102 + }, + { + "epoch": 0.24994508511806698, + "grad_norm": 0.3542090952396393, + "learning_rate": 1.9253682414496178e-05, + "loss": 0.5842, + "step": 9103 + }, + { + "epoch": 0.2499725425590335, + "grad_norm": 0.3413252830505371, + "learning_rate": 1.9253518688286075e-05, + "loss": 0.5371, + "step": 9104 + }, + { + "epoch": 0.25, + "grad_norm": 0.4094061255455017, + "learning_rate": 1.9253354944815205e-05, + "loss": 0.5411, + "step": 9105 + }, + { + "epoch": 0.2500274574409665, + "grad_norm": 0.35323366522789, + "learning_rate": 1.9253191184083877e-05, + "loss": 0.5061, + "step": 9106 + }, + { + "epoch": 0.250054914881933, + "grad_norm": 0.3559894263744354, + "learning_rate": 1.9253027406092393e-05, + "loss": 0.6088, + "step": 9107 + }, + { + "epoch": 0.2500823723228995, + "grad_norm": 0.3637772500514984, + "learning_rate": 1.9252863610841058e-05, + "loss": 0.4797, + "step": 9108 + }, + { + "epoch": 0.25010982976386603, + "grad_norm": 0.39542654156684875, + "learning_rate": 1.9252699798330183e-05, + "loss": 0.5604, + "step": 9109 + }, + { + "epoch": 0.2501372872048325, + "grad_norm": 0.3327066898345947, + "learning_rate": 1.9252535968560066e-05, + "loss": 0.5078, + "step": 9110 + }, + { + "epoch": 0.250164744645799, + "grad_norm": 0.3499424457550049, + "learning_rate": 1.9252372121531016e-05, + "loss": 0.5564, + "step": 9111 + }, + { + "epoch": 0.2501922020867655, + "grad_norm": 0.34591567516326904, + "learning_rate": 1.9252208257243337e-05, + "loss": 0.5128, + "step": 9112 + }, + { + "epoch": 0.250219659527732, + "grad_norm": 0.34285977482795715, + "learning_rate": 1.9252044375697342e-05, + "loss": 0.5446, + "step": 9113 + }, + { + "epoch": 0.2502471169686985, + "grad_norm": 0.3922332525253296, + "learning_rate": 1.925188047689333e-05, + "loss": 0.5419, + "step": 9114 + }, + { + "epoch": 0.250274574409665, + "grad_norm": 0.37338587641716003, + "learning_rate": 1.9251716560831608e-05, + "loss": 0.5767, + "step": 9115 + }, + { + "epoch": 0.25030203185063155, + "grad_norm": 0.39025434851646423, + "learning_rate": 1.9251552627512483e-05, + "loss": 0.5276, + "step": 9116 + }, + { + "epoch": 0.25032948929159804, + "grad_norm": 0.4520682990550995, + "learning_rate": 1.9251388676936257e-05, + "loss": 0.473, + "step": 9117 + }, + { + "epoch": 0.25035694673256453, + "grad_norm": 0.4202898442745209, + "learning_rate": 1.925122470910324e-05, + "loss": 0.6, + "step": 9118 + }, + { + "epoch": 0.250384404173531, + "grad_norm": 0.385065495967865, + "learning_rate": 1.9251060724013734e-05, + "loss": 0.4979, + "step": 9119 + }, + { + "epoch": 0.2504118616144975, + "grad_norm": 0.3776172995567322, + "learning_rate": 1.9250896721668047e-05, + "loss": 0.5492, + "step": 9120 + }, + { + "epoch": 0.250439319055464, + "grad_norm": 0.33749276399612427, + "learning_rate": 1.9250732702066487e-05, + "loss": 0.572, + "step": 9121 + }, + { + "epoch": 0.2504667764964305, + "grad_norm": 0.3705171048641205, + "learning_rate": 1.925056866520936e-05, + "loss": 0.4921, + "step": 9122 + }, + { + "epoch": 0.25049423393739706, + "grad_norm": 0.4013906717300415, + "learning_rate": 1.9250404611096967e-05, + "loss": 0.5236, + "step": 9123 + }, + { + "epoch": 0.25052169137836355, + "grad_norm": 0.33239617943763733, + "learning_rate": 1.9250240539729618e-05, + "loss": 0.4815, + "step": 9124 + }, + { + "epoch": 0.25054914881933005, + "grad_norm": 0.35447174310684204, + "learning_rate": 1.9250076451107618e-05, + "loss": 0.5196, + "step": 9125 + }, + { + "epoch": 0.25057660626029654, + "grad_norm": 0.36241912841796875, + "learning_rate": 1.9249912345231273e-05, + "loss": 0.5522, + "step": 9126 + }, + { + "epoch": 0.25060406370126304, + "grad_norm": 0.3687591254711151, + "learning_rate": 1.924974822210089e-05, + "loss": 0.5475, + "step": 9127 + }, + { + "epoch": 0.25063152114222953, + "grad_norm": 0.378627210855484, + "learning_rate": 1.924958408171677e-05, + "loss": 0.5797, + "step": 9128 + }, + { + "epoch": 0.250658978583196, + "grad_norm": 0.4018658399581909, + "learning_rate": 1.9249419924079228e-05, + "loss": 0.5348, + "step": 9129 + }, + { + "epoch": 0.2506864360241626, + "grad_norm": 0.39601489901542664, + "learning_rate": 1.9249255749188566e-05, + "loss": 0.597, + "step": 9130 + }, + { + "epoch": 0.25071389346512907, + "grad_norm": 0.36341652274131775, + "learning_rate": 1.924909155704509e-05, + "loss": 0.5714, + "step": 9131 + }, + { + "epoch": 0.25074135090609556, + "grad_norm": 0.4147484302520752, + "learning_rate": 1.9248927347649103e-05, + "loss": 0.4959, + "step": 9132 + }, + { + "epoch": 0.25076880834706206, + "grad_norm": 0.32254934310913086, + "learning_rate": 1.924876312100092e-05, + "loss": 0.5, + "step": 9133 + }, + { + "epoch": 0.25079626578802855, + "grad_norm": 0.4903514087200165, + "learning_rate": 1.9248598877100837e-05, + "loss": 0.5348, + "step": 9134 + }, + { + "epoch": 0.25082372322899504, + "grad_norm": 0.3636152744293213, + "learning_rate": 1.924843461594917e-05, + "loss": 0.5581, + "step": 9135 + }, + { + "epoch": 0.25085118066996154, + "grad_norm": 0.3985777497291565, + "learning_rate": 1.9248270337546215e-05, + "loss": 0.445, + "step": 9136 + }, + { + "epoch": 0.2508786381109281, + "grad_norm": 0.38671907782554626, + "learning_rate": 1.9248106041892287e-05, + "loss": 0.5526, + "step": 9137 + }, + { + "epoch": 0.2509060955518946, + "grad_norm": 0.5352598428726196, + "learning_rate": 1.924794172898769e-05, + "loss": 0.5271, + "step": 9138 + }, + { + "epoch": 0.2509335529928611, + "grad_norm": 0.3920799195766449, + "learning_rate": 1.9247777398832727e-05, + "loss": 0.5585, + "step": 9139 + }, + { + "epoch": 0.25096101043382757, + "grad_norm": 0.36504101753234863, + "learning_rate": 1.924761305142771e-05, + "loss": 0.5137, + "step": 9140 + }, + { + "epoch": 0.25098846787479406, + "grad_norm": 0.36053213477134705, + "learning_rate": 1.9247448686772944e-05, + "loss": 0.5174, + "step": 9141 + }, + { + "epoch": 0.25101592531576056, + "grad_norm": 0.3781697154045105, + "learning_rate": 1.9247284304868734e-05, + "loss": 0.5912, + "step": 9142 + }, + { + "epoch": 0.25104338275672705, + "grad_norm": 0.5993843078613281, + "learning_rate": 1.924711990571539e-05, + "loss": 0.63, + "step": 9143 + }, + { + "epoch": 0.2510708401976936, + "grad_norm": 0.34873393177986145, + "learning_rate": 1.924695548931321e-05, + "loss": 0.498, + "step": 9144 + }, + { + "epoch": 0.2510982976386601, + "grad_norm": 0.42013177275657654, + "learning_rate": 1.924679105566251e-05, + "loss": 0.5858, + "step": 9145 + }, + { + "epoch": 0.2511257550796266, + "grad_norm": 0.3837541937828064, + "learning_rate": 1.9246626604763595e-05, + "loss": 0.5561, + "step": 9146 + }, + { + "epoch": 0.2511532125205931, + "grad_norm": 0.35581403970718384, + "learning_rate": 1.924646213661677e-05, + "loss": 0.4361, + "step": 9147 + }, + { + "epoch": 0.2511806699615596, + "grad_norm": 0.35335561633110046, + "learning_rate": 1.924629765122234e-05, + "loss": 0.514, + "step": 9148 + }, + { + "epoch": 0.25120812740252607, + "grad_norm": 0.3804866373538971, + "learning_rate": 1.9246133148580616e-05, + "loss": 0.5291, + "step": 9149 + }, + { + "epoch": 0.25123558484349257, + "grad_norm": 0.6132171750068665, + "learning_rate": 1.92459686286919e-05, + "loss": 0.5025, + "step": 9150 + }, + { + "epoch": 0.2512630422844591, + "grad_norm": 0.43740227818489075, + "learning_rate": 1.92458040915565e-05, + "loss": 0.4598, + "step": 9151 + }, + { + "epoch": 0.2512904997254256, + "grad_norm": 0.3644731938838959, + "learning_rate": 1.924563953717473e-05, + "loss": 0.531, + "step": 9152 + }, + { + "epoch": 0.2513179571663921, + "grad_norm": 0.3786477744579315, + "learning_rate": 1.924547496554689e-05, + "loss": 0.5158, + "step": 9153 + }, + { + "epoch": 0.2513454146073586, + "grad_norm": 0.37483397126197815, + "learning_rate": 1.9245310376673286e-05, + "loss": 0.5406, + "step": 9154 + }, + { + "epoch": 0.2513728720483251, + "grad_norm": 0.33854225277900696, + "learning_rate": 1.9245145770554227e-05, + "loss": 0.4676, + "step": 9155 + }, + { + "epoch": 0.2514003294892916, + "grad_norm": 0.3552713096141815, + "learning_rate": 1.9244981147190024e-05, + "loss": 0.464, + "step": 9156 + }, + { + "epoch": 0.2514277869302581, + "grad_norm": 1.5122889280319214, + "learning_rate": 1.924481650658098e-05, + "loss": 0.5131, + "step": 9157 + }, + { + "epoch": 0.25145524437122463, + "grad_norm": 0.3461609482765198, + "learning_rate": 1.9244651848727398e-05, + "loss": 0.5404, + "step": 9158 + }, + { + "epoch": 0.2514827018121911, + "grad_norm": 0.34808599948883057, + "learning_rate": 1.9244487173629594e-05, + "loss": 0.5084, + "step": 9159 + }, + { + "epoch": 0.2515101592531576, + "grad_norm": 0.3515630066394806, + "learning_rate": 1.924432248128787e-05, + "loss": 0.4892, + "step": 9160 + }, + { + "epoch": 0.2515376166941241, + "grad_norm": 0.3981937766075134, + "learning_rate": 1.9244157771702533e-05, + "loss": 0.5492, + "step": 9161 + }, + { + "epoch": 0.2515650741350906, + "grad_norm": 0.35601192712783813, + "learning_rate": 1.924399304487389e-05, + "loss": 0.4734, + "step": 9162 + }, + { + "epoch": 0.2515925315760571, + "grad_norm": 0.40008625388145447, + "learning_rate": 1.924382830080225e-05, + "loss": 0.5631, + "step": 9163 + }, + { + "epoch": 0.2516199890170236, + "grad_norm": 0.340348482131958, + "learning_rate": 1.9243663539487924e-05, + "loss": 0.544, + "step": 9164 + }, + { + "epoch": 0.25164744645799014, + "grad_norm": 0.4126564562320709, + "learning_rate": 1.9243498760931216e-05, + "loss": 0.6353, + "step": 9165 + }, + { + "epoch": 0.25167490389895664, + "grad_norm": 0.3793872892856598, + "learning_rate": 1.9243333965132426e-05, + "loss": 0.6266, + "step": 9166 + }, + { + "epoch": 0.25170236133992313, + "grad_norm": 0.3849842846393585, + "learning_rate": 1.9243169152091875e-05, + "loss": 0.5569, + "step": 9167 + }, + { + "epoch": 0.2517298187808896, + "grad_norm": 0.3575260043144226, + "learning_rate": 1.924300432180986e-05, + "loss": 0.521, + "step": 9168 + }, + { + "epoch": 0.2517572762218561, + "grad_norm": 0.3284849524497986, + "learning_rate": 1.9242839474286688e-05, + "loss": 0.5083, + "step": 9169 + }, + { + "epoch": 0.2517847336628226, + "grad_norm": 0.4159833490848541, + "learning_rate": 1.9242674609522676e-05, + "loss": 0.5428, + "step": 9170 + }, + { + "epoch": 0.2518121911037891, + "grad_norm": 0.3966629207134247, + "learning_rate": 1.9242509727518125e-05, + "loss": 0.5851, + "step": 9171 + }, + { + "epoch": 0.2518396485447556, + "grad_norm": 0.4097462296485901, + "learning_rate": 1.9242344828273343e-05, + "loss": 0.5266, + "step": 9172 + }, + { + "epoch": 0.25186710598572215, + "grad_norm": 0.35925450921058655, + "learning_rate": 1.9242179911788636e-05, + "loss": 0.504, + "step": 9173 + }, + { + "epoch": 0.25189456342668864, + "grad_norm": 0.35132086277008057, + "learning_rate": 1.9242014978064317e-05, + "loss": 0.4713, + "step": 9174 + }, + { + "epoch": 0.25192202086765514, + "grad_norm": 0.35621213912963867, + "learning_rate": 1.9241850027100686e-05, + "loss": 0.5747, + "step": 9175 + }, + { + "epoch": 0.25194947830862163, + "grad_norm": 0.39318573474884033, + "learning_rate": 1.924168505889806e-05, + "loss": 0.5541, + "step": 9176 + }, + { + "epoch": 0.2519769357495881, + "grad_norm": 0.3449374735355377, + "learning_rate": 1.924152007345674e-05, + "loss": 0.5397, + "step": 9177 + }, + { + "epoch": 0.2520043931905546, + "grad_norm": 0.4337042272090912, + "learning_rate": 1.924135507077704e-05, + "loss": 0.618, + "step": 9178 + }, + { + "epoch": 0.2520318506315211, + "grad_norm": 0.3307179808616638, + "learning_rate": 1.9241190050859254e-05, + "loss": 0.4957, + "step": 9179 + }, + { + "epoch": 0.25205930807248766, + "grad_norm": 0.3720493018627167, + "learning_rate": 1.92410250137037e-05, + "loss": 0.5049, + "step": 9180 + }, + { + "epoch": 0.25208676551345416, + "grad_norm": 0.3644431531429291, + "learning_rate": 1.924085995931069e-05, + "loss": 0.627, + "step": 9181 + }, + { + "epoch": 0.25211422295442065, + "grad_norm": 0.35852593183517456, + "learning_rate": 1.9240694887680527e-05, + "loss": 0.5763, + "step": 9182 + }, + { + "epoch": 0.25214168039538715, + "grad_norm": 0.35912227630615234, + "learning_rate": 1.924052979881352e-05, + "loss": 0.5236, + "step": 9183 + }, + { + "epoch": 0.25216913783635364, + "grad_norm": 0.4230536222457886, + "learning_rate": 1.924036469270997e-05, + "loss": 0.4853, + "step": 9184 + }, + { + "epoch": 0.25219659527732013, + "grad_norm": 0.9716504216194153, + "learning_rate": 1.9240199569370194e-05, + "loss": 0.5131, + "step": 9185 + }, + { + "epoch": 0.25222405271828663, + "grad_norm": 0.3638480603694916, + "learning_rate": 1.9240034428794497e-05, + "loss": 0.4831, + "step": 9186 + }, + { + "epoch": 0.2522515101592532, + "grad_norm": 0.34507960081100464, + "learning_rate": 1.9239869270983184e-05, + "loss": 0.4442, + "step": 9187 + }, + { + "epoch": 0.2522789676002197, + "grad_norm": 0.42061883211135864, + "learning_rate": 1.923970409593657e-05, + "loss": 0.5762, + "step": 9188 + }, + { + "epoch": 0.25230642504118617, + "grad_norm": 0.4057043194770813, + "learning_rate": 1.9239538903654957e-05, + "loss": 0.596, + "step": 9189 + }, + { + "epoch": 0.25233388248215266, + "grad_norm": 0.3289967179298401, + "learning_rate": 1.9239373694138656e-05, + "loss": 0.4932, + "step": 9190 + }, + { + "epoch": 0.25236133992311915, + "grad_norm": 0.3397117853164673, + "learning_rate": 1.923920846738797e-05, + "loss": 0.4928, + "step": 9191 + }, + { + "epoch": 0.25238879736408565, + "grad_norm": 0.4395521283149719, + "learning_rate": 1.9239043223403216e-05, + "loss": 0.5614, + "step": 9192 + }, + { + "epoch": 0.25241625480505214, + "grad_norm": 0.3447589874267578, + "learning_rate": 1.9238877962184695e-05, + "loss": 0.5056, + "step": 9193 + }, + { + "epoch": 0.2524437122460187, + "grad_norm": 0.38349661231040955, + "learning_rate": 1.923871268373272e-05, + "loss": 0.5388, + "step": 9194 + }, + { + "epoch": 0.2524711696869852, + "grad_norm": 0.3629039227962494, + "learning_rate": 1.9238547388047593e-05, + "loss": 0.5468, + "step": 9195 + }, + { + "epoch": 0.2524986271279517, + "grad_norm": 0.40116602182388306, + "learning_rate": 1.923838207512963e-05, + "loss": 0.5782, + "step": 9196 + }, + { + "epoch": 0.2525260845689182, + "grad_norm": 0.3317120671272278, + "learning_rate": 1.9238216744979133e-05, + "loss": 0.5139, + "step": 9197 + }, + { + "epoch": 0.25255354200988467, + "grad_norm": 0.3759322762489319, + "learning_rate": 1.923805139759642e-05, + "loss": 0.4666, + "step": 9198 + }, + { + "epoch": 0.25258099945085116, + "grad_norm": 0.3652150630950928, + "learning_rate": 1.923788603298179e-05, + "loss": 0.489, + "step": 9199 + }, + { + "epoch": 0.25260845689181766, + "grad_norm": 0.36475786566734314, + "learning_rate": 1.923772065113555e-05, + "loss": 0.5617, + "step": 9200 + }, + { + "epoch": 0.2526359143327842, + "grad_norm": 0.36925405263900757, + "learning_rate": 1.9237555252058015e-05, + "loss": 0.452, + "step": 9201 + }, + { + "epoch": 0.2526633717737507, + "grad_norm": 0.34392330050468445, + "learning_rate": 1.923738983574949e-05, + "loss": 0.4494, + "step": 9202 + }, + { + "epoch": 0.2526908292147172, + "grad_norm": 0.39312809705734253, + "learning_rate": 1.923722440221029e-05, + "loss": 0.4405, + "step": 9203 + }, + { + "epoch": 0.2527182866556837, + "grad_norm": 0.31688985228538513, + "learning_rate": 1.923705895144071e-05, + "loss": 0.4994, + "step": 9204 + }, + { + "epoch": 0.2527457440966502, + "grad_norm": 0.35526227951049805, + "learning_rate": 1.9236893483441073e-05, + "loss": 0.4939, + "step": 9205 + }, + { + "epoch": 0.2527732015376167, + "grad_norm": 0.3921359181404114, + "learning_rate": 1.9236727998211678e-05, + "loss": 0.5171, + "step": 9206 + }, + { + "epoch": 0.25280065897858317, + "grad_norm": 0.3670036494731903, + "learning_rate": 1.923656249575284e-05, + "loss": 0.6245, + "step": 9207 + }, + { + "epoch": 0.2528281164195497, + "grad_norm": 0.3445891737937927, + "learning_rate": 1.9236396976064868e-05, + "loss": 0.5461, + "step": 9208 + }, + { + "epoch": 0.2528555738605162, + "grad_norm": 0.34589987993240356, + "learning_rate": 1.9236231439148062e-05, + "loss": 0.453, + "step": 9209 + }, + { + "epoch": 0.2528830313014827, + "grad_norm": 0.42586272954940796, + "learning_rate": 1.9236065885002735e-05, + "loss": 0.5655, + "step": 9210 + }, + { + "epoch": 0.2529104887424492, + "grad_norm": 0.3948447108268738, + "learning_rate": 1.9235900313629202e-05, + "loss": 0.604, + "step": 9211 + }, + { + "epoch": 0.2529379461834157, + "grad_norm": 0.3970859944820404, + "learning_rate": 1.9235734725027767e-05, + "loss": 0.6507, + "step": 9212 + }, + { + "epoch": 0.2529654036243822, + "grad_norm": 0.3473385274410248, + "learning_rate": 1.9235569119198736e-05, + "loss": 0.4937, + "step": 9213 + }, + { + "epoch": 0.2529928610653487, + "grad_norm": 0.3550540506839752, + "learning_rate": 1.923540349614242e-05, + "loss": 0.509, + "step": 9214 + }, + { + "epoch": 0.25302031850631523, + "grad_norm": 0.38992270827293396, + "learning_rate": 1.9235237855859135e-05, + "loss": 0.4707, + "step": 9215 + }, + { + "epoch": 0.25304777594728173, + "grad_norm": 0.3983781337738037, + "learning_rate": 1.9235072198349182e-05, + "loss": 0.5918, + "step": 9216 + }, + { + "epoch": 0.2530752333882482, + "grad_norm": 0.3403855860233307, + "learning_rate": 1.923490652361287e-05, + "loss": 0.5399, + "step": 9217 + }, + { + "epoch": 0.2531026908292147, + "grad_norm": 0.3685276210308075, + "learning_rate": 1.923474083165051e-05, + "loss": 0.5242, + "step": 9218 + }, + { + "epoch": 0.2531301482701812, + "grad_norm": 0.38868582248687744, + "learning_rate": 1.923457512246241e-05, + "loss": 0.5678, + "step": 9219 + }, + { + "epoch": 0.2531576057111477, + "grad_norm": 0.42382553219795227, + "learning_rate": 1.923440939604888e-05, + "loss": 0.6371, + "step": 9220 + }, + { + "epoch": 0.2531850631521142, + "grad_norm": 0.34444892406463623, + "learning_rate": 1.923424365241023e-05, + "loss": 0.5487, + "step": 9221 + }, + { + "epoch": 0.25321252059308075, + "grad_norm": 0.4879078269004822, + "learning_rate": 1.9234077891546768e-05, + "loss": 0.5373, + "step": 9222 + }, + { + "epoch": 0.25323997803404724, + "grad_norm": 0.8280101418495178, + "learning_rate": 1.9233912113458805e-05, + "loss": 0.4845, + "step": 9223 + }, + { + "epoch": 0.25326743547501374, + "grad_norm": 0.6367643475532532, + "learning_rate": 1.923374631814665e-05, + "loss": 0.6264, + "step": 9224 + }, + { + "epoch": 0.25329489291598023, + "grad_norm": 0.34076905250549316, + "learning_rate": 1.923358050561061e-05, + "loss": 0.512, + "step": 9225 + }, + { + "epoch": 0.2533223503569467, + "grad_norm": 0.32174184918403625, + "learning_rate": 1.9233414675850993e-05, + "loss": 0.4263, + "step": 9226 + }, + { + "epoch": 0.2533498077979132, + "grad_norm": 0.5080952644348145, + "learning_rate": 1.9233248828868114e-05, + "loss": 0.5216, + "step": 9227 + }, + { + "epoch": 0.2533772652388797, + "grad_norm": 0.3670526146888733, + "learning_rate": 1.9233082964662277e-05, + "loss": 0.5875, + "step": 9228 + }, + { + "epoch": 0.25340472267984626, + "grad_norm": 0.3749102056026459, + "learning_rate": 1.9232917083233794e-05, + "loss": 0.4949, + "step": 9229 + }, + { + "epoch": 0.25343218012081276, + "grad_norm": 0.34109166264533997, + "learning_rate": 1.9232751184582973e-05, + "loss": 0.4756, + "step": 9230 + }, + { + "epoch": 0.25345963756177925, + "grad_norm": 0.35943323373794556, + "learning_rate": 1.9232585268710125e-05, + "loss": 0.599, + "step": 9231 + }, + { + "epoch": 0.25348709500274574, + "grad_norm": 0.3242088854312897, + "learning_rate": 1.9232419335615556e-05, + "loss": 0.4555, + "step": 9232 + }, + { + "epoch": 0.25351455244371224, + "grad_norm": 0.38507041335105896, + "learning_rate": 1.923225338529958e-05, + "loss": 0.5683, + "step": 9233 + }, + { + "epoch": 0.25354200988467873, + "grad_norm": 0.3613778054714203, + "learning_rate": 1.9232087417762507e-05, + "loss": 0.4852, + "step": 9234 + }, + { + "epoch": 0.2535694673256452, + "grad_norm": 0.4349735975265503, + "learning_rate": 1.9231921433004644e-05, + "loss": 0.5542, + "step": 9235 + }, + { + "epoch": 0.2535969247666118, + "grad_norm": 0.3407600224018097, + "learning_rate": 1.92317554310263e-05, + "loss": 0.5194, + "step": 9236 + }, + { + "epoch": 0.25362438220757827, + "grad_norm": 0.41521039605140686, + "learning_rate": 1.9231589411827785e-05, + "loss": 0.4776, + "step": 9237 + }, + { + "epoch": 0.25365183964854476, + "grad_norm": 0.3628336787223816, + "learning_rate": 1.923142337540941e-05, + "loss": 0.5593, + "step": 9238 + }, + { + "epoch": 0.25367929708951126, + "grad_norm": 0.3978654742240906, + "learning_rate": 1.9231257321771485e-05, + "loss": 0.6509, + "step": 9239 + }, + { + "epoch": 0.25370675453047775, + "grad_norm": 0.3913373053073883, + "learning_rate": 1.923109125091432e-05, + "loss": 0.5849, + "step": 9240 + }, + { + "epoch": 0.25373421197144425, + "grad_norm": 0.33332937955856323, + "learning_rate": 1.9230925162838223e-05, + "loss": 0.4854, + "step": 9241 + }, + { + "epoch": 0.25376166941241074, + "grad_norm": 0.3485899269580841, + "learning_rate": 1.92307590575435e-05, + "loss": 0.4307, + "step": 9242 + }, + { + "epoch": 0.2537891268533773, + "grad_norm": 0.6639112830162048, + "learning_rate": 1.9230592935030468e-05, + "loss": 0.511, + "step": 9243 + }, + { + "epoch": 0.2538165842943438, + "grad_norm": 0.37942221760749817, + "learning_rate": 1.9230426795299433e-05, + "loss": 0.523, + "step": 9244 + }, + { + "epoch": 0.2538440417353103, + "grad_norm": 0.3961610198020935, + "learning_rate": 1.923026063835071e-05, + "loss": 0.6266, + "step": 9245 + }, + { + "epoch": 0.25387149917627677, + "grad_norm": 0.3878762722015381, + "learning_rate": 1.92300944641846e-05, + "loss": 0.5532, + "step": 9246 + }, + { + "epoch": 0.25389895661724327, + "grad_norm": 0.36735787987709045, + "learning_rate": 1.922992827280142e-05, + "loss": 0.5224, + "step": 9247 + }, + { + "epoch": 0.25392641405820976, + "grad_norm": 0.34473443031311035, + "learning_rate": 1.9229762064201482e-05, + "loss": 0.5019, + "step": 9248 + }, + { + "epoch": 0.25395387149917625, + "grad_norm": 0.3501138687133789, + "learning_rate": 1.9229595838385086e-05, + "loss": 0.5429, + "step": 9249 + }, + { + "epoch": 0.2539813289401428, + "grad_norm": 0.35584867000579834, + "learning_rate": 1.9229429595352552e-05, + "loss": 0.5294, + "step": 9250 + }, + { + "epoch": 0.2540087863811093, + "grad_norm": 0.34628039598464966, + "learning_rate": 1.9229263335104185e-05, + "loss": 0.4581, + "step": 9251 + }, + { + "epoch": 0.2540362438220758, + "grad_norm": 0.4224596619606018, + "learning_rate": 1.9229097057640294e-05, + "loss": 0.5535, + "step": 9252 + }, + { + "epoch": 0.2540637012630423, + "grad_norm": 0.4321525990962982, + "learning_rate": 1.9228930762961194e-05, + "loss": 0.462, + "step": 9253 + }, + { + "epoch": 0.2540911587040088, + "grad_norm": 0.3775803744792938, + "learning_rate": 1.922876445106719e-05, + "loss": 0.4175, + "step": 9254 + }, + { + "epoch": 0.2541186161449753, + "grad_norm": 0.3562146723270416, + "learning_rate": 1.9228598121958598e-05, + "loss": 0.5485, + "step": 9255 + }, + { + "epoch": 0.25414607358594177, + "grad_norm": 0.3525570034980774, + "learning_rate": 1.9228431775635722e-05, + "loss": 0.5507, + "step": 9256 + }, + { + "epoch": 0.2541735310269083, + "grad_norm": 0.45910903811454773, + "learning_rate": 1.9228265412098877e-05, + "loss": 0.5401, + "step": 9257 + }, + { + "epoch": 0.2542009884678748, + "grad_norm": 0.36710599064826965, + "learning_rate": 1.922809903134837e-05, + "loss": 0.4806, + "step": 9258 + }, + { + "epoch": 0.2542284459088413, + "grad_norm": 0.4092499315738678, + "learning_rate": 1.9227932633384516e-05, + "loss": 0.5703, + "step": 9259 + }, + { + "epoch": 0.2542559033498078, + "grad_norm": 0.42364785075187683, + "learning_rate": 1.922776621820762e-05, + "loss": 0.5615, + "step": 9260 + }, + { + "epoch": 0.2542833607907743, + "grad_norm": 0.3695901036262512, + "learning_rate": 1.9227599785817997e-05, + "loss": 0.4508, + "step": 9261 + }, + { + "epoch": 0.2543108182317408, + "grad_norm": 0.3634258210659027, + "learning_rate": 1.9227433336215954e-05, + "loss": 0.5141, + "step": 9262 + }, + { + "epoch": 0.2543382756727073, + "grad_norm": 0.3354853093624115, + "learning_rate": 1.9227266869401805e-05, + "loss": 0.4969, + "step": 9263 + }, + { + "epoch": 0.25436573311367383, + "grad_norm": 0.5445295572280884, + "learning_rate": 1.9227100385375855e-05, + "loss": 0.4972, + "step": 9264 + }, + { + "epoch": 0.2543931905546403, + "grad_norm": 0.32671359181404114, + "learning_rate": 1.922693388413842e-05, + "loss": 0.5754, + "step": 9265 + }, + { + "epoch": 0.2544206479956068, + "grad_norm": 0.40394893288612366, + "learning_rate": 1.9226767365689805e-05, + "loss": 0.5225, + "step": 9266 + }, + { + "epoch": 0.2544481054365733, + "grad_norm": 0.33649009466171265, + "learning_rate": 1.9226600830030326e-05, + "loss": 0.5506, + "step": 9267 + }, + { + "epoch": 0.2544755628775398, + "grad_norm": 0.351272314786911, + "learning_rate": 1.9226434277160293e-05, + "loss": 0.5165, + "step": 9268 + }, + { + "epoch": 0.2545030203185063, + "grad_norm": 0.3700573146343231, + "learning_rate": 1.9226267707080013e-05, + "loss": 0.6038, + "step": 9269 + }, + { + "epoch": 0.2545304777594728, + "grad_norm": 0.3656693994998932, + "learning_rate": 1.9226101119789803e-05, + "loss": 0.45, + "step": 9270 + }, + { + "epoch": 0.25455793520043934, + "grad_norm": 0.4011545181274414, + "learning_rate": 1.9225934515289967e-05, + "loss": 0.5431, + "step": 9271 + }, + { + "epoch": 0.25458539264140584, + "grad_norm": 0.4228493571281433, + "learning_rate": 1.9225767893580817e-05, + "loss": 0.6088, + "step": 9272 + }, + { + "epoch": 0.25461285008237233, + "grad_norm": 0.3713211715221405, + "learning_rate": 1.922560125466267e-05, + "loss": 0.495, + "step": 9273 + }, + { + "epoch": 0.2546403075233388, + "grad_norm": 0.3894771635532379, + "learning_rate": 1.922543459853583e-05, + "loss": 0.5902, + "step": 9274 + }, + { + "epoch": 0.2546677649643053, + "grad_norm": 0.37792837619781494, + "learning_rate": 1.9225267925200607e-05, + "loss": 0.5177, + "step": 9275 + }, + { + "epoch": 0.2546952224052718, + "grad_norm": 0.4030459523200989, + "learning_rate": 1.9225101234657318e-05, + "loss": 0.6547, + "step": 9276 + }, + { + "epoch": 0.2547226798462383, + "grad_norm": 0.35420218110084534, + "learning_rate": 1.922493452690627e-05, + "loss": 0.5242, + "step": 9277 + }, + { + "epoch": 0.25475013728720486, + "grad_norm": 1.379102349281311, + "learning_rate": 1.9224767801947775e-05, + "loss": 0.4847, + "step": 9278 + }, + { + "epoch": 0.25477759472817135, + "grad_norm": 0.36212506890296936, + "learning_rate": 1.9224601059782144e-05, + "loss": 0.5858, + "step": 9279 + }, + { + "epoch": 0.25480505216913785, + "grad_norm": 0.4349627196788788, + "learning_rate": 1.9224434300409686e-05, + "loss": 0.5702, + "step": 9280 + }, + { + "epoch": 0.25483250961010434, + "grad_norm": 0.4370262026786804, + "learning_rate": 1.922426752383072e-05, + "loss": 0.5002, + "step": 9281 + }, + { + "epoch": 0.25485996705107083, + "grad_norm": 0.34502363204956055, + "learning_rate": 1.9224100730045542e-05, + "loss": 0.5468, + "step": 9282 + }, + { + "epoch": 0.25488742449203733, + "grad_norm": 0.39965078234672546, + "learning_rate": 1.922393391905448e-05, + "loss": 0.5532, + "step": 9283 + }, + { + "epoch": 0.2549148819330038, + "grad_norm": 0.3468093276023865, + "learning_rate": 1.9223767090857833e-05, + "loss": 0.4682, + "step": 9284 + }, + { + "epoch": 0.2549423393739704, + "grad_norm": 0.37292858958244324, + "learning_rate": 1.9223600245455918e-05, + "loss": 0.5013, + "step": 9285 + }, + { + "epoch": 0.25496979681493687, + "grad_norm": 0.3804837167263031, + "learning_rate": 1.9223433382849043e-05, + "loss": 0.5112, + "step": 9286 + }, + { + "epoch": 0.25499725425590336, + "grad_norm": 0.4023738205432892, + "learning_rate": 1.9223266503037522e-05, + "loss": 0.6053, + "step": 9287 + }, + { + "epoch": 0.25502471169686985, + "grad_norm": 0.34250831604003906, + "learning_rate": 1.9223099606021668e-05, + "loss": 0.4574, + "step": 9288 + }, + { + "epoch": 0.25505216913783635, + "grad_norm": 0.4080987572669983, + "learning_rate": 1.9222932691801786e-05, + "loss": 0.4927, + "step": 9289 + }, + { + "epoch": 0.25507962657880284, + "grad_norm": 0.38583827018737793, + "learning_rate": 1.9222765760378193e-05, + "loss": 0.5698, + "step": 9290 + }, + { + "epoch": 0.25510708401976934, + "grad_norm": 0.32917115092277527, + "learning_rate": 1.9222598811751195e-05, + "loss": 0.5429, + "step": 9291 + }, + { + "epoch": 0.2551345414607359, + "grad_norm": 0.3686443567276001, + "learning_rate": 1.922243184592111e-05, + "loss": 0.5069, + "step": 9292 + }, + { + "epoch": 0.2551619989017024, + "grad_norm": 0.40303730964660645, + "learning_rate": 1.9222264862888247e-05, + "loss": 0.4944, + "step": 9293 + }, + { + "epoch": 0.2551894563426689, + "grad_norm": 0.3628910481929779, + "learning_rate": 1.9222097862652914e-05, + "loss": 0.4955, + "step": 9294 + }, + { + "epoch": 0.25521691378363537, + "grad_norm": 0.3929886817932129, + "learning_rate": 1.9221930845215426e-05, + "loss": 0.511, + "step": 9295 + }, + { + "epoch": 0.25524437122460186, + "grad_norm": 0.3697390556335449, + "learning_rate": 1.9221763810576095e-05, + "loss": 0.5384, + "step": 9296 + }, + { + "epoch": 0.25527182866556836, + "grad_norm": 0.36886462569236755, + "learning_rate": 1.922159675873523e-05, + "loss": 0.6082, + "step": 9297 + }, + { + "epoch": 0.25529928610653485, + "grad_norm": 0.3694456219673157, + "learning_rate": 1.9221429689693143e-05, + "loss": 0.4883, + "step": 9298 + }, + { + "epoch": 0.2553267435475014, + "grad_norm": 0.3711444139480591, + "learning_rate": 1.9221262603450146e-05, + "loss": 0.4986, + "step": 9299 + }, + { + "epoch": 0.2553542009884679, + "grad_norm": 0.33560970425605774, + "learning_rate": 1.922109550000655e-05, + "loss": 0.4831, + "step": 9300 + }, + { + "epoch": 0.2553816584294344, + "grad_norm": 0.33212780952453613, + "learning_rate": 1.9220928379362673e-05, + "loss": 0.52, + "step": 9301 + }, + { + "epoch": 0.2554091158704009, + "grad_norm": 0.3715454936027527, + "learning_rate": 1.9220761241518818e-05, + "loss": 0.5738, + "step": 9302 + }, + { + "epoch": 0.2554365733113674, + "grad_norm": 0.34120985865592957, + "learning_rate": 1.9220594086475302e-05, + "loss": 0.4611, + "step": 9303 + }, + { + "epoch": 0.25546403075233387, + "grad_norm": 0.3942822813987732, + "learning_rate": 1.9220426914232436e-05, + "loss": 0.57, + "step": 9304 + }, + { + "epoch": 0.25549148819330036, + "grad_norm": 0.4363870918750763, + "learning_rate": 1.922025972479053e-05, + "loss": 0.4826, + "step": 9305 + }, + { + "epoch": 0.25551894563426686, + "grad_norm": 0.3561538755893707, + "learning_rate": 1.9220092518149895e-05, + "loss": 0.5519, + "step": 9306 + }, + { + "epoch": 0.2555464030752334, + "grad_norm": 0.4997478425502777, + "learning_rate": 1.9219925294310848e-05, + "loss": 0.5006, + "step": 9307 + }, + { + "epoch": 0.2555738605161999, + "grad_norm": 0.33099740743637085, + "learning_rate": 1.9219758053273694e-05, + "loss": 0.4171, + "step": 9308 + }, + { + "epoch": 0.2556013179571664, + "grad_norm": 0.3670232594013214, + "learning_rate": 1.921959079503875e-05, + "loss": 0.5307, + "step": 9309 + }, + { + "epoch": 0.2556287753981329, + "grad_norm": 0.34771132469177246, + "learning_rate": 1.9219423519606328e-05, + "loss": 0.5403, + "step": 9310 + }, + { + "epoch": 0.2556562328390994, + "grad_norm": 0.3830222487449646, + "learning_rate": 1.9219256226976736e-05, + "loss": 0.4736, + "step": 9311 + }, + { + "epoch": 0.2556836902800659, + "grad_norm": 0.4084498882293701, + "learning_rate": 1.921908891715029e-05, + "loss": 0.5383, + "step": 9312 + }, + { + "epoch": 0.2557111477210324, + "grad_norm": 0.3903753459453583, + "learning_rate": 1.92189215901273e-05, + "loss": 0.5245, + "step": 9313 + }, + { + "epoch": 0.2557386051619989, + "grad_norm": 0.3190092146396637, + "learning_rate": 1.921875424590808e-05, + "loss": 0.5152, + "step": 9314 + }, + { + "epoch": 0.2557660626029654, + "grad_norm": 0.42319542169570923, + "learning_rate": 1.9218586884492943e-05, + "loss": 0.52, + "step": 9315 + }, + { + "epoch": 0.2557935200439319, + "grad_norm": 0.4614998996257782, + "learning_rate": 1.9218419505882195e-05, + "loss": 0.5767, + "step": 9316 + }, + { + "epoch": 0.2558209774848984, + "grad_norm": 0.4192696213722229, + "learning_rate": 1.9218252110076157e-05, + "loss": 0.5856, + "step": 9317 + }, + { + "epoch": 0.2558484349258649, + "grad_norm": 0.41604986786842346, + "learning_rate": 1.9218084697075133e-05, + "loss": 0.6127, + "step": 9318 + }, + { + "epoch": 0.2558758923668314, + "grad_norm": 0.33703628182411194, + "learning_rate": 1.921791726687944e-05, + "loss": 0.516, + "step": 9319 + }, + { + "epoch": 0.2559033498077979, + "grad_norm": 0.36875078082084656, + "learning_rate": 1.921774981948939e-05, + "loss": 0.5644, + "step": 9320 + }, + { + "epoch": 0.25593080724876444, + "grad_norm": 0.3895040452480316, + "learning_rate": 1.9217582354905295e-05, + "loss": 0.4861, + "step": 9321 + }, + { + "epoch": 0.25595826468973093, + "grad_norm": 0.35173675417900085, + "learning_rate": 1.9217414873127466e-05, + "loss": 0.5614, + "step": 9322 + }, + { + "epoch": 0.2559857221306974, + "grad_norm": 0.34261536598205566, + "learning_rate": 1.9217247374156216e-05, + "loss": 0.5037, + "step": 9323 + }, + { + "epoch": 0.2560131795716639, + "grad_norm": 0.3492025136947632, + "learning_rate": 1.9217079857991858e-05, + "loss": 0.4765, + "step": 9324 + }, + { + "epoch": 0.2560406370126304, + "grad_norm": 0.3745185434818268, + "learning_rate": 1.9216912324634703e-05, + "loss": 0.5136, + "step": 9325 + }, + { + "epoch": 0.2560680944535969, + "grad_norm": 0.3573043644428253, + "learning_rate": 1.921674477408507e-05, + "loss": 0.5757, + "step": 9326 + }, + { + "epoch": 0.2560955518945634, + "grad_norm": 0.5591436624526978, + "learning_rate": 1.9216577206343262e-05, + "loss": 0.5943, + "step": 9327 + }, + { + "epoch": 0.25612300933552995, + "grad_norm": 0.3741816282272339, + "learning_rate": 1.9216409621409597e-05, + "loss": 0.4811, + "step": 9328 + }, + { + "epoch": 0.25615046677649644, + "grad_norm": 0.3403530716896057, + "learning_rate": 1.9216242019284385e-05, + "loss": 0.4476, + "step": 9329 + }, + { + "epoch": 0.25617792421746294, + "grad_norm": 0.33798351883888245, + "learning_rate": 1.921607439996794e-05, + "loss": 0.5307, + "step": 9330 + }, + { + "epoch": 0.25620538165842943, + "grad_norm": 0.34892329573631287, + "learning_rate": 1.921590676346058e-05, + "loss": 0.5301, + "step": 9331 + }, + { + "epoch": 0.2562328390993959, + "grad_norm": 0.3473939001560211, + "learning_rate": 1.921573910976261e-05, + "loss": 0.5707, + "step": 9332 + }, + { + "epoch": 0.2562602965403624, + "grad_norm": 0.395912230014801, + "learning_rate": 1.921557143887434e-05, + "loss": 0.5873, + "step": 9333 + }, + { + "epoch": 0.2562877539813289, + "grad_norm": 0.3834340572357178, + "learning_rate": 1.9215403750796093e-05, + "loss": 0.4938, + "step": 9334 + }, + { + "epoch": 0.25631521142229546, + "grad_norm": 0.4441642761230469, + "learning_rate": 1.9215236045528177e-05, + "loss": 0.5523, + "step": 9335 + }, + { + "epoch": 0.25634266886326196, + "grad_norm": 0.37556761503219604, + "learning_rate": 1.9215068323070903e-05, + "loss": 0.4894, + "step": 9336 + }, + { + "epoch": 0.25637012630422845, + "grad_norm": 0.3497155010700226, + "learning_rate": 1.9214900583424586e-05, + "loss": 0.456, + "step": 9337 + }, + { + "epoch": 0.25639758374519495, + "grad_norm": 0.4276037812232971, + "learning_rate": 1.9214732826589538e-05, + "loss": 0.4984, + "step": 9338 + }, + { + "epoch": 0.25642504118616144, + "grad_norm": 0.42994052171707153, + "learning_rate": 1.9214565052566073e-05, + "loss": 0.5867, + "step": 9339 + }, + { + "epoch": 0.25645249862712793, + "grad_norm": 0.38128891587257385, + "learning_rate": 1.92143972613545e-05, + "loss": 0.5429, + "step": 9340 + }, + { + "epoch": 0.25647995606809443, + "grad_norm": 0.3462149202823639, + "learning_rate": 1.921422945295514e-05, + "loss": 0.4942, + "step": 9341 + }, + { + "epoch": 0.256507413509061, + "grad_norm": 0.3939410150051117, + "learning_rate": 1.9214061627368298e-05, + "loss": 0.6007, + "step": 9342 + }, + { + "epoch": 0.25653487095002747, + "grad_norm": 0.3880685269832611, + "learning_rate": 1.9213893784594293e-05, + "loss": 0.6385, + "step": 9343 + }, + { + "epoch": 0.25656232839099397, + "grad_norm": 0.42445260286331177, + "learning_rate": 1.9213725924633434e-05, + "loss": 0.5674, + "step": 9344 + }, + { + "epoch": 0.25658978583196046, + "grad_norm": 0.3889710605144501, + "learning_rate": 1.9213558047486036e-05, + "loss": 0.5342, + "step": 9345 + }, + { + "epoch": 0.25661724327292695, + "grad_norm": 0.4134369492530823, + "learning_rate": 1.921339015315241e-05, + "loss": 0.5765, + "step": 9346 + }, + { + "epoch": 0.25664470071389345, + "grad_norm": 0.37270528078079224, + "learning_rate": 1.9213222241632873e-05, + "loss": 0.4983, + "step": 9347 + }, + { + "epoch": 0.25667215815485994, + "grad_norm": 0.38544169068336487, + "learning_rate": 1.9213054312927735e-05, + "loss": 0.4265, + "step": 9348 + }, + { + "epoch": 0.2566996155958265, + "grad_norm": 0.3660299479961395, + "learning_rate": 1.9212886367037308e-05, + "loss": 0.5608, + "step": 9349 + }, + { + "epoch": 0.256727073036793, + "grad_norm": 0.3559917211532593, + "learning_rate": 1.921271840396191e-05, + "loss": 0.5724, + "step": 9350 + }, + { + "epoch": 0.2567545304777595, + "grad_norm": 0.3443695306777954, + "learning_rate": 1.921255042370185e-05, + "loss": 0.5572, + "step": 9351 + }, + { + "epoch": 0.256781987918726, + "grad_norm": 0.37343403697013855, + "learning_rate": 1.9212382426257445e-05, + "loss": 0.5567, + "step": 9352 + }, + { + "epoch": 0.25680944535969247, + "grad_norm": 0.41103556752204895, + "learning_rate": 1.9212214411629007e-05, + "loss": 0.5543, + "step": 9353 + }, + { + "epoch": 0.25683690280065896, + "grad_norm": 0.4135097861289978, + "learning_rate": 1.921204637981685e-05, + "loss": 0.5267, + "step": 9354 + }, + { + "epoch": 0.25686436024162546, + "grad_norm": 0.3880484402179718, + "learning_rate": 1.9211878330821282e-05, + "loss": 0.6162, + "step": 9355 + }, + { + "epoch": 0.256891817682592, + "grad_norm": 0.37275198101997375, + "learning_rate": 1.9211710264642622e-05, + "loss": 0.5013, + "step": 9356 + }, + { + "epoch": 0.2569192751235585, + "grad_norm": 0.44539740681648254, + "learning_rate": 1.9211542181281184e-05, + "loss": 0.5022, + "step": 9357 + }, + { + "epoch": 0.256946732564525, + "grad_norm": 0.3565678298473358, + "learning_rate": 1.921137408073728e-05, + "loss": 0.4677, + "step": 9358 + }, + { + "epoch": 0.2569741900054915, + "grad_norm": 0.4411122798919678, + "learning_rate": 1.9211205963011223e-05, + "loss": 0.5473, + "step": 9359 + }, + { + "epoch": 0.257001647446458, + "grad_norm": 0.34746164083480835, + "learning_rate": 1.9211037828103327e-05, + "loss": 0.5528, + "step": 9360 + }, + { + "epoch": 0.2570291048874245, + "grad_norm": 0.4477424621582031, + "learning_rate": 1.9210869676013906e-05, + "loss": 0.4895, + "step": 9361 + }, + { + "epoch": 0.25705656232839097, + "grad_norm": 0.5270004272460938, + "learning_rate": 1.9210701506743273e-05, + "loss": 0.5554, + "step": 9362 + }, + { + "epoch": 0.2570840197693575, + "grad_norm": 0.4253723621368408, + "learning_rate": 1.921053332029174e-05, + "loss": 0.5987, + "step": 9363 + }, + { + "epoch": 0.257111477210324, + "grad_norm": 0.3637545704841614, + "learning_rate": 1.9210365116659624e-05, + "loss": 0.5204, + "step": 9364 + }, + { + "epoch": 0.2571389346512905, + "grad_norm": 0.35714495182037354, + "learning_rate": 1.9210196895847236e-05, + "loss": 0.5286, + "step": 9365 + }, + { + "epoch": 0.257166392092257, + "grad_norm": 0.3479013442993164, + "learning_rate": 1.921002865785489e-05, + "loss": 0.4622, + "step": 9366 + }, + { + "epoch": 0.2571938495332235, + "grad_norm": 0.4222853481769562, + "learning_rate": 1.9209860402682907e-05, + "loss": 0.5599, + "step": 9367 + }, + { + "epoch": 0.25722130697419, + "grad_norm": 0.5828927159309387, + "learning_rate": 1.9209692130331592e-05, + "loss": 0.5573, + "step": 9368 + }, + { + "epoch": 0.2572487644151565, + "grad_norm": 0.35766535997390747, + "learning_rate": 1.9209523840801264e-05, + "loss": 0.5647, + "step": 9369 + }, + { + "epoch": 0.25727622185612303, + "grad_norm": 0.3310072720050812, + "learning_rate": 1.920935553409223e-05, + "loss": 0.5529, + "step": 9370 + }, + { + "epoch": 0.2573036792970895, + "grad_norm": 0.36336129903793335, + "learning_rate": 1.9209187210204812e-05, + "loss": 0.4414, + "step": 9371 + }, + { + "epoch": 0.257331136738056, + "grad_norm": 0.33759063482284546, + "learning_rate": 1.920901886913932e-05, + "loss": 0.5615, + "step": 9372 + }, + { + "epoch": 0.2573585941790225, + "grad_norm": 0.3640938699245453, + "learning_rate": 1.920885051089607e-05, + "loss": 0.5396, + "step": 9373 + }, + { + "epoch": 0.257386051619989, + "grad_norm": 0.3611994683742523, + "learning_rate": 1.9208682135475373e-05, + "loss": 0.4953, + "step": 9374 + }, + { + "epoch": 0.2574135090609555, + "grad_norm": 0.39750203490257263, + "learning_rate": 1.920851374287754e-05, + "loss": 0.5691, + "step": 9375 + }, + { + "epoch": 0.257440966501922, + "grad_norm": 0.35357576608657837, + "learning_rate": 1.9208345333102896e-05, + "loss": 0.4823, + "step": 9376 + }, + { + "epoch": 0.25746842394288855, + "grad_norm": 0.3235712945461273, + "learning_rate": 1.920817690615175e-05, + "loss": 0.5088, + "step": 9377 + }, + { + "epoch": 0.25749588138385504, + "grad_norm": 0.5578622221946716, + "learning_rate": 1.9208008462024412e-05, + "loss": 0.6277, + "step": 9378 + }, + { + "epoch": 0.25752333882482153, + "grad_norm": 0.386970192193985, + "learning_rate": 1.92078400007212e-05, + "loss": 0.4655, + "step": 9379 + }, + { + "epoch": 0.25755079626578803, + "grad_norm": 0.3529108762741089, + "learning_rate": 1.920767152224243e-05, + "loss": 0.4666, + "step": 9380 + }, + { + "epoch": 0.2575782537067545, + "grad_norm": 0.47067561745643616, + "learning_rate": 1.920750302658841e-05, + "loss": 0.6155, + "step": 9381 + }, + { + "epoch": 0.257605711147721, + "grad_norm": 0.3230997622013092, + "learning_rate": 1.920733451375946e-05, + "loss": 0.4763, + "step": 9382 + }, + { + "epoch": 0.2576331685886875, + "grad_norm": 0.3728141188621521, + "learning_rate": 1.9207165983755892e-05, + "loss": 0.5555, + "step": 9383 + }, + { + "epoch": 0.25766062602965406, + "grad_norm": 0.42391595244407654, + "learning_rate": 1.9206997436578023e-05, + "loss": 0.5773, + "step": 9384 + }, + { + "epoch": 0.25768808347062055, + "grad_norm": 0.4148499071598053, + "learning_rate": 1.920682887222616e-05, + "loss": 0.5336, + "step": 9385 + }, + { + "epoch": 0.25771554091158705, + "grad_norm": 0.3777301013469696, + "learning_rate": 1.9206660290700628e-05, + "loss": 0.4701, + "step": 9386 + }, + { + "epoch": 0.25774299835255354, + "grad_norm": 0.3644326329231262, + "learning_rate": 1.9206491692001732e-05, + "loss": 0.5486, + "step": 9387 + }, + { + "epoch": 0.25777045579352004, + "grad_norm": 0.39396777749061584, + "learning_rate": 1.9206323076129792e-05, + "loss": 0.5719, + "step": 9388 + }, + { + "epoch": 0.25779791323448653, + "grad_norm": 0.37625572085380554, + "learning_rate": 1.9206154443085123e-05, + "loss": 0.5887, + "step": 9389 + }, + { + "epoch": 0.257825370675453, + "grad_norm": 0.36311405897140503, + "learning_rate": 1.9205985792868037e-05, + "loss": 0.5327, + "step": 9390 + }, + { + "epoch": 0.2578528281164196, + "grad_norm": 0.41146159172058105, + "learning_rate": 1.9205817125478846e-05, + "loss": 0.5308, + "step": 9391 + }, + { + "epoch": 0.25788028555738607, + "grad_norm": 0.3573685586452484, + "learning_rate": 1.9205648440917874e-05, + "loss": 0.5466, + "step": 9392 + }, + { + "epoch": 0.25790774299835256, + "grad_norm": 0.39070719480514526, + "learning_rate": 1.9205479739185423e-05, + "loss": 0.6015, + "step": 9393 + }, + { + "epoch": 0.25793520043931906, + "grad_norm": 0.3390437066555023, + "learning_rate": 1.9205311020281822e-05, + "loss": 0.4797, + "step": 9394 + }, + { + "epoch": 0.25796265788028555, + "grad_norm": 0.35160815715789795, + "learning_rate": 1.9205142284207372e-05, + "loss": 0.485, + "step": 9395 + }, + { + "epoch": 0.25799011532125204, + "grad_norm": 0.3834616243839264, + "learning_rate": 1.9204973530962396e-05, + "loss": 0.5494, + "step": 9396 + }, + { + "epoch": 0.25801757276221854, + "grad_norm": 0.3805585205554962, + "learning_rate": 1.9204804760547204e-05, + "loss": 0.5374, + "step": 9397 + }, + { + "epoch": 0.2580450302031851, + "grad_norm": 0.36873164772987366, + "learning_rate": 1.9204635972962116e-05, + "loss": 0.4568, + "step": 9398 + }, + { + "epoch": 0.2580724876441516, + "grad_norm": 0.31688961386680603, + "learning_rate": 1.9204467168207443e-05, + "loss": 0.4768, + "step": 9399 + }, + { + "epoch": 0.2580999450851181, + "grad_norm": 0.37426334619522095, + "learning_rate": 1.92042983462835e-05, + "loss": 0.4801, + "step": 9400 + }, + { + "epoch": 0.25812740252608457, + "grad_norm": 0.3795463740825653, + "learning_rate": 1.9204129507190604e-05, + "loss": 0.5224, + "step": 9401 + }, + { + "epoch": 0.25815485996705106, + "grad_norm": 0.4310855567455292, + "learning_rate": 1.920396065092907e-05, + "loss": 0.5745, + "step": 9402 + }, + { + "epoch": 0.25818231740801756, + "grad_norm": 0.3663748502731323, + "learning_rate": 1.920379177749921e-05, + "loss": 0.5381, + "step": 9403 + }, + { + "epoch": 0.25820977484898405, + "grad_norm": 0.36408692598342896, + "learning_rate": 1.9203622886901342e-05, + "loss": 0.5402, + "step": 9404 + }, + { + "epoch": 0.2582372322899506, + "grad_norm": 0.3618702292442322, + "learning_rate": 1.920345397913578e-05, + "loss": 0.5389, + "step": 9405 + }, + { + "epoch": 0.2582646897309171, + "grad_norm": 0.38373878598213196, + "learning_rate": 1.9203285054202836e-05, + "loss": 0.5052, + "step": 9406 + }, + { + "epoch": 0.2582921471718836, + "grad_norm": 0.32435715198516846, + "learning_rate": 1.920311611210283e-05, + "loss": 0.376, + "step": 9407 + }, + { + "epoch": 0.2583196046128501, + "grad_norm": 0.4073440730571747, + "learning_rate": 1.9202947152836078e-05, + "loss": 0.6131, + "step": 9408 + }, + { + "epoch": 0.2583470620538166, + "grad_norm": 0.3742148280143738, + "learning_rate": 1.920277817640289e-05, + "loss": 0.548, + "step": 9409 + }, + { + "epoch": 0.2583745194947831, + "grad_norm": 0.36369067430496216, + "learning_rate": 1.9202609182803584e-05, + "loss": 0.4942, + "step": 9410 + }, + { + "epoch": 0.25840197693574957, + "grad_norm": 0.3462900221347809, + "learning_rate": 1.9202440172038474e-05, + "loss": 0.5, + "step": 9411 + }, + { + "epoch": 0.2584294343767161, + "grad_norm": 0.35675254464149475, + "learning_rate": 1.9202271144107875e-05, + "loss": 0.532, + "step": 9412 + }, + { + "epoch": 0.2584568918176826, + "grad_norm": 0.3716842830181122, + "learning_rate": 1.9202102099012103e-05, + "loss": 0.6108, + "step": 9413 + }, + { + "epoch": 0.2584843492586491, + "grad_norm": 0.38760700821876526, + "learning_rate": 1.9201933036751476e-05, + "loss": 0.5948, + "step": 9414 + }, + { + "epoch": 0.2585118066996156, + "grad_norm": 0.38309627771377563, + "learning_rate": 1.9201763957326305e-05, + "loss": 0.5496, + "step": 9415 + }, + { + "epoch": 0.2585392641405821, + "grad_norm": 0.3586679697036743, + "learning_rate": 1.920159486073691e-05, + "loss": 0.5491, + "step": 9416 + }, + { + "epoch": 0.2585667215815486, + "grad_norm": 0.39460381865501404, + "learning_rate": 1.92014257469836e-05, + "loss": 0.552, + "step": 9417 + }, + { + "epoch": 0.2585941790225151, + "grad_norm": 0.3503015339374542, + "learning_rate": 1.92012566160667e-05, + "loss": 0.6555, + "step": 9418 + }, + { + "epoch": 0.25862163646348163, + "grad_norm": 0.32782843708992004, + "learning_rate": 1.9201087467986516e-05, + "loss": 0.4853, + "step": 9419 + }, + { + "epoch": 0.2586490939044481, + "grad_norm": 0.3259095847606659, + "learning_rate": 1.9200918302743365e-05, + "loss": 0.4735, + "step": 9420 + }, + { + "epoch": 0.2586765513454146, + "grad_norm": 0.37474367022514343, + "learning_rate": 1.920074912033757e-05, + "loss": 0.5339, + "step": 9421 + }, + { + "epoch": 0.2587040087863811, + "grad_norm": 0.4079749286174774, + "learning_rate": 1.9200579920769435e-05, + "loss": 0.5886, + "step": 9422 + }, + { + "epoch": 0.2587314662273476, + "grad_norm": 0.38050031661987305, + "learning_rate": 1.9200410704039285e-05, + "loss": 0.6123, + "step": 9423 + }, + { + "epoch": 0.2587589236683141, + "grad_norm": 0.3240785598754883, + "learning_rate": 1.9200241470147435e-05, + "loss": 0.5014, + "step": 9424 + }, + { + "epoch": 0.2587863811092806, + "grad_norm": 0.3648619055747986, + "learning_rate": 1.9200072219094195e-05, + "loss": 0.6159, + "step": 9425 + }, + { + "epoch": 0.25881383855024714, + "grad_norm": 0.3824846148490906, + "learning_rate": 1.9199902950879886e-05, + "loss": 0.5362, + "step": 9426 + }, + { + "epoch": 0.25884129599121364, + "grad_norm": 0.3598269522190094, + "learning_rate": 1.9199733665504817e-05, + "loss": 0.537, + "step": 9427 + }, + { + "epoch": 0.25886875343218013, + "grad_norm": 0.3789520859718323, + "learning_rate": 1.9199564362969315e-05, + "loss": 0.5695, + "step": 9428 + }, + { + "epoch": 0.2588962108731466, + "grad_norm": 0.4428604245185852, + "learning_rate": 1.9199395043273688e-05, + "loss": 0.5574, + "step": 9429 + }, + { + "epoch": 0.2589236683141131, + "grad_norm": 0.3672722578048706, + "learning_rate": 1.919922570641825e-05, + "loss": 0.4251, + "step": 9430 + }, + { + "epoch": 0.2589511257550796, + "grad_norm": 0.3708668053150177, + "learning_rate": 1.919905635240332e-05, + "loss": 0.5602, + "step": 9431 + }, + { + "epoch": 0.2589785831960461, + "grad_norm": 0.37353041768074036, + "learning_rate": 1.919888698122922e-05, + "loss": 0.5621, + "step": 9432 + }, + { + "epoch": 0.25900604063701266, + "grad_norm": 0.37249499559402466, + "learning_rate": 1.9198717592896256e-05, + "loss": 0.5303, + "step": 9433 + }, + { + "epoch": 0.25903349807797915, + "grad_norm": 0.35409191250801086, + "learning_rate": 1.9198548187404746e-05, + "loss": 0.6018, + "step": 9434 + }, + { + "epoch": 0.25906095551894565, + "grad_norm": 0.36687397956848145, + "learning_rate": 1.919837876475501e-05, + "loss": 0.4825, + "step": 9435 + }, + { + "epoch": 0.25908841295991214, + "grad_norm": 0.34235846996307373, + "learning_rate": 1.9198209324947358e-05, + "loss": 0.5198, + "step": 9436 + }, + { + "epoch": 0.25911587040087863, + "grad_norm": 0.3279513418674469, + "learning_rate": 1.9198039867982113e-05, + "loss": 0.5297, + "step": 9437 + }, + { + "epoch": 0.25914332784184513, + "grad_norm": 0.3567107021808624, + "learning_rate": 1.919787039385959e-05, + "loss": 0.5059, + "step": 9438 + }, + { + "epoch": 0.2591707852828116, + "grad_norm": 0.43064436316490173, + "learning_rate": 1.91977009025801e-05, + "loss": 0.7024, + "step": 9439 + }, + { + "epoch": 0.2591982427237781, + "grad_norm": 0.3340536653995514, + "learning_rate": 1.9197531394143965e-05, + "loss": 0.4398, + "step": 9440 + }, + { + "epoch": 0.25922570016474467, + "grad_norm": 0.37037140130996704, + "learning_rate": 1.9197361868551498e-05, + "loss": 0.5502, + "step": 9441 + }, + { + "epoch": 0.25925315760571116, + "grad_norm": 0.3282790184020996, + "learning_rate": 1.919719232580301e-05, + "loss": 0.4547, + "step": 9442 + }, + { + "epoch": 0.25928061504667765, + "grad_norm": 0.4376263916492462, + "learning_rate": 1.919702276589883e-05, + "loss": 0.5073, + "step": 9443 + }, + { + "epoch": 0.25930807248764415, + "grad_norm": 0.3813644051551819, + "learning_rate": 1.9196853188839264e-05, + "loss": 0.5741, + "step": 9444 + }, + { + "epoch": 0.25933552992861064, + "grad_norm": 0.37996840476989746, + "learning_rate": 1.9196683594624633e-05, + "loss": 0.4946, + "step": 9445 + }, + { + "epoch": 0.25936298736957714, + "grad_norm": 0.3667914569377899, + "learning_rate": 1.919651398325525e-05, + "loss": 0.4566, + "step": 9446 + }, + { + "epoch": 0.25939044481054363, + "grad_norm": 0.3937976062297821, + "learning_rate": 1.9196344354731433e-05, + "loss": 0.5584, + "step": 9447 + }, + { + "epoch": 0.2594179022515102, + "grad_norm": 0.3504382371902466, + "learning_rate": 1.9196174709053503e-05, + "loss": 0.5789, + "step": 9448 + }, + { + "epoch": 0.2594453596924767, + "grad_norm": 0.359549880027771, + "learning_rate": 1.9196005046221765e-05, + "loss": 0.5128, + "step": 9449 + }, + { + "epoch": 0.25947281713344317, + "grad_norm": 0.34907567501068115, + "learning_rate": 1.9195835366236547e-05, + "loss": 0.5578, + "step": 9450 + }, + { + "epoch": 0.25950027457440966, + "grad_norm": 0.33749520778656006, + "learning_rate": 1.9195665669098163e-05, + "loss": 0.4476, + "step": 9451 + }, + { + "epoch": 0.25952773201537616, + "grad_norm": 0.4322696924209595, + "learning_rate": 1.9195495954806925e-05, + "loss": 0.6148, + "step": 9452 + }, + { + "epoch": 0.25955518945634265, + "grad_norm": 0.3985159695148468, + "learning_rate": 1.9195326223363147e-05, + "loss": 0.5592, + "step": 9453 + }, + { + "epoch": 0.25958264689730914, + "grad_norm": 0.37949469685554504, + "learning_rate": 1.919515647476716e-05, + "loss": 0.4844, + "step": 9454 + }, + { + "epoch": 0.2596101043382757, + "grad_norm": 0.391836941242218, + "learning_rate": 1.9194986709019266e-05, + "loss": 0.5415, + "step": 9455 + }, + { + "epoch": 0.2596375617792422, + "grad_norm": 0.38724151253700256, + "learning_rate": 1.919481692611979e-05, + "loss": 0.5332, + "step": 9456 + }, + { + "epoch": 0.2596650192202087, + "grad_norm": 0.34634166955947876, + "learning_rate": 1.9194647126069043e-05, + "loss": 0.5125, + "step": 9457 + }, + { + "epoch": 0.2596924766611752, + "grad_norm": 0.405081182718277, + "learning_rate": 1.9194477308867347e-05, + "loss": 0.5509, + "step": 9458 + }, + { + "epoch": 0.25971993410214167, + "grad_norm": 0.3887420892715454, + "learning_rate": 1.9194307474515015e-05, + "loss": 0.5518, + "step": 9459 + }, + { + "epoch": 0.25974739154310816, + "grad_norm": 0.37182143330574036, + "learning_rate": 1.9194137623012365e-05, + "loss": 0.581, + "step": 9460 + }, + { + "epoch": 0.25977484898407466, + "grad_norm": 0.3479037880897522, + "learning_rate": 1.9193967754359715e-05, + "loss": 0.5338, + "step": 9461 + }, + { + "epoch": 0.2598023064250412, + "grad_norm": 0.34946316480636597, + "learning_rate": 1.9193797868557377e-05, + "loss": 0.5952, + "step": 9462 + }, + { + "epoch": 0.2598297638660077, + "grad_norm": 0.3115597665309906, + "learning_rate": 1.9193627965605673e-05, + "loss": 0.3976, + "step": 9463 + }, + { + "epoch": 0.2598572213069742, + "grad_norm": 0.40065428614616394, + "learning_rate": 1.9193458045504924e-05, + "loss": 0.4622, + "step": 9464 + }, + { + "epoch": 0.2598846787479407, + "grad_norm": 0.3280889093875885, + "learning_rate": 1.9193288108255437e-05, + "loss": 0.4875, + "step": 9465 + }, + { + "epoch": 0.2599121361889072, + "grad_norm": 0.3393799960613251, + "learning_rate": 1.919311815385753e-05, + "loss": 0.4773, + "step": 9466 + }, + { + "epoch": 0.2599395936298737, + "grad_norm": 0.34150591492652893, + "learning_rate": 1.919294818231153e-05, + "loss": 0.4708, + "step": 9467 + }, + { + "epoch": 0.25996705107084017, + "grad_norm": 0.4000703692436218, + "learning_rate": 1.9192778193617746e-05, + "loss": 0.5074, + "step": 9468 + }, + { + "epoch": 0.2599945085118067, + "grad_norm": 0.45190656185150146, + "learning_rate": 1.91926081877765e-05, + "loss": 0.5634, + "step": 9469 + }, + { + "epoch": 0.2600219659527732, + "grad_norm": 0.39426058530807495, + "learning_rate": 1.9192438164788098e-05, + "loss": 0.4571, + "step": 9470 + }, + { + "epoch": 0.2600494233937397, + "grad_norm": 1.2232091426849365, + "learning_rate": 1.919226812465287e-05, + "loss": 0.5066, + "step": 9471 + }, + { + "epoch": 0.2600768808347062, + "grad_norm": 0.36946648359298706, + "learning_rate": 1.9192098067371127e-05, + "loss": 0.5637, + "step": 9472 + }, + { + "epoch": 0.2601043382756727, + "grad_norm": 0.38049912452697754, + "learning_rate": 1.9191927992943187e-05, + "loss": 0.5257, + "step": 9473 + }, + { + "epoch": 0.2601317957166392, + "grad_norm": 0.3689614534378052, + "learning_rate": 1.9191757901369366e-05, + "loss": 0.4212, + "step": 9474 + }, + { + "epoch": 0.2601592531576057, + "grad_norm": 0.41822826862335205, + "learning_rate": 1.9191587792649985e-05, + "loss": 0.5407, + "step": 9475 + }, + { + "epoch": 0.26018671059857224, + "grad_norm": 0.3599544167518616, + "learning_rate": 1.9191417666785356e-05, + "loss": 0.5085, + "step": 9476 + }, + { + "epoch": 0.26021416803953873, + "grad_norm": 0.3625636100769043, + "learning_rate": 1.9191247523775805e-05, + "loss": 0.4296, + "step": 9477 + }, + { + "epoch": 0.2602416254805052, + "grad_norm": 0.34985464811325073, + "learning_rate": 1.9191077363621643e-05, + "loss": 0.5353, + "step": 9478 + }, + { + "epoch": 0.2602690829214717, + "grad_norm": 0.36791735887527466, + "learning_rate": 1.9190907186323185e-05, + "loss": 0.558, + "step": 9479 + }, + { + "epoch": 0.2602965403624382, + "grad_norm": 0.34052157402038574, + "learning_rate": 1.9190736991880752e-05, + "loss": 0.4388, + "step": 9480 + }, + { + "epoch": 0.2603239978034047, + "grad_norm": 0.3950366675853729, + "learning_rate": 1.919056678029466e-05, + "loss": 0.4852, + "step": 9481 + }, + { + "epoch": 0.2603514552443712, + "grad_norm": 0.4166395664215088, + "learning_rate": 1.919039655156523e-05, + "loss": 0.5958, + "step": 9482 + }, + { + "epoch": 0.26037891268533775, + "grad_norm": 0.36381885409355164, + "learning_rate": 1.9190226305692777e-05, + "loss": 0.5336, + "step": 9483 + }, + { + "epoch": 0.26040637012630424, + "grad_norm": 0.42643511295318604, + "learning_rate": 1.919005604267762e-05, + "loss": 0.5836, + "step": 9484 + }, + { + "epoch": 0.26043382756727074, + "grad_norm": 0.33563244342803955, + "learning_rate": 1.9189885762520075e-05, + "loss": 0.5112, + "step": 9485 + }, + { + "epoch": 0.26046128500823723, + "grad_norm": 0.36456364393234253, + "learning_rate": 1.918971546522046e-05, + "loss": 0.5826, + "step": 9486 + }, + { + "epoch": 0.2604887424492037, + "grad_norm": 0.36982157826423645, + "learning_rate": 1.918954515077909e-05, + "loss": 0.5324, + "step": 9487 + }, + { + "epoch": 0.2605161998901702, + "grad_norm": 0.4584997594356537, + "learning_rate": 1.918937481919629e-05, + "loss": 0.5325, + "step": 9488 + }, + { + "epoch": 0.2605436573311367, + "grad_norm": 0.3793880343437195, + "learning_rate": 1.9189204470472365e-05, + "loss": 0.4719, + "step": 9489 + }, + { + "epoch": 0.26057111477210326, + "grad_norm": 0.4369315803050995, + "learning_rate": 1.918903410460765e-05, + "loss": 0.5407, + "step": 9490 + }, + { + "epoch": 0.26059857221306976, + "grad_norm": 0.5877314209938049, + "learning_rate": 1.918886372160245e-05, + "loss": 0.4972, + "step": 9491 + }, + { + "epoch": 0.26062602965403625, + "grad_norm": 0.41305211186408997, + "learning_rate": 1.9188693321457083e-05, + "loss": 0.5245, + "step": 9492 + }, + { + "epoch": 0.26065348709500274, + "grad_norm": 0.3475630581378937, + "learning_rate": 1.9188522904171877e-05, + "loss": 0.5137, + "step": 9493 + }, + { + "epoch": 0.26068094453596924, + "grad_norm": 0.3432077169418335, + "learning_rate": 1.918835246974714e-05, + "loss": 0.5028, + "step": 9494 + }, + { + "epoch": 0.26070840197693573, + "grad_norm": 0.34927037358283997, + "learning_rate": 1.918818201818319e-05, + "loss": 0.5297, + "step": 9495 + }, + { + "epoch": 0.2607358594179022, + "grad_norm": 0.3665786385536194, + "learning_rate": 1.918801154948035e-05, + "loss": 0.5275, + "step": 9496 + }, + { + "epoch": 0.2607633168588688, + "grad_norm": 0.34769123792648315, + "learning_rate": 1.9187841063638935e-05, + "loss": 0.5663, + "step": 9497 + }, + { + "epoch": 0.26079077429983527, + "grad_norm": 0.35744988918304443, + "learning_rate": 1.9187670560659267e-05, + "loss": 0.5366, + "step": 9498 + }, + { + "epoch": 0.26081823174080176, + "grad_norm": 0.3775603771209717, + "learning_rate": 1.918750004054166e-05, + "loss": 0.5453, + "step": 9499 + }, + { + "epoch": 0.26084568918176826, + "grad_norm": 0.39013585448265076, + "learning_rate": 1.9187329503286433e-05, + "loss": 0.5383, + "step": 9500 + }, + { + "epoch": 0.26087314662273475, + "grad_norm": 0.4151013493537903, + "learning_rate": 1.9187158948893904e-05, + "loss": 0.569, + "step": 9501 + }, + { + "epoch": 0.26090060406370125, + "grad_norm": 0.35886621475219727, + "learning_rate": 1.918698837736439e-05, + "loss": 0.522, + "step": 9502 + }, + { + "epoch": 0.26092806150466774, + "grad_norm": 0.41683346033096313, + "learning_rate": 1.9186817788698212e-05, + "loss": 0.4657, + "step": 9503 + }, + { + "epoch": 0.2609555189456343, + "grad_norm": 0.32532039284706116, + "learning_rate": 1.9186647182895686e-05, + "loss": 0.4216, + "step": 9504 + }, + { + "epoch": 0.2609829763866008, + "grad_norm": 0.3871030807495117, + "learning_rate": 1.918647655995713e-05, + "loss": 0.5127, + "step": 9505 + }, + { + "epoch": 0.2610104338275673, + "grad_norm": 0.3389013111591339, + "learning_rate": 1.9186305919882866e-05, + "loss": 0.485, + "step": 9506 + }, + { + "epoch": 0.2610378912685338, + "grad_norm": 0.40613073110580444, + "learning_rate": 1.9186135262673206e-05, + "loss": 0.5195, + "step": 9507 + }, + { + "epoch": 0.26106534870950027, + "grad_norm": 0.3572169542312622, + "learning_rate": 1.9185964588328476e-05, + "loss": 0.5143, + "step": 9508 + }, + { + "epoch": 0.26109280615046676, + "grad_norm": 0.36299630999565125, + "learning_rate": 1.918579389684899e-05, + "loss": 0.5144, + "step": 9509 + }, + { + "epoch": 0.26112026359143325, + "grad_norm": 0.3462035655975342, + "learning_rate": 1.9185623188235062e-05, + "loss": 0.4629, + "step": 9510 + }, + { + "epoch": 0.2611477210323998, + "grad_norm": 0.35392338037490845, + "learning_rate": 1.918545246248702e-05, + "loss": 0.6023, + "step": 9511 + }, + { + "epoch": 0.2611751784733663, + "grad_norm": 0.3871360123157501, + "learning_rate": 1.9185281719605174e-05, + "loss": 0.4806, + "step": 9512 + }, + { + "epoch": 0.2612026359143328, + "grad_norm": 0.4602441191673279, + "learning_rate": 1.918511095958985e-05, + "loss": 0.4911, + "step": 9513 + }, + { + "epoch": 0.2612300933552993, + "grad_norm": 0.4180428683757782, + "learning_rate": 1.918494018244136e-05, + "loss": 0.574, + "step": 9514 + }, + { + "epoch": 0.2612575507962658, + "grad_norm": 0.39070966839790344, + "learning_rate": 1.9184769388160022e-05, + "loss": 0.5824, + "step": 9515 + }, + { + "epoch": 0.2612850082372323, + "grad_norm": 0.36380016803741455, + "learning_rate": 1.9184598576746164e-05, + "loss": 0.4366, + "step": 9516 + }, + { + "epoch": 0.26131246567819877, + "grad_norm": 0.3573151230812073, + "learning_rate": 1.9184427748200095e-05, + "loss": 0.4965, + "step": 9517 + }, + { + "epoch": 0.2613399231191653, + "grad_norm": 0.34474125504493713, + "learning_rate": 1.9184256902522135e-05, + "loss": 0.4917, + "step": 9518 + }, + { + "epoch": 0.2613673805601318, + "grad_norm": 0.3957749903202057, + "learning_rate": 1.9184086039712608e-05, + "loss": 0.6056, + "step": 9519 + }, + { + "epoch": 0.2613948380010983, + "grad_norm": 0.40790924429893494, + "learning_rate": 1.9183915159771826e-05, + "loss": 0.5259, + "step": 9520 + }, + { + "epoch": 0.2614222954420648, + "grad_norm": 0.41063570976257324, + "learning_rate": 1.9183744262700114e-05, + "loss": 0.6009, + "step": 9521 + }, + { + "epoch": 0.2614497528830313, + "grad_norm": 0.35424110293388367, + "learning_rate": 1.9183573348497788e-05, + "loss": 0.4744, + "step": 9522 + }, + { + "epoch": 0.2614772103239978, + "grad_norm": 0.3755441904067993, + "learning_rate": 1.9183402417165162e-05, + "loss": 0.5374, + "step": 9523 + }, + { + "epoch": 0.2615046677649643, + "grad_norm": 0.39821648597717285, + "learning_rate": 1.9183231468702562e-05, + "loss": 0.5477, + "step": 9524 + }, + { + "epoch": 0.26153212520593083, + "grad_norm": 0.3581530451774597, + "learning_rate": 1.9183060503110303e-05, + "loss": 0.5325, + "step": 9525 + }, + { + "epoch": 0.2615595826468973, + "grad_norm": 0.3808218240737915, + "learning_rate": 1.918288952038871e-05, + "loss": 0.6346, + "step": 9526 + }, + { + "epoch": 0.2615870400878638, + "grad_norm": 0.36927419900894165, + "learning_rate": 1.9182718520538095e-05, + "loss": 0.5593, + "step": 9527 + }, + { + "epoch": 0.2616144975288303, + "grad_norm": 0.3387787640094757, + "learning_rate": 1.918254750355878e-05, + "loss": 0.4724, + "step": 9528 + }, + { + "epoch": 0.2616419549697968, + "grad_norm": 0.5253043174743652, + "learning_rate": 1.918237646945108e-05, + "loss": 0.5357, + "step": 9529 + }, + { + "epoch": 0.2616694124107633, + "grad_norm": 0.31770265102386475, + "learning_rate": 1.918220541821532e-05, + "loss": 0.5194, + "step": 9530 + }, + { + "epoch": 0.2616968698517298, + "grad_norm": 0.35823407769203186, + "learning_rate": 1.9182034349851814e-05, + "loss": 0.5099, + "step": 9531 + }, + { + "epoch": 0.26172432729269635, + "grad_norm": 0.36212533712387085, + "learning_rate": 1.9181863264360886e-05, + "loss": 0.5166, + "step": 9532 + }, + { + "epoch": 0.26175178473366284, + "grad_norm": 0.36044299602508545, + "learning_rate": 1.918169216174285e-05, + "loss": 0.5694, + "step": 9533 + }, + { + "epoch": 0.26177924217462933, + "grad_norm": 0.38584983348846436, + "learning_rate": 1.918152104199803e-05, + "loss": 0.5278, + "step": 9534 + }, + { + "epoch": 0.26180669961559583, + "grad_norm": 0.35913383960723877, + "learning_rate": 1.9181349905126742e-05, + "loss": 0.4868, + "step": 9535 + }, + { + "epoch": 0.2618341570565623, + "grad_norm": 0.32784292101860046, + "learning_rate": 1.9181178751129307e-05, + "loss": 0.4187, + "step": 9536 + }, + { + "epoch": 0.2618616144975288, + "grad_norm": 0.3950020968914032, + "learning_rate": 1.918100758000604e-05, + "loss": 0.5302, + "step": 9537 + }, + { + "epoch": 0.2618890719384953, + "grad_norm": 0.3308124244213104, + "learning_rate": 1.918083639175727e-05, + "loss": 0.5441, + "step": 9538 + }, + { + "epoch": 0.26191652937946186, + "grad_norm": 0.3867616355419159, + "learning_rate": 1.9180665186383303e-05, + "loss": 0.575, + "step": 9539 + }, + { + "epoch": 0.26194398682042835, + "grad_norm": 0.3432632386684418, + "learning_rate": 1.918049396388447e-05, + "loss": 0.5281, + "step": 9540 + }, + { + "epoch": 0.26197144426139485, + "grad_norm": 0.4088667929172516, + "learning_rate": 1.918032272426108e-05, + "loss": 0.5744, + "step": 9541 + }, + { + "epoch": 0.26199890170236134, + "grad_norm": 0.5341666340827942, + "learning_rate": 1.9180151467513465e-05, + "loss": 0.5968, + "step": 9542 + }, + { + "epoch": 0.26202635914332784, + "grad_norm": 0.4142821729183197, + "learning_rate": 1.9179980193641935e-05, + "loss": 0.4704, + "step": 9543 + }, + { + "epoch": 0.26205381658429433, + "grad_norm": 0.4774053394794464, + "learning_rate": 1.917980890264681e-05, + "loss": 0.5291, + "step": 9544 + }, + { + "epoch": 0.2620812740252608, + "grad_norm": 0.3450198471546173, + "learning_rate": 1.9179637594528414e-05, + "loss": 0.4904, + "step": 9545 + }, + { + "epoch": 0.2621087314662274, + "grad_norm": 0.3376244902610779, + "learning_rate": 1.9179466269287063e-05, + "loss": 0.5289, + "step": 9546 + }, + { + "epoch": 0.26213618890719387, + "grad_norm": 0.3433992862701416, + "learning_rate": 1.9179294926923077e-05, + "loss": 0.5682, + "step": 9547 + }, + { + "epoch": 0.26216364634816036, + "grad_norm": 0.2906266152858734, + "learning_rate": 1.9179123567436777e-05, + "loss": 0.4544, + "step": 9548 + }, + { + "epoch": 0.26219110378912686, + "grad_norm": 0.3786798119544983, + "learning_rate": 1.9178952190828482e-05, + "loss": 0.5017, + "step": 9549 + }, + { + "epoch": 0.26221856123009335, + "grad_norm": 0.43893924355506897, + "learning_rate": 1.9178780797098514e-05, + "loss": 0.4954, + "step": 9550 + }, + { + "epoch": 0.26224601867105984, + "grad_norm": 0.40444415807724, + "learning_rate": 1.9178609386247188e-05, + "loss": 0.4769, + "step": 9551 + }, + { + "epoch": 0.26227347611202634, + "grad_norm": 0.3869764506816864, + "learning_rate": 1.917843795827482e-05, + "loss": 0.5012, + "step": 9552 + }, + { + "epoch": 0.2623009335529929, + "grad_norm": 0.369089812040329, + "learning_rate": 1.9178266513181744e-05, + "loss": 0.5107, + "step": 9553 + }, + { + "epoch": 0.2623283909939594, + "grad_norm": 0.36887305974960327, + "learning_rate": 1.917809505096827e-05, + "loss": 0.5492, + "step": 9554 + }, + { + "epoch": 0.2623558484349259, + "grad_norm": 0.4875061511993408, + "learning_rate": 1.9177923571634717e-05, + "loss": 0.5542, + "step": 9555 + }, + { + "epoch": 0.26238330587589237, + "grad_norm": 0.3922513723373413, + "learning_rate": 1.917775207518141e-05, + "loss": 0.5484, + "step": 9556 + }, + { + "epoch": 0.26241076331685886, + "grad_norm": 0.36044880747795105, + "learning_rate": 1.9177580561608668e-05, + "loss": 0.4466, + "step": 9557 + }, + { + "epoch": 0.26243822075782536, + "grad_norm": 0.37549281120300293, + "learning_rate": 1.9177409030916806e-05, + "loss": 0.5171, + "step": 9558 + }, + { + "epoch": 0.26246567819879185, + "grad_norm": 0.35882240533828735, + "learning_rate": 1.9177237483106147e-05, + "loss": 0.6259, + "step": 9559 + }, + { + "epoch": 0.2624931356397584, + "grad_norm": 0.3721841275691986, + "learning_rate": 1.917706591817701e-05, + "loss": 0.5209, + "step": 9560 + }, + { + "epoch": 0.2625205930807249, + "grad_norm": 0.35287773609161377, + "learning_rate": 1.9176894336129717e-05, + "loss": 0.532, + "step": 9561 + }, + { + "epoch": 0.2625480505216914, + "grad_norm": 0.35146135091781616, + "learning_rate": 1.917672273696459e-05, + "loss": 0.5406, + "step": 9562 + }, + { + "epoch": 0.2625755079626579, + "grad_norm": 0.40884673595428467, + "learning_rate": 1.9176551120681942e-05, + "loss": 0.5381, + "step": 9563 + }, + { + "epoch": 0.2626029654036244, + "grad_norm": 0.3548591136932373, + "learning_rate": 1.91763794872821e-05, + "loss": 0.5852, + "step": 9564 + }, + { + "epoch": 0.26263042284459087, + "grad_norm": 0.6528147459030151, + "learning_rate": 1.917620783676538e-05, + "loss": 0.4871, + "step": 9565 + }, + { + "epoch": 0.26265788028555737, + "grad_norm": 0.35479700565338135, + "learning_rate": 1.9176036169132103e-05, + "loss": 0.5341, + "step": 9566 + }, + { + "epoch": 0.2626853377265239, + "grad_norm": 0.3318730592727661, + "learning_rate": 1.9175864484382593e-05, + "loss": 0.42, + "step": 9567 + }, + { + "epoch": 0.2627127951674904, + "grad_norm": 0.43123915791511536, + "learning_rate": 1.9175692782517163e-05, + "loss": 0.5072, + "step": 9568 + }, + { + "epoch": 0.2627402526084569, + "grad_norm": 0.3558466136455536, + "learning_rate": 1.917552106353614e-05, + "loss": 0.487, + "step": 9569 + }, + { + "epoch": 0.2627677100494234, + "grad_norm": 0.3710677921772003, + "learning_rate": 1.9175349327439844e-05, + "loss": 0.5034, + "step": 9570 + }, + { + "epoch": 0.2627951674903899, + "grad_norm": 0.3625600337982178, + "learning_rate": 1.9175177574228588e-05, + "loss": 0.5482, + "step": 9571 + }, + { + "epoch": 0.2628226249313564, + "grad_norm": 0.37285807728767395, + "learning_rate": 1.9175005803902703e-05, + "loss": 0.5354, + "step": 9572 + }, + { + "epoch": 0.2628500823723229, + "grad_norm": 0.37697094678878784, + "learning_rate": 1.91748340164625e-05, + "loss": 0.6026, + "step": 9573 + }, + { + "epoch": 0.2628775398132894, + "grad_norm": 0.427742063999176, + "learning_rate": 1.9174662211908305e-05, + "loss": 0.4564, + "step": 9574 + }, + { + "epoch": 0.2629049972542559, + "grad_norm": 0.3344784379005432, + "learning_rate": 1.9174490390240435e-05, + "loss": 0.5301, + "step": 9575 + }, + { + "epoch": 0.2629324546952224, + "grad_norm": 0.4011137783527374, + "learning_rate": 1.9174318551459214e-05, + "loss": 0.5045, + "step": 9576 + }, + { + "epoch": 0.2629599121361889, + "grad_norm": 0.8655737042427063, + "learning_rate": 1.917414669556496e-05, + "loss": 0.4776, + "step": 9577 + }, + { + "epoch": 0.2629873695771554, + "grad_norm": 0.37418410181999207, + "learning_rate": 1.9173974822557993e-05, + "loss": 0.5568, + "step": 9578 + }, + { + "epoch": 0.2630148270181219, + "grad_norm": 0.36712846159935, + "learning_rate": 1.9173802932438637e-05, + "loss": 0.4976, + "step": 9579 + }, + { + "epoch": 0.2630422844590884, + "grad_norm": 0.38154181838035583, + "learning_rate": 1.917363102520721e-05, + "loss": 0.4971, + "step": 9580 + }, + { + "epoch": 0.2630697419000549, + "grad_norm": 0.3725656270980835, + "learning_rate": 1.9173459100864033e-05, + "loss": 0.5227, + "step": 9581 + }, + { + "epoch": 0.26309719934102144, + "grad_norm": 0.3549153804779053, + "learning_rate": 1.9173287159409425e-05, + "loss": 0.4697, + "step": 9582 + }, + { + "epoch": 0.26312465678198793, + "grad_norm": 0.32899147272109985, + "learning_rate": 1.917311520084371e-05, + "loss": 0.5183, + "step": 9583 + }, + { + "epoch": 0.2631521142229544, + "grad_norm": 0.3964860141277313, + "learning_rate": 1.917294322516721e-05, + "loss": 0.5777, + "step": 9584 + }, + { + "epoch": 0.2631795716639209, + "grad_norm": 0.35303834080696106, + "learning_rate": 1.9172771232380244e-05, + "loss": 0.6091, + "step": 9585 + }, + { + "epoch": 0.2632070291048874, + "grad_norm": 0.4135599434375763, + "learning_rate": 1.9172599222483128e-05, + "loss": 0.6001, + "step": 9586 + }, + { + "epoch": 0.2632344865458539, + "grad_norm": 0.3965635299682617, + "learning_rate": 1.917242719547619e-05, + "loss": 0.4871, + "step": 9587 + }, + { + "epoch": 0.2632619439868204, + "grad_norm": 0.3555094003677368, + "learning_rate": 1.9172255151359746e-05, + "loss": 0.4738, + "step": 9588 + }, + { + "epoch": 0.26328940142778695, + "grad_norm": 0.33029139041900635, + "learning_rate": 1.9172083090134117e-05, + "loss": 0.4596, + "step": 9589 + }, + { + "epoch": 0.26331685886875345, + "grad_norm": 0.41901499032974243, + "learning_rate": 1.9171911011799625e-05, + "loss": 0.6084, + "step": 9590 + }, + { + "epoch": 0.26334431630971994, + "grad_norm": 0.3587934076786041, + "learning_rate": 1.9171738916356596e-05, + "loss": 0.5308, + "step": 9591 + }, + { + "epoch": 0.26337177375068643, + "grad_norm": 0.34647253155708313, + "learning_rate": 1.9171566803805347e-05, + "loss": 0.5515, + "step": 9592 + }, + { + "epoch": 0.2633992311916529, + "grad_norm": 0.3451676666736603, + "learning_rate": 1.9171394674146195e-05, + "loss": 0.4881, + "step": 9593 + }, + { + "epoch": 0.2634266886326194, + "grad_norm": 0.4046671986579895, + "learning_rate": 1.9171222527379468e-05, + "loss": 0.5588, + "step": 9594 + }, + { + "epoch": 0.2634541460735859, + "grad_norm": 0.3560144305229187, + "learning_rate": 1.917105036350548e-05, + "loss": 0.5265, + "step": 9595 + }, + { + "epoch": 0.26348160351455246, + "grad_norm": 0.3412487208843231, + "learning_rate": 1.9170878182524558e-05, + "loss": 0.4738, + "step": 9596 + }, + { + "epoch": 0.26350906095551896, + "grad_norm": 0.3334677815437317, + "learning_rate": 1.9170705984437022e-05, + "loss": 0.51, + "step": 9597 + }, + { + "epoch": 0.26353651839648545, + "grad_norm": 0.3341671824455261, + "learning_rate": 1.9170533769243192e-05, + "loss": 0.4559, + "step": 9598 + }, + { + "epoch": 0.26356397583745195, + "grad_norm": 0.3877742290496826, + "learning_rate": 1.917036153694339e-05, + "loss": 0.4878, + "step": 9599 + }, + { + "epoch": 0.26359143327841844, + "grad_norm": 0.383279949426651, + "learning_rate": 1.9170189287537936e-05, + "loss": 0.6668, + "step": 9600 + }, + { + "epoch": 0.26361889071938494, + "grad_norm": 0.30659568309783936, + "learning_rate": 1.9170017021027153e-05, + "loss": 0.4629, + "step": 9601 + }, + { + "epoch": 0.26364634816035143, + "grad_norm": 0.34990280866622925, + "learning_rate": 1.916984473741136e-05, + "loss": 0.488, + "step": 9602 + }, + { + "epoch": 0.263673805601318, + "grad_norm": 0.3550940454006195, + "learning_rate": 1.916967243669088e-05, + "loss": 0.5919, + "step": 9603 + }, + { + "epoch": 0.2637012630422845, + "grad_norm": 0.392974317073822, + "learning_rate": 1.9169500118866034e-05, + "loss": 0.5459, + "step": 9604 + }, + { + "epoch": 0.26372872048325097, + "grad_norm": 0.3230685889720917, + "learning_rate": 1.9169327783937145e-05, + "loss": 0.6285, + "step": 9605 + }, + { + "epoch": 0.26375617792421746, + "grad_norm": 0.4024016261100769, + "learning_rate": 1.9169155431904535e-05, + "loss": 0.5661, + "step": 9606 + }, + { + "epoch": 0.26378363536518395, + "grad_norm": 0.34372881054878235, + "learning_rate": 1.9168983062768522e-05, + "loss": 0.5269, + "step": 9607 + }, + { + "epoch": 0.26381109280615045, + "grad_norm": 0.34878993034362793, + "learning_rate": 1.916881067652943e-05, + "loss": 0.4844, + "step": 9608 + }, + { + "epoch": 0.26383855024711694, + "grad_norm": 0.38800927996635437, + "learning_rate": 1.9168638273187575e-05, + "loss": 0.5056, + "step": 9609 + }, + { + "epoch": 0.2638660076880835, + "grad_norm": 0.35882869362831116, + "learning_rate": 1.9168465852743286e-05, + "loss": 0.5151, + "step": 9610 + }, + { + "epoch": 0.26389346512905, + "grad_norm": 0.40235576033592224, + "learning_rate": 1.9168293415196887e-05, + "loss": 0.5709, + "step": 9611 + }, + { + "epoch": 0.2639209225700165, + "grad_norm": 0.3672046363353729, + "learning_rate": 1.916812096054869e-05, + "loss": 0.4532, + "step": 9612 + }, + { + "epoch": 0.263948380010983, + "grad_norm": 0.3578701317310333, + "learning_rate": 1.916794848879902e-05, + "loss": 0.5409, + "step": 9613 + }, + { + "epoch": 0.26397583745194947, + "grad_norm": 0.44952693581581116, + "learning_rate": 1.9167775999948203e-05, + "loss": 0.5958, + "step": 9614 + }, + { + "epoch": 0.26400329489291596, + "grad_norm": 0.352177232503891, + "learning_rate": 1.9167603493996555e-05, + "loss": 0.6156, + "step": 9615 + }, + { + "epoch": 0.26403075233388246, + "grad_norm": 0.3794274628162384, + "learning_rate": 1.9167430970944402e-05, + "loss": 0.5287, + "step": 9616 + }, + { + "epoch": 0.264058209774849, + "grad_norm": 0.4086071848869324, + "learning_rate": 1.9167258430792067e-05, + "loss": 0.5552, + "step": 9617 + }, + { + "epoch": 0.2640856672158155, + "grad_norm": 0.3740593492984772, + "learning_rate": 1.9167085873539865e-05, + "loss": 0.5087, + "step": 9618 + }, + { + "epoch": 0.264113124656782, + "grad_norm": 0.37112903594970703, + "learning_rate": 1.9166913299188124e-05, + "loss": 0.6133, + "step": 9619 + }, + { + "epoch": 0.2641405820977485, + "grad_norm": 0.45442551374435425, + "learning_rate": 1.9166740707737163e-05, + "loss": 0.4923, + "step": 9620 + }, + { + "epoch": 0.264168039538715, + "grad_norm": 0.4267108738422394, + "learning_rate": 1.9166568099187307e-05, + "loss": 0.5127, + "step": 9621 + }, + { + "epoch": 0.2641954969796815, + "grad_norm": 0.3346796929836273, + "learning_rate": 1.916639547353887e-05, + "loss": 0.4951, + "step": 9622 + }, + { + "epoch": 0.26422295442064797, + "grad_norm": 0.3688947558403015, + "learning_rate": 1.9166222830792188e-05, + "loss": 0.5071, + "step": 9623 + }, + { + "epoch": 0.2642504118616145, + "grad_norm": 0.36276134848594666, + "learning_rate": 1.916605017094757e-05, + "loss": 0.5383, + "step": 9624 + }, + { + "epoch": 0.264277869302581, + "grad_norm": 0.37570056319236755, + "learning_rate": 1.9165877494005345e-05, + "loss": 0.5961, + "step": 9625 + }, + { + "epoch": 0.2643053267435475, + "grad_norm": 0.3653617799282074, + "learning_rate": 1.9165704799965833e-05, + "loss": 0.5136, + "step": 9626 + }, + { + "epoch": 0.264332784184514, + "grad_norm": 0.3883560001850128, + "learning_rate": 1.9165532088829354e-05, + "loss": 0.5519, + "step": 9627 + }, + { + "epoch": 0.2643602416254805, + "grad_norm": 0.3660111129283905, + "learning_rate": 1.9165359360596233e-05, + "loss": 0.5241, + "step": 9628 + }, + { + "epoch": 0.264387699066447, + "grad_norm": 0.38687291741371155, + "learning_rate": 1.9165186615266792e-05, + "loss": 0.5431, + "step": 9629 + }, + { + "epoch": 0.2644151565074135, + "grad_norm": 0.38031086325645447, + "learning_rate": 1.9165013852841353e-05, + "loss": 0.5243, + "step": 9630 + }, + { + "epoch": 0.26444261394838003, + "grad_norm": 0.37998342514038086, + "learning_rate": 1.9164841073320238e-05, + "loss": 0.5546, + "step": 9631 + }, + { + "epoch": 0.26447007138934653, + "grad_norm": 0.36830297112464905, + "learning_rate": 1.9164668276703772e-05, + "loss": 0.524, + "step": 9632 + }, + { + "epoch": 0.264497528830313, + "grad_norm": 0.3662182092666626, + "learning_rate": 1.916449546299227e-05, + "loss": 0.5106, + "step": 9633 + }, + { + "epoch": 0.2645249862712795, + "grad_norm": 0.39061954617500305, + "learning_rate": 1.916432263218606e-05, + "loss": 0.5605, + "step": 9634 + }, + { + "epoch": 0.264552443712246, + "grad_norm": 0.42209678888320923, + "learning_rate": 1.9164149784285464e-05, + "loss": 0.6107, + "step": 9635 + }, + { + "epoch": 0.2645799011532125, + "grad_norm": 0.38872209191322327, + "learning_rate": 1.9163976919290806e-05, + "loss": 0.4836, + "step": 9636 + }, + { + "epoch": 0.264607358594179, + "grad_norm": 0.3437592387199402, + "learning_rate": 1.9163804037202404e-05, + "loss": 0.557, + "step": 9637 + }, + { + "epoch": 0.26463481603514555, + "grad_norm": 0.33125877380371094, + "learning_rate": 1.9163631138020582e-05, + "loss": 0.5307, + "step": 9638 + }, + { + "epoch": 0.26466227347611204, + "grad_norm": 0.33722245693206787, + "learning_rate": 1.9163458221745665e-05, + "loss": 0.492, + "step": 9639 + }, + { + "epoch": 0.26468973091707854, + "grad_norm": 0.367610365152359, + "learning_rate": 1.9163285288377973e-05, + "loss": 0.5687, + "step": 9640 + }, + { + "epoch": 0.26471718835804503, + "grad_norm": 0.6498546600341797, + "learning_rate": 1.916311233791783e-05, + "loss": 0.5727, + "step": 9641 + }, + { + "epoch": 0.2647446457990115, + "grad_norm": 0.404351145029068, + "learning_rate": 1.9162939370365555e-05, + "loss": 0.5695, + "step": 9642 + }, + { + "epoch": 0.264772103239978, + "grad_norm": 0.35110992193222046, + "learning_rate": 1.916276638572148e-05, + "loss": 0.5452, + "step": 9643 + }, + { + "epoch": 0.2647995606809445, + "grad_norm": 0.3296765387058258, + "learning_rate": 1.9162593383985916e-05, + "loss": 0.4481, + "step": 9644 + }, + { + "epoch": 0.26482701812191106, + "grad_norm": 0.3731238842010498, + "learning_rate": 1.9162420365159193e-05, + "loss": 0.478, + "step": 9645 + }, + { + "epoch": 0.26485447556287756, + "grad_norm": 0.3715347945690155, + "learning_rate": 1.916224732924163e-05, + "loss": 0.5003, + "step": 9646 + }, + { + "epoch": 0.26488193300384405, + "grad_norm": 0.33556532859802246, + "learning_rate": 1.9162074276233554e-05, + "loss": 0.5378, + "step": 9647 + }, + { + "epoch": 0.26490939044481054, + "grad_norm": 0.38038182258605957, + "learning_rate": 1.9161901206135283e-05, + "loss": 0.4609, + "step": 9648 + }, + { + "epoch": 0.26493684788577704, + "grad_norm": 0.36881354451179504, + "learning_rate": 1.9161728118947142e-05, + "loss": 0.4816, + "step": 9649 + }, + { + "epoch": 0.26496430532674353, + "grad_norm": 0.3789440989494324, + "learning_rate": 1.9161555014669455e-05, + "loss": 0.5452, + "step": 9650 + }, + { + "epoch": 0.26499176276771, + "grad_norm": 0.37304946780204773, + "learning_rate": 1.9161381893302543e-05, + "loss": 0.5518, + "step": 9651 + }, + { + "epoch": 0.2650192202086766, + "grad_norm": 0.4110119938850403, + "learning_rate": 1.916120875484673e-05, + "loss": 0.5955, + "step": 9652 + }, + { + "epoch": 0.26504667764964307, + "grad_norm": 0.3528750538825989, + "learning_rate": 1.916103559930234e-05, + "loss": 0.4578, + "step": 9653 + }, + { + "epoch": 0.26507413509060956, + "grad_norm": 0.3554224967956543, + "learning_rate": 1.9160862426669694e-05, + "loss": 0.5499, + "step": 9654 + }, + { + "epoch": 0.26510159253157606, + "grad_norm": 0.4055248498916626, + "learning_rate": 1.9160689236949116e-05, + "loss": 0.5534, + "step": 9655 + }, + { + "epoch": 0.26512904997254255, + "grad_norm": 0.3381199538707733, + "learning_rate": 1.916051603014093e-05, + "loss": 0.5125, + "step": 9656 + }, + { + "epoch": 0.26515650741350905, + "grad_norm": 0.35418254137039185, + "learning_rate": 1.9160342806245456e-05, + "loss": 0.5727, + "step": 9657 + }, + { + "epoch": 0.26518396485447554, + "grad_norm": 0.42246386408805847, + "learning_rate": 1.9160169565263018e-05, + "loss": 0.5844, + "step": 9658 + }, + { + "epoch": 0.2652114222954421, + "grad_norm": 0.3594554662704468, + "learning_rate": 1.9159996307193946e-05, + "loss": 0.5483, + "step": 9659 + }, + { + "epoch": 0.2652388797364086, + "grad_norm": 0.3888269364833832, + "learning_rate": 1.9159823032038552e-05, + "loss": 0.506, + "step": 9660 + }, + { + "epoch": 0.2652663371773751, + "grad_norm": 0.3493714928627014, + "learning_rate": 1.9159649739797163e-05, + "loss": 0.4455, + "step": 9661 + }, + { + "epoch": 0.26529379461834157, + "grad_norm": 0.47876495122909546, + "learning_rate": 1.915947643047011e-05, + "loss": 0.5399, + "step": 9662 + }, + { + "epoch": 0.26532125205930807, + "grad_norm": 0.40291666984558105, + "learning_rate": 1.9159303104057706e-05, + "loss": 0.5226, + "step": 9663 + }, + { + "epoch": 0.26534870950027456, + "grad_norm": 0.3761269748210907, + "learning_rate": 1.915912976056028e-05, + "loss": 0.496, + "step": 9664 + }, + { + "epoch": 0.26537616694124105, + "grad_norm": 0.3822683095932007, + "learning_rate": 1.915895639997815e-05, + "loss": 0.4768, + "step": 9665 + }, + { + "epoch": 0.2654036243822076, + "grad_norm": 0.3779137432575226, + "learning_rate": 1.9158783022311644e-05, + "loss": 0.5691, + "step": 9666 + }, + { + "epoch": 0.2654310818231741, + "grad_norm": 0.4052309989929199, + "learning_rate": 1.9158609627561087e-05, + "loss": 0.4986, + "step": 9667 + }, + { + "epoch": 0.2654585392641406, + "grad_norm": 0.43523305654525757, + "learning_rate": 1.9158436215726797e-05, + "loss": 0.4736, + "step": 9668 + }, + { + "epoch": 0.2654859967051071, + "grad_norm": 0.598957359790802, + "learning_rate": 1.9158262786809107e-05, + "loss": 0.633, + "step": 9669 + }, + { + "epoch": 0.2655134541460736, + "grad_norm": 0.4731094241142273, + "learning_rate": 1.915808934080833e-05, + "loss": 0.6567, + "step": 9670 + }, + { + "epoch": 0.2655409115870401, + "grad_norm": 0.41556647419929504, + "learning_rate": 1.9157915877724792e-05, + "loss": 0.4223, + "step": 9671 + }, + { + "epoch": 0.26556836902800657, + "grad_norm": 0.3959093987941742, + "learning_rate": 1.915774239755882e-05, + "loss": 0.4651, + "step": 9672 + }, + { + "epoch": 0.2655958264689731, + "grad_norm": 0.3465990126132965, + "learning_rate": 1.9157568900310734e-05, + "loss": 0.4876, + "step": 9673 + }, + { + "epoch": 0.2656232839099396, + "grad_norm": 0.3708794414997101, + "learning_rate": 1.915739538598086e-05, + "loss": 0.4798, + "step": 9674 + }, + { + "epoch": 0.2656507413509061, + "grad_norm": 0.4300672709941864, + "learning_rate": 1.915722185456952e-05, + "loss": 0.5542, + "step": 9675 + }, + { + "epoch": 0.2656781987918726, + "grad_norm": 0.3363136053085327, + "learning_rate": 1.915704830607704e-05, + "loss": 0.5387, + "step": 9676 + }, + { + "epoch": 0.2657056562328391, + "grad_norm": 0.4030798375606537, + "learning_rate": 1.9156874740503743e-05, + "loss": 0.5011, + "step": 9677 + }, + { + "epoch": 0.2657331136738056, + "grad_norm": 0.3293135464191437, + "learning_rate": 1.915670115784995e-05, + "loss": 0.4442, + "step": 9678 + }, + { + "epoch": 0.2657605711147721, + "grad_norm": 0.4067402184009552, + "learning_rate": 1.9156527558115988e-05, + "loss": 0.6114, + "step": 9679 + }, + { + "epoch": 0.26578802855573863, + "grad_norm": 0.5132409334182739, + "learning_rate": 1.9156353941302178e-05, + "loss": 0.4927, + "step": 9680 + }, + { + "epoch": 0.2658154859967051, + "grad_norm": 0.4042612612247467, + "learning_rate": 1.9156180307408846e-05, + "loss": 0.4884, + "step": 9681 + }, + { + "epoch": 0.2658429434376716, + "grad_norm": 0.3470262885093689, + "learning_rate": 1.9156006656436318e-05, + "loss": 0.4649, + "step": 9682 + }, + { + "epoch": 0.2658704008786381, + "grad_norm": 0.4658263325691223, + "learning_rate": 1.9155832988384912e-05, + "loss": 0.612, + "step": 9683 + }, + { + "epoch": 0.2658978583196046, + "grad_norm": 0.3036240339279175, + "learning_rate": 1.9155659303254957e-05, + "loss": 0.432, + "step": 9684 + }, + { + "epoch": 0.2659253157605711, + "grad_norm": 0.3453723192214966, + "learning_rate": 1.915548560104678e-05, + "loss": 0.4734, + "step": 9685 + }, + { + "epoch": 0.2659527732015376, + "grad_norm": 0.3761102259159088, + "learning_rate": 1.915531188176069e-05, + "loss": 0.4547, + "step": 9686 + }, + { + "epoch": 0.26598023064250415, + "grad_norm": 0.38731110095977783, + "learning_rate": 1.9155138145397027e-05, + "loss": 0.5732, + "step": 9687 + }, + { + "epoch": 0.26600768808347064, + "grad_norm": 0.38174399733543396, + "learning_rate": 1.915496439195611e-05, + "loss": 0.5151, + "step": 9688 + }, + { + "epoch": 0.26603514552443713, + "grad_norm": 0.3666574954986572, + "learning_rate": 1.9154790621438262e-05, + "loss": 0.4831, + "step": 9689 + }, + { + "epoch": 0.2660626029654036, + "grad_norm": 0.44414129853248596, + "learning_rate": 1.915461683384381e-05, + "loss": 0.4982, + "step": 9690 + }, + { + "epoch": 0.2660900604063701, + "grad_norm": 0.40528759360313416, + "learning_rate": 1.915444302917307e-05, + "loss": 0.5717, + "step": 9691 + }, + { + "epoch": 0.2661175178473366, + "grad_norm": 0.3392919600009918, + "learning_rate": 1.9154269207426374e-05, + "loss": 0.5271, + "step": 9692 + }, + { + "epoch": 0.2661449752883031, + "grad_norm": 0.38686496019363403, + "learning_rate": 1.9154095368604046e-05, + "loss": 0.4953, + "step": 9693 + }, + { + "epoch": 0.26617243272926966, + "grad_norm": 0.41040706634521484, + "learning_rate": 1.9153921512706407e-05, + "loss": 0.5589, + "step": 9694 + }, + { + "epoch": 0.26619989017023615, + "grad_norm": 0.3274059593677521, + "learning_rate": 1.9153747639733788e-05, + "loss": 0.5578, + "step": 9695 + }, + { + "epoch": 0.26622734761120265, + "grad_norm": 0.3710768222808838, + "learning_rate": 1.9153573749686502e-05, + "loss": 0.521, + "step": 9696 + }, + { + "epoch": 0.26625480505216914, + "grad_norm": 0.33454635739326477, + "learning_rate": 1.9153399842564878e-05, + "loss": 0.4592, + "step": 9697 + }, + { + "epoch": 0.26628226249313564, + "grad_norm": 0.3518390655517578, + "learning_rate": 1.9153225918369245e-05, + "loss": 0.5498, + "step": 9698 + }, + { + "epoch": 0.26630971993410213, + "grad_norm": 0.40558212995529175, + "learning_rate": 1.9153051977099926e-05, + "loss": 0.5929, + "step": 9699 + }, + { + "epoch": 0.2663371773750686, + "grad_norm": 0.36725592613220215, + "learning_rate": 1.9152878018757244e-05, + "loss": 0.4901, + "step": 9700 + }, + { + "epoch": 0.2663646348160352, + "grad_norm": 12.186029434204102, + "learning_rate": 1.9152704043341522e-05, + "loss": 0.5255, + "step": 9701 + }, + { + "epoch": 0.26639209225700167, + "grad_norm": 0.32981520891189575, + "learning_rate": 1.9152530050853082e-05, + "loss": 0.4988, + "step": 9702 + }, + { + "epoch": 0.26641954969796816, + "grad_norm": 1.2789078950881958, + "learning_rate": 1.9152356041292256e-05, + "loss": 0.4802, + "step": 9703 + }, + { + "epoch": 0.26644700713893466, + "grad_norm": 0.34066396951675415, + "learning_rate": 1.9152182014659366e-05, + "loss": 0.4901, + "step": 9704 + }, + { + "epoch": 0.26647446457990115, + "grad_norm": 0.41185519099235535, + "learning_rate": 1.9152007970954733e-05, + "loss": 0.6412, + "step": 9705 + }, + { + "epoch": 0.26650192202086764, + "grad_norm": 0.891452431678772, + "learning_rate": 1.9151833910178683e-05, + "loss": 0.5312, + "step": 9706 + }, + { + "epoch": 0.26652937946183414, + "grad_norm": 0.41768109798431396, + "learning_rate": 1.9151659832331545e-05, + "loss": 0.4994, + "step": 9707 + }, + { + "epoch": 0.26655683690280063, + "grad_norm": 0.35634541511535645, + "learning_rate": 1.915148573741364e-05, + "loss": 0.5043, + "step": 9708 + }, + { + "epoch": 0.2665842943437672, + "grad_norm": 2.5922248363494873, + "learning_rate": 1.915131162542529e-05, + "loss": 0.5656, + "step": 9709 + }, + { + "epoch": 0.2666117517847337, + "grad_norm": 0.38412413001060486, + "learning_rate": 1.9151137496366828e-05, + "loss": 0.5962, + "step": 9710 + }, + { + "epoch": 0.26663920922570017, + "grad_norm": 0.37335050106048584, + "learning_rate": 1.9150963350238568e-05, + "loss": 0.4948, + "step": 9711 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 0.41074469685554504, + "learning_rate": 1.915078918704085e-05, + "loss": 0.5451, + "step": 9712 + }, + { + "epoch": 0.26669412410763316, + "grad_norm": 0.36663976311683655, + "learning_rate": 1.915061500677398e-05, + "loss": 0.5506, + "step": 9713 + }, + { + "epoch": 0.26672158154859965, + "grad_norm": 0.32534879446029663, + "learning_rate": 1.9150440809438296e-05, + "loss": 0.4235, + "step": 9714 + }, + { + "epoch": 0.26674903898956615, + "grad_norm": 0.3794516324996948, + "learning_rate": 1.9150266595034122e-05, + "loss": 0.5989, + "step": 9715 + }, + { + "epoch": 0.2667764964305327, + "grad_norm": 0.392097532749176, + "learning_rate": 1.9150092363561777e-05, + "loss": 0.5068, + "step": 9716 + }, + { + "epoch": 0.2668039538714992, + "grad_norm": 0.3764788806438446, + "learning_rate": 1.914991811502159e-05, + "loss": 0.4465, + "step": 9717 + }, + { + "epoch": 0.2668314113124657, + "grad_norm": 0.3451516926288605, + "learning_rate": 1.9149743849413887e-05, + "loss": 0.5162, + "step": 9718 + }, + { + "epoch": 0.2668588687534322, + "grad_norm": 0.35021522641181946, + "learning_rate": 1.914956956673899e-05, + "loss": 0.5427, + "step": 9719 + }, + { + "epoch": 0.26688632619439867, + "grad_norm": 0.4173634350299835, + "learning_rate": 1.9149395266997226e-05, + "loss": 0.5881, + "step": 9720 + }, + { + "epoch": 0.26691378363536516, + "grad_norm": 0.33796870708465576, + "learning_rate": 1.9149220950188917e-05, + "loss": 0.4808, + "step": 9721 + }, + { + "epoch": 0.26694124107633166, + "grad_norm": 0.6692757606506348, + "learning_rate": 1.9149046616314395e-05, + "loss": 0.602, + "step": 9722 + }, + { + "epoch": 0.2669686985172982, + "grad_norm": 0.4141981303691864, + "learning_rate": 1.9148872265373978e-05, + "loss": 0.5389, + "step": 9723 + }, + { + "epoch": 0.2669961559582647, + "grad_norm": 0.362693727016449, + "learning_rate": 1.9148697897367996e-05, + "loss": 0.5547, + "step": 9724 + }, + { + "epoch": 0.2670236133992312, + "grad_norm": 0.3432229459285736, + "learning_rate": 1.9148523512296773e-05, + "loss": 0.5044, + "step": 9725 + }, + { + "epoch": 0.2670510708401977, + "grad_norm": 0.36700043082237244, + "learning_rate": 1.914834911016063e-05, + "loss": 0.48, + "step": 9726 + }, + { + "epoch": 0.2670785282811642, + "grad_norm": 0.3539049029350281, + "learning_rate": 1.91481746909599e-05, + "loss": 0.4678, + "step": 9727 + }, + { + "epoch": 0.2671059857221307, + "grad_norm": 0.33634528517723083, + "learning_rate": 1.9148000254694903e-05, + "loss": 0.5355, + "step": 9728 + }, + { + "epoch": 0.2671334431630972, + "grad_norm": 0.6096423268318176, + "learning_rate": 1.914782580136597e-05, + "loss": 0.5652, + "step": 9729 + }, + { + "epoch": 0.2671609006040637, + "grad_norm": 0.37629780173301697, + "learning_rate": 1.9147651330973417e-05, + "loss": 0.5001, + "step": 9730 + }, + { + "epoch": 0.2671883580450302, + "grad_norm": 0.35613054037094116, + "learning_rate": 1.9147476843517574e-05, + "loss": 0.5549, + "step": 9731 + }, + { + "epoch": 0.2672158154859967, + "grad_norm": 0.3602292537689209, + "learning_rate": 1.914730233899877e-05, + "loss": 0.5752, + "step": 9732 + }, + { + "epoch": 0.2672432729269632, + "grad_norm": 0.3413795828819275, + "learning_rate": 1.9147127817417327e-05, + "loss": 0.5173, + "step": 9733 + }, + { + "epoch": 0.2672707303679297, + "grad_norm": 0.37886103987693787, + "learning_rate": 1.914695327877357e-05, + "loss": 0.4866, + "step": 9734 + }, + { + "epoch": 0.2672981878088962, + "grad_norm": 0.36669865250587463, + "learning_rate": 1.9146778723067826e-05, + "loss": 0.5272, + "step": 9735 + }, + { + "epoch": 0.2673256452498627, + "grad_norm": 0.3531798720359802, + "learning_rate": 1.9146604150300425e-05, + "loss": 0.5531, + "step": 9736 + }, + { + "epoch": 0.26735310269082924, + "grad_norm": 0.4010038673877716, + "learning_rate": 1.9146429560471685e-05, + "loss": 0.6124, + "step": 9737 + }, + { + "epoch": 0.26738056013179573, + "grad_norm": 0.39246878027915955, + "learning_rate": 1.9146254953581934e-05, + "loss": 0.5279, + "step": 9738 + }, + { + "epoch": 0.2674080175727622, + "grad_norm": 0.3997604548931122, + "learning_rate": 1.9146080329631496e-05, + "loss": 0.4994, + "step": 9739 + }, + { + "epoch": 0.2674354750137287, + "grad_norm": 0.36739468574523926, + "learning_rate": 1.91459056886207e-05, + "loss": 0.6005, + "step": 9740 + }, + { + "epoch": 0.2674629324546952, + "grad_norm": 0.4121066927909851, + "learning_rate": 1.9145731030549873e-05, + "loss": 0.5236, + "step": 9741 + }, + { + "epoch": 0.2674903898956617, + "grad_norm": 0.37619736790657043, + "learning_rate": 1.914555635541934e-05, + "loss": 0.4976, + "step": 9742 + }, + { + "epoch": 0.2675178473366282, + "grad_norm": 0.3541381061077118, + "learning_rate": 1.914538166322942e-05, + "loss": 0.572, + "step": 9743 + }, + { + "epoch": 0.26754530477759475, + "grad_norm": 0.3646325170993805, + "learning_rate": 1.914520695398045e-05, + "loss": 0.5426, + "step": 9744 + }, + { + "epoch": 0.26757276221856124, + "grad_norm": 0.3175213038921356, + "learning_rate": 1.9145032227672747e-05, + "loss": 0.4039, + "step": 9745 + }, + { + "epoch": 0.26760021965952774, + "grad_norm": 0.41148555278778076, + "learning_rate": 1.9144857484306642e-05, + "loss": 0.4795, + "step": 9746 + }, + { + "epoch": 0.26762767710049423, + "grad_norm": 0.6565341353416443, + "learning_rate": 1.914468272388246e-05, + "loss": 0.5675, + "step": 9747 + }, + { + "epoch": 0.2676551345414607, + "grad_norm": 0.43212229013442993, + "learning_rate": 1.9144507946400522e-05, + "loss": 0.5796, + "step": 9748 + }, + { + "epoch": 0.2676825919824272, + "grad_norm": 0.37087878584861755, + "learning_rate": 1.9144333151861162e-05, + "loss": 0.5407, + "step": 9749 + }, + { + "epoch": 0.2677100494233937, + "grad_norm": 0.3386351466178894, + "learning_rate": 1.91441583402647e-05, + "loss": 0.5366, + "step": 9750 + }, + { + "epoch": 0.26773750686436026, + "grad_norm": 0.3601735532283783, + "learning_rate": 1.9143983511611463e-05, + "loss": 0.537, + "step": 9751 + }, + { + "epoch": 0.26776496430532676, + "grad_norm": 0.3241231143474579, + "learning_rate": 1.914380866590178e-05, + "loss": 0.4037, + "step": 9752 + }, + { + "epoch": 0.26779242174629325, + "grad_norm": 0.41108840703964233, + "learning_rate": 1.9143633803135974e-05, + "loss": 0.5729, + "step": 9753 + }, + { + "epoch": 0.26781987918725975, + "grad_norm": 0.4378061890602112, + "learning_rate": 1.914345892331438e-05, + "loss": 0.5661, + "step": 9754 + }, + { + "epoch": 0.26784733662822624, + "grad_norm": 0.37385082244873047, + "learning_rate": 1.9143284026437307e-05, + "loss": 0.558, + "step": 9755 + }, + { + "epoch": 0.26787479406919273, + "grad_norm": 0.44738465547561646, + "learning_rate": 1.9143109112505092e-05, + "loss": 0.5883, + "step": 9756 + }, + { + "epoch": 0.26790225151015923, + "grad_norm": 0.3492004871368408, + "learning_rate": 1.914293418151807e-05, + "loss": 0.4841, + "step": 9757 + }, + { + "epoch": 0.2679297089511258, + "grad_norm": 0.3484858274459839, + "learning_rate": 1.9142759233476548e-05, + "loss": 0.4678, + "step": 9758 + }, + { + "epoch": 0.26795716639209227, + "grad_norm": 0.3618752062320709, + "learning_rate": 1.9142584268380866e-05, + "loss": 0.4551, + "step": 9759 + }, + { + "epoch": 0.26798462383305877, + "grad_norm": 0.4073898494243622, + "learning_rate": 1.9142409286231345e-05, + "loss": 0.5035, + "step": 9760 + }, + { + "epoch": 0.26801208127402526, + "grad_norm": 0.3515893518924713, + "learning_rate": 1.9142234287028313e-05, + "loss": 0.4867, + "step": 9761 + }, + { + "epoch": 0.26803953871499175, + "grad_norm": 0.32989564538002014, + "learning_rate": 1.91420592707721e-05, + "loss": 0.4032, + "step": 9762 + }, + { + "epoch": 0.26806699615595825, + "grad_norm": 0.4137049615383148, + "learning_rate": 1.914188423746302e-05, + "loss": 0.4766, + "step": 9763 + }, + { + "epoch": 0.26809445359692474, + "grad_norm": 0.47461962699890137, + "learning_rate": 1.9141709187101414e-05, + "loss": 0.636, + "step": 9764 + }, + { + "epoch": 0.2681219110378913, + "grad_norm": 0.37120887637138367, + "learning_rate": 1.9141534119687605e-05, + "loss": 0.5088, + "step": 9765 + }, + { + "epoch": 0.2681493684788578, + "grad_norm": 0.47231078147888184, + "learning_rate": 1.914135903522191e-05, + "loss": 0.5103, + "step": 9766 + }, + { + "epoch": 0.2681768259198243, + "grad_norm": 0.5834417939186096, + "learning_rate": 1.9141183933704668e-05, + "loss": 0.4296, + "step": 9767 + }, + { + "epoch": 0.2682042833607908, + "grad_norm": 0.37957099080085754, + "learning_rate": 1.91410088151362e-05, + "loss": 0.4987, + "step": 9768 + }, + { + "epoch": 0.26823174080175727, + "grad_norm": 0.3324994444847107, + "learning_rate": 1.9140833679516832e-05, + "loss": 0.4339, + "step": 9769 + }, + { + "epoch": 0.26825919824272376, + "grad_norm": 0.3767721354961395, + "learning_rate": 1.914065852684689e-05, + "loss": 0.5255, + "step": 9770 + }, + { + "epoch": 0.26828665568369026, + "grad_norm": 0.3966728448867798, + "learning_rate": 1.9140483357126706e-05, + "loss": 0.52, + "step": 9771 + }, + { + "epoch": 0.2683141131246568, + "grad_norm": 0.373540461063385, + "learning_rate": 1.9140308170356604e-05, + "loss": 0.5236, + "step": 9772 + }, + { + "epoch": 0.2683415705656233, + "grad_norm": 0.3543161153793335, + "learning_rate": 1.914013296653691e-05, + "loss": 0.5812, + "step": 9773 + }, + { + "epoch": 0.2683690280065898, + "grad_norm": 0.5518162250518799, + "learning_rate": 1.9139957745667944e-05, + "loss": 0.5956, + "step": 9774 + }, + { + "epoch": 0.2683964854475563, + "grad_norm": 0.3820234537124634, + "learning_rate": 1.9139782507750048e-05, + "loss": 0.6127, + "step": 9775 + }, + { + "epoch": 0.2684239428885228, + "grad_norm": 0.5409272909164429, + "learning_rate": 1.9139607252783534e-05, + "loss": 0.5323, + "step": 9776 + }, + { + "epoch": 0.2684514003294893, + "grad_norm": 0.3401220142841339, + "learning_rate": 1.913943198076874e-05, + "loss": 0.5102, + "step": 9777 + }, + { + "epoch": 0.26847885777045577, + "grad_norm": 0.3793210983276367, + "learning_rate": 1.9139256691705985e-05, + "loss": 0.5254, + "step": 9778 + }, + { + "epoch": 0.2685063152114223, + "grad_norm": 0.44602257013320923, + "learning_rate": 1.91390813855956e-05, + "loss": 0.5057, + "step": 9779 + }, + { + "epoch": 0.2685337726523888, + "grad_norm": 0.39210301637649536, + "learning_rate": 1.9138906062437916e-05, + "loss": 0.5318, + "step": 9780 + }, + { + "epoch": 0.2685612300933553, + "grad_norm": 0.4559965431690216, + "learning_rate": 1.913873072223325e-05, + "loss": 0.4473, + "step": 9781 + }, + { + "epoch": 0.2685886875343218, + "grad_norm": 0.31834569573402405, + "learning_rate": 1.913855536498194e-05, + "loss": 0.5, + "step": 9782 + }, + { + "epoch": 0.2686161449752883, + "grad_norm": 0.35518959164619446, + "learning_rate": 1.9138379990684303e-05, + "loss": 0.5259, + "step": 9783 + }, + { + "epoch": 0.2686436024162548, + "grad_norm": 0.39839065074920654, + "learning_rate": 1.913820459934067e-05, + "loss": 0.4609, + "step": 9784 + }, + { + "epoch": 0.2686710598572213, + "grad_norm": 0.35992231965065, + "learning_rate": 1.9138029190951372e-05, + "loss": 0.5665, + "step": 9785 + }, + { + "epoch": 0.26869851729818783, + "grad_norm": 0.3677034378051758, + "learning_rate": 1.913785376551673e-05, + "loss": 0.5876, + "step": 9786 + }, + { + "epoch": 0.2687259747391543, + "grad_norm": 0.33809253573417664, + "learning_rate": 1.9137678323037073e-05, + "loss": 0.4672, + "step": 9787 + }, + { + "epoch": 0.2687534321801208, + "grad_norm": 0.49835261702537537, + "learning_rate": 1.9137502863512735e-05, + "loss": 0.5511, + "step": 9788 + }, + { + "epoch": 0.2687808896210873, + "grad_norm": 0.41913217306137085, + "learning_rate": 1.9137327386944035e-05, + "loss": 0.5076, + "step": 9789 + }, + { + "epoch": 0.2688083470620538, + "grad_norm": 0.3479680120944977, + "learning_rate": 1.9137151893331304e-05, + "loss": 0.4966, + "step": 9790 + }, + { + "epoch": 0.2688358045030203, + "grad_norm": 0.3717440962791443, + "learning_rate": 1.9136976382674867e-05, + "loss": 0.4866, + "step": 9791 + }, + { + "epoch": 0.2688632619439868, + "grad_norm": 0.36988821625709534, + "learning_rate": 1.9136800854975056e-05, + "loss": 0.534, + "step": 9792 + }, + { + "epoch": 0.26889071938495335, + "grad_norm": 0.4553162157535553, + "learning_rate": 1.913662531023219e-05, + "loss": 0.5426, + "step": 9793 + }, + { + "epoch": 0.26891817682591984, + "grad_norm": 0.4279744625091553, + "learning_rate": 1.9136449748446606e-05, + "loss": 0.4615, + "step": 9794 + }, + { + "epoch": 0.26894563426688634, + "grad_norm": 0.3745644688606262, + "learning_rate": 1.9136274169618626e-05, + "loss": 0.5117, + "step": 9795 + }, + { + "epoch": 0.26897309170785283, + "grad_norm": 0.3508593738079071, + "learning_rate": 1.9136098573748577e-05, + "loss": 0.514, + "step": 9796 + }, + { + "epoch": 0.2690005491488193, + "grad_norm": 0.45678842067718506, + "learning_rate": 1.9135922960836792e-05, + "loss": 0.6062, + "step": 9797 + }, + { + "epoch": 0.2690280065897858, + "grad_norm": 0.3407766819000244, + "learning_rate": 1.913574733088359e-05, + "loss": 0.5492, + "step": 9798 + }, + { + "epoch": 0.2690554640307523, + "grad_norm": 0.38647571206092834, + "learning_rate": 1.9135571683889307e-05, + "loss": 0.5625, + "step": 9799 + }, + { + "epoch": 0.26908292147171886, + "grad_norm": 0.42047086358070374, + "learning_rate": 1.9135396019854268e-05, + "loss": 0.4911, + "step": 9800 + }, + { + "epoch": 0.26911037891268536, + "grad_norm": 0.34407517313957214, + "learning_rate": 1.9135220338778797e-05, + "loss": 0.5344, + "step": 9801 + }, + { + "epoch": 0.26913783635365185, + "grad_norm": 0.3305038511753082, + "learning_rate": 1.9135044640663225e-05, + "loss": 0.4769, + "step": 9802 + }, + { + "epoch": 0.26916529379461834, + "grad_norm": 0.5723841190338135, + "learning_rate": 1.9134868925507878e-05, + "loss": 0.5245, + "step": 9803 + }, + { + "epoch": 0.26919275123558484, + "grad_norm": 0.3831455707550049, + "learning_rate": 1.9134693193313087e-05, + "loss": 0.5333, + "step": 9804 + }, + { + "epoch": 0.26922020867655133, + "grad_norm": 0.3809056580066681, + "learning_rate": 1.913451744407918e-05, + "loss": 0.5814, + "step": 9805 + }, + { + "epoch": 0.2692476661175178, + "grad_norm": 0.3578951954841614, + "learning_rate": 1.9134341677806477e-05, + "loss": 0.4907, + "step": 9806 + }, + { + "epoch": 0.2692751235584844, + "grad_norm": 0.3252653479576111, + "learning_rate": 1.9134165894495315e-05, + "loss": 0.4658, + "step": 9807 + }, + { + "epoch": 0.26930258099945087, + "grad_norm": 0.3932402729988098, + "learning_rate": 1.9133990094146017e-05, + "loss": 0.5325, + "step": 9808 + }, + { + "epoch": 0.26933003844041736, + "grad_norm": 0.3751697242259979, + "learning_rate": 1.9133814276758913e-05, + "loss": 0.4848, + "step": 9809 + }, + { + "epoch": 0.26935749588138386, + "grad_norm": 0.34531140327453613, + "learning_rate": 1.913363844233433e-05, + "loss": 0.5235, + "step": 9810 + }, + { + "epoch": 0.26938495332235035, + "grad_norm": 0.35406339168548584, + "learning_rate": 1.9133462590872595e-05, + "loss": 0.5223, + "step": 9811 + }, + { + "epoch": 0.26941241076331685, + "grad_norm": 0.3575029969215393, + "learning_rate": 1.913328672237404e-05, + "loss": 0.4226, + "step": 9812 + }, + { + "epoch": 0.26943986820428334, + "grad_norm": 0.4238944947719574, + "learning_rate": 1.913311083683899e-05, + "loss": 0.5413, + "step": 9813 + }, + { + "epoch": 0.2694673256452499, + "grad_norm": 0.3711632192134857, + "learning_rate": 1.9132934934267767e-05, + "loss": 0.55, + "step": 9814 + }, + { + "epoch": 0.2694947830862164, + "grad_norm": 0.345323383808136, + "learning_rate": 1.9132759014660713e-05, + "loss": 0.5165, + "step": 9815 + }, + { + "epoch": 0.2695222405271829, + "grad_norm": 0.3540407121181488, + "learning_rate": 1.9132583078018145e-05, + "loss": 0.5934, + "step": 9816 + }, + { + "epoch": 0.26954969796814937, + "grad_norm": 0.32070431113243103, + "learning_rate": 1.9132407124340394e-05, + "loss": 0.4986, + "step": 9817 + }, + { + "epoch": 0.26957715540911587, + "grad_norm": 0.3500041365623474, + "learning_rate": 1.9132231153627793e-05, + "loss": 0.4066, + "step": 9818 + }, + { + "epoch": 0.26960461285008236, + "grad_norm": 0.35406187176704407, + "learning_rate": 1.9132055165880662e-05, + "loss": 0.6191, + "step": 9819 + }, + { + "epoch": 0.26963207029104885, + "grad_norm": 1.061446189880371, + "learning_rate": 1.9131879161099336e-05, + "loss": 0.5011, + "step": 9820 + }, + { + "epoch": 0.2696595277320154, + "grad_norm": 0.34080320596694946, + "learning_rate": 1.9131703139284143e-05, + "loss": 0.4704, + "step": 9821 + }, + { + "epoch": 0.2696869851729819, + "grad_norm": 0.3683513402938843, + "learning_rate": 1.9131527100435406e-05, + "loss": 0.5345, + "step": 9822 + }, + { + "epoch": 0.2697144426139484, + "grad_norm": 0.40094754099845886, + "learning_rate": 1.9131351044553456e-05, + "loss": 0.6297, + "step": 9823 + }, + { + "epoch": 0.2697419000549149, + "grad_norm": 0.38259661197662354, + "learning_rate": 1.9131174971638625e-05, + "loss": 0.5272, + "step": 9824 + }, + { + "epoch": 0.2697693574958814, + "grad_norm": 0.336723268032074, + "learning_rate": 1.9130998881691233e-05, + "loss": 0.6052, + "step": 9825 + }, + { + "epoch": 0.2697968149368479, + "grad_norm": 0.3405775725841522, + "learning_rate": 1.913082277471162e-05, + "loss": 0.5587, + "step": 9826 + }, + { + "epoch": 0.26982427237781437, + "grad_norm": 0.43415552377700806, + "learning_rate": 1.91306466507001e-05, + "loss": 0.526, + "step": 9827 + }, + { + "epoch": 0.2698517298187809, + "grad_norm": 0.32873955368995667, + "learning_rate": 1.9130470509657018e-05, + "loss": 0.5046, + "step": 9828 + }, + { + "epoch": 0.2698791872597474, + "grad_norm": 0.3966353237628937, + "learning_rate": 1.913029435158269e-05, + "loss": 0.5816, + "step": 9829 + }, + { + "epoch": 0.2699066447007139, + "grad_norm": 0.39512449502944946, + "learning_rate": 1.9130118176477452e-05, + "loss": 0.5135, + "step": 9830 + }, + { + "epoch": 0.2699341021416804, + "grad_norm": 0.39724794030189514, + "learning_rate": 1.912994198434163e-05, + "loss": 0.5595, + "step": 9831 + }, + { + "epoch": 0.2699615595826469, + "grad_norm": 0.3550942838191986, + "learning_rate": 1.912976577517555e-05, + "loss": 0.4725, + "step": 9832 + }, + { + "epoch": 0.2699890170236134, + "grad_norm": 0.34817275404930115, + "learning_rate": 1.9129589548979543e-05, + "loss": 0.5192, + "step": 9833 + }, + { + "epoch": 0.2700164744645799, + "grad_norm": 0.4286346435546875, + "learning_rate": 1.9129413305753936e-05, + "loss": 0.5685, + "step": 9834 + }, + { + "epoch": 0.27004393190554643, + "grad_norm": 0.3743230998516083, + "learning_rate": 1.9129237045499064e-05, + "loss": 0.493, + "step": 9835 + }, + { + "epoch": 0.2700713893465129, + "grad_norm": 0.35164231061935425, + "learning_rate": 1.9129060768215248e-05, + "loss": 0.508, + "step": 9836 + }, + { + "epoch": 0.2700988467874794, + "grad_norm": 0.34301045536994934, + "learning_rate": 1.912888447390282e-05, + "loss": 0.5341, + "step": 9837 + }, + { + "epoch": 0.2701263042284459, + "grad_norm": 0.41521045565605164, + "learning_rate": 1.912870816256211e-05, + "loss": 0.5097, + "step": 9838 + }, + { + "epoch": 0.2701537616694124, + "grad_norm": 0.33203360438346863, + "learning_rate": 1.9128531834193445e-05, + "loss": 0.4112, + "step": 9839 + }, + { + "epoch": 0.2701812191103789, + "grad_norm": 0.41138365864753723, + "learning_rate": 1.9128355488797155e-05, + "loss": 0.4383, + "step": 9840 + }, + { + "epoch": 0.2702086765513454, + "grad_norm": 0.3815813362598419, + "learning_rate": 1.912817912637357e-05, + "loss": 0.5802, + "step": 9841 + }, + { + "epoch": 0.2702361339923119, + "grad_norm": 0.4031226933002472, + "learning_rate": 1.9128002746923014e-05, + "loss": 0.4641, + "step": 9842 + }, + { + "epoch": 0.27026359143327844, + "grad_norm": 0.3417733907699585, + "learning_rate": 1.912782635044582e-05, + "loss": 0.4844, + "step": 9843 + }, + { + "epoch": 0.27029104887424493, + "grad_norm": 0.4594157636165619, + "learning_rate": 1.9127649936942317e-05, + "loss": 0.5427, + "step": 9844 + }, + { + "epoch": 0.2703185063152114, + "grad_norm": 0.3856273293495178, + "learning_rate": 1.9127473506412836e-05, + "loss": 0.541, + "step": 9845 + }, + { + "epoch": 0.2703459637561779, + "grad_norm": 0.3583327531814575, + "learning_rate": 1.9127297058857705e-05, + "loss": 0.5102, + "step": 9846 + }, + { + "epoch": 0.2703734211971444, + "grad_norm": 0.3427186906337738, + "learning_rate": 1.9127120594277245e-05, + "loss": 0.5359, + "step": 9847 + }, + { + "epoch": 0.2704008786381109, + "grad_norm": 0.39706024527549744, + "learning_rate": 1.91269441126718e-05, + "loss": 0.4787, + "step": 9848 + }, + { + "epoch": 0.2704283360790774, + "grad_norm": 0.35180458426475525, + "learning_rate": 1.9126767614041686e-05, + "loss": 0.5576, + "step": 9849 + }, + { + "epoch": 0.27045579352004395, + "grad_norm": 0.3995959758758545, + "learning_rate": 1.912659109838724e-05, + "loss": 0.568, + "step": 9850 + }, + { + "epoch": 0.27048325096101045, + "grad_norm": 0.33609065413475037, + "learning_rate": 1.9126414565708788e-05, + "loss": 0.4897, + "step": 9851 + }, + { + "epoch": 0.27051070840197694, + "grad_norm": 0.35539305210113525, + "learning_rate": 1.912623801600666e-05, + "loss": 0.5311, + "step": 9852 + }, + { + "epoch": 0.27053816584294343, + "grad_norm": 0.3461754322052002, + "learning_rate": 1.9126061449281183e-05, + "loss": 0.4742, + "step": 9853 + }, + { + "epoch": 0.27056562328390993, + "grad_norm": 0.419450581073761, + "learning_rate": 1.9125884865532695e-05, + "loss": 0.5289, + "step": 9854 + }, + { + "epoch": 0.2705930807248764, + "grad_norm": 0.36920684576034546, + "learning_rate": 1.9125708264761514e-05, + "loss": 0.5316, + "step": 9855 + }, + { + "epoch": 0.2706205381658429, + "grad_norm": 0.509120762348175, + "learning_rate": 1.9125531646967973e-05, + "loss": 0.5789, + "step": 9856 + }, + { + "epoch": 0.27064799560680947, + "grad_norm": 0.40556105971336365, + "learning_rate": 1.9125355012152407e-05, + "loss": 0.5234, + "step": 9857 + }, + { + "epoch": 0.27067545304777596, + "grad_norm": 0.4005683958530426, + "learning_rate": 1.912517836031514e-05, + "loss": 0.5272, + "step": 9858 + }, + { + "epoch": 0.27070291048874245, + "grad_norm": 0.3622051775455475, + "learning_rate": 1.9125001691456502e-05, + "loss": 0.5044, + "step": 9859 + }, + { + "epoch": 0.27073036792970895, + "grad_norm": 0.35783475637435913, + "learning_rate": 1.9124825005576823e-05, + "loss": 0.6155, + "step": 9860 + }, + { + "epoch": 0.27075782537067544, + "grad_norm": 0.4288792610168457, + "learning_rate": 1.9124648302676437e-05, + "loss": 0.525, + "step": 9861 + }, + { + "epoch": 0.27078528281164194, + "grad_norm": 0.3309309184551239, + "learning_rate": 1.9124471582755666e-05, + "loss": 0.5416, + "step": 9862 + }, + { + "epoch": 0.27081274025260843, + "grad_norm": 0.36693888902664185, + "learning_rate": 1.9124294845814842e-05, + "loss": 0.5579, + "step": 9863 + }, + { + "epoch": 0.270840197693575, + "grad_norm": 0.5052103996276855, + "learning_rate": 1.91241180918543e-05, + "loss": 0.537, + "step": 9864 + }, + { + "epoch": 0.2708676551345415, + "grad_norm": 0.43067431449890137, + "learning_rate": 1.9123941320874367e-05, + "loss": 0.5768, + "step": 9865 + }, + { + "epoch": 0.27089511257550797, + "grad_norm": 0.3520979583263397, + "learning_rate": 1.9123764532875366e-05, + "loss": 0.5159, + "step": 9866 + }, + { + "epoch": 0.27092257001647446, + "grad_norm": 0.42264533042907715, + "learning_rate": 1.9123587727857636e-05, + "loss": 0.5545, + "step": 9867 + }, + { + "epoch": 0.27095002745744096, + "grad_norm": 0.3593469262123108, + "learning_rate": 1.91234109058215e-05, + "loss": 0.4808, + "step": 9868 + }, + { + "epoch": 0.27097748489840745, + "grad_norm": 0.3687663674354553, + "learning_rate": 1.9123234066767294e-05, + "loss": 0.5531, + "step": 9869 + }, + { + "epoch": 0.27100494233937394, + "grad_norm": 0.3696453273296356, + "learning_rate": 1.912305721069534e-05, + "loss": 0.4925, + "step": 9870 + }, + { + "epoch": 0.2710323997803405, + "grad_norm": 0.4145413339138031, + "learning_rate": 1.912288033760598e-05, + "loss": 0.6008, + "step": 9871 + }, + { + "epoch": 0.271059857221307, + "grad_norm": 1.381593942642212, + "learning_rate": 1.912270344749953e-05, + "loss": 0.4836, + "step": 9872 + }, + { + "epoch": 0.2710873146622735, + "grad_norm": 0.4204270541667938, + "learning_rate": 1.9122526540376327e-05, + "loss": 0.437, + "step": 9873 + }, + { + "epoch": 0.27111477210324, + "grad_norm": 0.38020676374435425, + "learning_rate": 1.9122349616236705e-05, + "loss": 0.5648, + "step": 9874 + }, + { + "epoch": 0.27114222954420647, + "grad_norm": 0.37495705485343933, + "learning_rate": 1.9122172675080985e-05, + "loss": 0.5662, + "step": 9875 + }, + { + "epoch": 0.27116968698517296, + "grad_norm": 0.3831397593021393, + "learning_rate": 1.9121995716909503e-05, + "loss": 0.4566, + "step": 9876 + }, + { + "epoch": 0.27119714442613946, + "grad_norm": 0.346337229013443, + "learning_rate": 1.9121818741722587e-05, + "loss": 0.5676, + "step": 9877 + }, + { + "epoch": 0.271224601867106, + "grad_norm": 0.3712306618690491, + "learning_rate": 1.912164174952057e-05, + "loss": 0.5637, + "step": 9878 + }, + { + "epoch": 0.2712520593080725, + "grad_norm": 0.39312639832496643, + "learning_rate": 1.912146474030378e-05, + "loss": 0.514, + "step": 9879 + }, + { + "epoch": 0.271279516749039, + "grad_norm": 0.3729567527770996, + "learning_rate": 1.9121287714072543e-05, + "loss": 0.5227, + "step": 9880 + }, + { + "epoch": 0.2713069741900055, + "grad_norm": 0.3381660282611847, + "learning_rate": 1.9121110670827193e-05, + "loss": 0.4594, + "step": 9881 + }, + { + "epoch": 0.271334431630972, + "grad_norm": 0.3260592222213745, + "learning_rate": 1.9120933610568066e-05, + "loss": 0.4101, + "step": 9882 + }, + { + "epoch": 0.2713618890719385, + "grad_norm": 0.35581591725349426, + "learning_rate": 1.9120756533295482e-05, + "loss": 0.5291, + "step": 9883 + }, + { + "epoch": 0.27138934651290497, + "grad_norm": 0.6947402358055115, + "learning_rate": 1.9120579439009777e-05, + "loss": 0.5759, + "step": 9884 + }, + { + "epoch": 0.2714168039538715, + "grad_norm": 0.3291179835796356, + "learning_rate": 1.912040232771128e-05, + "loss": 0.5098, + "step": 9885 + }, + { + "epoch": 0.271444261394838, + "grad_norm": 0.4356599748134613, + "learning_rate": 1.9120225199400323e-05, + "loss": 0.5764, + "step": 9886 + }, + { + "epoch": 0.2714717188358045, + "grad_norm": 0.34648871421813965, + "learning_rate": 1.9120048054077237e-05, + "loss": 0.5097, + "step": 9887 + }, + { + "epoch": 0.271499176276771, + "grad_norm": 0.38144561648368835, + "learning_rate": 1.9119870891742346e-05, + "loss": 0.4987, + "step": 9888 + }, + { + "epoch": 0.2715266337177375, + "grad_norm": 0.336112380027771, + "learning_rate": 1.9119693712395986e-05, + "loss": 0.5384, + "step": 9889 + }, + { + "epoch": 0.271554091158704, + "grad_norm": 0.3938659727573395, + "learning_rate": 1.911951651603849e-05, + "loss": 0.6183, + "step": 9890 + }, + { + "epoch": 0.2715815485996705, + "grad_norm": 0.43657591938972473, + "learning_rate": 1.9119339302670187e-05, + "loss": 0.5843, + "step": 9891 + }, + { + "epoch": 0.27160900604063704, + "grad_norm": 0.4665045142173767, + "learning_rate": 1.91191620722914e-05, + "loss": 0.542, + "step": 9892 + }, + { + "epoch": 0.27163646348160353, + "grad_norm": 0.41740739345550537, + "learning_rate": 1.9118984824902464e-05, + "loss": 0.471, + "step": 9893 + }, + { + "epoch": 0.27166392092257, + "grad_norm": 0.3649424612522125, + "learning_rate": 1.9118807560503717e-05, + "loss": 0.5623, + "step": 9894 + }, + { + "epoch": 0.2716913783635365, + "grad_norm": 0.3550761938095093, + "learning_rate": 1.911863027909548e-05, + "loss": 0.4918, + "step": 9895 + }, + { + "epoch": 0.271718835804503, + "grad_norm": 0.396675169467926, + "learning_rate": 1.911845298067809e-05, + "loss": 0.5367, + "step": 9896 + }, + { + "epoch": 0.2717462932454695, + "grad_norm": 0.3661268949508667, + "learning_rate": 1.9118275665251872e-05, + "loss": 0.5139, + "step": 9897 + }, + { + "epoch": 0.271773750686436, + "grad_norm": 0.37169986963272095, + "learning_rate": 1.9118098332817155e-05, + "loss": 0.5154, + "step": 9898 + }, + { + "epoch": 0.27180120812740255, + "grad_norm": 0.3403613567352295, + "learning_rate": 1.9117920983374284e-05, + "loss": 0.4897, + "step": 9899 + }, + { + "epoch": 0.27182866556836904, + "grad_norm": 0.3278340697288513, + "learning_rate": 1.9117743616923575e-05, + "loss": 0.4479, + "step": 9900 + }, + { + "epoch": 0.27185612300933554, + "grad_norm": 0.4376673698425293, + "learning_rate": 1.9117566233465362e-05, + "loss": 0.5406, + "step": 9901 + }, + { + "epoch": 0.27188358045030203, + "grad_norm": 0.4957289397716522, + "learning_rate": 1.9117388832999984e-05, + "loss": 0.585, + "step": 9902 + }, + { + "epoch": 0.2719110378912685, + "grad_norm": 0.3694450259208679, + "learning_rate": 1.9117211415527764e-05, + "loss": 0.5186, + "step": 9903 + }, + { + "epoch": 0.271938495332235, + "grad_norm": 0.34558039903640747, + "learning_rate": 1.911703398104903e-05, + "loss": 0.4704, + "step": 9904 + }, + { + "epoch": 0.2719659527732015, + "grad_norm": 0.37124475836753845, + "learning_rate": 1.9116856529564124e-05, + "loss": 0.4675, + "step": 9905 + }, + { + "epoch": 0.27199341021416806, + "grad_norm": 0.3943414092063904, + "learning_rate": 1.9116679061073368e-05, + "loss": 0.5972, + "step": 9906 + }, + { + "epoch": 0.27202086765513456, + "grad_norm": 0.4420850872993469, + "learning_rate": 1.9116501575577094e-05, + "loss": 0.6081, + "step": 9907 + }, + { + "epoch": 0.27204832509610105, + "grad_norm": 0.3354905843734741, + "learning_rate": 1.911632407307564e-05, + "loss": 0.4853, + "step": 9908 + }, + { + "epoch": 0.27207578253706755, + "grad_norm": 0.33850884437561035, + "learning_rate": 1.911614655356933e-05, + "loss": 0.5847, + "step": 9909 + }, + { + "epoch": 0.27210323997803404, + "grad_norm": 0.37864333391189575, + "learning_rate": 1.9115969017058495e-05, + "loss": 0.5625, + "step": 9910 + }, + { + "epoch": 0.27213069741900053, + "grad_norm": 0.42382922768592834, + "learning_rate": 1.9115791463543472e-05, + "loss": 0.6441, + "step": 9911 + }, + { + "epoch": 0.272158154859967, + "grad_norm": 0.36682820320129395, + "learning_rate": 1.9115613893024586e-05, + "loss": 0.5443, + "step": 9912 + }, + { + "epoch": 0.2721856123009336, + "grad_norm": 0.4664061665534973, + "learning_rate": 1.911543630550217e-05, + "loss": 0.5219, + "step": 9913 + }, + { + "epoch": 0.27221306974190007, + "grad_norm": 0.37746065855026245, + "learning_rate": 1.9115258700976554e-05, + "loss": 0.5582, + "step": 9914 + }, + { + "epoch": 0.27224052718286657, + "grad_norm": 0.34339815378189087, + "learning_rate": 1.9115081079448076e-05, + "loss": 0.5328, + "step": 9915 + }, + { + "epoch": 0.27226798462383306, + "grad_norm": 0.38903892040252686, + "learning_rate": 1.9114903440917062e-05, + "loss": 0.4052, + "step": 9916 + }, + { + "epoch": 0.27229544206479955, + "grad_norm": 0.3646758496761322, + "learning_rate": 1.9114725785383843e-05, + "loss": 0.5102, + "step": 9917 + }, + { + "epoch": 0.27232289950576605, + "grad_norm": 0.3549175262451172, + "learning_rate": 1.911454811284875e-05, + "loss": 0.4975, + "step": 9918 + }, + { + "epoch": 0.27235035694673254, + "grad_norm": 0.3610672354698181, + "learning_rate": 1.9114370423312118e-05, + "loss": 0.491, + "step": 9919 + }, + { + "epoch": 0.2723778143876991, + "grad_norm": 0.35971638560295105, + "learning_rate": 1.9114192716774275e-05, + "loss": 0.4716, + "step": 9920 + }, + { + "epoch": 0.2724052718286656, + "grad_norm": 0.3618254065513611, + "learning_rate": 1.9114014993235552e-05, + "loss": 0.4952, + "step": 9921 + }, + { + "epoch": 0.2724327292696321, + "grad_norm": 0.4243094325065613, + "learning_rate": 1.9113837252696282e-05, + "loss": 0.5846, + "step": 9922 + }, + { + "epoch": 0.2724601867105986, + "grad_norm": 0.35683801770210266, + "learning_rate": 1.91136594951568e-05, + "loss": 0.525, + "step": 9923 + }, + { + "epoch": 0.27248764415156507, + "grad_norm": 0.3508235812187195, + "learning_rate": 1.9113481720617434e-05, + "loss": 0.5451, + "step": 9924 + }, + { + "epoch": 0.27251510159253156, + "grad_norm": 0.38714319467544556, + "learning_rate": 1.9113303929078514e-05, + "loss": 0.5318, + "step": 9925 + }, + { + "epoch": 0.27254255903349806, + "grad_norm": 0.3994044363498688, + "learning_rate": 1.9113126120540374e-05, + "loss": 0.5152, + "step": 9926 + }, + { + "epoch": 0.2725700164744646, + "grad_norm": 0.3863588869571686, + "learning_rate": 1.9112948295003347e-05, + "loss": 0.4705, + "step": 9927 + }, + { + "epoch": 0.2725974739154311, + "grad_norm": 0.44262945652008057, + "learning_rate": 1.911277045246776e-05, + "loss": 0.5849, + "step": 9928 + }, + { + "epoch": 0.2726249313563976, + "grad_norm": 0.36223524808883667, + "learning_rate": 1.9112592592933948e-05, + "loss": 0.4574, + "step": 9929 + }, + { + "epoch": 0.2726523887973641, + "grad_norm": 0.4359566569328308, + "learning_rate": 1.9112414716402244e-05, + "loss": 0.5759, + "step": 9930 + }, + { + "epoch": 0.2726798462383306, + "grad_norm": 0.3343963623046875, + "learning_rate": 1.9112236822872977e-05, + "loss": 0.4844, + "step": 9931 + }, + { + "epoch": 0.2727073036792971, + "grad_norm": 0.3669431507587433, + "learning_rate": 1.9112058912346483e-05, + "loss": 0.449, + "step": 9932 + }, + { + "epoch": 0.27273476112026357, + "grad_norm": 0.3658734858036041, + "learning_rate": 1.9111880984823088e-05, + "loss": 0.4988, + "step": 9933 + }, + { + "epoch": 0.2727622185612301, + "grad_norm": 0.3613303601741791, + "learning_rate": 1.9111703040303125e-05, + "loss": 0.5287, + "step": 9934 + }, + { + "epoch": 0.2727896760021966, + "grad_norm": 0.3479010760784149, + "learning_rate": 1.911152507878693e-05, + "loss": 0.4995, + "step": 9935 + }, + { + "epoch": 0.2728171334431631, + "grad_norm": 0.36884674429893494, + "learning_rate": 1.9111347100274833e-05, + "loss": 0.5592, + "step": 9936 + }, + { + "epoch": 0.2728445908841296, + "grad_norm": 0.32190725207328796, + "learning_rate": 1.9111169104767167e-05, + "loss": 0.4531, + "step": 9937 + }, + { + "epoch": 0.2728720483250961, + "grad_norm": 0.35047444701194763, + "learning_rate": 1.911099109226426e-05, + "loss": 0.5092, + "step": 9938 + }, + { + "epoch": 0.2728995057660626, + "grad_norm": 0.3900201916694641, + "learning_rate": 1.9110813062766448e-05, + "loss": 0.4392, + "step": 9939 + }, + { + "epoch": 0.2729269632070291, + "grad_norm": 0.3737871050834656, + "learning_rate": 1.9110635016274063e-05, + "loss": 0.579, + "step": 9940 + }, + { + "epoch": 0.27295442064799563, + "grad_norm": 0.33886560797691345, + "learning_rate": 1.9110456952787432e-05, + "loss": 0.5252, + "step": 9941 + }, + { + "epoch": 0.2729818780889621, + "grad_norm": 0.46305903792381287, + "learning_rate": 1.9110278872306895e-05, + "loss": 0.5625, + "step": 9942 + }, + { + "epoch": 0.2730093355299286, + "grad_norm": 0.5367904305458069, + "learning_rate": 1.911010077483278e-05, + "loss": 0.5828, + "step": 9943 + }, + { + "epoch": 0.2730367929708951, + "grad_norm": 0.33565518260002136, + "learning_rate": 1.9109922660365417e-05, + "loss": 0.5064, + "step": 9944 + }, + { + "epoch": 0.2730642504118616, + "grad_norm": 0.36408793926239014, + "learning_rate": 1.910974452890514e-05, + "loss": 0.5084, + "step": 9945 + }, + { + "epoch": 0.2730917078528281, + "grad_norm": 0.4075987935066223, + "learning_rate": 1.9109566380452283e-05, + "loss": 0.4868, + "step": 9946 + }, + { + "epoch": 0.2731191652937946, + "grad_norm": 0.3790868818759918, + "learning_rate": 1.910938821500718e-05, + "loss": 0.5465, + "step": 9947 + }, + { + "epoch": 0.27314662273476115, + "grad_norm": 0.3407665491104126, + "learning_rate": 1.910921003257016e-05, + "loss": 0.5163, + "step": 9948 + }, + { + "epoch": 0.27317408017572764, + "grad_norm": 0.3683985769748688, + "learning_rate": 1.9109031833141554e-05, + "loss": 0.5194, + "step": 9949 + }, + { + "epoch": 0.27320153761669413, + "grad_norm": 0.32936447858810425, + "learning_rate": 1.9108853616721698e-05, + "loss": 0.386, + "step": 9950 + }, + { + "epoch": 0.27322899505766063, + "grad_norm": 0.4124986231327057, + "learning_rate": 1.910867538331092e-05, + "loss": 0.5568, + "step": 9951 + }, + { + "epoch": 0.2732564524986271, + "grad_norm": 0.35784968733787537, + "learning_rate": 1.910849713290956e-05, + "loss": 0.5283, + "step": 9952 + }, + { + "epoch": 0.2732839099395936, + "grad_norm": 0.39179691672325134, + "learning_rate": 1.910831886551794e-05, + "loss": 0.5587, + "step": 9953 + }, + { + "epoch": 0.2733113673805601, + "grad_norm": 0.40338844060897827, + "learning_rate": 1.9108140581136403e-05, + "loss": 0.5416, + "step": 9954 + }, + { + "epoch": 0.27333882482152666, + "grad_norm": 0.38420209288597107, + "learning_rate": 1.9107962279765276e-05, + "loss": 0.4498, + "step": 9955 + }, + { + "epoch": 0.27336628226249315, + "grad_norm": 0.34317854046821594, + "learning_rate": 1.910778396140489e-05, + "loss": 0.5104, + "step": 9956 + }, + { + "epoch": 0.27339373970345965, + "grad_norm": 0.37836387753486633, + "learning_rate": 1.910760562605558e-05, + "loss": 0.6248, + "step": 9957 + }, + { + "epoch": 0.27342119714442614, + "grad_norm": 0.3767372667789459, + "learning_rate": 1.9107427273717684e-05, + "loss": 0.4832, + "step": 9958 + }, + { + "epoch": 0.27344865458539264, + "grad_norm": 0.33637264370918274, + "learning_rate": 1.9107248904391525e-05, + "loss": 0.4897, + "step": 9959 + }, + { + "epoch": 0.27347611202635913, + "grad_norm": 0.3900202512741089, + "learning_rate": 1.910707051807744e-05, + "loss": 0.4915, + "step": 9960 + }, + { + "epoch": 0.2735035694673256, + "grad_norm": 0.39375507831573486, + "learning_rate": 1.9106892114775763e-05, + "loss": 0.5079, + "step": 9961 + }, + { + "epoch": 0.2735310269082922, + "grad_norm": 0.4811682105064392, + "learning_rate": 1.9106713694486823e-05, + "loss": 0.5258, + "step": 9962 + }, + { + "epoch": 0.27355848434925867, + "grad_norm": 0.44188427925109863, + "learning_rate": 1.9106535257210956e-05, + "loss": 0.4965, + "step": 9963 + }, + { + "epoch": 0.27358594179022516, + "grad_norm": 2.236426830291748, + "learning_rate": 1.9106356802948498e-05, + "loss": 0.5074, + "step": 9964 + }, + { + "epoch": 0.27361339923119166, + "grad_norm": 0.34329482913017273, + "learning_rate": 1.9106178331699776e-05, + "loss": 0.4345, + "step": 9965 + }, + { + "epoch": 0.27364085667215815, + "grad_norm": 0.36275172233581543, + "learning_rate": 1.9105999843465123e-05, + "loss": 0.5576, + "step": 9966 + }, + { + "epoch": 0.27366831411312464, + "grad_norm": 0.4447198212146759, + "learning_rate": 1.9105821338244877e-05, + "loss": 0.6074, + "step": 9967 + }, + { + "epoch": 0.27369577155409114, + "grad_norm": 0.347940593957901, + "learning_rate": 1.910564281603937e-05, + "loss": 0.5254, + "step": 9968 + }, + { + "epoch": 0.2737232289950577, + "grad_norm": 0.7880483865737915, + "learning_rate": 1.9105464276848928e-05, + "loss": 0.5391, + "step": 9969 + }, + { + "epoch": 0.2737506864360242, + "grad_norm": 0.38076260685920715, + "learning_rate": 1.910528572067389e-05, + "loss": 0.5729, + "step": 9970 + }, + { + "epoch": 0.2737781438769907, + "grad_norm": 1.6311343908309937, + "learning_rate": 1.910510714751459e-05, + "loss": 0.4575, + "step": 9971 + }, + { + "epoch": 0.27380560131795717, + "grad_norm": 0.37366747856140137, + "learning_rate": 1.910492855737136e-05, + "loss": 0.5605, + "step": 9972 + }, + { + "epoch": 0.27383305875892366, + "grad_norm": 0.5087101459503174, + "learning_rate": 1.910474995024453e-05, + "loss": 0.6478, + "step": 9973 + }, + { + "epoch": 0.27386051619989016, + "grad_norm": 0.3841054141521454, + "learning_rate": 1.9104571326134435e-05, + "loss": 0.5645, + "step": 9974 + }, + { + "epoch": 0.27388797364085665, + "grad_norm": 0.4028850495815277, + "learning_rate": 1.9104392685041407e-05, + "loss": 0.625, + "step": 9975 + }, + { + "epoch": 0.27391543108182315, + "grad_norm": 0.32923951745033264, + "learning_rate": 1.9104214026965783e-05, + "loss": 0.5419, + "step": 9976 + }, + { + "epoch": 0.2739428885227897, + "grad_norm": 0.3664994537830353, + "learning_rate": 1.9104035351907896e-05, + "loss": 0.5487, + "step": 9977 + }, + { + "epoch": 0.2739703459637562, + "grad_norm": 0.44592389464378357, + "learning_rate": 1.910385665986808e-05, + "loss": 0.5639, + "step": 9978 + }, + { + "epoch": 0.2739978034047227, + "grad_norm": 0.33739885687828064, + "learning_rate": 1.910367795084666e-05, + "loss": 0.5456, + "step": 9979 + }, + { + "epoch": 0.2740252608456892, + "grad_norm": 0.3400880694389343, + "learning_rate": 1.9103499224843976e-05, + "loss": 0.5241, + "step": 9980 + }, + { + "epoch": 0.27405271828665567, + "grad_norm": 0.3730035424232483, + "learning_rate": 1.9103320481860362e-05, + "loss": 0.5853, + "step": 9981 + }, + { + "epoch": 0.27408017572762217, + "grad_norm": 0.3500209450721741, + "learning_rate": 1.9103141721896147e-05, + "loss": 0.4626, + "step": 9982 + }, + { + "epoch": 0.27410763316858866, + "grad_norm": 0.33709242939949036, + "learning_rate": 1.910296294495167e-05, + "loss": 0.5073, + "step": 9983 + }, + { + "epoch": 0.2741350906095552, + "grad_norm": 0.5570001602172852, + "learning_rate": 1.910278415102726e-05, + "loss": 0.5044, + "step": 9984 + }, + { + "epoch": 0.2741625480505217, + "grad_norm": 0.3342267572879791, + "learning_rate": 1.9102605340123254e-05, + "loss": 0.4663, + "step": 9985 + }, + { + "epoch": 0.2741900054914882, + "grad_norm": 0.34972089529037476, + "learning_rate": 1.9102426512239983e-05, + "loss": 0.5377, + "step": 9986 + }, + { + "epoch": 0.2742174629324547, + "grad_norm": 0.3424040973186493, + "learning_rate": 1.910224766737778e-05, + "loss": 0.5509, + "step": 9987 + }, + { + "epoch": 0.2742449203734212, + "grad_norm": 0.3814975619316101, + "learning_rate": 1.9102068805536983e-05, + "loss": 0.5742, + "step": 9988 + }, + { + "epoch": 0.2742723778143877, + "grad_norm": 0.344881147146225, + "learning_rate": 1.910188992671792e-05, + "loss": 0.4989, + "step": 9989 + }, + { + "epoch": 0.2742998352553542, + "grad_norm": 0.3487935960292816, + "learning_rate": 1.9101711030920928e-05, + "loss": 0.498, + "step": 9990 + }, + { + "epoch": 0.2743272926963207, + "grad_norm": 0.3517645001411438, + "learning_rate": 1.9101532118146342e-05, + "loss": 0.5103, + "step": 9991 + }, + { + "epoch": 0.2743547501372872, + "grad_norm": 0.3459113538265228, + "learning_rate": 1.910135318839449e-05, + "loss": 0.5651, + "step": 9992 + }, + { + "epoch": 0.2743822075782537, + "grad_norm": 0.37721192836761475, + "learning_rate": 1.9101174241665715e-05, + "loss": 0.5566, + "step": 9993 + }, + { + "epoch": 0.2744096650192202, + "grad_norm": 0.3718890845775604, + "learning_rate": 1.910099527796034e-05, + "loss": 0.5301, + "step": 9994 + }, + { + "epoch": 0.2744371224601867, + "grad_norm": 0.38843467831611633, + "learning_rate": 1.9100816297278707e-05, + "loss": 0.5005, + "step": 9995 + }, + { + "epoch": 0.2744645799011532, + "grad_norm": 0.4001559019088745, + "learning_rate": 1.9100637299621147e-05, + "loss": 0.5637, + "step": 9996 + }, + { + "epoch": 0.2744920373421197, + "grad_norm": 0.3482390344142914, + "learning_rate": 1.9100458284987992e-05, + "loss": 0.5526, + "step": 9997 + }, + { + "epoch": 0.27451949478308624, + "grad_norm": 0.43493956327438354, + "learning_rate": 1.910027925337958e-05, + "loss": 0.507, + "step": 9998 + }, + { + "epoch": 0.27454695222405273, + "grad_norm": 0.35914936661720276, + "learning_rate": 1.9100100204796243e-05, + "loss": 0.5376, + "step": 9999 + }, + { + "epoch": 0.2745744096650192, + "grad_norm": 0.35638219118118286, + "learning_rate": 1.9099921139238312e-05, + "loss": 0.561, + "step": 10000 + }, + { + "epoch": 0.2746018671059857, + "grad_norm": 0.3287396728992462, + "learning_rate": 1.9099742056706123e-05, + "loss": 0.4849, + "step": 10001 + }, + { + "epoch": 0.2746293245469522, + "grad_norm": 0.4176933169364929, + "learning_rate": 1.9099562957200013e-05, + "loss": 0.499, + "step": 10002 + }, + { + "epoch": 0.2746567819879187, + "grad_norm": 0.41822922229766846, + "learning_rate": 1.9099383840720315e-05, + "loss": 0.6528, + "step": 10003 + }, + { + "epoch": 0.2746842394288852, + "grad_norm": 0.37375423312187195, + "learning_rate": 1.909920470726736e-05, + "loss": 0.5349, + "step": 10004 + }, + { + "epoch": 0.27471169686985175, + "grad_norm": 0.43775495886802673, + "learning_rate": 1.9099025556841485e-05, + "loss": 0.5967, + "step": 10005 + }, + { + "epoch": 0.27473915431081825, + "grad_norm": 0.35610273480415344, + "learning_rate": 1.9098846389443018e-05, + "loss": 0.5, + "step": 10006 + }, + { + "epoch": 0.27476661175178474, + "grad_norm": 0.45750105381011963, + "learning_rate": 1.9098667205072305e-05, + "loss": 0.5583, + "step": 10007 + }, + { + "epoch": 0.27479406919275123, + "grad_norm": 0.3684680461883545, + "learning_rate": 1.9098488003729673e-05, + "loss": 0.55, + "step": 10008 + }, + { + "epoch": 0.2748215266337177, + "grad_norm": 0.5242764949798584, + "learning_rate": 1.9098308785415456e-05, + "loss": 0.5899, + "step": 10009 + }, + { + "epoch": 0.2748489840746842, + "grad_norm": 0.4170685112476349, + "learning_rate": 1.909812955012999e-05, + "loss": 0.5977, + "step": 10010 + }, + { + "epoch": 0.2748764415156507, + "grad_norm": 0.3485099673271179, + "learning_rate": 1.9097950297873606e-05, + "loss": 0.4667, + "step": 10011 + }, + { + "epoch": 0.27490389895661727, + "grad_norm": 0.9485474228858948, + "learning_rate": 1.9097771028646647e-05, + "loss": 0.5536, + "step": 10012 + }, + { + "epoch": 0.27493135639758376, + "grad_norm": 0.4386463761329651, + "learning_rate": 1.9097591742449434e-05, + "loss": 0.5065, + "step": 10013 + }, + { + "epoch": 0.27495881383855025, + "grad_norm": 0.37624701857566833, + "learning_rate": 1.9097412439282312e-05, + "loss": 0.5391, + "step": 10014 + }, + { + "epoch": 0.27498627127951675, + "grad_norm": 0.37362465262413025, + "learning_rate": 1.9097233119145614e-05, + "loss": 0.5316, + "step": 10015 + }, + { + "epoch": 0.27501372872048324, + "grad_norm": 0.3448387086391449, + "learning_rate": 1.909705378203967e-05, + "loss": 0.5078, + "step": 10016 + }, + { + "epoch": 0.27504118616144974, + "grad_norm": 0.48027241230010986, + "learning_rate": 1.909687442796482e-05, + "loss": 0.4422, + "step": 10017 + }, + { + "epoch": 0.27506864360241623, + "grad_norm": 0.3532547056674957, + "learning_rate": 1.909669505692139e-05, + "loss": 0.5375, + "step": 10018 + }, + { + "epoch": 0.2750961010433828, + "grad_norm": 0.31640633940696716, + "learning_rate": 1.9096515668909725e-05, + "loss": 0.5288, + "step": 10019 + }, + { + "epoch": 0.2751235584843493, + "grad_norm": 0.3431938588619232, + "learning_rate": 1.9096336263930154e-05, + "loss": 0.4186, + "step": 10020 + }, + { + "epoch": 0.27515101592531577, + "grad_norm": 0.35133492946624756, + "learning_rate": 1.9096156841983013e-05, + "loss": 0.4638, + "step": 10021 + }, + { + "epoch": 0.27517847336628226, + "grad_norm": 0.34118378162384033, + "learning_rate": 1.9095977403068636e-05, + "loss": 0.5211, + "step": 10022 + }, + { + "epoch": 0.27520593080724876, + "grad_norm": 0.3888697028160095, + "learning_rate": 1.909579794718736e-05, + "loss": 0.564, + "step": 10023 + }, + { + "epoch": 0.27523338824821525, + "grad_norm": 0.3812296688556671, + "learning_rate": 1.9095618474339518e-05, + "loss": 0.5277, + "step": 10024 + }, + { + "epoch": 0.27526084568918174, + "grad_norm": 0.4377080500125885, + "learning_rate": 1.909543898452544e-05, + "loss": 0.5405, + "step": 10025 + }, + { + "epoch": 0.2752883031301483, + "grad_norm": 0.3324609100818634, + "learning_rate": 1.909525947774547e-05, + "loss": 0.3646, + "step": 10026 + }, + { + "epoch": 0.2753157605711148, + "grad_norm": 0.4445863664150238, + "learning_rate": 1.9095079953999936e-05, + "loss": 0.5441, + "step": 10027 + }, + { + "epoch": 0.2753432180120813, + "grad_norm": 0.3771135210990906, + "learning_rate": 1.9094900413289173e-05, + "loss": 0.5223, + "step": 10028 + }, + { + "epoch": 0.2753706754530478, + "grad_norm": 0.4525003433227539, + "learning_rate": 1.909472085561352e-05, + "loss": 0.5974, + "step": 10029 + }, + { + "epoch": 0.27539813289401427, + "grad_norm": 0.394182026386261, + "learning_rate": 1.909454128097331e-05, + "loss": 0.505, + "step": 10030 + }, + { + "epoch": 0.27542559033498076, + "grad_norm": 0.37022989988327026, + "learning_rate": 1.9094361689368882e-05, + "loss": 0.5132, + "step": 10031 + }, + { + "epoch": 0.27545304777594726, + "grad_norm": 0.3969801962375641, + "learning_rate": 1.909418208080056e-05, + "loss": 0.5498, + "step": 10032 + }, + { + "epoch": 0.2754805052169138, + "grad_norm": 0.36887598037719727, + "learning_rate": 1.909400245526869e-05, + "loss": 0.5229, + "step": 10033 + }, + { + "epoch": 0.2755079626578803, + "grad_norm": 0.3238699734210968, + "learning_rate": 1.9093822812773602e-05, + "loss": 0.5168, + "step": 10034 + }, + { + "epoch": 0.2755354200988468, + "grad_norm": 0.37131747603416443, + "learning_rate": 1.909364315331563e-05, + "loss": 0.4795, + "step": 10035 + }, + { + "epoch": 0.2755628775398133, + "grad_norm": 0.3810587227344513, + "learning_rate": 1.9093463476895113e-05, + "loss": 0.526, + "step": 10036 + }, + { + "epoch": 0.2755903349807798, + "grad_norm": 0.37247276306152344, + "learning_rate": 1.9093283783512384e-05, + "loss": 0.4977, + "step": 10037 + }, + { + "epoch": 0.2756177924217463, + "grad_norm": 0.3957775831222534, + "learning_rate": 1.909310407316778e-05, + "loss": 0.5182, + "step": 10038 + }, + { + "epoch": 0.27564524986271277, + "grad_norm": 0.3680056631565094, + "learning_rate": 1.9092924345861635e-05, + "loss": 0.4943, + "step": 10039 + }, + { + "epoch": 0.2756727073036793, + "grad_norm": 0.3110438287258148, + "learning_rate": 1.909274460159428e-05, + "loss": 0.4951, + "step": 10040 + }, + { + "epoch": 0.2757001647446458, + "grad_norm": 0.3851597309112549, + "learning_rate": 1.909256484036606e-05, + "loss": 0.5345, + "step": 10041 + }, + { + "epoch": 0.2757276221856123, + "grad_norm": 0.4439169466495514, + "learning_rate": 1.90923850621773e-05, + "loss": 0.526, + "step": 10042 + }, + { + "epoch": 0.2757550796265788, + "grad_norm": 0.3456805944442749, + "learning_rate": 1.909220526702834e-05, + "loss": 0.5797, + "step": 10043 + }, + { + "epoch": 0.2757825370675453, + "grad_norm": 0.36618614196777344, + "learning_rate": 1.9092025454919517e-05, + "loss": 0.5618, + "step": 10044 + }, + { + "epoch": 0.2758099945085118, + "grad_norm": 0.40008407831192017, + "learning_rate": 1.9091845625851163e-05, + "loss": 0.5406, + "step": 10045 + }, + { + "epoch": 0.2758374519494783, + "grad_norm": 0.3378714919090271, + "learning_rate": 1.9091665779823617e-05, + "loss": 0.4501, + "step": 10046 + }, + { + "epoch": 0.27586490939044483, + "grad_norm": 0.36479124426841736, + "learning_rate": 1.9091485916837214e-05, + "loss": 0.5154, + "step": 10047 + }, + { + "epoch": 0.27589236683141133, + "grad_norm": 0.40910428762435913, + "learning_rate": 1.9091306036892284e-05, + "loss": 0.6026, + "step": 10048 + }, + { + "epoch": 0.2759198242723778, + "grad_norm": 0.3771435618400574, + "learning_rate": 1.909112613998917e-05, + "loss": 0.5912, + "step": 10049 + }, + { + "epoch": 0.2759472817133443, + "grad_norm": 0.35349783301353455, + "learning_rate": 1.90909462261282e-05, + "loss": 0.502, + "step": 10050 + }, + { + "epoch": 0.2759747391543108, + "grad_norm": 0.3553089499473572, + "learning_rate": 1.909076629530972e-05, + "loss": 0.5741, + "step": 10051 + }, + { + "epoch": 0.2760021965952773, + "grad_norm": 0.3832581341266632, + "learning_rate": 1.9090586347534055e-05, + "loss": 0.549, + "step": 10052 + }, + { + "epoch": 0.2760296540362438, + "grad_norm": 0.3852662742137909, + "learning_rate": 1.9090406382801547e-05, + "loss": 0.4671, + "step": 10053 + }, + { + "epoch": 0.27605711147721035, + "grad_norm": 0.35346144437789917, + "learning_rate": 1.9090226401112527e-05, + "loss": 0.6014, + "step": 10054 + }, + { + "epoch": 0.27608456891817684, + "grad_norm": 0.3751943111419678, + "learning_rate": 1.9090046402467337e-05, + "loss": 0.5021, + "step": 10055 + }, + { + "epoch": 0.27611202635914334, + "grad_norm": 0.34393438696861267, + "learning_rate": 1.908986638686631e-05, + "loss": 0.5256, + "step": 10056 + }, + { + "epoch": 0.27613948380010983, + "grad_norm": 0.3526577055454254, + "learning_rate": 1.9089686354309775e-05, + "loss": 0.5283, + "step": 10057 + }, + { + "epoch": 0.2761669412410763, + "grad_norm": 0.3358345329761505, + "learning_rate": 1.908950630479808e-05, + "loss": 0.5004, + "step": 10058 + }, + { + "epoch": 0.2761943986820428, + "grad_norm": 0.3643590211868286, + "learning_rate": 1.9089326238331552e-05, + "loss": 0.485, + "step": 10059 + }, + { + "epoch": 0.2762218561230093, + "grad_norm": 0.36023953557014465, + "learning_rate": 1.908914615491053e-05, + "loss": 0.5774, + "step": 10060 + }, + { + "epoch": 0.27624931356397586, + "grad_norm": 0.34720706939697266, + "learning_rate": 1.908896605453535e-05, + "loss": 0.5323, + "step": 10061 + }, + { + "epoch": 0.27627677100494236, + "grad_norm": 0.5511215329170227, + "learning_rate": 1.9088785937206345e-05, + "loss": 0.3961, + "step": 10062 + }, + { + "epoch": 0.27630422844590885, + "grad_norm": 0.43661054968833923, + "learning_rate": 1.9088605802923855e-05, + "loss": 0.5243, + "step": 10063 + }, + { + "epoch": 0.27633168588687534, + "grad_norm": 0.3310297727584839, + "learning_rate": 1.9088425651688217e-05, + "loss": 0.4851, + "step": 10064 + }, + { + "epoch": 0.27635914332784184, + "grad_norm": 0.3422190248966217, + "learning_rate": 1.908824548349976e-05, + "loss": 0.5268, + "step": 10065 + }, + { + "epoch": 0.27638660076880833, + "grad_norm": 0.39735254645347595, + "learning_rate": 1.9088065298358826e-05, + "loss": 0.5052, + "step": 10066 + }, + { + "epoch": 0.2764140582097748, + "grad_norm": 0.4234541058540344, + "learning_rate": 1.908788509626575e-05, + "loss": 0.5657, + "step": 10067 + }, + { + "epoch": 0.2764415156507414, + "grad_norm": 0.3662709593772888, + "learning_rate": 1.908770487722087e-05, + "loss": 0.5877, + "step": 10068 + }, + { + "epoch": 0.27646897309170787, + "grad_norm": 0.3970843255519867, + "learning_rate": 1.9087524641224516e-05, + "loss": 0.6228, + "step": 10069 + }, + { + "epoch": 0.27649643053267436, + "grad_norm": 0.3679914176464081, + "learning_rate": 1.9087344388277033e-05, + "loss": 0.5827, + "step": 10070 + }, + { + "epoch": 0.27652388797364086, + "grad_norm": 0.6151641607284546, + "learning_rate": 1.9087164118378747e-05, + "loss": 0.4618, + "step": 10071 + }, + { + "epoch": 0.27655134541460735, + "grad_norm": 0.36288532614707947, + "learning_rate": 1.9086983831530004e-05, + "loss": 0.5853, + "step": 10072 + }, + { + "epoch": 0.27657880285557385, + "grad_norm": 0.3736637234687805, + "learning_rate": 1.9086803527731135e-05, + "loss": 0.6437, + "step": 10073 + }, + { + "epoch": 0.27660626029654034, + "grad_norm": 0.3885047435760498, + "learning_rate": 1.9086623206982476e-05, + "loss": 0.5996, + "step": 10074 + }, + { + "epoch": 0.2766337177375069, + "grad_norm": 0.3286890387535095, + "learning_rate": 1.9086442869284366e-05, + "loss": 0.478, + "step": 10075 + }, + { + "epoch": 0.2766611751784734, + "grad_norm": 0.3712425231933594, + "learning_rate": 1.9086262514637137e-05, + "loss": 0.5561, + "step": 10076 + }, + { + "epoch": 0.2766886326194399, + "grad_norm": 0.4538847804069519, + "learning_rate": 1.908608214304113e-05, + "loss": 0.607, + "step": 10077 + }, + { + "epoch": 0.27671609006040637, + "grad_norm": 0.40046167373657227, + "learning_rate": 1.908590175449668e-05, + "loss": 0.5749, + "step": 10078 + }, + { + "epoch": 0.27674354750137287, + "grad_norm": 0.3507947027683258, + "learning_rate": 1.9085721349004124e-05, + "loss": 0.5634, + "step": 10079 + }, + { + "epoch": 0.27677100494233936, + "grad_norm": 0.34595856070518494, + "learning_rate": 1.9085540926563796e-05, + "loss": 0.5347, + "step": 10080 + }, + { + "epoch": 0.27679846238330585, + "grad_norm": 0.3872843086719513, + "learning_rate": 1.9085360487176037e-05, + "loss": 0.5467, + "step": 10081 + }, + { + "epoch": 0.2768259198242724, + "grad_norm": 0.4008041024208069, + "learning_rate": 1.908518003084118e-05, + "loss": 0.5371, + "step": 10082 + }, + { + "epoch": 0.2768533772652389, + "grad_norm": 0.434414267539978, + "learning_rate": 1.908499955755956e-05, + "loss": 0.5672, + "step": 10083 + }, + { + "epoch": 0.2768808347062054, + "grad_norm": 0.4168112576007843, + "learning_rate": 1.9084819067331522e-05, + "loss": 0.5745, + "step": 10084 + }, + { + "epoch": 0.2769082921471719, + "grad_norm": 0.322084903717041, + "learning_rate": 1.9084638560157393e-05, + "loss": 0.5764, + "step": 10085 + }, + { + "epoch": 0.2769357495881384, + "grad_norm": 0.4214191138744354, + "learning_rate": 1.9084458036037517e-05, + "loss": 0.6041, + "step": 10086 + }, + { + "epoch": 0.2769632070291049, + "grad_norm": 0.35040736198425293, + "learning_rate": 1.9084277494972224e-05, + "loss": 0.5666, + "step": 10087 + }, + { + "epoch": 0.27699066447007137, + "grad_norm": 0.401603102684021, + "learning_rate": 1.9084096936961854e-05, + "loss": 0.6, + "step": 10088 + }, + { + "epoch": 0.2770181219110379, + "grad_norm": 0.35348135232925415, + "learning_rate": 1.9083916362006746e-05, + "loss": 0.5161, + "step": 10089 + }, + { + "epoch": 0.2770455793520044, + "grad_norm": 0.37455788254737854, + "learning_rate": 1.9083735770107232e-05, + "loss": 0.5396, + "step": 10090 + }, + { + "epoch": 0.2770730367929709, + "grad_norm": 0.3767058253288269, + "learning_rate": 1.908355516126365e-05, + "loss": 0.6047, + "step": 10091 + }, + { + "epoch": 0.2771004942339374, + "grad_norm": 0.3493451774120331, + "learning_rate": 1.9083374535476343e-05, + "loss": 0.4437, + "step": 10092 + }, + { + "epoch": 0.2771279516749039, + "grad_norm": 0.412237286567688, + "learning_rate": 1.9083193892745643e-05, + "loss": 0.5136, + "step": 10093 + }, + { + "epoch": 0.2771554091158704, + "grad_norm": 0.36062684655189514, + "learning_rate": 1.9083013233071885e-05, + "loss": 0.5589, + "step": 10094 + }, + { + "epoch": 0.2771828665568369, + "grad_norm": 0.3837268352508545, + "learning_rate": 1.908283255645541e-05, + "loss": 0.4594, + "step": 10095 + }, + { + "epoch": 0.27721032399780343, + "grad_norm": 0.36245501041412354, + "learning_rate": 1.908265186289655e-05, + "loss": 0.5059, + "step": 10096 + }, + { + "epoch": 0.2772377814387699, + "grad_norm": 0.3449246883392334, + "learning_rate": 1.908247115239565e-05, + "loss": 0.5351, + "step": 10097 + }, + { + "epoch": 0.2772652388797364, + "grad_norm": 0.3645075857639313, + "learning_rate": 1.9082290424953043e-05, + "loss": 0.453, + "step": 10098 + }, + { + "epoch": 0.2772926963207029, + "grad_norm": 0.3478483259677887, + "learning_rate": 1.908210968056906e-05, + "loss": 0.5082, + "step": 10099 + }, + { + "epoch": 0.2773201537616694, + "grad_norm": 0.4178157150745392, + "learning_rate": 1.908192891924405e-05, + "loss": 0.5687, + "step": 10100 + }, + { + "epoch": 0.2773476112026359, + "grad_norm": 0.3862474858760834, + "learning_rate": 1.908174814097834e-05, + "loss": 0.5234, + "step": 10101 + }, + { + "epoch": 0.2773750686436024, + "grad_norm": 0.37392503023147583, + "learning_rate": 1.9081567345772273e-05, + "loss": 0.526, + "step": 10102 + }, + { + "epoch": 0.27740252608456895, + "grad_norm": 0.3900866210460663, + "learning_rate": 1.9081386533626184e-05, + "loss": 0.4953, + "step": 10103 + }, + { + "epoch": 0.27742998352553544, + "grad_norm": 0.3395434021949768, + "learning_rate": 1.908120570454041e-05, + "loss": 0.5767, + "step": 10104 + }, + { + "epoch": 0.27745744096650193, + "grad_norm": 0.38716092705726624, + "learning_rate": 1.908102485851529e-05, + "loss": 0.4954, + "step": 10105 + }, + { + "epoch": 0.2774848984074684, + "grad_norm": 0.3187010884284973, + "learning_rate": 1.908084399555116e-05, + "loss": 0.3911, + "step": 10106 + }, + { + "epoch": 0.2775123558484349, + "grad_norm": 0.3850921392440796, + "learning_rate": 1.9080663115648357e-05, + "loss": 0.493, + "step": 10107 + }, + { + "epoch": 0.2775398132894014, + "grad_norm": 0.4332759380340576, + "learning_rate": 1.908048221880722e-05, + "loss": 0.5413, + "step": 10108 + }, + { + "epoch": 0.2775672707303679, + "grad_norm": 0.3489910960197449, + "learning_rate": 1.9080301305028084e-05, + "loss": 0.6139, + "step": 10109 + }, + { + "epoch": 0.2775947281713344, + "grad_norm": 0.3139532804489136, + "learning_rate": 1.9080120374311288e-05, + "loss": 0.4808, + "step": 10110 + }, + { + "epoch": 0.27762218561230095, + "grad_norm": 0.3745000660419464, + "learning_rate": 1.907993942665717e-05, + "loss": 0.5263, + "step": 10111 + }, + { + "epoch": 0.27764964305326745, + "grad_norm": 0.37539970874786377, + "learning_rate": 1.907975846206607e-05, + "loss": 0.4593, + "step": 10112 + }, + { + "epoch": 0.27767710049423394, + "grad_norm": 0.4050968587398529, + "learning_rate": 1.9079577480538318e-05, + "loss": 0.6491, + "step": 10113 + }, + { + "epoch": 0.27770455793520044, + "grad_norm": 0.33210405707359314, + "learning_rate": 1.9079396482074258e-05, + "loss": 0.4967, + "step": 10114 + }, + { + "epoch": 0.27773201537616693, + "grad_norm": 0.3818451166152954, + "learning_rate": 1.9079215466674225e-05, + "loss": 0.5637, + "step": 10115 + }, + { + "epoch": 0.2777594728171334, + "grad_norm": 0.32986488938331604, + "learning_rate": 1.907903443433856e-05, + "loss": 0.528, + "step": 10116 + }, + { + "epoch": 0.2777869302580999, + "grad_norm": 0.5922122597694397, + "learning_rate": 1.9078853385067594e-05, + "loss": 0.5113, + "step": 10117 + }, + { + "epoch": 0.27781438769906647, + "grad_norm": 0.4134388267993927, + "learning_rate": 1.9078672318861674e-05, + "loss": 0.5446, + "step": 10118 + }, + { + "epoch": 0.27784184514003296, + "grad_norm": 0.33276763558387756, + "learning_rate": 1.9078491235721127e-05, + "loss": 0.5273, + "step": 10119 + }, + { + "epoch": 0.27786930258099946, + "grad_norm": 0.43442440032958984, + "learning_rate": 1.90783101356463e-05, + "loss": 0.5171, + "step": 10120 + }, + { + "epoch": 0.27789676002196595, + "grad_norm": 0.38945823907852173, + "learning_rate": 1.907812901863753e-05, + "loss": 0.4784, + "step": 10121 + }, + { + "epoch": 0.27792421746293244, + "grad_norm": 0.3631054162979126, + "learning_rate": 1.9077947884695143e-05, + "loss": 0.5169, + "step": 10122 + }, + { + "epoch": 0.27795167490389894, + "grad_norm": 0.38212332129478455, + "learning_rate": 1.9077766733819492e-05, + "loss": 0.5769, + "step": 10123 + }, + { + "epoch": 0.27797913234486543, + "grad_norm": 0.32767829298973083, + "learning_rate": 1.907758556601091e-05, + "loss": 0.509, + "step": 10124 + }, + { + "epoch": 0.278006589785832, + "grad_norm": 0.32863789796829224, + "learning_rate": 1.9077404381269732e-05, + "loss": 0.4128, + "step": 10125 + }, + { + "epoch": 0.2780340472267985, + "grad_norm": 0.3218472898006439, + "learning_rate": 1.9077223179596295e-05, + "loss": 0.5674, + "step": 10126 + }, + { + "epoch": 0.27806150466776497, + "grad_norm": 0.3329183757305145, + "learning_rate": 1.9077041960990943e-05, + "loss": 0.4593, + "step": 10127 + }, + { + "epoch": 0.27808896210873146, + "grad_norm": 0.38333770632743835, + "learning_rate": 1.907686072545401e-05, + "loss": 0.5353, + "step": 10128 + }, + { + "epoch": 0.27811641954969796, + "grad_norm": 0.36337265372276306, + "learning_rate": 1.9076679472985833e-05, + "loss": 0.542, + "step": 10129 + }, + { + "epoch": 0.27814387699066445, + "grad_norm": 0.41443076729774475, + "learning_rate": 1.907649820358675e-05, + "loss": 0.5857, + "step": 10130 + }, + { + "epoch": 0.27817133443163095, + "grad_norm": 0.35546642541885376, + "learning_rate": 1.9076316917257108e-05, + "loss": 0.5417, + "step": 10131 + }, + { + "epoch": 0.2781987918725975, + "grad_norm": 0.3215763568878174, + "learning_rate": 1.9076135613997234e-05, + "loss": 0.4407, + "step": 10132 + }, + { + "epoch": 0.278226249313564, + "grad_norm": 0.3689330816268921, + "learning_rate": 1.9075954293807468e-05, + "loss": 0.4773, + "step": 10133 + }, + { + "epoch": 0.2782537067545305, + "grad_norm": 0.38261327147483826, + "learning_rate": 1.9075772956688153e-05, + "loss": 0.5576, + "step": 10134 + }, + { + "epoch": 0.278281164195497, + "grad_norm": 0.3439863324165344, + "learning_rate": 1.9075591602639628e-05, + "loss": 0.6457, + "step": 10135 + }, + { + "epoch": 0.27830862163646347, + "grad_norm": 0.4377201497554779, + "learning_rate": 1.907541023166222e-05, + "loss": 0.5721, + "step": 10136 + }, + { + "epoch": 0.27833607907742997, + "grad_norm": 0.42143985629081726, + "learning_rate": 1.907522884375628e-05, + "loss": 0.5791, + "step": 10137 + }, + { + "epoch": 0.27836353651839646, + "grad_norm": 0.3778749108314514, + "learning_rate": 1.9075047438922144e-05, + "loss": 0.5706, + "step": 10138 + }, + { + "epoch": 0.278390993959363, + "grad_norm": 0.35900846123695374, + "learning_rate": 1.9074866017160145e-05, + "loss": 0.5332, + "step": 10139 + }, + { + "epoch": 0.2784184514003295, + "grad_norm": 0.3700673282146454, + "learning_rate": 1.907468457847063e-05, + "loss": 0.4912, + "step": 10140 + }, + { + "epoch": 0.278445908841296, + "grad_norm": 0.351886123418808, + "learning_rate": 1.9074503122853923e-05, + "loss": 0.528, + "step": 10141 + }, + { + "epoch": 0.2784733662822625, + "grad_norm": 0.3374916911125183, + "learning_rate": 1.9074321650310377e-05, + "loss": 0.4631, + "step": 10142 + }, + { + "epoch": 0.278500823723229, + "grad_norm": 0.5390721559524536, + "learning_rate": 1.9074140160840325e-05, + "loss": 0.5108, + "step": 10143 + }, + { + "epoch": 0.2785282811641955, + "grad_norm": 0.44561368227005005, + "learning_rate": 1.9073958654444102e-05, + "loss": 0.5725, + "step": 10144 + }, + { + "epoch": 0.278555738605162, + "grad_norm": 0.38934701681137085, + "learning_rate": 1.9073777131122053e-05, + "loss": 0.498, + "step": 10145 + }, + { + "epoch": 0.2785831960461285, + "grad_norm": 0.38667458295822144, + "learning_rate": 1.9073595590874513e-05, + "loss": 0.5318, + "step": 10146 + }, + { + "epoch": 0.278610653487095, + "grad_norm": 0.42917266488075256, + "learning_rate": 1.9073414033701822e-05, + "loss": 0.6572, + "step": 10147 + }, + { + "epoch": 0.2786381109280615, + "grad_norm": 0.3867585361003876, + "learning_rate": 1.907323245960432e-05, + "loss": 0.5475, + "step": 10148 + }, + { + "epoch": 0.278665568369028, + "grad_norm": 0.39309927821159363, + "learning_rate": 1.9073050868582338e-05, + "loss": 0.4587, + "step": 10149 + }, + { + "epoch": 0.2786930258099945, + "grad_norm": 0.41154101490974426, + "learning_rate": 1.9072869260636226e-05, + "loss": 0.5833, + "step": 10150 + }, + { + "epoch": 0.278720483250961, + "grad_norm": 0.4631204605102539, + "learning_rate": 1.907268763576631e-05, + "loss": 0.5148, + "step": 10151 + }, + { + "epoch": 0.2787479406919275, + "grad_norm": 0.3848029673099518, + "learning_rate": 1.9072505993972944e-05, + "loss": 0.5737, + "step": 10152 + }, + { + "epoch": 0.27877539813289404, + "grad_norm": 0.4893612861633301, + "learning_rate": 1.9072324335256453e-05, + "loss": 0.5169, + "step": 10153 + }, + { + "epoch": 0.27880285557386053, + "grad_norm": 0.3600632846355438, + "learning_rate": 1.9072142659617182e-05, + "loss": 0.5295, + "step": 10154 + }, + { + "epoch": 0.278830313014827, + "grad_norm": 0.3742150068283081, + "learning_rate": 1.9071960967055473e-05, + "loss": 0.484, + "step": 10155 + }, + { + "epoch": 0.2788577704557935, + "grad_norm": 0.30332812666893005, + "learning_rate": 1.907177925757166e-05, + "loss": 0.3949, + "step": 10156 + }, + { + "epoch": 0.27888522789676, + "grad_norm": 0.41561776399612427, + "learning_rate": 1.907159753116608e-05, + "loss": 0.4925, + "step": 10157 + }, + { + "epoch": 0.2789126853377265, + "grad_norm": 0.3246718943119049, + "learning_rate": 1.907141578783908e-05, + "loss": 0.4219, + "step": 10158 + }, + { + "epoch": 0.278940142778693, + "grad_norm": 0.7736389636993408, + "learning_rate": 1.907123402759099e-05, + "loss": 0.5496, + "step": 10159 + }, + { + "epoch": 0.27896760021965955, + "grad_norm": 0.3723120391368866, + "learning_rate": 1.9071052250422157e-05, + "loss": 0.5017, + "step": 10160 + }, + { + "epoch": 0.27899505766062604, + "grad_norm": 0.3314211368560791, + "learning_rate": 1.9070870456332914e-05, + "loss": 0.4901, + "step": 10161 + }, + { + "epoch": 0.27902251510159254, + "grad_norm": 0.5713589787483215, + "learning_rate": 1.9070688645323602e-05, + "loss": 0.5831, + "step": 10162 + }, + { + "epoch": 0.27904997254255903, + "grad_norm": 0.4533842206001282, + "learning_rate": 1.9070506817394566e-05, + "loss": 0.5662, + "step": 10163 + }, + { + "epoch": 0.2790774299835255, + "grad_norm": 0.36232590675354004, + "learning_rate": 1.9070324972546132e-05, + "loss": 0.4933, + "step": 10164 + }, + { + "epoch": 0.279104887424492, + "grad_norm": 0.359088271856308, + "learning_rate": 1.907014311077865e-05, + "loss": 0.5385, + "step": 10165 + }, + { + "epoch": 0.2791323448654585, + "grad_norm": 0.35530003905296326, + "learning_rate": 1.906996123209246e-05, + "loss": 0.5996, + "step": 10166 + }, + { + "epoch": 0.27915980230642506, + "grad_norm": 0.32721996307373047, + "learning_rate": 1.9069779336487895e-05, + "loss": 0.4387, + "step": 10167 + }, + { + "epoch": 0.27918725974739156, + "grad_norm": 0.42004910111427307, + "learning_rate": 1.9069597423965296e-05, + "loss": 0.5734, + "step": 10168 + }, + { + "epoch": 0.27921471718835805, + "grad_norm": 0.3662331998348236, + "learning_rate": 1.9069415494525002e-05, + "loss": 0.5515, + "step": 10169 + }, + { + "epoch": 0.27924217462932455, + "grad_norm": 0.3793543875217438, + "learning_rate": 1.9069233548167357e-05, + "loss": 0.5693, + "step": 10170 + }, + { + "epoch": 0.27926963207029104, + "grad_norm": 0.3882600665092468, + "learning_rate": 1.9069051584892695e-05, + "loss": 0.4352, + "step": 10171 + }, + { + "epoch": 0.27929708951125753, + "grad_norm": 0.36040449142456055, + "learning_rate": 1.9068869604701356e-05, + "loss": 0.5906, + "step": 10172 + }, + { + "epoch": 0.27932454695222403, + "grad_norm": 0.38540560007095337, + "learning_rate": 1.9068687607593684e-05, + "loss": 0.5937, + "step": 10173 + }, + { + "epoch": 0.2793520043931906, + "grad_norm": 0.41839760541915894, + "learning_rate": 1.9068505593570012e-05, + "loss": 0.601, + "step": 10174 + }, + { + "epoch": 0.2793794618341571, + "grad_norm": 0.4477398693561554, + "learning_rate": 1.9068323562630683e-05, + "loss": 0.4873, + "step": 10175 + }, + { + "epoch": 0.27940691927512357, + "grad_norm": 0.33766305446624756, + "learning_rate": 1.9068141514776037e-05, + "loss": 0.5178, + "step": 10176 + }, + { + "epoch": 0.27943437671609006, + "grad_norm": 0.3489362895488739, + "learning_rate": 1.9067959450006415e-05, + "loss": 0.4489, + "step": 10177 + }, + { + "epoch": 0.27946183415705655, + "grad_norm": 0.35088905692100525, + "learning_rate": 1.9067777368322154e-05, + "loss": 0.5559, + "step": 10178 + }, + { + "epoch": 0.27948929159802305, + "grad_norm": 0.3465668857097626, + "learning_rate": 1.9067595269723588e-05, + "loss": 0.4735, + "step": 10179 + }, + { + "epoch": 0.27951674903898954, + "grad_norm": 0.3686895966529846, + "learning_rate": 1.906741315421107e-05, + "loss": 0.4561, + "step": 10180 + }, + { + "epoch": 0.2795442064799561, + "grad_norm": 0.3835863173007965, + "learning_rate": 1.906723102178493e-05, + "loss": 0.4938, + "step": 10181 + }, + { + "epoch": 0.2795716639209226, + "grad_norm": 0.34615838527679443, + "learning_rate": 1.9067048872445508e-05, + "loss": 0.426, + "step": 10182 + }, + { + "epoch": 0.2795991213618891, + "grad_norm": 0.3825620412826538, + "learning_rate": 1.906686670619315e-05, + "loss": 0.5551, + "step": 10183 + }, + { + "epoch": 0.2796265788028556, + "grad_norm": 0.3846849799156189, + "learning_rate": 1.906668452302819e-05, + "loss": 0.5441, + "step": 10184 + }, + { + "epoch": 0.27965403624382207, + "grad_norm": 0.41443169116973877, + "learning_rate": 1.906650232295097e-05, + "loss": 0.561, + "step": 10185 + }, + { + "epoch": 0.27968149368478856, + "grad_norm": 0.3504103720188141, + "learning_rate": 1.9066320105961828e-05, + "loss": 0.4993, + "step": 10186 + }, + { + "epoch": 0.27970895112575506, + "grad_norm": 0.3688082993030548, + "learning_rate": 1.906613787206111e-05, + "loss": 0.538, + "step": 10187 + }, + { + "epoch": 0.2797364085667216, + "grad_norm": 0.3797212839126587, + "learning_rate": 1.9065955621249146e-05, + "loss": 0.5555, + "step": 10188 + }, + { + "epoch": 0.2797638660076881, + "grad_norm": 0.36018460988998413, + "learning_rate": 1.906577335352628e-05, + "loss": 0.4415, + "step": 10189 + }, + { + "epoch": 0.2797913234486546, + "grad_norm": 0.40526458621025085, + "learning_rate": 1.9065591068892857e-05, + "loss": 0.5063, + "step": 10190 + }, + { + "epoch": 0.2798187808896211, + "grad_norm": 0.3413203954696655, + "learning_rate": 1.9065408767349214e-05, + "loss": 0.4511, + "step": 10191 + }, + { + "epoch": 0.2798462383305876, + "grad_norm": 0.6199982762336731, + "learning_rate": 1.9065226448895688e-05, + "loss": 0.4441, + "step": 10192 + }, + { + "epoch": 0.2798736957715541, + "grad_norm": 0.3361359238624573, + "learning_rate": 1.9065044113532622e-05, + "loss": 0.4888, + "step": 10193 + }, + { + "epoch": 0.27990115321252057, + "grad_norm": 0.3666882812976837, + "learning_rate": 1.9064861761260352e-05, + "loss": 0.5495, + "step": 10194 + }, + { + "epoch": 0.2799286106534871, + "grad_norm": 0.36797451972961426, + "learning_rate": 1.906467939207923e-05, + "loss": 0.5132, + "step": 10195 + }, + { + "epoch": 0.2799560680944536, + "grad_norm": 0.37044480443000793, + "learning_rate": 1.9064497005989582e-05, + "loss": 0.5285, + "step": 10196 + }, + { + "epoch": 0.2799835255354201, + "grad_norm": 0.402975857257843, + "learning_rate": 1.9064314602991755e-05, + "loss": 0.5082, + "step": 10197 + }, + { + "epoch": 0.2800109829763866, + "grad_norm": 0.36571329832077026, + "learning_rate": 1.9064132183086087e-05, + "loss": 0.5364, + "step": 10198 + }, + { + "epoch": 0.2800384404173531, + "grad_norm": 0.3385966122150421, + "learning_rate": 1.906394974627292e-05, + "loss": 0.4887, + "step": 10199 + }, + { + "epoch": 0.2800658978583196, + "grad_norm": 0.35743266344070435, + "learning_rate": 1.9063767292552598e-05, + "loss": 0.5452, + "step": 10200 + }, + { + "epoch": 0.2800933552992861, + "grad_norm": 0.35939881205558777, + "learning_rate": 1.9063584821925452e-05, + "loss": 0.499, + "step": 10201 + }, + { + "epoch": 0.28012081274025263, + "grad_norm": 0.4043385684490204, + "learning_rate": 1.906340233439183e-05, + "loss": 0.48, + "step": 10202 + }, + { + "epoch": 0.28014827018121913, + "grad_norm": 0.35582882165908813, + "learning_rate": 1.906321982995207e-05, + "loss": 0.5291, + "step": 10203 + }, + { + "epoch": 0.2801757276221856, + "grad_norm": 0.3591715693473816, + "learning_rate": 1.906303730860651e-05, + "loss": 0.5424, + "step": 10204 + }, + { + "epoch": 0.2802031850631521, + "grad_norm": 0.3288976550102234, + "learning_rate": 1.9062854770355494e-05, + "loss": 0.5059, + "step": 10205 + }, + { + "epoch": 0.2802306425041186, + "grad_norm": 0.4004463851451874, + "learning_rate": 1.9062672215199365e-05, + "loss": 0.4726, + "step": 10206 + }, + { + "epoch": 0.2802580999450851, + "grad_norm": 0.43167829513549805, + "learning_rate": 1.9062489643138455e-05, + "loss": 0.4857, + "step": 10207 + }, + { + "epoch": 0.2802855573860516, + "grad_norm": 0.3386596739292145, + "learning_rate": 1.906230705417311e-05, + "loss": 0.4794, + "step": 10208 + }, + { + "epoch": 0.28031301482701815, + "grad_norm": 0.39179620146751404, + "learning_rate": 1.9062124448303674e-05, + "loss": 0.5464, + "step": 10209 + }, + { + "epoch": 0.28034047226798464, + "grad_norm": 0.3380320370197296, + "learning_rate": 1.906194182553048e-05, + "loss": 0.4602, + "step": 10210 + }, + { + "epoch": 0.28036792970895114, + "grad_norm": 0.39554399251937866, + "learning_rate": 1.906175918585387e-05, + "loss": 0.4546, + "step": 10211 + }, + { + "epoch": 0.28039538714991763, + "grad_norm": 0.3588031232357025, + "learning_rate": 1.9061576529274194e-05, + "loss": 0.5789, + "step": 10212 + }, + { + "epoch": 0.2804228445908841, + "grad_norm": 0.3855004906654358, + "learning_rate": 1.906139385579178e-05, + "loss": 0.4887, + "step": 10213 + }, + { + "epoch": 0.2804503020318506, + "grad_norm": 0.3578420877456665, + "learning_rate": 1.9061211165406978e-05, + "loss": 0.5373, + "step": 10214 + }, + { + "epoch": 0.2804777594728171, + "grad_norm": 0.4339151382446289, + "learning_rate": 1.906102845812012e-05, + "loss": 0.6163, + "step": 10215 + }, + { + "epoch": 0.28050521691378366, + "grad_norm": 0.36802688241004944, + "learning_rate": 1.9060845733931557e-05, + "loss": 0.5449, + "step": 10216 + }, + { + "epoch": 0.28053267435475016, + "grad_norm": 0.35641390085220337, + "learning_rate": 1.906066299284162e-05, + "loss": 0.5185, + "step": 10217 + }, + { + "epoch": 0.28056013179571665, + "grad_norm": 0.42019885778427124, + "learning_rate": 1.906048023485066e-05, + "loss": 0.5287, + "step": 10218 + }, + { + "epoch": 0.28058758923668314, + "grad_norm": 0.9418049454689026, + "learning_rate": 1.906029745995901e-05, + "loss": 0.5956, + "step": 10219 + }, + { + "epoch": 0.28061504667764964, + "grad_norm": 0.4017864763736725, + "learning_rate": 1.9060114668167015e-05, + "loss": 0.5762, + "step": 10220 + }, + { + "epoch": 0.28064250411861613, + "grad_norm": 0.4477185010910034, + "learning_rate": 1.9059931859475013e-05, + "loss": 0.5904, + "step": 10221 + }, + { + "epoch": 0.2806699615595826, + "grad_norm": 0.391867458820343, + "learning_rate": 1.9059749033883345e-05, + "loss": 0.5087, + "step": 10222 + }, + { + "epoch": 0.2806974190005492, + "grad_norm": 0.4331045150756836, + "learning_rate": 1.905956619139236e-05, + "loss": 0.4722, + "step": 10223 + }, + { + "epoch": 0.28072487644151567, + "grad_norm": 0.3518742322921753, + "learning_rate": 1.9059383332002386e-05, + "loss": 0.5398, + "step": 10224 + }, + { + "epoch": 0.28075233388248216, + "grad_norm": 0.40507814288139343, + "learning_rate": 1.905920045571377e-05, + "loss": 0.4985, + "step": 10225 + }, + { + "epoch": 0.28077979132344866, + "grad_norm": 0.41753992438316345, + "learning_rate": 1.9059017562526857e-05, + "loss": 0.5772, + "step": 10226 + }, + { + "epoch": 0.28080724876441515, + "grad_norm": 0.3756506145000458, + "learning_rate": 1.9058834652441985e-05, + "loss": 0.474, + "step": 10227 + }, + { + "epoch": 0.28083470620538165, + "grad_norm": 0.38233691453933716, + "learning_rate": 1.9058651725459494e-05, + "loss": 0.5558, + "step": 10228 + }, + { + "epoch": 0.28086216364634814, + "grad_norm": 0.3718664348125458, + "learning_rate": 1.9058468781579726e-05, + "loss": 0.4859, + "step": 10229 + }, + { + "epoch": 0.2808896210873147, + "grad_norm": 0.3687560558319092, + "learning_rate": 1.9058285820803023e-05, + "loss": 0.5513, + "step": 10230 + }, + { + "epoch": 0.2809170785282812, + "grad_norm": 0.3517082929611206, + "learning_rate": 1.9058102843129726e-05, + "loss": 0.5684, + "step": 10231 + }, + { + "epoch": 0.2809445359692477, + "grad_norm": 0.3281862437725067, + "learning_rate": 1.905791984856018e-05, + "loss": 0.4272, + "step": 10232 + }, + { + "epoch": 0.28097199341021417, + "grad_norm": 0.38546091318130493, + "learning_rate": 1.9057736837094717e-05, + "loss": 0.5397, + "step": 10233 + }, + { + "epoch": 0.28099945085118067, + "grad_norm": 0.3665340840816498, + "learning_rate": 1.9057553808733685e-05, + "loss": 0.5956, + "step": 10234 + }, + { + "epoch": 0.28102690829214716, + "grad_norm": 0.4210297763347626, + "learning_rate": 1.905737076347743e-05, + "loss": 0.5453, + "step": 10235 + }, + { + "epoch": 0.28105436573311365, + "grad_norm": 0.6254693865776062, + "learning_rate": 1.9057187701326278e-05, + "loss": 0.5024, + "step": 10236 + }, + { + "epoch": 0.2810818231740802, + "grad_norm": 0.3718787133693695, + "learning_rate": 1.9057004622280586e-05, + "loss": 0.5036, + "step": 10237 + }, + { + "epoch": 0.2811092806150467, + "grad_norm": 0.3613419532775879, + "learning_rate": 1.9056821526340687e-05, + "loss": 0.4794, + "step": 10238 + }, + { + "epoch": 0.2811367380560132, + "grad_norm": 0.3742837905883789, + "learning_rate": 1.905663841350693e-05, + "loss": 0.5, + "step": 10239 + }, + { + "epoch": 0.2811641954969797, + "grad_norm": 0.3746374547481537, + "learning_rate": 1.905645528377965e-05, + "loss": 0.5264, + "step": 10240 + }, + { + "epoch": 0.2811916529379462, + "grad_norm": 0.3873957097530365, + "learning_rate": 1.9056272137159187e-05, + "loss": 0.4652, + "step": 10241 + }, + { + "epoch": 0.2812191103789127, + "grad_norm": 0.435569167137146, + "learning_rate": 1.905608897364589e-05, + "loss": 0.5122, + "step": 10242 + }, + { + "epoch": 0.28124656781987917, + "grad_norm": 0.3504176139831543, + "learning_rate": 1.9055905793240096e-05, + "loss": 0.5106, + "step": 10243 + }, + { + "epoch": 0.28127402526084566, + "grad_norm": 0.557307243347168, + "learning_rate": 1.9055722595942144e-05, + "loss": 0.5317, + "step": 10244 + }, + { + "epoch": 0.2813014827018122, + "grad_norm": 0.3631347715854645, + "learning_rate": 1.9055539381752383e-05, + "loss": 0.5336, + "step": 10245 + }, + { + "epoch": 0.2813289401427787, + "grad_norm": 0.38810497522354126, + "learning_rate": 1.9055356150671146e-05, + "loss": 0.5798, + "step": 10246 + }, + { + "epoch": 0.2813563975837452, + "grad_norm": 0.48672375082969666, + "learning_rate": 1.9055172902698787e-05, + "loss": 0.4459, + "step": 10247 + }, + { + "epoch": 0.2813838550247117, + "grad_norm": 0.36992624402046204, + "learning_rate": 1.9054989637835636e-05, + "loss": 0.5024, + "step": 10248 + }, + { + "epoch": 0.2814113124656782, + "grad_norm": 0.41024020314216614, + "learning_rate": 1.905480635608204e-05, + "loss": 0.5298, + "step": 10249 + }, + { + "epoch": 0.2814387699066447, + "grad_norm": 0.335920512676239, + "learning_rate": 1.905462305743834e-05, + "loss": 0.5634, + "step": 10250 + }, + { + "epoch": 0.2814662273476112, + "grad_norm": 0.3628586232662201, + "learning_rate": 1.9054439741904878e-05, + "loss": 0.5077, + "step": 10251 + }, + { + "epoch": 0.2814936847885777, + "grad_norm": 0.37049973011016846, + "learning_rate": 1.9054256409481996e-05, + "loss": 0.5212, + "step": 10252 + }, + { + "epoch": 0.2815211422295442, + "grad_norm": 0.35797208547592163, + "learning_rate": 1.9054073060170036e-05, + "loss": 0.5179, + "step": 10253 + }, + { + "epoch": 0.2815485996705107, + "grad_norm": 0.33772796392440796, + "learning_rate": 1.9053889693969337e-05, + "loss": 0.5285, + "step": 10254 + }, + { + "epoch": 0.2815760571114772, + "grad_norm": 0.3518511950969696, + "learning_rate": 1.905370631088025e-05, + "loss": 0.5371, + "step": 10255 + }, + { + "epoch": 0.2816035145524437, + "grad_norm": 0.39319294691085815, + "learning_rate": 1.9053522910903107e-05, + "loss": 0.5605, + "step": 10256 + }, + { + "epoch": 0.2816309719934102, + "grad_norm": 0.32929757237434387, + "learning_rate": 1.9053339494038258e-05, + "loss": 0.5231, + "step": 10257 + }, + { + "epoch": 0.2816584294343767, + "grad_norm": 0.36477240920066833, + "learning_rate": 1.9053156060286037e-05, + "loss": 0.5549, + "step": 10258 + }, + { + "epoch": 0.28168588687534324, + "grad_norm": 0.4101243317127228, + "learning_rate": 1.9052972609646792e-05, + "loss": 0.4713, + "step": 10259 + }, + { + "epoch": 0.28171334431630973, + "grad_norm": 0.32519930601119995, + "learning_rate": 1.9052789142120868e-05, + "loss": 0.4925, + "step": 10260 + }, + { + "epoch": 0.2817408017572762, + "grad_norm": 0.4037676751613617, + "learning_rate": 1.9052605657708596e-05, + "loss": 0.5852, + "step": 10261 + }, + { + "epoch": 0.2817682591982427, + "grad_norm": 0.37784698605537415, + "learning_rate": 1.905242215641033e-05, + "loss": 0.5586, + "step": 10262 + }, + { + "epoch": 0.2817957166392092, + "grad_norm": 0.3382710814476013, + "learning_rate": 1.9052238638226404e-05, + "loss": 0.5199, + "step": 10263 + }, + { + "epoch": 0.2818231740801757, + "grad_norm": 0.38013482093811035, + "learning_rate": 1.9052055103157165e-05, + "loss": 0.5628, + "step": 10264 + }, + { + "epoch": 0.2818506315211422, + "grad_norm": 0.3392690122127533, + "learning_rate": 1.9051871551202957e-05, + "loss": 0.4881, + "step": 10265 + }, + { + "epoch": 0.28187808896210875, + "grad_norm": 0.39749979972839355, + "learning_rate": 1.9051687982364116e-05, + "loss": 0.4633, + "step": 10266 + }, + { + "epoch": 0.28190554640307525, + "grad_norm": 0.35572412610054016, + "learning_rate": 1.905150439664099e-05, + "loss": 0.499, + "step": 10267 + }, + { + "epoch": 0.28193300384404174, + "grad_norm": 0.3532596528530121, + "learning_rate": 1.905132079403392e-05, + "loss": 0.4846, + "step": 10268 + }, + { + "epoch": 0.28196046128500823, + "grad_norm": 0.40593889355659485, + "learning_rate": 1.9051137174543247e-05, + "loss": 0.5043, + "step": 10269 + }, + { + "epoch": 0.28198791872597473, + "grad_norm": 0.38289058208465576, + "learning_rate": 1.905095353816931e-05, + "loss": 0.5013, + "step": 10270 + }, + { + "epoch": 0.2820153761669412, + "grad_norm": 0.4139348864555359, + "learning_rate": 1.905076988491246e-05, + "loss": 0.5588, + "step": 10271 + }, + { + "epoch": 0.2820428336079077, + "grad_norm": 0.3624190390110016, + "learning_rate": 1.9050586214773036e-05, + "loss": 0.4814, + "step": 10272 + }, + { + "epoch": 0.28207029104887427, + "grad_norm": 0.419989675283432, + "learning_rate": 1.9050402527751378e-05, + "loss": 0.5568, + "step": 10273 + }, + { + "epoch": 0.28209774848984076, + "grad_norm": 0.43515145778656006, + "learning_rate": 1.905021882384783e-05, + "loss": 0.6187, + "step": 10274 + }, + { + "epoch": 0.28212520593080725, + "grad_norm": 0.37258821725845337, + "learning_rate": 1.9050035103062742e-05, + "loss": 0.4791, + "step": 10275 + }, + { + "epoch": 0.28215266337177375, + "grad_norm": 0.3481035530567169, + "learning_rate": 1.9049851365396445e-05, + "loss": 0.5248, + "step": 10276 + }, + { + "epoch": 0.28218012081274024, + "grad_norm": 0.33561205863952637, + "learning_rate": 1.9049667610849287e-05, + "loss": 0.4745, + "step": 10277 + }, + { + "epoch": 0.28220757825370674, + "grad_norm": 0.3939867317676544, + "learning_rate": 1.9049483839421612e-05, + "loss": 0.5011, + "step": 10278 + }, + { + "epoch": 0.28223503569467323, + "grad_norm": 0.3458270728588104, + "learning_rate": 1.904930005111376e-05, + "loss": 0.5115, + "step": 10279 + }, + { + "epoch": 0.2822624931356398, + "grad_norm": 0.3392016887664795, + "learning_rate": 1.9049116245926076e-05, + "loss": 0.4466, + "step": 10280 + }, + { + "epoch": 0.2822899505766063, + "grad_norm": 0.3804769814014435, + "learning_rate": 1.9048932423858903e-05, + "loss": 0.4978, + "step": 10281 + }, + { + "epoch": 0.28231740801757277, + "grad_norm": 0.3667837679386139, + "learning_rate": 1.904874858491258e-05, + "loss": 0.5212, + "step": 10282 + }, + { + "epoch": 0.28234486545853926, + "grad_norm": 0.3595873713493347, + "learning_rate": 1.9048564729087452e-05, + "loss": 0.523, + "step": 10283 + }, + { + "epoch": 0.28237232289950576, + "grad_norm": 0.4001471996307373, + "learning_rate": 1.9048380856383866e-05, + "loss": 0.5854, + "step": 10284 + }, + { + "epoch": 0.28239978034047225, + "grad_norm": 0.35025468468666077, + "learning_rate": 1.904819696680216e-05, + "loss": 0.5408, + "step": 10285 + }, + { + "epoch": 0.28242723778143874, + "grad_norm": 0.42405906319618225, + "learning_rate": 1.904801306034268e-05, + "loss": 0.4308, + "step": 10286 + }, + { + "epoch": 0.2824546952224053, + "grad_norm": 0.563441812992096, + "learning_rate": 1.904782913700577e-05, + "loss": 0.4373, + "step": 10287 + }, + { + "epoch": 0.2824821526633718, + "grad_norm": 0.34220945835113525, + "learning_rate": 1.9047645196791768e-05, + "loss": 0.5138, + "step": 10288 + }, + { + "epoch": 0.2825096101043383, + "grad_norm": 0.416213721036911, + "learning_rate": 1.904746123970102e-05, + "loss": 0.5523, + "step": 10289 + }, + { + "epoch": 0.2825370675453048, + "grad_norm": 0.3685774505138397, + "learning_rate": 1.9047277265733873e-05, + "loss": 0.5898, + "step": 10290 + }, + { + "epoch": 0.28256452498627127, + "grad_norm": 0.36235812306404114, + "learning_rate": 1.9047093274890662e-05, + "loss": 0.5627, + "step": 10291 + }, + { + "epoch": 0.28259198242723776, + "grad_norm": 0.3979489207267761, + "learning_rate": 1.9046909267171735e-05, + "loss": 0.5024, + "step": 10292 + }, + { + "epoch": 0.28261943986820426, + "grad_norm": 0.3816218078136444, + "learning_rate": 1.9046725242577436e-05, + "loss": 0.482, + "step": 10293 + }, + { + "epoch": 0.2826468973091708, + "grad_norm": 0.3656329810619354, + "learning_rate": 1.9046541201108105e-05, + "loss": 0.4833, + "step": 10294 + }, + { + "epoch": 0.2826743547501373, + "grad_norm": 0.36685308814048767, + "learning_rate": 1.904635714276409e-05, + "loss": 0.5568, + "step": 10295 + }, + { + "epoch": 0.2827018121911038, + "grad_norm": 0.3623228967189789, + "learning_rate": 1.904617306754573e-05, + "loss": 0.4581, + "step": 10296 + }, + { + "epoch": 0.2827292696320703, + "grad_norm": 0.377581387758255, + "learning_rate": 1.904598897545337e-05, + "loss": 0.5795, + "step": 10297 + }, + { + "epoch": 0.2827567270730368, + "grad_norm": 0.39367231726646423, + "learning_rate": 1.9045804866487357e-05, + "loss": 0.4831, + "step": 10298 + }, + { + "epoch": 0.2827841845140033, + "grad_norm": 0.34587156772613525, + "learning_rate": 1.904562074064803e-05, + "loss": 0.4489, + "step": 10299 + }, + { + "epoch": 0.2828116419549698, + "grad_norm": 0.35807543992996216, + "learning_rate": 1.9045436597935727e-05, + "loss": 0.5196, + "step": 10300 + }, + { + "epoch": 0.2828390993959363, + "grad_norm": 0.35670167207717896, + "learning_rate": 1.9045252438350803e-05, + "loss": 0.5742, + "step": 10301 + }, + { + "epoch": 0.2828665568369028, + "grad_norm": 0.38472217321395874, + "learning_rate": 1.9045068261893596e-05, + "loss": 0.6194, + "step": 10302 + }, + { + "epoch": 0.2828940142778693, + "grad_norm": 0.35421690344810486, + "learning_rate": 1.9044884068564448e-05, + "loss": 0.5947, + "step": 10303 + }, + { + "epoch": 0.2829214717188358, + "grad_norm": 0.3976437747478485, + "learning_rate": 1.9044699858363705e-05, + "loss": 0.5429, + "step": 10304 + }, + { + "epoch": 0.2829489291598023, + "grad_norm": 0.40571755170822144, + "learning_rate": 1.9044515631291712e-05, + "loss": 0.5753, + "step": 10305 + }, + { + "epoch": 0.2829763866007688, + "grad_norm": 0.3495918810367584, + "learning_rate": 1.904433138734881e-05, + "loss": 0.4796, + "step": 10306 + }, + { + "epoch": 0.2830038440417353, + "grad_norm": 0.3505289852619171, + "learning_rate": 1.9044147126535343e-05, + "loss": 0.4182, + "step": 10307 + }, + { + "epoch": 0.28303130148270184, + "grad_norm": 0.3644457459449768, + "learning_rate": 1.904396284885165e-05, + "loss": 0.423, + "step": 10308 + }, + { + "epoch": 0.28305875892366833, + "grad_norm": 0.4123876690864563, + "learning_rate": 1.9043778554298088e-05, + "loss": 0.5481, + "step": 10309 + }, + { + "epoch": 0.2830862163646348, + "grad_norm": 0.3858984708786011, + "learning_rate": 1.9043594242874987e-05, + "loss": 0.4867, + "step": 10310 + }, + { + "epoch": 0.2831136738056013, + "grad_norm": 0.3421988785266876, + "learning_rate": 1.90434099145827e-05, + "loss": 0.5592, + "step": 10311 + }, + { + "epoch": 0.2831411312465678, + "grad_norm": 0.38252657651901245, + "learning_rate": 1.9043225569421567e-05, + "loss": 0.5392, + "step": 10312 + }, + { + "epoch": 0.2831685886875343, + "grad_norm": 0.4140589237213135, + "learning_rate": 1.9043041207391927e-05, + "loss": 0.5569, + "step": 10313 + }, + { + "epoch": 0.2831960461285008, + "grad_norm": 0.3463423550128937, + "learning_rate": 1.9042856828494134e-05, + "loss": 0.5894, + "step": 10314 + }, + { + "epoch": 0.28322350356946735, + "grad_norm": 0.32841095328330994, + "learning_rate": 1.9042672432728526e-05, + "loss": 0.5212, + "step": 10315 + }, + { + "epoch": 0.28325096101043384, + "grad_norm": 0.29680681228637695, + "learning_rate": 1.9042488020095447e-05, + "loss": 0.4559, + "step": 10316 + }, + { + "epoch": 0.28327841845140034, + "grad_norm": 0.3770354986190796, + "learning_rate": 1.904230359059524e-05, + "loss": 0.5308, + "step": 10317 + }, + { + "epoch": 0.28330587589236683, + "grad_norm": 0.46051642298698425, + "learning_rate": 1.9042119144228254e-05, + "loss": 0.5178, + "step": 10318 + }, + { + "epoch": 0.2833333333333333, + "grad_norm": 0.35680311918258667, + "learning_rate": 1.904193468099483e-05, + "loss": 0.5406, + "step": 10319 + }, + { + "epoch": 0.2833607907742998, + "grad_norm": 0.3562031090259552, + "learning_rate": 1.904175020089531e-05, + "loss": 0.469, + "step": 10320 + }, + { + "epoch": 0.2833882482152663, + "grad_norm": 0.4270978271961212, + "learning_rate": 1.904156570393004e-05, + "loss": 0.5421, + "step": 10321 + }, + { + "epoch": 0.28341570565623286, + "grad_norm": 0.4014187753200531, + "learning_rate": 1.9041381190099366e-05, + "loss": 0.4804, + "step": 10322 + }, + { + "epoch": 0.28344316309719936, + "grad_norm": 0.6620213985443115, + "learning_rate": 1.904119665940363e-05, + "loss": 0.4841, + "step": 10323 + }, + { + "epoch": 0.28347062053816585, + "grad_norm": 0.3710618317127228, + "learning_rate": 1.9041012111843175e-05, + "loss": 0.5531, + "step": 10324 + }, + { + "epoch": 0.28349807797913235, + "grad_norm": 0.3191380798816681, + "learning_rate": 1.904082754741835e-05, + "loss": 0.5167, + "step": 10325 + }, + { + "epoch": 0.28352553542009884, + "grad_norm": 0.3532584607601166, + "learning_rate": 1.904064296612949e-05, + "loss": 0.4982, + "step": 10326 + }, + { + "epoch": 0.28355299286106533, + "grad_norm": 0.4938264489173889, + "learning_rate": 1.904045836797695e-05, + "loss": 0.5024, + "step": 10327 + }, + { + "epoch": 0.28358045030203183, + "grad_norm": 0.3586426377296448, + "learning_rate": 1.904027375296107e-05, + "loss": 0.5873, + "step": 10328 + }, + { + "epoch": 0.2836079077429984, + "grad_norm": 0.3490108847618103, + "learning_rate": 1.9040089121082195e-05, + "loss": 0.5111, + "step": 10329 + }, + { + "epoch": 0.28363536518396487, + "grad_norm": 0.3585154712200165, + "learning_rate": 1.9039904472340667e-05, + "loss": 0.4822, + "step": 10330 + }, + { + "epoch": 0.28366282262493137, + "grad_norm": 0.32075005769729614, + "learning_rate": 1.903971980673683e-05, + "loss": 0.465, + "step": 10331 + }, + { + "epoch": 0.28369028006589786, + "grad_norm": 0.3793329894542694, + "learning_rate": 1.903953512427103e-05, + "loss": 0.5536, + "step": 10332 + }, + { + "epoch": 0.28371773750686435, + "grad_norm": 0.34835758805274963, + "learning_rate": 1.903935042494362e-05, + "loss": 0.4981, + "step": 10333 + }, + { + "epoch": 0.28374519494783085, + "grad_norm": 0.37638190388679504, + "learning_rate": 1.9039165708754927e-05, + "loss": 0.4462, + "step": 10334 + }, + { + "epoch": 0.28377265238879734, + "grad_norm": 0.3612581789493561, + "learning_rate": 1.903898097570531e-05, + "loss": 0.5221, + "step": 10335 + }, + { + "epoch": 0.2838001098297639, + "grad_norm": 0.3648439943790436, + "learning_rate": 1.9038796225795108e-05, + "loss": 0.5755, + "step": 10336 + }, + { + "epoch": 0.2838275672707304, + "grad_norm": 0.4192727208137512, + "learning_rate": 1.903861145902466e-05, + "loss": 0.5768, + "step": 10337 + }, + { + "epoch": 0.2838550247116969, + "grad_norm": 0.35826629400253296, + "learning_rate": 1.9038426675394326e-05, + "loss": 0.5561, + "step": 10338 + }, + { + "epoch": 0.2838824821526634, + "grad_norm": 0.38933679461479187, + "learning_rate": 1.9038241874904437e-05, + "loss": 0.5025, + "step": 10339 + }, + { + "epoch": 0.28390993959362987, + "grad_norm": 0.41523459553718567, + "learning_rate": 1.9038057057555342e-05, + "loss": 0.5513, + "step": 10340 + }, + { + "epoch": 0.28393739703459636, + "grad_norm": 0.40151193737983704, + "learning_rate": 1.9037872223347385e-05, + "loss": 0.5289, + "step": 10341 + }, + { + "epoch": 0.28396485447556286, + "grad_norm": 0.41636037826538086, + "learning_rate": 1.9037687372280914e-05, + "loss": 0.592, + "step": 10342 + }, + { + "epoch": 0.2839923119165294, + "grad_norm": 0.388034462928772, + "learning_rate": 1.9037502504356272e-05, + "loss": 0.4716, + "step": 10343 + }, + { + "epoch": 0.2840197693574959, + "grad_norm": 0.4041734039783478, + "learning_rate": 1.9037317619573804e-05, + "loss": 0.5712, + "step": 10344 + }, + { + "epoch": 0.2840472267984624, + "grad_norm": 0.3613959550857544, + "learning_rate": 1.9037132717933852e-05, + "loss": 0.4339, + "step": 10345 + }, + { + "epoch": 0.2840746842394289, + "grad_norm": 0.3556935489177704, + "learning_rate": 1.9036947799436766e-05, + "loss": 0.5083, + "step": 10346 + }, + { + "epoch": 0.2841021416803954, + "grad_norm": 0.33111143112182617, + "learning_rate": 1.9036762864082883e-05, + "loss": 0.5632, + "step": 10347 + }, + { + "epoch": 0.2841295991213619, + "grad_norm": 0.3536824882030487, + "learning_rate": 1.9036577911872557e-05, + "loss": 0.4581, + "step": 10348 + }, + { + "epoch": 0.28415705656232837, + "grad_norm": 0.445364773273468, + "learning_rate": 1.903639294280613e-05, + "loss": 0.5658, + "step": 10349 + }, + { + "epoch": 0.2841845140032949, + "grad_norm": 0.4457208514213562, + "learning_rate": 1.9036207956883944e-05, + "loss": 0.4817, + "step": 10350 + }, + { + "epoch": 0.2842119714442614, + "grad_norm": 0.3674617111682892, + "learning_rate": 1.9036022954106347e-05, + "loss": 0.4991, + "step": 10351 + }, + { + "epoch": 0.2842394288852279, + "grad_norm": 0.3404936194419861, + "learning_rate": 1.9035837934473683e-05, + "loss": 0.4797, + "step": 10352 + }, + { + "epoch": 0.2842668863261944, + "grad_norm": 0.34075498580932617, + "learning_rate": 1.9035652897986296e-05, + "loss": 0.5655, + "step": 10353 + }, + { + "epoch": 0.2842943437671609, + "grad_norm": 0.36926889419555664, + "learning_rate": 1.9035467844644532e-05, + "loss": 0.5504, + "step": 10354 + }, + { + "epoch": 0.2843218012081274, + "grad_norm": 0.403548002243042, + "learning_rate": 1.903528277444874e-05, + "loss": 0.4535, + "step": 10355 + }, + { + "epoch": 0.2843492586490939, + "grad_norm": 0.35838234424591064, + "learning_rate": 1.903509768739926e-05, + "loss": 0.4789, + "step": 10356 + }, + { + "epoch": 0.28437671609006043, + "grad_norm": 0.41408780217170715, + "learning_rate": 1.903491258349644e-05, + "loss": 0.5157, + "step": 10357 + }, + { + "epoch": 0.2844041735310269, + "grad_norm": 0.3383234739303589, + "learning_rate": 1.903472746274062e-05, + "loss": 0.4915, + "step": 10358 + }, + { + "epoch": 0.2844316309719934, + "grad_norm": 0.3544083833694458, + "learning_rate": 1.9034542325132154e-05, + "loss": 0.3826, + "step": 10359 + }, + { + "epoch": 0.2844590884129599, + "grad_norm": 0.38491493463516235, + "learning_rate": 1.9034357170671383e-05, + "loss": 0.488, + "step": 10360 + }, + { + "epoch": 0.2844865458539264, + "grad_norm": 0.3857385516166687, + "learning_rate": 1.9034171999358655e-05, + "loss": 0.4575, + "step": 10361 + }, + { + "epoch": 0.2845140032948929, + "grad_norm": 0.4501913785934448, + "learning_rate": 1.903398681119431e-05, + "loss": 0.5972, + "step": 10362 + }, + { + "epoch": 0.2845414607358594, + "grad_norm": 0.4114726781845093, + "learning_rate": 1.903380160617869e-05, + "loss": 0.589, + "step": 10363 + }, + { + "epoch": 0.28456891817682595, + "grad_norm": 0.39547839760780334, + "learning_rate": 1.9033616384312157e-05, + "loss": 0.5664, + "step": 10364 + }, + { + "epoch": 0.28459637561779244, + "grad_norm": 0.44773146510124207, + "learning_rate": 1.903343114559504e-05, + "loss": 0.519, + "step": 10365 + }, + { + "epoch": 0.28462383305875893, + "grad_norm": 0.3704400360584259, + "learning_rate": 1.9033245890027695e-05, + "loss": 0.5283, + "step": 10366 + }, + { + "epoch": 0.28465129049972543, + "grad_norm": 0.3868197202682495, + "learning_rate": 1.9033060617610458e-05, + "loss": 0.5418, + "step": 10367 + }, + { + "epoch": 0.2846787479406919, + "grad_norm": 0.36931097507476807, + "learning_rate": 1.9032875328343686e-05, + "loss": 0.5132, + "step": 10368 + }, + { + "epoch": 0.2847062053816584, + "grad_norm": 0.37181660532951355, + "learning_rate": 1.9032690022227714e-05, + "loss": 0.5227, + "step": 10369 + }, + { + "epoch": 0.2847336628226249, + "grad_norm": 0.32040759921073914, + "learning_rate": 1.9032504699262895e-05, + "loss": 0.5053, + "step": 10370 + }, + { + "epoch": 0.28476112026359146, + "grad_norm": 0.3438103199005127, + "learning_rate": 1.903231935944957e-05, + "loss": 0.4426, + "step": 10371 + }, + { + "epoch": 0.28478857770455795, + "grad_norm": 0.355803906917572, + "learning_rate": 1.903213400278809e-05, + "loss": 0.5556, + "step": 10372 + }, + { + "epoch": 0.28481603514552445, + "grad_norm": 0.3536284267902374, + "learning_rate": 1.9031948629278792e-05, + "loss": 0.4523, + "step": 10373 + }, + { + "epoch": 0.28484349258649094, + "grad_norm": 0.42222172021865845, + "learning_rate": 1.9031763238922033e-05, + "loss": 0.512, + "step": 10374 + }, + { + "epoch": 0.28487095002745744, + "grad_norm": 0.35044607520103455, + "learning_rate": 1.9031577831718147e-05, + "loss": 0.5002, + "step": 10375 + }, + { + "epoch": 0.28489840746842393, + "grad_norm": 0.37926939129829407, + "learning_rate": 1.9031392407667488e-05, + "loss": 0.4952, + "step": 10376 + }, + { + "epoch": 0.2849258649093904, + "grad_norm": 0.384799987077713, + "learning_rate": 1.90312069667704e-05, + "loss": 0.491, + "step": 10377 + }, + { + "epoch": 0.2849533223503569, + "grad_norm": 0.34999439120292664, + "learning_rate": 1.903102150902723e-05, + "loss": 0.5001, + "step": 10378 + }, + { + "epoch": 0.28498077979132347, + "grad_norm": 0.4538396894931793, + "learning_rate": 1.903083603443832e-05, + "loss": 0.5454, + "step": 10379 + }, + { + "epoch": 0.28500823723228996, + "grad_norm": 0.35642197728157043, + "learning_rate": 1.903065054300402e-05, + "loss": 0.5763, + "step": 10380 + }, + { + "epoch": 0.28503569467325646, + "grad_norm": 0.3210013806819916, + "learning_rate": 1.9030465034724676e-05, + "loss": 0.4529, + "step": 10381 + }, + { + "epoch": 0.28506315211422295, + "grad_norm": 0.3584228456020355, + "learning_rate": 1.903027950960063e-05, + "loss": 0.6149, + "step": 10382 + }, + { + "epoch": 0.28509060955518944, + "grad_norm": 0.3775506913661957, + "learning_rate": 1.903009396763223e-05, + "loss": 0.5353, + "step": 10383 + }, + { + "epoch": 0.28511806699615594, + "grad_norm": 0.34134209156036377, + "learning_rate": 1.9029908408819826e-05, + "loss": 0.5348, + "step": 10384 + }, + { + "epoch": 0.28514552443712243, + "grad_norm": 0.3560941219329834, + "learning_rate": 1.902972283316376e-05, + "loss": 0.563, + "step": 10385 + }, + { + "epoch": 0.285172981878089, + "grad_norm": 0.8777788281440735, + "learning_rate": 1.9029537240664377e-05, + "loss": 0.5038, + "step": 10386 + }, + { + "epoch": 0.2852004393190555, + "grad_norm": 0.3336629569530487, + "learning_rate": 1.9029351631322025e-05, + "loss": 0.451, + "step": 10387 + }, + { + "epoch": 0.28522789676002197, + "grad_norm": 0.3367331027984619, + "learning_rate": 1.9029166005137053e-05, + "loss": 0.4986, + "step": 10388 + }, + { + "epoch": 0.28525535420098846, + "grad_norm": 0.36165305972099304, + "learning_rate": 1.9028980362109803e-05, + "loss": 0.5106, + "step": 10389 + }, + { + "epoch": 0.28528281164195496, + "grad_norm": 0.3331947326660156, + "learning_rate": 1.9028794702240624e-05, + "loss": 0.5285, + "step": 10390 + }, + { + "epoch": 0.28531026908292145, + "grad_norm": 0.4914761781692505, + "learning_rate": 1.902860902552986e-05, + "loss": 0.5716, + "step": 10391 + }, + { + "epoch": 0.28533772652388795, + "grad_norm": 0.4121670126914978, + "learning_rate": 1.9028423331977855e-05, + "loss": 0.6018, + "step": 10392 + }, + { + "epoch": 0.2853651839648545, + "grad_norm": 0.362193763256073, + "learning_rate": 1.9028237621584967e-05, + "loss": 0.4432, + "step": 10393 + }, + { + "epoch": 0.285392641405821, + "grad_norm": 0.3541114032268524, + "learning_rate": 1.902805189435153e-05, + "loss": 0.5461, + "step": 10394 + }, + { + "epoch": 0.2854200988467875, + "grad_norm": 0.35656654834747314, + "learning_rate": 1.9027866150277894e-05, + "loss": 0.6156, + "step": 10395 + }, + { + "epoch": 0.285447556287754, + "grad_norm": 0.38039037585258484, + "learning_rate": 1.9027680389364406e-05, + "loss": 0.4478, + "step": 10396 + }, + { + "epoch": 0.2854750137287205, + "grad_norm": 0.35162752866744995, + "learning_rate": 1.9027494611611416e-05, + "loss": 0.4809, + "step": 10397 + }, + { + "epoch": 0.28550247116968697, + "grad_norm": 0.4025368392467499, + "learning_rate": 1.902730881701926e-05, + "loss": 0.6054, + "step": 10398 + }, + { + "epoch": 0.28552992861065346, + "grad_norm": 0.3677777051925659, + "learning_rate": 1.9027123005588302e-05, + "loss": 0.5127, + "step": 10399 + }, + { + "epoch": 0.28555738605162, + "grad_norm": 0.35755929350852966, + "learning_rate": 1.902693717731887e-05, + "loss": 0.5056, + "step": 10400 + }, + { + "epoch": 0.2855848434925865, + "grad_norm": 0.42068102955818176, + "learning_rate": 1.9026751332211324e-05, + "loss": 0.5748, + "step": 10401 + }, + { + "epoch": 0.285612300933553, + "grad_norm": 0.40213748812675476, + "learning_rate": 1.9026565470266007e-05, + "loss": 0.5408, + "step": 10402 + }, + { + "epoch": 0.2856397583745195, + "grad_norm": 0.3552500605583191, + "learning_rate": 1.902637959148326e-05, + "loss": 0.5415, + "step": 10403 + }, + { + "epoch": 0.285667215815486, + "grad_norm": 0.3328021168708801, + "learning_rate": 1.9026193695863436e-05, + "loss": 0.5291, + "step": 10404 + }, + { + "epoch": 0.2856946732564525, + "grad_norm": 0.3589495122432709, + "learning_rate": 1.902600778340688e-05, + "loss": 0.4675, + "step": 10405 + }, + { + "epoch": 0.285722130697419, + "grad_norm": 0.36390942335128784, + "learning_rate": 1.9025821854113937e-05, + "loss": 0.4683, + "step": 10406 + }, + { + "epoch": 0.2857495881383855, + "grad_norm": 0.37147483229637146, + "learning_rate": 1.902563590798496e-05, + "loss": 0.5254, + "step": 10407 + }, + { + "epoch": 0.285777045579352, + "grad_norm": 0.361863911151886, + "learning_rate": 1.9025449945020287e-05, + "loss": 0.4741, + "step": 10408 + }, + { + "epoch": 0.2858045030203185, + "grad_norm": 0.3356306552886963, + "learning_rate": 1.9025263965220272e-05, + "loss": 0.55, + "step": 10409 + }, + { + "epoch": 0.285831960461285, + "grad_norm": 0.3445087671279907, + "learning_rate": 1.9025077968585257e-05, + "loss": 0.4535, + "step": 10410 + }, + { + "epoch": 0.2858594179022515, + "grad_norm": 0.3716329038143158, + "learning_rate": 1.902489195511559e-05, + "loss": 0.5666, + "step": 10411 + }, + { + "epoch": 0.285886875343218, + "grad_norm": 0.39139917492866516, + "learning_rate": 1.9024705924811624e-05, + "loss": 0.5451, + "step": 10412 + }, + { + "epoch": 0.2859143327841845, + "grad_norm": 0.4575875401496887, + "learning_rate": 1.90245198776737e-05, + "loss": 0.6697, + "step": 10413 + }, + { + "epoch": 0.28594179022515104, + "grad_norm": 0.39047345519065857, + "learning_rate": 1.9024333813702164e-05, + "loss": 0.5396, + "step": 10414 + }, + { + "epoch": 0.28596924766611753, + "grad_norm": 0.3881571590900421, + "learning_rate": 1.9024147732897363e-05, + "loss": 0.5305, + "step": 10415 + }, + { + "epoch": 0.285996705107084, + "grad_norm": 0.3369690477848053, + "learning_rate": 1.902396163525965e-05, + "loss": 0.5106, + "step": 10416 + }, + { + "epoch": 0.2860241625480505, + "grad_norm": 0.46756622195243835, + "learning_rate": 1.902377552078937e-05, + "loss": 0.6251, + "step": 10417 + }, + { + "epoch": 0.286051619989017, + "grad_norm": 0.4553997218608856, + "learning_rate": 1.9023589389486866e-05, + "loss": 0.527, + "step": 10418 + }, + { + "epoch": 0.2860790774299835, + "grad_norm": 0.3492216467857361, + "learning_rate": 1.9023403241352487e-05, + "loss": 0.5554, + "step": 10419 + }, + { + "epoch": 0.28610653487095, + "grad_norm": 0.3446822166442871, + "learning_rate": 1.902321707638658e-05, + "loss": 0.5038, + "step": 10420 + }, + { + "epoch": 0.28613399231191655, + "grad_norm": 0.3892175257205963, + "learning_rate": 1.9023030894589496e-05, + "loss": 0.5155, + "step": 10421 + }, + { + "epoch": 0.28616144975288305, + "grad_norm": 0.34333235025405884, + "learning_rate": 1.902284469596158e-05, + "loss": 0.4969, + "step": 10422 + }, + { + "epoch": 0.28618890719384954, + "grad_norm": 0.37184762954711914, + "learning_rate": 1.9022658480503175e-05, + "loss": 0.5483, + "step": 10423 + }, + { + "epoch": 0.28621636463481603, + "grad_norm": 0.36515378952026367, + "learning_rate": 1.9022472248214633e-05, + "loss": 0.5681, + "step": 10424 + }, + { + "epoch": 0.28624382207578253, + "grad_norm": 0.357617050409317, + "learning_rate": 1.9022285999096305e-05, + "loss": 0.5211, + "step": 10425 + }, + { + "epoch": 0.286271279516749, + "grad_norm": 0.3632064759731293, + "learning_rate": 1.902209973314853e-05, + "loss": 0.5389, + "step": 10426 + }, + { + "epoch": 0.2862987369577155, + "grad_norm": 0.33186075091362, + "learning_rate": 1.9021913450371664e-05, + "loss": 0.5183, + "step": 10427 + }, + { + "epoch": 0.28632619439868207, + "grad_norm": 0.3152519166469574, + "learning_rate": 1.9021727150766046e-05, + "loss": 0.5506, + "step": 10428 + }, + { + "epoch": 0.28635365183964856, + "grad_norm": 0.35798463225364685, + "learning_rate": 1.9021540834332027e-05, + "loss": 0.4908, + "step": 10429 + }, + { + "epoch": 0.28638110928061505, + "grad_norm": 0.38702595233917236, + "learning_rate": 1.9021354501069958e-05, + "loss": 0.4768, + "step": 10430 + }, + { + "epoch": 0.28640856672158155, + "grad_norm": 0.43462061882019043, + "learning_rate": 1.902116815098018e-05, + "loss": 0.5861, + "step": 10431 + }, + { + "epoch": 0.28643602416254804, + "grad_norm": 0.32983705401420593, + "learning_rate": 1.9020981784063045e-05, + "loss": 0.5116, + "step": 10432 + }, + { + "epoch": 0.28646348160351454, + "grad_norm": 0.3761734068393707, + "learning_rate": 1.9020795400318903e-05, + "loss": 0.6156, + "step": 10433 + }, + { + "epoch": 0.28649093904448103, + "grad_norm": 0.3590378761291504, + "learning_rate": 1.9020608999748096e-05, + "loss": 0.5087, + "step": 10434 + }, + { + "epoch": 0.2865183964854476, + "grad_norm": 0.42511725425720215, + "learning_rate": 1.9020422582350976e-05, + "loss": 0.4726, + "step": 10435 + }, + { + "epoch": 0.2865458539264141, + "grad_norm": 0.33881792426109314, + "learning_rate": 1.9020236148127885e-05, + "loss": 0.4451, + "step": 10436 + }, + { + "epoch": 0.28657331136738057, + "grad_norm": 0.3795412480831146, + "learning_rate": 1.902004969707918e-05, + "loss": 0.5303, + "step": 10437 + }, + { + "epoch": 0.28660076880834706, + "grad_norm": 0.3286045789718628, + "learning_rate": 1.9019863229205197e-05, + "loss": 0.4895, + "step": 10438 + }, + { + "epoch": 0.28662822624931356, + "grad_norm": 0.3766331374645233, + "learning_rate": 1.90196767445063e-05, + "loss": 0.6271, + "step": 10439 + }, + { + "epoch": 0.28665568369028005, + "grad_norm": 0.36473169922828674, + "learning_rate": 1.9019490242982818e-05, + "loss": 0.5477, + "step": 10440 + }, + { + "epoch": 0.28668314113124654, + "grad_norm": 0.5746028423309326, + "learning_rate": 1.901930372463511e-05, + "loss": 0.6216, + "step": 10441 + }, + { + "epoch": 0.2867105985722131, + "grad_norm": 0.3696066439151764, + "learning_rate": 1.9019117189463522e-05, + "loss": 0.5485, + "step": 10442 + }, + { + "epoch": 0.2867380560131796, + "grad_norm": 0.35725805163383484, + "learning_rate": 1.90189306374684e-05, + "loss": 0.5893, + "step": 10443 + }, + { + "epoch": 0.2867655134541461, + "grad_norm": 0.33947044610977173, + "learning_rate": 1.90187440686501e-05, + "loss": 0.5341, + "step": 10444 + }, + { + "epoch": 0.2867929708951126, + "grad_norm": 0.34681540727615356, + "learning_rate": 1.9018557483008958e-05, + "loss": 0.5369, + "step": 10445 + }, + { + "epoch": 0.28682042833607907, + "grad_norm": 0.34683138132095337, + "learning_rate": 1.901837088054533e-05, + "loss": 0.513, + "step": 10446 + }, + { + "epoch": 0.28684788577704556, + "grad_norm": 0.3450472056865692, + "learning_rate": 1.9018184261259564e-05, + "loss": 0.5602, + "step": 10447 + }, + { + "epoch": 0.28687534321801206, + "grad_norm": 0.3499576151371002, + "learning_rate": 1.9017997625152004e-05, + "loss": 0.5332, + "step": 10448 + }, + { + "epoch": 0.2869028006589786, + "grad_norm": 0.3635813295841217, + "learning_rate": 1.9017810972223004e-05, + "loss": 0.5448, + "step": 10449 + }, + { + "epoch": 0.2869302580999451, + "grad_norm": 0.3411414325237274, + "learning_rate": 1.9017624302472902e-05, + "loss": 0.5107, + "step": 10450 + }, + { + "epoch": 0.2869577155409116, + "grad_norm": 0.362878680229187, + "learning_rate": 1.9017437615902057e-05, + "loss": 0.5618, + "step": 10451 + }, + { + "epoch": 0.2869851729818781, + "grad_norm": 0.6208773255348206, + "learning_rate": 1.901725091251081e-05, + "loss": 0.4807, + "step": 10452 + }, + { + "epoch": 0.2870126304228446, + "grad_norm": 0.33850955963134766, + "learning_rate": 1.9017064192299514e-05, + "loss": 0.5163, + "step": 10453 + }, + { + "epoch": 0.2870400878638111, + "grad_norm": 0.34463855624198914, + "learning_rate": 1.9016877455268518e-05, + "loss": 0.4788, + "step": 10454 + }, + { + "epoch": 0.28706754530477757, + "grad_norm": 0.3369885981082916, + "learning_rate": 1.901669070141816e-05, + "loss": 0.5661, + "step": 10455 + }, + { + "epoch": 0.2870950027457441, + "grad_norm": 0.39134877920150757, + "learning_rate": 1.9016503930748803e-05, + "loss": 0.5798, + "step": 10456 + }, + { + "epoch": 0.2871224601867106, + "grad_norm": 0.3710697293281555, + "learning_rate": 1.9016317143260786e-05, + "loss": 0.4912, + "step": 10457 + }, + { + "epoch": 0.2871499176276771, + "grad_norm": 0.3803185820579529, + "learning_rate": 1.901613033895446e-05, + "loss": 0.5342, + "step": 10458 + }, + { + "epoch": 0.2871773750686436, + "grad_norm": 0.34467628598213196, + "learning_rate": 1.9015943517830173e-05, + "loss": 0.5311, + "step": 10459 + }, + { + "epoch": 0.2872048325096101, + "grad_norm": 0.4370225667953491, + "learning_rate": 1.9015756679888273e-05, + "loss": 0.5603, + "step": 10460 + }, + { + "epoch": 0.2872322899505766, + "grad_norm": 0.3366222381591797, + "learning_rate": 1.9015569825129112e-05, + "loss": 0.4821, + "step": 10461 + }, + { + "epoch": 0.2872597473915431, + "grad_norm": 0.3841160833835602, + "learning_rate": 1.9015382953553034e-05, + "loss": 0.5617, + "step": 10462 + }, + { + "epoch": 0.28728720483250964, + "grad_norm": 0.4222491383552551, + "learning_rate": 1.9015196065160387e-05, + "loss": 0.507, + "step": 10463 + }, + { + "epoch": 0.28731466227347613, + "grad_norm": 0.3281455934047699, + "learning_rate": 1.9015009159951528e-05, + "loss": 0.4563, + "step": 10464 + }, + { + "epoch": 0.2873421197144426, + "grad_norm": 0.37589216232299805, + "learning_rate": 1.9014822237926794e-05, + "loss": 0.5162, + "step": 10465 + }, + { + "epoch": 0.2873695771554091, + "grad_norm": 0.3575749099254608, + "learning_rate": 1.9014635299086542e-05, + "loss": 0.5889, + "step": 10466 + }, + { + "epoch": 0.2873970345963756, + "grad_norm": 0.3571198284626007, + "learning_rate": 1.9014448343431118e-05, + "loss": 0.5163, + "step": 10467 + }, + { + "epoch": 0.2874244920373421, + "grad_norm": 0.6338431239128113, + "learning_rate": 1.901426137096087e-05, + "loss": 0.5042, + "step": 10468 + }, + { + "epoch": 0.2874519494783086, + "grad_norm": 0.37023860216140747, + "learning_rate": 1.901407438167615e-05, + "loss": 0.5356, + "step": 10469 + }, + { + "epoch": 0.28747940691927515, + "grad_norm": 0.32283490896224976, + "learning_rate": 1.90138873755773e-05, + "loss": 0.5144, + "step": 10470 + }, + { + "epoch": 0.28750686436024164, + "grad_norm": 0.3237452805042267, + "learning_rate": 1.9013700352664675e-05, + "loss": 0.4279, + "step": 10471 + }, + { + "epoch": 0.28753432180120814, + "grad_norm": 0.4035041332244873, + "learning_rate": 1.9013513312938622e-05, + "loss": 0.5385, + "step": 10472 + }, + { + "epoch": 0.28756177924217463, + "grad_norm": 0.37745827436447144, + "learning_rate": 1.9013326256399493e-05, + "loss": 0.5875, + "step": 10473 + }, + { + "epoch": 0.2875892366831411, + "grad_norm": 0.38245484232902527, + "learning_rate": 1.901313918304763e-05, + "loss": 0.5214, + "step": 10474 + }, + { + "epoch": 0.2876166941241076, + "grad_norm": 0.367196261882782, + "learning_rate": 1.9012952092883384e-05, + "loss": 0.5762, + "step": 10475 + }, + { + "epoch": 0.2876441515650741, + "grad_norm": 0.39956963062286377, + "learning_rate": 1.9012764985907113e-05, + "loss": 0.5052, + "step": 10476 + }, + { + "epoch": 0.28767160900604066, + "grad_norm": 0.42284533381462097, + "learning_rate": 1.901257786211915e-05, + "loss": 0.5383, + "step": 10477 + }, + { + "epoch": 0.28769906644700716, + "grad_norm": 0.36900678277015686, + "learning_rate": 1.901239072151986e-05, + "loss": 0.512, + "step": 10478 + }, + { + "epoch": 0.28772652388797365, + "grad_norm": 0.42578569054603577, + "learning_rate": 1.9012203564109583e-05, + "loss": 0.4248, + "step": 10479 + }, + { + "epoch": 0.28775398132894014, + "grad_norm": 0.325700581073761, + "learning_rate": 1.9012016389888667e-05, + "loss": 0.5181, + "step": 10480 + }, + { + "epoch": 0.28778143876990664, + "grad_norm": 0.35645052790641785, + "learning_rate": 1.9011829198857467e-05, + "loss": 0.4844, + "step": 10481 + }, + { + "epoch": 0.28780889621087313, + "grad_norm": 0.3549627661705017, + "learning_rate": 1.901164199101633e-05, + "loss": 0.5782, + "step": 10482 + }, + { + "epoch": 0.2878363536518396, + "grad_norm": 0.5051921010017395, + "learning_rate": 1.9011454766365602e-05, + "loss": 0.4939, + "step": 10483 + }, + { + "epoch": 0.2878638110928062, + "grad_norm": 0.37499353289604187, + "learning_rate": 1.9011267524905635e-05, + "loss": 0.5197, + "step": 10484 + }, + { + "epoch": 0.28789126853377267, + "grad_norm": 0.350375771522522, + "learning_rate": 1.901108026663678e-05, + "loss": 0.4597, + "step": 10485 + }, + { + "epoch": 0.28791872597473916, + "grad_norm": 0.383234441280365, + "learning_rate": 1.9010892991559383e-05, + "loss": 0.546, + "step": 10486 + }, + { + "epoch": 0.28794618341570566, + "grad_norm": 0.3967646360397339, + "learning_rate": 1.9010705699673793e-05, + "loss": 0.408, + "step": 10487 + }, + { + "epoch": 0.28797364085667215, + "grad_norm": 0.3999048173427582, + "learning_rate": 1.9010518390980362e-05, + "loss": 0.6573, + "step": 10488 + }, + { + "epoch": 0.28800109829763865, + "grad_norm": 0.3686874806880951, + "learning_rate": 1.901033106547944e-05, + "loss": 0.5857, + "step": 10489 + }, + { + "epoch": 0.28802855573860514, + "grad_norm": 0.36177563667297363, + "learning_rate": 1.9010143723171374e-05, + "loss": 0.5195, + "step": 10490 + }, + { + "epoch": 0.2880560131795717, + "grad_norm": 0.3890845477581024, + "learning_rate": 1.9009956364056515e-05, + "loss": 0.6105, + "step": 10491 + }, + { + "epoch": 0.2880834706205382, + "grad_norm": 0.3787180185317993, + "learning_rate": 1.900976898813521e-05, + "loss": 0.5284, + "step": 10492 + }, + { + "epoch": 0.2881109280615047, + "grad_norm": 0.4012282192707062, + "learning_rate": 1.900958159540781e-05, + "loss": 0.5966, + "step": 10493 + }, + { + "epoch": 0.2881383855024712, + "grad_norm": 0.3944941461086273, + "learning_rate": 1.9009394185874667e-05, + "loss": 0.5324, + "step": 10494 + }, + { + "epoch": 0.28816584294343767, + "grad_norm": 0.3732452392578125, + "learning_rate": 1.9009206759536125e-05, + "loss": 0.4759, + "step": 10495 + }, + { + "epoch": 0.28819330038440416, + "grad_norm": 0.38772061467170715, + "learning_rate": 1.900901931639254e-05, + "loss": 0.5844, + "step": 10496 + }, + { + "epoch": 0.28822075782537065, + "grad_norm": 0.4300769567489624, + "learning_rate": 1.9008831856444257e-05, + "loss": 0.5706, + "step": 10497 + }, + { + "epoch": 0.2882482152663372, + "grad_norm": 0.39787742495536804, + "learning_rate": 1.900864437969163e-05, + "loss": 0.5595, + "step": 10498 + }, + { + "epoch": 0.2882756727073037, + "grad_norm": 0.34762898087501526, + "learning_rate": 1.9008456886135006e-05, + "loss": 0.4767, + "step": 10499 + }, + { + "epoch": 0.2883031301482702, + "grad_norm": 0.37809187173843384, + "learning_rate": 1.900826937577473e-05, + "loss": 0.492, + "step": 10500 + }, + { + "epoch": 0.2883305875892367, + "grad_norm": 0.3552244305610657, + "learning_rate": 1.900808184861116e-05, + "loss": 0.4816, + "step": 10501 + }, + { + "epoch": 0.2883580450302032, + "grad_norm": 0.37785398960113525, + "learning_rate": 1.9007894304644644e-05, + "loss": 0.5483, + "step": 10502 + }, + { + "epoch": 0.2883855024711697, + "grad_norm": 0.3766702711582184, + "learning_rate": 1.9007706743875526e-05, + "loss": 0.5138, + "step": 10503 + }, + { + "epoch": 0.28841295991213617, + "grad_norm": 0.34409067034721375, + "learning_rate": 1.900751916630416e-05, + "loss": 0.512, + "step": 10504 + }, + { + "epoch": 0.2884404173531027, + "grad_norm": 0.38180670142173767, + "learning_rate": 1.90073315719309e-05, + "loss": 0.5173, + "step": 10505 + }, + { + "epoch": 0.2884678747940692, + "grad_norm": 0.3249037265777588, + "learning_rate": 1.9007143960756086e-05, + "loss": 0.4774, + "step": 10506 + }, + { + "epoch": 0.2884953322350357, + "grad_norm": 0.34776920080184937, + "learning_rate": 1.900695633278008e-05, + "loss": 0.4916, + "step": 10507 + }, + { + "epoch": 0.2885227896760022, + "grad_norm": 0.3667697310447693, + "learning_rate": 1.9006768688003224e-05, + "loss": 0.5879, + "step": 10508 + }, + { + "epoch": 0.2885502471169687, + "grad_norm": 0.37004387378692627, + "learning_rate": 1.9006581026425866e-05, + "loss": 0.5585, + "step": 10509 + }, + { + "epoch": 0.2885777045579352, + "grad_norm": 0.33895349502563477, + "learning_rate": 1.9006393348048365e-05, + "loss": 0.5357, + "step": 10510 + }, + { + "epoch": 0.2886051619989017, + "grad_norm": 0.3531481623649597, + "learning_rate": 1.9006205652871062e-05, + "loss": 0.4712, + "step": 10511 + }, + { + "epoch": 0.2886326194398682, + "grad_norm": 0.3935193121433258, + "learning_rate": 1.9006017940894312e-05, + "loss": 0.5155, + "step": 10512 + }, + { + "epoch": 0.2886600768808347, + "grad_norm": 0.45821988582611084, + "learning_rate": 1.9005830212118465e-05, + "loss": 0.5595, + "step": 10513 + }, + { + "epoch": 0.2886875343218012, + "grad_norm": 0.36502715945243835, + "learning_rate": 1.9005642466543868e-05, + "loss": 0.5625, + "step": 10514 + }, + { + "epoch": 0.2887149917627677, + "grad_norm": 0.37807443737983704, + "learning_rate": 1.9005454704170877e-05, + "loss": 0.5065, + "step": 10515 + }, + { + "epoch": 0.2887424492037342, + "grad_norm": 0.3204372823238373, + "learning_rate": 1.900526692499984e-05, + "loss": 0.4533, + "step": 10516 + }, + { + "epoch": 0.2887699066447007, + "grad_norm": 0.3946102261543274, + "learning_rate": 1.9005079129031102e-05, + "loss": 0.5679, + "step": 10517 + }, + { + "epoch": 0.2887973640856672, + "grad_norm": 0.3674132227897644, + "learning_rate": 1.9004891316265016e-05, + "loss": 0.5508, + "step": 10518 + }, + { + "epoch": 0.2888248215266337, + "grad_norm": 0.3421477675437927, + "learning_rate": 1.900470348670194e-05, + "loss": 0.451, + "step": 10519 + }, + { + "epoch": 0.28885227896760024, + "grad_norm": 0.3460724949836731, + "learning_rate": 1.9004515640342212e-05, + "loss": 0.4643, + "step": 10520 + }, + { + "epoch": 0.28887973640856673, + "grad_norm": 0.3642425537109375, + "learning_rate": 1.9004327777186195e-05, + "loss": 0.5223, + "step": 10521 + }, + { + "epoch": 0.28890719384953323, + "grad_norm": 0.5207915306091309, + "learning_rate": 1.9004139897234225e-05, + "loss": 0.4952, + "step": 10522 + }, + { + "epoch": 0.2889346512904997, + "grad_norm": 1.336111068725586, + "learning_rate": 1.9003952000486666e-05, + "loss": 0.5729, + "step": 10523 + }, + { + "epoch": 0.2889621087314662, + "grad_norm": 0.3569989502429962, + "learning_rate": 1.900376408694386e-05, + "loss": 0.625, + "step": 10524 + }, + { + "epoch": 0.2889895661724327, + "grad_norm": 0.37392285466194153, + "learning_rate": 1.900357615660616e-05, + "loss": 0.4312, + "step": 10525 + }, + { + "epoch": 0.2890170236133992, + "grad_norm": 0.48103228211402893, + "learning_rate": 1.900338820947392e-05, + "loss": 0.5405, + "step": 10526 + }, + { + "epoch": 0.28904448105436575, + "grad_norm": 0.35615968704223633, + "learning_rate": 1.9003200245547484e-05, + "loss": 0.5112, + "step": 10527 + }, + { + "epoch": 0.28907193849533225, + "grad_norm": 0.3727186918258667, + "learning_rate": 1.9003012264827206e-05, + "loss": 0.5191, + "step": 10528 + }, + { + "epoch": 0.28909939593629874, + "grad_norm": 0.31604960560798645, + "learning_rate": 1.900282426731344e-05, + "loss": 0.3951, + "step": 10529 + }, + { + "epoch": 0.28912685337726524, + "grad_norm": 0.3875981867313385, + "learning_rate": 1.900263625300653e-05, + "loss": 0.5023, + "step": 10530 + }, + { + "epoch": 0.28915431081823173, + "grad_norm": 0.41025760769844055, + "learning_rate": 1.9002448221906834e-05, + "loss": 0.554, + "step": 10531 + }, + { + "epoch": 0.2891817682591982, + "grad_norm": 0.3608320653438568, + "learning_rate": 1.9002260174014696e-05, + "loss": 0.5487, + "step": 10532 + }, + { + "epoch": 0.2892092257001647, + "grad_norm": 0.3547680079936981, + "learning_rate": 1.9002072109330468e-05, + "loss": 0.5077, + "step": 10533 + }, + { + "epoch": 0.28923668314113127, + "grad_norm": 0.39259544014930725, + "learning_rate": 1.9001884027854504e-05, + "loss": 0.5284, + "step": 10534 + }, + { + "epoch": 0.28926414058209776, + "grad_norm": 0.34032854437828064, + "learning_rate": 1.9001695929587158e-05, + "loss": 0.5149, + "step": 10535 + }, + { + "epoch": 0.28929159802306426, + "grad_norm": 0.420770525932312, + "learning_rate": 1.900150781452877e-05, + "loss": 0.5844, + "step": 10536 + }, + { + "epoch": 0.28931905546403075, + "grad_norm": 0.44021326303482056, + "learning_rate": 1.9001319682679698e-05, + "loss": 0.5513, + "step": 10537 + }, + { + "epoch": 0.28934651290499724, + "grad_norm": 0.4814611077308655, + "learning_rate": 1.9001131534040292e-05, + "loss": 0.5505, + "step": 10538 + }, + { + "epoch": 0.28937397034596374, + "grad_norm": 0.3775991201400757, + "learning_rate": 1.9000943368610904e-05, + "loss": 0.5364, + "step": 10539 + }, + { + "epoch": 0.28940142778693023, + "grad_norm": 0.37884852290153503, + "learning_rate": 1.9000755186391883e-05, + "loss": 0.4734, + "step": 10540 + }, + { + "epoch": 0.2894288852278968, + "grad_norm": 0.39455530047416687, + "learning_rate": 1.9000566987383582e-05, + "loss": 0.5162, + "step": 10541 + }, + { + "epoch": 0.2894563426688633, + "grad_norm": 0.39992010593414307, + "learning_rate": 1.900037877158635e-05, + "loss": 0.5079, + "step": 10542 + }, + { + "epoch": 0.28948380010982977, + "grad_norm": 0.376645028591156, + "learning_rate": 1.9000190539000536e-05, + "loss": 0.6127, + "step": 10543 + }, + { + "epoch": 0.28951125755079626, + "grad_norm": 0.36446720361709595, + "learning_rate": 1.9000002289626497e-05, + "loss": 0.5725, + "step": 10544 + }, + { + "epoch": 0.28953871499176276, + "grad_norm": 0.33712437748908997, + "learning_rate": 1.8999814023464584e-05, + "loss": 0.5047, + "step": 10545 + }, + { + "epoch": 0.28956617243272925, + "grad_norm": 0.41511058807373047, + "learning_rate": 1.899962574051514e-05, + "loss": 0.5084, + "step": 10546 + }, + { + "epoch": 0.28959362987369575, + "grad_norm": 0.3432982563972473, + "learning_rate": 1.899943744077853e-05, + "loss": 0.4455, + "step": 10547 + }, + { + "epoch": 0.2896210873146623, + "grad_norm": 0.424612432718277, + "learning_rate": 1.8999249124255088e-05, + "loss": 0.6561, + "step": 10548 + }, + { + "epoch": 0.2896485447556288, + "grad_norm": 0.39852529764175415, + "learning_rate": 1.899906079094518e-05, + "loss": 0.47, + "step": 10549 + }, + { + "epoch": 0.2896760021965953, + "grad_norm": 0.38231784105300903, + "learning_rate": 1.8998872440849148e-05, + "loss": 0.5367, + "step": 10550 + }, + { + "epoch": 0.2897034596375618, + "grad_norm": 0.3322747051715851, + "learning_rate": 1.8998684073967346e-05, + "loss": 0.5665, + "step": 10551 + }, + { + "epoch": 0.28973091707852827, + "grad_norm": 0.33011916279792786, + "learning_rate": 1.8998495690300126e-05, + "loss": 0.5235, + "step": 10552 + }, + { + "epoch": 0.28975837451949477, + "grad_norm": 0.3492213189601898, + "learning_rate": 1.8998307289847844e-05, + "loss": 0.5483, + "step": 10553 + }, + { + "epoch": 0.28978583196046126, + "grad_norm": 0.35531044006347656, + "learning_rate": 1.8998118872610845e-05, + "loss": 0.5252, + "step": 10554 + }, + { + "epoch": 0.2898132894014278, + "grad_norm": 0.39919111132621765, + "learning_rate": 1.899793043858948e-05, + "loss": 0.5636, + "step": 10555 + }, + { + "epoch": 0.2898407468423943, + "grad_norm": 0.36453473567962646, + "learning_rate": 1.8997741987784105e-05, + "loss": 0.5846, + "step": 10556 + }, + { + "epoch": 0.2898682042833608, + "grad_norm": 0.36717137694358826, + "learning_rate": 1.899755352019507e-05, + "loss": 0.5509, + "step": 10557 + }, + { + "epoch": 0.2898956617243273, + "grad_norm": 0.6730944514274597, + "learning_rate": 1.8997365035822727e-05, + "loss": 0.526, + "step": 10558 + }, + { + "epoch": 0.2899231191652938, + "grad_norm": 0.3569776713848114, + "learning_rate": 1.8997176534667426e-05, + "loss": 0.4946, + "step": 10559 + }, + { + "epoch": 0.2899505766062603, + "grad_norm": 0.3382580578327179, + "learning_rate": 1.8996988016729516e-05, + "loss": 0.5395, + "step": 10560 + }, + { + "epoch": 0.2899780340472268, + "grad_norm": 0.4099234938621521, + "learning_rate": 1.8996799482009355e-05, + "loss": 0.4891, + "step": 10561 + }, + { + "epoch": 0.2900054914881933, + "grad_norm": 0.3214190602302551, + "learning_rate": 1.899661093050729e-05, + "loss": 0.3988, + "step": 10562 + }, + { + "epoch": 0.2900329489291598, + "grad_norm": 0.3363940417766571, + "learning_rate": 1.8996422362223672e-05, + "loss": 0.4861, + "step": 10563 + }, + { + "epoch": 0.2900604063701263, + "grad_norm": 1.281022548675537, + "learning_rate": 1.8996233777158858e-05, + "loss": 0.5633, + "step": 10564 + }, + { + "epoch": 0.2900878638110928, + "grad_norm": 0.42590969800949097, + "learning_rate": 1.8996045175313196e-05, + "loss": 0.5534, + "step": 10565 + }, + { + "epoch": 0.2901153212520593, + "grad_norm": 0.471465528011322, + "learning_rate": 1.899585655668704e-05, + "loss": 0.491, + "step": 10566 + }, + { + "epoch": 0.2901427786930258, + "grad_norm": 0.35766884684562683, + "learning_rate": 1.899566792128074e-05, + "loss": 0.5064, + "step": 10567 + }, + { + "epoch": 0.2901702361339923, + "grad_norm": 0.3660731017589569, + "learning_rate": 1.8995479269094646e-05, + "loss": 0.4616, + "step": 10568 + }, + { + "epoch": 0.29019769357495884, + "grad_norm": 0.43302810192108154, + "learning_rate": 1.8995290600129115e-05, + "loss": 0.5236, + "step": 10569 + }, + { + "epoch": 0.29022515101592533, + "grad_norm": 0.41010960936546326, + "learning_rate": 1.8995101914384494e-05, + "loss": 0.6231, + "step": 10570 + }, + { + "epoch": 0.2902526084568918, + "grad_norm": 0.3536451458930969, + "learning_rate": 1.899491321186114e-05, + "loss": 0.5187, + "step": 10571 + }, + { + "epoch": 0.2902800658978583, + "grad_norm": 0.38244542479515076, + "learning_rate": 1.8994724492559398e-05, + "loss": 0.5704, + "step": 10572 + }, + { + "epoch": 0.2903075233388248, + "grad_norm": 0.3789508044719696, + "learning_rate": 1.8994535756479628e-05, + "loss": 0.5542, + "step": 10573 + }, + { + "epoch": 0.2903349807797913, + "grad_norm": 0.40209096670150757, + "learning_rate": 1.8994347003622174e-05, + "loss": 0.5307, + "step": 10574 + }, + { + "epoch": 0.2903624382207578, + "grad_norm": 0.3571290969848633, + "learning_rate": 1.8994158233987394e-05, + "loss": 0.6003, + "step": 10575 + }, + { + "epoch": 0.29038989566172435, + "grad_norm": 0.36364102363586426, + "learning_rate": 1.8993969447575638e-05, + "loss": 0.506, + "step": 10576 + }, + { + "epoch": 0.29041735310269085, + "grad_norm": 0.3656938076019287, + "learning_rate": 1.899378064438726e-05, + "loss": 0.5182, + "step": 10577 + }, + { + "epoch": 0.29044481054365734, + "grad_norm": 0.3556077182292938, + "learning_rate": 1.899359182442261e-05, + "loss": 0.4828, + "step": 10578 + }, + { + "epoch": 0.29047226798462383, + "grad_norm": 0.36831703782081604, + "learning_rate": 1.899340298768204e-05, + "loss": 0.5143, + "step": 10579 + }, + { + "epoch": 0.2904997254255903, + "grad_norm": 0.45847517251968384, + "learning_rate": 1.8993214134165904e-05, + "loss": 0.5039, + "step": 10580 + }, + { + "epoch": 0.2905271828665568, + "grad_norm": 0.34670403599739075, + "learning_rate": 1.8993025263874552e-05, + "loss": 0.5534, + "step": 10581 + }, + { + "epoch": 0.2905546403075233, + "grad_norm": 0.33571258187294006, + "learning_rate": 1.8992836376808337e-05, + "loss": 0.5395, + "step": 10582 + }, + { + "epoch": 0.29058209774848986, + "grad_norm": 0.35840147733688354, + "learning_rate": 1.8992647472967613e-05, + "loss": 0.4174, + "step": 10583 + }, + { + "epoch": 0.29060955518945636, + "grad_norm": 0.345600962638855, + "learning_rate": 1.8992458552352733e-05, + "loss": 0.4831, + "step": 10584 + }, + { + "epoch": 0.29063701263042285, + "grad_norm": 0.38233110308647156, + "learning_rate": 1.8992269614964046e-05, + "loss": 0.5464, + "step": 10585 + }, + { + "epoch": 0.29066447007138935, + "grad_norm": 0.3238309919834137, + "learning_rate": 1.8992080660801904e-05, + "loss": 0.5218, + "step": 10586 + }, + { + "epoch": 0.29069192751235584, + "grad_norm": 0.42455628514289856, + "learning_rate": 1.8991891689866667e-05, + "loss": 0.5752, + "step": 10587 + }, + { + "epoch": 0.29071938495332234, + "grad_norm": 0.3436873257160187, + "learning_rate": 1.899170270215868e-05, + "loss": 0.4937, + "step": 10588 + }, + { + "epoch": 0.29074684239428883, + "grad_norm": 0.35423097014427185, + "learning_rate": 1.8991513697678296e-05, + "loss": 0.565, + "step": 10589 + }, + { + "epoch": 0.2907742998352554, + "grad_norm": 0.3527736961841583, + "learning_rate": 1.899132467642587e-05, + "loss": 0.5101, + "step": 10590 + }, + { + "epoch": 0.2908017572762219, + "grad_norm": 0.3552381694316864, + "learning_rate": 1.8991135638401754e-05, + "loss": 0.581, + "step": 10591 + }, + { + "epoch": 0.29082921471718837, + "grad_norm": 0.3791722059249878, + "learning_rate": 1.89909465836063e-05, + "loss": 0.4683, + "step": 10592 + }, + { + "epoch": 0.29085667215815486, + "grad_norm": 0.43254151940345764, + "learning_rate": 1.899075751203986e-05, + "loss": 0.5116, + "step": 10593 + }, + { + "epoch": 0.29088412959912135, + "grad_norm": 0.6379194855690002, + "learning_rate": 1.899056842370279e-05, + "loss": 0.5228, + "step": 10594 + }, + { + "epoch": 0.29091158704008785, + "grad_norm": 0.36204150319099426, + "learning_rate": 1.899037931859544e-05, + "loss": 0.5567, + "step": 10595 + }, + { + "epoch": 0.29093904448105434, + "grad_norm": 0.46727314591407776, + "learning_rate": 1.8990190196718163e-05, + "loss": 0.5828, + "step": 10596 + }, + { + "epoch": 0.2909665019220209, + "grad_norm": 0.466789186000824, + "learning_rate": 1.8990001058071314e-05, + "loss": 0.4491, + "step": 10597 + }, + { + "epoch": 0.2909939593629874, + "grad_norm": 0.45434898138046265, + "learning_rate": 1.898981190265524e-05, + "loss": 0.5592, + "step": 10598 + }, + { + "epoch": 0.2910214168039539, + "grad_norm": 0.7237570881843567, + "learning_rate": 1.89896227304703e-05, + "loss": 0.5324, + "step": 10599 + }, + { + "epoch": 0.2910488742449204, + "grad_norm": 0.40640079975128174, + "learning_rate": 1.8989433541516842e-05, + "loss": 0.4076, + "step": 10600 + }, + { + "epoch": 0.29107633168588687, + "grad_norm": 0.36548855900764465, + "learning_rate": 1.8989244335795222e-05, + "loss": 0.4758, + "step": 10601 + }, + { + "epoch": 0.29110378912685336, + "grad_norm": 0.35590359568595886, + "learning_rate": 1.8989055113305796e-05, + "loss": 0.5635, + "step": 10602 + }, + { + "epoch": 0.29113124656781986, + "grad_norm": 0.3500351905822754, + "learning_rate": 1.898886587404891e-05, + "loss": 0.427, + "step": 10603 + }, + { + "epoch": 0.2911587040087864, + "grad_norm": 0.3665693700313568, + "learning_rate": 1.8988676618024916e-05, + "loss": 0.5828, + "step": 10604 + }, + { + "epoch": 0.2911861614497529, + "grad_norm": 0.3762263357639313, + "learning_rate": 1.898848734523418e-05, + "loss": 0.5123, + "step": 10605 + }, + { + "epoch": 0.2912136188907194, + "grad_norm": 0.354993999004364, + "learning_rate": 1.8988298055677038e-05, + "loss": 0.4763, + "step": 10606 + }, + { + "epoch": 0.2912410763316859, + "grad_norm": 0.351469486951828, + "learning_rate": 1.8988108749353857e-05, + "loss": 0.5309, + "step": 10607 + }, + { + "epoch": 0.2912685337726524, + "grad_norm": 0.4178650975227356, + "learning_rate": 1.8987919426264984e-05, + "loss": 0.5358, + "step": 10608 + }, + { + "epoch": 0.2912959912136189, + "grad_norm": 0.4530799686908722, + "learning_rate": 1.898773008641077e-05, + "loss": 0.5837, + "step": 10609 + }, + { + "epoch": 0.29132344865458537, + "grad_norm": 0.5075688362121582, + "learning_rate": 1.8987540729791573e-05, + "loss": 0.4582, + "step": 10610 + }, + { + "epoch": 0.2913509060955519, + "grad_norm": 0.3918704688549042, + "learning_rate": 1.8987351356407744e-05, + "loss": 0.5408, + "step": 10611 + }, + { + "epoch": 0.2913783635365184, + "grad_norm": 0.37213873863220215, + "learning_rate": 1.8987161966259635e-05, + "loss": 0.5944, + "step": 10612 + }, + { + "epoch": 0.2914058209774849, + "grad_norm": 0.40451282262802124, + "learning_rate": 1.8986972559347602e-05, + "loss": 0.545, + "step": 10613 + }, + { + "epoch": 0.2914332784184514, + "grad_norm": 0.3403902053833008, + "learning_rate": 1.8986783135671997e-05, + "loss": 0.5075, + "step": 10614 + }, + { + "epoch": 0.2914607358594179, + "grad_norm": 0.33036917448043823, + "learning_rate": 1.8986593695233174e-05, + "loss": 0.5548, + "step": 10615 + }, + { + "epoch": 0.2914881933003844, + "grad_norm": 0.35949158668518066, + "learning_rate": 1.898640423803148e-05, + "loss": 0.5522, + "step": 10616 + }, + { + "epoch": 0.2915156507413509, + "grad_norm": 0.3353768587112427, + "learning_rate": 1.898621476406728e-05, + "loss": 0.5099, + "step": 10617 + }, + { + "epoch": 0.29154310818231743, + "grad_norm": 0.3969521224498749, + "learning_rate": 1.898602527334092e-05, + "loss": 0.5388, + "step": 10618 + }, + { + "epoch": 0.29157056562328393, + "grad_norm": 0.5075987577438354, + "learning_rate": 1.8985835765852753e-05, + "loss": 0.5726, + "step": 10619 + }, + { + "epoch": 0.2915980230642504, + "grad_norm": 0.3636513352394104, + "learning_rate": 1.8985646241603137e-05, + "loss": 0.5276, + "step": 10620 + }, + { + "epoch": 0.2916254805052169, + "grad_norm": 0.3523928225040436, + "learning_rate": 1.898545670059242e-05, + "loss": 0.5162, + "step": 10621 + }, + { + "epoch": 0.2916529379461834, + "grad_norm": 0.3505879342556, + "learning_rate": 1.8985267142820962e-05, + "loss": 0.5123, + "step": 10622 + }, + { + "epoch": 0.2916803953871499, + "grad_norm": 0.3566543459892273, + "learning_rate": 1.8985077568289108e-05, + "loss": 0.4946, + "step": 10623 + }, + { + "epoch": 0.2917078528281164, + "grad_norm": 0.39260542392730713, + "learning_rate": 1.898488797699722e-05, + "loss": 0.5302, + "step": 10624 + }, + { + "epoch": 0.29173531026908295, + "grad_norm": 0.3354779779911041, + "learning_rate": 1.898469836894565e-05, + "loss": 0.4769, + "step": 10625 + }, + { + "epoch": 0.29176276771004944, + "grad_norm": 0.3801306486129761, + "learning_rate": 1.8984508744134747e-05, + "loss": 0.4616, + "step": 10626 + }, + { + "epoch": 0.29179022515101594, + "grad_norm": 0.3731025457382202, + "learning_rate": 1.8984319102564867e-05, + "loss": 0.4498, + "step": 10627 + }, + { + "epoch": 0.29181768259198243, + "grad_norm": 0.38671189546585083, + "learning_rate": 1.898412944423637e-05, + "loss": 0.563, + "step": 10628 + }, + { + "epoch": 0.2918451400329489, + "grad_norm": 0.33593514561653137, + "learning_rate": 1.89839397691496e-05, + "loss": 0.5782, + "step": 10629 + }, + { + "epoch": 0.2918725974739154, + "grad_norm": 0.34059178829193115, + "learning_rate": 1.8983750077304912e-05, + "loss": 0.5726, + "step": 10630 + }, + { + "epoch": 0.2919000549148819, + "grad_norm": 0.32741212844848633, + "learning_rate": 1.8983560368702667e-05, + "loss": 0.5409, + "step": 10631 + }, + { + "epoch": 0.29192751235584846, + "grad_norm": 0.3799813687801361, + "learning_rate": 1.8983370643343214e-05, + "loss": 0.5612, + "step": 10632 + }, + { + "epoch": 0.29195496979681496, + "grad_norm": 0.3772587180137634, + "learning_rate": 1.8983180901226905e-05, + "loss": 0.5145, + "step": 10633 + }, + { + "epoch": 0.29198242723778145, + "grad_norm": 0.3765260577201843, + "learning_rate": 1.89829911423541e-05, + "loss": 0.5202, + "step": 10634 + }, + { + "epoch": 0.29200988467874794, + "grad_norm": 0.4091521203517914, + "learning_rate": 1.8982801366725147e-05, + "loss": 0.4754, + "step": 10635 + }, + { + "epoch": 0.29203734211971444, + "grad_norm": 0.62028568983078, + "learning_rate": 1.8982611574340404e-05, + "loss": 0.489, + "step": 10636 + }, + { + "epoch": 0.29206479956068093, + "grad_norm": 0.42159050703048706, + "learning_rate": 1.8982421765200224e-05, + "loss": 0.5379, + "step": 10637 + }, + { + "epoch": 0.2920922570016474, + "grad_norm": 0.32403329014778137, + "learning_rate": 1.898223193930496e-05, + "loss": 0.5124, + "step": 10638 + }, + { + "epoch": 0.292119714442614, + "grad_norm": 0.3590332567691803, + "learning_rate": 1.8982042096654964e-05, + "loss": 0.5353, + "step": 10639 + }, + { + "epoch": 0.29214717188358047, + "grad_norm": 0.3689941167831421, + "learning_rate": 1.89818522372506e-05, + "loss": 0.5249, + "step": 10640 + }, + { + "epoch": 0.29217462932454696, + "grad_norm": 0.4200587570667267, + "learning_rate": 1.8981662361092206e-05, + "loss": 0.4941, + "step": 10641 + }, + { + "epoch": 0.29220208676551346, + "grad_norm": 0.34544819593429565, + "learning_rate": 1.898147246818015e-05, + "loss": 0.4489, + "step": 10642 + }, + { + "epoch": 0.29222954420647995, + "grad_norm": 0.4147844910621643, + "learning_rate": 1.898128255851478e-05, + "loss": 0.5329, + "step": 10643 + }, + { + "epoch": 0.29225700164744645, + "grad_norm": 0.3494395911693573, + "learning_rate": 1.898109263209645e-05, + "loss": 0.4889, + "step": 10644 + }, + { + "epoch": 0.29228445908841294, + "grad_norm": 0.5508231520652771, + "learning_rate": 1.898090268892552e-05, + "loss": 0.5276, + "step": 10645 + }, + { + "epoch": 0.29231191652937943, + "grad_norm": 0.3594970703125, + "learning_rate": 1.8980712729002337e-05, + "loss": 0.4781, + "step": 10646 + }, + { + "epoch": 0.292339373970346, + "grad_norm": 0.37386807799339294, + "learning_rate": 1.898052275232726e-05, + "loss": 0.5329, + "step": 10647 + }, + { + "epoch": 0.2923668314113125, + "grad_norm": 0.45178818702697754, + "learning_rate": 1.8980332758900642e-05, + "loss": 0.5677, + "step": 10648 + }, + { + "epoch": 0.29239428885227897, + "grad_norm": 0.36211520433425903, + "learning_rate": 1.8980142748722837e-05, + "loss": 0.5231, + "step": 10649 + }, + { + "epoch": 0.29242174629324547, + "grad_norm": 0.4018748700618744, + "learning_rate": 1.8979952721794196e-05, + "loss": 0.5871, + "step": 10650 + }, + { + "epoch": 0.29244920373421196, + "grad_norm": 0.39492884278297424, + "learning_rate": 1.8979762678115082e-05, + "loss": 0.5733, + "step": 10651 + }, + { + "epoch": 0.29247666117517845, + "grad_norm": 0.30207088589668274, + "learning_rate": 1.897957261768584e-05, + "loss": 0.4986, + "step": 10652 + }, + { + "epoch": 0.29250411861614495, + "grad_norm": 0.32317227125167847, + "learning_rate": 1.8979382540506835e-05, + "loss": 0.4459, + "step": 10653 + }, + { + "epoch": 0.2925315760571115, + "grad_norm": 0.3597392737865448, + "learning_rate": 1.897919244657841e-05, + "loss": 0.4494, + "step": 10654 + }, + { + "epoch": 0.292559033498078, + "grad_norm": 0.37299197912216187, + "learning_rate": 1.897900233590093e-05, + "loss": 0.505, + "step": 10655 + }, + { + "epoch": 0.2925864909390445, + "grad_norm": 0.42168453335762024, + "learning_rate": 1.8978812208474742e-05, + "loss": 0.584, + "step": 10656 + }, + { + "epoch": 0.292613948380011, + "grad_norm": 0.361549437046051, + "learning_rate": 1.8978622064300204e-05, + "loss": 0.574, + "step": 10657 + }, + { + "epoch": 0.2926414058209775, + "grad_norm": 0.38631999492645264, + "learning_rate": 1.897843190337767e-05, + "loss": 0.5602, + "step": 10658 + }, + { + "epoch": 0.29266886326194397, + "grad_norm": 0.3645487129688263, + "learning_rate": 1.8978241725707495e-05, + "loss": 0.5663, + "step": 10659 + }, + { + "epoch": 0.29269632070291046, + "grad_norm": 0.4305476248264313, + "learning_rate": 1.8978051531290035e-05, + "loss": 0.5551, + "step": 10660 + }, + { + "epoch": 0.292723778143877, + "grad_norm": 0.32242971658706665, + "learning_rate": 1.897786132012564e-05, + "loss": 0.5104, + "step": 10661 + }, + { + "epoch": 0.2927512355848435, + "grad_norm": 0.3539206087589264, + "learning_rate": 1.897767109221467e-05, + "loss": 0.5456, + "step": 10662 + }, + { + "epoch": 0.29277869302581, + "grad_norm": 0.3587137460708618, + "learning_rate": 1.8977480847557482e-05, + "loss": 0.475, + "step": 10663 + }, + { + "epoch": 0.2928061504667765, + "grad_norm": 0.3514917194843292, + "learning_rate": 1.897729058615442e-05, + "loss": 0.5413, + "step": 10664 + }, + { + "epoch": 0.292833607907743, + "grad_norm": 0.3490965962409973, + "learning_rate": 1.8977100308005854e-05, + "loss": 0.5728, + "step": 10665 + }, + { + "epoch": 0.2928610653487095, + "grad_norm": 0.3661758303642273, + "learning_rate": 1.8976910013112123e-05, + "loss": 0.5357, + "step": 10666 + }, + { + "epoch": 0.292888522789676, + "grad_norm": 0.35939550399780273, + "learning_rate": 1.8976719701473594e-05, + "loss": 0.5825, + "step": 10667 + }, + { + "epoch": 0.2929159802306425, + "grad_norm": 0.3733099400997162, + "learning_rate": 1.8976529373090616e-05, + "loss": 0.5297, + "step": 10668 + }, + { + "epoch": 0.292943437671609, + "grad_norm": 0.44955357909202576, + "learning_rate": 1.8976339027963546e-05, + "loss": 0.4997, + "step": 10669 + }, + { + "epoch": 0.2929708951125755, + "grad_norm": 0.3806009888648987, + "learning_rate": 1.897614866609274e-05, + "loss": 0.6011, + "step": 10670 + }, + { + "epoch": 0.292998352553542, + "grad_norm": 0.39469921588897705, + "learning_rate": 1.897595828747855e-05, + "loss": 0.5321, + "step": 10671 + }, + { + "epoch": 0.2930258099945085, + "grad_norm": 0.3487900495529175, + "learning_rate": 1.8975767892121336e-05, + "loss": 0.5609, + "step": 10672 + }, + { + "epoch": 0.293053267435475, + "grad_norm": 0.33829739689826965, + "learning_rate": 1.8975577480021447e-05, + "loss": 0.4949, + "step": 10673 + }, + { + "epoch": 0.2930807248764415, + "grad_norm": 0.41052666306495667, + "learning_rate": 1.8975387051179244e-05, + "loss": 0.5489, + "step": 10674 + }, + { + "epoch": 0.29310818231740804, + "grad_norm": 0.3558547794818878, + "learning_rate": 1.8975196605595076e-05, + "loss": 0.5016, + "step": 10675 + }, + { + "epoch": 0.29313563975837453, + "grad_norm": 0.41525760293006897, + "learning_rate": 1.8975006143269304e-05, + "loss": 0.5591, + "step": 10676 + }, + { + "epoch": 0.293163097199341, + "grad_norm": 0.3811076581478119, + "learning_rate": 1.8974815664202283e-05, + "loss": 0.5101, + "step": 10677 + }, + { + "epoch": 0.2931905546403075, + "grad_norm": 0.3430793881416321, + "learning_rate": 1.8974625168394363e-05, + "loss": 0.5123, + "step": 10678 + }, + { + "epoch": 0.293218012081274, + "grad_norm": 0.37612423300743103, + "learning_rate": 1.8974434655845907e-05, + "loss": 0.5084, + "step": 10679 + }, + { + "epoch": 0.2932454695222405, + "grad_norm": 0.7070287466049194, + "learning_rate": 1.8974244126557262e-05, + "loss": 0.5816, + "step": 10680 + }, + { + "epoch": 0.293272926963207, + "grad_norm": 0.37966230511665344, + "learning_rate": 1.8974053580528786e-05, + "loss": 0.5478, + "step": 10681 + }, + { + "epoch": 0.29330038440417355, + "grad_norm": 0.37395399808883667, + "learning_rate": 1.8973863017760838e-05, + "loss": 0.4388, + "step": 10682 + }, + { + "epoch": 0.29332784184514005, + "grad_norm": 0.3886961340904236, + "learning_rate": 1.8973672438253774e-05, + "loss": 0.5208, + "step": 10683 + }, + { + "epoch": 0.29335529928610654, + "grad_norm": 0.3940000832080841, + "learning_rate": 1.8973481842007943e-05, + "loss": 0.5334, + "step": 10684 + }, + { + "epoch": 0.29338275672707304, + "grad_norm": 0.3680715560913086, + "learning_rate": 1.8973291229023708e-05, + "loss": 0.5249, + "step": 10685 + }, + { + "epoch": 0.29341021416803953, + "grad_norm": 0.3418077230453491, + "learning_rate": 1.8973100599301417e-05, + "loss": 0.4969, + "step": 10686 + }, + { + "epoch": 0.293437671609006, + "grad_norm": 0.36081090569496155, + "learning_rate": 1.897290995284143e-05, + "loss": 0.4295, + "step": 10687 + }, + { + "epoch": 0.2934651290499725, + "grad_norm": 0.417817085981369, + "learning_rate": 1.8972719289644103e-05, + "loss": 0.5146, + "step": 10688 + }, + { + "epoch": 0.29349258649093907, + "grad_norm": 0.6383808851242065, + "learning_rate": 1.8972528609709794e-05, + "loss": 0.549, + "step": 10689 + }, + { + "epoch": 0.29352004393190556, + "grad_norm": 0.3753795027732849, + "learning_rate": 1.8972337913038853e-05, + "loss": 0.5689, + "step": 10690 + }, + { + "epoch": 0.29354750137287206, + "grad_norm": 0.40231436491012573, + "learning_rate": 1.8972147199631635e-05, + "loss": 0.5544, + "step": 10691 + }, + { + "epoch": 0.29357495881383855, + "grad_norm": 0.35932618379592896, + "learning_rate": 1.8971956469488503e-05, + "loss": 0.4925, + "step": 10692 + }, + { + "epoch": 0.29360241625480504, + "grad_norm": 0.7994845509529114, + "learning_rate": 1.897176572260981e-05, + "loss": 0.4321, + "step": 10693 + }, + { + "epoch": 0.29362987369577154, + "grad_norm": 0.38827452063560486, + "learning_rate": 1.8971574958995906e-05, + "loss": 0.564, + "step": 10694 + }, + { + "epoch": 0.29365733113673803, + "grad_norm": 0.4744679927825928, + "learning_rate": 1.8971384178647152e-05, + "loss": 0.4762, + "step": 10695 + }, + { + "epoch": 0.2936847885777046, + "grad_norm": 0.386243999004364, + "learning_rate": 1.8971193381563907e-05, + "loss": 0.4425, + "step": 10696 + }, + { + "epoch": 0.2937122460186711, + "grad_norm": 0.3495384156703949, + "learning_rate": 1.897100256774652e-05, + "loss": 0.5209, + "step": 10697 + }, + { + "epoch": 0.29373970345963757, + "grad_norm": 0.3609231114387512, + "learning_rate": 1.897081173719535e-05, + "loss": 0.5279, + "step": 10698 + }, + { + "epoch": 0.29376716090060406, + "grad_norm": 0.3890160024166107, + "learning_rate": 1.897062088991075e-05, + "loss": 0.5719, + "step": 10699 + }, + { + "epoch": 0.29379461834157056, + "grad_norm": 0.34962525963783264, + "learning_rate": 1.8970430025893085e-05, + "loss": 0.4854, + "step": 10700 + }, + { + "epoch": 0.29382207578253705, + "grad_norm": 0.3605354130268097, + "learning_rate": 1.89702391451427e-05, + "loss": 0.5268, + "step": 10701 + }, + { + "epoch": 0.29384953322350355, + "grad_norm": 0.4129225015640259, + "learning_rate": 1.897004824765996e-05, + "loss": 0.4104, + "step": 10702 + }, + { + "epoch": 0.2938769906644701, + "grad_norm": 0.3510945439338684, + "learning_rate": 1.8969857333445218e-05, + "loss": 0.4675, + "step": 10703 + }, + { + "epoch": 0.2939044481054366, + "grad_norm": 0.39796701073646545, + "learning_rate": 1.8969666402498824e-05, + "loss": 0.5441, + "step": 10704 + }, + { + "epoch": 0.2939319055464031, + "grad_norm": 0.3555264174938202, + "learning_rate": 1.896947545482114e-05, + "loss": 0.6661, + "step": 10705 + }, + { + "epoch": 0.2939593629873696, + "grad_norm": 0.3644757568836212, + "learning_rate": 1.8969284490412528e-05, + "loss": 0.4444, + "step": 10706 + }, + { + "epoch": 0.29398682042833607, + "grad_norm": 0.35775241255760193, + "learning_rate": 1.896909350927333e-05, + "loss": 0.5771, + "step": 10707 + }, + { + "epoch": 0.29401427786930256, + "grad_norm": 0.38780707120895386, + "learning_rate": 1.8968902511403914e-05, + "loss": 0.5008, + "step": 10708 + }, + { + "epoch": 0.29404173531026906, + "grad_norm": 0.3810136020183563, + "learning_rate": 1.8968711496804634e-05, + "loss": 0.4859, + "step": 10709 + }, + { + "epoch": 0.2940691927512356, + "grad_norm": 0.38880324363708496, + "learning_rate": 1.896852046547584e-05, + "loss": 0.5811, + "step": 10710 + }, + { + "epoch": 0.2940966501922021, + "grad_norm": 0.34620943665504456, + "learning_rate": 1.8968329417417896e-05, + "loss": 0.5437, + "step": 10711 + }, + { + "epoch": 0.2941241076331686, + "grad_norm": 0.3644077479839325, + "learning_rate": 1.8968138352631152e-05, + "loss": 0.4247, + "step": 10712 + }, + { + "epoch": 0.2941515650741351, + "grad_norm": 0.34870901703834534, + "learning_rate": 1.896794727111597e-05, + "loss": 0.5363, + "step": 10713 + }, + { + "epoch": 0.2941790225151016, + "grad_norm": 0.4676774740219116, + "learning_rate": 1.8967756172872707e-05, + "loss": 0.5235, + "step": 10714 + }, + { + "epoch": 0.2942064799560681, + "grad_norm": 0.3515307605266571, + "learning_rate": 1.896756505790171e-05, + "loss": 0.4944, + "step": 10715 + }, + { + "epoch": 0.2942339373970346, + "grad_norm": 0.35742703080177307, + "learning_rate": 1.896737392620335e-05, + "loss": 0.5574, + "step": 10716 + }, + { + "epoch": 0.2942613948380011, + "grad_norm": 0.33269786834716797, + "learning_rate": 1.896718277777797e-05, + "loss": 0.4496, + "step": 10717 + }, + { + "epoch": 0.2942888522789676, + "grad_norm": 0.4283444583415985, + "learning_rate": 1.8966991612625932e-05, + "loss": 0.5117, + "step": 10718 + }, + { + "epoch": 0.2943163097199341, + "grad_norm": 0.3926866054534912, + "learning_rate": 1.8966800430747593e-05, + "loss": 0.5748, + "step": 10719 + }, + { + "epoch": 0.2943437671609006, + "grad_norm": 0.39476293325424194, + "learning_rate": 1.896660923214331e-05, + "loss": 0.5296, + "step": 10720 + }, + { + "epoch": 0.2943712246018671, + "grad_norm": 0.3824125826358795, + "learning_rate": 1.8966418016813443e-05, + "loss": 0.5509, + "step": 10721 + }, + { + "epoch": 0.2943986820428336, + "grad_norm": 0.4133097529411316, + "learning_rate": 1.8966226784758342e-05, + "loss": 0.5274, + "step": 10722 + }, + { + "epoch": 0.2944261394838001, + "grad_norm": 0.34004297852516174, + "learning_rate": 1.8966035535978363e-05, + "loss": 0.5118, + "step": 10723 + }, + { + "epoch": 0.29445359692476664, + "grad_norm": 0.3975982666015625, + "learning_rate": 1.896584427047387e-05, + "loss": 0.5356, + "step": 10724 + }, + { + "epoch": 0.29448105436573313, + "grad_norm": 0.3809800446033478, + "learning_rate": 1.8965652988245214e-05, + "loss": 0.4528, + "step": 10725 + }, + { + "epoch": 0.2945085118066996, + "grad_norm": 0.3797608017921448, + "learning_rate": 1.8965461689292756e-05, + "loss": 0.5172, + "step": 10726 + }, + { + "epoch": 0.2945359692476661, + "grad_norm": 0.3808315396308899, + "learning_rate": 1.8965270373616845e-05, + "loss": 0.4681, + "step": 10727 + }, + { + "epoch": 0.2945634266886326, + "grad_norm": 0.3571028709411621, + "learning_rate": 1.8965079041217848e-05, + "loss": 0.5899, + "step": 10728 + }, + { + "epoch": 0.2945908841295991, + "grad_norm": 0.37878352403640747, + "learning_rate": 1.8964887692096116e-05, + "loss": 0.6057, + "step": 10729 + }, + { + "epoch": 0.2946183415705656, + "grad_norm": 0.3301376402378082, + "learning_rate": 1.896469632625201e-05, + "loss": 0.5044, + "step": 10730 + }, + { + "epoch": 0.29464579901153215, + "grad_norm": 0.3999161720275879, + "learning_rate": 1.896450494368588e-05, + "loss": 0.5327, + "step": 10731 + }, + { + "epoch": 0.29467325645249864, + "grad_norm": 0.8599289059638977, + "learning_rate": 1.896431354439809e-05, + "loss": 0.4956, + "step": 10732 + }, + { + "epoch": 0.29470071389346514, + "grad_norm": 0.3344440758228302, + "learning_rate": 1.8964122128388992e-05, + "loss": 0.4491, + "step": 10733 + }, + { + "epoch": 0.29472817133443163, + "grad_norm": 0.39317572116851807, + "learning_rate": 1.8963930695658946e-05, + "loss": 0.521, + "step": 10734 + }, + { + "epoch": 0.2947556287753981, + "grad_norm": 0.8964151740074158, + "learning_rate": 1.896373924620831e-05, + "loss": 0.5063, + "step": 10735 + }, + { + "epoch": 0.2947830862163646, + "grad_norm": 0.412073016166687, + "learning_rate": 1.8963547780037436e-05, + "loss": 0.6236, + "step": 10736 + }, + { + "epoch": 0.2948105436573311, + "grad_norm": 0.4031982421875, + "learning_rate": 1.8963356297146688e-05, + "loss": 0.5961, + "step": 10737 + }, + { + "epoch": 0.29483800109829766, + "grad_norm": 0.3629284203052521, + "learning_rate": 1.8963164797536414e-05, + "loss": 0.4738, + "step": 10738 + }, + { + "epoch": 0.29486545853926416, + "grad_norm": 0.36002859473228455, + "learning_rate": 1.8962973281206984e-05, + "loss": 0.46, + "step": 10739 + }, + { + "epoch": 0.29489291598023065, + "grad_norm": 0.3896573781967163, + "learning_rate": 1.8962781748158746e-05, + "loss": 0.4905, + "step": 10740 + }, + { + "epoch": 0.29492037342119715, + "grad_norm": 0.3493849039077759, + "learning_rate": 1.8962590198392057e-05, + "loss": 0.5057, + "step": 10741 + }, + { + "epoch": 0.29494783086216364, + "grad_norm": 0.35759106278419495, + "learning_rate": 1.8962398631907278e-05, + "loss": 0.5894, + "step": 10742 + }, + { + "epoch": 0.29497528830313013, + "grad_norm": 0.3753238022327423, + "learning_rate": 1.896220704870477e-05, + "loss": 0.5197, + "step": 10743 + }, + { + "epoch": 0.29500274574409663, + "grad_norm": 0.40956246852874756, + "learning_rate": 1.896201544878488e-05, + "loss": 0.5725, + "step": 10744 + }, + { + "epoch": 0.2950302031850632, + "grad_norm": 0.3645186424255371, + "learning_rate": 1.896182383214797e-05, + "loss": 0.5235, + "step": 10745 + }, + { + "epoch": 0.29505766062602967, + "grad_norm": 0.3461386561393738, + "learning_rate": 1.89616321987944e-05, + "loss": 0.5132, + "step": 10746 + }, + { + "epoch": 0.29508511806699617, + "grad_norm": 0.3824485242366791, + "learning_rate": 1.8961440548724525e-05, + "loss": 0.4629, + "step": 10747 + }, + { + "epoch": 0.29511257550796266, + "grad_norm": 0.3827904164791107, + "learning_rate": 1.8961248881938706e-05, + "loss": 0.434, + "step": 10748 + }, + { + "epoch": 0.29514003294892915, + "grad_norm": 0.48615413904190063, + "learning_rate": 1.8961057198437295e-05, + "loss": 0.5412, + "step": 10749 + }, + { + "epoch": 0.29516749038989565, + "grad_norm": 0.4157959520816803, + "learning_rate": 1.8960865498220654e-05, + "loss": 0.5921, + "step": 10750 + }, + { + "epoch": 0.29519494783086214, + "grad_norm": 0.4102717339992523, + "learning_rate": 1.8960673781289136e-05, + "loss": 0.4758, + "step": 10751 + }, + { + "epoch": 0.2952224052718287, + "grad_norm": 0.35036247968673706, + "learning_rate": 1.8960482047643106e-05, + "loss": 0.4619, + "step": 10752 + }, + { + "epoch": 0.2952498627127952, + "grad_norm": 0.36394867300987244, + "learning_rate": 1.8960290297282914e-05, + "loss": 0.5925, + "step": 10753 + }, + { + "epoch": 0.2952773201537617, + "grad_norm": 0.3919617831707001, + "learning_rate": 1.8960098530208923e-05, + "loss": 0.5275, + "step": 10754 + }, + { + "epoch": 0.2953047775947282, + "grad_norm": 0.3491906225681305, + "learning_rate": 1.8959906746421484e-05, + "loss": 0.4655, + "step": 10755 + }, + { + "epoch": 0.29533223503569467, + "grad_norm": 0.35618141293525696, + "learning_rate": 1.8959714945920963e-05, + "loss": 0.6225, + "step": 10756 + }, + { + "epoch": 0.29535969247666116, + "grad_norm": 0.3772583603858948, + "learning_rate": 1.895952312870771e-05, + "loss": 0.5359, + "step": 10757 + }, + { + "epoch": 0.29538714991762766, + "grad_norm": 0.406768262386322, + "learning_rate": 1.895933129478209e-05, + "loss": 0.5385, + "step": 10758 + }, + { + "epoch": 0.2954146073585942, + "grad_norm": 0.3257448077201843, + "learning_rate": 1.895913944414446e-05, + "loss": 0.4436, + "step": 10759 + }, + { + "epoch": 0.2954420647995607, + "grad_norm": 0.34982961416244507, + "learning_rate": 1.8958947576795174e-05, + "loss": 0.4768, + "step": 10760 + }, + { + "epoch": 0.2954695222405272, + "grad_norm": 0.35456717014312744, + "learning_rate": 1.895875569273459e-05, + "loss": 0.4307, + "step": 10761 + }, + { + "epoch": 0.2954969796814937, + "grad_norm": 0.3436332643032074, + "learning_rate": 1.8958563791963067e-05, + "loss": 0.4552, + "step": 10762 + }, + { + "epoch": 0.2955244371224602, + "grad_norm": 0.38015687465667725, + "learning_rate": 1.8958371874480964e-05, + "loss": 0.5585, + "step": 10763 + }, + { + "epoch": 0.2955518945634267, + "grad_norm": 0.37490296363830566, + "learning_rate": 1.895817994028864e-05, + "loss": 0.5001, + "step": 10764 + }, + { + "epoch": 0.29557935200439317, + "grad_norm": 0.37402117252349854, + "learning_rate": 1.8957987989386448e-05, + "loss": 0.52, + "step": 10765 + }, + { + "epoch": 0.2956068094453597, + "grad_norm": 0.40306907892227173, + "learning_rate": 1.8957796021774753e-05, + "loss": 0.5567, + "step": 10766 + }, + { + "epoch": 0.2956342668863262, + "grad_norm": 0.3416852355003357, + "learning_rate": 1.8957604037453907e-05, + "loss": 0.4812, + "step": 10767 + }, + { + "epoch": 0.2956617243272927, + "grad_norm": 0.3615260720252991, + "learning_rate": 1.8957412036424272e-05, + "loss": 0.4837, + "step": 10768 + }, + { + "epoch": 0.2956891817682592, + "grad_norm": 0.3758598268032074, + "learning_rate": 1.89572200186862e-05, + "loss": 0.4532, + "step": 10769 + }, + { + "epoch": 0.2957166392092257, + "grad_norm": 0.4432010054588318, + "learning_rate": 1.895702798424006e-05, + "loss": 0.5066, + "step": 10770 + }, + { + "epoch": 0.2957440966501922, + "grad_norm": 0.3523518741130829, + "learning_rate": 1.8956835933086203e-05, + "loss": 0.5782, + "step": 10771 + }, + { + "epoch": 0.2957715540911587, + "grad_norm": 0.47596898674964905, + "learning_rate": 1.8956643865224987e-05, + "loss": 0.4943, + "step": 10772 + }, + { + "epoch": 0.29579901153212523, + "grad_norm": 0.36161455512046814, + "learning_rate": 1.8956451780656772e-05, + "loss": 0.5132, + "step": 10773 + }, + { + "epoch": 0.2958264689730917, + "grad_norm": 0.36001235246658325, + "learning_rate": 1.8956259679381913e-05, + "loss": 0.5229, + "step": 10774 + }, + { + "epoch": 0.2958539264140582, + "grad_norm": 0.35767462849617004, + "learning_rate": 1.8956067561400772e-05, + "loss": 0.4461, + "step": 10775 + }, + { + "epoch": 0.2958813838550247, + "grad_norm": 0.3993496596813202, + "learning_rate": 1.895587542671371e-05, + "loss": 0.5866, + "step": 10776 + }, + { + "epoch": 0.2959088412959912, + "grad_norm": 0.34247928857803345, + "learning_rate": 1.895568327532108e-05, + "loss": 0.5829, + "step": 10777 + }, + { + "epoch": 0.2959362987369577, + "grad_norm": 0.4307877719402313, + "learning_rate": 1.895549110722324e-05, + "loss": 0.5422, + "step": 10778 + }, + { + "epoch": 0.2959637561779242, + "grad_norm": 0.34574517607688904, + "learning_rate": 1.8955298922420556e-05, + "loss": 0.4858, + "step": 10779 + }, + { + "epoch": 0.2959912136188907, + "grad_norm": 0.38505983352661133, + "learning_rate": 1.8955106720913377e-05, + "loss": 0.5945, + "step": 10780 + }, + { + "epoch": 0.29601867105985724, + "grad_norm": 0.4265718460083008, + "learning_rate": 1.895491450270207e-05, + "loss": 0.5641, + "step": 10781 + }, + { + "epoch": 0.29604612850082374, + "grad_norm": 0.3774309456348419, + "learning_rate": 1.8954722267786986e-05, + "loss": 0.5249, + "step": 10782 + }, + { + "epoch": 0.29607358594179023, + "grad_norm": 0.40438979864120483, + "learning_rate": 1.8954530016168485e-05, + "loss": 0.5457, + "step": 10783 + }, + { + "epoch": 0.2961010433827567, + "grad_norm": 0.3367365002632141, + "learning_rate": 1.895433774784693e-05, + "loss": 0.4888, + "step": 10784 + }, + { + "epoch": 0.2961285008237232, + "grad_norm": 0.3954540491104126, + "learning_rate": 1.895414546282268e-05, + "loss": 0.5069, + "step": 10785 + }, + { + "epoch": 0.2961559582646897, + "grad_norm": 0.41326645016670227, + "learning_rate": 1.8953953161096085e-05, + "loss": 0.5623, + "step": 10786 + }, + { + "epoch": 0.2961834157056562, + "grad_norm": 0.40745478868484497, + "learning_rate": 1.8953760842667514e-05, + "loss": 0.5332, + "step": 10787 + }, + { + "epoch": 0.29621087314662276, + "grad_norm": 0.38341668248176575, + "learning_rate": 1.895356850753732e-05, + "loss": 0.4969, + "step": 10788 + }, + { + "epoch": 0.29623833058758925, + "grad_norm": 0.4105336368083954, + "learning_rate": 1.8953376155705864e-05, + "loss": 0.6407, + "step": 10789 + }, + { + "epoch": 0.29626578802855574, + "grad_norm": 0.3423267900943756, + "learning_rate": 1.8953183787173505e-05, + "loss": 0.4796, + "step": 10790 + }, + { + "epoch": 0.29629324546952224, + "grad_norm": 0.37765052914619446, + "learning_rate": 1.8952991401940598e-05, + "loss": 0.5262, + "step": 10791 + }, + { + "epoch": 0.29632070291048873, + "grad_norm": 0.33892494440078735, + "learning_rate": 1.8952799000007505e-05, + "loss": 0.54, + "step": 10792 + }, + { + "epoch": 0.2963481603514552, + "grad_norm": 0.39131444692611694, + "learning_rate": 1.8952606581374584e-05, + "loss": 0.4029, + "step": 10793 + }, + { + "epoch": 0.2963756177924217, + "grad_norm": 0.4347560405731201, + "learning_rate": 1.8952414146042195e-05, + "loss": 0.5395, + "step": 10794 + }, + { + "epoch": 0.29640307523338827, + "grad_norm": 0.38824957609176636, + "learning_rate": 1.8952221694010698e-05, + "loss": 0.5775, + "step": 10795 + }, + { + "epoch": 0.29643053267435476, + "grad_norm": 0.3228376805782318, + "learning_rate": 1.895202922528045e-05, + "loss": 0.4573, + "step": 10796 + }, + { + "epoch": 0.29645799011532126, + "grad_norm": 0.40403038263320923, + "learning_rate": 1.895183673985181e-05, + "loss": 0.491, + "step": 10797 + }, + { + "epoch": 0.29648544755628775, + "grad_norm": 0.32940474152565, + "learning_rate": 1.8951644237725137e-05, + "loss": 0.5031, + "step": 10798 + }, + { + "epoch": 0.29651290499725425, + "grad_norm": 0.3782308101654053, + "learning_rate": 1.895145171890079e-05, + "loss": 0.5925, + "step": 10799 + }, + { + "epoch": 0.29654036243822074, + "grad_norm": 0.36629363894462585, + "learning_rate": 1.8951259183379127e-05, + "loss": 0.5115, + "step": 10800 + }, + { + "epoch": 0.29656781987918723, + "grad_norm": 0.38566383719444275, + "learning_rate": 1.895106663116051e-05, + "loss": 0.5162, + "step": 10801 + }, + { + "epoch": 0.2965952773201538, + "grad_norm": 0.3724574148654938, + "learning_rate": 1.8950874062245298e-05, + "loss": 0.4638, + "step": 10802 + }, + { + "epoch": 0.2966227347611203, + "grad_norm": 0.3856683671474457, + "learning_rate": 1.895068147663385e-05, + "loss": 0.5624, + "step": 10803 + }, + { + "epoch": 0.29665019220208677, + "grad_norm": 0.3430090844631195, + "learning_rate": 1.895048887432652e-05, + "loss": 0.4519, + "step": 10804 + }, + { + "epoch": 0.29667764964305327, + "grad_norm": 0.38990867137908936, + "learning_rate": 1.8950296255323675e-05, + "loss": 0.524, + "step": 10805 + }, + { + "epoch": 0.29670510708401976, + "grad_norm": 0.31869977712631226, + "learning_rate": 1.895010361962567e-05, + "loss": 0.4839, + "step": 10806 + }, + { + "epoch": 0.29673256452498625, + "grad_norm": 0.4051859676837921, + "learning_rate": 1.8949910967232866e-05, + "loss": 0.4615, + "step": 10807 + }, + { + "epoch": 0.29676002196595275, + "grad_norm": 0.5086353421211243, + "learning_rate": 1.894971829814562e-05, + "loss": 0.4516, + "step": 10808 + }, + { + "epoch": 0.2967874794069193, + "grad_norm": 0.3583243489265442, + "learning_rate": 1.8949525612364296e-05, + "loss": 0.56, + "step": 10809 + }, + { + "epoch": 0.2968149368478858, + "grad_norm": 0.34585142135620117, + "learning_rate": 1.8949332909889246e-05, + "loss": 0.508, + "step": 10810 + }, + { + "epoch": 0.2968423942888523, + "grad_norm": 0.3738400936126709, + "learning_rate": 1.8949140190720836e-05, + "loss": 0.4975, + "step": 10811 + }, + { + "epoch": 0.2968698517298188, + "grad_norm": 0.501816987991333, + "learning_rate": 1.8948947454859426e-05, + "loss": 0.4902, + "step": 10812 + }, + { + "epoch": 0.2968973091707853, + "grad_norm": 0.4884268641471863, + "learning_rate": 1.894875470230537e-05, + "loss": 1.0136, + "step": 10813 + }, + { + "epoch": 0.29692476661175177, + "grad_norm": 0.4167112410068512, + "learning_rate": 1.894856193305903e-05, + "loss": 0.5, + "step": 10814 + }, + { + "epoch": 0.29695222405271826, + "grad_norm": 0.3739244043827057, + "learning_rate": 1.8948369147120768e-05, + "loss": 0.5406, + "step": 10815 + }, + { + "epoch": 0.2969796814936848, + "grad_norm": 0.37864840030670166, + "learning_rate": 1.894817634449094e-05, + "loss": 0.5474, + "step": 10816 + }, + { + "epoch": 0.2970071389346513, + "grad_norm": 0.3553939461708069, + "learning_rate": 1.8947983525169906e-05, + "loss": 0.5815, + "step": 10817 + }, + { + "epoch": 0.2970345963756178, + "grad_norm": 0.4425792992115021, + "learning_rate": 1.8947790689158028e-05, + "loss": 0.554, + "step": 10818 + }, + { + "epoch": 0.2970620538165843, + "grad_norm": 1.8045668601989746, + "learning_rate": 1.8947597836455664e-05, + "loss": 0.5429, + "step": 10819 + }, + { + "epoch": 0.2970895112575508, + "grad_norm": 0.3370573818683624, + "learning_rate": 1.8947404967063176e-05, + "loss": 0.4308, + "step": 10820 + }, + { + "epoch": 0.2971169686985173, + "grad_norm": 0.41697758436203003, + "learning_rate": 1.894721208098092e-05, + "loss": 0.5071, + "step": 10821 + }, + { + "epoch": 0.2971444261394838, + "grad_norm": 0.38402098417282104, + "learning_rate": 1.894701917820926e-05, + "loss": 0.5711, + "step": 10822 + }, + { + "epoch": 0.2971718835804503, + "grad_norm": 0.4011977016925812, + "learning_rate": 1.8946826258748552e-05, + "loss": 0.5497, + "step": 10823 + }, + { + "epoch": 0.2971993410214168, + "grad_norm": 0.4152972400188446, + "learning_rate": 1.894663332259916e-05, + "loss": 0.5524, + "step": 10824 + }, + { + "epoch": 0.2972267984623833, + "grad_norm": 0.3698970675468445, + "learning_rate": 1.894644036976144e-05, + "loss": 0.5423, + "step": 10825 + }, + { + "epoch": 0.2972542559033498, + "grad_norm": 0.3738550841808319, + "learning_rate": 1.8946247400235753e-05, + "loss": 0.5507, + "step": 10826 + }, + { + "epoch": 0.2972817133443163, + "grad_norm": 0.4567551612854004, + "learning_rate": 1.894605441402246e-05, + "loss": 0.571, + "step": 10827 + }, + { + "epoch": 0.2973091707852828, + "grad_norm": 0.3612056374549866, + "learning_rate": 1.8945861411121916e-05, + "loss": 0.5048, + "step": 10828 + }, + { + "epoch": 0.2973366282262493, + "grad_norm": 0.4756089150905609, + "learning_rate": 1.894566839153449e-05, + "loss": 0.4877, + "step": 10829 + }, + { + "epoch": 0.29736408566721584, + "grad_norm": 0.4706931412220001, + "learning_rate": 1.8945475355260538e-05, + "loss": 0.5432, + "step": 10830 + }, + { + "epoch": 0.29739154310818233, + "grad_norm": 0.33526045083999634, + "learning_rate": 1.8945282302300415e-05, + "loss": 0.5719, + "step": 10831 + }, + { + "epoch": 0.2974190005491488, + "grad_norm": 0.37052178382873535, + "learning_rate": 1.8945089232654488e-05, + "loss": 0.5342, + "step": 10832 + }, + { + "epoch": 0.2974464579901153, + "grad_norm": 0.3655042350292206, + "learning_rate": 1.8944896146323117e-05, + "loss": 0.5706, + "step": 10833 + }, + { + "epoch": 0.2974739154310818, + "grad_norm": 0.33872321248054504, + "learning_rate": 1.8944703043306657e-05, + "loss": 0.527, + "step": 10834 + }, + { + "epoch": 0.2975013728720483, + "grad_norm": 0.6805616021156311, + "learning_rate": 1.894450992360547e-05, + "loss": 0.5659, + "step": 10835 + }, + { + "epoch": 0.2975288303130148, + "grad_norm": 0.324282705783844, + "learning_rate": 1.8944316787219917e-05, + "loss": 0.4753, + "step": 10836 + }, + { + "epoch": 0.29755628775398135, + "grad_norm": 0.38324716687202454, + "learning_rate": 1.8944123634150362e-05, + "loss": 0.561, + "step": 10837 + }, + { + "epoch": 0.29758374519494785, + "grad_norm": 0.36725595593452454, + "learning_rate": 1.894393046439716e-05, + "loss": 0.5573, + "step": 10838 + }, + { + "epoch": 0.29761120263591434, + "grad_norm": 0.35875651240348816, + "learning_rate": 1.8943737277960676e-05, + "loss": 0.5518, + "step": 10839 + }, + { + "epoch": 0.29763866007688083, + "grad_norm": 0.4366750121116638, + "learning_rate": 1.894354407484126e-05, + "loss": 0.5713, + "step": 10840 + }, + { + "epoch": 0.29766611751784733, + "grad_norm": 0.3965785801410675, + "learning_rate": 1.8943350855039288e-05, + "loss": 0.4631, + "step": 10841 + }, + { + "epoch": 0.2976935749588138, + "grad_norm": 0.458773136138916, + "learning_rate": 1.8943157618555104e-05, + "loss": 0.556, + "step": 10842 + }, + { + "epoch": 0.2977210323997803, + "grad_norm": 0.339426189661026, + "learning_rate": 1.8942964365389085e-05, + "loss": 0.4292, + "step": 10843 + }, + { + "epoch": 0.29774848984074687, + "grad_norm": 0.3585992753505707, + "learning_rate": 1.8942771095541578e-05, + "loss": 0.5333, + "step": 10844 + }, + { + "epoch": 0.29777594728171336, + "grad_norm": 0.37483206391334534, + "learning_rate": 1.894257780901295e-05, + "loss": 0.4779, + "step": 10845 + }, + { + "epoch": 0.29780340472267985, + "grad_norm": 0.3996804356575012, + "learning_rate": 1.8942384505803562e-05, + "loss": 0.4891, + "step": 10846 + }, + { + "epoch": 0.29783086216364635, + "grad_norm": 0.3551115095615387, + "learning_rate": 1.894219118591377e-05, + "loss": 0.5872, + "step": 10847 + }, + { + "epoch": 0.29785831960461284, + "grad_norm": 0.5261492133140564, + "learning_rate": 1.894199784934394e-05, + "loss": 0.55, + "step": 10848 + }, + { + "epoch": 0.29788577704557934, + "grad_norm": 0.43165284395217896, + "learning_rate": 1.894180449609443e-05, + "loss": 0.6002, + "step": 10849 + }, + { + "epoch": 0.29791323448654583, + "grad_norm": 0.3774586021900177, + "learning_rate": 1.8941611126165597e-05, + "loss": 0.5531, + "step": 10850 + }, + { + "epoch": 0.2979406919275124, + "grad_norm": 0.3163756728172302, + "learning_rate": 1.8941417739557806e-05, + "loss": 0.5142, + "step": 10851 + }, + { + "epoch": 0.2979681493684789, + "grad_norm": 0.37798184156417847, + "learning_rate": 1.8941224336271423e-05, + "loss": 0.5171, + "step": 10852 + }, + { + "epoch": 0.29799560680944537, + "grad_norm": 0.337776243686676, + "learning_rate": 1.89410309163068e-05, + "loss": 0.4721, + "step": 10853 + }, + { + "epoch": 0.29802306425041186, + "grad_norm": 0.36611634492874146, + "learning_rate": 1.8940837479664297e-05, + "loss": 0.5413, + "step": 10854 + }, + { + "epoch": 0.29805052169137836, + "grad_norm": 0.38074663281440735, + "learning_rate": 1.8940644026344283e-05, + "loss": 0.4903, + "step": 10855 + }, + { + "epoch": 0.29807797913234485, + "grad_norm": 0.4038684368133545, + "learning_rate": 1.8940450556347112e-05, + "loss": 0.5315, + "step": 10856 + }, + { + "epoch": 0.29810543657331134, + "grad_norm": 0.4130924344062805, + "learning_rate": 1.8940257069673147e-05, + "loss": 0.5714, + "step": 10857 + }, + { + "epoch": 0.2981328940142779, + "grad_norm": 0.34778186678886414, + "learning_rate": 1.894006356632275e-05, + "loss": 0.4828, + "step": 10858 + }, + { + "epoch": 0.2981603514552444, + "grad_norm": 0.39373865723609924, + "learning_rate": 1.893987004629628e-05, + "loss": 0.6504, + "step": 10859 + }, + { + "epoch": 0.2981878088962109, + "grad_norm": 0.4166102409362793, + "learning_rate": 1.89396765095941e-05, + "loss": 0.6126, + "step": 10860 + }, + { + "epoch": 0.2982152663371774, + "grad_norm": 0.37797075510025024, + "learning_rate": 1.8939482956216573e-05, + "loss": 0.5222, + "step": 10861 + }, + { + "epoch": 0.29824272377814387, + "grad_norm": 0.3921407163143158, + "learning_rate": 1.8939289386164055e-05, + "loss": 0.5674, + "step": 10862 + }, + { + "epoch": 0.29827018121911036, + "grad_norm": 0.3702322244644165, + "learning_rate": 1.8939095799436908e-05, + "loss": 0.5663, + "step": 10863 + }, + { + "epoch": 0.29829763866007686, + "grad_norm": 0.36403998732566833, + "learning_rate": 1.8938902196035494e-05, + "loss": 0.4339, + "step": 10864 + }, + { + "epoch": 0.2983250961010434, + "grad_norm": 0.3975658714771271, + "learning_rate": 1.8938708575960175e-05, + "loss": 0.5348, + "step": 10865 + }, + { + "epoch": 0.2983525535420099, + "grad_norm": 0.3963545560836792, + "learning_rate": 1.8938514939211315e-05, + "loss": 0.4667, + "step": 10866 + }, + { + "epoch": 0.2983800109829764, + "grad_norm": 0.40832045674324036, + "learning_rate": 1.8938321285789267e-05, + "loss": 0.4775, + "step": 10867 + }, + { + "epoch": 0.2984074684239429, + "grad_norm": 0.3746139407157898, + "learning_rate": 1.8938127615694397e-05, + "loss": 0.4818, + "step": 10868 + }, + { + "epoch": 0.2984349258649094, + "grad_norm": 0.36171770095825195, + "learning_rate": 1.893793392892707e-05, + "loss": 0.5496, + "step": 10869 + }, + { + "epoch": 0.2984623833058759, + "grad_norm": 0.3743970990180969, + "learning_rate": 1.893774022548764e-05, + "loss": 0.5395, + "step": 10870 + }, + { + "epoch": 0.29848984074684237, + "grad_norm": 0.37003394961357117, + "learning_rate": 1.8937546505376474e-05, + "loss": 0.509, + "step": 10871 + }, + { + "epoch": 0.2985172981878089, + "grad_norm": 0.4097583293914795, + "learning_rate": 1.8937352768593933e-05, + "loss": 0.5206, + "step": 10872 + }, + { + "epoch": 0.2985447556287754, + "grad_norm": 0.3751893639564514, + "learning_rate": 1.8937159015140372e-05, + "loss": 0.5733, + "step": 10873 + }, + { + "epoch": 0.2985722130697419, + "grad_norm": 0.39508625864982605, + "learning_rate": 1.893696524501616e-05, + "loss": 0.4766, + "step": 10874 + }, + { + "epoch": 0.2985996705107084, + "grad_norm": 0.3583570122718811, + "learning_rate": 1.8936771458221655e-05, + "loss": 0.4721, + "step": 10875 + }, + { + "epoch": 0.2986271279516749, + "grad_norm": 0.37513467669487, + "learning_rate": 1.893657765475722e-05, + "loss": 0.5363, + "step": 10876 + }, + { + "epoch": 0.2986545853926414, + "grad_norm": 0.3767146170139313, + "learning_rate": 1.8936383834623214e-05, + "loss": 0.5867, + "step": 10877 + }, + { + "epoch": 0.2986820428336079, + "grad_norm": 0.428607314825058, + "learning_rate": 1.893618999782e-05, + "loss": 0.5477, + "step": 10878 + }, + { + "epoch": 0.29870950027457444, + "grad_norm": 0.32567232847213745, + "learning_rate": 1.8935996144347938e-05, + "loss": 0.4484, + "step": 10879 + }, + { + "epoch": 0.29873695771554093, + "grad_norm": 0.3490471839904785, + "learning_rate": 1.8935802274207392e-05, + "loss": 0.5881, + "step": 10880 + }, + { + "epoch": 0.2987644151565074, + "grad_norm": 0.398642361164093, + "learning_rate": 1.893560838739873e-05, + "loss": 0.5796, + "step": 10881 + }, + { + "epoch": 0.2987918725974739, + "grad_norm": 0.35972437262535095, + "learning_rate": 1.8935414483922296e-05, + "loss": 0.5341, + "step": 10882 + }, + { + "epoch": 0.2988193300384404, + "grad_norm": 0.3990684151649475, + "learning_rate": 1.8935220563778468e-05, + "loss": 0.5463, + "step": 10883 + }, + { + "epoch": 0.2988467874794069, + "grad_norm": 0.4865363538265228, + "learning_rate": 1.89350266269676e-05, + "loss": 0.5355, + "step": 10884 + }, + { + "epoch": 0.2988742449203734, + "grad_norm": 0.33082637190818787, + "learning_rate": 1.8934832673490057e-05, + "loss": 0.4935, + "step": 10885 + }, + { + "epoch": 0.29890170236133995, + "grad_norm": 0.36841699481010437, + "learning_rate": 1.89346387033462e-05, + "loss": 0.5268, + "step": 10886 + }, + { + "epoch": 0.29892915980230644, + "grad_norm": 0.3702302873134613, + "learning_rate": 1.8934444716536385e-05, + "loss": 0.4521, + "step": 10887 + }, + { + "epoch": 0.29895661724327294, + "grad_norm": 0.3535923957824707, + "learning_rate": 1.893425071306098e-05, + "loss": 0.5261, + "step": 10888 + }, + { + "epoch": 0.29898407468423943, + "grad_norm": 0.35352078080177307, + "learning_rate": 1.893405669292035e-05, + "loss": 0.5131, + "step": 10889 + }, + { + "epoch": 0.2990115321252059, + "grad_norm": 0.45138221979141235, + "learning_rate": 1.8933862656114853e-05, + "loss": 0.4558, + "step": 10890 + }, + { + "epoch": 0.2990389895661724, + "grad_norm": 0.3415718376636505, + "learning_rate": 1.8933668602644847e-05, + "loss": 0.4999, + "step": 10891 + }, + { + "epoch": 0.2990664470071389, + "grad_norm": 0.47314587235450745, + "learning_rate": 1.89334745325107e-05, + "loss": 0.5296, + "step": 10892 + }, + { + "epoch": 0.29909390444810546, + "grad_norm": 0.42635369300842285, + "learning_rate": 1.893328044571277e-05, + "loss": 0.614, + "step": 10893 + }, + { + "epoch": 0.29912136188907196, + "grad_norm": 0.35159507393836975, + "learning_rate": 1.8933086342251426e-05, + "loss": 0.4873, + "step": 10894 + }, + { + "epoch": 0.29914881933003845, + "grad_norm": 0.35765641927719116, + "learning_rate": 1.893289222212702e-05, + "loss": 0.4631, + "step": 10895 + }, + { + "epoch": 0.29917627677100495, + "grad_norm": 0.4177396595478058, + "learning_rate": 1.893269808533992e-05, + "loss": 0.5504, + "step": 10896 + }, + { + "epoch": 0.29920373421197144, + "grad_norm": 0.4810444414615631, + "learning_rate": 1.8932503931890487e-05, + "loss": 0.5831, + "step": 10897 + }, + { + "epoch": 0.29923119165293793, + "grad_norm": 0.33061009645462036, + "learning_rate": 1.8932309761779084e-05, + "loss": 0.4452, + "step": 10898 + }, + { + "epoch": 0.2992586490939044, + "grad_norm": 0.3618420660495758, + "learning_rate": 1.8932115575006072e-05, + "loss": 0.4546, + "step": 10899 + }, + { + "epoch": 0.299286106534871, + "grad_norm": 0.4312288761138916, + "learning_rate": 1.8931921371571814e-05, + "loss": 0.545, + "step": 10900 + }, + { + "epoch": 0.29931356397583747, + "grad_norm": 0.3678516149520874, + "learning_rate": 1.893172715147667e-05, + "loss": 0.5695, + "step": 10901 + }, + { + "epoch": 0.29934102141680397, + "grad_norm": 0.38743850588798523, + "learning_rate": 1.8931532914721008e-05, + "loss": 0.4791, + "step": 10902 + }, + { + "epoch": 0.29936847885777046, + "grad_norm": 0.35151833295822144, + "learning_rate": 1.893133866130518e-05, + "loss": 0.6062, + "step": 10903 + }, + { + "epoch": 0.29939593629873695, + "grad_norm": 0.34097206592559814, + "learning_rate": 1.8931144391229563e-05, + "loss": 0.5439, + "step": 10904 + }, + { + "epoch": 0.29942339373970345, + "grad_norm": 0.342652827501297, + "learning_rate": 1.8930950104494506e-05, + "loss": 0.4525, + "step": 10905 + }, + { + "epoch": 0.29945085118066994, + "grad_norm": 0.4296620786190033, + "learning_rate": 1.893075580110038e-05, + "loss": 0.5906, + "step": 10906 + }, + { + "epoch": 0.2994783086216365, + "grad_norm": 0.3718644082546234, + "learning_rate": 1.893056148104754e-05, + "loss": 0.5376, + "step": 10907 + }, + { + "epoch": 0.299505766062603, + "grad_norm": 0.36686626076698303, + "learning_rate": 1.8930367144336356e-05, + "loss": 0.4849, + "step": 10908 + }, + { + "epoch": 0.2995332235035695, + "grad_norm": 0.3789626657962799, + "learning_rate": 1.8930172790967183e-05, + "loss": 0.5524, + "step": 10909 + }, + { + "epoch": 0.299560680944536, + "grad_norm": 0.39235207438468933, + "learning_rate": 1.8929978420940392e-05, + "loss": 0.4888, + "step": 10910 + }, + { + "epoch": 0.29958813838550247, + "grad_norm": 0.42099934816360474, + "learning_rate": 1.892978403425634e-05, + "loss": 0.5967, + "step": 10911 + }, + { + "epoch": 0.29961559582646896, + "grad_norm": 0.3725258708000183, + "learning_rate": 1.8929589630915387e-05, + "loss": 0.6167, + "step": 10912 + }, + { + "epoch": 0.29964305326743546, + "grad_norm": 0.3896276652812958, + "learning_rate": 1.8929395210917904e-05, + "loss": 0.4957, + "step": 10913 + }, + { + "epoch": 0.29967051070840195, + "grad_norm": 0.41210320591926575, + "learning_rate": 1.8929200774264247e-05, + "loss": 0.6769, + "step": 10914 + }, + { + "epoch": 0.2996979681493685, + "grad_norm": 0.40179261565208435, + "learning_rate": 1.8929006320954778e-05, + "loss": 0.4715, + "step": 10915 + }, + { + "epoch": 0.299725425590335, + "grad_norm": 0.38657140731811523, + "learning_rate": 1.892881185098987e-05, + "loss": 0.5082, + "step": 10916 + }, + { + "epoch": 0.2997528830313015, + "grad_norm": 0.48375290632247925, + "learning_rate": 1.892861736436987e-05, + "loss": 0.5634, + "step": 10917 + }, + { + "epoch": 0.299780340472268, + "grad_norm": 0.36648663878440857, + "learning_rate": 1.892842286109515e-05, + "loss": 0.5542, + "step": 10918 + }, + { + "epoch": 0.2998077979132345, + "grad_norm": 0.32879289984703064, + "learning_rate": 1.8928228341166077e-05, + "loss": 0.5671, + "step": 10919 + }, + { + "epoch": 0.29983525535420097, + "grad_norm": 0.41883763670921326, + "learning_rate": 1.892803380458301e-05, + "loss": 0.5689, + "step": 10920 + }, + { + "epoch": 0.29986271279516746, + "grad_norm": 0.3617636561393738, + "learning_rate": 1.8927839251346302e-05, + "loss": 0.5446, + "step": 10921 + }, + { + "epoch": 0.299890170236134, + "grad_norm": 0.358537882566452, + "learning_rate": 1.892764468145633e-05, + "loss": 0.5016, + "step": 10922 + }, + { + "epoch": 0.2999176276771005, + "grad_norm": 0.33985936641693115, + "learning_rate": 1.8927450094913448e-05, + "loss": 0.4849, + "step": 10923 + }, + { + "epoch": 0.299945085118067, + "grad_norm": 0.35180002450942993, + "learning_rate": 1.8927255491718023e-05, + "loss": 0.5006, + "step": 10924 + }, + { + "epoch": 0.2999725425590335, + "grad_norm": 0.3203871548175812, + "learning_rate": 1.892706087187042e-05, + "loss": 0.4593, + "step": 10925 + }, + { + "epoch": 0.3, + "grad_norm": 0.3373394310474396, + "learning_rate": 1.8926866235371e-05, + "loss": 0.4657, + "step": 10926 + }, + { + "epoch": 0.3000274574409665, + "grad_norm": 0.3684391975402832, + "learning_rate": 1.892667158222012e-05, + "loss": 0.5571, + "step": 10927 + }, + { + "epoch": 0.300054914881933, + "grad_norm": 0.3817688226699829, + "learning_rate": 1.892647691241815e-05, + "loss": 0.5024, + "step": 10928 + }, + { + "epoch": 0.3000823723228995, + "grad_norm": 0.34217390418052673, + "learning_rate": 1.8926282225965456e-05, + "loss": 0.5153, + "step": 10929 + }, + { + "epoch": 0.300109829763866, + "grad_norm": 0.3489822745323181, + "learning_rate": 1.8926087522862392e-05, + "loss": 0.5126, + "step": 10930 + }, + { + "epoch": 0.3001372872048325, + "grad_norm": 0.30839526653289795, + "learning_rate": 1.8925892803109328e-05, + "loss": 0.4316, + "step": 10931 + }, + { + "epoch": 0.300164744645799, + "grad_norm": 0.38616976141929626, + "learning_rate": 1.8925698066706625e-05, + "loss": 0.4847, + "step": 10932 + }, + { + "epoch": 0.3001922020867655, + "grad_norm": 0.3926428258419037, + "learning_rate": 1.892550331365465e-05, + "loss": 0.515, + "step": 10933 + }, + { + "epoch": 0.300219659527732, + "grad_norm": 0.3826693594455719, + "learning_rate": 1.8925308543953756e-05, + "loss": 0.5745, + "step": 10934 + }, + { + "epoch": 0.3002471169686985, + "grad_norm": 0.455049991607666, + "learning_rate": 1.8925113757604315e-05, + "loss": 0.5463, + "step": 10935 + }, + { + "epoch": 0.30027457440966504, + "grad_norm": 0.4945071041584015, + "learning_rate": 1.892491895460669e-05, + "loss": 0.4937, + "step": 10936 + }, + { + "epoch": 0.30030203185063153, + "grad_norm": 0.36703863739967346, + "learning_rate": 1.892472413496124e-05, + "loss": 0.5464, + "step": 10937 + }, + { + "epoch": 0.30032948929159803, + "grad_norm": 0.33624207973480225, + "learning_rate": 1.8924529298668334e-05, + "loss": 0.4928, + "step": 10938 + }, + { + "epoch": 0.3003569467325645, + "grad_norm": 0.3720129430294037, + "learning_rate": 1.892433444572833e-05, + "loss": 0.526, + "step": 10939 + }, + { + "epoch": 0.300384404173531, + "grad_norm": 0.3663005828857422, + "learning_rate": 1.8924139576141596e-05, + "loss": 0.5699, + "step": 10940 + }, + { + "epoch": 0.3004118616144975, + "grad_norm": 0.4051869809627533, + "learning_rate": 1.8923944689908492e-05, + "loss": 0.6255, + "step": 10941 + }, + { + "epoch": 0.300439319055464, + "grad_norm": 0.3736560046672821, + "learning_rate": 1.8923749787029384e-05, + "loss": 0.5133, + "step": 10942 + }, + { + "epoch": 0.30046677649643055, + "grad_norm": 0.39923352003097534, + "learning_rate": 1.8923554867504633e-05, + "loss": 0.4813, + "step": 10943 + }, + { + "epoch": 0.30049423393739705, + "grad_norm": 0.37185561656951904, + "learning_rate": 1.8923359931334606e-05, + "loss": 0.4301, + "step": 10944 + }, + { + "epoch": 0.30052169137836354, + "grad_norm": 0.37370091676712036, + "learning_rate": 1.8923164978519664e-05, + "loss": 0.5423, + "step": 10945 + }, + { + "epoch": 0.30054914881933004, + "grad_norm": 0.3686252236366272, + "learning_rate": 1.8922970009060172e-05, + "loss": 0.5039, + "step": 10946 + }, + { + "epoch": 0.30057660626029653, + "grad_norm": 0.43140631914138794, + "learning_rate": 1.8922775022956493e-05, + "loss": 0.527, + "step": 10947 + }, + { + "epoch": 0.300604063701263, + "grad_norm": 0.4005013406276703, + "learning_rate": 1.8922580020208986e-05, + "loss": 0.48, + "step": 10948 + }, + { + "epoch": 0.3006315211422295, + "grad_norm": 0.3619464337825775, + "learning_rate": 1.8922385000818026e-05, + "loss": 0.525, + "step": 10949 + }, + { + "epoch": 0.30065897858319607, + "grad_norm": 0.39588168263435364, + "learning_rate": 1.8922189964783967e-05, + "loss": 0.5758, + "step": 10950 + }, + { + "epoch": 0.30068643602416256, + "grad_norm": 0.3503384292125702, + "learning_rate": 1.8921994912107178e-05, + "loss": 0.4646, + "step": 10951 + }, + { + "epoch": 0.30071389346512906, + "grad_norm": 0.4395134150981903, + "learning_rate": 1.8921799842788018e-05, + "loss": 0.5409, + "step": 10952 + }, + { + "epoch": 0.30074135090609555, + "grad_norm": 0.49381551146507263, + "learning_rate": 1.8921604756826855e-05, + "loss": 0.5247, + "step": 10953 + }, + { + "epoch": 0.30076880834706204, + "grad_norm": 0.33386942744255066, + "learning_rate": 1.8921409654224053e-05, + "loss": 0.3821, + "step": 10954 + }, + { + "epoch": 0.30079626578802854, + "grad_norm": 0.39377403259277344, + "learning_rate": 1.8921214534979975e-05, + "loss": 0.5262, + "step": 10955 + }, + { + "epoch": 0.30082372322899503, + "grad_norm": 0.38531485199928284, + "learning_rate": 1.892101939909498e-05, + "loss": 0.5776, + "step": 10956 + }, + { + "epoch": 0.3008511806699616, + "grad_norm": 0.34894055128097534, + "learning_rate": 1.8920824246569444e-05, + "loss": 0.6013, + "step": 10957 + }, + { + "epoch": 0.3008786381109281, + "grad_norm": 0.321336954832077, + "learning_rate": 1.892062907740372e-05, + "loss": 0.4718, + "step": 10958 + }, + { + "epoch": 0.30090609555189457, + "grad_norm": 0.41395363211631775, + "learning_rate": 1.8920433891598172e-05, + "loss": 0.5943, + "step": 10959 + }, + { + "epoch": 0.30093355299286106, + "grad_norm": 0.3755747973918915, + "learning_rate": 1.892023868915317e-05, + "loss": 0.4931, + "step": 10960 + }, + { + "epoch": 0.30096101043382756, + "grad_norm": 0.37217792868614197, + "learning_rate": 1.892004347006908e-05, + "loss": 0.4486, + "step": 10961 + }, + { + "epoch": 0.30098846787479405, + "grad_norm": 0.3231324851512909, + "learning_rate": 1.891984823434626e-05, + "loss": 0.4737, + "step": 10962 + }, + { + "epoch": 0.30101592531576055, + "grad_norm": 0.3688288629055023, + "learning_rate": 1.8919652981985072e-05, + "loss": 0.5913, + "step": 10963 + }, + { + "epoch": 0.3010433827567271, + "grad_norm": 0.34187984466552734, + "learning_rate": 1.891945771298589e-05, + "loss": 0.5196, + "step": 10964 + }, + { + "epoch": 0.3010708401976936, + "grad_norm": 0.3866761326789856, + "learning_rate": 1.8919262427349067e-05, + "loss": 0.5297, + "step": 10965 + }, + { + "epoch": 0.3010982976386601, + "grad_norm": 0.3917812705039978, + "learning_rate": 1.891906712507498e-05, + "loss": 0.6087, + "step": 10966 + }, + { + "epoch": 0.3011257550796266, + "grad_norm": 0.41688334941864014, + "learning_rate": 1.891887180616398e-05, + "loss": 0.4813, + "step": 10967 + }, + { + "epoch": 0.30115321252059307, + "grad_norm": 0.3556533753871918, + "learning_rate": 1.891867647061644e-05, + "loss": 0.4935, + "step": 10968 + }, + { + "epoch": 0.30118066996155957, + "grad_norm": 0.5756800770759583, + "learning_rate": 1.891848111843272e-05, + "loss": 0.4915, + "step": 10969 + }, + { + "epoch": 0.30120812740252606, + "grad_norm": 0.3702562153339386, + "learning_rate": 1.8918285749613185e-05, + "loss": 0.5295, + "step": 10970 + }, + { + "epoch": 0.3012355848434926, + "grad_norm": 0.3238142430782318, + "learning_rate": 1.8918090364158208e-05, + "loss": 0.4898, + "step": 10971 + }, + { + "epoch": 0.3012630422844591, + "grad_norm": 0.37529510259628296, + "learning_rate": 1.8917894962068137e-05, + "loss": 0.4808, + "step": 10972 + }, + { + "epoch": 0.3012904997254256, + "grad_norm": 0.39626359939575195, + "learning_rate": 1.8917699543343352e-05, + "loss": 0.5674, + "step": 10973 + }, + { + "epoch": 0.3013179571663921, + "grad_norm": 0.3661440908908844, + "learning_rate": 1.891750410798421e-05, + "loss": 0.4872, + "step": 10974 + }, + { + "epoch": 0.3013454146073586, + "grad_norm": 0.35855865478515625, + "learning_rate": 1.8917308655991074e-05, + "loss": 0.5679, + "step": 10975 + }, + { + "epoch": 0.3013728720483251, + "grad_norm": 0.3775087296962738, + "learning_rate": 1.8917113187364312e-05, + "loss": 0.5289, + "step": 10976 + }, + { + "epoch": 0.3014003294892916, + "grad_norm": 0.3446570932865143, + "learning_rate": 1.891691770210429e-05, + "loss": 0.5188, + "step": 10977 + }, + { + "epoch": 0.3014277869302581, + "grad_norm": 0.370281457901001, + "learning_rate": 1.891672220021137e-05, + "loss": 0.4836, + "step": 10978 + }, + { + "epoch": 0.3014552443712246, + "grad_norm": 0.354131281375885, + "learning_rate": 1.8916526681685918e-05, + "loss": 0.525, + "step": 10979 + }, + { + "epoch": 0.3014827018121911, + "grad_norm": 0.4400595724582672, + "learning_rate": 1.8916331146528296e-05, + "loss": 0.5939, + "step": 10980 + }, + { + "epoch": 0.3015101592531576, + "grad_norm": 0.36991164088249207, + "learning_rate": 1.891613559473887e-05, + "loss": 0.5561, + "step": 10981 + }, + { + "epoch": 0.3015376166941241, + "grad_norm": 0.4268181324005127, + "learning_rate": 1.8915940026318007e-05, + "loss": 0.533, + "step": 10982 + }, + { + "epoch": 0.3015650741350906, + "grad_norm": 0.37012046575546265, + "learning_rate": 1.8915744441266072e-05, + "loss": 0.5706, + "step": 10983 + }, + { + "epoch": 0.3015925315760571, + "grad_norm": 0.5497652888298035, + "learning_rate": 1.8915548839583425e-05, + "loss": 0.604, + "step": 10984 + }, + { + "epoch": 0.30161998901702364, + "grad_norm": 0.39667990803718567, + "learning_rate": 1.8915353221270437e-05, + "loss": 0.5641, + "step": 10985 + }, + { + "epoch": 0.30164744645799013, + "grad_norm": 0.37480154633522034, + "learning_rate": 1.8915157586327465e-05, + "loss": 0.4555, + "step": 10986 + }, + { + "epoch": 0.3016749038989566, + "grad_norm": 0.4517703056335449, + "learning_rate": 1.8914961934754882e-05, + "loss": 0.5646, + "step": 10987 + }, + { + "epoch": 0.3017023613399231, + "grad_norm": 0.36712831258773804, + "learning_rate": 1.891476626655305e-05, + "loss": 0.4609, + "step": 10988 + }, + { + "epoch": 0.3017298187808896, + "grad_norm": 0.38053327798843384, + "learning_rate": 1.8914570581722334e-05, + "loss": 0.5727, + "step": 10989 + }, + { + "epoch": 0.3017572762218561, + "grad_norm": 0.358246386051178, + "learning_rate": 1.8914374880263095e-05, + "loss": 0.4547, + "step": 10990 + }, + { + "epoch": 0.3017847336628226, + "grad_norm": 0.37945061922073364, + "learning_rate": 1.8914179162175705e-05, + "loss": 0.5111, + "step": 10991 + }, + { + "epoch": 0.30181219110378915, + "grad_norm": 0.44828668236732483, + "learning_rate": 1.8913983427460522e-05, + "loss": 0.5382, + "step": 10992 + }, + { + "epoch": 0.30183964854475565, + "grad_norm": 0.44445618987083435, + "learning_rate": 1.891378767611792e-05, + "loss": 0.5636, + "step": 10993 + }, + { + "epoch": 0.30186710598572214, + "grad_norm": 0.4053727984428406, + "learning_rate": 1.8913591908148258e-05, + "loss": 0.5417, + "step": 10994 + }, + { + "epoch": 0.30189456342668863, + "grad_norm": 0.3573754131793976, + "learning_rate": 1.8913396123551902e-05, + "loss": 0.5857, + "step": 10995 + }, + { + "epoch": 0.3019220208676551, + "grad_norm": 0.35638779401779175, + "learning_rate": 1.8913200322329213e-05, + "loss": 0.4659, + "step": 10996 + }, + { + "epoch": 0.3019494783086216, + "grad_norm": 0.3810458481311798, + "learning_rate": 1.8913004504480566e-05, + "loss": 0.5382, + "step": 10997 + }, + { + "epoch": 0.3019769357495881, + "grad_norm": 0.3639042377471924, + "learning_rate": 1.891280867000632e-05, + "loss": 0.533, + "step": 10998 + }, + { + "epoch": 0.30200439319055467, + "grad_norm": 0.3233224153518677, + "learning_rate": 1.891261281890684e-05, + "loss": 0.4893, + "step": 10999 + }, + { + "epoch": 0.30203185063152116, + "grad_norm": 0.581595242023468, + "learning_rate": 1.891241695118249e-05, + "loss": 0.526, + "step": 11000 + }, + { + "epoch": 0.30205930807248765, + "grad_norm": 0.4037179946899414, + "learning_rate": 1.891222106683364e-05, + "loss": 0.5573, + "step": 11001 + }, + { + "epoch": 0.30208676551345415, + "grad_norm": 0.3376685082912445, + "learning_rate": 1.8912025165860655e-05, + "loss": 0.547, + "step": 11002 + }, + { + "epoch": 0.30211422295442064, + "grad_norm": 0.4141163229942322, + "learning_rate": 1.8911829248263897e-05, + "loss": 0.658, + "step": 11003 + }, + { + "epoch": 0.30214168039538714, + "grad_norm": 0.34919866919517517, + "learning_rate": 1.8911633314043737e-05, + "loss": 0.486, + "step": 11004 + }, + { + "epoch": 0.30216913783635363, + "grad_norm": 0.3765205442905426, + "learning_rate": 1.8911437363200533e-05, + "loss": 0.5295, + "step": 11005 + }, + { + "epoch": 0.3021965952773202, + "grad_norm": 0.3734496831893921, + "learning_rate": 1.8911241395734652e-05, + "loss": 0.4452, + "step": 11006 + }, + { + "epoch": 0.3022240527182867, + "grad_norm": 0.36797529458999634, + "learning_rate": 1.8911045411646464e-05, + "loss": 0.5721, + "step": 11007 + }, + { + "epoch": 0.30225151015925317, + "grad_norm": 0.38844358921051025, + "learning_rate": 1.8910849410936333e-05, + "loss": 0.4141, + "step": 11008 + }, + { + "epoch": 0.30227896760021966, + "grad_norm": 0.3836982846260071, + "learning_rate": 1.8910653393604623e-05, + "loss": 0.539, + "step": 11009 + }, + { + "epoch": 0.30230642504118616, + "grad_norm": 0.3667266070842743, + "learning_rate": 1.89104573596517e-05, + "loss": 0.4171, + "step": 11010 + }, + { + "epoch": 0.30233388248215265, + "grad_norm": 0.3465175926685333, + "learning_rate": 1.8910261309077932e-05, + "loss": 0.5992, + "step": 11011 + }, + { + "epoch": 0.30236133992311914, + "grad_norm": 0.36270904541015625, + "learning_rate": 1.891006524188368e-05, + "loss": 0.5199, + "step": 11012 + }, + { + "epoch": 0.3023887973640857, + "grad_norm": 0.36580193042755127, + "learning_rate": 1.8909869158069313e-05, + "loss": 0.4885, + "step": 11013 + }, + { + "epoch": 0.3024162548050522, + "grad_norm": 0.37753745913505554, + "learning_rate": 1.8909673057635197e-05, + "loss": 0.515, + "step": 11014 + }, + { + "epoch": 0.3024437122460187, + "grad_norm": 0.3852120041847229, + "learning_rate": 1.89094769405817e-05, + "loss": 0.5866, + "step": 11015 + }, + { + "epoch": 0.3024711696869852, + "grad_norm": 0.3899472653865814, + "learning_rate": 1.8909280806909184e-05, + "loss": 0.5152, + "step": 11016 + }, + { + "epoch": 0.30249862712795167, + "grad_norm": 0.38010329008102417, + "learning_rate": 1.8909084656618014e-05, + "loss": 0.4494, + "step": 11017 + }, + { + "epoch": 0.30252608456891816, + "grad_norm": 0.3977871835231781, + "learning_rate": 1.8908888489708556e-05, + "loss": 0.5496, + "step": 11018 + }, + { + "epoch": 0.30255354200988466, + "grad_norm": 0.35185250639915466, + "learning_rate": 1.890869230618118e-05, + "loss": 0.5602, + "step": 11019 + }, + { + "epoch": 0.3025809994508512, + "grad_norm": 0.38584157824516296, + "learning_rate": 1.890849610603625e-05, + "loss": 0.5779, + "step": 11020 + }, + { + "epoch": 0.3026084568918177, + "grad_norm": 0.39722099900245667, + "learning_rate": 1.890829988927413e-05, + "loss": 0.6081, + "step": 11021 + }, + { + "epoch": 0.3026359143327842, + "grad_norm": 0.3683856725692749, + "learning_rate": 1.8908103655895186e-05, + "loss": 0.5024, + "step": 11022 + }, + { + "epoch": 0.3026633717737507, + "grad_norm": 0.4113459885120392, + "learning_rate": 1.890790740589979e-05, + "loss": 0.548, + "step": 11023 + }, + { + "epoch": 0.3026908292147172, + "grad_norm": 0.5018664002418518, + "learning_rate": 1.8907711139288302e-05, + "loss": 0.5726, + "step": 11024 + }, + { + "epoch": 0.3027182866556837, + "grad_norm": 0.37239977717399597, + "learning_rate": 1.8907514856061085e-05, + "loss": 0.5864, + "step": 11025 + }, + { + "epoch": 0.30274574409665017, + "grad_norm": 0.3443738520145416, + "learning_rate": 1.8907318556218514e-05, + "loss": 0.4307, + "step": 11026 + }, + { + "epoch": 0.3027732015376167, + "grad_norm": 0.4101811647415161, + "learning_rate": 1.890712223976095e-05, + "loss": 0.5526, + "step": 11027 + }, + { + "epoch": 0.3028006589785832, + "grad_norm": 0.3058456778526306, + "learning_rate": 1.890692590668876e-05, + "loss": 0.4442, + "step": 11028 + }, + { + "epoch": 0.3028281164195497, + "grad_norm": 0.33990174531936646, + "learning_rate": 1.8906729557002316e-05, + "loss": 0.4946, + "step": 11029 + }, + { + "epoch": 0.3028555738605162, + "grad_norm": 0.3908669352531433, + "learning_rate": 1.8906533190701973e-05, + "loss": 0.4889, + "step": 11030 + }, + { + "epoch": 0.3028830313014827, + "grad_norm": 0.37746283411979675, + "learning_rate": 1.8906336807788102e-05, + "loss": 0.5259, + "step": 11031 + }, + { + "epoch": 0.3029104887424492, + "grad_norm": 0.3542831242084503, + "learning_rate": 1.8906140408261073e-05, + "loss": 0.5853, + "step": 11032 + }, + { + "epoch": 0.3029379461834157, + "grad_norm": 0.35915061831474304, + "learning_rate": 1.890594399212125e-05, + "loss": 0.5319, + "step": 11033 + }, + { + "epoch": 0.30296540362438223, + "grad_norm": 0.449190229177475, + "learning_rate": 1.8905747559368998e-05, + "loss": 0.5418, + "step": 11034 + }, + { + "epoch": 0.30299286106534873, + "grad_norm": 0.3490431606769562, + "learning_rate": 1.8905551110004684e-05, + "loss": 0.512, + "step": 11035 + }, + { + "epoch": 0.3030203185063152, + "grad_norm": 0.35249394178390503, + "learning_rate": 1.8905354644028672e-05, + "loss": 0.4811, + "step": 11036 + }, + { + "epoch": 0.3030477759472817, + "grad_norm": 0.3732965588569641, + "learning_rate": 1.8905158161441337e-05, + "loss": 0.5558, + "step": 11037 + }, + { + "epoch": 0.3030752333882482, + "grad_norm": 0.4014623761177063, + "learning_rate": 1.8904961662243036e-05, + "loss": 0.5062, + "step": 11038 + }, + { + "epoch": 0.3031026908292147, + "grad_norm": 0.4227347671985626, + "learning_rate": 1.8904765146434144e-05, + "loss": 0.5547, + "step": 11039 + }, + { + "epoch": 0.3031301482701812, + "grad_norm": 0.35590022802352905, + "learning_rate": 1.8904568614015018e-05, + "loss": 0.5176, + "step": 11040 + }, + { + "epoch": 0.30315760571114775, + "grad_norm": 0.3649507761001587, + "learning_rate": 1.8904372064986033e-05, + "loss": 0.4598, + "step": 11041 + }, + { + "epoch": 0.30318506315211424, + "grad_norm": 0.4455070495605469, + "learning_rate": 1.8904175499347548e-05, + "loss": 0.5999, + "step": 11042 + }, + { + "epoch": 0.30321252059308074, + "grad_norm": 0.3774770498275757, + "learning_rate": 1.8903978917099937e-05, + "loss": 0.488, + "step": 11043 + }, + { + "epoch": 0.30323997803404723, + "grad_norm": 0.46598726511001587, + "learning_rate": 1.890378231824356e-05, + "loss": 0.5827, + "step": 11044 + }, + { + "epoch": 0.3032674354750137, + "grad_norm": 0.4246653616428375, + "learning_rate": 1.8903585702778793e-05, + "loss": 0.5708, + "step": 11045 + }, + { + "epoch": 0.3032948929159802, + "grad_norm": 0.34432244300842285, + "learning_rate": 1.890338907070599e-05, + "loss": 0.4961, + "step": 11046 + }, + { + "epoch": 0.3033223503569467, + "grad_norm": 0.4035293459892273, + "learning_rate": 1.890319242202553e-05, + "loss": 0.4107, + "step": 11047 + }, + { + "epoch": 0.3033498077979132, + "grad_norm": 0.3767447769641876, + "learning_rate": 1.8902995756737773e-05, + "loss": 0.594, + "step": 11048 + }, + { + "epoch": 0.30337726523887976, + "grad_norm": 0.3230535387992859, + "learning_rate": 1.8902799074843088e-05, + "loss": 0.5147, + "step": 11049 + }, + { + "epoch": 0.30340472267984625, + "grad_norm": 0.39285826683044434, + "learning_rate": 1.890260237634184e-05, + "loss": 0.5682, + "step": 11050 + }, + { + "epoch": 0.30343218012081274, + "grad_norm": 0.6553149223327637, + "learning_rate": 1.89024056612344e-05, + "loss": 0.5417, + "step": 11051 + }, + { + "epoch": 0.30345963756177924, + "grad_norm": 0.3421376049518585, + "learning_rate": 1.890220892952113e-05, + "loss": 0.5252, + "step": 11052 + }, + { + "epoch": 0.30348709500274573, + "grad_norm": 0.353819340467453, + "learning_rate": 1.89020121812024e-05, + "loss": 0.5146, + "step": 11053 + }, + { + "epoch": 0.3035145524437122, + "grad_norm": 0.3898753821849823, + "learning_rate": 1.8901815416278574e-05, + "loss": 0.5325, + "step": 11054 + }, + { + "epoch": 0.3035420098846787, + "grad_norm": 0.4109979569911957, + "learning_rate": 1.8901618634750022e-05, + "loss": 0.5444, + "step": 11055 + }, + { + "epoch": 0.30356946732564527, + "grad_norm": 0.3525926470756531, + "learning_rate": 1.890142183661711e-05, + "loss": 0.555, + "step": 11056 + }, + { + "epoch": 0.30359692476661176, + "grad_norm": 0.3611142337322235, + "learning_rate": 1.8901225021880206e-05, + "loss": 0.5166, + "step": 11057 + }, + { + "epoch": 0.30362438220757826, + "grad_norm": 0.33160778880119324, + "learning_rate": 1.8901028190539676e-05, + "loss": 0.5397, + "step": 11058 + }, + { + "epoch": 0.30365183964854475, + "grad_norm": 0.398023784160614, + "learning_rate": 1.8900831342595888e-05, + "loss": 0.6044, + "step": 11059 + }, + { + "epoch": 0.30367929708951125, + "grad_norm": 0.38794273138046265, + "learning_rate": 1.890063447804921e-05, + "loss": 0.6014, + "step": 11060 + }, + { + "epoch": 0.30370675453047774, + "grad_norm": 0.3740580677986145, + "learning_rate": 1.890043759690001e-05, + "loss": 0.4537, + "step": 11061 + }, + { + "epoch": 0.30373421197144423, + "grad_norm": 0.4302629828453064, + "learning_rate": 1.8900240699148647e-05, + "loss": 0.5907, + "step": 11062 + }, + { + "epoch": 0.3037616694124108, + "grad_norm": 0.34816381335258484, + "learning_rate": 1.8900043784795493e-05, + "loss": 0.4908, + "step": 11063 + }, + { + "epoch": 0.3037891268533773, + "grad_norm": 0.34268495440483093, + "learning_rate": 1.8899846853840924e-05, + "loss": 0.5366, + "step": 11064 + }, + { + "epoch": 0.30381658429434377, + "grad_norm": 0.39991295337677, + "learning_rate": 1.8899649906285296e-05, + "loss": 0.5373, + "step": 11065 + }, + { + "epoch": 0.30384404173531027, + "grad_norm": 0.38865283131599426, + "learning_rate": 1.8899452942128983e-05, + "loss": 0.5386, + "step": 11066 + }, + { + "epoch": 0.30387149917627676, + "grad_norm": 0.3438073992729187, + "learning_rate": 1.8899255961372347e-05, + "loss": 0.5009, + "step": 11067 + }, + { + "epoch": 0.30389895661724325, + "grad_norm": 0.410197913646698, + "learning_rate": 1.8899058964015758e-05, + "loss": 0.5667, + "step": 11068 + }, + { + "epoch": 0.30392641405820975, + "grad_norm": 0.37694212794303894, + "learning_rate": 1.8898861950059587e-05, + "loss": 0.5719, + "step": 11069 + }, + { + "epoch": 0.3039538714991763, + "grad_norm": 0.37923651933670044, + "learning_rate": 1.8898664919504196e-05, + "loss": 0.4865, + "step": 11070 + }, + { + "epoch": 0.3039813289401428, + "grad_norm": 0.3996421992778778, + "learning_rate": 1.8898467872349957e-05, + "loss": 0.5514, + "step": 11071 + }, + { + "epoch": 0.3040087863811093, + "grad_norm": 0.35641753673553467, + "learning_rate": 1.8898270808597234e-05, + "loss": 0.4709, + "step": 11072 + }, + { + "epoch": 0.3040362438220758, + "grad_norm": 0.4131092131137848, + "learning_rate": 1.8898073728246396e-05, + "loss": 0.5852, + "step": 11073 + }, + { + "epoch": 0.3040637012630423, + "grad_norm": 0.39850255846977234, + "learning_rate": 1.889787663129781e-05, + "loss": 0.548, + "step": 11074 + }, + { + "epoch": 0.30409115870400877, + "grad_norm": 0.37634116411209106, + "learning_rate": 1.8897679517751846e-05, + "loss": 0.5494, + "step": 11075 + }, + { + "epoch": 0.30411861614497526, + "grad_norm": 0.3412899971008301, + "learning_rate": 1.8897482387608867e-05, + "loss": 0.5105, + "step": 11076 + }, + { + "epoch": 0.3041460735859418, + "grad_norm": 0.5330919623374939, + "learning_rate": 1.8897285240869247e-05, + "loss": 0.4889, + "step": 11077 + }, + { + "epoch": 0.3041735310269083, + "grad_norm": 0.5215820670127869, + "learning_rate": 1.8897088077533347e-05, + "loss": 0.622, + "step": 11078 + }, + { + "epoch": 0.3042009884678748, + "grad_norm": 0.39739474654197693, + "learning_rate": 1.8896890897601537e-05, + "loss": 0.5734, + "step": 11079 + }, + { + "epoch": 0.3042284459088413, + "grad_norm": 0.35337233543395996, + "learning_rate": 1.889669370107419e-05, + "loss": 0.4498, + "step": 11080 + }, + { + "epoch": 0.3042559033498078, + "grad_norm": 0.3757399320602417, + "learning_rate": 1.889649648795167e-05, + "loss": 0.5759, + "step": 11081 + }, + { + "epoch": 0.3042833607907743, + "grad_norm": 0.3337714672088623, + "learning_rate": 1.8896299258234343e-05, + "loss": 0.5123, + "step": 11082 + }, + { + "epoch": 0.3043108182317408, + "grad_norm": 0.33044418692588806, + "learning_rate": 1.889610201192258e-05, + "loss": 0.4714, + "step": 11083 + }, + { + "epoch": 0.3043382756727073, + "grad_norm": 0.541510820388794, + "learning_rate": 1.8895904749016745e-05, + "loss": 0.5748, + "step": 11084 + }, + { + "epoch": 0.3043657331136738, + "grad_norm": 0.35813772678375244, + "learning_rate": 1.889570746951721e-05, + "loss": 0.4871, + "step": 11085 + }, + { + "epoch": 0.3043931905546403, + "grad_norm": 0.39763695001602173, + "learning_rate": 1.8895510173424337e-05, + "loss": 0.5201, + "step": 11086 + }, + { + "epoch": 0.3044206479956068, + "grad_norm": 0.35128843784332275, + "learning_rate": 1.8895312860738504e-05, + "loss": 0.4265, + "step": 11087 + }, + { + "epoch": 0.3044481054365733, + "grad_norm": 0.3703915476799011, + "learning_rate": 1.889511553146007e-05, + "loss": 0.5842, + "step": 11088 + }, + { + "epoch": 0.3044755628775398, + "grad_norm": 0.41767123341560364, + "learning_rate": 1.8894918185589408e-05, + "loss": 0.527, + "step": 11089 + }, + { + "epoch": 0.3045030203185063, + "grad_norm": 0.460068017244339, + "learning_rate": 1.8894720823126888e-05, + "loss": 0.4644, + "step": 11090 + }, + { + "epoch": 0.30453047775947284, + "grad_norm": 0.37693119049072266, + "learning_rate": 1.889452344407287e-05, + "loss": 0.4477, + "step": 11091 + }, + { + "epoch": 0.30455793520043933, + "grad_norm": 0.32962101697921753, + "learning_rate": 1.889432604842773e-05, + "loss": 0.4901, + "step": 11092 + }, + { + "epoch": 0.3045853926414058, + "grad_norm": 1.7100961208343506, + "learning_rate": 1.889412863619183e-05, + "loss": 0.4982, + "step": 11093 + }, + { + "epoch": 0.3046128500823723, + "grad_norm": 0.3305343985557556, + "learning_rate": 1.8893931207365545e-05, + "loss": 0.4847, + "step": 11094 + }, + { + "epoch": 0.3046403075233388, + "grad_norm": 0.4166306257247925, + "learning_rate": 1.8893733761949235e-05, + "loss": 0.6074, + "step": 11095 + }, + { + "epoch": 0.3046677649643053, + "grad_norm": 0.4021270275115967, + "learning_rate": 1.8893536299943277e-05, + "loss": 0.5879, + "step": 11096 + }, + { + "epoch": 0.3046952224052718, + "grad_norm": 0.3765951097011566, + "learning_rate": 1.8893338821348033e-05, + "loss": 0.5089, + "step": 11097 + }, + { + "epoch": 0.30472267984623835, + "grad_norm": 0.315895140171051, + "learning_rate": 1.8893141326163876e-05, + "loss": 0.4404, + "step": 11098 + }, + { + "epoch": 0.30475013728720485, + "grad_norm": 0.36457934975624084, + "learning_rate": 1.8892943814391168e-05, + "loss": 0.4795, + "step": 11099 + }, + { + "epoch": 0.30477759472817134, + "grad_norm": 0.36460238695144653, + "learning_rate": 1.889274628603029e-05, + "loss": 0.58, + "step": 11100 + }, + { + "epoch": 0.30480505216913784, + "grad_norm": 0.3470749855041504, + "learning_rate": 1.889254874108159e-05, + "loss": 0.4698, + "step": 11101 + }, + { + "epoch": 0.30483250961010433, + "grad_norm": 0.35653284192085266, + "learning_rate": 1.8892351179545457e-05, + "loss": 0.5692, + "step": 11102 + }, + { + "epoch": 0.3048599670510708, + "grad_norm": 0.33791258931159973, + "learning_rate": 1.889215360142225e-05, + "loss": 0.5446, + "step": 11103 + }, + { + "epoch": 0.3048874244920373, + "grad_norm": 0.34571146965026855, + "learning_rate": 1.8891956006712337e-05, + "loss": 0.5253, + "step": 11104 + }, + { + "epoch": 0.30491488193300387, + "grad_norm": 0.37521734833717346, + "learning_rate": 1.8891758395416087e-05, + "loss": 0.5717, + "step": 11105 + }, + { + "epoch": 0.30494233937397036, + "grad_norm": 0.39167213439941406, + "learning_rate": 1.889156076753387e-05, + "loss": 0.4981, + "step": 11106 + }, + { + "epoch": 0.30496979681493686, + "grad_norm": 0.41422390937805176, + "learning_rate": 1.8891363123066056e-05, + "loss": 0.5136, + "step": 11107 + }, + { + "epoch": 0.30499725425590335, + "grad_norm": 0.3901185393333435, + "learning_rate": 1.8891165462013014e-05, + "loss": 0.5542, + "step": 11108 + }, + { + "epoch": 0.30502471169686984, + "grad_norm": 0.38954511284828186, + "learning_rate": 1.8890967784375104e-05, + "loss": 0.5531, + "step": 11109 + }, + { + "epoch": 0.30505216913783634, + "grad_norm": 0.3441867232322693, + "learning_rate": 1.8890770090152705e-05, + "loss": 0.5321, + "step": 11110 + }, + { + "epoch": 0.30507962657880283, + "grad_norm": 0.39199697971343994, + "learning_rate": 1.8890572379346184e-05, + "loss": 0.3829, + "step": 11111 + }, + { + "epoch": 0.3051070840197694, + "grad_norm": 1.3695906400680542, + "learning_rate": 1.8890374651955906e-05, + "loss": 0.4986, + "step": 11112 + }, + { + "epoch": 0.3051345414607359, + "grad_norm": 0.3585394322872162, + "learning_rate": 1.889017690798224e-05, + "loss": 0.5481, + "step": 11113 + }, + { + "epoch": 0.30516199890170237, + "grad_norm": 0.3653968870639801, + "learning_rate": 1.8889979147425562e-05, + "loss": 0.5329, + "step": 11114 + }, + { + "epoch": 0.30518945634266886, + "grad_norm": 0.33642831444740295, + "learning_rate": 1.888978137028623e-05, + "loss": 0.4556, + "step": 11115 + }, + { + "epoch": 0.30521691378363536, + "grad_norm": 0.48646461963653564, + "learning_rate": 1.888958357656462e-05, + "loss": 0.4926, + "step": 11116 + }, + { + "epoch": 0.30524437122460185, + "grad_norm": 0.3697434067726135, + "learning_rate": 1.88893857662611e-05, + "loss": 0.4893, + "step": 11117 + }, + { + "epoch": 0.30527182866556835, + "grad_norm": 0.3651670217514038, + "learning_rate": 1.8889187939376042e-05, + "loss": 0.5702, + "step": 11118 + }, + { + "epoch": 0.3052992861065349, + "grad_norm": 0.40896478295326233, + "learning_rate": 1.8888990095909806e-05, + "loss": 0.5925, + "step": 11119 + }, + { + "epoch": 0.3053267435475014, + "grad_norm": 0.3947643041610718, + "learning_rate": 1.888879223586277e-05, + "loss": 0.5668, + "step": 11120 + }, + { + "epoch": 0.3053542009884679, + "grad_norm": 0.37182289361953735, + "learning_rate": 1.8888594359235297e-05, + "loss": 0.4636, + "step": 11121 + }, + { + "epoch": 0.3053816584294344, + "grad_norm": 0.3330807685852051, + "learning_rate": 1.888839646602776e-05, + "loss": 0.4599, + "step": 11122 + }, + { + "epoch": 0.30540911587040087, + "grad_norm": 0.34839746356010437, + "learning_rate": 1.8888198556240527e-05, + "loss": 0.5, + "step": 11123 + }, + { + "epoch": 0.30543657331136737, + "grad_norm": 0.39751002192497253, + "learning_rate": 1.8888000629873967e-05, + "loss": 0.5283, + "step": 11124 + }, + { + "epoch": 0.30546403075233386, + "grad_norm": 0.8811100125312805, + "learning_rate": 1.8887802686928448e-05, + "loss": 0.5671, + "step": 11125 + }, + { + "epoch": 0.3054914881933004, + "grad_norm": 0.3810620605945587, + "learning_rate": 1.888760472740434e-05, + "loss": 0.4879, + "step": 11126 + }, + { + "epoch": 0.3055189456342669, + "grad_norm": 0.32690519094467163, + "learning_rate": 1.8887406751302013e-05, + "loss": 0.4237, + "step": 11127 + }, + { + "epoch": 0.3055464030752334, + "grad_norm": 0.3620862662792206, + "learning_rate": 1.8887208758621837e-05, + "loss": 0.6174, + "step": 11128 + }, + { + "epoch": 0.3055738605161999, + "grad_norm": 0.42098695039749146, + "learning_rate": 1.8887010749364178e-05, + "loss": 0.5735, + "step": 11129 + }, + { + "epoch": 0.3056013179571664, + "grad_norm": 0.33709877729415894, + "learning_rate": 1.8886812723529407e-05, + "loss": 0.4568, + "step": 11130 + }, + { + "epoch": 0.3056287753981329, + "grad_norm": 0.3997587561607361, + "learning_rate": 1.88866146811179e-05, + "loss": 0.6112, + "step": 11131 + }, + { + "epoch": 0.3056562328390994, + "grad_norm": 0.3535623550415039, + "learning_rate": 1.8886416622130012e-05, + "loss": 0.5472, + "step": 11132 + }, + { + "epoch": 0.3056836902800659, + "grad_norm": 0.343872994184494, + "learning_rate": 1.8886218546566127e-05, + "loss": 0.4998, + "step": 11133 + }, + { + "epoch": 0.3057111477210324, + "grad_norm": 0.35378050804138184, + "learning_rate": 1.8886020454426606e-05, + "loss": 0.4997, + "step": 11134 + }, + { + "epoch": 0.3057386051619989, + "grad_norm": 0.36605924367904663, + "learning_rate": 1.8885822345711823e-05, + "loss": 0.5724, + "step": 11135 + }, + { + "epoch": 0.3057660626029654, + "grad_norm": 0.3024701476097107, + "learning_rate": 1.888562422042214e-05, + "loss": 0.441, + "step": 11136 + }, + { + "epoch": 0.3057935200439319, + "grad_norm": 0.338835746049881, + "learning_rate": 1.8885426078557937e-05, + "loss": 0.5089, + "step": 11137 + }, + { + "epoch": 0.3058209774848984, + "grad_norm": 0.342124879360199, + "learning_rate": 1.8885227920119575e-05, + "loss": 0.487, + "step": 11138 + }, + { + "epoch": 0.3058484349258649, + "grad_norm": 0.3488984704017639, + "learning_rate": 1.888502974510743e-05, + "loss": 0.5064, + "step": 11139 + }, + { + "epoch": 0.30587589236683144, + "grad_norm": 0.34910279512405396, + "learning_rate": 1.8884831553521866e-05, + "loss": 0.3722, + "step": 11140 + }, + { + "epoch": 0.30590334980779793, + "grad_norm": 0.36274996399879456, + "learning_rate": 1.8884633345363257e-05, + "loss": 0.4789, + "step": 11141 + }, + { + "epoch": 0.3059308072487644, + "grad_norm": 0.4109959304332733, + "learning_rate": 1.888443512063197e-05, + "loss": 0.5024, + "step": 11142 + }, + { + "epoch": 0.3059582646897309, + "grad_norm": 0.4690670073032379, + "learning_rate": 1.888423687932838e-05, + "loss": 0.5341, + "step": 11143 + }, + { + "epoch": 0.3059857221306974, + "grad_norm": 0.3583936393260956, + "learning_rate": 1.888403862145285e-05, + "loss": 0.4822, + "step": 11144 + }, + { + "epoch": 0.3060131795716639, + "grad_norm": 0.4393978416919708, + "learning_rate": 1.888384034700575e-05, + "loss": 0.4841, + "step": 11145 + }, + { + "epoch": 0.3060406370126304, + "grad_norm": 0.3791782855987549, + "learning_rate": 1.8883642055987453e-05, + "loss": 0.5396, + "step": 11146 + }, + { + "epoch": 0.30606809445359695, + "grad_norm": 0.385439932346344, + "learning_rate": 1.8883443748398333e-05, + "loss": 0.5263, + "step": 11147 + }, + { + "epoch": 0.30609555189456344, + "grad_norm": 0.39708036184310913, + "learning_rate": 1.888324542423875e-05, + "loss": 0.5413, + "step": 11148 + }, + { + "epoch": 0.30612300933552994, + "grad_norm": 0.43442943692207336, + "learning_rate": 1.888304708350908e-05, + "loss": 0.5313, + "step": 11149 + }, + { + "epoch": 0.30615046677649643, + "grad_norm": 0.43197867274284363, + "learning_rate": 1.8882848726209694e-05, + "loss": 0.5251, + "step": 11150 + }, + { + "epoch": 0.3061779242174629, + "grad_norm": 0.35540616512298584, + "learning_rate": 1.888265035234096e-05, + "loss": 0.5658, + "step": 11151 + }, + { + "epoch": 0.3062053816584294, + "grad_norm": 0.4087935984134674, + "learning_rate": 1.8882451961903246e-05, + "loss": 0.5364, + "step": 11152 + }, + { + "epoch": 0.3062328390993959, + "grad_norm": 0.3958073556423187, + "learning_rate": 1.8882253554896927e-05, + "loss": 0.5635, + "step": 11153 + }, + { + "epoch": 0.30626029654036246, + "grad_norm": 0.35070890188217163, + "learning_rate": 1.8882055131322365e-05, + "loss": 0.5101, + "step": 11154 + }, + { + "epoch": 0.30628775398132896, + "grad_norm": 0.39489981532096863, + "learning_rate": 1.888185669117994e-05, + "loss": 0.5703, + "step": 11155 + }, + { + "epoch": 0.30631521142229545, + "grad_norm": 0.38177090883255005, + "learning_rate": 1.8881658234470017e-05, + "loss": 0.483, + "step": 11156 + }, + { + "epoch": 0.30634266886326195, + "grad_norm": 0.5147358775138855, + "learning_rate": 1.8881459761192965e-05, + "loss": 0.5852, + "step": 11157 + }, + { + "epoch": 0.30637012630422844, + "grad_norm": 0.40596964955329895, + "learning_rate": 1.8881261271349158e-05, + "loss": 0.5576, + "step": 11158 + }, + { + "epoch": 0.30639758374519493, + "grad_norm": 0.3992031514644623, + "learning_rate": 1.8881062764938965e-05, + "loss": 0.5265, + "step": 11159 + }, + { + "epoch": 0.30642504118616143, + "grad_norm": 1.1233546733856201, + "learning_rate": 1.888086424196275e-05, + "loss": 0.4586, + "step": 11160 + }, + { + "epoch": 0.306452498627128, + "grad_norm": 0.3356042504310608, + "learning_rate": 1.8880665702420894e-05, + "loss": 0.4778, + "step": 11161 + }, + { + "epoch": 0.3064799560680945, + "grad_norm": 0.3586128056049347, + "learning_rate": 1.888046714631376e-05, + "loss": 0.5609, + "step": 11162 + }, + { + "epoch": 0.30650741350906097, + "grad_norm": 0.4246090352535248, + "learning_rate": 1.8880268573641722e-05, + "loss": 0.5529, + "step": 11163 + }, + { + "epoch": 0.30653487095002746, + "grad_norm": 0.35237622261047363, + "learning_rate": 1.8880069984405148e-05, + "loss": 0.4341, + "step": 11164 + }, + { + "epoch": 0.30656232839099395, + "grad_norm": 0.41939783096313477, + "learning_rate": 1.8879871378604408e-05, + "loss": 0.6026, + "step": 11165 + }, + { + "epoch": 0.30658978583196045, + "grad_norm": 0.39945438504219055, + "learning_rate": 1.8879672756239873e-05, + "loss": 0.5128, + "step": 11166 + }, + { + "epoch": 0.30661724327292694, + "grad_norm": 0.411832720041275, + "learning_rate": 1.887947411731192e-05, + "loss": 0.5703, + "step": 11167 + }, + { + "epoch": 0.3066447007138935, + "grad_norm": 0.3555002808570862, + "learning_rate": 1.887927546182091e-05, + "loss": 0.5469, + "step": 11168 + }, + { + "epoch": 0.30667215815486, + "grad_norm": 0.39220160245895386, + "learning_rate": 1.8879076789767216e-05, + "loss": 0.547, + "step": 11169 + }, + { + "epoch": 0.3066996155958265, + "grad_norm": 0.3668062686920166, + "learning_rate": 1.887887810115121e-05, + "loss": 0.4923, + "step": 11170 + }, + { + "epoch": 0.306727073036793, + "grad_norm": 0.3531438112258911, + "learning_rate": 1.887867939597326e-05, + "loss": 0.4498, + "step": 11171 + }, + { + "epoch": 0.30675453047775947, + "grad_norm": 0.3432660400867462, + "learning_rate": 1.8878480674233744e-05, + "loss": 0.459, + "step": 11172 + }, + { + "epoch": 0.30678198791872596, + "grad_norm": 0.3698910176753998, + "learning_rate": 1.887828193593303e-05, + "loss": 0.509, + "step": 11173 + }, + { + "epoch": 0.30680944535969246, + "grad_norm": 0.39957156777381897, + "learning_rate": 1.887808318107148e-05, + "loss": 0.5989, + "step": 11174 + }, + { + "epoch": 0.306836902800659, + "grad_norm": 0.3847670555114746, + "learning_rate": 1.8877884409649478e-05, + "loss": 0.5319, + "step": 11175 + }, + { + "epoch": 0.3068643602416255, + "grad_norm": 0.41158926486968994, + "learning_rate": 1.8877685621667383e-05, + "loss": 0.621, + "step": 11176 + }, + { + "epoch": 0.306891817682592, + "grad_norm": 0.3625026345252991, + "learning_rate": 1.8877486817125574e-05, + "loss": 0.5486, + "step": 11177 + }, + { + "epoch": 0.3069192751235585, + "grad_norm": 0.3941892087459564, + "learning_rate": 1.8877287996024417e-05, + "loss": 0.6189, + "step": 11178 + }, + { + "epoch": 0.306946732564525, + "grad_norm": 0.3573720157146454, + "learning_rate": 1.8877089158364284e-05, + "loss": 0.5053, + "step": 11179 + }, + { + "epoch": 0.3069741900054915, + "grad_norm": 0.51397705078125, + "learning_rate": 1.8876890304145548e-05, + "loss": 0.4986, + "step": 11180 + }, + { + "epoch": 0.30700164744645797, + "grad_norm": 0.3304443359375, + "learning_rate": 1.887669143336858e-05, + "loss": 0.4895, + "step": 11181 + }, + { + "epoch": 0.30702910488742446, + "grad_norm": 0.40529879927635193, + "learning_rate": 1.887649254603374e-05, + "loss": 0.5615, + "step": 11182 + }, + { + "epoch": 0.307056562328391, + "grad_norm": 0.45701003074645996, + "learning_rate": 1.887629364214142e-05, + "loss": 0.5666, + "step": 11183 + }, + { + "epoch": 0.3070840197693575, + "grad_norm": 0.385613352060318, + "learning_rate": 1.8876094721691974e-05, + "loss": 0.5635, + "step": 11184 + }, + { + "epoch": 0.307111477210324, + "grad_norm": 0.3333829939365387, + "learning_rate": 1.887589578468578e-05, + "loss": 0.5487, + "step": 11185 + }, + { + "epoch": 0.3071389346512905, + "grad_norm": 0.3847090005874634, + "learning_rate": 1.8875696831123207e-05, + "loss": 0.5398, + "step": 11186 + }, + { + "epoch": 0.307166392092257, + "grad_norm": 0.37340474128723145, + "learning_rate": 1.8875497861004625e-05, + "loss": 0.4973, + "step": 11187 + }, + { + "epoch": 0.3071938495332235, + "grad_norm": 0.36711013317108154, + "learning_rate": 1.8875298874330407e-05, + "loss": 0.5712, + "step": 11188 + }, + { + "epoch": 0.30722130697419, + "grad_norm": 0.36542782187461853, + "learning_rate": 1.8875099871100926e-05, + "loss": 0.4953, + "step": 11189 + }, + { + "epoch": 0.30724876441515653, + "grad_norm": 0.3626367449760437, + "learning_rate": 1.887490085131655e-05, + "loss": 0.578, + "step": 11190 + }, + { + "epoch": 0.307276221856123, + "grad_norm": 0.45987555384635925, + "learning_rate": 1.8874701814977653e-05, + "loss": 0.5178, + "step": 11191 + }, + { + "epoch": 0.3073036792970895, + "grad_norm": 0.3902856409549713, + "learning_rate": 1.88745027620846e-05, + "loss": 0.5393, + "step": 11192 + }, + { + "epoch": 0.307331136738056, + "grad_norm": 0.42272672057151794, + "learning_rate": 1.8874303692637772e-05, + "loss": 0.5869, + "step": 11193 + }, + { + "epoch": 0.3073585941790225, + "grad_norm": 0.3236834704875946, + "learning_rate": 1.8874104606637533e-05, + "loss": 0.4965, + "step": 11194 + }, + { + "epoch": 0.307386051619989, + "grad_norm": 0.3777744174003601, + "learning_rate": 1.8873905504084255e-05, + "loss": 0.5244, + "step": 11195 + }, + { + "epoch": 0.3074135090609555, + "grad_norm": 0.42487698793411255, + "learning_rate": 1.8873706384978313e-05, + "loss": 0.5869, + "step": 11196 + }, + { + "epoch": 0.30744096650192204, + "grad_norm": 0.3544144630432129, + "learning_rate": 1.887350724932008e-05, + "loss": 0.467, + "step": 11197 + }, + { + "epoch": 0.30746842394288854, + "grad_norm": 0.33189520239830017, + "learning_rate": 1.8873308097109918e-05, + "loss": 0.5268, + "step": 11198 + }, + { + "epoch": 0.30749588138385503, + "grad_norm": 0.36669474840164185, + "learning_rate": 1.8873108928348208e-05, + "loss": 0.5442, + "step": 11199 + }, + { + "epoch": 0.3075233388248215, + "grad_norm": 0.413105309009552, + "learning_rate": 1.8872909743035314e-05, + "loss": 0.4661, + "step": 11200 + }, + { + "epoch": 0.307550796265788, + "grad_norm": 0.37067943811416626, + "learning_rate": 1.8872710541171614e-05, + "loss": 0.5275, + "step": 11201 + }, + { + "epoch": 0.3075782537067545, + "grad_norm": 0.38456183671951294, + "learning_rate": 1.887251132275748e-05, + "loss": 0.5076, + "step": 11202 + }, + { + "epoch": 0.307605711147721, + "grad_norm": 0.34157684445381165, + "learning_rate": 1.8872312087793276e-05, + "loss": 0.5025, + "step": 11203 + }, + { + "epoch": 0.30763316858868756, + "grad_norm": 0.3774910569190979, + "learning_rate": 1.887211283627938e-05, + "loss": 0.5331, + "step": 11204 + }, + { + "epoch": 0.30766062602965405, + "grad_norm": 0.37343159317970276, + "learning_rate": 1.887191356821616e-05, + "loss": 0.5279, + "step": 11205 + }, + { + "epoch": 0.30768808347062054, + "grad_norm": 0.3930940628051758, + "learning_rate": 1.887171428360399e-05, + "loss": 0.5325, + "step": 11206 + }, + { + "epoch": 0.30771554091158704, + "grad_norm": 0.5612218976020813, + "learning_rate": 1.8871514982443245e-05, + "loss": 0.5521, + "step": 11207 + }, + { + "epoch": 0.30774299835255353, + "grad_norm": 0.35316982865333557, + "learning_rate": 1.887131566473429e-05, + "loss": 0.5041, + "step": 11208 + }, + { + "epoch": 0.30777045579352, + "grad_norm": 0.38150864839553833, + "learning_rate": 1.8871116330477504e-05, + "loss": 0.4917, + "step": 11209 + }, + { + "epoch": 0.3077979132344865, + "grad_norm": 0.38072529435157776, + "learning_rate": 1.8870916979673248e-05, + "loss": 0.5502, + "step": 11210 + }, + { + "epoch": 0.30782537067545307, + "grad_norm": 0.45102939009666443, + "learning_rate": 1.8870717612321905e-05, + "loss": 0.5282, + "step": 11211 + }, + { + "epoch": 0.30785282811641956, + "grad_norm": 0.337655246257782, + "learning_rate": 1.887051822842384e-05, + "loss": 0.5143, + "step": 11212 + }, + { + "epoch": 0.30788028555738606, + "grad_norm": 0.3924558460712433, + "learning_rate": 1.887031882797943e-05, + "loss": 0.582, + "step": 11213 + }, + { + "epoch": 0.30790774299835255, + "grad_norm": 0.41195330023765564, + "learning_rate": 1.8870119410989046e-05, + "loss": 0.5792, + "step": 11214 + }, + { + "epoch": 0.30793520043931905, + "grad_norm": 0.4164257049560547, + "learning_rate": 1.8869919977453052e-05, + "loss": 0.5024, + "step": 11215 + }, + { + "epoch": 0.30796265788028554, + "grad_norm": 0.42760002613067627, + "learning_rate": 1.8869720527371832e-05, + "loss": 0.4942, + "step": 11216 + }, + { + "epoch": 0.30799011532125203, + "grad_norm": 0.3968799412250519, + "learning_rate": 1.886952106074575e-05, + "loss": 0.5398, + "step": 11217 + }, + { + "epoch": 0.3080175727622186, + "grad_norm": 0.4029574990272522, + "learning_rate": 1.8869321577575178e-05, + "loss": 0.4926, + "step": 11218 + }, + { + "epoch": 0.3080450302031851, + "grad_norm": 0.31256774067878723, + "learning_rate": 1.8869122077860497e-05, + "loss": 0.5423, + "step": 11219 + }, + { + "epoch": 0.30807248764415157, + "grad_norm": 0.3690100908279419, + "learning_rate": 1.8868922561602067e-05, + "loss": 0.5483, + "step": 11220 + }, + { + "epoch": 0.30809994508511807, + "grad_norm": 0.37646380066871643, + "learning_rate": 1.8868723028800263e-05, + "loss": 0.5305, + "step": 11221 + }, + { + "epoch": 0.30812740252608456, + "grad_norm": 0.3178396224975586, + "learning_rate": 1.8868523479455468e-05, + "loss": 0.5369, + "step": 11222 + }, + { + "epoch": 0.30815485996705105, + "grad_norm": 0.3572700321674347, + "learning_rate": 1.8868323913568042e-05, + "loss": 0.5059, + "step": 11223 + }, + { + "epoch": 0.30818231740801755, + "grad_norm": 0.3217736780643463, + "learning_rate": 1.886812433113836e-05, + "loss": 0.4716, + "step": 11224 + }, + { + "epoch": 0.3082097748489841, + "grad_norm": 0.36267292499542236, + "learning_rate": 1.88679247321668e-05, + "loss": 0.4975, + "step": 11225 + }, + { + "epoch": 0.3082372322899506, + "grad_norm": 0.40382349491119385, + "learning_rate": 1.8867725116653727e-05, + "loss": 0.5417, + "step": 11226 + }, + { + "epoch": 0.3082646897309171, + "grad_norm": 0.3571723699569702, + "learning_rate": 1.886752548459952e-05, + "loss": 0.5127, + "step": 11227 + }, + { + "epoch": 0.3082921471718836, + "grad_norm": 0.41572263836860657, + "learning_rate": 1.886732583600454e-05, + "loss": 0.472, + "step": 11228 + }, + { + "epoch": 0.3083196046128501, + "grad_norm": 0.40711382031440735, + "learning_rate": 1.8867126170869174e-05, + "loss": 0.5332, + "step": 11229 + }, + { + "epoch": 0.30834706205381657, + "grad_norm": 0.36496198177337646, + "learning_rate": 1.8866926489193784e-05, + "loss": 0.5124, + "step": 11230 + }, + { + "epoch": 0.30837451949478306, + "grad_norm": 0.326542466878891, + "learning_rate": 1.8866726790978748e-05, + "loss": 0.4243, + "step": 11231 + }, + { + "epoch": 0.3084019769357496, + "grad_norm": 0.3426162302494049, + "learning_rate": 1.8866527076224438e-05, + "loss": 0.4704, + "step": 11232 + }, + { + "epoch": 0.3084294343767161, + "grad_norm": 0.35072818398475647, + "learning_rate": 1.8866327344931223e-05, + "loss": 0.4827, + "step": 11233 + }, + { + "epoch": 0.3084568918176826, + "grad_norm": 0.4028296172618866, + "learning_rate": 1.886612759709948e-05, + "loss": 0.5892, + "step": 11234 + }, + { + "epoch": 0.3084843492586491, + "grad_norm": 0.3248579502105713, + "learning_rate": 1.8865927832729575e-05, + "loss": 0.5464, + "step": 11235 + }, + { + "epoch": 0.3085118066996156, + "grad_norm": 0.40313732624053955, + "learning_rate": 1.8865728051821887e-05, + "loss": 0.5848, + "step": 11236 + }, + { + "epoch": 0.3085392641405821, + "grad_norm": 0.3550894260406494, + "learning_rate": 1.886552825437679e-05, + "loss": 0.5547, + "step": 11237 + }, + { + "epoch": 0.3085667215815486, + "grad_norm": 0.345862478017807, + "learning_rate": 1.886532844039465e-05, + "loss": 0.4932, + "step": 11238 + }, + { + "epoch": 0.3085941790225151, + "grad_norm": 0.33761218190193176, + "learning_rate": 1.886512860987584e-05, + "loss": 0.5217, + "step": 11239 + }, + { + "epoch": 0.3086216364634816, + "grad_norm": 0.34675318002700806, + "learning_rate": 1.886492876282074e-05, + "loss": 0.5321, + "step": 11240 + }, + { + "epoch": 0.3086490939044481, + "grad_norm": 0.36701714992523193, + "learning_rate": 1.886472889922972e-05, + "loss": 0.4906, + "step": 11241 + }, + { + "epoch": 0.3086765513454146, + "grad_norm": 0.3599221706390381, + "learning_rate": 1.886452901910315e-05, + "loss": 0.455, + "step": 11242 + }, + { + "epoch": 0.3087040087863811, + "grad_norm": 0.37764397263526917, + "learning_rate": 1.8864329122441402e-05, + "loss": 0.5288, + "step": 11243 + }, + { + "epoch": 0.3087314662273476, + "grad_norm": 0.35613590478897095, + "learning_rate": 1.8864129209244853e-05, + "loss": 0.5536, + "step": 11244 + }, + { + "epoch": 0.3087589236683141, + "grad_norm": 0.4068291485309601, + "learning_rate": 1.886392927951387e-05, + "loss": 0.4589, + "step": 11245 + }, + { + "epoch": 0.30878638110928064, + "grad_norm": 0.3485753536224365, + "learning_rate": 1.8863729333248836e-05, + "loss": 0.5517, + "step": 11246 + }, + { + "epoch": 0.30881383855024713, + "grad_norm": 0.3644098937511444, + "learning_rate": 1.886352937045011e-05, + "loss": 0.4773, + "step": 11247 + }, + { + "epoch": 0.3088412959912136, + "grad_norm": 0.38053247332572937, + "learning_rate": 1.886332939111808e-05, + "loss": 0.498, + "step": 11248 + }, + { + "epoch": 0.3088687534321801, + "grad_norm": 0.3748336136341095, + "learning_rate": 1.8863129395253107e-05, + "loss": 0.5071, + "step": 11249 + }, + { + "epoch": 0.3088962108731466, + "grad_norm": 0.4131118953227997, + "learning_rate": 1.8862929382855573e-05, + "loss": 0.5111, + "step": 11250 + }, + { + "epoch": 0.3089236683141131, + "grad_norm": 0.3669385612010956, + "learning_rate": 1.8862729353925847e-05, + "loss": 0.5315, + "step": 11251 + }, + { + "epoch": 0.3089511257550796, + "grad_norm": 0.3807041645050049, + "learning_rate": 1.88625293084643e-05, + "loss": 0.5202, + "step": 11252 + }, + { + "epoch": 0.30897858319604615, + "grad_norm": 0.4254024624824524, + "learning_rate": 1.8862329246471307e-05, + "loss": 0.4682, + "step": 11253 + }, + { + "epoch": 0.30900604063701265, + "grad_norm": 0.38423192501068115, + "learning_rate": 1.886212916794724e-05, + "loss": 0.5197, + "step": 11254 + }, + { + "epoch": 0.30903349807797914, + "grad_norm": 0.34807705879211426, + "learning_rate": 1.8861929072892476e-05, + "loss": 0.5323, + "step": 11255 + }, + { + "epoch": 0.30906095551894563, + "grad_norm": 0.3598158657550812, + "learning_rate": 1.8861728961307385e-05, + "loss": 0.5579, + "step": 11256 + }, + { + "epoch": 0.30908841295991213, + "grad_norm": 0.3874618709087372, + "learning_rate": 1.8861528833192343e-05, + "loss": 0.5748, + "step": 11257 + }, + { + "epoch": 0.3091158704008786, + "grad_norm": 0.37065351009368896, + "learning_rate": 1.886132868854772e-05, + "loss": 0.5575, + "step": 11258 + }, + { + "epoch": 0.3091433278418451, + "grad_norm": 0.4391917288303375, + "learning_rate": 1.8861128527373892e-05, + "loss": 0.5372, + "step": 11259 + }, + { + "epoch": 0.30917078528281167, + "grad_norm": 0.3859999179840088, + "learning_rate": 1.8860928349671227e-05, + "loss": 0.5407, + "step": 11260 + }, + { + "epoch": 0.30919824272377816, + "grad_norm": 0.35364970564842224, + "learning_rate": 1.8860728155440108e-05, + "loss": 0.5285, + "step": 11261 + }, + { + "epoch": 0.30922570016474465, + "grad_norm": 0.35805779695510864, + "learning_rate": 1.8860527944680898e-05, + "loss": 0.5052, + "step": 11262 + }, + { + "epoch": 0.30925315760571115, + "grad_norm": 0.3966748118400574, + "learning_rate": 1.886032771739398e-05, + "loss": 0.5184, + "step": 11263 + }, + { + "epoch": 0.30928061504667764, + "grad_norm": 0.38174790143966675, + "learning_rate": 1.8860127473579722e-05, + "loss": 0.5191, + "step": 11264 + }, + { + "epoch": 0.30930807248764414, + "grad_norm": 0.44568976759910583, + "learning_rate": 1.8859927213238497e-05, + "loss": 0.4984, + "step": 11265 + }, + { + "epoch": 0.30933552992861063, + "grad_norm": 0.36715659499168396, + "learning_rate": 1.885972693637068e-05, + "loss": 0.5263, + "step": 11266 + }, + { + "epoch": 0.3093629873695772, + "grad_norm": 0.5756145715713501, + "learning_rate": 1.885952664297664e-05, + "loss": 0.5074, + "step": 11267 + }, + { + "epoch": 0.3093904448105437, + "grad_norm": 0.3872334063053131, + "learning_rate": 1.8859326333056765e-05, + "loss": 0.5283, + "step": 11268 + }, + { + "epoch": 0.30941790225151017, + "grad_norm": 0.3908112943172455, + "learning_rate": 1.8859126006611412e-05, + "loss": 0.5216, + "step": 11269 + }, + { + "epoch": 0.30944535969247666, + "grad_norm": 0.3831113576889038, + "learning_rate": 1.8858925663640965e-05, + "loss": 0.5663, + "step": 11270 + }, + { + "epoch": 0.30947281713344316, + "grad_norm": 0.5934312343597412, + "learning_rate": 1.8858725304145792e-05, + "loss": 0.5449, + "step": 11271 + }, + { + "epoch": 0.30950027457440965, + "grad_norm": 0.3202603757381439, + "learning_rate": 1.8858524928126267e-05, + "loss": 0.451, + "step": 11272 + }, + { + "epoch": 0.30952773201537614, + "grad_norm": 0.3631031811237335, + "learning_rate": 1.885832453558277e-05, + "loss": 0.5633, + "step": 11273 + }, + { + "epoch": 0.3095551894563427, + "grad_norm": 0.32394886016845703, + "learning_rate": 1.8858124126515667e-05, + "loss": 0.4832, + "step": 11274 + }, + { + "epoch": 0.3095826468973092, + "grad_norm": 0.37036362290382385, + "learning_rate": 1.885792370092534e-05, + "loss": 0.5431, + "step": 11275 + }, + { + "epoch": 0.3096101043382757, + "grad_norm": 0.3637445569038391, + "learning_rate": 1.8857723258812154e-05, + "loss": 0.5992, + "step": 11276 + }, + { + "epoch": 0.3096375617792422, + "grad_norm": 0.3149133622646332, + "learning_rate": 1.8857522800176488e-05, + "loss": 0.5549, + "step": 11277 + }, + { + "epoch": 0.30966501922020867, + "grad_norm": 0.3982038199901581, + "learning_rate": 1.8857322325018715e-05, + "loss": 0.4847, + "step": 11278 + }, + { + "epoch": 0.30969247666117516, + "grad_norm": 0.41919785737991333, + "learning_rate": 1.885712183333921e-05, + "loss": 0.5599, + "step": 11279 + }, + { + "epoch": 0.30971993410214166, + "grad_norm": 0.37685608863830566, + "learning_rate": 1.8856921325138343e-05, + "loss": 0.5277, + "step": 11280 + }, + { + "epoch": 0.3097473915431082, + "grad_norm": 0.4007309377193451, + "learning_rate": 1.8856720800416496e-05, + "loss": 0.5105, + "step": 11281 + }, + { + "epoch": 0.3097748489840747, + "grad_norm": 0.36570093035697937, + "learning_rate": 1.8856520259174033e-05, + "loss": 0.5713, + "step": 11282 + }, + { + "epoch": 0.3098023064250412, + "grad_norm": 0.38156914710998535, + "learning_rate": 1.8856319701411334e-05, + "loss": 0.5557, + "step": 11283 + }, + { + "epoch": 0.3098297638660077, + "grad_norm": 0.3536161482334137, + "learning_rate": 1.8856119127128772e-05, + "loss": 0.4971, + "step": 11284 + }, + { + "epoch": 0.3098572213069742, + "grad_norm": 0.34560626745224, + "learning_rate": 1.8855918536326727e-05, + "loss": 0.5117, + "step": 11285 + }, + { + "epoch": 0.3098846787479407, + "grad_norm": 0.4105583727359772, + "learning_rate": 1.885571792900556e-05, + "loss": 0.6173, + "step": 11286 + }, + { + "epoch": 0.3099121361889072, + "grad_norm": 0.3999376893043518, + "learning_rate": 1.8855517305165655e-05, + "loss": 0.5745, + "step": 11287 + }, + { + "epoch": 0.3099395936298737, + "grad_norm": 0.3814598321914673, + "learning_rate": 1.8855316664807388e-05, + "loss": 0.53, + "step": 11288 + }, + { + "epoch": 0.3099670510708402, + "grad_norm": 0.37982991337776184, + "learning_rate": 1.8855116007931124e-05, + "loss": 0.6337, + "step": 11289 + }, + { + "epoch": 0.3099945085118067, + "grad_norm": 0.3733052611351013, + "learning_rate": 1.8854915334537244e-05, + "loss": 0.6006, + "step": 11290 + }, + { + "epoch": 0.3100219659527732, + "grad_norm": 0.32917851209640503, + "learning_rate": 1.885471464462612e-05, + "loss": 0.4561, + "step": 11291 + }, + { + "epoch": 0.3100494233937397, + "grad_norm": 0.3965912461280823, + "learning_rate": 1.8854513938198127e-05, + "loss": 0.546, + "step": 11292 + }, + { + "epoch": 0.3100768808347062, + "grad_norm": 0.4211497902870178, + "learning_rate": 1.8854313215253643e-05, + "loss": 0.6284, + "step": 11293 + }, + { + "epoch": 0.3101043382756727, + "grad_norm": 0.3517448902130127, + "learning_rate": 1.8854112475793036e-05, + "loss": 0.4975, + "step": 11294 + }, + { + "epoch": 0.31013179571663924, + "grad_norm": 0.357208251953125, + "learning_rate": 1.8853911719816682e-05, + "loss": 0.603, + "step": 11295 + }, + { + "epoch": 0.31015925315760573, + "grad_norm": 0.40067464113235474, + "learning_rate": 1.8853710947324958e-05, + "loss": 0.6465, + "step": 11296 + }, + { + "epoch": 0.3101867105985722, + "grad_norm": 0.34183788299560547, + "learning_rate": 1.8853510158318236e-05, + "loss": 0.4746, + "step": 11297 + }, + { + "epoch": 0.3102141680395387, + "grad_norm": 0.36647069454193115, + "learning_rate": 1.8853309352796896e-05, + "loss": 0.538, + "step": 11298 + }, + { + "epoch": 0.3102416254805052, + "grad_norm": 0.3190484046936035, + "learning_rate": 1.8853108530761303e-05, + "loss": 0.4935, + "step": 11299 + }, + { + "epoch": 0.3102690829214717, + "grad_norm": 0.32953938841819763, + "learning_rate": 1.885290769221184e-05, + "loss": 0.5298, + "step": 11300 + }, + { + "epoch": 0.3102965403624382, + "grad_norm": 0.3689562678337097, + "learning_rate": 1.885270683714888e-05, + "loss": 0.5438, + "step": 11301 + }, + { + "epoch": 0.31032399780340475, + "grad_norm": 0.3849407136440277, + "learning_rate": 1.8852505965572792e-05, + "loss": 0.5533, + "step": 11302 + }, + { + "epoch": 0.31035145524437124, + "grad_norm": 0.41696494817733765, + "learning_rate": 1.8852305077483958e-05, + "loss": 0.5171, + "step": 11303 + }, + { + "epoch": 0.31037891268533774, + "grad_norm": 0.42613956332206726, + "learning_rate": 1.885210417288275e-05, + "loss": 0.6309, + "step": 11304 + }, + { + "epoch": 0.31040637012630423, + "grad_norm": 0.3716728687286377, + "learning_rate": 1.8851903251769542e-05, + "loss": 0.5493, + "step": 11305 + }, + { + "epoch": 0.3104338275672707, + "grad_norm": 0.39843443036079407, + "learning_rate": 1.885170231414471e-05, + "loss": 0.525, + "step": 11306 + }, + { + "epoch": 0.3104612850082372, + "grad_norm": 0.3518000543117523, + "learning_rate": 1.8851501360008627e-05, + "loss": 0.5194, + "step": 11307 + }, + { + "epoch": 0.3104887424492037, + "grad_norm": 0.31618738174438477, + "learning_rate": 1.885130038936167e-05, + "loss": 0.4513, + "step": 11308 + }, + { + "epoch": 0.31051619989017026, + "grad_norm": 0.39830586314201355, + "learning_rate": 1.885109940220421e-05, + "loss": 0.5385, + "step": 11309 + }, + { + "epoch": 0.31054365733113676, + "grad_norm": 0.39882344007492065, + "learning_rate": 1.885089839853663e-05, + "loss": 0.611, + "step": 11310 + }, + { + "epoch": 0.31057111477210325, + "grad_norm": 0.47808486223220825, + "learning_rate": 1.8850697378359295e-05, + "loss": 0.5672, + "step": 11311 + }, + { + "epoch": 0.31059857221306975, + "grad_norm": 0.34714749455451965, + "learning_rate": 1.8850496341672583e-05, + "loss": 0.5815, + "step": 11312 + }, + { + "epoch": 0.31062602965403624, + "grad_norm": 0.3725500702857971, + "learning_rate": 1.8850295288476875e-05, + "loss": 0.5125, + "step": 11313 + }, + { + "epoch": 0.31065348709500273, + "grad_norm": 0.36777591705322266, + "learning_rate": 1.8850094218772538e-05, + "loss": 0.5715, + "step": 11314 + }, + { + "epoch": 0.31068094453596923, + "grad_norm": 0.36089763045310974, + "learning_rate": 1.8849893132559956e-05, + "loss": 0.5491, + "step": 11315 + }, + { + "epoch": 0.3107084019769357, + "grad_norm": 0.3788428008556366, + "learning_rate": 1.8849692029839492e-05, + "loss": 0.6035, + "step": 11316 + }, + { + "epoch": 0.31073585941790227, + "grad_norm": 0.36409837007522583, + "learning_rate": 1.884949091061153e-05, + "loss": 0.5395, + "step": 11317 + }, + { + "epoch": 0.31076331685886877, + "grad_norm": 0.4296551048755646, + "learning_rate": 1.8849289774876446e-05, + "loss": 0.5723, + "step": 11318 + }, + { + "epoch": 0.31079077429983526, + "grad_norm": 0.35167181491851807, + "learning_rate": 1.884908862263461e-05, + "loss": 0.4639, + "step": 11319 + }, + { + "epoch": 0.31081823174080175, + "grad_norm": 0.6806081533432007, + "learning_rate": 1.88488874538864e-05, + "loss": 0.4543, + "step": 11320 + }, + { + "epoch": 0.31084568918176825, + "grad_norm": 0.3366139829158783, + "learning_rate": 1.8848686268632193e-05, + "loss": 0.5027, + "step": 11321 + }, + { + "epoch": 0.31087314662273474, + "grad_norm": 0.3681115210056305, + "learning_rate": 1.8848485066872357e-05, + "loss": 0.5811, + "step": 11322 + }, + { + "epoch": 0.31090060406370124, + "grad_norm": 0.3778879642486572, + "learning_rate": 1.8848283848607277e-05, + "loss": 0.5513, + "step": 11323 + }, + { + "epoch": 0.3109280615046678, + "grad_norm": 0.33004075288772583, + "learning_rate": 1.884808261383732e-05, + "loss": 0.5232, + "step": 11324 + }, + { + "epoch": 0.3109555189456343, + "grad_norm": 0.3816484808921814, + "learning_rate": 1.884788136256287e-05, + "loss": 0.5295, + "step": 11325 + }, + { + "epoch": 0.3109829763866008, + "grad_norm": 0.3728027939796448, + "learning_rate": 1.8847680094784292e-05, + "loss": 0.5481, + "step": 11326 + }, + { + "epoch": 0.31101043382756727, + "grad_norm": 0.3555505871772766, + "learning_rate": 1.8847478810501963e-05, + "loss": 0.5436, + "step": 11327 + }, + { + "epoch": 0.31103789126853376, + "grad_norm": 0.331566721200943, + "learning_rate": 1.884727750971627e-05, + "loss": 0.4482, + "step": 11328 + }, + { + "epoch": 0.31106534870950026, + "grad_norm": 0.40008875727653503, + "learning_rate": 1.884707619242758e-05, + "loss": 0.5492, + "step": 11329 + }, + { + "epoch": 0.31109280615046675, + "grad_norm": 0.4424271583557129, + "learning_rate": 1.8846874858636265e-05, + "loss": 0.5452, + "step": 11330 + }, + { + "epoch": 0.3111202635914333, + "grad_norm": 0.35575130581855774, + "learning_rate": 1.884667350834271e-05, + "loss": 0.5009, + "step": 11331 + }, + { + "epoch": 0.3111477210323998, + "grad_norm": 0.3749183416366577, + "learning_rate": 1.884647214154728e-05, + "loss": 0.4858, + "step": 11332 + }, + { + "epoch": 0.3111751784733663, + "grad_norm": 0.40534406900405884, + "learning_rate": 1.884627075825036e-05, + "loss": 0.5064, + "step": 11333 + }, + { + "epoch": 0.3112026359143328, + "grad_norm": 0.326568603515625, + "learning_rate": 1.884606935845232e-05, + "loss": 0.4694, + "step": 11334 + }, + { + "epoch": 0.3112300933552993, + "grad_norm": 0.40226808190345764, + "learning_rate": 1.8845867942153536e-05, + "loss": 0.58, + "step": 11335 + }, + { + "epoch": 0.31125755079626577, + "grad_norm": 0.351814329624176, + "learning_rate": 1.8845666509354388e-05, + "loss": 0.5317, + "step": 11336 + }, + { + "epoch": 0.31128500823723226, + "grad_norm": 0.3486435115337372, + "learning_rate": 1.8845465060055244e-05, + "loss": 0.5216, + "step": 11337 + }, + { + "epoch": 0.3113124656781988, + "grad_norm": 0.34122416377067566, + "learning_rate": 1.8845263594256492e-05, + "loss": 0.5282, + "step": 11338 + }, + { + "epoch": 0.3113399231191653, + "grad_norm": 0.36832940578460693, + "learning_rate": 1.8845062111958494e-05, + "loss": 0.5048, + "step": 11339 + }, + { + "epoch": 0.3113673805601318, + "grad_norm": 0.38114285469055176, + "learning_rate": 1.8844860613161635e-05, + "loss": 0.5293, + "step": 11340 + }, + { + "epoch": 0.3113948380010983, + "grad_norm": 0.3649303913116455, + "learning_rate": 1.884465909786629e-05, + "loss": 0.4974, + "step": 11341 + }, + { + "epoch": 0.3114222954420648, + "grad_norm": 0.36032944917678833, + "learning_rate": 1.8844457566072828e-05, + "loss": 0.5473, + "step": 11342 + }, + { + "epoch": 0.3114497528830313, + "grad_norm": 0.3570484519004822, + "learning_rate": 1.8844256017781632e-05, + "loss": 0.5896, + "step": 11343 + }, + { + "epoch": 0.3114772103239978, + "grad_norm": 0.4493335783481598, + "learning_rate": 1.8844054452993073e-05, + "loss": 0.4676, + "step": 11344 + }, + { + "epoch": 0.3115046677649643, + "grad_norm": 0.37627431750297546, + "learning_rate": 1.8843852871707535e-05, + "loss": 0.5348, + "step": 11345 + }, + { + "epoch": 0.3115321252059308, + "grad_norm": 0.3497869074344635, + "learning_rate": 1.8843651273925386e-05, + "loss": 0.4898, + "step": 11346 + }, + { + "epoch": 0.3115595826468973, + "grad_norm": 0.4168960452079773, + "learning_rate": 1.8843449659647003e-05, + "loss": 0.6214, + "step": 11347 + }, + { + "epoch": 0.3115870400878638, + "grad_norm": 0.3855307698249817, + "learning_rate": 1.884324802887277e-05, + "loss": 0.5214, + "step": 11348 + }, + { + "epoch": 0.3116144975288303, + "grad_norm": 0.37278813123703003, + "learning_rate": 1.8843046381603052e-05, + "loss": 0.5448, + "step": 11349 + }, + { + "epoch": 0.3116419549697968, + "grad_norm": 0.3956211507320404, + "learning_rate": 1.8842844717838233e-05, + "loss": 0.5961, + "step": 11350 + }, + { + "epoch": 0.3116694124107633, + "grad_norm": 0.3878605365753174, + "learning_rate": 1.8842643037578684e-05, + "loss": 0.5669, + "step": 11351 + }, + { + "epoch": 0.31169686985172984, + "grad_norm": 0.3865380883216858, + "learning_rate": 1.884244134082478e-05, + "loss": 0.6094, + "step": 11352 + }, + { + "epoch": 0.31172432729269633, + "grad_norm": 0.3722621500492096, + "learning_rate": 1.8842239627576906e-05, + "loss": 0.4518, + "step": 11353 + }, + { + "epoch": 0.31175178473366283, + "grad_norm": 0.40808913111686707, + "learning_rate": 1.8842037897835434e-05, + "loss": 0.5379, + "step": 11354 + }, + { + "epoch": 0.3117792421746293, + "grad_norm": 0.4044474959373474, + "learning_rate": 1.8841836151600735e-05, + "loss": 0.4585, + "step": 11355 + }, + { + "epoch": 0.3118066996155958, + "grad_norm": 0.3958859145641327, + "learning_rate": 1.884163438887319e-05, + "loss": 0.4874, + "step": 11356 + }, + { + "epoch": 0.3118341570565623, + "grad_norm": 0.39973965287208557, + "learning_rate": 1.884143260965318e-05, + "loss": 0.6342, + "step": 11357 + }, + { + "epoch": 0.3118616144975288, + "grad_norm": 0.32400909066200256, + "learning_rate": 1.8841230813941068e-05, + "loss": 0.4811, + "step": 11358 + }, + { + "epoch": 0.31188907193849535, + "grad_norm": 0.36785465478897095, + "learning_rate": 1.8841029001737245e-05, + "loss": 0.5556, + "step": 11359 + }, + { + "epoch": 0.31191652937946185, + "grad_norm": 0.39711031317710876, + "learning_rate": 1.884082717304208e-05, + "loss": 0.5149, + "step": 11360 + }, + { + "epoch": 0.31194398682042834, + "grad_norm": 0.3844723105430603, + "learning_rate": 1.884062532785595e-05, + "loss": 0.529, + "step": 11361 + }, + { + "epoch": 0.31197144426139484, + "grad_norm": 0.6045128703117371, + "learning_rate": 1.8840423466179232e-05, + "loss": 0.5163, + "step": 11362 + }, + { + "epoch": 0.31199890170236133, + "grad_norm": 0.41016659140586853, + "learning_rate": 1.8840221588012305e-05, + "loss": 0.551, + "step": 11363 + }, + { + "epoch": 0.3120263591433278, + "grad_norm": 0.37000808119773865, + "learning_rate": 1.8840019693355544e-05, + "loss": 0.4864, + "step": 11364 + }, + { + "epoch": 0.3120538165842943, + "grad_norm": 0.36425498127937317, + "learning_rate": 1.8839817782209318e-05, + "loss": 0.5363, + "step": 11365 + }, + { + "epoch": 0.31208127402526087, + "grad_norm": 0.4120878577232361, + "learning_rate": 1.8839615854574017e-05, + "loss": 0.5579, + "step": 11366 + }, + { + "epoch": 0.31210873146622736, + "grad_norm": 0.5323432683944702, + "learning_rate": 1.883941391045001e-05, + "loss": 0.5898, + "step": 11367 + }, + { + "epoch": 0.31213618890719386, + "grad_norm": 0.46722346544265747, + "learning_rate": 1.883921194983767e-05, + "loss": 0.4565, + "step": 11368 + }, + { + "epoch": 0.31216364634816035, + "grad_norm": 0.3680703639984131, + "learning_rate": 1.8839009972737385e-05, + "loss": 0.5351, + "step": 11369 + }, + { + "epoch": 0.31219110378912684, + "grad_norm": 0.40063047409057617, + "learning_rate": 1.883880797914952e-05, + "loss": 0.5632, + "step": 11370 + }, + { + "epoch": 0.31221856123009334, + "grad_norm": 0.3955143094062805, + "learning_rate": 1.883860596907446e-05, + "loss": 0.52, + "step": 11371 + }, + { + "epoch": 0.31224601867105983, + "grad_norm": 0.3987671434879303, + "learning_rate": 1.883840394251258e-05, + "loss": 0.5044, + "step": 11372 + }, + { + "epoch": 0.3122734761120264, + "grad_norm": 0.38556650280952454, + "learning_rate": 1.8838201899464254e-05, + "loss": 0.5977, + "step": 11373 + }, + { + "epoch": 0.3123009335529929, + "grad_norm": 0.3640457093715668, + "learning_rate": 1.883799983992986e-05, + "loss": 0.5737, + "step": 11374 + }, + { + "epoch": 0.31232839099395937, + "grad_norm": 0.3726956248283386, + "learning_rate": 1.883779776390978e-05, + "loss": 0.5028, + "step": 11375 + }, + { + "epoch": 0.31235584843492586, + "grad_norm": 0.44557851552963257, + "learning_rate": 1.8837595671404383e-05, + "loss": 0.4355, + "step": 11376 + }, + { + "epoch": 0.31238330587589236, + "grad_norm": 0.40030235052108765, + "learning_rate": 1.883739356241405e-05, + "loss": 0.5378, + "step": 11377 + }, + { + "epoch": 0.31241076331685885, + "grad_norm": 0.3414601981639862, + "learning_rate": 1.8837191436939156e-05, + "loss": 0.524, + "step": 11378 + }, + { + "epoch": 0.31243822075782535, + "grad_norm": 0.34316831827163696, + "learning_rate": 1.883698929498008e-05, + "loss": 0.5561, + "step": 11379 + }, + { + "epoch": 0.3124656781987919, + "grad_norm": 0.3822277784347534, + "learning_rate": 1.8836787136537198e-05, + "loss": 0.4425, + "step": 11380 + }, + { + "epoch": 0.3124931356397584, + "grad_norm": 0.3973458707332611, + "learning_rate": 1.883658496161089e-05, + "loss": 0.4705, + "step": 11381 + }, + { + "epoch": 0.3125205930807249, + "grad_norm": 0.3669137954711914, + "learning_rate": 1.883638277020153e-05, + "loss": 0.4851, + "step": 11382 + }, + { + "epoch": 0.3125480505216914, + "grad_norm": 0.3308241367340088, + "learning_rate": 1.8836180562309493e-05, + "loss": 0.5117, + "step": 11383 + }, + { + "epoch": 0.3125755079626579, + "grad_norm": 0.32334885001182556, + "learning_rate": 1.8835978337935162e-05, + "loss": 0.5222, + "step": 11384 + }, + { + "epoch": 0.31260296540362437, + "grad_norm": 0.4367953836917877, + "learning_rate": 1.8835776097078908e-05, + "loss": 0.5632, + "step": 11385 + }, + { + "epoch": 0.31263042284459086, + "grad_norm": 0.37910184264183044, + "learning_rate": 1.8835573839741114e-05, + "loss": 0.567, + "step": 11386 + }, + { + "epoch": 0.3126578802855574, + "grad_norm": 0.35174256563186646, + "learning_rate": 1.8835371565922156e-05, + "loss": 0.5443, + "step": 11387 + }, + { + "epoch": 0.3126853377265239, + "grad_norm": 0.3539165258407593, + "learning_rate": 1.883516927562241e-05, + "loss": 0.5147, + "step": 11388 + }, + { + "epoch": 0.3127127951674904, + "grad_norm": 0.3502107858657837, + "learning_rate": 1.8834966968842254e-05, + "loss": 0.5224, + "step": 11389 + }, + { + "epoch": 0.3127402526084569, + "grad_norm": 0.3474768400192261, + "learning_rate": 1.883476464558206e-05, + "loss": 0.5394, + "step": 11390 + }, + { + "epoch": 0.3127677100494234, + "grad_norm": 0.36897218227386475, + "learning_rate": 1.8834562305842215e-05, + "loss": 0.5068, + "step": 11391 + }, + { + "epoch": 0.3127951674903899, + "grad_norm": 0.3595658540725708, + "learning_rate": 1.8834359949623093e-05, + "loss": 0.5523, + "step": 11392 + }, + { + "epoch": 0.3128226249313564, + "grad_norm": 0.3529592454433441, + "learning_rate": 1.8834157576925066e-05, + "loss": 0.5351, + "step": 11393 + }, + { + "epoch": 0.3128500823723229, + "grad_norm": 0.40112701058387756, + "learning_rate": 1.8833955187748515e-05, + "loss": 0.5219, + "step": 11394 + }, + { + "epoch": 0.3128775398132894, + "grad_norm": 0.41050034761428833, + "learning_rate": 1.883375278209382e-05, + "loss": 0.5125, + "step": 11395 + }, + { + "epoch": 0.3129049972542559, + "grad_norm": 0.42023107409477234, + "learning_rate": 1.8833550359961354e-05, + "loss": 0.5497, + "step": 11396 + }, + { + "epoch": 0.3129324546952224, + "grad_norm": 0.3478147089481354, + "learning_rate": 1.8833347921351503e-05, + "loss": 0.4126, + "step": 11397 + }, + { + "epoch": 0.3129599121361889, + "grad_norm": 0.410381555557251, + "learning_rate": 1.883314546626463e-05, + "loss": 0.5235, + "step": 11398 + }, + { + "epoch": 0.3129873695771554, + "grad_norm": 0.39227548241615295, + "learning_rate": 1.883294299470113e-05, + "loss": 0.5471, + "step": 11399 + }, + { + "epoch": 0.3130148270181219, + "grad_norm": 0.42469561100006104, + "learning_rate": 1.883274050666137e-05, + "loss": 0.5337, + "step": 11400 + }, + { + "epoch": 0.31304228445908844, + "grad_norm": 0.4026385545730591, + "learning_rate": 1.8832538002145728e-05, + "loss": 0.4831, + "step": 11401 + }, + { + "epoch": 0.31306974190005493, + "grad_norm": 0.3808736801147461, + "learning_rate": 1.8832335481154587e-05, + "loss": 0.5374, + "step": 11402 + }, + { + "epoch": 0.3130971993410214, + "grad_norm": 0.35645681619644165, + "learning_rate": 1.8832132943688317e-05, + "loss": 0.5334, + "step": 11403 + }, + { + "epoch": 0.3131246567819879, + "grad_norm": 0.6723828911781311, + "learning_rate": 1.8831930389747303e-05, + "loss": 0.4953, + "step": 11404 + }, + { + "epoch": 0.3131521142229544, + "grad_norm": 0.33795520663261414, + "learning_rate": 1.883172781933192e-05, + "loss": 0.3935, + "step": 11405 + }, + { + "epoch": 0.3131795716639209, + "grad_norm": 0.3502257168292999, + "learning_rate": 1.8831525232442547e-05, + "loss": 0.5126, + "step": 11406 + }, + { + "epoch": 0.3132070291048874, + "grad_norm": 0.3785855174064636, + "learning_rate": 1.883132262907956e-05, + "loss": 0.5388, + "step": 11407 + }, + { + "epoch": 0.31323448654585395, + "grad_norm": 0.3328665494918823, + "learning_rate": 1.8831120009243333e-05, + "loss": 0.4335, + "step": 11408 + }, + { + "epoch": 0.31326194398682045, + "grad_norm": 0.46332070231437683, + "learning_rate": 1.883091737293425e-05, + "loss": 0.5587, + "step": 11409 + }, + { + "epoch": 0.31328940142778694, + "grad_norm": 0.3294747769832611, + "learning_rate": 1.8830714720152695e-05, + "loss": 0.4678, + "step": 11410 + }, + { + "epoch": 0.31331685886875343, + "grad_norm": 0.3508578836917877, + "learning_rate": 1.883051205089903e-05, + "loss": 0.5501, + "step": 11411 + }, + { + "epoch": 0.31334431630971993, + "grad_norm": 0.38605642318725586, + "learning_rate": 1.8830309365173643e-05, + "loss": 0.4804, + "step": 11412 + }, + { + "epoch": 0.3133717737506864, + "grad_norm": 0.3724783658981323, + "learning_rate": 1.8830106662976914e-05, + "loss": 0.5251, + "step": 11413 + }, + { + "epoch": 0.3133992311916529, + "grad_norm": 0.36035382747650146, + "learning_rate": 1.882990394430922e-05, + "loss": 0.5377, + "step": 11414 + }, + { + "epoch": 0.31342668863261947, + "grad_norm": 0.3796662986278534, + "learning_rate": 1.882970120917093e-05, + "loss": 0.4843, + "step": 11415 + }, + { + "epoch": 0.31345414607358596, + "grad_norm": 0.35763317346572876, + "learning_rate": 1.8829498457562435e-05, + "loss": 0.4254, + "step": 11416 + }, + { + "epoch": 0.31348160351455245, + "grad_norm": 0.36443617939949036, + "learning_rate": 1.8829295689484103e-05, + "loss": 0.5082, + "step": 11417 + }, + { + "epoch": 0.31350906095551895, + "grad_norm": 0.42130762338638306, + "learning_rate": 1.882909290493632e-05, + "loss": 0.5953, + "step": 11418 + }, + { + "epoch": 0.31353651839648544, + "grad_norm": 0.487576425075531, + "learning_rate": 1.8828890103919458e-05, + "loss": 0.601, + "step": 11419 + }, + { + "epoch": 0.31356397583745194, + "grad_norm": 0.3987968862056732, + "learning_rate": 1.88286872864339e-05, + "loss": 0.5735, + "step": 11420 + }, + { + "epoch": 0.31359143327841843, + "grad_norm": 0.4013846814632416, + "learning_rate": 1.8828484452480024e-05, + "loss": 0.5936, + "step": 11421 + }, + { + "epoch": 0.313618890719385, + "grad_norm": 0.41978880763053894, + "learning_rate": 1.8828281602058204e-05, + "loss": 0.5468, + "step": 11422 + }, + { + "epoch": 0.3136463481603515, + "grad_norm": 0.3428100645542145, + "learning_rate": 1.8828078735168823e-05, + "loss": 0.5743, + "step": 11423 + }, + { + "epoch": 0.31367380560131797, + "grad_norm": 0.36299219727516174, + "learning_rate": 1.8827875851812254e-05, + "loss": 0.4606, + "step": 11424 + }, + { + "epoch": 0.31370126304228446, + "grad_norm": 0.4280633330345154, + "learning_rate": 1.882767295198888e-05, + "loss": 0.608, + "step": 11425 + }, + { + "epoch": 0.31372872048325096, + "grad_norm": 0.32518595457077026, + "learning_rate": 1.882747003569908e-05, + "loss": 0.472, + "step": 11426 + }, + { + "epoch": 0.31375617792421745, + "grad_norm": 0.4326891005039215, + "learning_rate": 1.8827267102943233e-05, + "loss": 0.5997, + "step": 11427 + }, + { + "epoch": 0.31378363536518394, + "grad_norm": 0.3928942382335663, + "learning_rate": 1.8827064153721714e-05, + "loss": 0.5238, + "step": 11428 + }, + { + "epoch": 0.3138110928061505, + "grad_norm": 0.3610036373138428, + "learning_rate": 1.8826861188034898e-05, + "loss": 0.4697, + "step": 11429 + }, + { + "epoch": 0.313838550247117, + "grad_norm": 0.37714314460754395, + "learning_rate": 1.8826658205883178e-05, + "loss": 0.539, + "step": 11430 + }, + { + "epoch": 0.3138660076880835, + "grad_norm": 0.4490905702114105, + "learning_rate": 1.8826455207266916e-05, + "loss": 0.578, + "step": 11431 + }, + { + "epoch": 0.31389346512905, + "grad_norm": 0.38836225867271423, + "learning_rate": 1.8826252192186502e-05, + "loss": 0.6084, + "step": 11432 + }, + { + "epoch": 0.31392092257001647, + "grad_norm": 0.36520203948020935, + "learning_rate": 1.882604916064231e-05, + "loss": 0.5578, + "step": 11433 + }, + { + "epoch": 0.31394838001098296, + "grad_norm": 0.36134952306747437, + "learning_rate": 1.8825846112634715e-05, + "loss": 0.4851, + "step": 11434 + }, + { + "epoch": 0.31397583745194946, + "grad_norm": 0.35547468066215515, + "learning_rate": 1.8825643048164105e-05, + "loss": 0.6206, + "step": 11435 + }, + { + "epoch": 0.314003294892916, + "grad_norm": 0.3826424479484558, + "learning_rate": 1.882543996723085e-05, + "loss": 0.5012, + "step": 11436 + }, + { + "epoch": 0.3140307523338825, + "grad_norm": 0.4151184856891632, + "learning_rate": 1.8825236869835338e-05, + "loss": 0.5395, + "step": 11437 + }, + { + "epoch": 0.314058209774849, + "grad_norm": 0.3388717472553253, + "learning_rate": 1.8825033755977936e-05, + "loss": 0.4409, + "step": 11438 + }, + { + "epoch": 0.3140856672158155, + "grad_norm": 0.4017639756202698, + "learning_rate": 1.8824830625659035e-05, + "loss": 0.5724, + "step": 11439 + }, + { + "epoch": 0.314113124656782, + "grad_norm": 0.33554476499557495, + "learning_rate": 1.8824627478879008e-05, + "loss": 0.5083, + "step": 11440 + }, + { + "epoch": 0.3141405820977485, + "grad_norm": 0.35722264647483826, + "learning_rate": 1.8824424315638233e-05, + "loss": 0.5388, + "step": 11441 + }, + { + "epoch": 0.31416803953871497, + "grad_norm": 0.39653512835502625, + "learning_rate": 1.8824221135937088e-05, + "loss": 0.475, + "step": 11442 + }, + { + "epoch": 0.3141954969796815, + "grad_norm": 0.36152908205986023, + "learning_rate": 1.8824017939775955e-05, + "loss": 0.524, + "step": 11443 + }, + { + "epoch": 0.314222954420648, + "grad_norm": 0.3253719210624695, + "learning_rate": 1.8823814727155216e-05, + "loss": 0.492, + "step": 11444 + }, + { + "epoch": 0.3142504118616145, + "grad_norm": 0.39274415373802185, + "learning_rate": 1.882361149807524e-05, + "loss": 0.5564, + "step": 11445 + }, + { + "epoch": 0.314277869302581, + "grad_norm": 0.39100995659828186, + "learning_rate": 1.882340825253642e-05, + "loss": 0.5995, + "step": 11446 + }, + { + "epoch": 0.3143053267435475, + "grad_norm": 0.38169682025909424, + "learning_rate": 1.882320499053912e-05, + "loss": 0.5554, + "step": 11447 + }, + { + "epoch": 0.314332784184514, + "grad_norm": 0.5330796837806702, + "learning_rate": 1.882300171208373e-05, + "loss": 0.5365, + "step": 11448 + }, + { + "epoch": 0.3143602416254805, + "grad_norm": 0.34954431653022766, + "learning_rate": 1.8822798417170628e-05, + "loss": 0.4791, + "step": 11449 + }, + { + "epoch": 0.314387699066447, + "grad_norm": 0.34413471817970276, + "learning_rate": 1.882259510580019e-05, + "loss": 0.4972, + "step": 11450 + }, + { + "epoch": 0.31441515650741353, + "grad_norm": 0.3539583683013916, + "learning_rate": 1.8822391777972796e-05, + "loss": 0.5313, + "step": 11451 + }, + { + "epoch": 0.31444261394838, + "grad_norm": 0.42896172404289246, + "learning_rate": 1.8822188433688822e-05, + "loss": 0.5486, + "step": 11452 + }, + { + "epoch": 0.3144700713893465, + "grad_norm": 0.44661253690719604, + "learning_rate": 1.8821985072948655e-05, + "loss": 0.5733, + "step": 11453 + }, + { + "epoch": 0.314497528830313, + "grad_norm": 0.40140625834465027, + "learning_rate": 1.882178169575267e-05, + "loss": 0.5521, + "step": 11454 + }, + { + "epoch": 0.3145249862712795, + "grad_norm": 0.42406731843948364, + "learning_rate": 1.8821578302101246e-05, + "loss": 0.5596, + "step": 11455 + }, + { + "epoch": 0.314552443712246, + "grad_norm": 0.3538399338722229, + "learning_rate": 1.8821374891994762e-05, + "loss": 0.4983, + "step": 11456 + }, + { + "epoch": 0.3145799011532125, + "grad_norm": 0.37343060970306396, + "learning_rate": 1.88211714654336e-05, + "loss": 0.5813, + "step": 11457 + }, + { + "epoch": 0.31460735859417904, + "grad_norm": 0.314078688621521, + "learning_rate": 1.8820968022418134e-05, + "loss": 0.4336, + "step": 11458 + }, + { + "epoch": 0.31463481603514554, + "grad_norm": 0.3489234149456024, + "learning_rate": 1.8820764562948752e-05, + "loss": 0.4489, + "step": 11459 + }, + { + "epoch": 0.31466227347611203, + "grad_norm": 0.3500722050666809, + "learning_rate": 1.8820561087025826e-05, + "loss": 0.4884, + "step": 11460 + }, + { + "epoch": 0.3146897309170785, + "grad_norm": 0.39647892117500305, + "learning_rate": 1.882035759464974e-05, + "loss": 0.5098, + "step": 11461 + }, + { + "epoch": 0.314717188358045, + "grad_norm": 0.48070645332336426, + "learning_rate": 1.882015408582087e-05, + "loss": 0.53, + "step": 11462 + }, + { + "epoch": 0.3147446457990115, + "grad_norm": 0.3557566702365875, + "learning_rate": 1.8819950560539597e-05, + "loss": 0.5094, + "step": 11463 + }, + { + "epoch": 0.314772103239978, + "grad_norm": 0.40291810035705566, + "learning_rate": 1.8819747018806304e-05, + "loss": 0.5257, + "step": 11464 + }, + { + "epoch": 0.31479956068094456, + "grad_norm": 0.4701206386089325, + "learning_rate": 1.8819543460621368e-05, + "loss": 0.5041, + "step": 11465 + }, + { + "epoch": 0.31482701812191105, + "grad_norm": 0.4614526629447937, + "learning_rate": 1.8819339885985165e-05, + "loss": 0.5473, + "step": 11466 + }, + { + "epoch": 0.31485447556287754, + "grad_norm": 0.3847768008708954, + "learning_rate": 1.881913629489808e-05, + "loss": 0.4405, + "step": 11467 + }, + { + "epoch": 0.31488193300384404, + "grad_norm": 0.45060089230537415, + "learning_rate": 1.8818932687360492e-05, + "loss": 0.4809, + "step": 11468 + }, + { + "epoch": 0.31490939044481053, + "grad_norm": 0.3610547184944153, + "learning_rate": 1.8818729063372782e-05, + "loss": 0.5213, + "step": 11469 + }, + { + "epoch": 0.314936847885777, + "grad_norm": 0.36548951268196106, + "learning_rate": 1.8818525422935322e-05, + "loss": 0.4946, + "step": 11470 + }, + { + "epoch": 0.3149643053267435, + "grad_norm": 0.3619285225868225, + "learning_rate": 1.8818321766048503e-05, + "loss": 0.4987, + "step": 11471 + }, + { + "epoch": 0.31499176276771007, + "grad_norm": 0.3898758888244629, + "learning_rate": 1.8818118092712697e-05, + "loss": 0.4597, + "step": 11472 + }, + { + "epoch": 0.31501922020867656, + "grad_norm": 0.3421955406665802, + "learning_rate": 1.8817914402928287e-05, + "loss": 0.5643, + "step": 11473 + }, + { + "epoch": 0.31504667764964306, + "grad_norm": 0.3998892903327942, + "learning_rate": 1.881771069669565e-05, + "loss": 0.5054, + "step": 11474 + }, + { + "epoch": 0.31507413509060955, + "grad_norm": 0.348342627286911, + "learning_rate": 1.881750697401517e-05, + "loss": 0.4562, + "step": 11475 + }, + { + "epoch": 0.31510159253157605, + "grad_norm": 0.3780558705329895, + "learning_rate": 1.881730323488723e-05, + "loss": 0.4743, + "step": 11476 + }, + { + "epoch": 0.31512904997254254, + "grad_norm": 0.3481757640838623, + "learning_rate": 1.8817099479312198e-05, + "loss": 0.4952, + "step": 11477 + }, + { + "epoch": 0.31515650741350903, + "grad_norm": 0.36746710538864136, + "learning_rate": 1.8816895707290464e-05, + "loss": 0.5358, + "step": 11478 + }, + { + "epoch": 0.3151839648544756, + "grad_norm": 0.45495277643203735, + "learning_rate": 1.8816691918822406e-05, + "loss": 0.5057, + "step": 11479 + }, + { + "epoch": 0.3152114222954421, + "grad_norm": 0.3488655984401703, + "learning_rate": 1.8816488113908404e-05, + "loss": 0.5295, + "step": 11480 + }, + { + "epoch": 0.3152388797364086, + "grad_norm": 0.38944074511528015, + "learning_rate": 1.8816284292548833e-05, + "loss": 0.5073, + "step": 11481 + }, + { + "epoch": 0.31526633717737507, + "grad_norm": 0.3641659915447235, + "learning_rate": 1.8816080454744084e-05, + "loss": 0.545, + "step": 11482 + }, + { + "epoch": 0.31529379461834156, + "grad_norm": 0.3780931830406189, + "learning_rate": 1.8815876600494526e-05, + "loss": 0.4525, + "step": 11483 + }, + { + "epoch": 0.31532125205930805, + "grad_norm": 0.3898509740829468, + "learning_rate": 1.8815672729800552e-05, + "loss": 0.535, + "step": 11484 + }, + { + "epoch": 0.31534870950027455, + "grad_norm": 0.35204508900642395, + "learning_rate": 1.881546884266253e-05, + "loss": 0.5053, + "step": 11485 + }, + { + "epoch": 0.3153761669412411, + "grad_norm": 0.3814624547958374, + "learning_rate": 1.8815264939080845e-05, + "loss": 0.5473, + "step": 11486 + }, + { + "epoch": 0.3154036243822076, + "grad_norm": 0.3794768452644348, + "learning_rate": 1.8815061019055875e-05, + "loss": 0.5184, + "step": 11487 + }, + { + "epoch": 0.3154310818231741, + "grad_norm": 0.32853177189826965, + "learning_rate": 1.8814857082588007e-05, + "loss": 0.5757, + "step": 11488 + }, + { + "epoch": 0.3154585392641406, + "grad_norm": 0.35091933608055115, + "learning_rate": 1.8814653129677616e-05, + "loss": 0.5508, + "step": 11489 + }, + { + "epoch": 0.3154859967051071, + "grad_norm": 0.4936169385910034, + "learning_rate": 1.8814449160325083e-05, + "loss": 0.6039, + "step": 11490 + }, + { + "epoch": 0.31551345414607357, + "grad_norm": 0.34139588475227356, + "learning_rate": 1.8814245174530786e-05, + "loss": 0.5111, + "step": 11491 + }, + { + "epoch": 0.31554091158704006, + "grad_norm": 0.40885162353515625, + "learning_rate": 1.8814041172295116e-05, + "loss": 0.5957, + "step": 11492 + }, + { + "epoch": 0.3155683690280066, + "grad_norm": 0.42207106947898865, + "learning_rate": 1.881383715361844e-05, + "loss": 0.5928, + "step": 11493 + }, + { + "epoch": 0.3155958264689731, + "grad_norm": 0.36567479372024536, + "learning_rate": 1.8813633118501145e-05, + "loss": 0.5001, + "step": 11494 + }, + { + "epoch": 0.3156232839099396, + "grad_norm": 0.43504583835601807, + "learning_rate": 1.8813429066943617e-05, + "loss": 0.5177, + "step": 11495 + }, + { + "epoch": 0.3156507413509061, + "grad_norm": 0.3886643648147583, + "learning_rate": 1.8813224998946223e-05, + "loss": 0.5948, + "step": 11496 + }, + { + "epoch": 0.3156781987918726, + "grad_norm": 0.37390103936195374, + "learning_rate": 1.8813020914509356e-05, + "loss": 0.5057, + "step": 11497 + }, + { + "epoch": 0.3157056562328391, + "grad_norm": 0.370434433221817, + "learning_rate": 1.8812816813633392e-05, + "loss": 0.5095, + "step": 11498 + }, + { + "epoch": 0.3157331136738056, + "grad_norm": 0.3845430314540863, + "learning_rate": 1.881261269631871e-05, + "loss": 0.5049, + "step": 11499 + }, + { + "epoch": 0.3157605711147721, + "grad_norm": 0.4968569278717041, + "learning_rate": 1.8812408562565695e-05, + "loss": 0.5537, + "step": 11500 + }, + { + "epoch": 0.3157880285557386, + "grad_norm": 0.413463830947876, + "learning_rate": 1.8812204412374724e-05, + "loss": 0.4184, + "step": 11501 + }, + { + "epoch": 0.3158154859967051, + "grad_norm": 0.435461163520813, + "learning_rate": 1.881200024574618e-05, + "loss": 0.5037, + "step": 11502 + }, + { + "epoch": 0.3158429434376716, + "grad_norm": 0.35665401816368103, + "learning_rate": 1.8811796062680442e-05, + "loss": 0.4741, + "step": 11503 + }, + { + "epoch": 0.3158704008786381, + "grad_norm": 0.3172353208065033, + "learning_rate": 1.8811591863177893e-05, + "loss": 0.4597, + "step": 11504 + }, + { + "epoch": 0.3158978583196046, + "grad_norm": 0.34494349360466003, + "learning_rate": 1.8811387647238914e-05, + "loss": 0.4451, + "step": 11505 + }, + { + "epoch": 0.3159253157605711, + "grad_norm": 0.37418729066848755, + "learning_rate": 1.8811183414863884e-05, + "loss": 0.5216, + "step": 11506 + }, + { + "epoch": 0.31595277320153764, + "grad_norm": 0.43794745206832886, + "learning_rate": 1.8810979166053182e-05, + "loss": 0.5769, + "step": 11507 + }, + { + "epoch": 0.31598023064250413, + "grad_norm": 0.3706580102443695, + "learning_rate": 1.8810774900807195e-05, + "loss": 0.4966, + "step": 11508 + }, + { + "epoch": 0.31600768808347063, + "grad_norm": 0.4037052094936371, + "learning_rate": 1.8810570619126302e-05, + "loss": 0.4976, + "step": 11509 + }, + { + "epoch": 0.3160351455244371, + "grad_norm": 0.3261788487434387, + "learning_rate": 1.8810366321010876e-05, + "loss": 0.4989, + "step": 11510 + }, + { + "epoch": 0.3160626029654036, + "grad_norm": 0.4436168968677521, + "learning_rate": 1.881016200646131e-05, + "loss": 0.5856, + "step": 11511 + }, + { + "epoch": 0.3160900604063701, + "grad_norm": 0.35024094581604004, + "learning_rate": 1.8809957675477978e-05, + "loss": 0.4636, + "step": 11512 + }, + { + "epoch": 0.3161175178473366, + "grad_norm": 0.43110066652297974, + "learning_rate": 1.8809753328061267e-05, + "loss": 0.5881, + "step": 11513 + }, + { + "epoch": 0.31614497528830315, + "grad_norm": 0.37327733635902405, + "learning_rate": 1.880954896421155e-05, + "loss": 0.5206, + "step": 11514 + }, + { + "epoch": 0.31617243272926965, + "grad_norm": 0.3956168591976166, + "learning_rate": 1.8809344583929213e-05, + "loss": 0.4882, + "step": 11515 + }, + { + "epoch": 0.31619989017023614, + "grad_norm": 0.3827648460865021, + "learning_rate": 1.880914018721464e-05, + "loss": 0.5975, + "step": 11516 + }, + { + "epoch": 0.31622734761120264, + "grad_norm": 0.37007343769073486, + "learning_rate": 1.8808935774068206e-05, + "loss": 0.4352, + "step": 11517 + }, + { + "epoch": 0.31625480505216913, + "grad_norm": 0.360985666513443, + "learning_rate": 1.8808731344490295e-05, + "loss": 0.5879, + "step": 11518 + }, + { + "epoch": 0.3162822624931356, + "grad_norm": 0.3572729229927063, + "learning_rate": 1.880852689848129e-05, + "loss": 0.5203, + "step": 11519 + }, + { + "epoch": 0.3163097199341021, + "grad_norm": 0.43787163496017456, + "learning_rate": 1.880832243604157e-05, + "loss": 0.4907, + "step": 11520 + }, + { + "epoch": 0.31633717737506867, + "grad_norm": 0.3678734004497528, + "learning_rate": 1.880811795717152e-05, + "loss": 0.5082, + "step": 11521 + }, + { + "epoch": 0.31636463481603516, + "grad_norm": 0.3624853491783142, + "learning_rate": 1.8807913461871513e-05, + "loss": 0.5246, + "step": 11522 + }, + { + "epoch": 0.31639209225700166, + "grad_norm": 0.3597498834133148, + "learning_rate": 1.8807708950141944e-05, + "loss": 0.5574, + "step": 11523 + }, + { + "epoch": 0.31641954969796815, + "grad_norm": 0.351953387260437, + "learning_rate": 1.880750442198318e-05, + "loss": 0.4898, + "step": 11524 + }, + { + "epoch": 0.31644700713893464, + "grad_norm": 0.3388998508453369, + "learning_rate": 1.8807299877395615e-05, + "loss": 0.4327, + "step": 11525 + }, + { + "epoch": 0.31647446457990114, + "grad_norm": 0.3764038383960724, + "learning_rate": 1.880709531637962e-05, + "loss": 0.5096, + "step": 11526 + }, + { + "epoch": 0.31650192202086763, + "grad_norm": 0.34394365549087524, + "learning_rate": 1.8806890738935582e-05, + "loss": 0.6114, + "step": 11527 + }, + { + "epoch": 0.3165293794618342, + "grad_norm": 0.3825882375240326, + "learning_rate": 1.8806686145063885e-05, + "loss": 0.5053, + "step": 11528 + }, + { + "epoch": 0.3165568369028007, + "grad_norm": 0.4009203016757965, + "learning_rate": 1.8806481534764905e-05, + "loss": 0.5629, + "step": 11529 + }, + { + "epoch": 0.31658429434376717, + "grad_norm": 0.416076123714447, + "learning_rate": 1.8806276908039026e-05, + "loss": 0.545, + "step": 11530 + }, + { + "epoch": 0.31661175178473366, + "grad_norm": 0.3119119703769684, + "learning_rate": 1.880607226488663e-05, + "loss": 0.3959, + "step": 11531 + }, + { + "epoch": 0.31663920922570016, + "grad_norm": 0.5025073885917664, + "learning_rate": 1.88058676053081e-05, + "loss": 0.4718, + "step": 11532 + }, + { + "epoch": 0.31666666666666665, + "grad_norm": 0.3827705383300781, + "learning_rate": 1.8805662929303815e-05, + "loss": 0.5281, + "step": 11533 + }, + { + "epoch": 0.31669412410763315, + "grad_norm": 0.3446609377861023, + "learning_rate": 1.880545823687416e-05, + "loss": 0.5309, + "step": 11534 + }, + { + "epoch": 0.3167215815485997, + "grad_norm": 0.3336891233921051, + "learning_rate": 1.8805253528019515e-05, + "loss": 0.5179, + "step": 11535 + }, + { + "epoch": 0.3167490389895662, + "grad_norm": 0.5082288980484009, + "learning_rate": 1.880504880274026e-05, + "loss": 0.5328, + "step": 11536 + }, + { + "epoch": 0.3167764964305327, + "grad_norm": 0.3335157036781311, + "learning_rate": 1.880484406103678e-05, + "loss": 0.4538, + "step": 11537 + }, + { + "epoch": 0.3168039538714992, + "grad_norm": 0.38271403312683105, + "learning_rate": 1.8804639302909454e-05, + "loss": 0.541, + "step": 11538 + }, + { + "epoch": 0.31683141131246567, + "grad_norm": 0.36313244700431824, + "learning_rate": 1.8804434528358667e-05, + "loss": 0.5155, + "step": 11539 + }, + { + "epoch": 0.31685886875343217, + "grad_norm": 0.3532748520374298, + "learning_rate": 1.88042297373848e-05, + "loss": 0.5145, + "step": 11540 + }, + { + "epoch": 0.31688632619439866, + "grad_norm": 0.35819903016090393, + "learning_rate": 1.8804024929988234e-05, + "loss": 0.4696, + "step": 11541 + }, + { + "epoch": 0.3169137836353652, + "grad_norm": 0.35440605878829956, + "learning_rate": 1.880382010616935e-05, + "loss": 0.5826, + "step": 11542 + }, + { + "epoch": 0.3169412410763317, + "grad_norm": 0.35829195380210876, + "learning_rate": 1.8803615265928537e-05, + "loss": 0.5371, + "step": 11543 + }, + { + "epoch": 0.3169686985172982, + "grad_norm": 0.3524274528026581, + "learning_rate": 1.8803410409266165e-05, + "loss": 0.5204, + "step": 11544 + }, + { + "epoch": 0.3169961559582647, + "grad_norm": 0.3680247366428375, + "learning_rate": 1.8803205536182627e-05, + "loss": 0.4507, + "step": 11545 + }, + { + "epoch": 0.3170236133992312, + "grad_norm": 0.3740106225013733, + "learning_rate": 1.88030006466783e-05, + "loss": 0.5717, + "step": 11546 + }, + { + "epoch": 0.3170510708401977, + "grad_norm": 0.3944126069545746, + "learning_rate": 1.8802795740753567e-05, + "loss": 0.5568, + "step": 11547 + }, + { + "epoch": 0.3170785282811642, + "grad_norm": 0.49929237365722656, + "learning_rate": 1.880259081840881e-05, + "loss": 0.5953, + "step": 11548 + }, + { + "epoch": 0.3171059857221307, + "grad_norm": 0.3294670879840851, + "learning_rate": 1.880238587964441e-05, + "loss": 0.4465, + "step": 11549 + }, + { + "epoch": 0.3171334431630972, + "grad_norm": 0.40135473012924194, + "learning_rate": 1.8802180924460757e-05, + "loss": 0.6064, + "step": 11550 + }, + { + "epoch": 0.3171609006040637, + "grad_norm": 0.37009915709495544, + "learning_rate": 1.880197595285822e-05, + "loss": 0.5726, + "step": 11551 + }, + { + "epoch": 0.3171883580450302, + "grad_norm": 0.33187779784202576, + "learning_rate": 1.8801770964837193e-05, + "loss": 0.4795, + "step": 11552 + }, + { + "epoch": 0.3172158154859967, + "grad_norm": 0.44523945450782776, + "learning_rate": 1.8801565960398055e-05, + "loss": 0.4923, + "step": 11553 + }, + { + "epoch": 0.3172432729269632, + "grad_norm": 0.446218341588974, + "learning_rate": 1.8801360939541183e-05, + "loss": 0.5082, + "step": 11554 + }, + { + "epoch": 0.3172707303679297, + "grad_norm": 0.34654587507247925, + "learning_rate": 1.8801155902266965e-05, + "loss": 0.5511, + "step": 11555 + }, + { + "epoch": 0.31729818780889624, + "grad_norm": 0.39677271246910095, + "learning_rate": 1.8800950848575786e-05, + "loss": 0.5524, + "step": 11556 + }, + { + "epoch": 0.31732564524986273, + "grad_norm": 0.4771231710910797, + "learning_rate": 1.8800745778468023e-05, + "loss": 0.5297, + "step": 11557 + }, + { + "epoch": 0.3173531026908292, + "grad_norm": 0.3777872622013092, + "learning_rate": 1.8800540691944057e-05, + "loss": 0.561, + "step": 11558 + }, + { + "epoch": 0.3173805601317957, + "grad_norm": 0.37661466002464294, + "learning_rate": 1.8800335589004275e-05, + "loss": 0.5606, + "step": 11559 + }, + { + "epoch": 0.3174080175727622, + "grad_norm": 0.34776484966278076, + "learning_rate": 1.880013046964906e-05, + "loss": 0.5157, + "step": 11560 + }, + { + "epoch": 0.3174354750137287, + "grad_norm": 0.35297444462776184, + "learning_rate": 1.8799925333878793e-05, + "loss": 0.5539, + "step": 11561 + }, + { + "epoch": 0.3174629324546952, + "grad_norm": 0.384945273399353, + "learning_rate": 1.8799720181693856e-05, + "loss": 0.5465, + "step": 11562 + }, + { + "epoch": 0.31749038989566175, + "grad_norm": 0.38219723105430603, + "learning_rate": 1.879951501309463e-05, + "loss": 0.604, + "step": 11563 + }, + { + "epoch": 0.31751784733662825, + "grad_norm": 0.35860779881477356, + "learning_rate": 1.8799309828081506e-05, + "loss": 0.511, + "step": 11564 + }, + { + "epoch": 0.31754530477759474, + "grad_norm": 0.39727258682250977, + "learning_rate": 1.8799104626654854e-05, + "loss": 0.5395, + "step": 11565 + }, + { + "epoch": 0.31757276221856123, + "grad_norm": 0.40390148758888245, + "learning_rate": 1.879889940881507e-05, + "loss": 0.524, + "step": 11566 + }, + { + "epoch": 0.3176002196595277, + "grad_norm": 0.648410439491272, + "learning_rate": 1.8798694174562527e-05, + "loss": 0.5025, + "step": 11567 + }, + { + "epoch": 0.3176276771004942, + "grad_norm": 0.3706580698490143, + "learning_rate": 1.879848892389761e-05, + "loss": 0.5135, + "step": 11568 + }, + { + "epoch": 0.3176551345414607, + "grad_norm": 0.3756009340286255, + "learning_rate": 1.8798283656820704e-05, + "loss": 0.5526, + "step": 11569 + }, + { + "epoch": 0.31768259198242726, + "grad_norm": 0.4069195091724396, + "learning_rate": 1.879807837333219e-05, + "loss": 0.5022, + "step": 11570 + }, + { + "epoch": 0.31771004942339376, + "grad_norm": 0.4003545641899109, + "learning_rate": 1.8797873073432452e-05, + "loss": 0.5489, + "step": 11571 + }, + { + "epoch": 0.31773750686436025, + "grad_norm": 0.34933412075042725, + "learning_rate": 1.8797667757121875e-05, + "loss": 0.5158, + "step": 11572 + }, + { + "epoch": 0.31776496430532675, + "grad_norm": 0.38408082723617554, + "learning_rate": 1.8797462424400837e-05, + "loss": 0.5253, + "step": 11573 + }, + { + "epoch": 0.31779242174629324, + "grad_norm": 0.3721802234649658, + "learning_rate": 1.8797257075269726e-05, + "loss": 0.5437, + "step": 11574 + }, + { + "epoch": 0.31781987918725974, + "grad_norm": 0.32017526030540466, + "learning_rate": 1.8797051709728923e-05, + "loss": 0.4925, + "step": 11575 + }, + { + "epoch": 0.31784733662822623, + "grad_norm": 0.41804009675979614, + "learning_rate": 1.8796846327778808e-05, + "loss": 0.5814, + "step": 11576 + }, + { + "epoch": 0.3178747940691928, + "grad_norm": 0.4707590937614441, + "learning_rate": 1.8796640929419768e-05, + "loss": 0.5919, + "step": 11577 + }, + { + "epoch": 0.3179022515101593, + "grad_norm": 0.4131036698818207, + "learning_rate": 1.8796435514652188e-05, + "loss": 0.5845, + "step": 11578 + }, + { + "epoch": 0.31792970895112577, + "grad_norm": 0.3870420455932617, + "learning_rate": 1.8796230083476446e-05, + "loss": 0.5995, + "step": 11579 + }, + { + "epoch": 0.31795716639209226, + "grad_norm": 0.4472878873348236, + "learning_rate": 1.8796024635892926e-05, + "loss": 0.5573, + "step": 11580 + }, + { + "epoch": 0.31798462383305875, + "grad_norm": 0.35519537329673767, + "learning_rate": 1.8795819171902015e-05, + "loss": 0.5528, + "step": 11581 + }, + { + "epoch": 0.31801208127402525, + "grad_norm": 0.3316037952899933, + "learning_rate": 1.8795613691504094e-05, + "loss": 0.5657, + "step": 11582 + }, + { + "epoch": 0.31803953871499174, + "grad_norm": 0.4174230396747589, + "learning_rate": 1.8795408194699544e-05, + "loss": 0.5228, + "step": 11583 + }, + { + "epoch": 0.31806699615595824, + "grad_norm": 0.38040396571159363, + "learning_rate": 1.879520268148875e-05, + "loss": 0.4614, + "step": 11584 + }, + { + "epoch": 0.3180944535969248, + "grad_norm": 0.3828701972961426, + "learning_rate": 1.87949971518721e-05, + "loss": 0.5074, + "step": 11585 + }, + { + "epoch": 0.3181219110378913, + "grad_norm": 0.3946191370487213, + "learning_rate": 1.8794791605849972e-05, + "loss": 0.4755, + "step": 11586 + }, + { + "epoch": 0.3181493684788578, + "grad_norm": 0.38268402218818665, + "learning_rate": 1.8794586043422747e-05, + "loss": 0.6457, + "step": 11587 + }, + { + "epoch": 0.31817682591982427, + "grad_norm": 0.46169114112854004, + "learning_rate": 1.8794380464590815e-05, + "loss": 0.6266, + "step": 11588 + }, + { + "epoch": 0.31820428336079076, + "grad_norm": 0.45835524797439575, + "learning_rate": 1.879417486935456e-05, + "loss": 0.5278, + "step": 11589 + }, + { + "epoch": 0.31823174080175726, + "grad_norm": 0.3857206404209137, + "learning_rate": 1.8793969257714357e-05, + "loss": 0.5272, + "step": 11590 + }, + { + "epoch": 0.31825919824272375, + "grad_norm": 0.3728189766407013, + "learning_rate": 1.8793763629670596e-05, + "loss": 0.5061, + "step": 11591 + }, + { + "epoch": 0.3182866556836903, + "grad_norm": 0.4018442630767822, + "learning_rate": 1.879355798522366e-05, + "loss": 0.5898, + "step": 11592 + }, + { + "epoch": 0.3183141131246568, + "grad_norm": 0.37019404768943787, + "learning_rate": 1.879335232437393e-05, + "loss": 0.5345, + "step": 11593 + }, + { + "epoch": 0.3183415705656233, + "grad_norm": 0.3224369287490845, + "learning_rate": 1.8793146647121795e-05, + "loss": 0.476, + "step": 11594 + }, + { + "epoch": 0.3183690280065898, + "grad_norm": 0.3348217010498047, + "learning_rate": 1.879294095346763e-05, + "loss": 0.4793, + "step": 11595 + }, + { + "epoch": 0.3183964854475563, + "grad_norm": 0.3426550626754761, + "learning_rate": 1.8792735243411827e-05, + "loss": 0.5172, + "step": 11596 + }, + { + "epoch": 0.31842394288852277, + "grad_norm": 0.4189211130142212, + "learning_rate": 1.8792529516954768e-05, + "loss": 0.5364, + "step": 11597 + }, + { + "epoch": 0.31845140032948926, + "grad_norm": 0.36945801973342896, + "learning_rate": 1.8792323774096833e-05, + "loss": 0.4743, + "step": 11598 + }, + { + "epoch": 0.3184788577704558, + "grad_norm": 0.3782860040664673, + "learning_rate": 1.8792118014838406e-05, + "loss": 0.5186, + "step": 11599 + }, + { + "epoch": 0.3185063152114223, + "grad_norm": 0.4049023985862732, + "learning_rate": 1.8791912239179876e-05, + "loss": 0.5905, + "step": 11600 + }, + { + "epoch": 0.3185337726523888, + "grad_norm": 0.3841196894645691, + "learning_rate": 1.8791706447121623e-05, + "loss": 0.4422, + "step": 11601 + }, + { + "epoch": 0.3185612300933553, + "grad_norm": 0.38670873641967773, + "learning_rate": 1.879150063866403e-05, + "loss": 0.5418, + "step": 11602 + }, + { + "epoch": 0.3185886875343218, + "grad_norm": 0.33993688225746155, + "learning_rate": 1.8791294813807485e-05, + "loss": 0.5401, + "step": 11603 + }, + { + "epoch": 0.3186161449752883, + "grad_norm": 0.35934340953826904, + "learning_rate": 1.8791088972552365e-05, + "loss": 0.5623, + "step": 11604 + }, + { + "epoch": 0.3186436024162548, + "grad_norm": 0.3227153420448303, + "learning_rate": 1.8790883114899064e-05, + "loss": 0.5313, + "step": 11605 + }, + { + "epoch": 0.31867105985722133, + "grad_norm": 0.3852930963039398, + "learning_rate": 1.8790677240847954e-05, + "loss": 0.4862, + "step": 11606 + }, + { + "epoch": 0.3186985172981878, + "grad_norm": 0.35360977053642273, + "learning_rate": 1.8790471350399427e-05, + "loss": 0.5084, + "step": 11607 + }, + { + "epoch": 0.3187259747391543, + "grad_norm": 0.40991950035095215, + "learning_rate": 1.8790265443553868e-05, + "loss": 0.5857, + "step": 11608 + }, + { + "epoch": 0.3187534321801208, + "grad_norm": 0.3776117265224457, + "learning_rate": 1.8790059520311658e-05, + "loss": 0.4601, + "step": 11609 + }, + { + "epoch": 0.3187808896210873, + "grad_norm": 0.34307917952537537, + "learning_rate": 1.878985358067318e-05, + "loss": 0.472, + "step": 11610 + }, + { + "epoch": 0.3188083470620538, + "grad_norm": 0.3723316788673401, + "learning_rate": 1.878964762463882e-05, + "loss": 0.5197, + "step": 11611 + }, + { + "epoch": 0.3188358045030203, + "grad_norm": 0.41890260577201843, + "learning_rate": 1.8789441652208965e-05, + "loss": 0.5419, + "step": 11612 + }, + { + "epoch": 0.31886326194398684, + "grad_norm": 0.3821306824684143, + "learning_rate": 1.8789235663383992e-05, + "loss": 0.5199, + "step": 11613 + }, + { + "epoch": 0.31889071938495334, + "grad_norm": 0.3857809007167816, + "learning_rate": 1.878902965816429e-05, + "loss": 0.5224, + "step": 11614 + }, + { + "epoch": 0.31891817682591983, + "grad_norm": 0.33963897824287415, + "learning_rate": 1.8788823636550245e-05, + "loss": 0.5248, + "step": 11615 + }, + { + "epoch": 0.3189456342668863, + "grad_norm": 0.36402612924575806, + "learning_rate": 1.8788617598542237e-05, + "loss": 0.5388, + "step": 11616 + }, + { + "epoch": 0.3189730917078528, + "grad_norm": 0.36881953477859497, + "learning_rate": 1.878841154414065e-05, + "loss": 0.6057, + "step": 11617 + }, + { + "epoch": 0.3190005491488193, + "grad_norm": 0.38719192147254944, + "learning_rate": 1.878820547334587e-05, + "loss": 0.6447, + "step": 11618 + }, + { + "epoch": 0.3190280065897858, + "grad_norm": 0.3778699040412903, + "learning_rate": 1.878799938615829e-05, + "loss": 0.5515, + "step": 11619 + }, + { + "epoch": 0.31905546403075236, + "grad_norm": 0.3776033818721771, + "learning_rate": 1.878779328257828e-05, + "loss": 0.5269, + "step": 11620 + }, + { + "epoch": 0.31908292147171885, + "grad_norm": 0.47917038202285767, + "learning_rate": 1.878758716260623e-05, + "loss": 0.548, + "step": 11621 + }, + { + "epoch": 0.31911037891268534, + "grad_norm": 0.45578888058662415, + "learning_rate": 1.8787381026242528e-05, + "loss": 0.4945, + "step": 11622 + }, + { + "epoch": 0.31913783635365184, + "grad_norm": 0.339183509349823, + "learning_rate": 1.8787174873487556e-05, + "loss": 0.4622, + "step": 11623 + }, + { + "epoch": 0.31916529379461833, + "grad_norm": 0.3797185719013214, + "learning_rate": 1.8786968704341696e-05, + "loss": 0.5711, + "step": 11624 + }, + { + "epoch": 0.3191927512355848, + "grad_norm": 0.3996993899345398, + "learning_rate": 1.8786762518805336e-05, + "loss": 0.618, + "step": 11625 + }, + { + "epoch": 0.3192202086765513, + "grad_norm": 0.3783722519874573, + "learning_rate": 1.8786556316878858e-05, + "loss": 0.5544, + "step": 11626 + }, + { + "epoch": 0.31924766611751787, + "grad_norm": 0.45522722601890564, + "learning_rate": 1.8786350098562655e-05, + "loss": 0.5249, + "step": 11627 + }, + { + "epoch": 0.31927512355848436, + "grad_norm": 0.31489941477775574, + "learning_rate": 1.8786143863857096e-05, + "loss": 0.5011, + "step": 11628 + }, + { + "epoch": 0.31930258099945086, + "grad_norm": 0.3268744945526123, + "learning_rate": 1.8785937612762577e-05, + "loss": 0.527, + "step": 11629 + }, + { + "epoch": 0.31933003844041735, + "grad_norm": 0.4782208502292633, + "learning_rate": 1.8785731345279483e-05, + "loss": 0.5288, + "step": 11630 + }, + { + "epoch": 0.31935749588138385, + "grad_norm": 0.39080944657325745, + "learning_rate": 1.878552506140819e-05, + "loss": 0.5086, + "step": 11631 + }, + { + "epoch": 0.31938495332235034, + "grad_norm": 0.4034901261329651, + "learning_rate": 1.8785318761149096e-05, + "loss": 0.5457, + "step": 11632 + }, + { + "epoch": 0.31941241076331683, + "grad_norm": 0.36330708861351013, + "learning_rate": 1.8785112444502576e-05, + "loss": 0.5211, + "step": 11633 + }, + { + "epoch": 0.3194398682042834, + "grad_norm": 0.3466344177722931, + "learning_rate": 1.8784906111469018e-05, + "loss": 0.5494, + "step": 11634 + }, + { + "epoch": 0.3194673256452499, + "grad_norm": 0.41117408871650696, + "learning_rate": 1.8784699762048804e-05, + "loss": 0.5572, + "step": 11635 + }, + { + "epoch": 0.31949478308621637, + "grad_norm": 0.39266934990882874, + "learning_rate": 1.878449339624232e-05, + "loss": 0.4686, + "step": 11636 + }, + { + "epoch": 0.31952224052718287, + "grad_norm": 0.3957371711730957, + "learning_rate": 1.8784287014049956e-05, + "loss": 0.4832, + "step": 11637 + }, + { + "epoch": 0.31954969796814936, + "grad_norm": 0.37069690227508545, + "learning_rate": 1.878408061547209e-05, + "loss": 0.6524, + "step": 11638 + }, + { + "epoch": 0.31957715540911585, + "grad_norm": 0.4338432550430298, + "learning_rate": 1.8783874200509115e-05, + "loss": 0.5688, + "step": 11639 + }, + { + "epoch": 0.31960461285008235, + "grad_norm": 0.3654215633869171, + "learning_rate": 1.8783667769161408e-05, + "loss": 0.6487, + "step": 11640 + }, + { + "epoch": 0.3196320702910489, + "grad_norm": 0.3625013530254364, + "learning_rate": 1.8783461321429356e-05, + "loss": 0.5534, + "step": 11641 + }, + { + "epoch": 0.3196595277320154, + "grad_norm": 0.3790997564792633, + "learning_rate": 1.8783254857313347e-05, + "loss": 0.5592, + "step": 11642 + }, + { + "epoch": 0.3196869851729819, + "grad_norm": 0.37769949436187744, + "learning_rate": 1.878304837681376e-05, + "loss": 0.5399, + "step": 11643 + }, + { + "epoch": 0.3197144426139484, + "grad_norm": 0.3315916955471039, + "learning_rate": 1.878284187993099e-05, + "loss": 0.5816, + "step": 11644 + }, + { + "epoch": 0.3197419000549149, + "grad_norm": 0.39127054810523987, + "learning_rate": 1.8782635366665416e-05, + "loss": 0.4574, + "step": 11645 + }, + { + "epoch": 0.31976935749588137, + "grad_norm": 0.32830289006233215, + "learning_rate": 1.8782428837017425e-05, + "loss": 0.4049, + "step": 11646 + }, + { + "epoch": 0.31979681493684786, + "grad_norm": 0.5111897587776184, + "learning_rate": 1.8782222290987396e-05, + "loss": 0.5081, + "step": 11647 + }, + { + "epoch": 0.3198242723778144, + "grad_norm": 0.4389162063598633, + "learning_rate": 1.8782015728575723e-05, + "loss": 0.5853, + "step": 11648 + }, + { + "epoch": 0.3198517298187809, + "grad_norm": 0.3880854547023773, + "learning_rate": 1.8781809149782786e-05, + "loss": 0.6067, + "step": 11649 + }, + { + "epoch": 0.3198791872597474, + "grad_norm": 0.33045369386672974, + "learning_rate": 1.8781602554608973e-05, + "loss": 0.4819, + "step": 11650 + }, + { + "epoch": 0.3199066447007139, + "grad_norm": 0.4233371317386627, + "learning_rate": 1.878139594305467e-05, + "loss": 0.4685, + "step": 11651 + }, + { + "epoch": 0.3199341021416804, + "grad_norm": 0.4157913625240326, + "learning_rate": 1.878118931512026e-05, + "loss": 0.5381, + "step": 11652 + }, + { + "epoch": 0.3199615595826469, + "grad_norm": 0.40798288583755493, + "learning_rate": 1.878098267080613e-05, + "loss": 0.5424, + "step": 11653 + }, + { + "epoch": 0.3199890170236134, + "grad_norm": 0.44678348302841187, + "learning_rate": 1.8780776010112665e-05, + "loss": 0.6024, + "step": 11654 + }, + { + "epoch": 0.3200164744645799, + "grad_norm": 0.526862382888794, + "learning_rate": 1.878056933304025e-05, + "loss": 0.4688, + "step": 11655 + }, + { + "epoch": 0.3200439319055464, + "grad_norm": 0.33278653025627136, + "learning_rate": 1.8780362639589266e-05, + "loss": 0.4627, + "step": 11656 + }, + { + "epoch": 0.3200713893465129, + "grad_norm": 0.36074602603912354, + "learning_rate": 1.8780155929760108e-05, + "loss": 0.4496, + "step": 11657 + }, + { + "epoch": 0.3200988467874794, + "grad_norm": 0.3786594271659851, + "learning_rate": 1.8779949203553158e-05, + "loss": 0.5388, + "step": 11658 + }, + { + "epoch": 0.3201263042284459, + "grad_norm": 0.5121511220932007, + "learning_rate": 1.8779742460968795e-05, + "loss": 0.4966, + "step": 11659 + }, + { + "epoch": 0.3201537616694124, + "grad_norm": 0.38612762093544006, + "learning_rate": 1.8779535702007416e-05, + "loss": 0.5444, + "step": 11660 + }, + { + "epoch": 0.3201812191103789, + "grad_norm": 0.39932361245155334, + "learning_rate": 1.8779328926669397e-05, + "loss": 0.4704, + "step": 11661 + }, + { + "epoch": 0.32020867655134544, + "grad_norm": 0.3625341057777405, + "learning_rate": 1.8779122134955128e-05, + "loss": 0.4861, + "step": 11662 + }, + { + "epoch": 0.32023613399231193, + "grad_norm": 0.35291585326194763, + "learning_rate": 1.8778915326865e-05, + "loss": 0.5571, + "step": 11663 + }, + { + "epoch": 0.3202635914332784, + "grad_norm": 0.5690022706985474, + "learning_rate": 1.8778708502399384e-05, + "loss": 0.5642, + "step": 11664 + }, + { + "epoch": 0.3202910488742449, + "grad_norm": 0.5463053584098816, + "learning_rate": 1.8778501661558677e-05, + "loss": 0.5693, + "step": 11665 + }, + { + "epoch": 0.3203185063152114, + "grad_norm": 0.3813205063343048, + "learning_rate": 1.877829480434327e-05, + "loss": 0.6088, + "step": 11666 + }, + { + "epoch": 0.3203459637561779, + "grad_norm": 0.5110883712768555, + "learning_rate": 1.8778087930753535e-05, + "loss": 0.5149, + "step": 11667 + }, + { + "epoch": 0.3203734211971444, + "grad_norm": 0.4248523414134979, + "learning_rate": 1.8777881040789864e-05, + "loss": 0.5626, + "step": 11668 + }, + { + "epoch": 0.32040087863811095, + "grad_norm": 0.34755319356918335, + "learning_rate": 1.8777674134452647e-05, + "loss": 0.479, + "step": 11669 + }, + { + "epoch": 0.32042833607907745, + "grad_norm": 0.41231346130371094, + "learning_rate": 1.8777467211742263e-05, + "loss": 0.5798, + "step": 11670 + }, + { + "epoch": 0.32045579352004394, + "grad_norm": 0.4012342393398285, + "learning_rate": 1.87772602726591e-05, + "loss": 0.5155, + "step": 11671 + }, + { + "epoch": 0.32048325096101044, + "grad_norm": 0.3623591959476471, + "learning_rate": 1.877705331720355e-05, + "loss": 0.5237, + "step": 11672 + }, + { + "epoch": 0.32051070840197693, + "grad_norm": 0.3641941249370575, + "learning_rate": 1.877684634537599e-05, + "loss": 0.5729, + "step": 11673 + }, + { + "epoch": 0.3205381658429434, + "grad_norm": 0.36328041553497314, + "learning_rate": 1.8776639357176815e-05, + "loss": 0.4996, + "step": 11674 + }, + { + "epoch": 0.3205656232839099, + "grad_norm": 0.4021020233631134, + "learning_rate": 1.87764323526064e-05, + "loss": 0.5388, + "step": 11675 + }, + { + "epoch": 0.32059308072487647, + "grad_norm": 0.4047730565071106, + "learning_rate": 1.8776225331665142e-05, + "loss": 0.5238, + "step": 11676 + }, + { + "epoch": 0.32062053816584296, + "grad_norm": 0.3573513925075531, + "learning_rate": 1.8776018294353423e-05, + "loss": 0.4819, + "step": 11677 + }, + { + "epoch": 0.32064799560680946, + "grad_norm": 0.3823285400867462, + "learning_rate": 1.8775811240671626e-05, + "loss": 0.5398, + "step": 11678 + }, + { + "epoch": 0.32067545304777595, + "grad_norm": 0.328141450881958, + "learning_rate": 1.877560417062014e-05, + "loss": 0.5145, + "step": 11679 + }, + { + "epoch": 0.32070291048874244, + "grad_norm": 0.38665664196014404, + "learning_rate": 1.8775397084199356e-05, + "loss": 0.5575, + "step": 11680 + }, + { + "epoch": 0.32073036792970894, + "grad_norm": 0.344605028629303, + "learning_rate": 1.8775189981409655e-05, + "loss": 0.4673, + "step": 11681 + }, + { + "epoch": 0.32075782537067543, + "grad_norm": 0.4010832905769348, + "learning_rate": 1.8774982862251422e-05, + "loss": 0.5113, + "step": 11682 + }, + { + "epoch": 0.320785282811642, + "grad_norm": 0.4564690887928009, + "learning_rate": 1.877477572672504e-05, + "loss": 0.6079, + "step": 11683 + }, + { + "epoch": 0.3208127402526085, + "grad_norm": 0.33443665504455566, + "learning_rate": 1.877456857483091e-05, + "loss": 0.5245, + "step": 11684 + }, + { + "epoch": 0.32084019769357497, + "grad_norm": 0.3897082507610321, + "learning_rate": 1.8774361406569402e-05, + "loss": 0.4983, + "step": 11685 + }, + { + "epoch": 0.32086765513454146, + "grad_norm": 0.3381296694278717, + "learning_rate": 1.8774154221940916e-05, + "loss": 0.5212, + "step": 11686 + }, + { + "epoch": 0.32089511257550796, + "grad_norm": 0.3756665885448456, + "learning_rate": 1.8773947020945826e-05, + "loss": 0.4724, + "step": 11687 + }, + { + "epoch": 0.32092257001647445, + "grad_norm": 0.367234468460083, + "learning_rate": 1.8773739803584527e-05, + "loss": 0.4755, + "step": 11688 + }, + { + "epoch": 0.32095002745744095, + "grad_norm": 0.3668709993362427, + "learning_rate": 1.8773532569857404e-05, + "loss": 0.5903, + "step": 11689 + }, + { + "epoch": 0.3209774848984075, + "grad_norm": 0.3850155472755432, + "learning_rate": 1.8773325319764838e-05, + "loss": 0.5262, + "step": 11690 + }, + { + "epoch": 0.321004942339374, + "grad_norm": 0.34990671277046204, + "learning_rate": 1.8773118053307223e-05, + "loss": 0.4575, + "step": 11691 + }, + { + "epoch": 0.3210323997803405, + "grad_norm": 0.3775002062320709, + "learning_rate": 1.8772910770484945e-05, + "loss": 0.6011, + "step": 11692 + }, + { + "epoch": 0.321059857221307, + "grad_norm": 0.39319416880607605, + "learning_rate": 1.8772703471298387e-05, + "loss": 0.5164, + "step": 11693 + }, + { + "epoch": 0.32108731466227347, + "grad_norm": 0.729278564453125, + "learning_rate": 1.877249615574794e-05, + "loss": 0.4989, + "step": 11694 + }, + { + "epoch": 0.32111477210323996, + "grad_norm": 0.37515923380851746, + "learning_rate": 1.8772288823833982e-05, + "loss": 0.4788, + "step": 11695 + }, + { + "epoch": 0.32114222954420646, + "grad_norm": 0.36077016592025757, + "learning_rate": 1.8772081475556908e-05, + "loss": 0.4234, + "step": 11696 + }, + { + "epoch": 0.321169686985173, + "grad_norm": 0.4090329110622406, + "learning_rate": 1.8771874110917102e-05, + "loss": 0.5138, + "step": 11697 + }, + { + "epoch": 0.3211971444261395, + "grad_norm": 0.3452886939048767, + "learning_rate": 1.8771666729914954e-05, + "loss": 0.5458, + "step": 11698 + }, + { + "epoch": 0.321224601867106, + "grad_norm": 0.36332178115844727, + "learning_rate": 1.8771459332550847e-05, + "loss": 0.5442, + "step": 11699 + }, + { + "epoch": 0.3212520593080725, + "grad_norm": 0.40509194135665894, + "learning_rate": 1.8771251918825166e-05, + "loss": 0.474, + "step": 11700 + }, + { + "epoch": 0.321279516749039, + "grad_norm": 0.39882466197013855, + "learning_rate": 1.87710444887383e-05, + "loss": 0.5298, + "step": 11701 + }, + { + "epoch": 0.3213069741900055, + "grad_norm": 0.37578001618385315, + "learning_rate": 1.877083704229064e-05, + "loss": 0.5583, + "step": 11702 + }, + { + "epoch": 0.321334431630972, + "grad_norm": 0.37791958451271057, + "learning_rate": 1.877062957948257e-05, + "loss": 0.5222, + "step": 11703 + }, + { + "epoch": 0.3213618890719385, + "grad_norm": 0.4049600064754486, + "learning_rate": 1.8770422100314474e-05, + "loss": 0.426, + "step": 11704 + }, + { + "epoch": 0.321389346512905, + "grad_norm": 0.35453349351882935, + "learning_rate": 1.877021460478674e-05, + "loss": 0.5294, + "step": 11705 + }, + { + "epoch": 0.3214168039538715, + "grad_norm": 0.34919509291648865, + "learning_rate": 1.877000709289976e-05, + "loss": 0.472, + "step": 11706 + }, + { + "epoch": 0.321444261394838, + "grad_norm": 0.3662647604942322, + "learning_rate": 1.876979956465392e-05, + "loss": 0.5639, + "step": 11707 + }, + { + "epoch": 0.3214717188358045, + "grad_norm": 0.3434661328792572, + "learning_rate": 1.8769592020049597e-05, + "loss": 0.4983, + "step": 11708 + }, + { + "epoch": 0.321499176276771, + "grad_norm": 0.33679434657096863, + "learning_rate": 1.8769384459087195e-05, + "loss": 0.4651, + "step": 11709 + }, + { + "epoch": 0.3215266337177375, + "grad_norm": 0.3306517004966736, + "learning_rate": 1.8769176881767086e-05, + "loss": 0.4706, + "step": 11710 + }, + { + "epoch": 0.32155409115870404, + "grad_norm": 0.3731515407562256, + "learning_rate": 1.8768969288089666e-05, + "loss": 0.5993, + "step": 11711 + }, + { + "epoch": 0.32158154859967053, + "grad_norm": 0.3641342520713806, + "learning_rate": 1.8768761678055318e-05, + "loss": 0.5438, + "step": 11712 + }, + { + "epoch": 0.321609006040637, + "grad_norm": 0.3975144326686859, + "learning_rate": 1.8768554051664432e-05, + "loss": 0.541, + "step": 11713 + }, + { + "epoch": 0.3216364634816035, + "grad_norm": 0.35624611377716064, + "learning_rate": 1.8768346408917394e-05, + "loss": 0.5627, + "step": 11714 + }, + { + "epoch": 0.32166392092257, + "grad_norm": 0.3859911262989044, + "learning_rate": 1.876813874981459e-05, + "loss": 0.5357, + "step": 11715 + }, + { + "epoch": 0.3216913783635365, + "grad_norm": 0.5796484351158142, + "learning_rate": 1.8767931074356413e-05, + "loss": 0.4921, + "step": 11716 + }, + { + "epoch": 0.321718835804503, + "grad_norm": 0.3390295207500458, + "learning_rate": 1.876772338254324e-05, + "loss": 0.447, + "step": 11717 + }, + { + "epoch": 0.3217462932454695, + "grad_norm": 0.35643672943115234, + "learning_rate": 1.8767515674375466e-05, + "loss": 0.4376, + "step": 11718 + }, + { + "epoch": 0.32177375068643604, + "grad_norm": 0.3699345886707306, + "learning_rate": 1.876730794985348e-05, + "loss": 0.4272, + "step": 11719 + }, + { + "epoch": 0.32180120812740254, + "grad_norm": 0.36987611651420593, + "learning_rate": 1.8767100208977663e-05, + "loss": 0.5553, + "step": 11720 + }, + { + "epoch": 0.32182866556836903, + "grad_norm": 0.3499537706375122, + "learning_rate": 1.876689245174841e-05, + "loss": 0.5239, + "step": 11721 + }, + { + "epoch": 0.3218561230093355, + "grad_norm": 0.37930670380592346, + "learning_rate": 1.87666846781661e-05, + "loss": 0.5478, + "step": 11722 + }, + { + "epoch": 0.321883580450302, + "grad_norm": 0.32309725880622864, + "learning_rate": 1.876647688823113e-05, + "loss": 0.4614, + "step": 11723 + }, + { + "epoch": 0.3219110378912685, + "grad_norm": 0.3830820918083191, + "learning_rate": 1.876626908194388e-05, + "loss": 0.5165, + "step": 11724 + }, + { + "epoch": 0.321938495332235, + "grad_norm": 0.4565393030643463, + "learning_rate": 1.876606125930474e-05, + "loss": 0.5548, + "step": 11725 + }, + { + "epoch": 0.32196595277320156, + "grad_norm": 4.418843746185303, + "learning_rate": 1.87658534203141e-05, + "loss": 0.5428, + "step": 11726 + }, + { + "epoch": 0.32199341021416805, + "grad_norm": 0.3529362976551056, + "learning_rate": 1.8765645564972343e-05, + "loss": 0.4695, + "step": 11727 + }, + { + "epoch": 0.32202086765513455, + "grad_norm": 0.3233625888824463, + "learning_rate": 1.8765437693279858e-05, + "loss": 0.5133, + "step": 11728 + }, + { + "epoch": 0.32204832509610104, + "grad_norm": 0.35758453607559204, + "learning_rate": 1.876522980523704e-05, + "loss": 0.4867, + "step": 11729 + }, + { + "epoch": 0.32207578253706753, + "grad_norm": 0.47603800892829895, + "learning_rate": 1.8765021900844264e-05, + "loss": 0.4853, + "step": 11730 + }, + { + "epoch": 0.32210323997803403, + "grad_norm": 0.41322100162506104, + "learning_rate": 1.876481398010193e-05, + "loss": 0.4746, + "step": 11731 + }, + { + "epoch": 0.3221306974190005, + "grad_norm": 0.36421751976013184, + "learning_rate": 1.876460604301042e-05, + "loss": 0.5048, + "step": 11732 + }, + { + "epoch": 0.32215815485996707, + "grad_norm": 0.35288381576538086, + "learning_rate": 1.876439808957012e-05, + "loss": 0.5416, + "step": 11733 + }, + { + "epoch": 0.32218561230093357, + "grad_norm": 0.38899943232536316, + "learning_rate": 1.876419011978142e-05, + "loss": 0.5479, + "step": 11734 + }, + { + "epoch": 0.32221306974190006, + "grad_norm": 0.4016796946525574, + "learning_rate": 1.8763982133644712e-05, + "loss": 0.5336, + "step": 11735 + }, + { + "epoch": 0.32224052718286655, + "grad_norm": 0.35294830799102783, + "learning_rate": 1.8763774131160376e-05, + "loss": 0.5248, + "step": 11736 + }, + { + "epoch": 0.32226798462383305, + "grad_norm": 0.3345772325992584, + "learning_rate": 1.8763566112328805e-05, + "loss": 0.5239, + "step": 11737 + }, + { + "epoch": 0.32229544206479954, + "grad_norm": 0.3485015630722046, + "learning_rate": 1.8763358077150386e-05, + "loss": 0.4791, + "step": 11738 + }, + { + "epoch": 0.32232289950576604, + "grad_norm": 0.3693860173225403, + "learning_rate": 1.876315002562551e-05, + "loss": 0.5407, + "step": 11739 + }, + { + "epoch": 0.3223503569467326, + "grad_norm": 0.4738292396068573, + "learning_rate": 1.876294195775456e-05, + "loss": 0.5216, + "step": 11740 + }, + { + "epoch": 0.3223778143876991, + "grad_norm": 0.37546899914741516, + "learning_rate": 1.876273387353793e-05, + "loss": 0.5201, + "step": 11741 + }, + { + "epoch": 0.3224052718286656, + "grad_norm": 0.3609587252140045, + "learning_rate": 1.8762525772976e-05, + "loss": 0.5452, + "step": 11742 + }, + { + "epoch": 0.32243272926963207, + "grad_norm": 0.4050866365432739, + "learning_rate": 1.8762317656069164e-05, + "loss": 0.5468, + "step": 11743 + }, + { + "epoch": 0.32246018671059856, + "grad_norm": 0.34959083795547485, + "learning_rate": 1.876210952281781e-05, + "loss": 0.619, + "step": 11744 + }, + { + "epoch": 0.32248764415156506, + "grad_norm": 0.35286134481430054, + "learning_rate": 1.8761901373222324e-05, + "loss": 0.4111, + "step": 11745 + }, + { + "epoch": 0.32251510159253155, + "grad_norm": 0.535861611366272, + "learning_rate": 1.8761693207283095e-05, + "loss": 0.5336, + "step": 11746 + }, + { + "epoch": 0.3225425590334981, + "grad_norm": 0.3382166028022766, + "learning_rate": 1.8761485025000515e-05, + "loss": 0.4143, + "step": 11747 + }, + { + "epoch": 0.3225700164744646, + "grad_norm": 0.35565534234046936, + "learning_rate": 1.8761276826374966e-05, + "loss": 0.4747, + "step": 11748 + }, + { + "epoch": 0.3225974739154311, + "grad_norm": 0.36466243863105774, + "learning_rate": 1.8761068611406838e-05, + "loss": 0.51, + "step": 11749 + }, + { + "epoch": 0.3226249313563976, + "grad_norm": 0.38052818179130554, + "learning_rate": 1.8760860380096524e-05, + "loss": 0.4849, + "step": 11750 + }, + { + "epoch": 0.3226523887973641, + "grad_norm": 0.39832523465156555, + "learning_rate": 1.876065213244441e-05, + "loss": 0.5659, + "step": 11751 + }, + { + "epoch": 0.32267984623833057, + "grad_norm": 0.34363284707069397, + "learning_rate": 1.8760443868450882e-05, + "loss": 0.5698, + "step": 11752 + }, + { + "epoch": 0.32270730367929706, + "grad_norm": 0.37747445702552795, + "learning_rate": 1.876023558811633e-05, + "loss": 0.4953, + "step": 11753 + }, + { + "epoch": 0.3227347611202636, + "grad_norm": 0.376833438873291, + "learning_rate": 1.8760027291441144e-05, + "loss": 0.5668, + "step": 11754 + }, + { + "epoch": 0.3227622185612301, + "grad_norm": 0.34841740131378174, + "learning_rate": 1.875981897842571e-05, + "loss": 0.4654, + "step": 11755 + }, + { + "epoch": 0.3227896760021966, + "grad_norm": 0.3922888934612274, + "learning_rate": 1.875961064907042e-05, + "loss": 0.5388, + "step": 11756 + }, + { + "epoch": 0.3228171334431631, + "grad_norm": 0.3895992040634155, + "learning_rate": 1.8759402303375656e-05, + "loss": 0.5774, + "step": 11757 + }, + { + "epoch": 0.3228445908841296, + "grad_norm": 0.3481987416744232, + "learning_rate": 1.8759193941341815e-05, + "loss": 0.4363, + "step": 11758 + }, + { + "epoch": 0.3228720483250961, + "grad_norm": 0.3576910197734833, + "learning_rate": 1.8758985562969278e-05, + "loss": 0.5135, + "step": 11759 + }, + { + "epoch": 0.3228995057660626, + "grad_norm": 0.36013075709342957, + "learning_rate": 1.875877716825844e-05, + "loss": 0.4949, + "step": 11760 + }, + { + "epoch": 0.3229269632070291, + "grad_norm": 0.35083386301994324, + "learning_rate": 1.8758568757209687e-05, + "loss": 0.5537, + "step": 11761 + }, + { + "epoch": 0.3229544206479956, + "grad_norm": 0.386028915643692, + "learning_rate": 1.8758360329823405e-05, + "loss": 0.5753, + "step": 11762 + }, + { + "epoch": 0.3229818780889621, + "grad_norm": 0.3761747479438782, + "learning_rate": 1.8758151886099993e-05, + "loss": 0.5196, + "step": 11763 + }, + { + "epoch": 0.3230093355299286, + "grad_norm": 0.40966475009918213, + "learning_rate": 1.8757943426039825e-05, + "loss": 0.4777, + "step": 11764 + }, + { + "epoch": 0.3230367929708951, + "grad_norm": 0.3564780354499817, + "learning_rate": 1.87577349496433e-05, + "loss": 0.5493, + "step": 11765 + }, + { + "epoch": 0.3230642504118616, + "grad_norm": 0.35177674889564514, + "learning_rate": 1.8757526456910804e-05, + "loss": 0.5447, + "step": 11766 + }, + { + "epoch": 0.3230917078528281, + "grad_norm": 0.41459929943084717, + "learning_rate": 1.8757317947842727e-05, + "loss": 0.5504, + "step": 11767 + }, + { + "epoch": 0.32311916529379464, + "grad_norm": 0.33840495347976685, + "learning_rate": 1.8757109422439456e-05, + "loss": 0.5479, + "step": 11768 + }, + { + "epoch": 0.32314662273476114, + "grad_norm": 0.3994670808315277, + "learning_rate": 1.8756900880701382e-05, + "loss": 0.5682, + "step": 11769 + }, + { + "epoch": 0.32317408017572763, + "grad_norm": 0.387246310710907, + "learning_rate": 1.8756692322628887e-05, + "loss": 0.4904, + "step": 11770 + }, + { + "epoch": 0.3232015376166941, + "grad_norm": 0.37535202503204346, + "learning_rate": 1.8756483748222372e-05, + "loss": 0.5555, + "step": 11771 + }, + { + "epoch": 0.3232289950576606, + "grad_norm": 0.4275708496570587, + "learning_rate": 1.8756275157482223e-05, + "loss": 0.5589, + "step": 11772 + }, + { + "epoch": 0.3232564524986271, + "grad_norm": 0.3694626986980438, + "learning_rate": 1.875606655040882e-05, + "loss": 0.627, + "step": 11773 + }, + { + "epoch": 0.3232839099395936, + "grad_norm": 0.36067405343055725, + "learning_rate": 1.875585792700256e-05, + "loss": 0.5178, + "step": 11774 + }, + { + "epoch": 0.32331136738056016, + "grad_norm": 0.4087122976779938, + "learning_rate": 1.8755649287263832e-05, + "loss": 0.5461, + "step": 11775 + }, + { + "epoch": 0.32333882482152665, + "grad_norm": 0.3742195963859558, + "learning_rate": 1.8755440631193024e-05, + "loss": 0.4426, + "step": 11776 + }, + { + "epoch": 0.32336628226249314, + "grad_norm": 0.3824004530906677, + "learning_rate": 1.875523195879052e-05, + "loss": 0.5832, + "step": 11777 + }, + { + "epoch": 0.32339373970345964, + "grad_norm": 0.41236451268196106, + "learning_rate": 1.875502327005672e-05, + "loss": 0.5411, + "step": 11778 + }, + { + "epoch": 0.32342119714442613, + "grad_norm": 0.3324199616909027, + "learning_rate": 1.8754814564992006e-05, + "loss": 0.4746, + "step": 11779 + }, + { + "epoch": 0.3234486545853926, + "grad_norm": 0.36813125014305115, + "learning_rate": 1.8754605843596767e-05, + "loss": 0.4913, + "step": 11780 + }, + { + "epoch": 0.3234761120263591, + "grad_norm": 0.4062739908695221, + "learning_rate": 1.875439710587139e-05, + "loss": 0.5379, + "step": 11781 + }, + { + "epoch": 0.32350356946732567, + "grad_norm": 0.35170477628707886, + "learning_rate": 1.8754188351816274e-05, + "loss": 0.5287, + "step": 11782 + }, + { + "epoch": 0.32353102690829216, + "grad_norm": 0.3509424030780792, + "learning_rate": 1.8753979581431804e-05, + "loss": 0.4769, + "step": 11783 + }, + { + "epoch": 0.32355848434925866, + "grad_norm": 0.44032424688339233, + "learning_rate": 1.8753770794718364e-05, + "loss": 0.563, + "step": 11784 + }, + { + "epoch": 0.32358594179022515, + "grad_norm": 0.8945039510726929, + "learning_rate": 1.875356199167635e-05, + "loss": 0.5847, + "step": 11785 + }, + { + "epoch": 0.32361339923119165, + "grad_norm": 0.35138463973999023, + "learning_rate": 1.8753353172306146e-05, + "loss": 0.4875, + "step": 11786 + }, + { + "epoch": 0.32364085667215814, + "grad_norm": 0.4001033306121826, + "learning_rate": 1.8753144336608148e-05, + "loss": 0.5208, + "step": 11787 + }, + { + "epoch": 0.32366831411312463, + "grad_norm": 0.4111853241920471, + "learning_rate": 1.8752935484582742e-05, + "loss": 0.5946, + "step": 11788 + }, + { + "epoch": 0.3236957715540912, + "grad_norm": 0.3526361584663391, + "learning_rate": 1.8752726616230315e-05, + "loss": 0.5281, + "step": 11789 + }, + { + "epoch": 0.3237232289950577, + "grad_norm": 0.35789182782173157, + "learning_rate": 1.8752517731551264e-05, + "loss": 0.5391, + "step": 11790 + }, + { + "epoch": 0.32375068643602417, + "grad_norm": 0.3532872796058655, + "learning_rate": 1.8752308830545968e-05, + "loss": 0.4878, + "step": 11791 + }, + { + "epoch": 0.32377814387699067, + "grad_norm": 0.36151787638664246, + "learning_rate": 1.8752099913214827e-05, + "loss": 0.4764, + "step": 11792 + }, + { + "epoch": 0.32380560131795716, + "grad_norm": 0.38197311758995056, + "learning_rate": 1.8751890979558226e-05, + "loss": 0.5121, + "step": 11793 + }, + { + "epoch": 0.32383305875892365, + "grad_norm": 0.38917815685272217, + "learning_rate": 1.8751682029576554e-05, + "loss": 0.5811, + "step": 11794 + }, + { + "epoch": 0.32386051619989015, + "grad_norm": 0.41152122616767883, + "learning_rate": 1.87514730632702e-05, + "loss": 0.5, + "step": 11795 + }, + { + "epoch": 0.3238879736408567, + "grad_norm": 0.3710826337337494, + "learning_rate": 1.875126408063956e-05, + "loss": 0.5365, + "step": 11796 + }, + { + "epoch": 0.3239154310818232, + "grad_norm": 0.4517432451248169, + "learning_rate": 1.8751055081685012e-05, + "loss": 0.4836, + "step": 11797 + }, + { + "epoch": 0.3239428885227897, + "grad_norm": 0.43861591815948486, + "learning_rate": 1.875084606640696e-05, + "loss": 0.5146, + "step": 11798 + }, + { + "epoch": 0.3239703459637562, + "grad_norm": 0.3344217836856842, + "learning_rate": 1.8750637034805784e-05, + "loss": 0.4734, + "step": 11799 + }, + { + "epoch": 0.3239978034047227, + "grad_norm": 0.35802844166755676, + "learning_rate": 1.8750427986881878e-05, + "loss": 0.501, + "step": 11800 + }, + { + "epoch": 0.32402526084568917, + "grad_norm": 0.39263108372688293, + "learning_rate": 1.8750218922635633e-05, + "loss": 0.6072, + "step": 11801 + }, + { + "epoch": 0.32405271828665566, + "grad_norm": 0.3624120056629181, + "learning_rate": 1.8750009842067433e-05, + "loss": 0.4195, + "step": 11802 + }, + { + "epoch": 0.3240801757276222, + "grad_norm": 0.3647159934043884, + "learning_rate": 1.8749800745177674e-05, + "loss": 0.5131, + "step": 11803 + }, + { + "epoch": 0.3241076331685887, + "grad_norm": 0.4189095199108124, + "learning_rate": 1.8749591631966745e-05, + "loss": 0.5016, + "step": 11804 + }, + { + "epoch": 0.3241350906095552, + "grad_norm": 0.354525089263916, + "learning_rate": 1.8749382502435033e-05, + "loss": 0.4989, + "step": 11805 + }, + { + "epoch": 0.3241625480505217, + "grad_norm": 0.3763374984264374, + "learning_rate": 1.8749173356582933e-05, + "loss": 0.5879, + "step": 11806 + }, + { + "epoch": 0.3241900054914882, + "grad_norm": 0.37494030594825745, + "learning_rate": 1.874896419441083e-05, + "loss": 0.4589, + "step": 11807 + }, + { + "epoch": 0.3242174629324547, + "grad_norm": 0.3371298015117645, + "learning_rate": 1.8748755015919116e-05, + "loss": 0.5154, + "step": 11808 + }, + { + "epoch": 0.3242449203734212, + "grad_norm": 0.36820024251937866, + "learning_rate": 1.8748545821108184e-05, + "loss": 0.4428, + "step": 11809 + }, + { + "epoch": 0.3242723778143877, + "grad_norm": 0.3441532254219055, + "learning_rate": 1.874833660997842e-05, + "loss": 0.5158, + "step": 11810 + }, + { + "epoch": 0.3242998352553542, + "grad_norm": 0.3371216952800751, + "learning_rate": 1.8748127382530218e-05, + "loss": 0.4932, + "step": 11811 + }, + { + "epoch": 0.3243272926963207, + "grad_norm": 0.3727260231971741, + "learning_rate": 1.8747918138763963e-05, + "loss": 0.5257, + "step": 11812 + }, + { + "epoch": 0.3243547501372872, + "grad_norm": 0.3732303977012634, + "learning_rate": 1.8747708878680052e-05, + "loss": 0.5162, + "step": 11813 + }, + { + "epoch": 0.3243822075782537, + "grad_norm": 0.35399365425109863, + "learning_rate": 1.8747499602278868e-05, + "loss": 0.5895, + "step": 11814 + }, + { + "epoch": 0.3244096650192202, + "grad_norm": 0.3818722367286682, + "learning_rate": 1.874729030956081e-05, + "loss": 0.5476, + "step": 11815 + }, + { + "epoch": 0.3244371224601867, + "grad_norm": 0.32948315143585205, + "learning_rate": 1.874708100052626e-05, + "loss": 0.4715, + "step": 11816 + }, + { + "epoch": 0.32446457990115324, + "grad_norm": 0.350591242313385, + "learning_rate": 1.8746871675175616e-05, + "loss": 0.5568, + "step": 11817 + }, + { + "epoch": 0.32449203734211973, + "grad_norm": 0.36386141180992126, + "learning_rate": 1.8746662333509263e-05, + "loss": 0.4461, + "step": 11818 + }, + { + "epoch": 0.3245194947830862, + "grad_norm": 0.3573114275932312, + "learning_rate": 1.874645297552759e-05, + "loss": 0.5324, + "step": 11819 + }, + { + "epoch": 0.3245469522240527, + "grad_norm": 0.37623730301856995, + "learning_rate": 1.8746243601230994e-05, + "loss": 0.4968, + "step": 11820 + }, + { + "epoch": 0.3245744096650192, + "grad_norm": 0.3793499171733856, + "learning_rate": 1.874603421061986e-05, + "loss": 0.5165, + "step": 11821 + }, + { + "epoch": 0.3246018671059857, + "grad_norm": 0.3535768985748291, + "learning_rate": 1.8745824803694583e-05, + "loss": 0.5636, + "step": 11822 + }, + { + "epoch": 0.3246293245469522, + "grad_norm": 0.4491533935070038, + "learning_rate": 1.874561538045555e-05, + "loss": 0.4895, + "step": 11823 + }, + { + "epoch": 0.32465678198791875, + "grad_norm": 0.3881840705871582, + "learning_rate": 1.8745405940903153e-05, + "loss": 0.5451, + "step": 11824 + }, + { + "epoch": 0.32468423942888525, + "grad_norm": 0.39851683378219604, + "learning_rate": 1.8745196485037785e-05, + "loss": 0.5568, + "step": 11825 + }, + { + "epoch": 0.32471169686985174, + "grad_norm": 0.3721083998680115, + "learning_rate": 1.8744987012859832e-05, + "loss": 0.5492, + "step": 11826 + }, + { + "epoch": 0.32473915431081823, + "grad_norm": 0.3399198055267334, + "learning_rate": 1.874477752436969e-05, + "loss": 0.5165, + "step": 11827 + }, + { + "epoch": 0.32476661175178473, + "grad_norm": 0.32275038957595825, + "learning_rate": 1.8744568019567744e-05, + "loss": 0.5055, + "step": 11828 + }, + { + "epoch": 0.3247940691927512, + "grad_norm": 0.387845903635025, + "learning_rate": 1.8744358498454388e-05, + "loss": 0.4623, + "step": 11829 + }, + { + "epoch": 0.3248215266337177, + "grad_norm": 0.35860076546669006, + "learning_rate": 1.8744148961030013e-05, + "loss": 0.5092, + "step": 11830 + }, + { + "epoch": 0.32484898407468427, + "grad_norm": 0.3902624249458313, + "learning_rate": 1.8743939407295012e-05, + "loss": 0.5582, + "step": 11831 + }, + { + "epoch": 0.32487644151565076, + "grad_norm": 0.46027377247810364, + "learning_rate": 1.874372983724977e-05, + "loss": 0.5612, + "step": 11832 + }, + { + "epoch": 0.32490389895661725, + "grad_norm": 0.38625526428222656, + "learning_rate": 1.874352025089468e-05, + "loss": 0.4977, + "step": 11833 + }, + { + "epoch": 0.32493135639758375, + "grad_norm": 0.36495640873908997, + "learning_rate": 1.8743310648230135e-05, + "loss": 0.4983, + "step": 11834 + }, + { + "epoch": 0.32495881383855024, + "grad_norm": 0.3611765503883362, + "learning_rate": 1.8743101029256528e-05, + "loss": 0.5061, + "step": 11835 + }, + { + "epoch": 0.32498627127951674, + "grad_norm": 0.36867478489875793, + "learning_rate": 1.8742891393974246e-05, + "loss": 0.5109, + "step": 11836 + }, + { + "epoch": 0.32501372872048323, + "grad_norm": 0.3954559862613678, + "learning_rate": 1.874268174238368e-05, + "loss": 0.555, + "step": 11837 + }, + { + "epoch": 0.3250411861614498, + "grad_norm": 0.3238357603549957, + "learning_rate": 1.874247207448522e-05, + "loss": 0.4128, + "step": 11838 + }, + { + "epoch": 0.3250686436024163, + "grad_norm": 0.4470655024051666, + "learning_rate": 1.874226239027926e-05, + "loss": 0.5528, + "step": 11839 + }, + { + "epoch": 0.32509610104338277, + "grad_norm": 0.41073381900787354, + "learning_rate": 1.8742052689766197e-05, + "loss": 0.5409, + "step": 11840 + }, + { + "epoch": 0.32512355848434926, + "grad_norm": 0.35816171765327454, + "learning_rate": 1.874184297294641e-05, + "loss": 0.575, + "step": 11841 + }, + { + "epoch": 0.32515101592531576, + "grad_norm": 0.34618017077445984, + "learning_rate": 1.8741633239820297e-05, + "loss": 0.5031, + "step": 11842 + }, + { + "epoch": 0.32517847336628225, + "grad_norm": 0.34049302339553833, + "learning_rate": 1.8741423490388247e-05, + "loss": 0.4425, + "step": 11843 + }, + { + "epoch": 0.32520593080724874, + "grad_norm": 0.731560468673706, + "learning_rate": 1.8741213724650654e-05, + "loss": 0.4997, + "step": 11844 + }, + { + "epoch": 0.3252333882482153, + "grad_norm": 0.3937719464302063, + "learning_rate": 1.8741003942607907e-05, + "loss": 0.5367, + "step": 11845 + }, + { + "epoch": 0.3252608456891818, + "grad_norm": 0.43563374876976013, + "learning_rate": 1.8740794144260396e-05, + "loss": 0.4869, + "step": 11846 + }, + { + "epoch": 0.3252883031301483, + "grad_norm": 0.3960205018520355, + "learning_rate": 1.8740584329608517e-05, + "loss": 0.6345, + "step": 11847 + }, + { + "epoch": 0.3253157605711148, + "grad_norm": 0.4458274841308594, + "learning_rate": 1.8740374498652656e-05, + "loss": 0.4927, + "step": 11848 + }, + { + "epoch": 0.32534321801208127, + "grad_norm": 0.3444967269897461, + "learning_rate": 1.8740164651393207e-05, + "loss": 0.5716, + "step": 11849 + }, + { + "epoch": 0.32537067545304776, + "grad_norm": 0.4570190906524658, + "learning_rate": 1.8739954787830563e-05, + "loss": 0.5786, + "step": 11850 + }, + { + "epoch": 0.32539813289401426, + "grad_norm": 0.41920405626296997, + "learning_rate": 1.8739744907965113e-05, + "loss": 0.5582, + "step": 11851 + }, + { + "epoch": 0.32542559033498075, + "grad_norm": 0.4723065197467804, + "learning_rate": 1.873953501179725e-05, + "loss": 0.5044, + "step": 11852 + }, + { + "epoch": 0.3254530477759473, + "grad_norm": 0.3658095598220825, + "learning_rate": 1.8739325099327366e-05, + "loss": 0.5598, + "step": 11853 + }, + { + "epoch": 0.3254805052169138, + "grad_norm": 0.47806867957115173, + "learning_rate": 1.873911517055585e-05, + "loss": 0.5429, + "step": 11854 + }, + { + "epoch": 0.3255079626578803, + "grad_norm": 0.3586364984512329, + "learning_rate": 1.8738905225483093e-05, + "loss": 0.5933, + "step": 11855 + }, + { + "epoch": 0.3255354200988468, + "grad_norm": 0.3627058267593384, + "learning_rate": 1.873869526410949e-05, + "loss": 0.606, + "step": 11856 + }, + { + "epoch": 0.3255628775398133, + "grad_norm": 0.45916688442230225, + "learning_rate": 1.8738485286435432e-05, + "loss": 0.5252, + "step": 11857 + }, + { + "epoch": 0.32559033498077977, + "grad_norm": 0.49303504824638367, + "learning_rate": 1.873827529246131e-05, + "loss": 0.552, + "step": 11858 + }, + { + "epoch": 0.32561779242174627, + "grad_norm": 0.5051195621490479, + "learning_rate": 1.8738065282187516e-05, + "loss": 0.6283, + "step": 11859 + }, + { + "epoch": 0.3256452498627128, + "grad_norm": 0.4401436150074005, + "learning_rate": 1.873785525561444e-05, + "loss": 0.5461, + "step": 11860 + }, + { + "epoch": 0.3256727073036793, + "grad_norm": 0.35721680521965027, + "learning_rate": 1.8737645212742474e-05, + "loss": 0.4383, + "step": 11861 + }, + { + "epoch": 0.3257001647446458, + "grad_norm": 1.262752890586853, + "learning_rate": 1.8737435153572017e-05, + "loss": 0.3915, + "step": 11862 + }, + { + "epoch": 0.3257276221856123, + "grad_norm": 0.3267965316772461, + "learning_rate": 1.873722507810345e-05, + "loss": 0.4543, + "step": 11863 + }, + { + "epoch": 0.3257550796265788, + "grad_norm": 0.35942745208740234, + "learning_rate": 1.8737014986337167e-05, + "loss": 0.5768, + "step": 11864 + }, + { + "epoch": 0.3257825370675453, + "grad_norm": 0.32210710644721985, + "learning_rate": 1.8736804878273566e-05, + "loss": 0.5179, + "step": 11865 + }, + { + "epoch": 0.3258099945085118, + "grad_norm": 0.3873448669910431, + "learning_rate": 1.8736594753913038e-05, + "loss": 0.5931, + "step": 11866 + }, + { + "epoch": 0.32583745194947833, + "grad_norm": 0.3410918712615967, + "learning_rate": 1.873638461325597e-05, + "loss": 0.5366, + "step": 11867 + }, + { + "epoch": 0.3258649093904448, + "grad_norm": 0.3944343328475952, + "learning_rate": 1.8736174456302755e-05, + "loss": 0.4895, + "step": 11868 + }, + { + "epoch": 0.3258923668314113, + "grad_norm": 0.3842236399650574, + "learning_rate": 1.8735964283053786e-05, + "loss": 0.5166, + "step": 11869 + }, + { + "epoch": 0.3259198242723778, + "grad_norm": 0.4130324721336365, + "learning_rate": 1.8735754093509458e-05, + "loss": 0.5286, + "step": 11870 + }, + { + "epoch": 0.3259472817133443, + "grad_norm": 0.562262237071991, + "learning_rate": 1.873554388767016e-05, + "loss": 0.6011, + "step": 11871 + }, + { + "epoch": 0.3259747391543108, + "grad_norm": 0.3272240459918976, + "learning_rate": 1.873533366553628e-05, + "loss": 0.5056, + "step": 11872 + }, + { + "epoch": 0.3260021965952773, + "grad_norm": 0.3584408760070801, + "learning_rate": 1.873512342710822e-05, + "loss": 0.5003, + "step": 11873 + }, + { + "epoch": 0.32602965403624384, + "grad_norm": 0.355514794588089, + "learning_rate": 1.8734913172386368e-05, + "loss": 0.4823, + "step": 11874 + }, + { + "epoch": 0.32605711147721034, + "grad_norm": 0.3622104823589325, + "learning_rate": 1.873470290137111e-05, + "loss": 0.5491, + "step": 11875 + }, + { + "epoch": 0.32608456891817683, + "grad_norm": 0.45517709851264954, + "learning_rate": 1.8734492614062847e-05, + "loss": 0.6096, + "step": 11876 + }, + { + "epoch": 0.3261120263591433, + "grad_norm": 0.40321797132492065, + "learning_rate": 1.8734282310461965e-05, + "loss": 0.6014, + "step": 11877 + }, + { + "epoch": 0.3261394838001098, + "grad_norm": 0.35896438360214233, + "learning_rate": 1.873407199056886e-05, + "loss": 0.5423, + "step": 11878 + }, + { + "epoch": 0.3261669412410763, + "grad_norm": 0.34002968668937683, + "learning_rate": 1.8733861654383923e-05, + "loss": 0.511, + "step": 11879 + }, + { + "epoch": 0.3261943986820428, + "grad_norm": 0.4070585370063782, + "learning_rate": 1.873365130190755e-05, + "loss": 0.5394, + "step": 11880 + }, + { + "epoch": 0.32622185612300936, + "grad_norm": 0.3630584180355072, + "learning_rate": 1.8733440933140127e-05, + "loss": 0.4681, + "step": 11881 + }, + { + "epoch": 0.32624931356397585, + "grad_norm": 0.38701823353767395, + "learning_rate": 1.8733230548082048e-05, + "loss": 0.4497, + "step": 11882 + }, + { + "epoch": 0.32627677100494235, + "grad_norm": 0.3601362407207489, + "learning_rate": 1.8733020146733706e-05, + "loss": 0.4424, + "step": 11883 + }, + { + "epoch": 0.32630422844590884, + "grad_norm": 0.3628718852996826, + "learning_rate": 1.8732809729095498e-05, + "loss": 0.5318, + "step": 11884 + }, + { + "epoch": 0.32633168588687533, + "grad_norm": 0.40647241473197937, + "learning_rate": 1.873259929516781e-05, + "loss": 0.5951, + "step": 11885 + }, + { + "epoch": 0.3263591433278418, + "grad_norm": 0.3834950923919678, + "learning_rate": 1.8732388844951036e-05, + "loss": 0.5224, + "step": 11886 + }, + { + "epoch": 0.3263866007688083, + "grad_norm": 0.35455188155174255, + "learning_rate": 1.8732178378445572e-05, + "loss": 0.5697, + "step": 11887 + }, + { + "epoch": 0.32641405820977487, + "grad_norm": 0.36037057638168335, + "learning_rate": 1.8731967895651808e-05, + "loss": 0.5764, + "step": 11888 + }, + { + "epoch": 0.32644151565074137, + "grad_norm": 0.4315629303455353, + "learning_rate": 1.8731757396570138e-05, + "loss": 0.5663, + "step": 11889 + }, + { + "epoch": 0.32646897309170786, + "grad_norm": 0.3512505292892456, + "learning_rate": 1.8731546881200953e-05, + "loss": 0.4588, + "step": 11890 + }, + { + "epoch": 0.32649643053267435, + "grad_norm": 0.3286362290382385, + "learning_rate": 1.8731336349544646e-05, + "loss": 0.5014, + "step": 11891 + }, + { + "epoch": 0.32652388797364085, + "grad_norm": 0.3424447178840637, + "learning_rate": 1.873112580160161e-05, + "loss": 0.4551, + "step": 11892 + }, + { + "epoch": 0.32655134541460734, + "grad_norm": 0.3483246862888336, + "learning_rate": 1.8730915237372237e-05, + "loss": 0.4615, + "step": 11893 + }, + { + "epoch": 0.32657880285557384, + "grad_norm": 0.380545437335968, + "learning_rate": 1.8730704656856918e-05, + "loss": 0.5952, + "step": 11894 + }, + { + "epoch": 0.3266062602965404, + "grad_norm": 0.3417699933052063, + "learning_rate": 1.8730494060056052e-05, + "loss": 0.491, + "step": 11895 + }, + { + "epoch": 0.3266337177375069, + "grad_norm": 0.3761958181858063, + "learning_rate": 1.873028344697003e-05, + "loss": 0.5725, + "step": 11896 + }, + { + "epoch": 0.3266611751784734, + "grad_norm": 0.3180446922779083, + "learning_rate": 1.873007281759924e-05, + "loss": 0.4656, + "step": 11897 + }, + { + "epoch": 0.32668863261943987, + "grad_norm": 0.3801502287387848, + "learning_rate": 1.8729862171944074e-05, + "loss": 0.5118, + "step": 11898 + }, + { + "epoch": 0.32671609006040636, + "grad_norm": 0.35072335600852966, + "learning_rate": 1.8729651510004935e-05, + "loss": 0.4917, + "step": 11899 + }, + { + "epoch": 0.32674354750137286, + "grad_norm": 0.39461833238601685, + "learning_rate": 1.8729440831782207e-05, + "loss": 0.5104, + "step": 11900 + }, + { + "epoch": 0.32677100494233935, + "grad_norm": 0.3451365530490875, + "learning_rate": 1.8729230137276287e-05, + "loss": 0.521, + "step": 11901 + }, + { + "epoch": 0.3267984623833059, + "grad_norm": 0.3301972448825836, + "learning_rate": 1.8729019426487565e-05, + "loss": 0.406, + "step": 11902 + }, + { + "epoch": 0.3268259198242724, + "grad_norm": 0.34601786732673645, + "learning_rate": 1.8728808699416437e-05, + "loss": 0.501, + "step": 11903 + }, + { + "epoch": 0.3268533772652389, + "grad_norm": 0.35476481914520264, + "learning_rate": 1.8728597956063293e-05, + "loss": 0.4915, + "step": 11904 + }, + { + "epoch": 0.3268808347062054, + "grad_norm": 0.35447824001312256, + "learning_rate": 1.8728387196428532e-05, + "loss": 0.5726, + "step": 11905 + }, + { + "epoch": 0.3269082921471719, + "grad_norm": 0.3805021047592163, + "learning_rate": 1.872817642051254e-05, + "loss": 0.6128, + "step": 11906 + }, + { + "epoch": 0.32693574958813837, + "grad_norm": 0.34017351269721985, + "learning_rate": 1.8727965628315713e-05, + "loss": 0.4998, + "step": 11907 + }, + { + "epoch": 0.32696320702910486, + "grad_norm": 0.3979431986808777, + "learning_rate": 1.8727754819838448e-05, + "loss": 0.6182, + "step": 11908 + }, + { + "epoch": 0.3269906644700714, + "grad_norm": 0.41271457076072693, + "learning_rate": 1.872754399508113e-05, + "loss": 0.6475, + "step": 11909 + }, + { + "epoch": 0.3270181219110379, + "grad_norm": 0.8775897026062012, + "learning_rate": 1.872733315404416e-05, + "loss": 0.5523, + "step": 11910 + }, + { + "epoch": 0.3270455793520044, + "grad_norm": 0.4202950596809387, + "learning_rate": 1.872712229672793e-05, + "loss": 0.5234, + "step": 11911 + }, + { + "epoch": 0.3270730367929709, + "grad_norm": 0.38990646600723267, + "learning_rate": 1.8726911423132827e-05, + "loss": 0.5206, + "step": 11912 + }, + { + "epoch": 0.3271004942339374, + "grad_norm": 0.4083561897277832, + "learning_rate": 1.872670053325925e-05, + "loss": 0.5644, + "step": 11913 + }, + { + "epoch": 0.3271279516749039, + "grad_norm": 0.3420838415622711, + "learning_rate": 1.8726489627107593e-05, + "loss": 0.5072, + "step": 11914 + }, + { + "epoch": 0.3271554091158704, + "grad_norm": 0.4254245162010193, + "learning_rate": 1.8726278704678246e-05, + "loss": 0.545, + "step": 11915 + }, + { + "epoch": 0.3271828665568369, + "grad_norm": 0.37963324785232544, + "learning_rate": 1.8726067765971606e-05, + "loss": 0.5568, + "step": 11916 + }, + { + "epoch": 0.3272103239978034, + "grad_norm": 0.377331405878067, + "learning_rate": 1.8725856810988063e-05, + "loss": 0.5651, + "step": 11917 + }, + { + "epoch": 0.3272377814387699, + "grad_norm": 0.37602683901786804, + "learning_rate": 1.8725645839728012e-05, + "loss": 0.5085, + "step": 11918 + }, + { + "epoch": 0.3272652388797364, + "grad_norm": 0.43218210339546204, + "learning_rate": 1.8725434852191847e-05, + "loss": 0.631, + "step": 11919 + }, + { + "epoch": 0.3272926963207029, + "grad_norm": 0.3711969256401062, + "learning_rate": 1.8725223848379965e-05, + "loss": 0.5169, + "step": 11920 + }, + { + "epoch": 0.3273201537616694, + "grad_norm": 0.3311774730682373, + "learning_rate": 1.872501282829275e-05, + "loss": 0.5882, + "step": 11921 + }, + { + "epoch": 0.3273476112026359, + "grad_norm": 0.3809075355529785, + "learning_rate": 1.8724801791930605e-05, + "loss": 0.5045, + "step": 11922 + }, + { + "epoch": 0.32737506864360244, + "grad_norm": 0.35884279012680054, + "learning_rate": 1.8724590739293917e-05, + "loss": 0.4942, + "step": 11923 + }, + { + "epoch": 0.32740252608456893, + "grad_norm": 0.3947276771068573, + "learning_rate": 1.8724379670383083e-05, + "loss": 0.6038, + "step": 11924 + }, + { + "epoch": 0.32742998352553543, + "grad_norm": 0.36902132630348206, + "learning_rate": 1.87241685851985e-05, + "loss": 0.4684, + "step": 11925 + }, + { + "epoch": 0.3274574409665019, + "grad_norm": 0.3667244017124176, + "learning_rate": 1.8723957483740555e-05, + "loss": 0.5358, + "step": 11926 + }, + { + "epoch": 0.3274848984074684, + "grad_norm": 0.3329610228538513, + "learning_rate": 1.8723746366009646e-05, + "loss": 0.4196, + "step": 11927 + }, + { + "epoch": 0.3275123558484349, + "grad_norm": 0.3810889422893524, + "learning_rate": 1.8723535232006163e-05, + "loss": 0.5472, + "step": 11928 + }, + { + "epoch": 0.3275398132894014, + "grad_norm": 0.31712639331817627, + "learning_rate": 1.8723324081730507e-05, + "loss": 0.396, + "step": 11929 + }, + { + "epoch": 0.32756727073036795, + "grad_norm": 0.39449983835220337, + "learning_rate": 1.8723112915183063e-05, + "loss": 0.5248, + "step": 11930 + }, + { + "epoch": 0.32759472817133445, + "grad_norm": 0.3842149078845978, + "learning_rate": 1.8722901732364235e-05, + "loss": 0.5218, + "step": 11931 + }, + { + "epoch": 0.32762218561230094, + "grad_norm": 0.3618023097515106, + "learning_rate": 1.8722690533274405e-05, + "loss": 0.4748, + "step": 11932 + }, + { + "epoch": 0.32764964305326744, + "grad_norm": 0.32393065094947815, + "learning_rate": 1.8722479317913977e-05, + "loss": 0.3915, + "step": 11933 + }, + { + "epoch": 0.32767710049423393, + "grad_norm": 0.41031593084335327, + "learning_rate": 1.872226808628334e-05, + "loss": 0.5697, + "step": 11934 + }, + { + "epoch": 0.3277045579352004, + "grad_norm": 0.3688621520996094, + "learning_rate": 1.872205683838289e-05, + "loss": 0.5438, + "step": 11935 + }, + { + "epoch": 0.3277320153761669, + "grad_norm": 0.40309765934944153, + "learning_rate": 1.872184557421302e-05, + "loss": 0.4713, + "step": 11936 + }, + { + "epoch": 0.32775947281713347, + "grad_norm": 0.3495140075683594, + "learning_rate": 1.8721634293774123e-05, + "loss": 0.546, + "step": 11937 + }, + { + "epoch": 0.32778693025809996, + "grad_norm": 0.3573471009731293, + "learning_rate": 1.87214229970666e-05, + "loss": 0.4585, + "step": 11938 + }, + { + "epoch": 0.32781438769906646, + "grad_norm": 0.3342423439025879, + "learning_rate": 1.872121168409083e-05, + "loss": 0.4648, + "step": 11939 + }, + { + "epoch": 0.32784184514003295, + "grad_norm": 0.379768967628479, + "learning_rate": 1.8721000354847224e-05, + "loss": 0.4978, + "step": 11940 + }, + { + "epoch": 0.32786930258099944, + "grad_norm": 0.36495450139045715, + "learning_rate": 1.8720789009336165e-05, + "loss": 0.5121, + "step": 11941 + }, + { + "epoch": 0.32789676002196594, + "grad_norm": 0.37773025035858154, + "learning_rate": 1.8720577647558052e-05, + "loss": 0.5196, + "step": 11942 + }, + { + "epoch": 0.32792421746293243, + "grad_norm": 0.35105395317077637, + "learning_rate": 1.872036626951328e-05, + "loss": 0.6151, + "step": 11943 + }, + { + "epoch": 0.327951674903899, + "grad_norm": 0.3671186566352844, + "learning_rate": 1.8720154875202242e-05, + "loss": 0.5802, + "step": 11944 + }, + { + "epoch": 0.3279791323448655, + "grad_norm": 0.36610904335975647, + "learning_rate": 1.871994346462533e-05, + "loss": 0.4996, + "step": 11945 + }, + { + "epoch": 0.32800658978583197, + "grad_norm": 0.41155996918678284, + "learning_rate": 1.8719732037782945e-05, + "loss": 0.531, + "step": 11946 + }, + { + "epoch": 0.32803404722679846, + "grad_norm": 0.3811686635017395, + "learning_rate": 1.871952059467547e-05, + "loss": 0.5636, + "step": 11947 + }, + { + "epoch": 0.32806150466776496, + "grad_norm": 0.3475353717803955, + "learning_rate": 1.871930913530331e-05, + "loss": 0.4653, + "step": 11948 + }, + { + "epoch": 0.32808896210873145, + "grad_norm": 0.3832700550556183, + "learning_rate": 1.8719097659666854e-05, + "loss": 0.5519, + "step": 11949 + }, + { + "epoch": 0.32811641954969795, + "grad_norm": 0.40531888604164124, + "learning_rate": 1.8718886167766503e-05, + "loss": 0.6075, + "step": 11950 + }, + { + "epoch": 0.3281438769906645, + "grad_norm": 0.36197394132614136, + "learning_rate": 1.8718674659602638e-05, + "loss": 0.5517, + "step": 11951 + }, + { + "epoch": 0.328171334431631, + "grad_norm": 0.37411078810691833, + "learning_rate": 1.871846313517567e-05, + "loss": 0.4971, + "step": 11952 + }, + { + "epoch": 0.3281987918725975, + "grad_norm": 0.34037643671035767, + "learning_rate": 1.871825159448598e-05, + "loss": 0.5419, + "step": 11953 + }, + { + "epoch": 0.328226249313564, + "grad_norm": 0.3751624524593353, + "learning_rate": 1.871804003753397e-05, + "loss": 0.5588, + "step": 11954 + }, + { + "epoch": 0.32825370675453047, + "grad_norm": 0.32654792070388794, + "learning_rate": 1.8717828464320035e-05, + "loss": 0.5553, + "step": 11955 + }, + { + "epoch": 0.32828116419549697, + "grad_norm": 0.3332383930683136, + "learning_rate": 1.8717616874844565e-05, + "loss": 0.4837, + "step": 11956 + }, + { + "epoch": 0.32830862163646346, + "grad_norm": 0.3685525953769684, + "learning_rate": 1.8717405269107956e-05, + "loss": 0.6043, + "step": 11957 + }, + { + "epoch": 0.32833607907743, + "grad_norm": 0.4895397424697876, + "learning_rate": 1.871719364711061e-05, + "loss": 0.5197, + "step": 11958 + }, + { + "epoch": 0.3283635365183965, + "grad_norm": 0.37867289781570435, + "learning_rate": 1.871698200885291e-05, + "loss": 0.5401, + "step": 11959 + }, + { + "epoch": 0.328390993959363, + "grad_norm": 0.3331539034843445, + "learning_rate": 1.8716770354335256e-05, + "loss": 0.4719, + "step": 11960 + }, + { + "epoch": 0.3284184514003295, + "grad_norm": 0.3324986398220062, + "learning_rate": 1.8716558683558046e-05, + "loss": 0.486, + "step": 11961 + }, + { + "epoch": 0.328445908841296, + "grad_norm": 0.3692602217197418, + "learning_rate": 1.871634699652167e-05, + "loss": 0.5442, + "step": 11962 + }, + { + "epoch": 0.3284733662822625, + "grad_norm": 0.3447672724723816, + "learning_rate": 1.8716135293226524e-05, + "loss": 0.5574, + "step": 11963 + }, + { + "epoch": 0.328500823723229, + "grad_norm": 0.4328417181968689, + "learning_rate": 1.871592357367301e-05, + "loss": 0.567, + "step": 11964 + }, + { + "epoch": 0.3285282811641955, + "grad_norm": 0.41026797890663147, + "learning_rate": 1.8715711837861513e-05, + "loss": 0.5316, + "step": 11965 + }, + { + "epoch": 0.328555738605162, + "grad_norm": 0.32474908232688904, + "learning_rate": 1.8715500085792427e-05, + "loss": 0.4859, + "step": 11966 + }, + { + "epoch": 0.3285831960461285, + "grad_norm": 0.4551509916782379, + "learning_rate": 1.8715288317466157e-05, + "loss": 0.5501, + "step": 11967 + }, + { + "epoch": 0.328610653487095, + "grad_norm": 0.3482462763786316, + "learning_rate": 1.8715076532883092e-05, + "loss": 0.521, + "step": 11968 + }, + { + "epoch": 0.3286381109280615, + "grad_norm": 0.39024749398231506, + "learning_rate": 1.8714864732043628e-05, + "loss": 0.4831, + "step": 11969 + }, + { + "epoch": 0.328665568369028, + "grad_norm": 0.39094290137290955, + "learning_rate": 1.871465291494816e-05, + "loss": 0.4918, + "step": 11970 + }, + { + "epoch": 0.3286930258099945, + "grad_norm": 0.3801063001155853, + "learning_rate": 1.871444108159708e-05, + "loss": 0.5443, + "step": 11971 + }, + { + "epoch": 0.32872048325096104, + "grad_norm": 0.3376005291938782, + "learning_rate": 1.871422923199079e-05, + "loss": 0.5127, + "step": 11972 + }, + { + "epoch": 0.32874794069192753, + "grad_norm": 0.36973005533218384, + "learning_rate": 1.871401736612968e-05, + "loss": 0.5915, + "step": 11973 + }, + { + "epoch": 0.328775398132894, + "grad_norm": 0.3496617376804352, + "learning_rate": 1.8713805484014146e-05, + "loss": 0.4672, + "step": 11974 + }, + { + "epoch": 0.3288028555738605, + "grad_norm": 0.4258404076099396, + "learning_rate": 1.8713593585644584e-05, + "loss": 0.5443, + "step": 11975 + }, + { + "epoch": 0.328830313014827, + "grad_norm": 0.36212247610092163, + "learning_rate": 1.8713381671021385e-05, + "loss": 0.5174, + "step": 11976 + }, + { + "epoch": 0.3288577704557935, + "grad_norm": 0.32974421977996826, + "learning_rate": 1.8713169740144952e-05, + "loss": 0.5087, + "step": 11977 + }, + { + "epoch": 0.32888522789676, + "grad_norm": 0.3839033842086792, + "learning_rate": 1.871295779301568e-05, + "loss": 0.5308, + "step": 11978 + }, + { + "epoch": 0.32891268533772655, + "grad_norm": 0.3841021656990051, + "learning_rate": 1.8712745829633956e-05, + "loss": 0.5909, + "step": 11979 + }, + { + "epoch": 0.32894014277869305, + "grad_norm": 0.3926509618759155, + "learning_rate": 1.871253385000018e-05, + "loss": 0.4797, + "step": 11980 + }, + { + "epoch": 0.32896760021965954, + "grad_norm": 0.371751606464386, + "learning_rate": 1.8712321854114747e-05, + "loss": 0.4602, + "step": 11981 + }, + { + "epoch": 0.32899505766062603, + "grad_norm": 0.3857334554195404, + "learning_rate": 1.8712109841978056e-05, + "loss": 0.5749, + "step": 11982 + }, + { + "epoch": 0.3290225151015925, + "grad_norm": 0.34016844630241394, + "learning_rate": 1.87118978135905e-05, + "loss": 0.5286, + "step": 11983 + }, + { + "epoch": 0.329049972542559, + "grad_norm": 0.48504170775413513, + "learning_rate": 1.8711685768952472e-05, + "loss": 0.5168, + "step": 11984 + }, + { + "epoch": 0.3290774299835255, + "grad_norm": 0.3634660243988037, + "learning_rate": 1.871147370806437e-05, + "loss": 0.4876, + "step": 11985 + }, + { + "epoch": 0.329104887424492, + "grad_norm": 0.42379602789878845, + "learning_rate": 1.871126163092659e-05, + "loss": 0.4987, + "step": 11986 + }, + { + "epoch": 0.32913234486545856, + "grad_norm": 0.32686731219291687, + "learning_rate": 1.8711049537539524e-05, + "loss": 0.4696, + "step": 11987 + }, + { + "epoch": 0.32915980230642505, + "grad_norm": 0.4843846261501312, + "learning_rate": 1.8710837427903574e-05, + "loss": 0.5487, + "step": 11988 + }, + { + "epoch": 0.32918725974739155, + "grad_norm": 0.3684459328651428, + "learning_rate": 1.871062530201913e-05, + "loss": 0.4861, + "step": 11989 + }, + { + "epoch": 0.32921471718835804, + "grad_norm": 0.4207896292209625, + "learning_rate": 1.8710413159886588e-05, + "loss": 0.5476, + "step": 11990 + }, + { + "epoch": 0.32924217462932454, + "grad_norm": 0.3507305085659027, + "learning_rate": 1.871020100150635e-05, + "loss": 0.5349, + "step": 11991 + }, + { + "epoch": 0.32926963207029103, + "grad_norm": 0.3623514175415039, + "learning_rate": 1.8709988826878804e-05, + "loss": 0.445, + "step": 11992 + }, + { + "epoch": 0.3292970895112575, + "grad_norm": 0.4062873423099518, + "learning_rate": 1.870977663600435e-05, + "loss": 0.5573, + "step": 11993 + }, + { + "epoch": 0.3293245469522241, + "grad_norm": 0.37034663558006287, + "learning_rate": 1.8709564428883382e-05, + "loss": 0.5369, + "step": 11994 + }, + { + "epoch": 0.32935200439319057, + "grad_norm": 0.6620703339576721, + "learning_rate": 1.8709352205516298e-05, + "loss": 0.5132, + "step": 11995 + }, + { + "epoch": 0.32937946183415706, + "grad_norm": 0.3737325370311737, + "learning_rate": 1.8709139965903488e-05, + "loss": 0.4526, + "step": 11996 + }, + { + "epoch": 0.32940691927512356, + "grad_norm": 0.3490918278694153, + "learning_rate": 1.8708927710045358e-05, + "loss": 0.4534, + "step": 11997 + }, + { + "epoch": 0.32943437671609005, + "grad_norm": 0.33697646856307983, + "learning_rate": 1.8708715437942295e-05, + "loss": 0.5137, + "step": 11998 + }, + { + "epoch": 0.32946183415705654, + "grad_norm": 0.3654334843158722, + "learning_rate": 1.8708503149594703e-05, + "loss": 0.519, + "step": 11999 + }, + { + "epoch": 0.32948929159802304, + "grad_norm": 0.30000773072242737, + "learning_rate": 1.8708290845002966e-05, + "loss": 0.4206, + "step": 12000 + }, + { + "epoch": 0.3295167490389896, + "grad_norm": 0.4063723683357239, + "learning_rate": 1.870807852416749e-05, + "loss": 0.5022, + "step": 12001 + }, + { + "epoch": 0.3295442064799561, + "grad_norm": 0.41489601135253906, + "learning_rate": 1.8707866187088668e-05, + "loss": 0.4791, + "step": 12002 + }, + { + "epoch": 0.3295716639209226, + "grad_norm": 0.3383142352104187, + "learning_rate": 1.87076538337669e-05, + "loss": 0.4896, + "step": 12003 + }, + { + "epoch": 0.32959912136188907, + "grad_norm": 0.39272210001945496, + "learning_rate": 1.8707441464202575e-05, + "loss": 0.5029, + "step": 12004 + }, + { + "epoch": 0.32962657880285556, + "grad_norm": 0.40507009625434875, + "learning_rate": 1.8707229078396093e-05, + "loss": 0.5156, + "step": 12005 + }, + { + "epoch": 0.32965403624382206, + "grad_norm": 0.44115784764289856, + "learning_rate": 1.870701667634785e-05, + "loss": 0.6101, + "step": 12006 + }, + { + "epoch": 0.32968149368478855, + "grad_norm": 0.41089463233947754, + "learning_rate": 1.8706804258058243e-05, + "loss": 0.5663, + "step": 12007 + }, + { + "epoch": 0.3297089511257551, + "grad_norm": 0.44675686955451965, + "learning_rate": 1.8706591823527665e-05, + "loss": 0.4388, + "step": 12008 + }, + { + "epoch": 0.3297364085667216, + "grad_norm": 0.3847343623638153, + "learning_rate": 1.8706379372756513e-05, + "loss": 0.5279, + "step": 12009 + }, + { + "epoch": 0.3297638660076881, + "grad_norm": 0.3951273560523987, + "learning_rate": 1.870616690574519e-05, + "loss": 0.5042, + "step": 12010 + }, + { + "epoch": 0.3297913234486546, + "grad_norm": 0.35910746455192566, + "learning_rate": 1.8705954422494082e-05, + "loss": 0.5204, + "step": 12011 + }, + { + "epoch": 0.3298187808896211, + "grad_norm": 0.40818873047828674, + "learning_rate": 1.870574192300359e-05, + "loss": 0.4511, + "step": 12012 + }, + { + "epoch": 0.32984623833058757, + "grad_norm": 0.3781450092792511, + "learning_rate": 1.8705529407274117e-05, + "loss": 0.5801, + "step": 12013 + }, + { + "epoch": 0.32987369577155407, + "grad_norm": 0.34624141454696655, + "learning_rate": 1.870531687530605e-05, + "loss": 0.4921, + "step": 12014 + }, + { + "epoch": 0.3299011532125206, + "grad_norm": 0.4332934319972992, + "learning_rate": 1.8705104327099786e-05, + "loss": 0.5839, + "step": 12015 + }, + { + "epoch": 0.3299286106534871, + "grad_norm": 0.38911232352256775, + "learning_rate": 1.870489176265573e-05, + "loss": 0.5462, + "step": 12016 + }, + { + "epoch": 0.3299560680944536, + "grad_norm": 0.39207619428634644, + "learning_rate": 1.8704679181974268e-05, + "loss": 0.5065, + "step": 12017 + }, + { + "epoch": 0.3299835255354201, + "grad_norm": 0.4410933554172516, + "learning_rate": 1.87044665850558e-05, + "loss": 0.5662, + "step": 12018 + }, + { + "epoch": 0.3300109829763866, + "grad_norm": 0.3646087944507599, + "learning_rate": 1.8704253971900726e-05, + "loss": 0.5274, + "step": 12019 + }, + { + "epoch": 0.3300384404173531, + "grad_norm": 0.32533836364746094, + "learning_rate": 1.870404134250944e-05, + "loss": 0.431, + "step": 12020 + }, + { + "epoch": 0.3300658978583196, + "grad_norm": 0.3370291590690613, + "learning_rate": 1.870382869688234e-05, + "loss": 0.4744, + "step": 12021 + }, + { + "epoch": 0.33009335529928613, + "grad_norm": 0.4749324917793274, + "learning_rate": 1.8703616035019817e-05, + "loss": 0.5805, + "step": 12022 + }, + { + "epoch": 0.3301208127402526, + "grad_norm": 0.3739639222621918, + "learning_rate": 1.870340335692228e-05, + "loss": 0.5696, + "step": 12023 + }, + { + "epoch": 0.3301482701812191, + "grad_norm": 0.37467482686042786, + "learning_rate": 1.870319066259011e-05, + "loss": 0.523, + "step": 12024 + }, + { + "epoch": 0.3301757276221856, + "grad_norm": 0.3902188539505005, + "learning_rate": 1.8702977952023715e-05, + "loss": 0.5692, + "step": 12025 + }, + { + "epoch": 0.3302031850631521, + "grad_norm": 0.34801313281059265, + "learning_rate": 1.870276522522349e-05, + "loss": 0.5901, + "step": 12026 + }, + { + "epoch": 0.3302306425041186, + "grad_norm": 0.5108718276023865, + "learning_rate": 1.8702552482189827e-05, + "loss": 0.5245, + "step": 12027 + }, + { + "epoch": 0.3302580999450851, + "grad_norm": 0.4149084687232971, + "learning_rate": 1.8702339722923127e-05, + "loss": 0.5488, + "step": 12028 + }, + { + "epoch": 0.33028555738605164, + "grad_norm": 0.3081459701061249, + "learning_rate": 1.870212694742379e-05, + "loss": 0.4508, + "step": 12029 + }, + { + "epoch": 0.33031301482701814, + "grad_norm": 0.3462482690811157, + "learning_rate": 1.8701914155692207e-05, + "loss": 0.455, + "step": 12030 + }, + { + "epoch": 0.33034047226798463, + "grad_norm": 0.3526279926300049, + "learning_rate": 1.8701701347728774e-05, + "loss": 0.5595, + "step": 12031 + }, + { + "epoch": 0.3303679297089511, + "grad_norm": 0.3455398380756378, + "learning_rate": 1.870148852353389e-05, + "loss": 0.4702, + "step": 12032 + }, + { + "epoch": 0.3303953871499176, + "grad_norm": 0.3110741972923279, + "learning_rate": 1.8701275683107957e-05, + "loss": 0.4577, + "step": 12033 + }, + { + "epoch": 0.3304228445908841, + "grad_norm": 0.39964115619659424, + "learning_rate": 1.8701062826451367e-05, + "loss": 0.5103, + "step": 12034 + }, + { + "epoch": 0.3304503020318506, + "grad_norm": 0.46287909150123596, + "learning_rate": 1.8700849953564514e-05, + "loss": 0.5369, + "step": 12035 + }, + { + "epoch": 0.33047775947281716, + "grad_norm": 0.3270765244960785, + "learning_rate": 1.8700637064447803e-05, + "loss": 0.4546, + "step": 12036 + }, + { + "epoch": 0.33050521691378365, + "grad_norm": 0.3289569318294525, + "learning_rate": 1.8700424159101624e-05, + "loss": 0.4793, + "step": 12037 + }, + { + "epoch": 0.33053267435475014, + "grad_norm": 0.3470202088356018, + "learning_rate": 1.870021123752638e-05, + "loss": 0.543, + "step": 12038 + }, + { + "epoch": 0.33056013179571664, + "grad_norm": 0.3927311301231384, + "learning_rate": 1.869999829972246e-05, + "loss": 0.4478, + "step": 12039 + }, + { + "epoch": 0.33058758923668313, + "grad_norm": 0.39717915654182434, + "learning_rate": 1.8699785345690272e-05, + "loss": 0.5784, + "step": 12040 + }, + { + "epoch": 0.3306150466776496, + "grad_norm": 0.32537826895713806, + "learning_rate": 1.8699572375430206e-05, + "loss": 0.4518, + "step": 12041 + }, + { + "epoch": 0.3306425041186161, + "grad_norm": 0.3489329218864441, + "learning_rate": 1.869935938894266e-05, + "loss": 0.4384, + "step": 12042 + }, + { + "epoch": 0.33066996155958267, + "grad_norm": 0.4078299403190613, + "learning_rate": 1.8699146386228035e-05, + "loss": 0.4905, + "step": 12043 + }, + { + "epoch": 0.33069741900054916, + "grad_norm": 0.3919115364551544, + "learning_rate": 1.8698933367286722e-05, + "loss": 0.5269, + "step": 12044 + }, + { + "epoch": 0.33072487644151566, + "grad_norm": 0.37842345237731934, + "learning_rate": 1.8698720332119124e-05, + "loss": 0.5223, + "step": 12045 + }, + { + "epoch": 0.33075233388248215, + "grad_norm": 0.35697075724601746, + "learning_rate": 1.8698507280725634e-05, + "loss": 0.5165, + "step": 12046 + }, + { + "epoch": 0.33077979132344865, + "grad_norm": 0.3557453155517578, + "learning_rate": 1.8698294213106653e-05, + "loss": 0.5171, + "step": 12047 + }, + { + "epoch": 0.33080724876441514, + "grad_norm": 0.40448036789894104, + "learning_rate": 1.869808112926258e-05, + "loss": 0.5101, + "step": 12048 + }, + { + "epoch": 0.33083470620538163, + "grad_norm": 0.3700740337371826, + "learning_rate": 1.8697868029193805e-05, + "loss": 0.4455, + "step": 12049 + }, + { + "epoch": 0.3308621636463482, + "grad_norm": 0.3457902669906616, + "learning_rate": 1.8697654912900733e-05, + "loss": 0.4835, + "step": 12050 + }, + { + "epoch": 0.3308896210873147, + "grad_norm": 0.402881383895874, + "learning_rate": 1.8697441780383757e-05, + "loss": 0.5527, + "step": 12051 + }, + { + "epoch": 0.33091707852828117, + "grad_norm": 0.3516930341720581, + "learning_rate": 1.8697228631643275e-05, + "loss": 0.5179, + "step": 12052 + }, + { + "epoch": 0.33094453596924767, + "grad_norm": 0.35406506061553955, + "learning_rate": 1.869701546667969e-05, + "loss": 0.5472, + "step": 12053 + }, + { + "epoch": 0.33097199341021416, + "grad_norm": 0.4205666184425354, + "learning_rate": 1.8696802285493392e-05, + "loss": 0.6205, + "step": 12054 + }, + { + "epoch": 0.33099945085118065, + "grad_norm": 0.3455674946308136, + "learning_rate": 1.8696589088084786e-05, + "loss": 0.5433, + "step": 12055 + }, + { + "epoch": 0.33102690829214715, + "grad_norm": 0.3698217272758484, + "learning_rate": 1.869637587445426e-05, + "loss": 0.4609, + "step": 12056 + }, + { + "epoch": 0.3310543657331137, + "grad_norm": 0.391926646232605, + "learning_rate": 1.8696162644602222e-05, + "loss": 0.5282, + "step": 12057 + }, + { + "epoch": 0.3310818231740802, + "grad_norm": 0.37686607241630554, + "learning_rate": 1.8695949398529062e-05, + "loss": 0.5662, + "step": 12058 + }, + { + "epoch": 0.3311092806150467, + "grad_norm": 0.3250347673892975, + "learning_rate": 1.8695736136235183e-05, + "loss": 0.5073, + "step": 12059 + }, + { + "epoch": 0.3311367380560132, + "grad_norm": 0.3344913423061371, + "learning_rate": 1.869552285772098e-05, + "loss": 0.4769, + "step": 12060 + }, + { + "epoch": 0.3311641954969797, + "grad_norm": 0.3916247487068176, + "learning_rate": 1.869530956298685e-05, + "loss": 0.5883, + "step": 12061 + }, + { + "epoch": 0.33119165293794617, + "grad_norm": 0.38963887095451355, + "learning_rate": 1.8695096252033196e-05, + "loss": 0.557, + "step": 12062 + }, + { + "epoch": 0.33121911037891266, + "grad_norm": 0.35012879967689514, + "learning_rate": 1.8694882924860408e-05, + "loss": 0.5076, + "step": 12063 + }, + { + "epoch": 0.3312465678198792, + "grad_norm": 0.4036499559879303, + "learning_rate": 1.869466958146889e-05, + "loss": 0.5589, + "step": 12064 + }, + { + "epoch": 0.3312740252608457, + "grad_norm": 0.4382277727127075, + "learning_rate": 1.8694456221859042e-05, + "loss": 0.6802, + "step": 12065 + }, + { + "epoch": 0.3313014827018122, + "grad_norm": 0.593174934387207, + "learning_rate": 1.8694242846031256e-05, + "loss": 0.496, + "step": 12066 + }, + { + "epoch": 0.3313289401427787, + "grad_norm": 0.3283044993877411, + "learning_rate": 1.869402945398593e-05, + "loss": 0.492, + "step": 12067 + }, + { + "epoch": 0.3313563975837452, + "grad_norm": 0.4231805205345154, + "learning_rate": 1.8693816045723466e-05, + "loss": 0.5351, + "step": 12068 + }, + { + "epoch": 0.3313838550247117, + "grad_norm": 0.37303122878074646, + "learning_rate": 1.869360262124426e-05, + "loss": 0.6117, + "step": 12069 + }, + { + "epoch": 0.3314113124656782, + "grad_norm": 0.3678549528121948, + "learning_rate": 1.869338918054871e-05, + "loss": 0.5225, + "step": 12070 + }, + { + "epoch": 0.3314387699066447, + "grad_norm": 0.40193289518356323, + "learning_rate": 1.8693175723637215e-05, + "loss": 0.4503, + "step": 12071 + }, + { + "epoch": 0.3314662273476112, + "grad_norm": 0.5912889838218689, + "learning_rate": 1.8692962250510175e-05, + "loss": 0.5612, + "step": 12072 + }, + { + "epoch": 0.3314936847885777, + "grad_norm": 0.33107373118400574, + "learning_rate": 1.8692748761167984e-05, + "loss": 0.5548, + "step": 12073 + }, + { + "epoch": 0.3315211422295442, + "grad_norm": 0.39332205057144165, + "learning_rate": 1.869253525561104e-05, + "loss": 0.4967, + "step": 12074 + }, + { + "epoch": 0.3315485996705107, + "grad_norm": 0.3109099268913269, + "learning_rate": 1.8692321733839745e-05, + "loss": 0.443, + "step": 12075 + }, + { + "epoch": 0.3315760571114772, + "grad_norm": 0.3873143196105957, + "learning_rate": 1.8692108195854497e-05, + "loss": 0.4905, + "step": 12076 + }, + { + "epoch": 0.3316035145524437, + "grad_norm": 0.3872961401939392, + "learning_rate": 1.869189464165569e-05, + "loss": 0.5468, + "step": 12077 + }, + { + "epoch": 0.33163097199341024, + "grad_norm": 0.3680601418018341, + "learning_rate": 1.869168107124373e-05, + "loss": 0.4542, + "step": 12078 + }, + { + "epoch": 0.33165842943437673, + "grad_norm": 0.349619060754776, + "learning_rate": 1.8691467484619003e-05, + "loss": 0.5134, + "step": 12079 + }, + { + "epoch": 0.3316858868753432, + "grad_norm": 0.34198543429374695, + "learning_rate": 1.8691253881781925e-05, + "loss": 0.4896, + "step": 12080 + }, + { + "epoch": 0.3317133443163097, + "grad_norm": 0.4209119379520416, + "learning_rate": 1.8691040262732877e-05, + "loss": 0.5358, + "step": 12081 + }, + { + "epoch": 0.3317408017572762, + "grad_norm": 0.3897978961467743, + "learning_rate": 1.8690826627472268e-05, + "loss": 0.4977, + "step": 12082 + }, + { + "epoch": 0.3317682591982427, + "grad_norm": 0.37333399057388306, + "learning_rate": 1.8690612976000493e-05, + "loss": 0.6173, + "step": 12083 + }, + { + "epoch": 0.3317957166392092, + "grad_norm": 0.37462806701660156, + "learning_rate": 1.8690399308317954e-05, + "loss": 0.5824, + "step": 12084 + }, + { + "epoch": 0.33182317408017575, + "grad_norm": 0.44185304641723633, + "learning_rate": 1.869018562442504e-05, + "loss": 0.5988, + "step": 12085 + }, + { + "epoch": 0.33185063152114225, + "grad_norm": 0.29703062772750854, + "learning_rate": 1.8689971924322162e-05, + "loss": 0.4688, + "step": 12086 + }, + { + "epoch": 0.33187808896210874, + "grad_norm": 0.4677489995956421, + "learning_rate": 1.8689758208009713e-05, + "loss": 0.5246, + "step": 12087 + }, + { + "epoch": 0.33190554640307524, + "grad_norm": 0.370398610830307, + "learning_rate": 1.868954447548809e-05, + "loss": 0.5618, + "step": 12088 + }, + { + "epoch": 0.33193300384404173, + "grad_norm": 0.3788541257381439, + "learning_rate": 1.8689330726757687e-05, + "loss": 0.5289, + "step": 12089 + }, + { + "epoch": 0.3319604612850082, + "grad_norm": 0.35506099462509155, + "learning_rate": 1.8689116961818916e-05, + "loss": 0.5538, + "step": 12090 + }, + { + "epoch": 0.3319879187259747, + "grad_norm": 0.33543774485588074, + "learning_rate": 1.868890318067217e-05, + "loss": 0.6239, + "step": 12091 + }, + { + "epoch": 0.33201537616694127, + "grad_norm": 0.34431928396224976, + "learning_rate": 1.8688689383317844e-05, + "loss": 0.5264, + "step": 12092 + }, + { + "epoch": 0.33204283360790776, + "grad_norm": 0.33699700236320496, + "learning_rate": 1.868847556975634e-05, + "loss": 0.5364, + "step": 12093 + }, + { + "epoch": 0.33207029104887426, + "grad_norm": 0.36346331238746643, + "learning_rate": 1.8688261739988053e-05, + "loss": 0.5381, + "step": 12094 + }, + { + "epoch": 0.33209774848984075, + "grad_norm": 0.3682243824005127, + "learning_rate": 1.8688047894013386e-05, + "loss": 0.503, + "step": 12095 + }, + { + "epoch": 0.33212520593080724, + "grad_norm": 0.36588090658187866, + "learning_rate": 1.8687834031832735e-05, + "loss": 0.5189, + "step": 12096 + }, + { + "epoch": 0.33215266337177374, + "grad_norm": 0.36555665731430054, + "learning_rate": 1.8687620153446505e-05, + "loss": 0.5454, + "step": 12097 + }, + { + "epoch": 0.33218012081274023, + "grad_norm": 0.3560894727706909, + "learning_rate": 1.868740625885509e-05, + "loss": 0.546, + "step": 12098 + }, + { + "epoch": 0.3322075782537068, + "grad_norm": 0.35019826889038086, + "learning_rate": 1.8687192348058887e-05, + "loss": 0.4706, + "step": 12099 + }, + { + "epoch": 0.3322350356946733, + "grad_norm": 0.38681304454803467, + "learning_rate": 1.86869784210583e-05, + "loss": 0.5708, + "step": 12100 + }, + { + "epoch": 0.33226249313563977, + "grad_norm": 0.4226163327693939, + "learning_rate": 1.8686764477853726e-05, + "loss": 0.5694, + "step": 12101 + }, + { + "epoch": 0.33228995057660626, + "grad_norm": 0.33914461731910706, + "learning_rate": 1.868655051844556e-05, + "loss": 0.5094, + "step": 12102 + }, + { + "epoch": 0.33231740801757276, + "grad_norm": 0.3543728291988373, + "learning_rate": 1.868633654283421e-05, + "loss": 0.5009, + "step": 12103 + }, + { + "epoch": 0.33234486545853925, + "grad_norm": 0.45445477962493896, + "learning_rate": 1.8686122551020066e-05, + "loss": 0.5518, + "step": 12104 + }, + { + "epoch": 0.33237232289950575, + "grad_norm": 0.34171491861343384, + "learning_rate": 1.8685908543003534e-05, + "loss": 0.4789, + "step": 12105 + }, + { + "epoch": 0.3323997803404723, + "grad_norm": 0.3719686269760132, + "learning_rate": 1.868569451878501e-05, + "loss": 0.6077, + "step": 12106 + }, + { + "epoch": 0.3324272377814388, + "grad_norm": 0.40135595202445984, + "learning_rate": 1.8685480478364894e-05, + "loss": 0.4451, + "step": 12107 + }, + { + "epoch": 0.3324546952224053, + "grad_norm": 0.36429738998413086, + "learning_rate": 1.868526642174358e-05, + "loss": 0.5931, + "step": 12108 + }, + { + "epoch": 0.3324821526633718, + "grad_norm": 0.3630122244358063, + "learning_rate": 1.8685052348921474e-05, + "loss": 0.5478, + "step": 12109 + }, + { + "epoch": 0.33250961010433827, + "grad_norm": 0.36155635118484497, + "learning_rate": 1.8684838259898977e-05, + "loss": 0.4655, + "step": 12110 + }, + { + "epoch": 0.33253706754530477, + "grad_norm": 0.41808179020881653, + "learning_rate": 1.868462415467648e-05, + "loss": 0.4833, + "step": 12111 + }, + { + "epoch": 0.33256452498627126, + "grad_norm": 0.38781073689460754, + "learning_rate": 1.8684410033254392e-05, + "loss": 0.4461, + "step": 12112 + }, + { + "epoch": 0.3325919824272378, + "grad_norm": 0.40093326568603516, + "learning_rate": 1.8684195895633105e-05, + "loss": 0.5435, + "step": 12113 + }, + { + "epoch": 0.3326194398682043, + "grad_norm": 1.5812658071517944, + "learning_rate": 1.868398174181302e-05, + "loss": 0.5875, + "step": 12114 + }, + { + "epoch": 0.3326468973091708, + "grad_norm": 0.351866215467453, + "learning_rate": 1.868376757179454e-05, + "loss": 0.4528, + "step": 12115 + }, + { + "epoch": 0.3326743547501373, + "grad_norm": 0.42574647068977356, + "learning_rate": 1.868355338557806e-05, + "loss": 0.5771, + "step": 12116 + }, + { + "epoch": 0.3327018121911038, + "grad_norm": 0.352555513381958, + "learning_rate": 1.8683339183163985e-05, + "loss": 0.5393, + "step": 12117 + }, + { + "epoch": 0.3327292696320703, + "grad_norm": 0.2993614673614502, + "learning_rate": 1.8683124964552707e-05, + "loss": 0.4646, + "step": 12118 + }, + { + "epoch": 0.3327567270730368, + "grad_norm": 0.36872532963752747, + "learning_rate": 1.868291072974463e-05, + "loss": 0.5603, + "step": 12119 + }, + { + "epoch": 0.33278418451400327, + "grad_norm": 0.3609529733657837, + "learning_rate": 1.8682696478740154e-05, + "loss": 0.6538, + "step": 12120 + }, + { + "epoch": 0.3328116419549698, + "grad_norm": 0.36905625462532043, + "learning_rate": 1.8682482211539675e-05, + "loss": 0.5579, + "step": 12121 + }, + { + "epoch": 0.3328390993959363, + "grad_norm": 0.3709834814071655, + "learning_rate": 1.86822679281436e-05, + "loss": 0.4971, + "step": 12122 + }, + { + "epoch": 0.3328665568369028, + "grad_norm": 0.3465105891227722, + "learning_rate": 1.8682053628552325e-05, + "loss": 0.5359, + "step": 12123 + }, + { + "epoch": 0.3328940142778693, + "grad_norm": 0.3874542713165283, + "learning_rate": 1.8681839312766246e-05, + "loss": 0.5046, + "step": 12124 + }, + { + "epoch": 0.3329214717188358, + "grad_norm": 0.3714902400970459, + "learning_rate": 1.8681624980785765e-05, + "loss": 0.4783, + "step": 12125 + }, + { + "epoch": 0.3329489291598023, + "grad_norm": 0.3609793782234192, + "learning_rate": 1.8681410632611284e-05, + "loss": 0.5694, + "step": 12126 + }, + { + "epoch": 0.3329763866007688, + "grad_norm": 0.3704984188079834, + "learning_rate": 1.8681196268243204e-05, + "loss": 0.561, + "step": 12127 + }, + { + "epoch": 0.33300384404173533, + "grad_norm": 0.35756394267082214, + "learning_rate": 1.8680981887681917e-05, + "loss": 0.4623, + "step": 12128 + }, + { + "epoch": 0.3330313014827018, + "grad_norm": 0.3672991096973419, + "learning_rate": 1.868076749092783e-05, + "loss": 0.4207, + "step": 12129 + }, + { + "epoch": 0.3330587589236683, + "grad_norm": 0.37532728910446167, + "learning_rate": 1.8680553077981343e-05, + "loss": 0.5006, + "step": 12130 + }, + { + "epoch": 0.3330862163646348, + "grad_norm": 0.42688310146331787, + "learning_rate": 1.8680338648842852e-05, + "loss": 0.5345, + "step": 12131 + }, + { + "epoch": 0.3331136738056013, + "grad_norm": 0.346250057220459, + "learning_rate": 1.868012420351276e-05, + "loss": 0.4552, + "step": 12132 + }, + { + "epoch": 0.3331411312465678, + "grad_norm": 0.44948047399520874, + "learning_rate": 1.8679909741991465e-05, + "loss": 0.5806, + "step": 12133 + }, + { + "epoch": 0.3331685886875343, + "grad_norm": 0.36561450362205505, + "learning_rate": 1.8679695264279368e-05, + "loss": 0.5347, + "step": 12134 + }, + { + "epoch": 0.33319604612850084, + "grad_norm": 0.4279904067516327, + "learning_rate": 1.8679480770376874e-05, + "loss": 0.5833, + "step": 12135 + }, + { + "epoch": 0.33322350356946734, + "grad_norm": 0.3907735347747803, + "learning_rate": 1.867926626028437e-05, + "loss": 0.5113, + "step": 12136 + }, + { + "epoch": 0.33325096101043383, + "grad_norm": 0.4309743046760559, + "learning_rate": 1.867905173400227e-05, + "loss": 0.5348, + "step": 12137 + }, + { + "epoch": 0.3332784184514003, + "grad_norm": 0.4029633700847626, + "learning_rate": 1.8678837191530967e-05, + "loss": 0.6536, + "step": 12138 + }, + { + "epoch": 0.3333058758923668, + "grad_norm": 0.3549163341522217, + "learning_rate": 1.8678622632870863e-05, + "loss": 0.5534, + "step": 12139 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.5452627539634705, + "learning_rate": 1.867840805802236e-05, + "loss": 0.5361, + "step": 12140 + }, + { + "epoch": 0.3333607907742998, + "grad_norm": 0.4094119966030121, + "learning_rate": 1.8678193466985853e-05, + "loss": 0.5425, + "step": 12141 + }, + { + "epoch": 0.33338824821526636, + "grad_norm": 0.3872023820877075, + "learning_rate": 1.8677978859761745e-05, + "loss": 0.5013, + "step": 12142 + }, + { + "epoch": 0.33341570565623285, + "grad_norm": 0.38617101311683655, + "learning_rate": 1.8677764236350437e-05, + "loss": 0.5312, + "step": 12143 + }, + { + "epoch": 0.33344316309719935, + "grad_norm": 0.3354259133338928, + "learning_rate": 1.867754959675233e-05, + "loss": 0.5445, + "step": 12144 + }, + { + "epoch": 0.33347062053816584, + "grad_norm": 0.6444119811058044, + "learning_rate": 1.867733494096782e-05, + "loss": 0.5703, + "step": 12145 + }, + { + "epoch": 0.33349807797913233, + "grad_norm": 0.3976154923439026, + "learning_rate": 1.8677120268997317e-05, + "loss": 0.5792, + "step": 12146 + }, + { + "epoch": 0.33352553542009883, + "grad_norm": 0.3712121546268463, + "learning_rate": 1.8676905580841213e-05, + "loss": 0.603, + "step": 12147 + }, + { + "epoch": 0.3335529928610653, + "grad_norm": 1.1602619886398315, + "learning_rate": 1.867669087649991e-05, + "loss": 0.5124, + "step": 12148 + }, + { + "epoch": 0.3335804503020319, + "grad_norm": 0.3670958876609802, + "learning_rate": 1.867647615597381e-05, + "loss": 0.5187, + "step": 12149 + }, + { + "epoch": 0.33360790774299837, + "grad_norm": 0.3441571891307831, + "learning_rate": 1.8676261419263314e-05, + "loss": 0.4445, + "step": 12150 + }, + { + "epoch": 0.33363536518396486, + "grad_norm": 0.3844088912010193, + "learning_rate": 1.867604666636882e-05, + "loss": 0.4717, + "step": 12151 + }, + { + "epoch": 0.33366282262493135, + "grad_norm": 0.3463568389415741, + "learning_rate": 1.867583189729073e-05, + "loss": 0.5081, + "step": 12152 + }, + { + "epoch": 0.33369028006589785, + "grad_norm": 0.37555140256881714, + "learning_rate": 1.867561711202944e-05, + "loss": 0.5815, + "step": 12153 + }, + { + "epoch": 0.33371773750686434, + "grad_norm": 0.3874824047088623, + "learning_rate": 1.8675402310585358e-05, + "loss": 0.6192, + "step": 12154 + }, + { + "epoch": 0.33374519494783084, + "grad_norm": 0.3401980400085449, + "learning_rate": 1.8675187492958886e-05, + "loss": 0.4827, + "step": 12155 + }, + { + "epoch": 0.3337726523887974, + "grad_norm": 0.3529663681983948, + "learning_rate": 1.8674972659150414e-05, + "loss": 0.5637, + "step": 12156 + }, + { + "epoch": 0.3338001098297639, + "grad_norm": 0.3410813510417938, + "learning_rate": 1.8674757809160354e-05, + "loss": 0.533, + "step": 12157 + }, + { + "epoch": 0.3338275672707304, + "grad_norm": 0.39642900228500366, + "learning_rate": 1.8674542942989102e-05, + "loss": 0.5631, + "step": 12158 + }, + { + "epoch": 0.33385502471169687, + "grad_norm": 0.5239543914794922, + "learning_rate": 1.8674328060637058e-05, + "loss": 0.5568, + "step": 12159 + }, + { + "epoch": 0.33388248215266336, + "grad_norm": 0.34486326575279236, + "learning_rate": 1.867411316210462e-05, + "loss": 0.406, + "step": 12160 + }, + { + "epoch": 0.33390993959362986, + "grad_norm": 0.3769332468509674, + "learning_rate": 1.8673898247392197e-05, + "loss": 0.5526, + "step": 12161 + }, + { + "epoch": 0.33393739703459635, + "grad_norm": 0.3466017246246338, + "learning_rate": 1.8673683316500184e-05, + "loss": 0.5712, + "step": 12162 + }, + { + "epoch": 0.3339648544755629, + "grad_norm": 0.4155208170413971, + "learning_rate": 1.867346836942898e-05, + "loss": 0.5157, + "step": 12163 + }, + { + "epoch": 0.3339923119165294, + "grad_norm": 0.48610249161720276, + "learning_rate": 1.8673253406178994e-05, + "loss": 0.56, + "step": 12164 + }, + { + "epoch": 0.3340197693574959, + "grad_norm": 0.3727554380893707, + "learning_rate": 1.8673038426750625e-05, + "loss": 0.5691, + "step": 12165 + }, + { + "epoch": 0.3340472267984624, + "grad_norm": 0.39319634437561035, + "learning_rate": 1.8672823431144264e-05, + "loss": 0.5632, + "step": 12166 + }, + { + "epoch": 0.3340746842394289, + "grad_norm": 0.4360782504081726, + "learning_rate": 1.8672608419360323e-05, + "loss": 0.566, + "step": 12167 + }, + { + "epoch": 0.33410214168039537, + "grad_norm": 0.37296417355537415, + "learning_rate": 1.86723933913992e-05, + "loss": 0.5455, + "step": 12168 + }, + { + "epoch": 0.33412959912136186, + "grad_norm": 0.35333916544914246, + "learning_rate": 1.8672178347261293e-05, + "loss": 0.4477, + "step": 12169 + }, + { + "epoch": 0.3341570565623284, + "grad_norm": 0.3674434721469879, + "learning_rate": 1.867196328694701e-05, + "loss": 0.5058, + "step": 12170 + }, + { + "epoch": 0.3341845140032949, + "grad_norm": 0.3275064527988434, + "learning_rate": 1.867174821045674e-05, + "loss": 0.474, + "step": 12171 + }, + { + "epoch": 0.3342119714442614, + "grad_norm": 0.354155570268631, + "learning_rate": 1.8671533117790898e-05, + "loss": 0.529, + "step": 12172 + }, + { + "epoch": 0.3342394288852279, + "grad_norm": 0.35658782720565796, + "learning_rate": 1.8671318008949873e-05, + "loss": 0.5338, + "step": 12173 + }, + { + "epoch": 0.3342668863261944, + "grad_norm": 0.36811596155166626, + "learning_rate": 1.867110288393408e-05, + "loss": 0.6117, + "step": 12174 + }, + { + "epoch": 0.3342943437671609, + "grad_norm": 0.37824440002441406, + "learning_rate": 1.867088774274391e-05, + "loss": 0.5102, + "step": 12175 + }, + { + "epoch": 0.3343218012081274, + "grad_norm": 0.3602931797504425, + "learning_rate": 1.8670672585379764e-05, + "loss": 0.5587, + "step": 12176 + }, + { + "epoch": 0.33434925864909393, + "grad_norm": 0.41337916254997253, + "learning_rate": 1.8670457411842048e-05, + "loss": 0.5076, + "step": 12177 + }, + { + "epoch": 0.3343767160900604, + "grad_norm": 0.43571823835372925, + "learning_rate": 1.8670242222131163e-05, + "loss": 0.5164, + "step": 12178 + }, + { + "epoch": 0.3344041735310269, + "grad_norm": 0.34395861625671387, + "learning_rate": 1.8670027016247508e-05, + "loss": 0.4734, + "step": 12179 + }, + { + "epoch": 0.3344316309719934, + "grad_norm": 0.3572176396846771, + "learning_rate": 1.8669811794191487e-05, + "loss": 0.5064, + "step": 12180 + }, + { + "epoch": 0.3344590884129599, + "grad_norm": 0.42073866724967957, + "learning_rate": 1.86695965559635e-05, + "loss": 0.5437, + "step": 12181 + }, + { + "epoch": 0.3344865458539264, + "grad_norm": 0.35572758316993713, + "learning_rate": 1.8669381301563945e-05, + "loss": 0.5326, + "step": 12182 + }, + { + "epoch": 0.3345140032948929, + "grad_norm": 0.3477490544319153, + "learning_rate": 1.8669166030993228e-05, + "loss": 0.5448, + "step": 12183 + }, + { + "epoch": 0.33454146073585944, + "grad_norm": 0.3768146336078644, + "learning_rate": 1.866895074425175e-05, + "loss": 0.5274, + "step": 12184 + }, + { + "epoch": 0.33456891817682594, + "grad_norm": 0.35373422503471375, + "learning_rate": 1.8668735441339912e-05, + "loss": 0.5015, + "step": 12185 + }, + { + "epoch": 0.33459637561779243, + "grad_norm": 0.36402344703674316, + "learning_rate": 1.8668520122258117e-05, + "loss": 0.499, + "step": 12186 + }, + { + "epoch": 0.3346238330587589, + "grad_norm": 0.37815454602241516, + "learning_rate": 1.8668304787006765e-05, + "loss": 0.4702, + "step": 12187 + }, + { + "epoch": 0.3346512904997254, + "grad_norm": 0.35976994037628174, + "learning_rate": 1.8668089435586254e-05, + "loss": 0.5459, + "step": 12188 + }, + { + "epoch": 0.3346787479406919, + "grad_norm": 0.4207213521003723, + "learning_rate": 1.8667874067996993e-05, + "loss": 0.6455, + "step": 12189 + }, + { + "epoch": 0.3347062053816584, + "grad_norm": 0.36147817969322205, + "learning_rate": 1.8667658684239384e-05, + "loss": 0.5196, + "step": 12190 + }, + { + "epoch": 0.33473366282262496, + "grad_norm": 0.4904122054576874, + "learning_rate": 1.866744328431382e-05, + "loss": 0.5213, + "step": 12191 + }, + { + "epoch": 0.33476112026359145, + "grad_norm": 0.3992885649204254, + "learning_rate": 1.866722786822071e-05, + "loss": 0.5706, + "step": 12192 + }, + { + "epoch": 0.33478857770455794, + "grad_norm": 0.374067485332489, + "learning_rate": 1.8667012435960454e-05, + "loss": 0.4957, + "step": 12193 + }, + { + "epoch": 0.33481603514552444, + "grad_norm": 0.3733747601509094, + "learning_rate": 1.866679698753345e-05, + "loss": 0.4718, + "step": 12194 + }, + { + "epoch": 0.33484349258649093, + "grad_norm": 0.37749797105789185, + "learning_rate": 1.8666581522940106e-05, + "loss": 0.483, + "step": 12195 + }, + { + "epoch": 0.3348709500274574, + "grad_norm": 0.3245806097984314, + "learning_rate": 1.8666366042180823e-05, + "loss": 0.4459, + "step": 12196 + }, + { + "epoch": 0.3348984074684239, + "grad_norm": 0.37576979398727417, + "learning_rate": 1.8666150545256e-05, + "loss": 0.4832, + "step": 12197 + }, + { + "epoch": 0.33492586490939047, + "grad_norm": 0.3478068709373474, + "learning_rate": 1.866593503216604e-05, + "loss": 0.545, + "step": 12198 + }, + { + "epoch": 0.33495332235035696, + "grad_norm": 0.3988746106624603, + "learning_rate": 1.8665719502911347e-05, + "loss": 0.4473, + "step": 12199 + }, + { + "epoch": 0.33498077979132346, + "grad_norm": 0.3786168694496155, + "learning_rate": 1.866550395749232e-05, + "loss": 0.5159, + "step": 12200 + }, + { + "epoch": 0.33500823723228995, + "grad_norm": 0.3746756613254547, + "learning_rate": 1.8665288395909362e-05, + "loss": 0.5113, + "step": 12201 + }, + { + "epoch": 0.33503569467325645, + "grad_norm": 0.37161025404930115, + "learning_rate": 1.8665072818162878e-05, + "loss": 0.5639, + "step": 12202 + }, + { + "epoch": 0.33506315211422294, + "grad_norm": 0.40593913197517395, + "learning_rate": 1.8664857224253264e-05, + "loss": 0.5011, + "step": 12203 + }, + { + "epoch": 0.33509060955518943, + "grad_norm": 0.4121600091457367, + "learning_rate": 1.8664641614180927e-05, + "loss": 0.5775, + "step": 12204 + }, + { + "epoch": 0.335118066996156, + "grad_norm": 0.38124480843544006, + "learning_rate": 1.8664425987946265e-05, + "loss": 0.4453, + "step": 12205 + }, + { + "epoch": 0.3351455244371225, + "grad_norm": 0.45303475856781006, + "learning_rate": 1.866421034554969e-05, + "loss": 0.5189, + "step": 12206 + }, + { + "epoch": 0.33517298187808897, + "grad_norm": 0.3391396701335907, + "learning_rate": 1.8663994686991594e-05, + "loss": 0.5195, + "step": 12207 + }, + { + "epoch": 0.33520043931905547, + "grad_norm": 0.3407212495803833, + "learning_rate": 1.8663779012272384e-05, + "loss": 0.4256, + "step": 12208 + }, + { + "epoch": 0.33522789676002196, + "grad_norm": 0.3795984089374542, + "learning_rate": 1.8663563321392456e-05, + "loss": 0.5101, + "step": 12209 + }, + { + "epoch": 0.33525535420098845, + "grad_norm": 0.3443688154220581, + "learning_rate": 1.8663347614352224e-05, + "loss": 0.6034, + "step": 12210 + }, + { + "epoch": 0.33528281164195495, + "grad_norm": 0.4039149582386017, + "learning_rate": 1.866313189115208e-05, + "loss": 0.5617, + "step": 12211 + }, + { + "epoch": 0.3353102690829215, + "grad_norm": 0.32635635137557983, + "learning_rate": 1.866291615179243e-05, + "loss": 0.4994, + "step": 12212 + }, + { + "epoch": 0.335337726523888, + "grad_norm": 0.35553935170173645, + "learning_rate": 1.8662700396273678e-05, + "loss": 0.5676, + "step": 12213 + }, + { + "epoch": 0.3353651839648545, + "grad_norm": 0.3551388680934906, + "learning_rate": 1.8662484624596223e-05, + "loss": 0.5107, + "step": 12214 + }, + { + "epoch": 0.335392641405821, + "grad_norm": 0.36790353059768677, + "learning_rate": 1.866226883676047e-05, + "loss": 0.4836, + "step": 12215 + }, + { + "epoch": 0.3354200988467875, + "grad_norm": 0.40190771222114563, + "learning_rate": 1.866205303276682e-05, + "loss": 0.4613, + "step": 12216 + }, + { + "epoch": 0.33544755628775397, + "grad_norm": 0.3645809292793274, + "learning_rate": 1.8661837212615677e-05, + "loss": 0.5539, + "step": 12217 + }, + { + "epoch": 0.33547501372872046, + "grad_norm": 0.3380280137062073, + "learning_rate": 1.866162137630745e-05, + "loss": 0.5134, + "step": 12218 + }, + { + "epoch": 0.335502471169687, + "grad_norm": 0.35894468426704407, + "learning_rate": 1.8661405523842523e-05, + "loss": 0.5062, + "step": 12219 + }, + { + "epoch": 0.3355299286106535, + "grad_norm": 0.41869670152664185, + "learning_rate": 1.866118965522132e-05, + "loss": 0.5766, + "step": 12220 + }, + { + "epoch": 0.33555738605162, + "grad_norm": 0.4016895294189453, + "learning_rate": 1.8660973770444227e-05, + "loss": 0.5291, + "step": 12221 + }, + { + "epoch": 0.3355848434925865, + "grad_norm": 0.4200184941291809, + "learning_rate": 1.866075786951166e-05, + "loss": 0.5162, + "step": 12222 + }, + { + "epoch": 0.335612300933553, + "grad_norm": 0.42865708470344543, + "learning_rate": 1.8660541952424013e-05, + "loss": 0.4723, + "step": 12223 + }, + { + "epoch": 0.3356397583745195, + "grad_norm": 0.3986501097679138, + "learning_rate": 1.8660326019181687e-05, + "loss": 0.6406, + "step": 12224 + }, + { + "epoch": 0.335667215815486, + "grad_norm": 0.4323972165584564, + "learning_rate": 1.8660110069785095e-05, + "loss": 0.5553, + "step": 12225 + }, + { + "epoch": 0.3356946732564525, + "grad_norm": 0.36001917719841003, + "learning_rate": 1.865989410423463e-05, + "loss": 0.4732, + "step": 12226 + }, + { + "epoch": 0.335722130697419, + "grad_norm": 0.34065404534339905, + "learning_rate": 1.86596781225307e-05, + "loss": 0.5765, + "step": 12227 + }, + { + "epoch": 0.3357495881383855, + "grad_norm": 0.36439767479896545, + "learning_rate": 1.86594621246737e-05, + "loss": 0.5266, + "step": 12228 + }, + { + "epoch": 0.335777045579352, + "grad_norm": 0.34761327505111694, + "learning_rate": 1.865924611066405e-05, + "loss": 0.408, + "step": 12229 + }, + { + "epoch": 0.3358045030203185, + "grad_norm": 0.3375909626483917, + "learning_rate": 1.8659030080502137e-05, + "loss": 0.4868, + "step": 12230 + }, + { + "epoch": 0.335831960461285, + "grad_norm": 0.35106220841407776, + "learning_rate": 1.8658814034188367e-05, + "loss": 0.5103, + "step": 12231 + }, + { + "epoch": 0.3358594179022515, + "grad_norm": 0.371696412563324, + "learning_rate": 1.865859797172315e-05, + "loss": 0.5261, + "step": 12232 + }, + { + "epoch": 0.33588687534321804, + "grad_norm": 0.40100982785224915, + "learning_rate": 1.8658381893106883e-05, + "loss": 0.5898, + "step": 12233 + }, + { + "epoch": 0.33591433278418453, + "grad_norm": 0.3814913332462311, + "learning_rate": 1.865816579833997e-05, + "loss": 0.5103, + "step": 12234 + }, + { + "epoch": 0.335941790225151, + "grad_norm": 0.551315426826477, + "learning_rate": 1.8657949687422813e-05, + "loss": 0.5139, + "step": 12235 + }, + { + "epoch": 0.3359692476661175, + "grad_norm": 0.3415037989616394, + "learning_rate": 1.8657733560355815e-05, + "loss": 0.4809, + "step": 12236 + }, + { + "epoch": 0.335996705107084, + "grad_norm": 0.3835240304470062, + "learning_rate": 1.8657517417139385e-05, + "loss": 0.5352, + "step": 12237 + }, + { + "epoch": 0.3360241625480505, + "grad_norm": 0.4481523633003235, + "learning_rate": 1.865730125777392e-05, + "loss": 0.5939, + "step": 12238 + }, + { + "epoch": 0.336051619989017, + "grad_norm": 0.38402310013771057, + "learning_rate": 1.8657085082259825e-05, + "loss": 0.5031, + "step": 12239 + }, + { + "epoch": 0.33607907742998355, + "grad_norm": 0.41845640540122986, + "learning_rate": 1.8656868890597505e-05, + "loss": 0.6612, + "step": 12240 + }, + { + "epoch": 0.33610653487095005, + "grad_norm": 0.45300304889678955, + "learning_rate": 1.8656652682787356e-05, + "loss": 0.5224, + "step": 12241 + }, + { + "epoch": 0.33613399231191654, + "grad_norm": 0.3763873875141144, + "learning_rate": 1.865643645882979e-05, + "loss": 0.4357, + "step": 12242 + }, + { + "epoch": 0.33616144975288303, + "grad_norm": 0.3630322813987732, + "learning_rate": 1.8656220218725212e-05, + "loss": 0.5369, + "step": 12243 + }, + { + "epoch": 0.33618890719384953, + "grad_norm": 0.3758666515350342, + "learning_rate": 1.8656003962474018e-05, + "loss": 0.5102, + "step": 12244 + }, + { + "epoch": 0.336216364634816, + "grad_norm": 0.41092216968536377, + "learning_rate": 1.865578769007661e-05, + "loss": 0.5519, + "step": 12245 + }, + { + "epoch": 0.3362438220757825, + "grad_norm": 0.3627135753631592, + "learning_rate": 1.86555714015334e-05, + "loss": 0.5392, + "step": 12246 + }, + { + "epoch": 0.33627127951674907, + "grad_norm": 0.41984620690345764, + "learning_rate": 1.865535509684478e-05, + "loss": 0.6242, + "step": 12247 + }, + { + "epoch": 0.33629873695771556, + "grad_norm": 0.3515876531600952, + "learning_rate": 1.8655138776011167e-05, + "loss": 0.5045, + "step": 12248 + }, + { + "epoch": 0.33632619439868205, + "grad_norm": 0.37769272923469543, + "learning_rate": 1.8654922439032955e-05, + "loss": 0.4963, + "step": 12249 + }, + { + "epoch": 0.33635365183964855, + "grad_norm": 0.3715553283691406, + "learning_rate": 1.8654706085910553e-05, + "loss": 0.552, + "step": 12250 + }, + { + "epoch": 0.33638110928061504, + "grad_norm": 0.3972318470478058, + "learning_rate": 1.865448971664436e-05, + "loss": 0.5045, + "step": 12251 + }, + { + "epoch": 0.33640856672158154, + "grad_norm": 0.42951247096061707, + "learning_rate": 1.8654273331234783e-05, + "loss": 0.6115, + "step": 12252 + }, + { + "epoch": 0.33643602416254803, + "grad_norm": 0.4076823890209198, + "learning_rate": 1.8654056929682222e-05, + "loss": 0.5329, + "step": 12253 + }, + { + "epoch": 0.3364634816035145, + "grad_norm": 0.40803059935569763, + "learning_rate": 1.8653840511987083e-05, + "loss": 0.4908, + "step": 12254 + }, + { + "epoch": 0.3364909390444811, + "grad_norm": 0.405326247215271, + "learning_rate": 1.8653624078149766e-05, + "loss": 0.6085, + "step": 12255 + }, + { + "epoch": 0.33651839648544757, + "grad_norm": 0.40463972091674805, + "learning_rate": 1.8653407628170685e-05, + "loss": 0.5734, + "step": 12256 + }, + { + "epoch": 0.33654585392641406, + "grad_norm": 0.39930757880210876, + "learning_rate": 1.8653191162050235e-05, + "loss": 0.5684, + "step": 12257 + }, + { + "epoch": 0.33657331136738056, + "grad_norm": 0.37083762884140015, + "learning_rate": 1.865297467978882e-05, + "loss": 0.5889, + "step": 12258 + }, + { + "epoch": 0.33660076880834705, + "grad_norm": 0.34814825654029846, + "learning_rate": 1.8652758181386844e-05, + "loss": 0.5698, + "step": 12259 + }, + { + "epoch": 0.33662822624931354, + "grad_norm": 0.39724937081336975, + "learning_rate": 1.8652541666844716e-05, + "loss": 0.6186, + "step": 12260 + }, + { + "epoch": 0.33665568369028004, + "grad_norm": 0.35423409938812256, + "learning_rate": 1.8652325136162836e-05, + "loss": 0.5007, + "step": 12261 + }, + { + "epoch": 0.3366831411312466, + "grad_norm": 0.3681260645389557, + "learning_rate": 1.8652108589341607e-05, + "loss": 0.5694, + "step": 12262 + }, + { + "epoch": 0.3367105985722131, + "grad_norm": 0.3532644212245941, + "learning_rate": 1.8651892026381432e-05, + "loss": 0.4338, + "step": 12263 + }, + { + "epoch": 0.3367380560131796, + "grad_norm": 0.45802879333496094, + "learning_rate": 1.865167544728272e-05, + "loss": 0.4626, + "step": 12264 + }, + { + "epoch": 0.33676551345414607, + "grad_norm": 0.4022749066352844, + "learning_rate": 1.865145885204587e-05, + "loss": 0.5096, + "step": 12265 + }, + { + "epoch": 0.33679297089511256, + "grad_norm": 0.3634476065635681, + "learning_rate": 1.8651242240671286e-05, + "loss": 0.4978, + "step": 12266 + }, + { + "epoch": 0.33682042833607906, + "grad_norm": 0.3663511574268341, + "learning_rate": 1.865102561315938e-05, + "loss": 0.5275, + "step": 12267 + }, + { + "epoch": 0.33684788577704555, + "grad_norm": 0.37149372696876526, + "learning_rate": 1.8650808969510547e-05, + "loss": 0.4852, + "step": 12268 + }, + { + "epoch": 0.3368753432180121, + "grad_norm": 0.36191242933273315, + "learning_rate": 1.8650592309725195e-05, + "loss": 0.5046, + "step": 12269 + }, + { + "epoch": 0.3369028006589786, + "grad_norm": 0.4134601950645447, + "learning_rate": 1.8650375633803724e-05, + "loss": 0.608, + "step": 12270 + }, + { + "epoch": 0.3369302580999451, + "grad_norm": 0.36362940073013306, + "learning_rate": 1.8650158941746547e-05, + "loss": 0.4153, + "step": 12271 + }, + { + "epoch": 0.3369577155409116, + "grad_norm": 0.3547532856464386, + "learning_rate": 1.864994223355406e-05, + "loss": 0.5343, + "step": 12272 + }, + { + "epoch": 0.3369851729818781, + "grad_norm": 0.3606550395488739, + "learning_rate": 1.864972550922667e-05, + "loss": 0.4196, + "step": 12273 + }, + { + "epoch": 0.3370126304228446, + "grad_norm": 0.35373443365097046, + "learning_rate": 1.8649508768764782e-05, + "loss": 0.5243, + "step": 12274 + }, + { + "epoch": 0.33704008786381107, + "grad_norm": 0.4609447121620178, + "learning_rate": 1.86492920121688e-05, + "loss": 0.5464, + "step": 12275 + }, + { + "epoch": 0.3370675453047776, + "grad_norm": 0.3898613750934601, + "learning_rate": 1.8649075239439123e-05, + "loss": 0.5867, + "step": 12276 + }, + { + "epoch": 0.3370950027457441, + "grad_norm": 0.34443727135658264, + "learning_rate": 1.8648858450576163e-05, + "loss": 0.5104, + "step": 12277 + }, + { + "epoch": 0.3371224601867106, + "grad_norm": 0.3428122401237488, + "learning_rate": 1.8648641645580325e-05, + "loss": 0.5451, + "step": 12278 + }, + { + "epoch": 0.3371499176276771, + "grad_norm": 0.35781967639923096, + "learning_rate": 1.8648424824452006e-05, + "loss": 0.477, + "step": 12279 + }, + { + "epoch": 0.3371773750686436, + "grad_norm": 0.3872476816177368, + "learning_rate": 1.8648207987191616e-05, + "loss": 0.5087, + "step": 12280 + }, + { + "epoch": 0.3372048325096101, + "grad_norm": 0.37101325392723083, + "learning_rate": 1.8647991133799558e-05, + "loss": 0.4322, + "step": 12281 + }, + { + "epoch": 0.3372322899505766, + "grad_norm": 0.39960938692092896, + "learning_rate": 1.8647774264276238e-05, + "loss": 0.5194, + "step": 12282 + }, + { + "epoch": 0.33725974739154313, + "grad_norm": 0.3782779276371002, + "learning_rate": 1.8647557378622057e-05, + "loss": 0.5814, + "step": 12283 + }, + { + "epoch": 0.3372872048325096, + "grad_norm": 0.3230981230735779, + "learning_rate": 1.864734047683742e-05, + "loss": 0.4516, + "step": 12284 + }, + { + "epoch": 0.3373146622734761, + "grad_norm": 0.39579570293426514, + "learning_rate": 1.8647123558922736e-05, + "loss": 0.577, + "step": 12285 + }, + { + "epoch": 0.3373421197144426, + "grad_norm": 0.3730456233024597, + "learning_rate": 1.8646906624878403e-05, + "loss": 0.5419, + "step": 12286 + }, + { + "epoch": 0.3373695771554091, + "grad_norm": 0.37980756163597107, + "learning_rate": 1.8646689674704835e-05, + "loss": 0.566, + "step": 12287 + }, + { + "epoch": 0.3373970345963756, + "grad_norm": 0.3712944984436035, + "learning_rate": 1.8646472708402424e-05, + "loss": 0.4567, + "step": 12288 + }, + { + "epoch": 0.3374244920373421, + "grad_norm": 0.37411099672317505, + "learning_rate": 1.8646255725971588e-05, + "loss": 0.5074, + "step": 12289 + }, + { + "epoch": 0.33745194947830864, + "grad_norm": 0.3757662773132324, + "learning_rate": 1.8646038727412726e-05, + "loss": 0.534, + "step": 12290 + }, + { + "epoch": 0.33747940691927514, + "grad_norm": 0.37685900926589966, + "learning_rate": 1.864582171272624e-05, + "loss": 0.5398, + "step": 12291 + }, + { + "epoch": 0.33750686436024163, + "grad_norm": 0.3579704165458679, + "learning_rate": 1.8645604681912535e-05, + "loss": 0.4653, + "step": 12292 + }, + { + "epoch": 0.3375343218012081, + "grad_norm": 0.44706645607948303, + "learning_rate": 1.864538763497202e-05, + "loss": 0.5697, + "step": 12293 + }, + { + "epoch": 0.3375617792421746, + "grad_norm": 0.3792467713356018, + "learning_rate": 1.8645170571905096e-05, + "loss": 0.4414, + "step": 12294 + }, + { + "epoch": 0.3375892366831411, + "grad_norm": 0.4254254698753357, + "learning_rate": 1.8644953492712174e-05, + "loss": 0.508, + "step": 12295 + }, + { + "epoch": 0.3376166941241076, + "grad_norm": 0.44086042046546936, + "learning_rate": 1.864473639739365e-05, + "loss": 0.5728, + "step": 12296 + }, + { + "epoch": 0.33764415156507416, + "grad_norm": 0.5534827709197998, + "learning_rate": 1.8644519285949933e-05, + "loss": 0.582, + "step": 12297 + }, + { + "epoch": 0.33767160900604065, + "grad_norm": 0.45145976543426514, + "learning_rate": 1.864430215838143e-05, + "loss": 0.5287, + "step": 12298 + }, + { + "epoch": 0.33769906644700715, + "grad_norm": 0.3428511321544647, + "learning_rate": 1.8644085014688546e-05, + "loss": 0.4833, + "step": 12299 + }, + { + "epoch": 0.33772652388797364, + "grad_norm": 0.3595331609249115, + "learning_rate": 1.8643867854871684e-05, + "loss": 0.4736, + "step": 12300 + }, + { + "epoch": 0.33775398132894013, + "grad_norm": 0.37817004323005676, + "learning_rate": 1.8643650678931248e-05, + "loss": 0.549, + "step": 12301 + }, + { + "epoch": 0.33778143876990663, + "grad_norm": 0.38260671496391296, + "learning_rate": 1.864343348686765e-05, + "loss": 0.5196, + "step": 12302 + }, + { + "epoch": 0.3378088962108731, + "grad_norm": 0.3613940477371216, + "learning_rate": 1.8643216278681285e-05, + "loss": 0.5373, + "step": 12303 + }, + { + "epoch": 0.33783635365183967, + "grad_norm": 0.37521806359291077, + "learning_rate": 1.8642999054372564e-05, + "loss": 0.5921, + "step": 12304 + }, + { + "epoch": 0.33786381109280617, + "grad_norm": 0.35872453451156616, + "learning_rate": 1.864278181394189e-05, + "loss": 0.4109, + "step": 12305 + }, + { + "epoch": 0.33789126853377266, + "grad_norm": 0.37959444522857666, + "learning_rate": 1.864256455738967e-05, + "loss": 0.4749, + "step": 12306 + }, + { + "epoch": 0.33791872597473915, + "grad_norm": 0.360113263130188, + "learning_rate": 1.864234728471631e-05, + "loss": 0.5281, + "step": 12307 + }, + { + "epoch": 0.33794618341570565, + "grad_norm": 0.3299367129802704, + "learning_rate": 1.8642129995922215e-05, + "loss": 0.5673, + "step": 12308 + }, + { + "epoch": 0.33797364085667214, + "grad_norm": 0.3171939551830292, + "learning_rate": 1.8641912691007786e-05, + "loss": 0.4786, + "step": 12309 + }, + { + "epoch": 0.33800109829763864, + "grad_norm": 0.3738742172718048, + "learning_rate": 1.8641695369973435e-05, + "loss": 0.4371, + "step": 12310 + }, + { + "epoch": 0.3380285557386052, + "grad_norm": 0.3594864308834076, + "learning_rate": 1.8641478032819563e-05, + "loss": 0.5408, + "step": 12311 + }, + { + "epoch": 0.3380560131795717, + "grad_norm": 0.38559210300445557, + "learning_rate": 1.8641260679546572e-05, + "loss": 0.4735, + "step": 12312 + }, + { + "epoch": 0.3380834706205382, + "grad_norm": 0.5643694996833801, + "learning_rate": 1.8641043310154877e-05, + "loss": 0.6501, + "step": 12313 + }, + { + "epoch": 0.33811092806150467, + "grad_norm": 0.38152432441711426, + "learning_rate": 1.864082592464488e-05, + "loss": 0.5175, + "step": 12314 + }, + { + "epoch": 0.33813838550247116, + "grad_norm": 0.3851596713066101, + "learning_rate": 1.864060852301698e-05, + "loss": 0.5719, + "step": 12315 + }, + { + "epoch": 0.33816584294343766, + "grad_norm": 0.34640422463417053, + "learning_rate": 1.864039110527159e-05, + "loss": 0.5371, + "step": 12316 + }, + { + "epoch": 0.33819330038440415, + "grad_norm": 0.37013259530067444, + "learning_rate": 1.8640173671409113e-05, + "loss": 0.5209, + "step": 12317 + }, + { + "epoch": 0.3382207578253707, + "grad_norm": 0.4598497152328491, + "learning_rate": 1.863995622142995e-05, + "loss": 0.5944, + "step": 12318 + }, + { + "epoch": 0.3382482152663372, + "grad_norm": 0.33150139451026917, + "learning_rate": 1.8639738755334517e-05, + "loss": 0.4611, + "step": 12319 + }, + { + "epoch": 0.3382756727073037, + "grad_norm": 0.3093506991863251, + "learning_rate": 1.863952127312321e-05, + "loss": 0.4334, + "step": 12320 + }, + { + "epoch": 0.3383031301482702, + "grad_norm": 0.35145682096481323, + "learning_rate": 1.863930377479644e-05, + "loss": 0.4214, + "step": 12321 + }, + { + "epoch": 0.3383305875892367, + "grad_norm": 0.4030013680458069, + "learning_rate": 1.8639086260354612e-05, + "loss": 0.5167, + "step": 12322 + }, + { + "epoch": 0.33835804503020317, + "grad_norm": 0.3948081433773041, + "learning_rate": 1.863886872979813e-05, + "loss": 0.4276, + "step": 12323 + }, + { + "epoch": 0.33838550247116966, + "grad_norm": 0.43430545926094055, + "learning_rate": 1.86386511831274e-05, + "loss": 0.5692, + "step": 12324 + }, + { + "epoch": 0.3384129599121362, + "grad_norm": 0.41613835096359253, + "learning_rate": 1.863843362034283e-05, + "loss": 0.5457, + "step": 12325 + }, + { + "epoch": 0.3384404173531027, + "grad_norm": 0.36906808614730835, + "learning_rate": 1.863821604144482e-05, + "loss": 0.4701, + "step": 12326 + }, + { + "epoch": 0.3384678747940692, + "grad_norm": 0.9441107511520386, + "learning_rate": 1.8637998446433785e-05, + "loss": 0.5439, + "step": 12327 + }, + { + "epoch": 0.3384953322350357, + "grad_norm": 0.4258708655834198, + "learning_rate": 1.8637780835310125e-05, + "loss": 0.5619, + "step": 12328 + }, + { + "epoch": 0.3385227896760022, + "grad_norm": 0.4126697778701782, + "learning_rate": 1.8637563208074245e-05, + "loss": 0.5644, + "step": 12329 + }, + { + "epoch": 0.3385502471169687, + "grad_norm": 0.3614232838153839, + "learning_rate": 1.8637345564726554e-05, + "loss": 0.4439, + "step": 12330 + }, + { + "epoch": 0.3385777045579352, + "grad_norm": 0.37252941727638245, + "learning_rate": 1.8637127905267457e-05, + "loss": 0.5506, + "step": 12331 + }, + { + "epoch": 0.3386051619989017, + "grad_norm": 0.3750757873058319, + "learning_rate": 1.863691022969736e-05, + "loss": 0.5594, + "step": 12332 + }, + { + "epoch": 0.3386326194398682, + "grad_norm": 0.4187791347503662, + "learning_rate": 1.863669253801667e-05, + "loss": 0.5271, + "step": 12333 + }, + { + "epoch": 0.3386600768808347, + "grad_norm": 0.3930562138557434, + "learning_rate": 1.8636474830225787e-05, + "loss": 0.5001, + "step": 12334 + }, + { + "epoch": 0.3386875343218012, + "grad_norm": 0.39050620794296265, + "learning_rate": 1.8636257106325127e-05, + "loss": 0.4167, + "step": 12335 + }, + { + "epoch": 0.3387149917627677, + "grad_norm": 0.4423873722553253, + "learning_rate": 1.8636039366315088e-05, + "loss": 0.7007, + "step": 12336 + }, + { + "epoch": 0.3387424492037342, + "grad_norm": 0.6910070180892944, + "learning_rate": 1.863582161019608e-05, + "loss": 0.5401, + "step": 12337 + }, + { + "epoch": 0.3387699066447007, + "grad_norm": 0.36229830980300903, + "learning_rate": 1.863560383796851e-05, + "loss": 0.5225, + "step": 12338 + }, + { + "epoch": 0.33879736408566724, + "grad_norm": 0.3740771412849426, + "learning_rate": 1.8635386049632783e-05, + "loss": 0.4709, + "step": 12339 + }, + { + "epoch": 0.33882482152663373, + "grad_norm": 0.5569590926170349, + "learning_rate": 1.8635168245189302e-05, + "loss": 0.5815, + "step": 12340 + }, + { + "epoch": 0.33885227896760023, + "grad_norm": 0.41412991285324097, + "learning_rate": 1.8634950424638477e-05, + "loss": 0.5606, + "step": 12341 + }, + { + "epoch": 0.3388797364085667, + "grad_norm": 0.3704295754432678, + "learning_rate": 1.8634732587980714e-05, + "loss": 0.4886, + "step": 12342 + }, + { + "epoch": 0.3389071938495332, + "grad_norm": 0.35827991366386414, + "learning_rate": 1.863451473521642e-05, + "loss": 0.4911, + "step": 12343 + }, + { + "epoch": 0.3389346512904997, + "grad_norm": 0.3694479167461395, + "learning_rate": 1.8634296866346e-05, + "loss": 0.5076, + "step": 12344 + }, + { + "epoch": 0.3389621087314662, + "grad_norm": 0.35337457060813904, + "learning_rate": 1.863407898136986e-05, + "loss": 0.4513, + "step": 12345 + }, + { + "epoch": 0.33898956617243275, + "grad_norm": 0.43388286232948303, + "learning_rate": 1.8633861080288408e-05, + "loss": 0.4968, + "step": 12346 + }, + { + "epoch": 0.33901702361339925, + "grad_norm": 0.3522838056087494, + "learning_rate": 1.8633643163102047e-05, + "loss": 0.5055, + "step": 12347 + }, + { + "epoch": 0.33904448105436574, + "grad_norm": 0.3700820803642273, + "learning_rate": 1.8633425229811187e-05, + "loss": 0.581, + "step": 12348 + }, + { + "epoch": 0.33907193849533224, + "grad_norm": 0.5878597497940063, + "learning_rate": 1.8633207280416236e-05, + "loss": 0.6278, + "step": 12349 + }, + { + "epoch": 0.33909939593629873, + "grad_norm": 0.5447765588760376, + "learning_rate": 1.8632989314917593e-05, + "loss": 0.6206, + "step": 12350 + }, + { + "epoch": 0.3391268533772652, + "grad_norm": 0.41337907314300537, + "learning_rate": 1.8632771333315674e-05, + "loss": 0.561, + "step": 12351 + }, + { + "epoch": 0.3391543108182317, + "grad_norm": 0.401763379573822, + "learning_rate": 1.863255333561088e-05, + "loss": 0.561, + "step": 12352 + }, + { + "epoch": 0.33918176825919827, + "grad_norm": 0.3655933737754822, + "learning_rate": 1.863233532180362e-05, + "loss": 0.5668, + "step": 12353 + }, + { + "epoch": 0.33920922570016476, + "grad_norm": 0.3661268949508667, + "learning_rate": 1.8632117291894297e-05, + "loss": 0.5681, + "step": 12354 + }, + { + "epoch": 0.33923668314113126, + "grad_norm": 0.3617664575576782, + "learning_rate": 1.863189924588332e-05, + "loss": 0.4502, + "step": 12355 + }, + { + "epoch": 0.33926414058209775, + "grad_norm": 0.36743050813674927, + "learning_rate": 1.86316811837711e-05, + "loss": 0.5831, + "step": 12356 + }, + { + "epoch": 0.33929159802306424, + "grad_norm": 0.3674524128437042, + "learning_rate": 1.8631463105558033e-05, + "loss": 0.5612, + "step": 12357 + }, + { + "epoch": 0.33931905546403074, + "grad_norm": 0.5027694702148438, + "learning_rate": 1.8631245011244537e-05, + "loss": 0.5648, + "step": 12358 + }, + { + "epoch": 0.33934651290499723, + "grad_norm": 0.4020113945007324, + "learning_rate": 1.8631026900831014e-05, + "loss": 0.5139, + "step": 12359 + }, + { + "epoch": 0.3393739703459638, + "grad_norm": 0.3367515802383423, + "learning_rate": 1.8630808774317872e-05, + "loss": 0.456, + "step": 12360 + }, + { + "epoch": 0.3394014277869303, + "grad_norm": 0.3334173262119293, + "learning_rate": 1.8630590631705514e-05, + "loss": 0.4403, + "step": 12361 + }, + { + "epoch": 0.33942888522789677, + "grad_norm": 0.32617881894111633, + "learning_rate": 1.8630372472994352e-05, + "loss": 0.4942, + "step": 12362 + }, + { + "epoch": 0.33945634266886326, + "grad_norm": 0.34045061469078064, + "learning_rate": 1.863015429818479e-05, + "loss": 0.4927, + "step": 12363 + }, + { + "epoch": 0.33948380010982976, + "grad_norm": 0.34071052074432373, + "learning_rate": 1.862993610727724e-05, + "loss": 0.4918, + "step": 12364 + }, + { + "epoch": 0.33951125755079625, + "grad_norm": 0.32686951756477356, + "learning_rate": 1.86297179002721e-05, + "loss": 0.4385, + "step": 12365 + }, + { + "epoch": 0.33953871499176275, + "grad_norm": 0.38040047883987427, + "learning_rate": 1.8629499677169782e-05, + "loss": 0.5351, + "step": 12366 + }, + { + "epoch": 0.3395661724327293, + "grad_norm": 0.41433972120285034, + "learning_rate": 1.862928143797069e-05, + "loss": 0.5592, + "step": 12367 + }, + { + "epoch": 0.3395936298736958, + "grad_norm": 0.4037224054336548, + "learning_rate": 1.862906318267524e-05, + "loss": 0.5497, + "step": 12368 + }, + { + "epoch": 0.3396210873146623, + "grad_norm": 0.36436253786087036, + "learning_rate": 1.8628844911283832e-05, + "loss": 0.5001, + "step": 12369 + }, + { + "epoch": 0.3396485447556288, + "grad_norm": 0.3786572813987732, + "learning_rate": 1.8628626623796873e-05, + "loss": 0.5339, + "step": 12370 + }, + { + "epoch": 0.3396760021965953, + "grad_norm": 0.39340558648109436, + "learning_rate": 1.862840832021477e-05, + "loss": 0.5385, + "step": 12371 + }, + { + "epoch": 0.33970345963756177, + "grad_norm": 0.41884729266166687, + "learning_rate": 1.8628190000537935e-05, + "loss": 0.5175, + "step": 12372 + }, + { + "epoch": 0.33973091707852826, + "grad_norm": 0.36940476298332214, + "learning_rate": 1.862797166476677e-05, + "loss": 0.5029, + "step": 12373 + }, + { + "epoch": 0.3397583745194948, + "grad_norm": 0.38178202509880066, + "learning_rate": 1.8627753312901685e-05, + "loss": 0.4913, + "step": 12374 + }, + { + "epoch": 0.3397858319604613, + "grad_norm": 0.39873433113098145, + "learning_rate": 1.8627534944943085e-05, + "loss": 0.4895, + "step": 12375 + }, + { + "epoch": 0.3398132894014278, + "grad_norm": 0.3858374059200287, + "learning_rate": 1.862731656089138e-05, + "loss": 0.4912, + "step": 12376 + }, + { + "epoch": 0.3398407468423943, + "grad_norm": 0.357807993888855, + "learning_rate": 1.8627098160746976e-05, + "loss": 0.556, + "step": 12377 + }, + { + "epoch": 0.3398682042833608, + "grad_norm": 0.34629297256469727, + "learning_rate": 1.8626879744510277e-05, + "loss": 0.4069, + "step": 12378 + }, + { + "epoch": 0.3398956617243273, + "grad_norm": 0.3755984902381897, + "learning_rate": 1.8626661312181696e-05, + "loss": 0.4824, + "step": 12379 + }, + { + "epoch": 0.3399231191652938, + "grad_norm": 0.3901381492614746, + "learning_rate": 1.862644286376164e-05, + "loss": 0.5628, + "step": 12380 + }, + { + "epoch": 0.3399505766062603, + "grad_norm": 0.37018489837646484, + "learning_rate": 1.8626224399250515e-05, + "loss": 0.5507, + "step": 12381 + }, + { + "epoch": 0.3399780340472268, + "grad_norm": 0.33702513575553894, + "learning_rate": 1.8626005918648727e-05, + "loss": 0.4758, + "step": 12382 + }, + { + "epoch": 0.3400054914881933, + "grad_norm": 0.36110442876815796, + "learning_rate": 1.8625787421956684e-05, + "loss": 0.5717, + "step": 12383 + }, + { + "epoch": 0.3400329489291598, + "grad_norm": 0.37742021679878235, + "learning_rate": 1.8625568909174797e-05, + "loss": 0.4661, + "step": 12384 + }, + { + "epoch": 0.3400604063701263, + "grad_norm": 0.4361168444156647, + "learning_rate": 1.8625350380303468e-05, + "loss": 0.5575, + "step": 12385 + }, + { + "epoch": 0.3400878638110928, + "grad_norm": 0.3802225589752197, + "learning_rate": 1.862513183534311e-05, + "loss": 0.4849, + "step": 12386 + }, + { + "epoch": 0.3401153212520593, + "grad_norm": 0.3888755440711975, + "learning_rate": 1.8624913274294128e-05, + "loss": 0.5363, + "step": 12387 + }, + { + "epoch": 0.3401427786930258, + "grad_norm": 0.359723299741745, + "learning_rate": 1.862469469715693e-05, + "loss": 0.4921, + "step": 12388 + }, + { + "epoch": 0.34017023613399233, + "grad_norm": 0.3222038149833679, + "learning_rate": 1.862447610393192e-05, + "loss": 0.4841, + "step": 12389 + }, + { + "epoch": 0.3401976935749588, + "grad_norm": 0.3656991720199585, + "learning_rate": 1.8624257494619512e-05, + "loss": 0.5863, + "step": 12390 + }, + { + "epoch": 0.3402251510159253, + "grad_norm": 0.36762839555740356, + "learning_rate": 1.8624038869220115e-05, + "loss": 0.5493, + "step": 12391 + }, + { + "epoch": 0.3402526084568918, + "grad_norm": 0.4071088135242462, + "learning_rate": 1.862382022773413e-05, + "loss": 0.5881, + "step": 12392 + }, + { + "epoch": 0.3402800658978583, + "grad_norm": 0.3734399378299713, + "learning_rate": 1.8623601570161968e-05, + "loss": 0.5637, + "step": 12393 + }, + { + "epoch": 0.3403075233388248, + "grad_norm": 0.38561490178108215, + "learning_rate": 1.8623382896504038e-05, + "loss": 0.5483, + "step": 12394 + }, + { + "epoch": 0.3403349807797913, + "grad_norm": 0.42822587490081787, + "learning_rate": 1.8623164206760746e-05, + "loss": 0.6326, + "step": 12395 + }, + { + "epoch": 0.34036243822075785, + "grad_norm": 0.3804072141647339, + "learning_rate": 1.8622945500932497e-05, + "loss": 0.4989, + "step": 12396 + }, + { + "epoch": 0.34038989566172434, + "grad_norm": 0.44036227464675903, + "learning_rate": 1.8622726779019703e-05, + "loss": 0.6435, + "step": 12397 + }, + { + "epoch": 0.34041735310269083, + "grad_norm": 0.34180787205696106, + "learning_rate": 1.8622508041022777e-05, + "loss": 0.4462, + "step": 12398 + }, + { + "epoch": 0.34044481054365733, + "grad_norm": 0.378200888633728, + "learning_rate": 1.8622289286942117e-05, + "loss": 0.4878, + "step": 12399 + }, + { + "epoch": 0.3404722679846238, + "grad_norm": 0.41270050406455994, + "learning_rate": 1.8622070516778135e-05, + "loss": 0.5893, + "step": 12400 + }, + { + "epoch": 0.3404997254255903, + "grad_norm": 0.3788231909275055, + "learning_rate": 1.862185173053124e-05, + "loss": 0.6649, + "step": 12401 + }, + { + "epoch": 0.3405271828665568, + "grad_norm": 0.4233676493167877, + "learning_rate": 1.8621632928201843e-05, + "loss": 0.5745, + "step": 12402 + }, + { + "epoch": 0.34055464030752336, + "grad_norm": 0.348463237285614, + "learning_rate": 1.8621414109790346e-05, + "loss": 0.545, + "step": 12403 + }, + { + "epoch": 0.34058209774848985, + "grad_norm": 0.3803178369998932, + "learning_rate": 1.8621195275297163e-05, + "loss": 0.5165, + "step": 12404 + }, + { + "epoch": 0.34060955518945635, + "grad_norm": 0.37968289852142334, + "learning_rate": 1.8620976424722697e-05, + "loss": 0.4565, + "step": 12405 + }, + { + "epoch": 0.34063701263042284, + "grad_norm": 0.5169217586517334, + "learning_rate": 1.8620757558067358e-05, + "loss": 0.5259, + "step": 12406 + }, + { + "epoch": 0.34066447007138934, + "grad_norm": 0.3714880049228668, + "learning_rate": 1.8620538675331555e-05, + "loss": 0.535, + "step": 12407 + }, + { + "epoch": 0.34069192751235583, + "grad_norm": 0.3667689263820648, + "learning_rate": 1.86203197765157e-05, + "loss": 0.4902, + "step": 12408 + }, + { + "epoch": 0.3407193849533223, + "grad_norm": 0.3574177026748657, + "learning_rate": 1.862010086162019e-05, + "loss": 0.4402, + "step": 12409 + }, + { + "epoch": 0.3407468423942889, + "grad_norm": 0.3938351273536682, + "learning_rate": 1.8619881930645446e-05, + "loss": 0.5181, + "step": 12410 + }, + { + "epoch": 0.34077429983525537, + "grad_norm": 0.5523144602775574, + "learning_rate": 1.861966298359187e-05, + "loss": 0.504, + "step": 12411 + }, + { + "epoch": 0.34080175727622186, + "grad_norm": 0.36027419567108154, + "learning_rate": 1.861944402045987e-05, + "loss": 0.4619, + "step": 12412 + }, + { + "epoch": 0.34082921471718836, + "grad_norm": 0.37237390875816345, + "learning_rate": 1.861922504124986e-05, + "loss": 0.538, + "step": 12413 + }, + { + "epoch": 0.34085667215815485, + "grad_norm": 0.4055570662021637, + "learning_rate": 1.861900604596224e-05, + "loss": 0.6089, + "step": 12414 + }, + { + "epoch": 0.34088412959912134, + "grad_norm": 0.34897580742836, + "learning_rate": 1.8618787034597426e-05, + "loss": 0.4285, + "step": 12415 + }, + { + "epoch": 0.34091158704008784, + "grad_norm": 0.3791837692260742, + "learning_rate": 1.861856800715582e-05, + "loss": 0.4547, + "step": 12416 + }, + { + "epoch": 0.3409390444810544, + "grad_norm": 0.4035971760749817, + "learning_rate": 1.8618348963637836e-05, + "loss": 0.4493, + "step": 12417 + }, + { + "epoch": 0.3409665019220209, + "grad_norm": 0.41616013646125793, + "learning_rate": 1.861812990404388e-05, + "loss": 0.5377, + "step": 12418 + }, + { + "epoch": 0.3409939593629874, + "grad_norm": 0.3754522204399109, + "learning_rate": 1.861791082837436e-05, + "loss": 0.5748, + "step": 12419 + }, + { + "epoch": 0.34102141680395387, + "grad_norm": 0.46873170137405396, + "learning_rate": 1.861769173662969e-05, + "loss": 0.6039, + "step": 12420 + }, + { + "epoch": 0.34104887424492036, + "grad_norm": 0.360714852809906, + "learning_rate": 1.861747262881027e-05, + "loss": 0.4089, + "step": 12421 + }, + { + "epoch": 0.34107633168588686, + "grad_norm": 0.3650037348270416, + "learning_rate": 1.8617253504916513e-05, + "loss": 0.5147, + "step": 12422 + }, + { + "epoch": 0.34110378912685335, + "grad_norm": 0.40146762132644653, + "learning_rate": 1.861703436494883e-05, + "loss": 0.5459, + "step": 12423 + }, + { + "epoch": 0.3411312465678199, + "grad_norm": 0.3985220193862915, + "learning_rate": 1.8616815208907627e-05, + "loss": 0.546, + "step": 12424 + }, + { + "epoch": 0.3411587040087864, + "grad_norm": 0.3417741060256958, + "learning_rate": 1.8616596036793315e-05, + "loss": 0.5084, + "step": 12425 + }, + { + "epoch": 0.3411861614497529, + "grad_norm": 0.35618314146995544, + "learning_rate": 1.86163768486063e-05, + "loss": 0.5016, + "step": 12426 + }, + { + "epoch": 0.3412136188907194, + "grad_norm": 0.37012559175491333, + "learning_rate": 1.861615764434699e-05, + "loss": 0.4429, + "step": 12427 + }, + { + "epoch": 0.3412410763316859, + "grad_norm": 0.574319064617157, + "learning_rate": 1.86159384240158e-05, + "loss": 0.5153, + "step": 12428 + }, + { + "epoch": 0.34126853377265237, + "grad_norm": 0.40714189410209656, + "learning_rate": 1.8615719187613133e-05, + "loss": 0.5487, + "step": 12429 + }, + { + "epoch": 0.34129599121361887, + "grad_norm": 0.354127436876297, + "learning_rate": 1.86154999351394e-05, + "loss": 0.4633, + "step": 12430 + }, + { + "epoch": 0.3413234486545854, + "grad_norm": 0.3709923326969147, + "learning_rate": 1.861528066659501e-05, + "loss": 0.552, + "step": 12431 + }, + { + "epoch": 0.3413509060955519, + "grad_norm": 0.41628921031951904, + "learning_rate": 1.861506138198037e-05, + "loss": 0.5572, + "step": 12432 + }, + { + "epoch": 0.3413783635365184, + "grad_norm": 0.37817472219467163, + "learning_rate": 1.8614842081295893e-05, + "loss": 0.5313, + "step": 12433 + }, + { + "epoch": 0.3414058209774849, + "grad_norm": 0.3538553714752197, + "learning_rate": 1.8614622764541987e-05, + "loss": 0.5745, + "step": 12434 + }, + { + "epoch": 0.3414332784184514, + "grad_norm": 0.3838284909725189, + "learning_rate": 1.8614403431719057e-05, + "loss": 0.5648, + "step": 12435 + }, + { + "epoch": 0.3414607358594179, + "grad_norm": 0.4210561513900757, + "learning_rate": 1.8614184082827518e-05, + "loss": 0.5507, + "step": 12436 + }, + { + "epoch": 0.3414881933003844, + "grad_norm": 0.33766961097717285, + "learning_rate": 1.8613964717867775e-05, + "loss": 0.4836, + "step": 12437 + }, + { + "epoch": 0.34151565074135093, + "grad_norm": 0.3755749464035034, + "learning_rate": 1.861374533684024e-05, + "loss": 0.4847, + "step": 12438 + }, + { + "epoch": 0.3415431081823174, + "grad_norm": 0.37769976258277893, + "learning_rate": 1.8613525939745317e-05, + "loss": 0.5452, + "step": 12439 + }, + { + "epoch": 0.3415705656232839, + "grad_norm": 0.35986432433128357, + "learning_rate": 1.8613306526583422e-05, + "loss": 0.5138, + "step": 12440 + }, + { + "epoch": 0.3415980230642504, + "grad_norm": 0.3905627131462097, + "learning_rate": 1.861308709735496e-05, + "loss": 0.5263, + "step": 12441 + }, + { + "epoch": 0.3416254805052169, + "grad_norm": 0.39064085483551025, + "learning_rate": 1.8612867652060344e-05, + "loss": 0.6245, + "step": 12442 + }, + { + "epoch": 0.3416529379461834, + "grad_norm": 0.40754708647727966, + "learning_rate": 1.861264819069998e-05, + "loss": 0.5301, + "step": 12443 + }, + { + "epoch": 0.3416803953871499, + "grad_norm": 0.3976210355758667, + "learning_rate": 1.8612428713274276e-05, + "loss": 0.5108, + "step": 12444 + }, + { + "epoch": 0.34170785282811644, + "grad_norm": 0.3640335202217102, + "learning_rate": 1.8612209219783646e-05, + "loss": 0.4789, + "step": 12445 + }, + { + "epoch": 0.34173531026908294, + "grad_norm": 0.3840211033821106, + "learning_rate": 1.8611989710228497e-05, + "loss": 0.5509, + "step": 12446 + }, + { + "epoch": 0.34176276771004943, + "grad_norm": 0.4026833474636078, + "learning_rate": 1.8611770184609237e-05, + "loss": 0.5243, + "step": 12447 + }, + { + "epoch": 0.3417902251510159, + "grad_norm": 0.3280714750289917, + "learning_rate": 1.861155064292628e-05, + "loss": 0.5493, + "step": 12448 + }, + { + "epoch": 0.3418176825919824, + "grad_norm": 0.3767845034599304, + "learning_rate": 1.861133108518003e-05, + "loss": 0.4415, + "step": 12449 + }, + { + "epoch": 0.3418451400329489, + "grad_norm": 0.34884724020957947, + "learning_rate": 1.86111115113709e-05, + "loss": 0.4681, + "step": 12450 + }, + { + "epoch": 0.3418725974739154, + "grad_norm": 0.3761449158191681, + "learning_rate": 1.8610891921499297e-05, + "loss": 0.5501, + "step": 12451 + }, + { + "epoch": 0.34190005491488196, + "grad_norm": 0.5941891074180603, + "learning_rate": 1.8610672315565633e-05, + "loss": 0.4971, + "step": 12452 + }, + { + "epoch": 0.34192751235584845, + "grad_norm": 0.37456271052360535, + "learning_rate": 1.861045269357032e-05, + "loss": 0.5319, + "step": 12453 + }, + { + "epoch": 0.34195496979681494, + "grad_norm": 0.3723776340484619, + "learning_rate": 1.861023305551376e-05, + "loss": 0.5101, + "step": 12454 + }, + { + "epoch": 0.34198242723778144, + "grad_norm": 0.3292977213859558, + "learning_rate": 1.861001340139637e-05, + "loss": 0.4383, + "step": 12455 + }, + { + "epoch": 0.34200988467874793, + "grad_norm": 0.3781681954860687, + "learning_rate": 1.8609793731218556e-05, + "loss": 0.5332, + "step": 12456 + }, + { + "epoch": 0.3420373421197144, + "grad_norm": 0.3383600115776062, + "learning_rate": 1.860957404498073e-05, + "loss": 0.4931, + "step": 12457 + }, + { + "epoch": 0.3420647995606809, + "grad_norm": 0.34892529249191284, + "learning_rate": 1.86093543426833e-05, + "loss": 0.5087, + "step": 12458 + }, + { + "epoch": 0.34209225700164747, + "grad_norm": 0.4160260856151581, + "learning_rate": 1.8609134624326675e-05, + "loss": 0.5562, + "step": 12459 + }, + { + "epoch": 0.34211971444261396, + "grad_norm": 0.46445804834365845, + "learning_rate": 1.860891488991127e-05, + "loss": 0.4814, + "step": 12460 + }, + { + "epoch": 0.34214717188358046, + "grad_norm": 0.3425804674625397, + "learning_rate": 1.8608695139437486e-05, + "loss": 0.5229, + "step": 12461 + }, + { + "epoch": 0.34217462932454695, + "grad_norm": 0.45031970739364624, + "learning_rate": 1.860847537290574e-05, + "loss": 0.4581, + "step": 12462 + }, + { + "epoch": 0.34220208676551345, + "grad_norm": 0.42683106660842896, + "learning_rate": 1.860825559031644e-05, + "loss": 0.531, + "step": 12463 + }, + { + "epoch": 0.34222954420647994, + "grad_norm": 0.4106147885322571, + "learning_rate": 1.8608035791669997e-05, + "loss": 0.62, + "step": 12464 + }, + { + "epoch": 0.34225700164744643, + "grad_norm": 0.3677841126918793, + "learning_rate": 1.8607815976966818e-05, + "loss": 0.4677, + "step": 12465 + }, + { + "epoch": 0.342284459088413, + "grad_norm": 0.3894851505756378, + "learning_rate": 1.8607596146207316e-05, + "loss": 0.5785, + "step": 12466 + }, + { + "epoch": 0.3423119165293795, + "grad_norm": 0.33146294951438904, + "learning_rate": 1.86073762993919e-05, + "loss": 0.5208, + "step": 12467 + }, + { + "epoch": 0.342339373970346, + "grad_norm": 0.3290240168571472, + "learning_rate": 1.860715643652098e-05, + "loss": 0.5761, + "step": 12468 + }, + { + "epoch": 0.34236683141131247, + "grad_norm": 0.3947576582431793, + "learning_rate": 1.8606936557594967e-05, + "loss": 0.5094, + "step": 12469 + }, + { + "epoch": 0.34239428885227896, + "grad_norm": 0.3360365629196167, + "learning_rate": 1.860671666261427e-05, + "loss": 0.5083, + "step": 12470 + }, + { + "epoch": 0.34242174629324545, + "grad_norm": 0.3637236952781677, + "learning_rate": 1.8606496751579298e-05, + "loss": 0.545, + "step": 12471 + }, + { + "epoch": 0.34244920373421195, + "grad_norm": 0.3957521617412567, + "learning_rate": 1.8606276824490462e-05, + "loss": 0.5217, + "step": 12472 + }, + { + "epoch": 0.3424766611751785, + "grad_norm": 0.34500351548194885, + "learning_rate": 1.8606056881348177e-05, + "loss": 0.4974, + "step": 12473 + }, + { + "epoch": 0.342504118616145, + "grad_norm": 0.37129634618759155, + "learning_rate": 1.8605836922152846e-05, + "loss": 0.5369, + "step": 12474 + }, + { + "epoch": 0.3425315760571115, + "grad_norm": 0.39873069524765015, + "learning_rate": 1.8605616946904885e-05, + "loss": 0.5652, + "step": 12475 + }, + { + "epoch": 0.342559033498078, + "grad_norm": 0.34749987721443176, + "learning_rate": 1.86053969556047e-05, + "loss": 0.4899, + "step": 12476 + }, + { + "epoch": 0.3425864909390445, + "grad_norm": 0.32393577694892883, + "learning_rate": 1.86051769482527e-05, + "loss": 0.4903, + "step": 12477 + }, + { + "epoch": 0.34261394838001097, + "grad_norm": 0.38452261686325073, + "learning_rate": 1.8604956924849304e-05, + "loss": 0.5277, + "step": 12478 + }, + { + "epoch": 0.34264140582097746, + "grad_norm": 0.3591618835926056, + "learning_rate": 1.8604736885394917e-05, + "loss": 0.4912, + "step": 12479 + }, + { + "epoch": 0.342668863261944, + "grad_norm": 0.3449806272983551, + "learning_rate": 1.8604516829889944e-05, + "loss": 0.5345, + "step": 12480 + }, + { + "epoch": 0.3426963207029105, + "grad_norm": 0.37548309564590454, + "learning_rate": 1.8604296758334804e-05, + "loss": 0.5328, + "step": 12481 + }, + { + "epoch": 0.342723778143877, + "grad_norm": 0.36883246898651123, + "learning_rate": 1.8604076670729905e-05, + "loss": 0.5689, + "step": 12482 + }, + { + "epoch": 0.3427512355848435, + "grad_norm": 0.35661500692367554, + "learning_rate": 1.860385656707566e-05, + "loss": 0.4678, + "step": 12483 + }, + { + "epoch": 0.34277869302581, + "grad_norm": 0.3643106520175934, + "learning_rate": 1.860363644737247e-05, + "loss": 0.5079, + "step": 12484 + }, + { + "epoch": 0.3428061504667765, + "grad_norm": 0.3650033473968506, + "learning_rate": 1.860341631162075e-05, + "loss": 0.5285, + "step": 12485 + }, + { + "epoch": 0.342833607907743, + "grad_norm": 0.3510039448738098, + "learning_rate": 1.860319615982092e-05, + "loss": 0.5098, + "step": 12486 + }, + { + "epoch": 0.3428610653487095, + "grad_norm": 0.36367693543434143, + "learning_rate": 1.8602975991973383e-05, + "loss": 0.5392, + "step": 12487 + }, + { + "epoch": 0.342888522789676, + "grad_norm": 0.3612433969974518, + "learning_rate": 1.8602755808078547e-05, + "loss": 0.555, + "step": 12488 + }, + { + "epoch": 0.3429159802306425, + "grad_norm": 0.3998146653175354, + "learning_rate": 1.8602535608136828e-05, + "loss": 0.5215, + "step": 12489 + }, + { + "epoch": 0.342943437671609, + "grad_norm": 0.35060691833496094, + "learning_rate": 1.8602315392148632e-05, + "loss": 0.5331, + "step": 12490 + }, + { + "epoch": 0.3429708951125755, + "grad_norm": 0.329326868057251, + "learning_rate": 1.8602095160114373e-05, + "loss": 0.453, + "step": 12491 + }, + { + "epoch": 0.342998352553542, + "grad_norm": 0.351910799741745, + "learning_rate": 1.860187491203446e-05, + "loss": 0.4895, + "step": 12492 + }, + { + "epoch": 0.3430258099945085, + "grad_norm": 0.36010316014289856, + "learning_rate": 1.8601654647909307e-05, + "loss": 0.4606, + "step": 12493 + }, + { + "epoch": 0.34305326743547504, + "grad_norm": 2.198714256286621, + "learning_rate": 1.860143436773932e-05, + "loss": 0.6337, + "step": 12494 + }, + { + "epoch": 0.34308072487644153, + "grad_norm": 1.2635620832443237, + "learning_rate": 1.8601214071524918e-05, + "loss": 0.5827, + "step": 12495 + }, + { + "epoch": 0.34310818231740803, + "grad_norm": 0.43448880314826965, + "learning_rate": 1.8600993759266505e-05, + "loss": 0.4854, + "step": 12496 + }, + { + "epoch": 0.3431356397583745, + "grad_norm": 0.36445188522338867, + "learning_rate": 1.8600773430964488e-05, + "loss": 0.5326, + "step": 12497 + }, + { + "epoch": 0.343163097199341, + "grad_norm": 0.3614044785499573, + "learning_rate": 1.8600553086619288e-05, + "loss": 0.481, + "step": 12498 + }, + { + "epoch": 0.3431905546403075, + "grad_norm": 0.37353554368019104, + "learning_rate": 1.860033272623131e-05, + "loss": 0.5469, + "step": 12499 + }, + { + "epoch": 0.343218012081274, + "grad_norm": 0.3660149574279785, + "learning_rate": 1.860011234980097e-05, + "loss": 0.5934, + "step": 12500 + }, + { + "epoch": 0.34324546952224055, + "grad_norm": 0.3388027250766754, + "learning_rate": 1.859989195732867e-05, + "loss": 0.5028, + "step": 12501 + }, + { + "epoch": 0.34327292696320705, + "grad_norm": 0.37738460302352905, + "learning_rate": 1.859967154881483e-05, + "loss": 0.5669, + "step": 12502 + }, + { + "epoch": 0.34330038440417354, + "grad_norm": 0.38425740599632263, + "learning_rate": 1.859945112425986e-05, + "loss": 0.6531, + "step": 12503 + }, + { + "epoch": 0.34332784184514004, + "grad_norm": 0.3569954037666321, + "learning_rate": 1.8599230683664163e-05, + "loss": 0.5528, + "step": 12504 + }, + { + "epoch": 0.34335529928610653, + "grad_norm": 0.3778884708881378, + "learning_rate": 1.859901022702816e-05, + "loss": 0.5437, + "step": 12505 + }, + { + "epoch": 0.343382756727073, + "grad_norm": 0.3584970533847809, + "learning_rate": 1.859878975435226e-05, + "loss": 0.4754, + "step": 12506 + }, + { + "epoch": 0.3434102141680395, + "grad_norm": 0.34502074122428894, + "learning_rate": 1.859856926563687e-05, + "loss": 0.5165, + "step": 12507 + }, + { + "epoch": 0.34343767160900607, + "grad_norm": 0.44411519169807434, + "learning_rate": 1.8598348760882404e-05, + "loss": 0.5446, + "step": 12508 + }, + { + "epoch": 0.34346512904997256, + "grad_norm": 0.3735094964504242, + "learning_rate": 1.8598128240089277e-05, + "loss": 0.5584, + "step": 12509 + }, + { + "epoch": 0.34349258649093906, + "grad_norm": 0.38954228162765503, + "learning_rate": 1.8597907703257893e-05, + "loss": 0.5498, + "step": 12510 + }, + { + "epoch": 0.34352004393190555, + "grad_norm": 0.3487035036087036, + "learning_rate": 1.8597687150388668e-05, + "loss": 0.5664, + "step": 12511 + }, + { + "epoch": 0.34354750137287204, + "grad_norm": 0.39853784441947937, + "learning_rate": 1.8597466581482013e-05, + "loss": 0.6092, + "step": 12512 + }, + { + "epoch": 0.34357495881383854, + "grad_norm": 0.4248814582824707, + "learning_rate": 1.8597245996538337e-05, + "loss": 0.5115, + "step": 12513 + }, + { + "epoch": 0.34360241625480503, + "grad_norm": 0.3748270869255066, + "learning_rate": 1.8597025395558054e-05, + "loss": 0.4932, + "step": 12514 + }, + { + "epoch": 0.3436298736957716, + "grad_norm": 0.37733325362205505, + "learning_rate": 1.8596804778541574e-05, + "loss": 0.5763, + "step": 12515 + }, + { + "epoch": 0.3436573311367381, + "grad_norm": 0.4772658348083496, + "learning_rate": 1.859658414548931e-05, + "loss": 0.5957, + "step": 12516 + }, + { + "epoch": 0.34368478857770457, + "grad_norm": 0.3823661804199219, + "learning_rate": 1.8596363496401676e-05, + "loss": 0.5669, + "step": 12517 + }, + { + "epoch": 0.34371224601867106, + "grad_norm": 0.3263639211654663, + "learning_rate": 1.8596142831279074e-05, + "loss": 0.4735, + "step": 12518 + }, + { + "epoch": 0.34373970345963756, + "grad_norm": 0.3318633437156677, + "learning_rate": 1.859592215012193e-05, + "loss": 0.3987, + "step": 12519 + }, + { + "epoch": 0.34376716090060405, + "grad_norm": 0.3850191533565521, + "learning_rate": 1.859570145293064e-05, + "loss": 0.5533, + "step": 12520 + }, + { + "epoch": 0.34379461834157055, + "grad_norm": 0.41838932037353516, + "learning_rate": 1.859548073970563e-05, + "loss": 0.5673, + "step": 12521 + }, + { + "epoch": 0.34382207578253704, + "grad_norm": 0.38155174255371094, + "learning_rate": 1.85952600104473e-05, + "loss": 0.5472, + "step": 12522 + }, + { + "epoch": 0.3438495332235036, + "grad_norm": 0.36532118916511536, + "learning_rate": 1.859503926515607e-05, + "loss": 0.6311, + "step": 12523 + }, + { + "epoch": 0.3438769906644701, + "grad_norm": 1.0349308252334595, + "learning_rate": 1.8594818503832347e-05, + "loss": 0.4433, + "step": 12524 + }, + { + "epoch": 0.3439044481054366, + "grad_norm": 0.3971216678619385, + "learning_rate": 1.8594597726476542e-05, + "loss": 0.5239, + "step": 12525 + }, + { + "epoch": 0.34393190554640307, + "grad_norm": 0.3294568359851837, + "learning_rate": 1.8594376933089073e-05, + "loss": 0.5352, + "step": 12526 + }, + { + "epoch": 0.34395936298736957, + "grad_norm": 0.3669932186603546, + "learning_rate": 1.859415612367035e-05, + "loss": 0.538, + "step": 12527 + }, + { + "epoch": 0.34398682042833606, + "grad_norm": 0.34438735246658325, + "learning_rate": 1.859393529822078e-05, + "loss": 0.5185, + "step": 12528 + }, + { + "epoch": 0.34401427786930255, + "grad_norm": 0.3622181713581085, + "learning_rate": 1.859371445674077e-05, + "loss": 0.499, + "step": 12529 + }, + { + "epoch": 0.3440417353102691, + "grad_norm": 0.37381789088249207, + "learning_rate": 1.859349359923075e-05, + "loss": 0.5719, + "step": 12530 + }, + { + "epoch": 0.3440691927512356, + "grad_norm": 0.35320615768432617, + "learning_rate": 1.859327272569112e-05, + "loss": 0.5209, + "step": 12531 + }, + { + "epoch": 0.3440966501922021, + "grad_norm": 0.4062102138996124, + "learning_rate": 1.859305183612229e-05, + "loss": 0.4452, + "step": 12532 + }, + { + "epoch": 0.3441241076331686, + "grad_norm": 0.38576367497444153, + "learning_rate": 1.859283093052468e-05, + "loss": 0.561, + "step": 12533 + }, + { + "epoch": 0.3441515650741351, + "grad_norm": 0.37779340147972107, + "learning_rate": 1.8592610008898697e-05, + "loss": 0.5168, + "step": 12534 + }, + { + "epoch": 0.3441790225151016, + "grad_norm": 0.3470847010612488, + "learning_rate": 1.8592389071244753e-05, + "loss": 0.4421, + "step": 12535 + }, + { + "epoch": 0.34420647995606807, + "grad_norm": 0.3575572967529297, + "learning_rate": 1.8592168117563264e-05, + "loss": 0.5279, + "step": 12536 + }, + { + "epoch": 0.3442339373970346, + "grad_norm": 0.3466982841491699, + "learning_rate": 1.8591947147854635e-05, + "loss": 0.4521, + "step": 12537 + }, + { + "epoch": 0.3442613948380011, + "grad_norm": 0.3703071177005768, + "learning_rate": 1.8591726162119285e-05, + "loss": 0.4548, + "step": 12538 + }, + { + "epoch": 0.3442888522789676, + "grad_norm": 0.35229015350341797, + "learning_rate": 1.8591505160357618e-05, + "loss": 0.5301, + "step": 12539 + }, + { + "epoch": 0.3443163097199341, + "grad_norm": 0.3755365312099457, + "learning_rate": 1.8591284142570057e-05, + "loss": 0.5182, + "step": 12540 + }, + { + "epoch": 0.3443437671609006, + "grad_norm": 0.347196102142334, + "learning_rate": 1.859106310875701e-05, + "loss": 0.4578, + "step": 12541 + }, + { + "epoch": 0.3443712246018671, + "grad_norm": 0.4409518241882324, + "learning_rate": 1.8590842058918883e-05, + "loss": 0.4652, + "step": 12542 + }, + { + "epoch": 0.3443986820428336, + "grad_norm": 0.45305538177490234, + "learning_rate": 1.85906209930561e-05, + "loss": 0.5599, + "step": 12543 + }, + { + "epoch": 0.34442613948380013, + "grad_norm": 0.41491568088531494, + "learning_rate": 1.859039991116906e-05, + "loss": 0.5216, + "step": 12544 + }, + { + "epoch": 0.3444535969247666, + "grad_norm": 0.36472949385643005, + "learning_rate": 1.859017881325819e-05, + "loss": 0.5467, + "step": 12545 + }, + { + "epoch": 0.3444810543657331, + "grad_norm": 0.44581353664398193, + "learning_rate": 1.858995769932389e-05, + "loss": 0.5782, + "step": 12546 + }, + { + "epoch": 0.3445085118066996, + "grad_norm": 0.43733033537864685, + "learning_rate": 1.858973656936658e-05, + "loss": 0.5851, + "step": 12547 + }, + { + "epoch": 0.3445359692476661, + "grad_norm": 0.36269429326057434, + "learning_rate": 1.8589515423386667e-05, + "loss": 0.5391, + "step": 12548 + }, + { + "epoch": 0.3445634266886326, + "grad_norm": 0.35754284262657166, + "learning_rate": 1.8589294261384568e-05, + "loss": 0.5828, + "step": 12549 + }, + { + "epoch": 0.3445908841295991, + "grad_norm": 0.4186266362667084, + "learning_rate": 1.858907308336069e-05, + "loss": 0.5528, + "step": 12550 + }, + { + "epoch": 0.34461834157056564, + "grad_norm": 0.36250320076942444, + "learning_rate": 1.8588851889315457e-05, + "loss": 0.5713, + "step": 12551 + }, + { + "epoch": 0.34464579901153214, + "grad_norm": 0.32410547137260437, + "learning_rate": 1.8588630679249265e-05, + "loss": 0.5768, + "step": 12552 + }, + { + "epoch": 0.34467325645249863, + "grad_norm": 0.3781469166278839, + "learning_rate": 1.8588409453162543e-05, + "loss": 0.5458, + "step": 12553 + }, + { + "epoch": 0.3447007138934651, + "grad_norm": 0.36061641573905945, + "learning_rate": 1.8588188211055694e-05, + "loss": 0.4982, + "step": 12554 + }, + { + "epoch": 0.3447281713344316, + "grad_norm": 0.33096417784690857, + "learning_rate": 1.8587966952929134e-05, + "loss": 0.5317, + "step": 12555 + }, + { + "epoch": 0.3447556287753981, + "grad_norm": 0.3772202134132385, + "learning_rate": 1.858774567878327e-05, + "loss": 0.4823, + "step": 12556 + }, + { + "epoch": 0.3447830862163646, + "grad_norm": 0.35657283663749695, + "learning_rate": 1.8587524388618523e-05, + "loss": 0.5557, + "step": 12557 + }, + { + "epoch": 0.34481054365733116, + "grad_norm": 0.3761727809906006, + "learning_rate": 1.8587303082435305e-05, + "loss": 0.4941, + "step": 12558 + }, + { + "epoch": 0.34483800109829765, + "grad_norm": 0.45941758155822754, + "learning_rate": 1.8587081760234024e-05, + "loss": 0.4645, + "step": 12559 + }, + { + "epoch": 0.34486545853926415, + "grad_norm": 0.413360059261322, + "learning_rate": 1.858686042201509e-05, + "loss": 0.5752, + "step": 12560 + }, + { + "epoch": 0.34489291598023064, + "grad_norm": 0.3964988589286804, + "learning_rate": 1.8586639067778925e-05, + "loss": 0.5321, + "step": 12561 + }, + { + "epoch": 0.34492037342119714, + "grad_norm": 0.3585057556629181, + "learning_rate": 1.858641769752594e-05, + "loss": 0.5144, + "step": 12562 + }, + { + "epoch": 0.34494783086216363, + "grad_norm": 0.3535102307796478, + "learning_rate": 1.858619631125654e-05, + "loss": 0.5496, + "step": 12563 + }, + { + "epoch": 0.3449752883031301, + "grad_norm": 0.3658895194530487, + "learning_rate": 1.8585974908971147e-05, + "loss": 0.504, + "step": 12564 + }, + { + "epoch": 0.3450027457440967, + "grad_norm": 0.35339754819869995, + "learning_rate": 1.858575349067017e-05, + "loss": 0.4541, + "step": 12565 + }, + { + "epoch": 0.34503020318506317, + "grad_norm": 0.2955337464809418, + "learning_rate": 1.858553205635402e-05, + "loss": 0.3666, + "step": 12566 + }, + { + "epoch": 0.34505766062602966, + "grad_norm": 0.36888229846954346, + "learning_rate": 1.8585310606023116e-05, + "loss": 0.5204, + "step": 12567 + }, + { + "epoch": 0.34508511806699615, + "grad_norm": 0.33436739444732666, + "learning_rate": 1.8585089139677867e-05, + "loss": 0.5474, + "step": 12568 + }, + { + "epoch": 0.34511257550796265, + "grad_norm": 0.4198274612426758, + "learning_rate": 1.8584867657318684e-05, + "loss": 0.5903, + "step": 12569 + }, + { + "epoch": 0.34514003294892914, + "grad_norm": 0.3595098853111267, + "learning_rate": 1.8584646158945984e-05, + "loss": 0.5077, + "step": 12570 + }, + { + "epoch": 0.34516749038989564, + "grad_norm": 0.3708937168121338, + "learning_rate": 1.858442464456018e-05, + "loss": 0.5575, + "step": 12571 + }, + { + "epoch": 0.3451949478308622, + "grad_norm": 0.4717086851596832, + "learning_rate": 1.8584203114161687e-05, + "loss": 0.5941, + "step": 12572 + }, + { + "epoch": 0.3452224052718287, + "grad_norm": 0.4951496720314026, + "learning_rate": 1.858398156775091e-05, + "loss": 0.5318, + "step": 12573 + }, + { + "epoch": 0.3452498627127952, + "grad_norm": 0.3295329213142395, + "learning_rate": 1.858376000532827e-05, + "loss": 0.4509, + "step": 12574 + }, + { + "epoch": 0.34527732015376167, + "grad_norm": 0.3636619746685028, + "learning_rate": 1.8583538426894177e-05, + "loss": 0.4703, + "step": 12575 + }, + { + "epoch": 0.34530477759472816, + "grad_norm": 0.36063358187675476, + "learning_rate": 1.8583316832449048e-05, + "loss": 0.5193, + "step": 12576 + }, + { + "epoch": 0.34533223503569466, + "grad_norm": 0.47096073627471924, + "learning_rate": 1.858309522199329e-05, + "loss": 0.5805, + "step": 12577 + }, + { + "epoch": 0.34535969247666115, + "grad_norm": 0.34318244457244873, + "learning_rate": 1.8582873595527324e-05, + "loss": 0.5531, + "step": 12578 + }, + { + "epoch": 0.3453871499176277, + "grad_norm": 0.3895787000656128, + "learning_rate": 1.8582651953051555e-05, + "loss": 0.5509, + "step": 12579 + }, + { + "epoch": 0.3454146073585942, + "grad_norm": 0.36655959486961365, + "learning_rate": 1.8582430294566406e-05, + "loss": 0.5143, + "step": 12580 + }, + { + "epoch": 0.3454420647995607, + "grad_norm": 0.4519481658935547, + "learning_rate": 1.8582208620072283e-05, + "loss": 0.5571, + "step": 12581 + }, + { + "epoch": 0.3454695222405272, + "grad_norm": 0.37004977464675903, + "learning_rate": 1.85819869295696e-05, + "loss": 0.5283, + "step": 12582 + }, + { + "epoch": 0.3454969796814937, + "grad_norm": 0.34681248664855957, + "learning_rate": 1.8581765223058774e-05, + "loss": 0.541, + "step": 12583 + }, + { + "epoch": 0.34552443712246017, + "grad_norm": 0.3371754288673401, + "learning_rate": 1.8581543500540214e-05, + "loss": 0.4932, + "step": 12584 + }, + { + "epoch": 0.34555189456342666, + "grad_norm": 0.44022807478904724, + "learning_rate": 1.858132176201434e-05, + "loss": 0.5804, + "step": 12585 + }, + { + "epoch": 0.3455793520043932, + "grad_norm": 0.4288005530834198, + "learning_rate": 1.858110000748156e-05, + "loss": 0.6112, + "step": 12586 + }, + { + "epoch": 0.3456068094453597, + "grad_norm": 0.3526349365711212, + "learning_rate": 1.858087823694229e-05, + "loss": 0.5009, + "step": 12587 + }, + { + "epoch": 0.3456342668863262, + "grad_norm": 0.4124602973461151, + "learning_rate": 1.8580656450396945e-05, + "loss": 0.4929, + "step": 12588 + }, + { + "epoch": 0.3456617243272927, + "grad_norm": 0.38749369978904724, + "learning_rate": 1.858043464784594e-05, + "loss": 0.5433, + "step": 12589 + }, + { + "epoch": 0.3456891817682592, + "grad_norm": 0.41337698698043823, + "learning_rate": 1.8580212829289682e-05, + "loss": 0.516, + "step": 12590 + }, + { + "epoch": 0.3457166392092257, + "grad_norm": 0.4181760549545288, + "learning_rate": 1.8579990994728586e-05, + "loss": 0.5273, + "step": 12591 + }, + { + "epoch": 0.3457440966501922, + "grad_norm": 0.4541707932949066, + "learning_rate": 1.8579769144163073e-05, + "loss": 0.5728, + "step": 12592 + }, + { + "epoch": 0.34577155409115873, + "grad_norm": 0.3735782504081726, + "learning_rate": 1.857954727759355e-05, + "loss": 0.5792, + "step": 12593 + }, + { + "epoch": 0.3457990115321252, + "grad_norm": 0.43929624557495117, + "learning_rate": 1.8579325395020434e-05, + "loss": 0.5598, + "step": 12594 + }, + { + "epoch": 0.3458264689730917, + "grad_norm": 0.3621836006641388, + "learning_rate": 1.8579103496444135e-05, + "loss": 0.4317, + "step": 12595 + }, + { + "epoch": 0.3458539264140582, + "grad_norm": 0.3541490435600281, + "learning_rate": 1.8578881581865072e-05, + "loss": 0.4959, + "step": 12596 + }, + { + "epoch": 0.3458813838550247, + "grad_norm": 0.37178272008895874, + "learning_rate": 1.8578659651283657e-05, + "loss": 0.5221, + "step": 12597 + }, + { + "epoch": 0.3459088412959912, + "grad_norm": 0.3842722773551941, + "learning_rate": 1.8578437704700305e-05, + "loss": 0.6163, + "step": 12598 + }, + { + "epoch": 0.3459362987369577, + "grad_norm": 0.40147021412849426, + "learning_rate": 1.8578215742115427e-05, + "loss": 0.5713, + "step": 12599 + }, + { + "epoch": 0.34596375617792424, + "grad_norm": 0.360003799200058, + "learning_rate": 1.857799376352944e-05, + "loss": 0.4893, + "step": 12600 + }, + { + "epoch": 0.34599121361889074, + "grad_norm": 0.3810230493545532, + "learning_rate": 1.8577771768942755e-05, + "loss": 0.5357, + "step": 12601 + }, + { + "epoch": 0.34601867105985723, + "grad_norm": 0.7750182151794434, + "learning_rate": 1.857754975835579e-05, + "loss": 0.4874, + "step": 12602 + }, + { + "epoch": 0.3460461285008237, + "grad_norm": 0.3628039062023163, + "learning_rate": 1.8577327731768954e-05, + "loss": 0.472, + "step": 12603 + }, + { + "epoch": 0.3460735859417902, + "grad_norm": 0.3613610863685608, + "learning_rate": 1.8577105689182668e-05, + "loss": 0.5248, + "step": 12604 + }, + { + "epoch": 0.3461010433827567, + "grad_norm": 0.386263906955719, + "learning_rate": 1.857688363059734e-05, + "loss": 0.5373, + "step": 12605 + }, + { + "epoch": 0.3461285008237232, + "grad_norm": 7.524552822113037, + "learning_rate": 1.8576661556013387e-05, + "loss": 0.839, + "step": 12606 + }, + { + "epoch": 0.34615595826468976, + "grad_norm": 0.4306505620479584, + "learning_rate": 1.8576439465431225e-05, + "loss": 0.5836, + "step": 12607 + }, + { + "epoch": 0.34618341570565625, + "grad_norm": 0.32971298694610596, + "learning_rate": 1.8576217358851264e-05, + "loss": 0.429, + "step": 12608 + }, + { + "epoch": 0.34621087314662274, + "grad_norm": 0.3432246446609497, + "learning_rate": 1.8575995236273918e-05, + "loss": 0.4706, + "step": 12609 + }, + { + "epoch": 0.34623833058758924, + "grad_norm": 0.3342866003513336, + "learning_rate": 1.8575773097699608e-05, + "loss": 0.4977, + "step": 12610 + }, + { + "epoch": 0.34626578802855573, + "grad_norm": 0.35850274562835693, + "learning_rate": 1.8575550943128742e-05, + "loss": 0.5337, + "step": 12611 + }, + { + "epoch": 0.3462932454695222, + "grad_norm": 0.37367355823516846, + "learning_rate": 1.8575328772561737e-05, + "loss": 0.4298, + "step": 12612 + }, + { + "epoch": 0.3463207029104887, + "grad_norm": 0.4273996353149414, + "learning_rate": 1.8575106585999007e-05, + "loss": 0.5516, + "step": 12613 + }, + { + "epoch": 0.34634816035145527, + "grad_norm": 0.43801572918891907, + "learning_rate": 1.8574884383440967e-05, + "loss": 0.5033, + "step": 12614 + }, + { + "epoch": 0.34637561779242176, + "grad_norm": 0.4426799416542053, + "learning_rate": 1.857466216488803e-05, + "loss": 0.4923, + "step": 12615 + }, + { + "epoch": 0.34640307523338826, + "grad_norm": 0.34461185336112976, + "learning_rate": 1.8574439930340612e-05, + "loss": 0.5115, + "step": 12616 + }, + { + "epoch": 0.34643053267435475, + "grad_norm": 0.36092299222946167, + "learning_rate": 1.857421767979913e-05, + "loss": 0.5669, + "step": 12617 + }, + { + "epoch": 0.34645799011532125, + "grad_norm": 0.4030083417892456, + "learning_rate": 1.857399541326399e-05, + "loss": 0.5501, + "step": 12618 + }, + { + "epoch": 0.34648544755628774, + "grad_norm": 0.3334687352180481, + "learning_rate": 1.857377313073561e-05, + "loss": 0.4838, + "step": 12619 + }, + { + "epoch": 0.34651290499725423, + "grad_norm": 0.3064964711666107, + "learning_rate": 1.8573550832214412e-05, + "loss": 0.3982, + "step": 12620 + }, + { + "epoch": 0.3465403624382208, + "grad_norm": 0.3856368958950043, + "learning_rate": 1.8573328517700805e-05, + "loss": 0.5618, + "step": 12621 + }, + { + "epoch": 0.3465678198791873, + "grad_norm": 0.34063056111335754, + "learning_rate": 1.8573106187195203e-05, + "loss": 0.5213, + "step": 12622 + }, + { + "epoch": 0.34659527732015377, + "grad_norm": 0.3651998043060303, + "learning_rate": 1.8572883840698018e-05, + "loss": 0.5476, + "step": 12623 + }, + { + "epoch": 0.34662273476112027, + "grad_norm": 0.3949795663356781, + "learning_rate": 1.8572661478209675e-05, + "loss": 0.5731, + "step": 12624 + }, + { + "epoch": 0.34665019220208676, + "grad_norm": 0.44128307700157166, + "learning_rate": 1.857243909973058e-05, + "loss": 0.4941, + "step": 12625 + }, + { + "epoch": 0.34667764964305325, + "grad_norm": 0.5818378925323486, + "learning_rate": 1.857221670526115e-05, + "loss": 0.6558, + "step": 12626 + }, + { + "epoch": 0.34670510708401975, + "grad_norm": 0.36324968934059143, + "learning_rate": 1.8571994294801796e-05, + "loss": 0.5412, + "step": 12627 + }, + { + "epoch": 0.3467325645249863, + "grad_norm": 0.3635287582874298, + "learning_rate": 1.857177186835294e-05, + "loss": 0.5338, + "step": 12628 + }, + { + "epoch": 0.3467600219659528, + "grad_norm": 0.37550461292266846, + "learning_rate": 1.8571549425914997e-05, + "loss": 0.4425, + "step": 12629 + }, + { + "epoch": 0.3467874794069193, + "grad_norm": 0.3531869351863861, + "learning_rate": 1.8571326967488375e-05, + "loss": 0.5721, + "step": 12630 + }, + { + "epoch": 0.3468149368478858, + "grad_norm": 0.468712717294693, + "learning_rate": 1.8571104493073495e-05, + "loss": 0.5234, + "step": 12631 + }, + { + "epoch": 0.3468423942888523, + "grad_norm": 0.3820745348930359, + "learning_rate": 1.8570882002670765e-05, + "loss": 0.55, + "step": 12632 + }, + { + "epoch": 0.34686985172981877, + "grad_norm": 0.41026949882507324, + "learning_rate": 1.8570659496280608e-05, + "loss": 0.5244, + "step": 12633 + }, + { + "epoch": 0.34689730917078526, + "grad_norm": 0.32711026072502136, + "learning_rate": 1.8570436973903435e-05, + "loss": 0.4778, + "step": 12634 + }, + { + "epoch": 0.3469247666117518, + "grad_norm": 0.34249842166900635, + "learning_rate": 1.857021443553966e-05, + "loss": 0.5649, + "step": 12635 + }, + { + "epoch": 0.3469522240527183, + "grad_norm": 0.3711831867694855, + "learning_rate": 1.85699918811897e-05, + "loss": 0.5599, + "step": 12636 + }, + { + "epoch": 0.3469796814936848, + "grad_norm": 0.35696038603782654, + "learning_rate": 1.856976931085397e-05, + "loss": 0.524, + "step": 12637 + }, + { + "epoch": 0.3470071389346513, + "grad_norm": 0.4327448606491089, + "learning_rate": 1.856954672453289e-05, + "loss": 0.5927, + "step": 12638 + }, + { + "epoch": 0.3470345963756178, + "grad_norm": 0.3969862759113312, + "learning_rate": 1.8569324122226865e-05, + "loss": 0.5845, + "step": 12639 + }, + { + "epoch": 0.3470620538165843, + "grad_norm": 0.3609474003314972, + "learning_rate": 1.8569101503936317e-05, + "loss": 0.5648, + "step": 12640 + }, + { + "epoch": 0.3470895112575508, + "grad_norm": 0.378280371427536, + "learning_rate": 1.856887886966166e-05, + "loss": 0.5533, + "step": 12641 + }, + { + "epoch": 0.3471169686985173, + "grad_norm": 0.33960744738578796, + "learning_rate": 1.8568656219403306e-05, + "loss": 0.5316, + "step": 12642 + }, + { + "epoch": 0.3471444261394838, + "grad_norm": 0.438093364238739, + "learning_rate": 1.8568433553161677e-05, + "loss": 0.5649, + "step": 12643 + }, + { + "epoch": 0.3471718835804503, + "grad_norm": 0.3610592186450958, + "learning_rate": 1.8568210870937183e-05, + "loss": 0.5684, + "step": 12644 + }, + { + "epoch": 0.3471993410214168, + "grad_norm": 0.41504061222076416, + "learning_rate": 1.856798817273024e-05, + "loss": 0.5887, + "step": 12645 + }, + { + "epoch": 0.3472267984623833, + "grad_norm": 0.353227436542511, + "learning_rate": 1.856776545854127e-05, + "loss": 0.5031, + "step": 12646 + }, + { + "epoch": 0.3472542559033498, + "grad_norm": 0.3714257478713989, + "learning_rate": 1.8567542728370678e-05, + "loss": 0.5387, + "step": 12647 + }, + { + "epoch": 0.3472817133443163, + "grad_norm": 0.3157273232936859, + "learning_rate": 1.8567319982218887e-05, + "loss": 0.4931, + "step": 12648 + }, + { + "epoch": 0.34730917078528284, + "grad_norm": 0.3749190866947174, + "learning_rate": 1.8567097220086308e-05, + "loss": 0.5485, + "step": 12649 + }, + { + "epoch": 0.34733662822624933, + "grad_norm": 0.377169668674469, + "learning_rate": 1.8566874441973358e-05, + "loss": 0.47, + "step": 12650 + }, + { + "epoch": 0.3473640856672158, + "grad_norm": 0.3514383137226105, + "learning_rate": 1.8566651647880454e-05, + "loss": 0.5512, + "step": 12651 + }, + { + "epoch": 0.3473915431081823, + "grad_norm": 0.3752650022506714, + "learning_rate": 1.8566428837808012e-05, + "loss": 0.4709, + "step": 12652 + }, + { + "epoch": 0.3474190005491488, + "grad_norm": 0.46758154034614563, + "learning_rate": 1.8566206011756443e-05, + "loss": 0.66, + "step": 12653 + }, + { + "epoch": 0.3474464579901153, + "grad_norm": 0.3444725275039673, + "learning_rate": 1.8565983169726166e-05, + "loss": 0.4485, + "step": 12654 + }, + { + "epoch": 0.3474739154310818, + "grad_norm": 0.39415305852890015, + "learning_rate": 1.85657603117176e-05, + "loss": 0.5179, + "step": 12655 + }, + { + "epoch": 0.3475013728720483, + "grad_norm": 0.37960362434387207, + "learning_rate": 1.8565537437731152e-05, + "loss": 0.4972, + "step": 12656 + }, + { + "epoch": 0.34752883031301485, + "grad_norm": 0.7715675830841064, + "learning_rate": 1.8565314547767247e-05, + "loss": 0.5379, + "step": 12657 + }, + { + "epoch": 0.34755628775398134, + "grad_norm": 0.42669180035591125, + "learning_rate": 1.8565091641826295e-05, + "loss": 0.4943, + "step": 12658 + }, + { + "epoch": 0.34758374519494784, + "grad_norm": 0.3571719229221344, + "learning_rate": 1.8564868719908713e-05, + "loss": 0.5319, + "step": 12659 + }, + { + "epoch": 0.34761120263591433, + "grad_norm": 0.4460781216621399, + "learning_rate": 1.856464578201492e-05, + "loss": 0.476, + "step": 12660 + }, + { + "epoch": 0.3476386600768808, + "grad_norm": 0.3776991665363312, + "learning_rate": 1.8564422828145327e-05, + "loss": 0.5341, + "step": 12661 + }, + { + "epoch": 0.3476661175178473, + "grad_norm": 0.3844689428806305, + "learning_rate": 1.856419985830035e-05, + "loss": 0.5207, + "step": 12662 + }, + { + "epoch": 0.3476935749588138, + "grad_norm": 0.457171767950058, + "learning_rate": 1.856397687248041e-05, + "loss": 0.5157, + "step": 12663 + }, + { + "epoch": 0.34772103239978036, + "grad_norm": 0.5097456574440002, + "learning_rate": 1.856375387068592e-05, + "loss": 0.6521, + "step": 12664 + }, + { + "epoch": 0.34774848984074685, + "grad_norm": 0.4551370441913605, + "learning_rate": 1.8563530852917293e-05, + "loss": 0.5223, + "step": 12665 + }, + { + "epoch": 0.34777594728171335, + "grad_norm": 0.4334767758846283, + "learning_rate": 1.856330781917495e-05, + "loss": 0.5329, + "step": 12666 + }, + { + "epoch": 0.34780340472267984, + "grad_norm": 0.39035770297050476, + "learning_rate": 1.8563084769459303e-05, + "loss": 0.6251, + "step": 12667 + }, + { + "epoch": 0.34783086216364634, + "grad_norm": 0.3291419744491577, + "learning_rate": 1.856286170377077e-05, + "loss": 0.532, + "step": 12668 + }, + { + "epoch": 0.34785831960461283, + "grad_norm": 0.4791546165943146, + "learning_rate": 1.856263862210977e-05, + "loss": 0.6054, + "step": 12669 + }, + { + "epoch": 0.3478857770455793, + "grad_norm": 0.419318825006485, + "learning_rate": 1.8562415524476714e-05, + "loss": 0.5956, + "step": 12670 + }, + { + "epoch": 0.3479132344865459, + "grad_norm": 0.3686564564704895, + "learning_rate": 1.856219241087202e-05, + "loss": 0.5119, + "step": 12671 + }, + { + "epoch": 0.34794069192751237, + "grad_norm": 0.4044903516769409, + "learning_rate": 1.8561969281296103e-05, + "loss": 0.587, + "step": 12672 + }, + { + "epoch": 0.34796814936847886, + "grad_norm": 0.35701268911361694, + "learning_rate": 1.8561746135749384e-05, + "loss": 0.5276, + "step": 12673 + }, + { + "epoch": 0.34799560680944536, + "grad_norm": 0.31550541520118713, + "learning_rate": 1.8561522974232273e-05, + "loss": 0.4819, + "step": 12674 + }, + { + "epoch": 0.34802306425041185, + "grad_norm": 0.4033799171447754, + "learning_rate": 1.8561299796745193e-05, + "loss": 0.5118, + "step": 12675 + }, + { + "epoch": 0.34805052169137835, + "grad_norm": 0.38989681005477905, + "learning_rate": 1.8561076603288553e-05, + "loss": 0.6018, + "step": 12676 + }, + { + "epoch": 0.34807797913234484, + "grad_norm": 0.3164607584476471, + "learning_rate": 1.8560853393862774e-05, + "loss": 0.4325, + "step": 12677 + }, + { + "epoch": 0.3481054365733114, + "grad_norm": 0.36645808815956116, + "learning_rate": 1.856063016846827e-05, + "loss": 0.655, + "step": 12678 + }, + { + "epoch": 0.3481328940142779, + "grad_norm": 0.3638545274734497, + "learning_rate": 1.8560406927105458e-05, + "loss": 0.5429, + "step": 12679 + }, + { + "epoch": 0.3481603514552444, + "grad_norm": 0.3753422796726227, + "learning_rate": 1.8560183669774758e-05, + "loss": 0.5253, + "step": 12680 + }, + { + "epoch": 0.34818780889621087, + "grad_norm": 0.3696102797985077, + "learning_rate": 1.855996039647658e-05, + "loss": 0.557, + "step": 12681 + }, + { + "epoch": 0.34821526633717736, + "grad_norm": 0.39345821738243103, + "learning_rate": 1.8559737107211348e-05, + "loss": 0.5393, + "step": 12682 + }, + { + "epoch": 0.34824272377814386, + "grad_norm": 0.3660038113594055, + "learning_rate": 1.855951380197947e-05, + "loss": 0.5734, + "step": 12683 + }, + { + "epoch": 0.34827018121911035, + "grad_norm": 0.3815183639526367, + "learning_rate": 1.855929048078137e-05, + "loss": 0.5138, + "step": 12684 + }, + { + "epoch": 0.3482976386600769, + "grad_norm": 0.34636929631233215, + "learning_rate": 1.8559067143617458e-05, + "loss": 0.4594, + "step": 12685 + }, + { + "epoch": 0.3483250961010434, + "grad_norm": 0.36037972569465637, + "learning_rate": 1.855884379048816e-05, + "loss": 0.5037, + "step": 12686 + }, + { + "epoch": 0.3483525535420099, + "grad_norm": 0.3991076648235321, + "learning_rate": 1.8558620421393877e-05, + "loss": 0.5144, + "step": 12687 + }, + { + "epoch": 0.3483800109829764, + "grad_norm": 0.3529602587223053, + "learning_rate": 1.8558397036335044e-05, + "loss": 0.5001, + "step": 12688 + }, + { + "epoch": 0.3484074684239429, + "grad_norm": 0.48140037059783936, + "learning_rate": 1.8558173635312065e-05, + "loss": 0.5983, + "step": 12689 + }, + { + "epoch": 0.3484349258649094, + "grad_norm": 0.432038813829422, + "learning_rate": 1.855795021832536e-05, + "loss": 0.6027, + "step": 12690 + }, + { + "epoch": 0.34846238330587587, + "grad_norm": 0.3885188400745392, + "learning_rate": 1.855772678537535e-05, + "loss": 0.4654, + "step": 12691 + }, + { + "epoch": 0.3484898407468424, + "grad_norm": 0.3622991144657135, + "learning_rate": 1.8557503336462447e-05, + "loss": 0.4782, + "step": 12692 + }, + { + "epoch": 0.3485172981878089, + "grad_norm": 0.41469234228134155, + "learning_rate": 1.855727987158707e-05, + "loss": 0.5633, + "step": 12693 + }, + { + "epoch": 0.3485447556287754, + "grad_norm": 0.3547157347202301, + "learning_rate": 1.8557056390749633e-05, + "loss": 0.5342, + "step": 12694 + }, + { + "epoch": 0.3485722130697419, + "grad_norm": 0.37723395228385925, + "learning_rate": 1.8556832893950555e-05, + "loss": 0.492, + "step": 12695 + }, + { + "epoch": 0.3485996705107084, + "grad_norm": 0.40916070342063904, + "learning_rate": 1.855660938119025e-05, + "loss": 0.5964, + "step": 12696 + }, + { + "epoch": 0.3486271279516749, + "grad_norm": 0.3483370542526245, + "learning_rate": 1.855638585246914e-05, + "loss": 0.4685, + "step": 12697 + }, + { + "epoch": 0.3486545853926414, + "grad_norm": 0.3677574396133423, + "learning_rate": 1.8556162307787642e-05, + "loss": 0.5245, + "step": 12698 + }, + { + "epoch": 0.34868204283360793, + "grad_norm": 0.37959524989128113, + "learning_rate": 1.8555938747146167e-05, + "loss": 0.5292, + "step": 12699 + }, + { + "epoch": 0.3487095002745744, + "grad_norm": 0.4152543246746063, + "learning_rate": 1.8555715170545138e-05, + "loss": 0.6112, + "step": 12700 + }, + { + "epoch": 0.3487369577155409, + "grad_norm": 0.3743566572666168, + "learning_rate": 1.8555491577984967e-05, + "loss": 0.4732, + "step": 12701 + }, + { + "epoch": 0.3487644151565074, + "grad_norm": 0.33464205265045166, + "learning_rate": 1.8555267969466076e-05, + "loss": 0.5436, + "step": 12702 + }, + { + "epoch": 0.3487918725974739, + "grad_norm": 0.4057473838329315, + "learning_rate": 1.8555044344988877e-05, + "loss": 0.4758, + "step": 12703 + }, + { + "epoch": 0.3488193300384404, + "grad_norm": 0.35516592860221863, + "learning_rate": 1.8554820704553795e-05, + "loss": 0.6022, + "step": 12704 + }, + { + "epoch": 0.3488467874794069, + "grad_norm": 0.4343249499797821, + "learning_rate": 1.8554597048161235e-05, + "loss": 0.5497, + "step": 12705 + }, + { + "epoch": 0.34887424492037344, + "grad_norm": 0.3886490762233734, + "learning_rate": 1.8554373375811625e-05, + "loss": 0.5605, + "step": 12706 + }, + { + "epoch": 0.34890170236133994, + "grad_norm": 0.6382454037666321, + "learning_rate": 1.8554149687505377e-05, + "loss": 0.4582, + "step": 12707 + }, + { + "epoch": 0.34892915980230643, + "grad_norm": 0.37029969692230225, + "learning_rate": 1.8553925983242913e-05, + "loss": 0.4814, + "step": 12708 + }, + { + "epoch": 0.3489566172432729, + "grad_norm": 0.4169829487800598, + "learning_rate": 1.8553702263024645e-05, + "loss": 0.6322, + "step": 12709 + }, + { + "epoch": 0.3489840746842394, + "grad_norm": 0.36576756834983826, + "learning_rate": 1.855347852685099e-05, + "loss": 0.5533, + "step": 12710 + }, + { + "epoch": 0.3490115321252059, + "grad_norm": 0.35884466767311096, + "learning_rate": 1.8553254774722373e-05, + "loss": 0.5604, + "step": 12711 + }, + { + "epoch": 0.3490389895661724, + "grad_norm": 0.3826451301574707, + "learning_rate": 1.8553031006639198e-05, + "loss": 0.4887, + "step": 12712 + }, + { + "epoch": 0.34906644700713896, + "grad_norm": 0.33862385153770447, + "learning_rate": 1.8552807222601897e-05, + "loss": 0.4632, + "step": 12713 + }, + { + "epoch": 0.34909390444810545, + "grad_norm": 0.37098926305770874, + "learning_rate": 1.855258342261088e-05, + "loss": 0.5172, + "step": 12714 + }, + { + "epoch": 0.34912136188907195, + "grad_norm": 0.353633314371109, + "learning_rate": 1.8552359606666562e-05, + "loss": 0.485, + "step": 12715 + }, + { + "epoch": 0.34914881933003844, + "grad_norm": 1.1264735460281372, + "learning_rate": 1.8552135774769365e-05, + "loss": 0.5854, + "step": 12716 + }, + { + "epoch": 0.34917627677100493, + "grad_norm": 0.34871238470077515, + "learning_rate": 1.8551911926919705e-05, + "loss": 0.5267, + "step": 12717 + }, + { + "epoch": 0.34920373421197143, + "grad_norm": 0.3877100944519043, + "learning_rate": 1.8551688063118003e-05, + "loss": 0.5429, + "step": 12718 + }, + { + "epoch": 0.3492311916529379, + "grad_norm": 0.35262084007263184, + "learning_rate": 1.8551464183364668e-05, + "loss": 0.4494, + "step": 12719 + }, + { + "epoch": 0.34925864909390447, + "grad_norm": 0.4484689235687256, + "learning_rate": 1.855124028766013e-05, + "loss": 0.5421, + "step": 12720 + }, + { + "epoch": 0.34928610653487097, + "grad_norm": 0.379679799079895, + "learning_rate": 1.8551016376004796e-05, + "loss": 0.508, + "step": 12721 + }, + { + "epoch": 0.34931356397583746, + "grad_norm": 0.43114110827445984, + "learning_rate": 1.8550792448399083e-05, + "loss": 0.5378, + "step": 12722 + }, + { + "epoch": 0.34934102141680395, + "grad_norm": 0.35555553436279297, + "learning_rate": 1.855056850484342e-05, + "loss": 0.478, + "step": 12723 + }, + { + "epoch": 0.34936847885777045, + "grad_norm": 0.39386579394340515, + "learning_rate": 1.8550344545338214e-05, + "loss": 0.501, + "step": 12724 + }, + { + "epoch": 0.34939593629873694, + "grad_norm": 0.3838627338409424, + "learning_rate": 1.8550120569883887e-05, + "loss": 0.4911, + "step": 12725 + }, + { + "epoch": 0.34942339373970344, + "grad_norm": 0.36489996314048767, + "learning_rate": 1.8549896578480856e-05, + "loss": 0.4717, + "step": 12726 + }, + { + "epoch": 0.34945085118067, + "grad_norm": 0.3553563058376312, + "learning_rate": 1.854967257112954e-05, + "loss": 0.4237, + "step": 12727 + }, + { + "epoch": 0.3494783086216365, + "grad_norm": 0.3648727238178253, + "learning_rate": 1.8549448547830356e-05, + "loss": 0.5015, + "step": 12728 + }, + { + "epoch": 0.349505766062603, + "grad_norm": 0.4029054045677185, + "learning_rate": 1.8549224508583722e-05, + "loss": 0.5437, + "step": 12729 + }, + { + "epoch": 0.34953322350356947, + "grad_norm": 0.3872370421886444, + "learning_rate": 1.8549000453390053e-05, + "loss": 0.5273, + "step": 12730 + }, + { + "epoch": 0.34956068094453596, + "grad_norm": 0.4240622818470001, + "learning_rate": 1.8548776382249772e-05, + "loss": 0.4971, + "step": 12731 + }, + { + "epoch": 0.34958813838550246, + "grad_norm": 0.3131033182144165, + "learning_rate": 1.8548552295163294e-05, + "loss": 0.4216, + "step": 12732 + }, + { + "epoch": 0.34961559582646895, + "grad_norm": 0.34314465522766113, + "learning_rate": 1.8548328192131037e-05, + "loss": 0.5537, + "step": 12733 + }, + { + "epoch": 0.3496430532674355, + "grad_norm": 0.35740411281585693, + "learning_rate": 1.854810407315342e-05, + "loss": 0.4894, + "step": 12734 + }, + { + "epoch": 0.349670510708402, + "grad_norm": 0.3857709765434265, + "learning_rate": 1.8547879938230862e-05, + "loss": 0.5537, + "step": 12735 + }, + { + "epoch": 0.3496979681493685, + "grad_norm": 0.35877251625061035, + "learning_rate": 1.8547655787363778e-05, + "loss": 0.5082, + "step": 12736 + }, + { + "epoch": 0.349725425590335, + "grad_norm": 0.3999338746070862, + "learning_rate": 1.8547431620552586e-05, + "loss": 0.593, + "step": 12737 + }, + { + "epoch": 0.3497528830313015, + "grad_norm": 0.352750688791275, + "learning_rate": 1.8547207437797713e-05, + "loss": 0.506, + "step": 12738 + }, + { + "epoch": 0.34978034047226797, + "grad_norm": 0.3492289185523987, + "learning_rate": 1.8546983239099563e-05, + "loss": 0.5164, + "step": 12739 + }, + { + "epoch": 0.34980779791323446, + "grad_norm": 0.3752346336841583, + "learning_rate": 1.8546759024458565e-05, + "loss": 0.4911, + "step": 12740 + }, + { + "epoch": 0.349835255354201, + "grad_norm": 0.40652841329574585, + "learning_rate": 1.8546534793875134e-05, + "loss": 0.5714, + "step": 12741 + }, + { + "epoch": 0.3498627127951675, + "grad_norm": 0.31728947162628174, + "learning_rate": 1.8546310547349685e-05, + "loss": 0.5137, + "step": 12742 + }, + { + "epoch": 0.349890170236134, + "grad_norm": 0.3243911564350128, + "learning_rate": 1.8546086284882642e-05, + "loss": 0.5372, + "step": 12743 + }, + { + "epoch": 0.3499176276771005, + "grad_norm": 0.33510416746139526, + "learning_rate": 1.854586200647442e-05, + "loss": 0.4561, + "step": 12744 + }, + { + "epoch": 0.349945085118067, + "grad_norm": 0.3367617428302765, + "learning_rate": 1.8545637712125436e-05, + "loss": 0.4979, + "step": 12745 + }, + { + "epoch": 0.3499725425590335, + "grad_norm": 0.34926047921180725, + "learning_rate": 1.854541340183611e-05, + "loss": 0.4727, + "step": 12746 + }, + { + "epoch": 0.35, + "grad_norm": 0.4349781274795532, + "learning_rate": 1.8545189075606865e-05, + "loss": 0.5567, + "step": 12747 + }, + { + "epoch": 0.3500274574409665, + "grad_norm": 0.335662305355072, + "learning_rate": 1.8544964733438112e-05, + "loss": 0.4645, + "step": 12748 + }, + { + "epoch": 0.350054914881933, + "grad_norm": 0.6128681302070618, + "learning_rate": 1.8544740375330272e-05, + "loss": 0.4064, + "step": 12749 + }, + { + "epoch": 0.3500823723228995, + "grad_norm": 0.4202558696269989, + "learning_rate": 1.8544516001283762e-05, + "loss": 0.5582, + "step": 12750 + }, + { + "epoch": 0.350109829763866, + "grad_norm": 0.33661505579948425, + "learning_rate": 1.854429161129901e-05, + "loss": 0.5238, + "step": 12751 + }, + { + "epoch": 0.3501372872048325, + "grad_norm": 0.32788875699043274, + "learning_rate": 1.8544067205376417e-05, + "loss": 0.4553, + "step": 12752 + }, + { + "epoch": 0.350164744645799, + "grad_norm": 0.42758116126060486, + "learning_rate": 1.8543842783516418e-05, + "loss": 0.4639, + "step": 12753 + }, + { + "epoch": 0.3501922020867655, + "grad_norm": 0.3572043180465698, + "learning_rate": 1.8543618345719424e-05, + "loss": 0.5102, + "step": 12754 + }, + { + "epoch": 0.35021965952773204, + "grad_norm": 0.3937124013900757, + "learning_rate": 1.8543393891985853e-05, + "loss": 0.5021, + "step": 12755 + }, + { + "epoch": 0.35024711696869854, + "grad_norm": 0.4226818084716797, + "learning_rate": 1.854316942231613e-05, + "loss": 0.458, + "step": 12756 + }, + { + "epoch": 0.35027457440966503, + "grad_norm": 0.36984461545944214, + "learning_rate": 1.854294493671067e-05, + "loss": 0.507, + "step": 12757 + }, + { + "epoch": 0.3503020318506315, + "grad_norm": 0.3662862479686737, + "learning_rate": 1.8542720435169888e-05, + "loss": 0.5671, + "step": 12758 + }, + { + "epoch": 0.350329489291598, + "grad_norm": 0.4066954553127289, + "learning_rate": 1.8542495917694205e-05, + "loss": 0.5575, + "step": 12759 + }, + { + "epoch": 0.3503569467325645, + "grad_norm": 0.3352821171283722, + "learning_rate": 1.8542271384284043e-05, + "loss": 0.5312, + "step": 12760 + }, + { + "epoch": 0.350384404173531, + "grad_norm": 0.3678243160247803, + "learning_rate": 1.8542046834939816e-05, + "loss": 0.5047, + "step": 12761 + }, + { + "epoch": 0.35041186161449756, + "grad_norm": 0.3693774342536926, + "learning_rate": 1.8541822269661947e-05, + "loss": 0.5954, + "step": 12762 + }, + { + "epoch": 0.35043931905546405, + "grad_norm": 0.4238046407699585, + "learning_rate": 1.8541597688450853e-05, + "loss": 0.5149, + "step": 12763 + }, + { + "epoch": 0.35046677649643054, + "grad_norm": 0.3897447884082794, + "learning_rate": 1.8541373091306955e-05, + "loss": 0.5516, + "step": 12764 + }, + { + "epoch": 0.35049423393739704, + "grad_norm": 0.35969260334968567, + "learning_rate": 1.8541148478230668e-05, + "loss": 0.4635, + "step": 12765 + }, + { + "epoch": 0.35052169137836353, + "grad_norm": 0.3547324538230896, + "learning_rate": 1.8540923849222412e-05, + "loss": 0.4954, + "step": 12766 + }, + { + "epoch": 0.35054914881933, + "grad_norm": 0.3481164574623108, + "learning_rate": 1.854069920428261e-05, + "loss": 0.4254, + "step": 12767 + }, + { + "epoch": 0.3505766062602965, + "grad_norm": 0.35063424706459045, + "learning_rate": 1.8540474543411675e-05, + "loss": 0.5396, + "step": 12768 + }, + { + "epoch": 0.35060406370126307, + "grad_norm": 0.3863530158996582, + "learning_rate": 1.8540249866610033e-05, + "loss": 0.5965, + "step": 12769 + }, + { + "epoch": 0.35063152114222956, + "grad_norm": 0.3833959102630615, + "learning_rate": 1.8540025173878097e-05, + "loss": 0.5034, + "step": 12770 + }, + { + "epoch": 0.35065897858319606, + "grad_norm": 0.34697434306144714, + "learning_rate": 1.8539800465216287e-05, + "loss": 0.539, + "step": 12771 + }, + { + "epoch": 0.35068643602416255, + "grad_norm": 0.3505365550518036, + "learning_rate": 1.8539575740625026e-05, + "loss": 0.4604, + "step": 12772 + }, + { + "epoch": 0.35071389346512905, + "grad_norm": 0.3836917579174042, + "learning_rate": 1.8539351000104727e-05, + "loss": 0.5126, + "step": 12773 + }, + { + "epoch": 0.35074135090609554, + "grad_norm": 0.35812002420425415, + "learning_rate": 1.8539126243655815e-05, + "loss": 0.5647, + "step": 12774 + }, + { + "epoch": 0.35076880834706203, + "grad_norm": 0.5677432417869568, + "learning_rate": 1.853890147127871e-05, + "loss": 0.4971, + "step": 12775 + }, + { + "epoch": 0.3507962657880286, + "grad_norm": 0.3886649012565613, + "learning_rate": 1.8538676682973826e-05, + "loss": 0.4616, + "step": 12776 + }, + { + "epoch": 0.3508237232289951, + "grad_norm": 0.38589757680892944, + "learning_rate": 1.8538451878741583e-05, + "loss": 0.5971, + "step": 12777 + }, + { + "epoch": 0.35085118066996157, + "grad_norm": 0.41209521889686584, + "learning_rate": 1.8538227058582407e-05, + "loss": 0.531, + "step": 12778 + }, + { + "epoch": 0.35087863811092806, + "grad_norm": 0.315022349357605, + "learning_rate": 1.8538002222496707e-05, + "loss": 0.4432, + "step": 12779 + }, + { + "epoch": 0.35090609555189456, + "grad_norm": 0.3819486200809479, + "learning_rate": 1.853777737048491e-05, + "loss": 0.5419, + "step": 12780 + }, + { + "epoch": 0.35093355299286105, + "grad_norm": 0.5199286937713623, + "learning_rate": 1.8537552502547435e-05, + "loss": 0.5752, + "step": 12781 + }, + { + "epoch": 0.35096101043382755, + "grad_norm": 0.35057711601257324, + "learning_rate": 1.8537327618684696e-05, + "loss": 0.5092, + "step": 12782 + }, + { + "epoch": 0.3509884678747941, + "grad_norm": 0.4272479712963104, + "learning_rate": 1.853710271889712e-05, + "loss": 0.515, + "step": 12783 + }, + { + "epoch": 0.3510159253157606, + "grad_norm": 0.3476881682872772, + "learning_rate": 1.853687780318512e-05, + "loss": 0.4821, + "step": 12784 + }, + { + "epoch": 0.3510433827567271, + "grad_norm": 0.3694227933883667, + "learning_rate": 1.853665287154912e-05, + "loss": 0.5293, + "step": 12785 + }, + { + "epoch": 0.3510708401976936, + "grad_norm": 0.40199264883995056, + "learning_rate": 1.853642792398954e-05, + "loss": 0.5242, + "step": 12786 + }, + { + "epoch": 0.3510982976386601, + "grad_norm": 0.36939477920532227, + "learning_rate": 1.8536202960506793e-05, + "loss": 0.4786, + "step": 12787 + }, + { + "epoch": 0.35112575507962657, + "grad_norm": 0.4210872948169708, + "learning_rate": 1.8535977981101306e-05, + "loss": 0.5389, + "step": 12788 + }, + { + "epoch": 0.35115321252059306, + "grad_norm": 0.4220708906650543, + "learning_rate": 1.8535752985773493e-05, + "loss": 0.558, + "step": 12789 + }, + { + "epoch": 0.35118066996155956, + "grad_norm": 0.39541953802108765, + "learning_rate": 1.8535527974523775e-05, + "loss": 0.5173, + "step": 12790 + }, + { + "epoch": 0.3512081274025261, + "grad_norm": 0.4044537842273712, + "learning_rate": 1.853530294735258e-05, + "loss": 0.5359, + "step": 12791 + }, + { + "epoch": 0.3512355848434926, + "grad_norm": 0.3896861970424652, + "learning_rate": 1.8535077904260315e-05, + "loss": 0.5585, + "step": 12792 + }, + { + "epoch": 0.3512630422844591, + "grad_norm": 0.3692465126514435, + "learning_rate": 1.8534852845247408e-05, + "loss": 0.5377, + "step": 12793 + }, + { + "epoch": 0.3512904997254256, + "grad_norm": 0.3557482957839966, + "learning_rate": 1.8534627770314277e-05, + "loss": 0.5238, + "step": 12794 + }, + { + "epoch": 0.3513179571663921, + "grad_norm": 0.36346128582954407, + "learning_rate": 1.8534402679461338e-05, + "loss": 0.4571, + "step": 12795 + }, + { + "epoch": 0.3513454146073586, + "grad_norm": 0.3818471431732178, + "learning_rate": 1.8534177572689018e-05, + "loss": 0.5025, + "step": 12796 + }, + { + "epoch": 0.35137287204832507, + "grad_norm": 0.38022857904434204, + "learning_rate": 1.853395244999773e-05, + "loss": 0.5901, + "step": 12797 + }, + { + "epoch": 0.3514003294892916, + "grad_norm": 0.4028988480567932, + "learning_rate": 1.8533727311387898e-05, + "loss": 0.6092, + "step": 12798 + }, + { + "epoch": 0.3514277869302581, + "grad_norm": 0.43498778343200684, + "learning_rate": 1.853350215685994e-05, + "loss": 0.5886, + "step": 12799 + }, + { + "epoch": 0.3514552443712246, + "grad_norm": 0.375965416431427, + "learning_rate": 1.853327698641428e-05, + "loss": 0.506, + "step": 12800 + }, + { + "epoch": 0.3514827018121911, + "grad_norm": 0.34415990114212036, + "learning_rate": 1.8533051800051333e-05, + "loss": 0.495, + "step": 12801 + }, + { + "epoch": 0.3515101592531576, + "grad_norm": 0.3847366273403168, + "learning_rate": 1.853282659777152e-05, + "loss": 0.516, + "step": 12802 + }, + { + "epoch": 0.3515376166941241, + "grad_norm": 0.6048359870910645, + "learning_rate": 1.8532601379575263e-05, + "loss": 0.5826, + "step": 12803 + }, + { + "epoch": 0.3515650741350906, + "grad_norm": 0.40873315930366516, + "learning_rate": 1.853237614546298e-05, + "loss": 0.5244, + "step": 12804 + }, + { + "epoch": 0.35159253157605713, + "grad_norm": 0.4180256128311157, + "learning_rate": 1.8532150895435095e-05, + "loss": 0.4362, + "step": 12805 + }, + { + "epoch": 0.3516199890170236, + "grad_norm": 0.347024530172348, + "learning_rate": 1.8531925629492026e-05, + "loss": 0.5762, + "step": 12806 + }, + { + "epoch": 0.3516474464579901, + "grad_norm": 0.3762400150299072, + "learning_rate": 1.853170034763419e-05, + "loss": 0.5328, + "step": 12807 + }, + { + "epoch": 0.3516749038989566, + "grad_norm": 0.3446353077888489, + "learning_rate": 1.853147504986201e-05, + "loss": 0.4289, + "step": 12808 + }, + { + "epoch": 0.3517023613399231, + "grad_norm": 0.40376850962638855, + "learning_rate": 1.8531249736175905e-05, + "loss": 0.6095, + "step": 12809 + }, + { + "epoch": 0.3517298187808896, + "grad_norm": 0.3772500455379486, + "learning_rate": 1.8531024406576297e-05, + "loss": 0.5399, + "step": 12810 + }, + { + "epoch": 0.3517572762218561, + "grad_norm": 0.3339478671550751, + "learning_rate": 1.8530799061063607e-05, + "loss": 0.4992, + "step": 12811 + }, + { + "epoch": 0.35178473366282265, + "grad_norm": 0.36901745200157166, + "learning_rate": 1.853057369963825e-05, + "loss": 0.4759, + "step": 12812 + }, + { + "epoch": 0.35181219110378914, + "grad_norm": 0.3863079249858856, + "learning_rate": 1.8530348322300657e-05, + "loss": 0.4362, + "step": 12813 + }, + { + "epoch": 0.35183964854475563, + "grad_norm": 0.3718009889125824, + "learning_rate": 1.853012292905124e-05, + "loss": 0.5582, + "step": 12814 + }, + { + "epoch": 0.35186710598572213, + "grad_norm": 0.3525647521018982, + "learning_rate": 1.8529897519890417e-05, + "loss": 0.5195, + "step": 12815 + }, + { + "epoch": 0.3518945634266886, + "grad_norm": 0.40134939551353455, + "learning_rate": 1.852967209481862e-05, + "loss": 0.549, + "step": 12816 + }, + { + "epoch": 0.3519220208676551, + "grad_norm": 0.43578633666038513, + "learning_rate": 1.8529446653836257e-05, + "loss": 0.5981, + "step": 12817 + }, + { + "epoch": 0.3519494783086216, + "grad_norm": 0.3593917191028595, + "learning_rate": 1.8529221196943755e-05, + "loss": 0.5465, + "step": 12818 + }, + { + "epoch": 0.35197693574958816, + "grad_norm": 0.35442444682121277, + "learning_rate": 1.852899572414153e-05, + "loss": 0.5559, + "step": 12819 + }, + { + "epoch": 0.35200439319055465, + "grad_norm": 0.3310231864452362, + "learning_rate": 1.8528770235430008e-05, + "loss": 0.4823, + "step": 12820 + }, + { + "epoch": 0.35203185063152115, + "grad_norm": 0.38789474964141846, + "learning_rate": 1.852854473080961e-05, + "loss": 0.4664, + "step": 12821 + }, + { + "epoch": 0.35205930807248764, + "grad_norm": 0.38769039511680603, + "learning_rate": 1.852831921028075e-05, + "loss": 0.6271, + "step": 12822 + }, + { + "epoch": 0.35208676551345414, + "grad_norm": 0.3682776093482971, + "learning_rate": 1.8528093673843855e-05, + "loss": 0.5802, + "step": 12823 + }, + { + "epoch": 0.35211422295442063, + "grad_norm": 0.3962457478046417, + "learning_rate": 1.8527868121499343e-05, + "loss": 0.4792, + "step": 12824 + }, + { + "epoch": 0.3521416803953871, + "grad_norm": 0.38238704204559326, + "learning_rate": 1.8527642553247637e-05, + "loss": 0.4732, + "step": 12825 + }, + { + "epoch": 0.3521691378363537, + "grad_norm": 0.4394192099571228, + "learning_rate": 1.8527416969089153e-05, + "loss": 0.5149, + "step": 12826 + }, + { + "epoch": 0.35219659527732017, + "grad_norm": 0.39303654432296753, + "learning_rate": 1.8527191369024316e-05, + "loss": 0.4709, + "step": 12827 + }, + { + "epoch": 0.35222405271828666, + "grad_norm": 0.3971758782863617, + "learning_rate": 1.8526965753053546e-05, + "loss": 0.4593, + "step": 12828 + }, + { + "epoch": 0.35225151015925316, + "grad_norm": 0.5130875110626221, + "learning_rate": 1.8526740121177265e-05, + "loss": 0.5363, + "step": 12829 + }, + { + "epoch": 0.35227896760021965, + "grad_norm": 0.33994677662849426, + "learning_rate": 1.852651447339589e-05, + "loss": 0.5003, + "step": 12830 + }, + { + "epoch": 0.35230642504118614, + "grad_norm": 0.32734236121177673, + "learning_rate": 1.8526288809709846e-05, + "loss": 0.5521, + "step": 12831 + }, + { + "epoch": 0.35233388248215264, + "grad_norm": 0.4052627682685852, + "learning_rate": 1.852606313011955e-05, + "loss": 0.5121, + "step": 12832 + }, + { + "epoch": 0.3523613399231192, + "grad_norm": 0.40140101313591003, + "learning_rate": 1.8525837434625425e-05, + "loss": 0.535, + "step": 12833 + }, + { + "epoch": 0.3523887973640857, + "grad_norm": 0.3887263536453247, + "learning_rate": 1.8525611723227892e-05, + "loss": 0.486, + "step": 12834 + }, + { + "epoch": 0.3524162548050522, + "grad_norm": 0.45914196968078613, + "learning_rate": 1.8525385995927373e-05, + "loss": 0.5076, + "step": 12835 + }, + { + "epoch": 0.35244371224601867, + "grad_norm": 0.3840571939945221, + "learning_rate": 1.8525160252724288e-05, + "loss": 0.4862, + "step": 12836 + }, + { + "epoch": 0.35247116968698516, + "grad_norm": 0.3851255178451538, + "learning_rate": 1.8524934493619054e-05, + "loss": 0.5704, + "step": 12837 + }, + { + "epoch": 0.35249862712795166, + "grad_norm": 0.34848812222480774, + "learning_rate": 1.8524708718612104e-05, + "loss": 0.4727, + "step": 12838 + }, + { + "epoch": 0.35252608456891815, + "grad_norm": 0.4177800118923187, + "learning_rate": 1.852448292770385e-05, + "loss": 0.517, + "step": 12839 + }, + { + "epoch": 0.3525535420098847, + "grad_norm": 0.41203561425209045, + "learning_rate": 1.852425712089471e-05, + "loss": 0.5718, + "step": 12840 + }, + { + "epoch": 0.3525809994508512, + "grad_norm": 0.37807831168174744, + "learning_rate": 1.8524031298185114e-05, + "loss": 0.573, + "step": 12841 + }, + { + "epoch": 0.3526084568918177, + "grad_norm": 0.4216982424259186, + "learning_rate": 1.8523805459575473e-05, + "loss": 0.5767, + "step": 12842 + }, + { + "epoch": 0.3526359143327842, + "grad_norm": 0.379564493894577, + "learning_rate": 1.8523579605066218e-05, + "loss": 0.5847, + "step": 12843 + }, + { + "epoch": 0.3526633717737507, + "grad_norm": 0.38498130440711975, + "learning_rate": 1.852335373465777e-05, + "loss": 0.4862, + "step": 12844 + }, + { + "epoch": 0.35269082921471717, + "grad_norm": 0.32204508781433105, + "learning_rate": 1.8523127848350544e-05, + "loss": 0.4844, + "step": 12845 + }, + { + "epoch": 0.35271828665568367, + "grad_norm": 0.3881717920303345, + "learning_rate": 1.8522901946144968e-05, + "loss": 0.6119, + "step": 12846 + }, + { + "epoch": 0.3527457440966502, + "grad_norm": 0.3525262176990509, + "learning_rate": 1.852267602804145e-05, + "loss": 0.5149, + "step": 12847 + }, + { + "epoch": 0.3527732015376167, + "grad_norm": 0.3706952631473541, + "learning_rate": 1.852245009404043e-05, + "loss": 0.4492, + "step": 12848 + }, + { + "epoch": 0.3528006589785832, + "grad_norm": 0.386439710855484, + "learning_rate": 1.8522224144142317e-05, + "loss": 0.4742, + "step": 12849 + }, + { + "epoch": 0.3528281164195497, + "grad_norm": 0.35717129707336426, + "learning_rate": 1.852199817834754e-05, + "loss": 0.4295, + "step": 12850 + }, + { + "epoch": 0.3528555738605162, + "grad_norm": 0.37986671924591064, + "learning_rate": 1.8521772196656513e-05, + "loss": 0.5157, + "step": 12851 + }, + { + "epoch": 0.3528830313014827, + "grad_norm": 0.4034775495529175, + "learning_rate": 1.852154619906966e-05, + "loss": 0.532, + "step": 12852 + }, + { + "epoch": 0.3529104887424492, + "grad_norm": 0.4948137700557709, + "learning_rate": 1.8521320185587405e-05, + "loss": 0.6043, + "step": 12853 + }, + { + "epoch": 0.35293794618341573, + "grad_norm": 0.3808092474937439, + "learning_rate": 1.852109415621017e-05, + "loss": 0.6346, + "step": 12854 + }, + { + "epoch": 0.3529654036243822, + "grad_norm": 0.3583548963069916, + "learning_rate": 1.852086811093837e-05, + "loss": 0.4636, + "step": 12855 + }, + { + "epoch": 0.3529928610653487, + "grad_norm": 0.37770089507102966, + "learning_rate": 1.8520642049772437e-05, + "loss": 0.4505, + "step": 12856 + }, + { + "epoch": 0.3530203185063152, + "grad_norm": 0.3642883598804474, + "learning_rate": 1.8520415972712783e-05, + "loss": 0.5099, + "step": 12857 + }, + { + "epoch": 0.3530477759472817, + "grad_norm": 0.34953078627586365, + "learning_rate": 1.8520189879759836e-05, + "loss": 0.4943, + "step": 12858 + }, + { + "epoch": 0.3530752333882482, + "grad_norm": 0.3851338028907776, + "learning_rate": 1.8519963770914014e-05, + "loss": 0.5536, + "step": 12859 + }, + { + "epoch": 0.3531026908292147, + "grad_norm": 0.41256478428840637, + "learning_rate": 1.8519737646175743e-05, + "loss": 0.564, + "step": 12860 + }, + { + "epoch": 0.35313014827018124, + "grad_norm": 0.4466272294521332, + "learning_rate": 1.851951150554544e-05, + "loss": 0.5406, + "step": 12861 + }, + { + "epoch": 0.35315760571114774, + "grad_norm": 0.3466949462890625, + "learning_rate": 1.8519285349023527e-05, + "loss": 0.5324, + "step": 12862 + }, + { + "epoch": 0.35318506315211423, + "grad_norm": 0.40668949484825134, + "learning_rate": 1.8519059176610433e-05, + "loss": 0.4992, + "step": 12863 + }, + { + "epoch": 0.3532125205930807, + "grad_norm": 0.35628795623779297, + "learning_rate": 1.851883298830657e-05, + "loss": 0.502, + "step": 12864 + }, + { + "epoch": 0.3532399780340472, + "grad_norm": 0.37629130482673645, + "learning_rate": 1.8518606784112367e-05, + "loss": 0.4996, + "step": 12865 + }, + { + "epoch": 0.3532674354750137, + "grad_norm": 0.3676777184009552, + "learning_rate": 1.8518380564028242e-05, + "loss": 0.4634, + "step": 12866 + }, + { + "epoch": 0.3532948929159802, + "grad_norm": 0.4142403304576874, + "learning_rate": 1.851815432805462e-05, + "loss": 0.5415, + "step": 12867 + }, + { + "epoch": 0.35332235035694676, + "grad_norm": 0.33964458107948303, + "learning_rate": 1.8517928076191922e-05, + "loss": 0.5115, + "step": 12868 + }, + { + "epoch": 0.35334980779791325, + "grad_norm": 0.41167959570884705, + "learning_rate": 1.8517701808440568e-05, + "loss": 0.5489, + "step": 12869 + }, + { + "epoch": 0.35337726523887975, + "grad_norm": 0.3361583948135376, + "learning_rate": 1.8517475524800982e-05, + "loss": 0.5227, + "step": 12870 + }, + { + "epoch": 0.35340472267984624, + "grad_norm": 0.37269118428230286, + "learning_rate": 1.8517249225273583e-05, + "loss": 0.5123, + "step": 12871 + }, + { + "epoch": 0.35343218012081273, + "grad_norm": 0.36148127913475037, + "learning_rate": 1.85170229098588e-05, + "loss": 0.5627, + "step": 12872 + }, + { + "epoch": 0.3534596375617792, + "grad_norm": 0.33478206396102905, + "learning_rate": 1.851679657855705e-05, + "loss": 0.4503, + "step": 12873 + }, + { + "epoch": 0.3534870950027457, + "grad_norm": 0.34238138794898987, + "learning_rate": 1.8516570231368757e-05, + "loss": 0.54, + "step": 12874 + }, + { + "epoch": 0.35351455244371227, + "grad_norm": 0.387583464384079, + "learning_rate": 1.8516343868294337e-05, + "loss": 0.5691, + "step": 12875 + }, + { + "epoch": 0.35354200988467877, + "grad_norm": 0.43100106716156006, + "learning_rate": 1.8516117489334224e-05, + "loss": 0.4988, + "step": 12876 + }, + { + "epoch": 0.35356946732564526, + "grad_norm": 0.36195090413093567, + "learning_rate": 1.851589109448883e-05, + "loss": 0.5612, + "step": 12877 + }, + { + "epoch": 0.35359692476661175, + "grad_norm": 0.3720771074295044, + "learning_rate": 1.8515664683758583e-05, + "loss": 0.4335, + "step": 12878 + }, + { + "epoch": 0.35362438220757825, + "grad_norm": 0.4265027642250061, + "learning_rate": 1.85154382571439e-05, + "loss": 0.5071, + "step": 12879 + }, + { + "epoch": 0.35365183964854474, + "grad_norm": 0.37814003229141235, + "learning_rate": 1.851521181464521e-05, + "loss": 0.5772, + "step": 12880 + }, + { + "epoch": 0.35367929708951124, + "grad_norm": 0.34996068477630615, + "learning_rate": 1.8514985356262932e-05, + "loss": 0.5582, + "step": 12881 + }, + { + "epoch": 0.3537067545304778, + "grad_norm": 0.4252006709575653, + "learning_rate": 1.851475888199749e-05, + "loss": 0.616, + "step": 12882 + }, + { + "epoch": 0.3537342119714443, + "grad_norm": 0.3613317012786865, + "learning_rate": 1.8514532391849304e-05, + "loss": 0.4737, + "step": 12883 + }, + { + "epoch": 0.3537616694124108, + "grad_norm": 0.3640919029712677, + "learning_rate": 1.8514305885818796e-05, + "loss": 0.5329, + "step": 12884 + }, + { + "epoch": 0.35378912685337727, + "grad_norm": 0.31983834505081177, + "learning_rate": 1.8514079363906393e-05, + "loss": 0.4673, + "step": 12885 + }, + { + "epoch": 0.35381658429434376, + "grad_norm": 0.3426242172718048, + "learning_rate": 1.8513852826112512e-05, + "loss": 0.4935, + "step": 12886 + }, + { + "epoch": 0.35384404173531026, + "grad_norm": 0.3996743857860565, + "learning_rate": 1.851362627243758e-05, + "loss": 0.5476, + "step": 12887 + }, + { + "epoch": 0.35387149917627675, + "grad_norm": 0.3796069324016571, + "learning_rate": 1.8513399702882016e-05, + "loss": 0.4978, + "step": 12888 + }, + { + "epoch": 0.3538989566172433, + "grad_norm": 0.3981380760669708, + "learning_rate": 1.8513173117446245e-05, + "loss": 0.5627, + "step": 12889 + }, + { + "epoch": 0.3539264140582098, + "grad_norm": 0.3749481439590454, + "learning_rate": 1.851294651613069e-05, + "loss": 0.5093, + "step": 12890 + }, + { + "epoch": 0.3539538714991763, + "grad_norm": 0.3840797543525696, + "learning_rate": 1.851271989893577e-05, + "loss": 0.5261, + "step": 12891 + }, + { + "epoch": 0.3539813289401428, + "grad_norm": 0.3574707806110382, + "learning_rate": 1.8512493265861914e-05, + "loss": 0.4366, + "step": 12892 + }, + { + "epoch": 0.3540087863811093, + "grad_norm": 0.3929075002670288, + "learning_rate": 1.851226661690954e-05, + "loss": 0.6172, + "step": 12893 + }, + { + "epoch": 0.35403624382207577, + "grad_norm": 0.3466016352176666, + "learning_rate": 1.851203995207907e-05, + "loss": 0.5613, + "step": 12894 + }, + { + "epoch": 0.35406370126304226, + "grad_norm": 0.3712824285030365, + "learning_rate": 1.8511813271370932e-05, + "loss": 0.5098, + "step": 12895 + }, + { + "epoch": 0.3540911587040088, + "grad_norm": 0.39508551359176636, + "learning_rate": 1.8511586574785542e-05, + "loss": 0.5356, + "step": 12896 + }, + { + "epoch": 0.3541186161449753, + "grad_norm": 0.3669546842575073, + "learning_rate": 1.851135986232333e-05, + "loss": 0.473, + "step": 12897 + }, + { + "epoch": 0.3541460735859418, + "grad_norm": 0.36558735370635986, + "learning_rate": 1.8511133133984712e-05, + "loss": 0.4587, + "step": 12898 + }, + { + "epoch": 0.3541735310269083, + "grad_norm": 0.6372311115264893, + "learning_rate": 1.8510906389770118e-05, + "loss": 0.4192, + "step": 12899 + }, + { + "epoch": 0.3542009884678748, + "grad_norm": 0.38712602853775024, + "learning_rate": 1.8510679629679968e-05, + "loss": 0.5134, + "step": 12900 + }, + { + "epoch": 0.3542284459088413, + "grad_norm": 0.37107235193252563, + "learning_rate": 1.851045285371468e-05, + "loss": 0.5634, + "step": 12901 + }, + { + "epoch": 0.3542559033498078, + "grad_norm": 0.3554193377494812, + "learning_rate": 1.851022606187468e-05, + "loss": 0.498, + "step": 12902 + }, + { + "epoch": 0.3542833607907743, + "grad_norm": 0.3444443345069885, + "learning_rate": 1.8509999254160396e-05, + "loss": 0.5057, + "step": 12903 + }, + { + "epoch": 0.3543108182317408, + "grad_norm": 0.3791157305240631, + "learning_rate": 1.8509772430572245e-05, + "loss": 0.5177, + "step": 12904 + }, + { + "epoch": 0.3543382756727073, + "grad_norm": 0.4048106074333191, + "learning_rate": 1.8509545591110652e-05, + "loss": 0.6075, + "step": 12905 + }, + { + "epoch": 0.3543657331136738, + "grad_norm": 0.3326372504234314, + "learning_rate": 1.8509318735776046e-05, + "loss": 0.4806, + "step": 12906 + }, + { + "epoch": 0.3543931905546403, + "grad_norm": 0.3435409665107727, + "learning_rate": 1.850909186456884e-05, + "loss": 0.523, + "step": 12907 + }, + { + "epoch": 0.3544206479956068, + "grad_norm": 0.378873735666275, + "learning_rate": 1.850886497748946e-05, + "loss": 0.578, + "step": 12908 + }, + { + "epoch": 0.3544481054365733, + "grad_norm": 0.39823320508003235, + "learning_rate": 1.8508638074538335e-05, + "loss": 0.5833, + "step": 12909 + }, + { + "epoch": 0.35447556287753984, + "grad_norm": 0.3670293688774109, + "learning_rate": 1.8508411155715885e-05, + "loss": 0.5578, + "step": 12910 + }, + { + "epoch": 0.35450302031850633, + "grad_norm": 0.3475448489189148, + "learning_rate": 1.850818422102253e-05, + "loss": 0.5059, + "step": 12911 + }, + { + "epoch": 0.35453047775947283, + "grad_norm": 0.33383315801620483, + "learning_rate": 1.8507957270458696e-05, + "loss": 0.5205, + "step": 12912 + }, + { + "epoch": 0.3545579352004393, + "grad_norm": 0.3599684238433838, + "learning_rate": 1.850773030402481e-05, + "loss": 0.5265, + "step": 12913 + }, + { + "epoch": 0.3545853926414058, + "grad_norm": 0.3549947738647461, + "learning_rate": 1.8507503321721287e-05, + "loss": 0.5596, + "step": 12914 + }, + { + "epoch": 0.3546128500823723, + "grad_norm": 0.34308668971061707, + "learning_rate": 1.8507276323548557e-05, + "loss": 0.5148, + "step": 12915 + }, + { + "epoch": 0.3546403075233388, + "grad_norm": 0.3525458574295044, + "learning_rate": 1.850704930950704e-05, + "loss": 0.4756, + "step": 12916 + }, + { + "epoch": 0.35466776496430535, + "grad_norm": 0.3469778597354889, + "learning_rate": 1.850682227959716e-05, + "loss": 0.531, + "step": 12917 + }, + { + "epoch": 0.35469522240527185, + "grad_norm": 0.3996882736682892, + "learning_rate": 1.8506595233819345e-05, + "loss": 0.5354, + "step": 12918 + }, + { + "epoch": 0.35472267984623834, + "grad_norm": 0.4604485034942627, + "learning_rate": 1.8506368172174014e-05, + "loss": 0.586, + "step": 12919 + }, + { + "epoch": 0.35475013728720484, + "grad_norm": 0.36522454023361206, + "learning_rate": 1.850614109466159e-05, + "loss": 0.5225, + "step": 12920 + }, + { + "epoch": 0.35477759472817133, + "grad_norm": 0.3789752125740051, + "learning_rate": 1.85059140012825e-05, + "loss": 0.5469, + "step": 12921 + }, + { + "epoch": 0.3548050521691378, + "grad_norm": 0.38411080837249756, + "learning_rate": 1.8505686892037166e-05, + "loss": 0.5517, + "step": 12922 + }, + { + "epoch": 0.3548325096101043, + "grad_norm": 0.3515183925628662, + "learning_rate": 1.8505459766926004e-05, + "loss": 0.5019, + "step": 12923 + }, + { + "epoch": 0.3548599670510708, + "grad_norm": 0.4095139801502228, + "learning_rate": 1.8505232625949455e-05, + "loss": 0.5275, + "step": 12924 + }, + { + "epoch": 0.35488742449203736, + "grad_norm": 0.3980804681777954, + "learning_rate": 1.8505005469107927e-05, + "loss": 0.5866, + "step": 12925 + }, + { + "epoch": 0.35491488193300386, + "grad_norm": 0.3363350033760071, + "learning_rate": 1.8504778296401852e-05, + "loss": 0.4267, + "step": 12926 + }, + { + "epoch": 0.35494233937397035, + "grad_norm": 0.3788478374481201, + "learning_rate": 1.8504551107831646e-05, + "loss": 0.5497, + "step": 12927 + }, + { + "epoch": 0.35496979681493684, + "grad_norm": 0.39669445157051086, + "learning_rate": 1.8504323903397743e-05, + "loss": 0.5068, + "step": 12928 + }, + { + "epoch": 0.35499725425590334, + "grad_norm": 0.3539443612098694, + "learning_rate": 1.850409668310056e-05, + "loss": 0.536, + "step": 12929 + }, + { + "epoch": 0.35502471169686983, + "grad_norm": 0.35917991399765015, + "learning_rate": 1.8503869446940522e-05, + "loss": 0.468, + "step": 12930 + }, + { + "epoch": 0.3550521691378363, + "grad_norm": 0.37359488010406494, + "learning_rate": 1.8503642194918055e-05, + "loss": 0.5642, + "step": 12931 + }, + { + "epoch": 0.3550796265788029, + "grad_norm": 0.4151769280433655, + "learning_rate": 1.850341492703358e-05, + "loss": 0.6652, + "step": 12932 + }, + { + "epoch": 0.35510708401976937, + "grad_norm": 0.37705880403518677, + "learning_rate": 1.850318764328752e-05, + "loss": 0.5299, + "step": 12933 + }, + { + "epoch": 0.35513454146073586, + "grad_norm": 0.3238154649734497, + "learning_rate": 1.8502960343680304e-05, + "loss": 0.5032, + "step": 12934 + }, + { + "epoch": 0.35516199890170236, + "grad_norm": 0.3359978497028351, + "learning_rate": 1.8502733028212355e-05, + "loss": 0.488, + "step": 12935 + }, + { + "epoch": 0.35518945634266885, + "grad_norm": 0.35553961992263794, + "learning_rate": 1.850250569688409e-05, + "loss": 0.506, + "step": 12936 + }, + { + "epoch": 0.35521691378363535, + "grad_norm": 0.5508958697319031, + "learning_rate": 1.8502278349695943e-05, + "loss": 0.5668, + "step": 12937 + }, + { + "epoch": 0.35524437122460184, + "grad_norm": 0.38974401354789734, + "learning_rate": 1.850205098664833e-05, + "loss": 0.4646, + "step": 12938 + }, + { + "epoch": 0.3552718286655684, + "grad_norm": 0.3833053708076477, + "learning_rate": 1.850182360774168e-05, + "loss": 0.5745, + "step": 12939 + }, + { + "epoch": 0.3552992861065349, + "grad_norm": 0.375117689371109, + "learning_rate": 1.8501596212976417e-05, + "loss": 0.5945, + "step": 12940 + }, + { + "epoch": 0.3553267435475014, + "grad_norm": 0.4733688235282898, + "learning_rate": 1.850136880235296e-05, + "loss": 0.4298, + "step": 12941 + }, + { + "epoch": 0.35535420098846787, + "grad_norm": 0.4538436532020569, + "learning_rate": 1.8501141375871738e-05, + "loss": 0.4678, + "step": 12942 + }, + { + "epoch": 0.35538165842943437, + "grad_norm": 0.36682039499282837, + "learning_rate": 1.8500913933533175e-05, + "loss": 0.5335, + "step": 12943 + }, + { + "epoch": 0.35540911587040086, + "grad_norm": 0.4080280363559723, + "learning_rate": 1.8500686475337694e-05, + "loss": 0.5377, + "step": 12944 + }, + { + "epoch": 0.35543657331136735, + "grad_norm": 0.3294958770275116, + "learning_rate": 1.850045900128572e-05, + "loss": 0.4046, + "step": 12945 + }, + { + "epoch": 0.3554640307523339, + "grad_norm": 0.32070714235305786, + "learning_rate": 1.8500231511377674e-05, + "loss": 0.4308, + "step": 12946 + }, + { + "epoch": 0.3554914881933004, + "grad_norm": 0.32708218693733215, + "learning_rate": 1.850000400561399e-05, + "loss": 0.4791, + "step": 12947 + }, + { + "epoch": 0.3555189456342669, + "grad_norm": 0.3574189245700836, + "learning_rate": 1.849977648399508e-05, + "loss": 0.5955, + "step": 12948 + }, + { + "epoch": 0.3555464030752334, + "grad_norm": 0.38079383969306946, + "learning_rate": 1.8499548946521374e-05, + "loss": 0.5889, + "step": 12949 + }, + { + "epoch": 0.3555738605161999, + "grad_norm": 0.3683728277683258, + "learning_rate": 1.84993213931933e-05, + "loss": 0.5528, + "step": 12950 + }, + { + "epoch": 0.3556013179571664, + "grad_norm": 0.36731061339378357, + "learning_rate": 1.8499093824011277e-05, + "loss": 0.6018, + "step": 12951 + }, + { + "epoch": 0.35562877539813287, + "grad_norm": 0.4952526390552521, + "learning_rate": 1.849886623897573e-05, + "loss": 0.4918, + "step": 12952 + }, + { + "epoch": 0.3556562328390994, + "grad_norm": 0.33449846506118774, + "learning_rate": 1.8498638638087087e-05, + "loss": 0.4079, + "step": 12953 + }, + { + "epoch": 0.3556836902800659, + "grad_norm": 0.3640614449977875, + "learning_rate": 1.849841102134577e-05, + "loss": 0.5628, + "step": 12954 + }, + { + "epoch": 0.3557111477210324, + "grad_norm": 0.37276479601860046, + "learning_rate": 1.8498183388752204e-05, + "loss": 0.5214, + "step": 12955 + }, + { + "epoch": 0.3557386051619989, + "grad_norm": 0.35916051268577576, + "learning_rate": 1.849795574030681e-05, + "loss": 0.5291, + "step": 12956 + }, + { + "epoch": 0.3557660626029654, + "grad_norm": 0.37045803666114807, + "learning_rate": 1.8497728076010024e-05, + "loss": 0.5691, + "step": 12957 + }, + { + "epoch": 0.3557935200439319, + "grad_norm": 0.39132675528526306, + "learning_rate": 1.8497500395862258e-05, + "loss": 0.6215, + "step": 12958 + }, + { + "epoch": 0.3558209774848984, + "grad_norm": 0.34931838512420654, + "learning_rate": 1.849727269986394e-05, + "loss": 0.5387, + "step": 12959 + }, + { + "epoch": 0.35584843492586493, + "grad_norm": 0.37427324056625366, + "learning_rate": 1.84970449880155e-05, + "loss": 0.4876, + "step": 12960 + }, + { + "epoch": 0.3558758923668314, + "grad_norm": 0.3529096841812134, + "learning_rate": 1.849681726031736e-05, + "loss": 0.5528, + "step": 12961 + }, + { + "epoch": 0.3559033498077979, + "grad_norm": 0.35555729269981384, + "learning_rate": 1.849658951676994e-05, + "loss": 0.5054, + "step": 12962 + }, + { + "epoch": 0.3559308072487644, + "grad_norm": 0.37021487951278687, + "learning_rate": 1.8496361757373674e-05, + "loss": 0.4592, + "step": 12963 + }, + { + "epoch": 0.3559582646897309, + "grad_norm": 0.3301765024662018, + "learning_rate": 1.8496133982128977e-05, + "loss": 0.5248, + "step": 12964 + }, + { + "epoch": 0.3559857221306974, + "grad_norm": 0.4845414161682129, + "learning_rate": 1.8495906191036277e-05, + "loss": 0.5785, + "step": 12965 + }, + { + "epoch": 0.3560131795716639, + "grad_norm": 0.34625008702278137, + "learning_rate": 1.8495678384096004e-05, + "loss": 0.4992, + "step": 12966 + }, + { + "epoch": 0.35604063701263045, + "grad_norm": 0.48607999086380005, + "learning_rate": 1.8495450561308577e-05, + "loss": 0.4906, + "step": 12967 + }, + { + "epoch": 0.35606809445359694, + "grad_norm": 0.3235108554363251, + "learning_rate": 1.849522272267443e-05, + "loss": 0.5672, + "step": 12968 + }, + { + "epoch": 0.35609555189456343, + "grad_norm": 0.3906828463077545, + "learning_rate": 1.849499486819397e-05, + "loss": 0.4886, + "step": 12969 + }, + { + "epoch": 0.3561230093355299, + "grad_norm": 0.3271802067756653, + "learning_rate": 1.8494766997867643e-05, + "loss": 0.4656, + "step": 12970 + }, + { + "epoch": 0.3561504667764964, + "grad_norm": 0.3509131669998169, + "learning_rate": 1.849453911169586e-05, + "loss": 0.5182, + "step": 12971 + }, + { + "epoch": 0.3561779242174629, + "grad_norm": 0.3706603944301605, + "learning_rate": 1.8494311209679052e-05, + "loss": 0.4967, + "step": 12972 + }, + { + "epoch": 0.3562053816584294, + "grad_norm": 0.39906302094459534, + "learning_rate": 1.8494083291817643e-05, + "loss": 0.4853, + "step": 12973 + }, + { + "epoch": 0.35623283909939596, + "grad_norm": 0.39738091826438904, + "learning_rate": 1.8493855358112055e-05, + "loss": 0.5229, + "step": 12974 + }, + { + "epoch": 0.35626029654036245, + "grad_norm": 0.43350231647491455, + "learning_rate": 1.849362740856272e-05, + "loss": 0.5823, + "step": 12975 + }, + { + "epoch": 0.35628775398132895, + "grad_norm": 0.3906891345977783, + "learning_rate": 1.8493399443170052e-05, + "loss": 0.5572, + "step": 12976 + }, + { + "epoch": 0.35631521142229544, + "grad_norm": 0.3743065893650055, + "learning_rate": 1.8493171461934488e-05, + "loss": 0.5273, + "step": 12977 + }, + { + "epoch": 0.35634266886326194, + "grad_norm": 0.4400683343410492, + "learning_rate": 1.8492943464856447e-05, + "loss": 0.5607, + "step": 12978 + }, + { + "epoch": 0.35637012630422843, + "grad_norm": 0.40293678641319275, + "learning_rate": 1.8492715451936355e-05, + "loss": 0.5352, + "step": 12979 + }, + { + "epoch": 0.3563975837451949, + "grad_norm": 0.3455598056316376, + "learning_rate": 1.8492487423174642e-05, + "loss": 0.485, + "step": 12980 + }, + { + "epoch": 0.3564250411861615, + "grad_norm": 0.37043312191963196, + "learning_rate": 1.8492259378571727e-05, + "loss": 0.4995, + "step": 12981 + }, + { + "epoch": 0.35645249862712797, + "grad_norm": 0.3668033480644226, + "learning_rate": 1.8492031318128038e-05, + "loss": 0.4731, + "step": 12982 + }, + { + "epoch": 0.35647995606809446, + "grad_norm": 0.40452343225479126, + "learning_rate": 1.8491803241844e-05, + "loss": 0.6119, + "step": 12983 + }, + { + "epoch": 0.35650741350906096, + "grad_norm": 0.3592095375061035, + "learning_rate": 1.849157514972004e-05, + "loss": 0.5198, + "step": 12984 + }, + { + "epoch": 0.35653487095002745, + "grad_norm": 0.4131249189376831, + "learning_rate": 1.8491347041756582e-05, + "loss": 0.5608, + "step": 12985 + }, + { + "epoch": 0.35656232839099394, + "grad_norm": 0.3285183012485504, + "learning_rate": 1.849111891795405e-05, + "loss": 0.4859, + "step": 12986 + }, + { + "epoch": 0.35658978583196044, + "grad_norm": 0.40792542695999146, + "learning_rate": 1.849089077831287e-05, + "loss": 0.411, + "step": 12987 + }, + { + "epoch": 0.356617243272927, + "grad_norm": 0.38132575154304504, + "learning_rate": 1.8490662622833472e-05, + "loss": 0.5168, + "step": 12988 + }, + { + "epoch": 0.3566447007138935, + "grad_norm": 0.43043336272239685, + "learning_rate": 1.8490434451516277e-05, + "loss": 0.446, + "step": 12989 + }, + { + "epoch": 0.35667215815486, + "grad_norm": 0.34616991877555847, + "learning_rate": 1.849020626436171e-05, + "loss": 0.4123, + "step": 12990 + }, + { + "epoch": 0.35669961559582647, + "grad_norm": 0.3501774072647095, + "learning_rate": 1.84899780613702e-05, + "loss": 0.5301, + "step": 12991 + }, + { + "epoch": 0.35672707303679296, + "grad_norm": 0.3966620862483978, + "learning_rate": 1.8489749842542176e-05, + "loss": 0.5644, + "step": 12992 + }, + { + "epoch": 0.35675453047775946, + "grad_norm": 0.3552907109260559, + "learning_rate": 1.8489521607878054e-05, + "loss": 0.5126, + "step": 12993 + }, + { + "epoch": 0.35678198791872595, + "grad_norm": 0.36467838287353516, + "learning_rate": 1.8489293357378268e-05, + "loss": 0.4886, + "step": 12994 + }, + { + "epoch": 0.3568094453596925, + "grad_norm": 0.369855672121048, + "learning_rate": 1.8489065091043235e-05, + "loss": 0.48, + "step": 12995 + }, + { + "epoch": 0.356836902800659, + "grad_norm": 0.3316080570220947, + "learning_rate": 1.8488836808873388e-05, + "loss": 0.4843, + "step": 12996 + }, + { + "epoch": 0.3568643602416255, + "grad_norm": 0.3672640323638916, + "learning_rate": 1.848860851086915e-05, + "loss": 0.524, + "step": 12997 + }, + { + "epoch": 0.356891817682592, + "grad_norm": 0.3153313994407654, + "learning_rate": 1.8488380197030952e-05, + "loss": 0.3922, + "step": 12998 + }, + { + "epoch": 0.3569192751235585, + "grad_norm": 0.3830428421497345, + "learning_rate": 1.8488151867359213e-05, + "loss": 0.5361, + "step": 12999 + }, + { + "epoch": 0.35694673256452497, + "grad_norm": 0.3186010718345642, + "learning_rate": 1.8487923521854362e-05, + "loss": 0.4292, + "step": 13000 + }, + { + "epoch": 0.35697419000549147, + "grad_norm": 0.418986976146698, + "learning_rate": 1.8487695160516825e-05, + "loss": 0.5912, + "step": 13001 + }, + { + "epoch": 0.357001647446458, + "grad_norm": 0.29527774453163147, + "learning_rate": 1.848746678334703e-05, + "loss": 0.4461, + "step": 13002 + }, + { + "epoch": 0.3570291048874245, + "grad_norm": 0.33104610443115234, + "learning_rate": 1.84872383903454e-05, + "loss": 0.4369, + "step": 13003 + }, + { + "epoch": 0.357056562328391, + "grad_norm": 0.3309026062488556, + "learning_rate": 1.8487009981512356e-05, + "loss": 0.4262, + "step": 13004 + }, + { + "epoch": 0.3570840197693575, + "grad_norm": 0.34110087156295776, + "learning_rate": 1.848678155684833e-05, + "loss": 0.4564, + "step": 13005 + }, + { + "epoch": 0.357111477210324, + "grad_norm": 0.3449648916721344, + "learning_rate": 1.8486553116353753e-05, + "loss": 0.5038, + "step": 13006 + }, + { + "epoch": 0.3571389346512905, + "grad_norm": 0.48508477210998535, + "learning_rate": 1.8486324660029044e-05, + "loss": 0.5809, + "step": 13007 + }, + { + "epoch": 0.357166392092257, + "grad_norm": 0.3240523934364319, + "learning_rate": 1.848609618787463e-05, + "loss": 0.4684, + "step": 13008 + }, + { + "epoch": 0.35719384953322353, + "grad_norm": 0.33209729194641113, + "learning_rate": 1.848586769989094e-05, + "loss": 0.4949, + "step": 13009 + }, + { + "epoch": 0.35722130697419, + "grad_norm": 0.35862213373184204, + "learning_rate": 1.8485639196078398e-05, + "loss": 0.4779, + "step": 13010 + }, + { + "epoch": 0.3572487644151565, + "grad_norm": 0.42602968215942383, + "learning_rate": 1.848541067643743e-05, + "loss": 0.4632, + "step": 13011 + }, + { + "epoch": 0.357276221856123, + "grad_norm": 0.36546412110328674, + "learning_rate": 1.8485182140968462e-05, + "loss": 0.4212, + "step": 13012 + }, + { + "epoch": 0.3573036792970895, + "grad_norm": 0.3444959223270416, + "learning_rate": 1.8484953589671925e-05, + "loss": 0.5027, + "step": 13013 + }, + { + "epoch": 0.357331136738056, + "grad_norm": 0.3905417323112488, + "learning_rate": 1.848472502254824e-05, + "loss": 0.5873, + "step": 13014 + }, + { + "epoch": 0.3573585941790225, + "grad_norm": 0.3522876501083374, + "learning_rate": 1.848449643959783e-05, + "loss": 0.511, + "step": 13015 + }, + { + "epoch": 0.35738605161998904, + "grad_norm": 0.4264282286167145, + "learning_rate": 1.8484267840821128e-05, + "loss": 0.5367, + "step": 13016 + }, + { + "epoch": 0.35741350906095554, + "grad_norm": 0.40064695477485657, + "learning_rate": 1.848403922621856e-05, + "loss": 0.5791, + "step": 13017 + }, + { + "epoch": 0.35744096650192203, + "grad_norm": 0.33717551827430725, + "learning_rate": 1.848381059579055e-05, + "loss": 0.4977, + "step": 13018 + }, + { + "epoch": 0.3574684239428885, + "grad_norm": 0.3351287841796875, + "learning_rate": 1.848358194953753e-05, + "loss": 0.4445, + "step": 13019 + }, + { + "epoch": 0.357495881383855, + "grad_norm": 0.3363860845565796, + "learning_rate": 1.8483353287459917e-05, + "loss": 0.5623, + "step": 13020 + }, + { + "epoch": 0.3575233388248215, + "grad_norm": 0.5071297883987427, + "learning_rate": 1.8483124609558145e-05, + "loss": 0.555, + "step": 13021 + }, + { + "epoch": 0.357550796265788, + "grad_norm": 0.4095757305622101, + "learning_rate": 1.8482895915832638e-05, + "loss": 0.5408, + "step": 13022 + }, + { + "epoch": 0.35757825370675456, + "grad_norm": 0.44816747307777405, + "learning_rate": 1.848266720628382e-05, + "loss": 0.518, + "step": 13023 + }, + { + "epoch": 0.35760571114772105, + "grad_norm": 0.4228252172470093, + "learning_rate": 1.8482438480912122e-05, + "loss": 0.49, + "step": 13024 + }, + { + "epoch": 0.35763316858868754, + "grad_norm": 0.3684860169887543, + "learning_rate": 1.848220973971797e-05, + "loss": 0.5092, + "step": 13025 + }, + { + "epoch": 0.35766062602965404, + "grad_norm": 0.3365960419178009, + "learning_rate": 1.848198098270179e-05, + "loss": 0.5303, + "step": 13026 + }, + { + "epoch": 0.35768808347062053, + "grad_norm": 0.368191123008728, + "learning_rate": 1.8481752209864005e-05, + "loss": 0.5781, + "step": 13027 + }, + { + "epoch": 0.357715540911587, + "grad_norm": 0.36805853247642517, + "learning_rate": 1.8481523421205048e-05, + "loss": 0.5166, + "step": 13028 + }, + { + "epoch": 0.3577429983525535, + "grad_norm": 0.3759543001651764, + "learning_rate": 1.8481294616725342e-05, + "loss": 0.5516, + "step": 13029 + }, + { + "epoch": 0.35777045579352007, + "grad_norm": 0.35152962803840637, + "learning_rate": 1.8481065796425316e-05, + "loss": 0.5144, + "step": 13030 + }, + { + "epoch": 0.35779791323448656, + "grad_norm": 0.358797550201416, + "learning_rate": 1.8480836960305396e-05, + "loss": 0.4505, + "step": 13031 + }, + { + "epoch": 0.35782537067545306, + "grad_norm": 0.3807108402252197, + "learning_rate": 1.8480608108366006e-05, + "loss": 0.4824, + "step": 13032 + }, + { + "epoch": 0.35785282811641955, + "grad_norm": 0.4163348376750946, + "learning_rate": 1.8480379240607575e-05, + "loss": 0.5701, + "step": 13033 + }, + { + "epoch": 0.35788028555738605, + "grad_norm": 0.44013962149620056, + "learning_rate": 1.848015035703053e-05, + "loss": 0.507, + "step": 13034 + }, + { + "epoch": 0.35790774299835254, + "grad_norm": 0.32004866003990173, + "learning_rate": 1.84799214576353e-05, + "loss": 0.4683, + "step": 13035 + }, + { + "epoch": 0.35793520043931903, + "grad_norm": 0.4632068872451782, + "learning_rate": 1.8479692542422308e-05, + "loss": 0.5794, + "step": 13036 + }, + { + "epoch": 0.3579626578802856, + "grad_norm": 0.40559110045433044, + "learning_rate": 1.8479463611391983e-05, + "loss": 0.6156, + "step": 13037 + }, + { + "epoch": 0.3579901153212521, + "grad_norm": 0.3532363772392273, + "learning_rate": 1.8479234664544754e-05, + "loss": 0.4866, + "step": 13038 + }, + { + "epoch": 0.35801757276221857, + "grad_norm": 0.32363638281822205, + "learning_rate": 1.8479005701881042e-05, + "loss": 0.4546, + "step": 13039 + }, + { + "epoch": 0.35804503020318507, + "grad_norm": 0.5130186676979065, + "learning_rate": 1.8478776723401283e-05, + "loss": 0.5997, + "step": 13040 + }, + { + "epoch": 0.35807248764415156, + "grad_norm": 0.4118053615093231, + "learning_rate": 1.8478547729105898e-05, + "loss": 0.6036, + "step": 13041 + }, + { + "epoch": 0.35809994508511805, + "grad_norm": 0.3968612551689148, + "learning_rate": 1.8478318718995313e-05, + "loss": 0.555, + "step": 13042 + }, + { + "epoch": 0.35812740252608455, + "grad_norm": 0.3946733772754669, + "learning_rate": 1.847808969306996e-05, + "loss": 0.5078, + "step": 13043 + }, + { + "epoch": 0.3581548599670511, + "grad_norm": 0.3719615638256073, + "learning_rate": 1.8477860651330263e-05, + "loss": 0.5209, + "step": 13044 + }, + { + "epoch": 0.3581823174080176, + "grad_norm": 0.380568265914917, + "learning_rate": 1.847763159377665e-05, + "loss": 0.4757, + "step": 13045 + }, + { + "epoch": 0.3582097748489841, + "grad_norm": 0.3815455734729767, + "learning_rate": 1.8477402520409547e-05, + "loss": 0.541, + "step": 13046 + }, + { + "epoch": 0.3582372322899506, + "grad_norm": 0.3085971772670746, + "learning_rate": 1.8477173431229386e-05, + "loss": 0.4161, + "step": 13047 + }, + { + "epoch": 0.3582646897309171, + "grad_norm": 0.36803120374679565, + "learning_rate": 1.847694432623659e-05, + "loss": 0.4783, + "step": 13048 + }, + { + "epoch": 0.35829214717188357, + "grad_norm": 0.359698086977005, + "learning_rate": 1.8476715205431585e-05, + "loss": 0.5494, + "step": 13049 + }, + { + "epoch": 0.35831960461285006, + "grad_norm": 0.37097787857055664, + "learning_rate": 1.8476486068814807e-05, + "loss": 0.532, + "step": 13050 + }, + { + "epoch": 0.3583470620538166, + "grad_norm": 0.3404868245124817, + "learning_rate": 1.847625691638667e-05, + "loss": 0.4956, + "step": 13051 + }, + { + "epoch": 0.3583745194947831, + "grad_norm": 0.37275034189224243, + "learning_rate": 1.847602774814761e-05, + "loss": 0.5066, + "step": 13052 + }, + { + "epoch": 0.3584019769357496, + "grad_norm": 0.3579447269439697, + "learning_rate": 1.8475798564098056e-05, + "loss": 0.4605, + "step": 13053 + }, + { + "epoch": 0.3584294343767161, + "grad_norm": 0.36870473623275757, + "learning_rate": 1.847556936423843e-05, + "loss": 0.5008, + "step": 13054 + }, + { + "epoch": 0.3584568918176826, + "grad_norm": 0.44797536730766296, + "learning_rate": 1.8475340148569164e-05, + "loss": 0.4853, + "step": 13055 + }, + { + "epoch": 0.3584843492586491, + "grad_norm": 0.37473922967910767, + "learning_rate": 1.847511091709068e-05, + "loss": 0.5637, + "step": 13056 + }, + { + "epoch": 0.3585118066996156, + "grad_norm": 0.3497028350830078, + "learning_rate": 1.8474881669803412e-05, + "loss": 0.5354, + "step": 13057 + }, + { + "epoch": 0.35853926414058207, + "grad_norm": 0.41104856133461, + "learning_rate": 1.8474652406707782e-05, + "loss": 0.5502, + "step": 13058 + }, + { + "epoch": 0.3585667215815486, + "grad_norm": 0.37893322110176086, + "learning_rate": 1.8474423127804224e-05, + "loss": 0.5007, + "step": 13059 + }, + { + "epoch": 0.3585941790225151, + "grad_norm": 0.44994205236434937, + "learning_rate": 1.8474193833093158e-05, + "loss": 0.5665, + "step": 13060 + }, + { + "epoch": 0.3586216364634816, + "grad_norm": 0.3552468419075012, + "learning_rate": 1.847396452257502e-05, + "loss": 0.5719, + "step": 13061 + }, + { + "epoch": 0.3586490939044481, + "grad_norm": 0.3795653283596039, + "learning_rate": 1.8473735196250234e-05, + "loss": 0.4946, + "step": 13062 + }, + { + "epoch": 0.3586765513454146, + "grad_norm": 0.43495243787765503, + "learning_rate": 1.8473505854119227e-05, + "loss": 0.5005, + "step": 13063 + }, + { + "epoch": 0.3587040087863811, + "grad_norm": 0.3547394573688507, + "learning_rate": 1.8473276496182423e-05, + "loss": 0.5087, + "step": 13064 + }, + { + "epoch": 0.3587314662273476, + "grad_norm": 0.3855637311935425, + "learning_rate": 1.847304712244026e-05, + "loss": 0.5825, + "step": 13065 + }, + { + "epoch": 0.35875892366831413, + "grad_norm": 0.38034605979919434, + "learning_rate": 1.8472817732893154e-05, + "loss": 0.5195, + "step": 13066 + }, + { + "epoch": 0.3587863811092806, + "grad_norm": 0.3455883860588074, + "learning_rate": 1.847258832754154e-05, + "loss": 0.4379, + "step": 13067 + }, + { + "epoch": 0.3588138385502471, + "grad_norm": 0.4013471305370331, + "learning_rate": 1.8472358906385848e-05, + "loss": 0.4941, + "step": 13068 + }, + { + "epoch": 0.3588412959912136, + "grad_norm": 0.3758194148540497, + "learning_rate": 1.84721294694265e-05, + "loss": 0.4903, + "step": 13069 + }, + { + "epoch": 0.3588687534321801, + "grad_norm": 0.3752117455005646, + "learning_rate": 1.8471900016663925e-05, + "loss": 0.5051, + "step": 13070 + }, + { + "epoch": 0.3588962108731466, + "grad_norm": 0.4019255042076111, + "learning_rate": 1.8471670548098557e-05, + "loss": 0.4613, + "step": 13071 + }, + { + "epoch": 0.3589236683141131, + "grad_norm": 0.3693023920059204, + "learning_rate": 1.8471441063730816e-05, + "loss": 0.3838, + "step": 13072 + }, + { + "epoch": 0.35895112575507965, + "grad_norm": 0.3698081374168396, + "learning_rate": 1.8471211563561134e-05, + "loss": 0.5156, + "step": 13073 + }, + { + "epoch": 0.35897858319604614, + "grad_norm": 0.3611887991428375, + "learning_rate": 1.847098204758994e-05, + "loss": 0.4362, + "step": 13074 + }, + { + "epoch": 0.35900604063701264, + "grad_norm": 0.38488391041755676, + "learning_rate": 1.847075251581766e-05, + "loss": 0.5033, + "step": 13075 + }, + { + "epoch": 0.35903349807797913, + "grad_norm": 0.35095134377479553, + "learning_rate": 1.8470522968244723e-05, + "loss": 0.4971, + "step": 13076 + }, + { + "epoch": 0.3590609555189456, + "grad_norm": 0.4106890559196472, + "learning_rate": 1.847029340487156e-05, + "loss": 0.5372, + "step": 13077 + }, + { + "epoch": 0.3590884129599121, + "grad_norm": 0.3366299271583557, + "learning_rate": 1.8470063825698593e-05, + "loss": 0.5388, + "step": 13078 + }, + { + "epoch": 0.3591158704008786, + "grad_norm": 0.4108119308948517, + "learning_rate": 1.8469834230726252e-05, + "loss": 0.4623, + "step": 13079 + }, + { + "epoch": 0.35914332784184516, + "grad_norm": 0.331798255443573, + "learning_rate": 1.8469604619954972e-05, + "loss": 0.4875, + "step": 13080 + }, + { + "epoch": 0.35917078528281166, + "grad_norm": 0.3658464550971985, + "learning_rate": 1.8469374993385175e-05, + "loss": 0.5501, + "step": 13081 + }, + { + "epoch": 0.35919824272377815, + "grad_norm": 0.33240431547164917, + "learning_rate": 1.846914535101729e-05, + "loss": 0.4777, + "step": 13082 + }, + { + "epoch": 0.35922570016474464, + "grad_norm": 0.4235239028930664, + "learning_rate": 1.8468915692851744e-05, + "loss": 0.6611, + "step": 13083 + }, + { + "epoch": 0.35925315760571114, + "grad_norm": 0.37577369809150696, + "learning_rate": 1.8468686018888967e-05, + "loss": 0.5779, + "step": 13084 + }, + { + "epoch": 0.35928061504667763, + "grad_norm": 0.36330413818359375, + "learning_rate": 1.846845632912939e-05, + "loss": 0.4816, + "step": 13085 + }, + { + "epoch": 0.3593080724876441, + "grad_norm": 0.37065958976745605, + "learning_rate": 1.8468226623573436e-05, + "loss": 0.4527, + "step": 13086 + }, + { + "epoch": 0.3593355299286107, + "grad_norm": 0.35448479652404785, + "learning_rate": 1.846799690222154e-05, + "loss": 0.5491, + "step": 13087 + }, + { + "epoch": 0.35936298736957717, + "grad_norm": 0.3839220106601715, + "learning_rate": 1.8467767165074127e-05, + "loss": 0.5742, + "step": 13088 + }, + { + "epoch": 0.35939044481054366, + "grad_norm": 0.37982094287872314, + "learning_rate": 1.8467537412131623e-05, + "loss": 0.5859, + "step": 13089 + }, + { + "epoch": 0.35941790225151016, + "grad_norm": 0.3659600615501404, + "learning_rate": 1.846730764339446e-05, + "loss": 0.5112, + "step": 13090 + }, + { + "epoch": 0.35944535969247665, + "grad_norm": 0.41604796051979065, + "learning_rate": 1.846707785886307e-05, + "loss": 0.5074, + "step": 13091 + }, + { + "epoch": 0.35947281713344315, + "grad_norm": 0.31994569301605225, + "learning_rate": 1.8466848058537873e-05, + "loss": 0.4347, + "step": 13092 + }, + { + "epoch": 0.35950027457440964, + "grad_norm": 0.393343448638916, + "learning_rate": 1.84666182424193e-05, + "loss": 0.5366, + "step": 13093 + }, + { + "epoch": 0.3595277320153762, + "grad_norm": 0.3669256567955017, + "learning_rate": 1.8466388410507786e-05, + "loss": 0.4809, + "step": 13094 + }, + { + "epoch": 0.3595551894563427, + "grad_norm": 0.3888470530509949, + "learning_rate": 1.8466158562803755e-05, + "loss": 0.4876, + "step": 13095 + }, + { + "epoch": 0.3595826468973092, + "grad_norm": 0.34312117099761963, + "learning_rate": 1.8465928699307635e-05, + "loss": 0.4739, + "step": 13096 + }, + { + "epoch": 0.35961010433827567, + "grad_norm": 0.3285973370075226, + "learning_rate": 1.8465698820019857e-05, + "loss": 0.4729, + "step": 13097 + }, + { + "epoch": 0.35963756177924217, + "grad_norm": 0.38379350304603577, + "learning_rate": 1.8465468924940846e-05, + "loss": 0.5174, + "step": 13098 + }, + { + "epoch": 0.35966501922020866, + "grad_norm": 0.3782350420951843, + "learning_rate": 1.8465239014071037e-05, + "loss": 0.4385, + "step": 13099 + }, + { + "epoch": 0.35969247666117515, + "grad_norm": 0.38308411836624146, + "learning_rate": 1.8465009087410854e-05, + "loss": 0.5082, + "step": 13100 + }, + { + "epoch": 0.3597199341021417, + "grad_norm": 0.734104335308075, + "learning_rate": 1.8464779144960726e-05, + "loss": 0.5013, + "step": 13101 + }, + { + "epoch": 0.3597473915431082, + "grad_norm": 0.37240442633628845, + "learning_rate": 1.8464549186721088e-05, + "loss": 0.5436, + "step": 13102 + }, + { + "epoch": 0.3597748489840747, + "grad_norm": 0.389226496219635, + "learning_rate": 1.8464319212692362e-05, + "loss": 0.5195, + "step": 13103 + }, + { + "epoch": 0.3598023064250412, + "grad_norm": 0.41736090183258057, + "learning_rate": 1.8464089222874973e-05, + "loss": 0.5272, + "step": 13104 + }, + { + "epoch": 0.3598297638660077, + "grad_norm": 0.3341962695121765, + "learning_rate": 1.8463859217269364e-05, + "loss": 0.4099, + "step": 13105 + }, + { + "epoch": 0.3598572213069742, + "grad_norm": 0.3410189747810364, + "learning_rate": 1.8463629195875952e-05, + "loss": 0.5594, + "step": 13106 + }, + { + "epoch": 0.35988467874794067, + "grad_norm": 0.3729605972766876, + "learning_rate": 1.846339915869517e-05, + "loss": 0.4815, + "step": 13107 + }, + { + "epoch": 0.3599121361889072, + "grad_norm": 0.35947906970977783, + "learning_rate": 1.8463169105727454e-05, + "loss": 0.4737, + "step": 13108 + }, + { + "epoch": 0.3599395936298737, + "grad_norm": 0.3681703805923462, + "learning_rate": 1.846293903697322e-05, + "loss": 0.4602, + "step": 13109 + }, + { + "epoch": 0.3599670510708402, + "grad_norm": 0.40846383571624756, + "learning_rate": 1.8462708952432903e-05, + "loss": 0.6694, + "step": 13110 + }, + { + "epoch": 0.3599945085118067, + "grad_norm": 0.3333899676799774, + "learning_rate": 1.8462478852106937e-05, + "loss": 0.5265, + "step": 13111 + }, + { + "epoch": 0.3600219659527732, + "grad_norm": 0.3525049686431885, + "learning_rate": 1.8462248735995746e-05, + "loss": 0.4571, + "step": 13112 + }, + { + "epoch": 0.3600494233937397, + "grad_norm": 0.32668542861938477, + "learning_rate": 1.8462018604099757e-05, + "loss": 0.4737, + "step": 13113 + }, + { + "epoch": 0.3600768808347062, + "grad_norm": 0.36160656809806824, + "learning_rate": 1.8461788456419408e-05, + "loss": 0.5553, + "step": 13114 + }, + { + "epoch": 0.36010433827567273, + "grad_norm": 0.3445206582546234, + "learning_rate": 1.8461558292955118e-05, + "loss": 0.466, + "step": 13115 + }, + { + "epoch": 0.3601317957166392, + "grad_norm": 0.33575722575187683, + "learning_rate": 1.8461328113707325e-05, + "loss": 0.4661, + "step": 13116 + }, + { + "epoch": 0.3601592531576057, + "grad_norm": 0.3560367226600647, + "learning_rate": 1.846109791867645e-05, + "loss": 0.5741, + "step": 13117 + }, + { + "epoch": 0.3601867105985722, + "grad_norm": 0.40430423617362976, + "learning_rate": 1.846086770786293e-05, + "loss": 0.544, + "step": 13118 + }, + { + "epoch": 0.3602141680395387, + "grad_norm": 0.37835022807121277, + "learning_rate": 1.846063748126719e-05, + "loss": 0.4992, + "step": 13119 + }, + { + "epoch": 0.3602416254805052, + "grad_norm": 0.33275657892227173, + "learning_rate": 1.846040723888966e-05, + "loss": 0.4596, + "step": 13120 + }, + { + "epoch": 0.3602690829214717, + "grad_norm": 0.41295212507247925, + "learning_rate": 1.8460176980730777e-05, + "loss": 0.448, + "step": 13121 + }, + { + "epoch": 0.36029654036243824, + "grad_norm": 0.35496413707733154, + "learning_rate": 1.8459946706790958e-05, + "loss": 0.5629, + "step": 13122 + }, + { + "epoch": 0.36032399780340474, + "grad_norm": 0.38105282187461853, + "learning_rate": 1.8459716417070637e-05, + "loss": 0.5751, + "step": 13123 + }, + { + "epoch": 0.36035145524437123, + "grad_norm": 0.3538092076778412, + "learning_rate": 1.8459486111570245e-05, + "loss": 0.5372, + "step": 13124 + }, + { + "epoch": 0.3603789126853377, + "grad_norm": 0.32643380761146545, + "learning_rate": 1.8459255790290215e-05, + "loss": 0.5287, + "step": 13125 + }, + { + "epoch": 0.3604063701263042, + "grad_norm": 0.3678705394268036, + "learning_rate": 1.845902545323097e-05, + "loss": 0.4908, + "step": 13126 + }, + { + "epoch": 0.3604338275672707, + "grad_norm": 0.4382529556751251, + "learning_rate": 1.8458795100392947e-05, + "loss": 0.5614, + "step": 13127 + }, + { + "epoch": 0.3604612850082372, + "grad_norm": 0.3411422073841095, + "learning_rate": 1.8458564731776566e-05, + "loss": 0.475, + "step": 13128 + }, + { + "epoch": 0.36048874244920376, + "grad_norm": 0.35569262504577637, + "learning_rate": 1.8458334347382264e-05, + "loss": 0.5252, + "step": 13129 + }, + { + "epoch": 0.36051619989017025, + "grad_norm": 0.3134927749633789, + "learning_rate": 1.8458103947210467e-05, + "loss": 0.4775, + "step": 13130 + }, + { + "epoch": 0.36054365733113675, + "grad_norm": 0.3469892740249634, + "learning_rate": 1.845787353126161e-05, + "loss": 0.4792, + "step": 13131 + }, + { + "epoch": 0.36057111477210324, + "grad_norm": 0.3935772776603699, + "learning_rate": 1.8457643099536117e-05, + "loss": 0.5468, + "step": 13132 + }, + { + "epoch": 0.36059857221306973, + "grad_norm": 0.4262789189815521, + "learning_rate": 1.8457412652034423e-05, + "loss": 0.5306, + "step": 13133 + }, + { + "epoch": 0.36062602965403623, + "grad_norm": 0.42334407567977905, + "learning_rate": 1.8457182188756956e-05, + "loss": 0.5394, + "step": 13134 + }, + { + "epoch": 0.3606534870950027, + "grad_norm": 0.4125106632709503, + "learning_rate": 1.845695170970414e-05, + "loss": 0.5264, + "step": 13135 + }, + { + "epoch": 0.3606809445359693, + "grad_norm": 0.4267720878124237, + "learning_rate": 1.8456721214876416e-05, + "loss": 0.5849, + "step": 13136 + }, + { + "epoch": 0.36070840197693577, + "grad_norm": 0.33667778968811035, + "learning_rate": 1.8456490704274205e-05, + "loss": 0.4892, + "step": 13137 + }, + { + "epoch": 0.36073585941790226, + "grad_norm": 0.3911696672439575, + "learning_rate": 1.8456260177897934e-05, + "loss": 0.5899, + "step": 13138 + }, + { + "epoch": 0.36076331685886875, + "grad_norm": 0.406293123960495, + "learning_rate": 1.8456029635748046e-05, + "loss": 0.5856, + "step": 13139 + }, + { + "epoch": 0.36079077429983525, + "grad_norm": 0.35822010040283203, + "learning_rate": 1.845579907782496e-05, + "loss": 0.5028, + "step": 13140 + }, + { + "epoch": 0.36081823174080174, + "grad_norm": 0.4146757125854492, + "learning_rate": 1.8455568504129117e-05, + "loss": 0.5706, + "step": 13141 + }, + { + "epoch": 0.36084568918176824, + "grad_norm": 0.3982425332069397, + "learning_rate": 1.8455337914660932e-05, + "loss": 0.5577, + "step": 13142 + }, + { + "epoch": 0.3608731466227348, + "grad_norm": 0.3931189775466919, + "learning_rate": 1.845510730942085e-05, + "loss": 0.6377, + "step": 13143 + }, + { + "epoch": 0.3609006040637013, + "grad_norm": 0.419359028339386, + "learning_rate": 1.845487668840929e-05, + "loss": 0.6127, + "step": 13144 + }, + { + "epoch": 0.3609280615046678, + "grad_norm": 0.34221982955932617, + "learning_rate": 1.8454646051626682e-05, + "loss": 0.52, + "step": 13145 + }, + { + "epoch": 0.36095551894563427, + "grad_norm": 0.3729400336742401, + "learning_rate": 1.8454415399073466e-05, + "loss": 0.5021, + "step": 13146 + }, + { + "epoch": 0.36098297638660076, + "grad_norm": 0.35129475593566895, + "learning_rate": 1.845418473075007e-05, + "loss": 0.5305, + "step": 13147 + }, + { + "epoch": 0.36101043382756726, + "grad_norm": 0.3269643187522888, + "learning_rate": 1.8453954046656917e-05, + "loss": 0.5117, + "step": 13148 + }, + { + "epoch": 0.36103789126853375, + "grad_norm": 0.5407090187072754, + "learning_rate": 1.845372334679444e-05, + "loss": 0.5464, + "step": 13149 + }, + { + "epoch": 0.3610653487095003, + "grad_norm": 0.3360693156719208, + "learning_rate": 1.8453492631163074e-05, + "loss": 0.5566, + "step": 13150 + }, + { + "epoch": 0.3610928061504668, + "grad_norm": 0.5453652739524841, + "learning_rate": 1.8453261899763247e-05, + "loss": 0.5698, + "step": 13151 + }, + { + "epoch": 0.3611202635914333, + "grad_norm": 0.42272889614105225, + "learning_rate": 1.8453031152595387e-05, + "loss": 0.6524, + "step": 13152 + }, + { + "epoch": 0.3611477210323998, + "grad_norm": 0.3863818049430847, + "learning_rate": 1.8452800389659927e-05, + "loss": 0.5219, + "step": 13153 + }, + { + "epoch": 0.3611751784733663, + "grad_norm": 0.41362398862838745, + "learning_rate": 1.8452569610957294e-05, + "loss": 0.6084, + "step": 13154 + }, + { + "epoch": 0.36120263591433277, + "grad_norm": 0.3814869523048401, + "learning_rate": 1.8452338816487923e-05, + "loss": 0.4526, + "step": 13155 + }, + { + "epoch": 0.36123009335529926, + "grad_norm": 0.3521794080734253, + "learning_rate": 1.845210800625224e-05, + "loss": 0.5706, + "step": 13156 + }, + { + "epoch": 0.3612575507962658, + "grad_norm": 0.34441110491752625, + "learning_rate": 1.845187718025068e-05, + "loss": 0.4927, + "step": 13157 + }, + { + "epoch": 0.3612850082372323, + "grad_norm": 0.3489800989627838, + "learning_rate": 1.8451646338483673e-05, + "loss": 0.5497, + "step": 13158 + }, + { + "epoch": 0.3613124656781988, + "grad_norm": 0.42725157737731934, + "learning_rate": 1.8451415480951645e-05, + "loss": 0.4967, + "step": 13159 + }, + { + "epoch": 0.3613399231191653, + "grad_norm": 0.34447064995765686, + "learning_rate": 1.845118460765503e-05, + "loss": 0.418, + "step": 13160 + }, + { + "epoch": 0.3613673805601318, + "grad_norm": 0.40395209193229675, + "learning_rate": 1.8450953718594263e-05, + "loss": 0.5611, + "step": 13161 + }, + { + "epoch": 0.3613948380010983, + "grad_norm": 0.39303383231163025, + "learning_rate": 1.8450722813769766e-05, + "loss": 0.5256, + "step": 13162 + }, + { + "epoch": 0.3614222954420648, + "grad_norm": 0.38242316246032715, + "learning_rate": 1.8450491893181973e-05, + "loss": 0.5202, + "step": 13163 + }, + { + "epoch": 0.36144975288303133, + "grad_norm": 0.3126254379749298, + "learning_rate": 1.8450260956831317e-05, + "loss": 0.4693, + "step": 13164 + }, + { + "epoch": 0.3614772103239978, + "grad_norm": 0.3535137176513672, + "learning_rate": 1.8450030004718228e-05, + "loss": 0.4772, + "step": 13165 + }, + { + "epoch": 0.3615046677649643, + "grad_norm": 0.3869374990463257, + "learning_rate": 1.8449799036843136e-05, + "loss": 0.5096, + "step": 13166 + }, + { + "epoch": 0.3615321252059308, + "grad_norm": 0.3483353555202484, + "learning_rate": 1.844956805320647e-05, + "loss": 0.5399, + "step": 13167 + }, + { + "epoch": 0.3615595826468973, + "grad_norm": 0.3912252187728882, + "learning_rate": 1.8449337053808663e-05, + "loss": 0.4449, + "step": 13168 + }, + { + "epoch": 0.3615870400878638, + "grad_norm": 0.40818560123443604, + "learning_rate": 1.8449106038650146e-05, + "loss": 0.5448, + "step": 13169 + }, + { + "epoch": 0.3616144975288303, + "grad_norm": 0.43586814403533936, + "learning_rate": 1.844887500773135e-05, + "loss": 0.5357, + "step": 13170 + }, + { + "epoch": 0.36164195496979684, + "grad_norm": 0.520947277545929, + "learning_rate": 1.844864396105271e-05, + "loss": 0.5185, + "step": 13171 + }, + { + "epoch": 0.36166941241076334, + "grad_norm": 0.3438819646835327, + "learning_rate": 1.8448412898614645e-05, + "loss": 0.5169, + "step": 13172 + }, + { + "epoch": 0.36169686985172983, + "grad_norm": 0.36924096941947937, + "learning_rate": 1.8448181820417595e-05, + "loss": 0.5146, + "step": 13173 + }, + { + "epoch": 0.3617243272926963, + "grad_norm": 0.37733086943626404, + "learning_rate": 1.8447950726461995e-05, + "loss": 0.5214, + "step": 13174 + }, + { + "epoch": 0.3617517847336628, + "grad_norm": 0.3620932102203369, + "learning_rate": 1.844771961674827e-05, + "loss": 0.5114, + "step": 13175 + }, + { + "epoch": 0.3617792421746293, + "grad_norm": 0.3815581500530243, + "learning_rate": 1.8447488491276846e-05, + "loss": 0.4929, + "step": 13176 + }, + { + "epoch": 0.3618066996155958, + "grad_norm": 0.3580508828163147, + "learning_rate": 1.844725735004816e-05, + "loss": 0.4502, + "step": 13177 + }, + { + "epoch": 0.36183415705656236, + "grad_norm": 0.38225069642066956, + "learning_rate": 1.844702619306265e-05, + "loss": 0.5262, + "step": 13178 + }, + { + "epoch": 0.36186161449752885, + "grad_norm": 0.42518308758735657, + "learning_rate": 1.8446795020320734e-05, + "loss": 0.5644, + "step": 13179 + }, + { + "epoch": 0.36188907193849534, + "grad_norm": 0.37176260352134705, + "learning_rate": 1.8446563831822854e-05, + "loss": 0.5339, + "step": 13180 + }, + { + "epoch": 0.36191652937946184, + "grad_norm": 0.3548765182495117, + "learning_rate": 1.844633262756943e-05, + "loss": 0.4857, + "step": 13181 + }, + { + "epoch": 0.36194398682042833, + "grad_norm": 0.36172688007354736, + "learning_rate": 1.8446101407560903e-05, + "loss": 0.4875, + "step": 13182 + }, + { + "epoch": 0.3619714442613948, + "grad_norm": 0.44124525785446167, + "learning_rate": 1.8445870171797703e-05, + "loss": 0.5258, + "step": 13183 + }, + { + "epoch": 0.3619989017023613, + "grad_norm": 0.3759884536266327, + "learning_rate": 1.844563892028026e-05, + "loss": 0.5499, + "step": 13184 + }, + { + "epoch": 0.36202635914332787, + "grad_norm": 0.3830901086330414, + "learning_rate": 1.8445407653009006e-05, + "loss": 0.5356, + "step": 13185 + }, + { + "epoch": 0.36205381658429436, + "grad_norm": 0.44437599182128906, + "learning_rate": 1.8445176369984366e-05, + "loss": 0.5315, + "step": 13186 + }, + { + "epoch": 0.36208127402526086, + "grad_norm": 0.3915611803531647, + "learning_rate": 1.8444945071206777e-05, + "loss": 0.6632, + "step": 13187 + }, + { + "epoch": 0.36210873146622735, + "grad_norm": 0.35195913910865784, + "learning_rate": 1.8444713756676672e-05, + "loss": 0.4534, + "step": 13188 + }, + { + "epoch": 0.36213618890719385, + "grad_norm": 0.3453114628791809, + "learning_rate": 1.844448242639448e-05, + "loss": 0.5676, + "step": 13189 + }, + { + "epoch": 0.36216364634816034, + "grad_norm": 0.41373419761657715, + "learning_rate": 1.8444251080360636e-05, + "loss": 0.5032, + "step": 13190 + }, + { + "epoch": 0.36219110378912683, + "grad_norm": 0.384966641664505, + "learning_rate": 1.8444019718575566e-05, + "loss": 0.5519, + "step": 13191 + }, + { + "epoch": 0.3622185612300933, + "grad_norm": 0.3556605577468872, + "learning_rate": 1.844378834103971e-05, + "loss": 0.484, + "step": 13192 + }, + { + "epoch": 0.3622460186710599, + "grad_norm": 0.35879799723625183, + "learning_rate": 1.8443556947753486e-05, + "loss": 0.4987, + "step": 13193 + }, + { + "epoch": 0.36227347611202637, + "grad_norm": 0.35934722423553467, + "learning_rate": 1.8443325538717337e-05, + "loss": 0.5528, + "step": 13194 + }, + { + "epoch": 0.36230093355299287, + "grad_norm": 0.34954097867012024, + "learning_rate": 1.844309411393169e-05, + "loss": 0.4357, + "step": 13195 + }, + { + "epoch": 0.36232839099395936, + "grad_norm": 0.3962495028972626, + "learning_rate": 1.844286267339698e-05, + "loss": 0.5581, + "step": 13196 + }, + { + "epoch": 0.36235584843492585, + "grad_norm": 0.36189860105514526, + "learning_rate": 1.844263121711363e-05, + "loss": 0.4055, + "step": 13197 + }, + { + "epoch": 0.36238330587589235, + "grad_norm": 0.34917619824409485, + "learning_rate": 1.8442399745082085e-05, + "loss": 0.5146, + "step": 13198 + }, + { + "epoch": 0.36241076331685884, + "grad_norm": 0.3552559018135071, + "learning_rate": 1.8442168257302765e-05, + "loss": 0.5284, + "step": 13199 + }, + { + "epoch": 0.3624382207578254, + "grad_norm": 0.36629894375801086, + "learning_rate": 1.844193675377611e-05, + "loss": 0.5283, + "step": 13200 + }, + { + "epoch": 0.3624656781987919, + "grad_norm": 0.40735283493995667, + "learning_rate": 1.844170523450255e-05, + "loss": 0.6192, + "step": 13201 + }, + { + "epoch": 0.3624931356397584, + "grad_norm": 0.3476910889148712, + "learning_rate": 1.8441473699482514e-05, + "loss": 0.5274, + "step": 13202 + }, + { + "epoch": 0.3625205930807249, + "grad_norm": 0.3860296905040741, + "learning_rate": 1.8441242148716435e-05, + "loss": 0.4959, + "step": 13203 + }, + { + "epoch": 0.36254805052169137, + "grad_norm": 0.36859825253486633, + "learning_rate": 1.8441010582204745e-05, + "loss": 0.4489, + "step": 13204 + }, + { + "epoch": 0.36257550796265786, + "grad_norm": 0.3629544675350189, + "learning_rate": 1.8440778999947878e-05, + "loss": 0.4756, + "step": 13205 + }, + { + "epoch": 0.36260296540362436, + "grad_norm": 0.5369066596031189, + "learning_rate": 1.844054740194626e-05, + "loss": 0.4895, + "step": 13206 + }, + { + "epoch": 0.3626304228445909, + "grad_norm": 0.3333066999912262, + "learning_rate": 1.844031578820033e-05, + "loss": 0.5562, + "step": 13207 + }, + { + "epoch": 0.3626578802855574, + "grad_norm": 0.3899949789047241, + "learning_rate": 1.8440084158710518e-05, + "loss": 0.5443, + "step": 13208 + }, + { + "epoch": 0.3626853377265239, + "grad_norm": 0.4164447486400604, + "learning_rate": 1.8439852513477253e-05, + "loss": 0.5486, + "step": 13209 + }, + { + "epoch": 0.3627127951674904, + "grad_norm": 0.31232699751853943, + "learning_rate": 1.843962085250097e-05, + "loss": 0.4765, + "step": 13210 + }, + { + "epoch": 0.3627402526084569, + "grad_norm": 0.3985652029514313, + "learning_rate": 1.84393891757821e-05, + "loss": 0.4922, + "step": 13211 + }, + { + "epoch": 0.3627677100494234, + "grad_norm": 0.3238932490348816, + "learning_rate": 1.8439157483321076e-05, + "loss": 0.4735, + "step": 13212 + }, + { + "epoch": 0.36279516749038987, + "grad_norm": 0.35253220796585083, + "learning_rate": 1.843892577511833e-05, + "loss": 0.5657, + "step": 13213 + }, + { + "epoch": 0.3628226249313564, + "grad_norm": 0.35974276065826416, + "learning_rate": 1.84386940511743e-05, + "loss": 0.4492, + "step": 13214 + }, + { + "epoch": 0.3628500823723229, + "grad_norm": 0.33148103952407837, + "learning_rate": 1.8438462311489403e-05, + "loss": 0.4519, + "step": 13215 + }, + { + "epoch": 0.3628775398132894, + "grad_norm": 0.36951327323913574, + "learning_rate": 1.8438230556064087e-05, + "loss": 0.5158, + "step": 13216 + }, + { + "epoch": 0.3629049972542559, + "grad_norm": 0.3457728922367096, + "learning_rate": 1.843799878489877e-05, + "loss": 0.5452, + "step": 13217 + }, + { + "epoch": 0.3629324546952224, + "grad_norm": 0.35920214653015137, + "learning_rate": 1.8437766997993902e-05, + "loss": 0.454, + "step": 13218 + }, + { + "epoch": 0.3629599121361889, + "grad_norm": 0.36417731642723083, + "learning_rate": 1.8437535195349897e-05, + "loss": 0.5292, + "step": 13219 + }, + { + "epoch": 0.3629873695771554, + "grad_norm": 0.4050922691822052, + "learning_rate": 1.8437303376967204e-05, + "loss": 0.6041, + "step": 13220 + }, + { + "epoch": 0.36301482701812193, + "grad_norm": 0.375888466835022, + "learning_rate": 1.8437071542846242e-05, + "loss": 0.455, + "step": 13221 + }, + { + "epoch": 0.3630422844590884, + "grad_norm": 0.3890566825866699, + "learning_rate": 1.843683969298745e-05, + "loss": 0.4854, + "step": 13222 + }, + { + "epoch": 0.3630697419000549, + "grad_norm": 0.42696088552474976, + "learning_rate": 1.843660782739126e-05, + "loss": 0.4708, + "step": 13223 + }, + { + "epoch": 0.3630971993410214, + "grad_norm": 0.3846897780895233, + "learning_rate": 1.8436375946058102e-05, + "loss": 0.5251, + "step": 13224 + }, + { + "epoch": 0.3631246567819879, + "grad_norm": 0.39463454484939575, + "learning_rate": 1.8436144048988413e-05, + "loss": 0.5094, + "step": 13225 + }, + { + "epoch": 0.3631521142229544, + "grad_norm": 0.35220158100128174, + "learning_rate": 1.843591213618262e-05, + "loss": 0.4811, + "step": 13226 + }, + { + "epoch": 0.3631795716639209, + "grad_norm": 0.3524731993675232, + "learning_rate": 1.8435680207641158e-05, + "loss": 0.5088, + "step": 13227 + }, + { + "epoch": 0.36320702910488745, + "grad_norm": 0.3048100173473358, + "learning_rate": 1.8435448263364462e-05, + "loss": 0.4389, + "step": 13228 + }, + { + "epoch": 0.36323448654585394, + "grad_norm": 0.3411879241466522, + "learning_rate": 1.8435216303352964e-05, + "loss": 0.4781, + "step": 13229 + }, + { + "epoch": 0.36326194398682043, + "grad_norm": 0.49210768938064575, + "learning_rate": 1.843498432760709e-05, + "loss": 0.5579, + "step": 13230 + }, + { + "epoch": 0.36328940142778693, + "grad_norm": 0.5711226463317871, + "learning_rate": 1.8434752336127285e-05, + "loss": 0.5607, + "step": 13231 + }, + { + "epoch": 0.3633168588687534, + "grad_norm": 0.37616774439811707, + "learning_rate": 1.843452032891397e-05, + "loss": 0.5404, + "step": 13232 + }, + { + "epoch": 0.3633443163097199, + "grad_norm": 0.4114203155040741, + "learning_rate": 1.8434288305967584e-05, + "loss": 0.5403, + "step": 13233 + }, + { + "epoch": 0.3633717737506864, + "grad_norm": 0.37984347343444824, + "learning_rate": 1.8434056267288558e-05, + "loss": 0.5362, + "step": 13234 + }, + { + "epoch": 0.36339923119165296, + "grad_norm": 0.3873461186885834, + "learning_rate": 1.843382421287733e-05, + "loss": 0.5504, + "step": 13235 + }, + { + "epoch": 0.36342668863261945, + "grad_norm": 0.35314255952835083, + "learning_rate": 1.843359214273432e-05, + "loss": 0.4769, + "step": 13236 + }, + { + "epoch": 0.36345414607358595, + "grad_norm": 0.36450108885765076, + "learning_rate": 1.8433360056859976e-05, + "loss": 0.4736, + "step": 13237 + }, + { + "epoch": 0.36348160351455244, + "grad_norm": 0.44163474440574646, + "learning_rate": 1.843312795525472e-05, + "loss": 0.5611, + "step": 13238 + }, + { + "epoch": 0.36350906095551894, + "grad_norm": 0.391765832901001, + "learning_rate": 1.843289583791899e-05, + "loss": 0.5211, + "step": 13239 + }, + { + "epoch": 0.36353651839648543, + "grad_norm": 0.3340924382209778, + "learning_rate": 1.8432663704853215e-05, + "loss": 0.4928, + "step": 13240 + }, + { + "epoch": 0.3635639758374519, + "grad_norm": 0.3424559533596039, + "learning_rate": 1.8432431556057832e-05, + "loss": 0.4978, + "step": 13241 + }, + { + "epoch": 0.3635914332784185, + "grad_norm": 0.3453432619571686, + "learning_rate": 1.8432199391533275e-05, + "loss": 0.5094, + "step": 13242 + }, + { + "epoch": 0.36361889071938497, + "grad_norm": 0.35446932911872864, + "learning_rate": 1.8431967211279974e-05, + "loss": 0.4666, + "step": 13243 + }, + { + "epoch": 0.36364634816035146, + "grad_norm": 0.3816090524196625, + "learning_rate": 1.843173501529836e-05, + "loss": 0.5249, + "step": 13244 + }, + { + "epoch": 0.36367380560131796, + "grad_norm": 0.3792325258255005, + "learning_rate": 1.8431502803588875e-05, + "loss": 0.488, + "step": 13245 + }, + { + "epoch": 0.36370126304228445, + "grad_norm": 0.3273666203022003, + "learning_rate": 1.843127057615194e-05, + "loss": 0.4314, + "step": 13246 + }, + { + "epoch": 0.36372872048325094, + "grad_norm": 0.42347052693367004, + "learning_rate": 1.8431038332988002e-05, + "loss": 0.4779, + "step": 13247 + }, + { + "epoch": 0.36375617792421744, + "grad_norm": 0.3682761490345001, + "learning_rate": 1.843080607409748e-05, + "loss": 0.5004, + "step": 13248 + }, + { + "epoch": 0.363783635365184, + "grad_norm": 0.37068304419517517, + "learning_rate": 1.8430573799480818e-05, + "loss": 0.5762, + "step": 13249 + }, + { + "epoch": 0.3638110928061505, + "grad_norm": 0.4145568907260895, + "learning_rate": 1.843034150913844e-05, + "loss": 0.5163, + "step": 13250 + }, + { + "epoch": 0.363838550247117, + "grad_norm": 0.48526614904403687, + "learning_rate": 1.843010920307079e-05, + "loss": 0.6048, + "step": 13251 + }, + { + "epoch": 0.36386600768808347, + "grad_norm": 0.4196273982524872, + "learning_rate": 1.8429876881278295e-05, + "loss": 0.5088, + "step": 13252 + }, + { + "epoch": 0.36389346512904996, + "grad_norm": 0.33530065417289734, + "learning_rate": 1.8429644543761384e-05, + "loss": 0.5107, + "step": 13253 + }, + { + "epoch": 0.36392092257001646, + "grad_norm": 0.5058258175849915, + "learning_rate": 1.84294121905205e-05, + "loss": 0.4961, + "step": 13254 + }, + { + "epoch": 0.36394838001098295, + "grad_norm": 0.3661845624446869, + "learning_rate": 1.8429179821556072e-05, + "loss": 0.5008, + "step": 13255 + }, + { + "epoch": 0.3639758374519495, + "grad_norm": 0.3951188623905182, + "learning_rate": 1.8428947436868533e-05, + "loss": 0.6414, + "step": 13256 + }, + { + "epoch": 0.364003294892916, + "grad_norm": 0.4396534562110901, + "learning_rate": 1.8428715036458317e-05, + "loss": 0.6328, + "step": 13257 + }, + { + "epoch": 0.3640307523338825, + "grad_norm": 0.36103951930999756, + "learning_rate": 1.8428482620325855e-05, + "loss": 0.5646, + "step": 13258 + }, + { + "epoch": 0.364058209774849, + "grad_norm": 0.3870995044708252, + "learning_rate": 1.8428250188471584e-05, + "loss": 0.4883, + "step": 13259 + }, + { + "epoch": 0.3640856672158155, + "grad_norm": 0.3736732304096222, + "learning_rate": 1.8428017740895935e-05, + "loss": 0.5266, + "step": 13260 + }, + { + "epoch": 0.364113124656782, + "grad_norm": 0.35943564772605896, + "learning_rate": 1.8427785277599347e-05, + "loss": 0.4844, + "step": 13261 + }, + { + "epoch": 0.36414058209774847, + "grad_norm": 0.3676561713218689, + "learning_rate": 1.8427552798582248e-05, + "loss": 0.5371, + "step": 13262 + }, + { + "epoch": 0.364168039538715, + "grad_norm": 0.3424157202243805, + "learning_rate": 1.8427320303845074e-05, + "loss": 0.473, + "step": 13263 + }, + { + "epoch": 0.3641954969796815, + "grad_norm": 0.35734015703201294, + "learning_rate": 1.8427087793388253e-05, + "loss": 0.4592, + "step": 13264 + }, + { + "epoch": 0.364222954420648, + "grad_norm": 0.4090169668197632, + "learning_rate": 1.842685526721223e-05, + "loss": 0.5029, + "step": 13265 + }, + { + "epoch": 0.3642504118616145, + "grad_norm": 0.3673754036426544, + "learning_rate": 1.8426622725317427e-05, + "loss": 0.5285, + "step": 13266 + }, + { + "epoch": 0.364277869302581, + "grad_norm": 0.3527148365974426, + "learning_rate": 1.8426390167704287e-05, + "loss": 0.5705, + "step": 13267 + }, + { + "epoch": 0.3643053267435475, + "grad_norm": 0.37671226263046265, + "learning_rate": 1.8426157594373237e-05, + "loss": 0.5725, + "step": 13268 + }, + { + "epoch": 0.364332784184514, + "grad_norm": 0.46316054463386536, + "learning_rate": 1.8425925005324718e-05, + "loss": 0.5113, + "step": 13269 + }, + { + "epoch": 0.36436024162548053, + "grad_norm": 0.40641331672668457, + "learning_rate": 1.8425692400559154e-05, + "loss": 0.5243, + "step": 13270 + }, + { + "epoch": 0.364387699066447, + "grad_norm": 0.3726562261581421, + "learning_rate": 1.8425459780076988e-05, + "loss": 0.4781, + "step": 13271 + }, + { + "epoch": 0.3644151565074135, + "grad_norm": 0.37236955761909485, + "learning_rate": 1.842522714387865e-05, + "loss": 0.4593, + "step": 13272 + }, + { + "epoch": 0.36444261394838, + "grad_norm": 0.3926211893558502, + "learning_rate": 1.8424994491964577e-05, + "loss": 0.5173, + "step": 13273 + }, + { + "epoch": 0.3644700713893465, + "grad_norm": 0.46923619508743286, + "learning_rate": 1.8424761824335195e-05, + "loss": 0.5126, + "step": 13274 + }, + { + "epoch": 0.364497528830313, + "grad_norm": 0.646196186542511, + "learning_rate": 1.8424529140990947e-05, + "loss": 0.5773, + "step": 13275 + }, + { + "epoch": 0.3645249862712795, + "grad_norm": 0.3653760552406311, + "learning_rate": 1.8424296441932262e-05, + "loss": 0.6268, + "step": 13276 + }, + { + "epoch": 0.36455244371224604, + "grad_norm": 0.3569766581058502, + "learning_rate": 1.8424063727159578e-05, + "loss": 0.4741, + "step": 13277 + }, + { + "epoch": 0.36457990115321254, + "grad_norm": 0.3716336488723755, + "learning_rate": 1.842383099667332e-05, + "loss": 0.6097, + "step": 13278 + }, + { + "epoch": 0.36460735859417903, + "grad_norm": 0.3834122121334076, + "learning_rate": 1.8423598250473936e-05, + "loss": 0.5966, + "step": 13279 + }, + { + "epoch": 0.3646348160351455, + "grad_norm": 0.3711458146572113, + "learning_rate": 1.842336548856185e-05, + "loss": 0.4877, + "step": 13280 + }, + { + "epoch": 0.364662273476112, + "grad_norm": 0.3967645764350891, + "learning_rate": 1.8423132710937498e-05, + "loss": 0.4957, + "step": 13281 + }, + { + "epoch": 0.3646897309170785, + "grad_norm": 0.3778120279312134, + "learning_rate": 1.8422899917601315e-05, + "loss": 0.5248, + "step": 13282 + }, + { + "epoch": 0.364717188358045, + "grad_norm": 0.40993818640708923, + "learning_rate": 1.842266710855374e-05, + "loss": 0.5725, + "step": 13283 + }, + { + "epoch": 0.36474464579901156, + "grad_norm": 0.36683452129364014, + "learning_rate": 1.8422434283795195e-05, + "loss": 0.5839, + "step": 13284 + }, + { + "epoch": 0.36477210323997805, + "grad_norm": 0.3821203112602234, + "learning_rate": 1.8422201443326125e-05, + "loss": 0.5705, + "step": 13285 + }, + { + "epoch": 0.36479956068094455, + "grad_norm": 0.3230799734592438, + "learning_rate": 1.8421968587146962e-05, + "loss": 0.484, + "step": 13286 + }, + { + "epoch": 0.36482701812191104, + "grad_norm": 0.3225272297859192, + "learning_rate": 1.842173571525814e-05, + "loss": 0.4209, + "step": 13287 + }, + { + "epoch": 0.36485447556287753, + "grad_norm": 0.43021076917648315, + "learning_rate": 1.8421502827660093e-05, + "loss": 0.4809, + "step": 13288 + }, + { + "epoch": 0.36488193300384403, + "grad_norm": 0.476222425699234, + "learning_rate": 1.8421269924353257e-05, + "loss": 0.4386, + "step": 13289 + }, + { + "epoch": 0.3649093904448105, + "grad_norm": 0.34194478392601013, + "learning_rate": 1.842103700533806e-05, + "loss": 0.4293, + "step": 13290 + }, + { + "epoch": 0.36493684788577707, + "grad_norm": 0.39638620615005493, + "learning_rate": 1.8420804070614944e-05, + "loss": 0.4768, + "step": 13291 + }, + { + "epoch": 0.36496430532674357, + "grad_norm": 0.33885541558265686, + "learning_rate": 1.842057112018434e-05, + "loss": 0.4805, + "step": 13292 + }, + { + "epoch": 0.36499176276771006, + "grad_norm": 0.3689919114112854, + "learning_rate": 1.8420338154046685e-05, + "loss": 0.494, + "step": 13293 + }, + { + "epoch": 0.36501922020867655, + "grad_norm": 0.4467167258262634, + "learning_rate": 1.8420105172202412e-05, + "loss": 0.5304, + "step": 13294 + }, + { + "epoch": 0.36504667764964305, + "grad_norm": 0.3628845810890198, + "learning_rate": 1.8419872174651952e-05, + "loss": 0.5268, + "step": 13295 + }, + { + "epoch": 0.36507413509060954, + "grad_norm": 0.37182343006134033, + "learning_rate": 1.8419639161395747e-05, + "loss": 0.5787, + "step": 13296 + }, + { + "epoch": 0.36510159253157604, + "grad_norm": 0.38182374835014343, + "learning_rate": 1.8419406132434226e-05, + "loss": 0.58, + "step": 13297 + }, + { + "epoch": 0.3651290499725426, + "grad_norm": 0.3162412941455841, + "learning_rate": 1.8419173087767827e-05, + "loss": 0.4231, + "step": 13298 + }, + { + "epoch": 0.3651565074135091, + "grad_norm": 0.3668927550315857, + "learning_rate": 1.841894002739698e-05, + "loss": 0.5457, + "step": 13299 + }, + { + "epoch": 0.3651839648544756, + "grad_norm": 0.3886664807796478, + "learning_rate": 1.8418706951322124e-05, + "loss": 0.5313, + "step": 13300 + }, + { + "epoch": 0.36521142229544207, + "grad_norm": 0.35190919041633606, + "learning_rate": 1.8418473859543694e-05, + "loss": 0.5529, + "step": 13301 + }, + { + "epoch": 0.36523887973640856, + "grad_norm": 0.38471975922584534, + "learning_rate": 1.8418240752062124e-05, + "loss": 0.5425, + "step": 13302 + }, + { + "epoch": 0.36526633717737506, + "grad_norm": 0.35083475708961487, + "learning_rate": 1.8418007628877848e-05, + "loss": 0.5016, + "step": 13303 + }, + { + "epoch": 0.36529379461834155, + "grad_norm": 0.397013783454895, + "learning_rate": 1.8417774489991298e-05, + "loss": 0.5111, + "step": 13304 + }, + { + "epoch": 0.3653212520593081, + "grad_norm": 0.34346768260002136, + "learning_rate": 1.8417541335402912e-05, + "loss": 0.518, + "step": 13305 + }, + { + "epoch": 0.3653487095002746, + "grad_norm": 0.42549484968185425, + "learning_rate": 1.841730816511313e-05, + "loss": 0.5396, + "step": 13306 + }, + { + "epoch": 0.3653761669412411, + "grad_norm": 0.332579106092453, + "learning_rate": 1.8417074979122378e-05, + "loss": 0.5163, + "step": 13307 + }, + { + "epoch": 0.3654036243822076, + "grad_norm": 0.3657087981700897, + "learning_rate": 1.8416841777431096e-05, + "loss": 0.526, + "step": 13308 + }, + { + "epoch": 0.3654310818231741, + "grad_norm": 0.35419946908950806, + "learning_rate": 1.8416608560039714e-05, + "loss": 0.5068, + "step": 13309 + }, + { + "epoch": 0.36545853926414057, + "grad_norm": 0.35014206171035767, + "learning_rate": 1.8416375326948674e-05, + "loss": 0.532, + "step": 13310 + }, + { + "epoch": 0.36548599670510706, + "grad_norm": 0.369619756937027, + "learning_rate": 1.841614207815841e-05, + "loss": 0.5229, + "step": 13311 + }, + { + "epoch": 0.3655134541460736, + "grad_norm": 0.48485833406448364, + "learning_rate": 1.8415908813669352e-05, + "loss": 0.5445, + "step": 13312 + }, + { + "epoch": 0.3655409115870401, + "grad_norm": 0.36239004135131836, + "learning_rate": 1.841567553348194e-05, + "loss": 0.5601, + "step": 13313 + }, + { + "epoch": 0.3655683690280066, + "grad_norm": 0.559315025806427, + "learning_rate": 1.8415442237596602e-05, + "loss": 0.4434, + "step": 13314 + }, + { + "epoch": 0.3655958264689731, + "grad_norm": 0.3629678785800934, + "learning_rate": 1.8415208926013784e-05, + "loss": 0.4974, + "step": 13315 + }, + { + "epoch": 0.3656232839099396, + "grad_norm": 0.3580913543701172, + "learning_rate": 1.8414975598733913e-05, + "loss": 0.4208, + "step": 13316 + }, + { + "epoch": 0.3656507413509061, + "grad_norm": 0.35370516777038574, + "learning_rate": 1.8414742255757428e-05, + "loss": 0.5659, + "step": 13317 + }, + { + "epoch": 0.3656781987918726, + "grad_norm": 0.41372695565223694, + "learning_rate": 1.841450889708476e-05, + "loss": 0.5267, + "step": 13318 + }, + { + "epoch": 0.3657056562328391, + "grad_norm": 0.4300137162208557, + "learning_rate": 1.841427552271635e-05, + "loss": 0.5192, + "step": 13319 + }, + { + "epoch": 0.3657331136738056, + "grad_norm": 0.3510093688964844, + "learning_rate": 1.8414042132652632e-05, + "loss": 0.54, + "step": 13320 + }, + { + "epoch": 0.3657605711147721, + "grad_norm": 0.38661229610443115, + "learning_rate": 1.8413808726894038e-05, + "loss": 0.5037, + "step": 13321 + }, + { + "epoch": 0.3657880285557386, + "grad_norm": 0.36446958780288696, + "learning_rate": 1.8413575305441003e-05, + "loss": 0.479, + "step": 13322 + }, + { + "epoch": 0.3658154859967051, + "grad_norm": 0.354449599981308, + "learning_rate": 1.8413341868293966e-05, + "loss": 0.5303, + "step": 13323 + }, + { + "epoch": 0.3658429434376716, + "grad_norm": 0.36447587609291077, + "learning_rate": 1.8413108415453367e-05, + "loss": 0.4952, + "step": 13324 + }, + { + "epoch": 0.3658704008786381, + "grad_norm": 0.3748722970485687, + "learning_rate": 1.841287494691963e-05, + "loss": 0.5207, + "step": 13325 + }, + { + "epoch": 0.3658978583196046, + "grad_norm": 0.3768123388290405, + "learning_rate": 1.8412641462693197e-05, + "loss": 0.4826, + "step": 13326 + }, + { + "epoch": 0.36592531576057113, + "grad_norm": 0.3471638262271881, + "learning_rate": 1.8412407962774503e-05, + "loss": 0.4881, + "step": 13327 + }, + { + "epoch": 0.36595277320153763, + "grad_norm": 0.35733985900878906, + "learning_rate": 1.8412174447163985e-05, + "loss": 0.4858, + "step": 13328 + }, + { + "epoch": 0.3659802306425041, + "grad_norm": 0.3693062365055084, + "learning_rate": 1.8411940915862074e-05, + "loss": 0.5779, + "step": 13329 + }, + { + "epoch": 0.3660076880834706, + "grad_norm": 0.37515556812286377, + "learning_rate": 1.841170736886921e-05, + "loss": 0.4679, + "step": 13330 + }, + { + "epoch": 0.3660351455244371, + "grad_norm": 0.4107637107372284, + "learning_rate": 1.8411473806185825e-05, + "loss": 0.4869, + "step": 13331 + }, + { + "epoch": 0.3660626029654036, + "grad_norm": 0.3195800185203552, + "learning_rate": 1.841124022781236e-05, + "loss": 0.4851, + "step": 13332 + }, + { + "epoch": 0.3660900604063701, + "grad_norm": 0.3781949579715729, + "learning_rate": 1.8411006633749245e-05, + "loss": 0.5283, + "step": 13333 + }, + { + "epoch": 0.36611751784733665, + "grad_norm": 0.3918650448322296, + "learning_rate": 1.8410773023996917e-05, + "loss": 0.5671, + "step": 13334 + }, + { + "epoch": 0.36614497528830314, + "grad_norm": 0.3476279675960541, + "learning_rate": 1.8410539398555815e-05, + "loss": 0.494, + "step": 13335 + }, + { + "epoch": 0.36617243272926964, + "grad_norm": 0.3372535705566406, + "learning_rate": 1.841030575742637e-05, + "loss": 0.4992, + "step": 13336 + }, + { + "epoch": 0.36619989017023613, + "grad_norm": 0.37522092461586, + "learning_rate": 1.8410072100609024e-05, + "loss": 0.5081, + "step": 13337 + }, + { + "epoch": 0.3662273476112026, + "grad_norm": 0.37428897619247437, + "learning_rate": 1.840983842810421e-05, + "loss": 0.481, + "step": 13338 + }, + { + "epoch": 0.3662548050521691, + "grad_norm": 0.34694284200668335, + "learning_rate": 1.840960473991236e-05, + "loss": 0.4469, + "step": 13339 + }, + { + "epoch": 0.3662822624931356, + "grad_norm": 0.9279324412345886, + "learning_rate": 1.8409371036033918e-05, + "loss": 0.5331, + "step": 13340 + }, + { + "epoch": 0.36630971993410216, + "grad_norm": 0.3372042775154114, + "learning_rate": 1.8409137316469308e-05, + "loss": 0.5046, + "step": 13341 + }, + { + "epoch": 0.36633717737506866, + "grad_norm": 0.5034865140914917, + "learning_rate": 1.8408903581218976e-05, + "loss": 0.5451, + "step": 13342 + }, + { + "epoch": 0.36636463481603515, + "grad_norm": 0.3319418430328369, + "learning_rate": 1.8408669830283356e-05, + "loss": 0.4404, + "step": 13343 + }, + { + "epoch": 0.36639209225700164, + "grad_norm": 0.3865717649459839, + "learning_rate": 1.8408436063662884e-05, + "loss": 0.597, + "step": 13344 + }, + { + "epoch": 0.36641954969796814, + "grad_norm": 0.38112810254096985, + "learning_rate": 1.8408202281357992e-05, + "loss": 0.5088, + "step": 13345 + }, + { + "epoch": 0.36644700713893463, + "grad_norm": 0.42365604639053345, + "learning_rate": 1.8407968483369122e-05, + "loss": 0.5389, + "step": 13346 + }, + { + "epoch": 0.3664744645799011, + "grad_norm": 0.37940099835395813, + "learning_rate": 1.8407734669696706e-05, + "loss": 0.5693, + "step": 13347 + }, + { + "epoch": 0.3665019220208677, + "grad_norm": 0.4405479431152344, + "learning_rate": 1.840750084034118e-05, + "loss": 0.562, + "step": 13348 + }, + { + "epoch": 0.36652937946183417, + "grad_norm": 0.34387168288230896, + "learning_rate": 1.8407266995302984e-05, + "loss": 0.5312, + "step": 13349 + }, + { + "epoch": 0.36655683690280066, + "grad_norm": 0.5587919354438782, + "learning_rate": 1.840703313458255e-05, + "loss": 0.4429, + "step": 13350 + }, + { + "epoch": 0.36658429434376716, + "grad_norm": 0.44959282875061035, + "learning_rate": 1.840679925818032e-05, + "loss": 0.564, + "step": 13351 + }, + { + "epoch": 0.36661175178473365, + "grad_norm": 0.4271070957183838, + "learning_rate": 1.8406565366096722e-05, + "loss": 0.5375, + "step": 13352 + }, + { + "epoch": 0.36663920922570015, + "grad_norm": 0.39512133598327637, + "learning_rate": 1.8406331458332196e-05, + "loss": 0.6326, + "step": 13353 + }, + { + "epoch": 0.36666666666666664, + "grad_norm": 0.4771334230899811, + "learning_rate": 1.840609753488718e-05, + "loss": 0.6156, + "step": 13354 + }, + { + "epoch": 0.3666941241076332, + "grad_norm": 0.36504268646240234, + "learning_rate": 1.840586359576211e-05, + "loss": 0.4946, + "step": 13355 + }, + { + "epoch": 0.3667215815485997, + "grad_norm": 0.41533011198043823, + "learning_rate": 1.8405629640957422e-05, + "loss": 0.514, + "step": 13356 + }, + { + "epoch": 0.3667490389895662, + "grad_norm": 0.41306278109550476, + "learning_rate": 1.8405395670473552e-05, + "loss": 0.6179, + "step": 13357 + }, + { + "epoch": 0.3667764964305327, + "grad_norm": 0.34606435894966125, + "learning_rate": 1.8405161684310936e-05, + "loss": 0.4607, + "step": 13358 + }, + { + "epoch": 0.36680395387149917, + "grad_norm": 0.36875876784324646, + "learning_rate": 1.840492768247001e-05, + "loss": 0.6177, + "step": 13359 + }, + { + "epoch": 0.36683141131246566, + "grad_norm": 0.3480891287326813, + "learning_rate": 1.8404693664951212e-05, + "loss": 0.532, + "step": 13360 + }, + { + "epoch": 0.36685886875343215, + "grad_norm": 0.4269990921020508, + "learning_rate": 1.8404459631754978e-05, + "loss": 0.5067, + "step": 13361 + }, + { + "epoch": 0.3668863261943987, + "grad_norm": 0.3628652095794678, + "learning_rate": 1.8404225582881742e-05, + "loss": 0.537, + "step": 13362 + }, + { + "epoch": 0.3669137836353652, + "grad_norm": 0.3758205473423004, + "learning_rate": 1.8403991518331947e-05, + "loss": 0.5416, + "step": 13363 + }, + { + "epoch": 0.3669412410763317, + "grad_norm": 0.40794235467910767, + "learning_rate": 1.840375743810602e-05, + "loss": 0.5665, + "step": 13364 + }, + { + "epoch": 0.3669686985172982, + "grad_norm": 0.37940531969070435, + "learning_rate": 1.8403523342204408e-05, + "loss": 0.5604, + "step": 13365 + }, + { + "epoch": 0.3669961559582647, + "grad_norm": 0.41265398263931274, + "learning_rate": 1.8403289230627543e-05, + "loss": 0.4935, + "step": 13366 + }, + { + "epoch": 0.3670236133992312, + "grad_norm": 0.35071253776550293, + "learning_rate": 1.840305510337586e-05, + "loss": 0.486, + "step": 13367 + }, + { + "epoch": 0.36705107084019767, + "grad_norm": 0.32659846544265747, + "learning_rate": 1.8402820960449796e-05, + "loss": 0.5739, + "step": 13368 + }, + { + "epoch": 0.3670785282811642, + "grad_norm": 0.3483704626560211, + "learning_rate": 1.840258680184979e-05, + "loss": 0.5128, + "step": 13369 + }, + { + "epoch": 0.3671059857221307, + "grad_norm": 0.39068934321403503, + "learning_rate": 1.840235262757628e-05, + "loss": 0.4804, + "step": 13370 + }, + { + "epoch": 0.3671334431630972, + "grad_norm": 0.44622352719306946, + "learning_rate": 1.84021184376297e-05, + "loss": 0.5305, + "step": 13371 + }, + { + "epoch": 0.3671609006040637, + "grad_norm": 0.32348060607910156, + "learning_rate": 1.8401884232010487e-05, + "loss": 0.4738, + "step": 13372 + }, + { + "epoch": 0.3671883580450302, + "grad_norm": 0.42738184332847595, + "learning_rate": 1.8401650010719073e-05, + "loss": 0.5034, + "step": 13373 + }, + { + "epoch": 0.3672158154859967, + "grad_norm": 0.41680678725242615, + "learning_rate": 1.8401415773755908e-05, + "loss": 0.499, + "step": 13374 + }, + { + "epoch": 0.3672432729269632, + "grad_norm": 0.37779971957206726, + "learning_rate": 1.8401181521121416e-05, + "loss": 0.4817, + "step": 13375 + }, + { + "epoch": 0.36727073036792973, + "grad_norm": 0.38788700103759766, + "learning_rate": 1.8400947252816042e-05, + "loss": 0.5086, + "step": 13376 + }, + { + "epoch": 0.3672981878088962, + "grad_norm": 0.38359692692756653, + "learning_rate": 1.840071296884022e-05, + "loss": 0.4914, + "step": 13377 + }, + { + "epoch": 0.3673256452498627, + "grad_norm": 0.6101033687591553, + "learning_rate": 1.8400478669194386e-05, + "loss": 0.555, + "step": 13378 + }, + { + "epoch": 0.3673531026908292, + "grad_norm": 0.8869770765304565, + "learning_rate": 1.840024435387898e-05, + "loss": 0.5054, + "step": 13379 + }, + { + "epoch": 0.3673805601317957, + "grad_norm": 0.39183473587036133, + "learning_rate": 1.8400010022894434e-05, + "loss": 0.4972, + "step": 13380 + }, + { + "epoch": 0.3674080175727622, + "grad_norm": 0.33186694979667664, + "learning_rate": 1.8399775676241192e-05, + "loss": 0.5564, + "step": 13381 + }, + { + "epoch": 0.3674354750137287, + "grad_norm": 0.3348950445652008, + "learning_rate": 1.8399541313919685e-05, + "loss": 0.4643, + "step": 13382 + }, + { + "epoch": 0.36746293245469525, + "grad_norm": 0.39207109808921814, + "learning_rate": 1.839930693593035e-05, + "loss": 0.5221, + "step": 13383 + }, + { + "epoch": 0.36749038989566174, + "grad_norm": 0.37054672837257385, + "learning_rate": 1.8399072542273632e-05, + "loss": 0.4804, + "step": 13384 + }, + { + "epoch": 0.36751784733662823, + "grad_norm": 0.38118672370910645, + "learning_rate": 1.839883813294996e-05, + "loss": 0.5, + "step": 13385 + }, + { + "epoch": 0.36754530477759473, + "grad_norm": 0.3411831855773926, + "learning_rate": 1.839860370795978e-05, + "loss": 0.5216, + "step": 13386 + }, + { + "epoch": 0.3675727622185612, + "grad_norm": 0.3712208867073059, + "learning_rate": 1.839836926730352e-05, + "loss": 0.4941, + "step": 13387 + }, + { + "epoch": 0.3676002196595277, + "grad_norm": 0.42792174220085144, + "learning_rate": 1.8398134810981615e-05, + "loss": 0.5228, + "step": 13388 + }, + { + "epoch": 0.3676276771004942, + "grad_norm": 0.4152697026729584, + "learning_rate": 1.839790033899451e-05, + "loss": 0.5454, + "step": 13389 + }, + { + "epoch": 0.36765513454146076, + "grad_norm": 0.3684476912021637, + "learning_rate": 1.8397665851342647e-05, + "loss": 0.5698, + "step": 13390 + }, + { + "epoch": 0.36768259198242725, + "grad_norm": 0.4602998197078705, + "learning_rate": 1.8397431348026457e-05, + "loss": 0.4696, + "step": 13391 + }, + { + "epoch": 0.36771004942339375, + "grad_norm": 0.3246760666370392, + "learning_rate": 1.8397196829046372e-05, + "loss": 0.4816, + "step": 13392 + }, + { + "epoch": 0.36773750686436024, + "grad_norm": 0.3695104420185089, + "learning_rate": 1.8396962294402834e-05, + "loss": 0.5457, + "step": 13393 + }, + { + "epoch": 0.36776496430532674, + "grad_norm": 0.3247019648551941, + "learning_rate": 1.8396727744096286e-05, + "loss": 0.4431, + "step": 13394 + }, + { + "epoch": 0.36779242174629323, + "grad_norm": 0.3578256666660309, + "learning_rate": 1.839649317812716e-05, + "loss": 0.5124, + "step": 13395 + }, + { + "epoch": 0.3678198791872597, + "grad_norm": 0.35054466128349304, + "learning_rate": 1.8396258596495893e-05, + "loss": 0.5506, + "step": 13396 + }, + { + "epoch": 0.3678473366282263, + "grad_norm": 0.456494003534317, + "learning_rate": 1.8396023999202922e-05, + "loss": 0.5143, + "step": 13397 + }, + { + "epoch": 0.36787479406919277, + "grad_norm": 0.3311481475830078, + "learning_rate": 1.8395789386248692e-05, + "loss": 0.4286, + "step": 13398 + }, + { + "epoch": 0.36790225151015926, + "grad_norm": 0.3279074728488922, + "learning_rate": 1.839555475763363e-05, + "loss": 0.5202, + "step": 13399 + }, + { + "epoch": 0.36792970895112576, + "grad_norm": 0.3917251229286194, + "learning_rate": 1.839532011335818e-05, + "loss": 0.5842, + "step": 13400 + }, + { + "epoch": 0.36795716639209225, + "grad_norm": 0.37432000041007996, + "learning_rate": 1.839508545342278e-05, + "loss": 0.5285, + "step": 13401 + }, + { + "epoch": 0.36798462383305874, + "grad_norm": 0.3498668074607849, + "learning_rate": 1.8394850777827867e-05, + "loss": 0.5762, + "step": 13402 + }, + { + "epoch": 0.36801208127402524, + "grad_norm": 0.3375849425792694, + "learning_rate": 1.8394616086573874e-05, + "loss": 0.467, + "step": 13403 + }, + { + "epoch": 0.3680395387149918, + "grad_norm": 0.3397333323955536, + "learning_rate": 1.8394381379661247e-05, + "loss": 0.4955, + "step": 13404 + }, + { + "epoch": 0.3680669961559583, + "grad_norm": 0.34500449895858765, + "learning_rate": 1.839414665709042e-05, + "loss": 0.4981, + "step": 13405 + }, + { + "epoch": 0.3680944535969248, + "grad_norm": 0.3981265425682068, + "learning_rate": 1.8393911918861826e-05, + "loss": 0.5512, + "step": 13406 + }, + { + "epoch": 0.36812191103789127, + "grad_norm": 0.3952910006046295, + "learning_rate": 1.839367716497591e-05, + "loss": 0.4454, + "step": 13407 + }, + { + "epoch": 0.36814936847885776, + "grad_norm": 0.3936050832271576, + "learning_rate": 1.8393442395433104e-05, + "loss": 0.4764, + "step": 13408 + }, + { + "epoch": 0.36817682591982426, + "grad_norm": 0.33462223410606384, + "learning_rate": 1.8393207610233853e-05, + "loss": 0.4174, + "step": 13409 + }, + { + "epoch": 0.36820428336079075, + "grad_norm": 0.41745761036872864, + "learning_rate": 1.839297280937859e-05, + "loss": 0.4574, + "step": 13410 + }, + { + "epoch": 0.3682317408017573, + "grad_norm": 0.37675222754478455, + "learning_rate": 1.8392737992867754e-05, + "loss": 0.5584, + "step": 13411 + }, + { + "epoch": 0.3682591982427238, + "grad_norm": 0.3506487011909485, + "learning_rate": 1.8392503160701783e-05, + "loss": 0.5312, + "step": 13412 + }, + { + "epoch": 0.3682866556836903, + "grad_norm": 0.3985677659511566, + "learning_rate": 1.8392268312881115e-05, + "loss": 0.4947, + "step": 13413 + }, + { + "epoch": 0.3683141131246568, + "grad_norm": 0.3659696877002716, + "learning_rate": 1.8392033449406188e-05, + "loss": 0.551, + "step": 13414 + }, + { + "epoch": 0.3683415705656233, + "grad_norm": 0.3907739818096161, + "learning_rate": 1.839179857027744e-05, + "loss": 0.5036, + "step": 13415 + }, + { + "epoch": 0.36836902800658977, + "grad_norm": 0.358896940946579, + "learning_rate": 1.8391563675495308e-05, + "loss": 0.5042, + "step": 13416 + }, + { + "epoch": 0.36839648544755627, + "grad_norm": 0.40342676639556885, + "learning_rate": 1.8391328765060234e-05, + "loss": 0.5728, + "step": 13417 + }, + { + "epoch": 0.3684239428885228, + "grad_norm": 0.36156249046325684, + "learning_rate": 1.839109383897265e-05, + "loss": 0.4264, + "step": 13418 + }, + { + "epoch": 0.3684514003294893, + "grad_norm": 0.37061867117881775, + "learning_rate": 1.8390858897233e-05, + "loss": 0.5392, + "step": 13419 + }, + { + "epoch": 0.3684788577704558, + "grad_norm": 0.33382901549339294, + "learning_rate": 1.8390623939841723e-05, + "loss": 0.4712, + "step": 13420 + }, + { + "epoch": 0.3685063152114223, + "grad_norm": 0.40484169125556946, + "learning_rate": 1.8390388966799252e-05, + "loss": 0.5934, + "step": 13421 + }, + { + "epoch": 0.3685337726523888, + "grad_norm": 0.36332035064697266, + "learning_rate": 1.8390153978106028e-05, + "loss": 0.5676, + "step": 13422 + }, + { + "epoch": 0.3685612300933553, + "grad_norm": 0.36634042859077454, + "learning_rate": 1.8389918973762487e-05, + "loss": 0.5966, + "step": 13423 + }, + { + "epoch": 0.3685886875343218, + "grad_norm": 0.4249381422996521, + "learning_rate": 1.8389683953769068e-05, + "loss": 0.5258, + "step": 13424 + }, + { + "epoch": 0.36861614497528833, + "grad_norm": 0.35385769605636597, + "learning_rate": 1.8389448918126217e-05, + "loss": 0.4605, + "step": 13425 + }, + { + "epoch": 0.3686436024162548, + "grad_norm": 0.3607059121131897, + "learning_rate": 1.838921386683436e-05, + "loss": 0.5206, + "step": 13426 + }, + { + "epoch": 0.3686710598572213, + "grad_norm": 0.40478041768074036, + "learning_rate": 1.8388978799893947e-05, + "loss": 0.4819, + "step": 13427 + }, + { + "epoch": 0.3686985172981878, + "grad_norm": 0.3853946924209595, + "learning_rate": 1.838874371730541e-05, + "loss": 0.5599, + "step": 13428 + }, + { + "epoch": 0.3687259747391543, + "grad_norm": 0.37743517756462097, + "learning_rate": 1.838850861906918e-05, + "loss": 0.4975, + "step": 13429 + }, + { + "epoch": 0.3687534321801208, + "grad_norm": 0.35615721344947815, + "learning_rate": 1.8388273505185714e-05, + "loss": 0.5103, + "step": 13430 + }, + { + "epoch": 0.3687808896210873, + "grad_norm": 0.3602025806903839, + "learning_rate": 1.838803837565544e-05, + "loss": 0.4604, + "step": 13431 + }, + { + "epoch": 0.36880834706205384, + "grad_norm": 0.37747034430503845, + "learning_rate": 1.8387803230478795e-05, + "loss": 0.4539, + "step": 13432 + }, + { + "epoch": 0.36883580450302034, + "grad_norm": 0.40808364748954773, + "learning_rate": 1.8387568069656218e-05, + "loss": 0.575, + "step": 13433 + }, + { + "epoch": 0.36886326194398683, + "grad_norm": 0.3677160441875458, + "learning_rate": 1.8387332893188152e-05, + "loss": 0.4999, + "step": 13434 + }, + { + "epoch": 0.3688907193849533, + "grad_norm": 0.3718004524707794, + "learning_rate": 1.8387097701075032e-05, + "loss": 0.5094, + "step": 13435 + }, + { + "epoch": 0.3689181768259198, + "grad_norm": 0.41247034072875977, + "learning_rate": 1.8386862493317302e-05, + "loss": 0.5522, + "step": 13436 + }, + { + "epoch": 0.3689456342668863, + "grad_norm": 0.36637231707572937, + "learning_rate": 1.8386627269915392e-05, + "loss": 0.5134, + "step": 13437 + }, + { + "epoch": 0.3689730917078528, + "grad_norm": 0.3613288104534149, + "learning_rate": 1.8386392030869745e-05, + "loss": 0.4786, + "step": 13438 + }, + { + "epoch": 0.36900054914881936, + "grad_norm": 0.43460720777511597, + "learning_rate": 1.8386156776180803e-05, + "loss": 0.5397, + "step": 13439 + }, + { + "epoch": 0.36902800658978585, + "grad_norm": 0.5147268772125244, + "learning_rate": 1.8385921505849e-05, + "loss": 0.4, + "step": 13440 + }, + { + "epoch": 0.36905546403075234, + "grad_norm": 0.9536510109901428, + "learning_rate": 1.838568621987478e-05, + "loss": 0.4629, + "step": 13441 + }, + { + "epoch": 0.36908292147171884, + "grad_norm": 0.4116055965423584, + "learning_rate": 1.8385450918258578e-05, + "loss": 0.5894, + "step": 13442 + }, + { + "epoch": 0.36911037891268533, + "grad_norm": 0.36125192046165466, + "learning_rate": 1.838521560100083e-05, + "loss": 0.5136, + "step": 13443 + }, + { + "epoch": 0.3691378363536518, + "grad_norm": 0.5452654361724854, + "learning_rate": 1.838498026810198e-05, + "loss": 0.4699, + "step": 13444 + }, + { + "epoch": 0.3691652937946183, + "grad_norm": 0.3885881304740906, + "learning_rate": 1.838474491956247e-05, + "loss": 0.5478, + "step": 13445 + }, + { + "epoch": 0.36919275123558487, + "grad_norm": 0.4133382737636566, + "learning_rate": 1.838450955538273e-05, + "loss": 0.5612, + "step": 13446 + }, + { + "epoch": 0.36922020867655136, + "grad_norm": 0.3920286297798157, + "learning_rate": 1.8384274175563206e-05, + "loss": 0.6014, + "step": 13447 + }, + { + "epoch": 0.36924766611751786, + "grad_norm": 0.35896822810173035, + "learning_rate": 1.8384038780104333e-05, + "loss": 0.5208, + "step": 13448 + }, + { + "epoch": 0.36927512355848435, + "grad_norm": 0.35007244348526, + "learning_rate": 1.8383803369006553e-05, + "loss": 0.4882, + "step": 13449 + }, + { + "epoch": 0.36930258099945085, + "grad_norm": 0.297490656375885, + "learning_rate": 1.83835679422703e-05, + "loss": 0.4041, + "step": 13450 + }, + { + "epoch": 0.36933003844041734, + "grad_norm": 0.5628771185874939, + "learning_rate": 1.8383332499896025e-05, + "loss": 0.4768, + "step": 13451 + }, + { + "epoch": 0.36935749588138383, + "grad_norm": 0.3211683928966522, + "learning_rate": 1.838309704188415e-05, + "loss": 0.515, + "step": 13452 + }, + { + "epoch": 0.3693849533223504, + "grad_norm": 0.3560747802257538, + "learning_rate": 1.8382861568235132e-05, + "loss": 0.4266, + "step": 13453 + }, + { + "epoch": 0.3694124107633169, + "grad_norm": 0.40635713934898376, + "learning_rate": 1.8382626078949397e-05, + "loss": 0.4771, + "step": 13454 + }, + { + "epoch": 0.3694398682042834, + "grad_norm": 0.731410562992096, + "learning_rate": 1.838239057402739e-05, + "loss": 0.5702, + "step": 13455 + }, + { + "epoch": 0.36946732564524987, + "grad_norm": 0.3759952783584595, + "learning_rate": 1.838215505346955e-05, + "loss": 0.5052, + "step": 13456 + }, + { + "epoch": 0.36949478308621636, + "grad_norm": 0.3417091965675354, + "learning_rate": 1.8381919517276315e-05, + "loss": 0.4519, + "step": 13457 + }, + { + "epoch": 0.36952224052718285, + "grad_norm": 0.32542070746421814, + "learning_rate": 1.8381683965448123e-05, + "loss": 0.4789, + "step": 13458 + }, + { + "epoch": 0.36954969796814935, + "grad_norm": 0.3838872015476227, + "learning_rate": 1.8381448397985418e-05, + "loss": 0.5371, + "step": 13459 + }, + { + "epoch": 0.36957715540911584, + "grad_norm": 0.39199793338775635, + "learning_rate": 1.8381212814888635e-05, + "loss": 0.5497, + "step": 13460 + }, + { + "epoch": 0.3696046128500824, + "grad_norm": 0.3589835464954376, + "learning_rate": 1.8380977216158217e-05, + "loss": 0.5077, + "step": 13461 + }, + { + "epoch": 0.3696320702910489, + "grad_norm": 0.3949224352836609, + "learning_rate": 1.83807416017946e-05, + "loss": 0.5287, + "step": 13462 + }, + { + "epoch": 0.3696595277320154, + "grad_norm": 0.35895246267318726, + "learning_rate": 1.8380505971798222e-05, + "loss": 0.4998, + "step": 13463 + }, + { + "epoch": 0.3696869851729819, + "grad_norm": 0.35966747999191284, + "learning_rate": 1.838027032616953e-05, + "loss": 0.49, + "step": 13464 + }, + { + "epoch": 0.36971444261394837, + "grad_norm": 0.3691622316837311, + "learning_rate": 1.8380034664908958e-05, + "loss": 0.483, + "step": 13465 + }, + { + "epoch": 0.36974190005491486, + "grad_norm": 0.34271135926246643, + "learning_rate": 1.8379798988016947e-05, + "loss": 0.4742, + "step": 13466 + }, + { + "epoch": 0.36976935749588136, + "grad_norm": 0.46871277689933777, + "learning_rate": 1.8379563295493937e-05, + "loss": 0.5121, + "step": 13467 + }, + { + "epoch": 0.3697968149368479, + "grad_norm": 0.34897077083587646, + "learning_rate": 1.8379327587340366e-05, + "loss": 0.4713, + "step": 13468 + }, + { + "epoch": 0.3698242723778144, + "grad_norm": 0.3653803765773773, + "learning_rate": 1.8379091863556677e-05, + "loss": 0.4804, + "step": 13469 + }, + { + "epoch": 0.3698517298187809, + "grad_norm": 0.35771897435188293, + "learning_rate": 1.8378856124143304e-05, + "loss": 0.4895, + "step": 13470 + }, + { + "epoch": 0.3698791872597474, + "grad_norm": 0.36172038316726685, + "learning_rate": 1.8378620369100694e-05, + "loss": 0.5288, + "step": 13471 + }, + { + "epoch": 0.3699066447007139, + "grad_norm": 0.3492182791233063, + "learning_rate": 1.837838459842928e-05, + "loss": 0.544, + "step": 13472 + }, + { + "epoch": 0.3699341021416804, + "grad_norm": 0.3304196000099182, + "learning_rate": 1.8378148812129502e-05, + "loss": 0.4002, + "step": 13473 + }, + { + "epoch": 0.36996155958264687, + "grad_norm": 0.43680962920188904, + "learning_rate": 1.8377913010201805e-05, + "loss": 0.5018, + "step": 13474 + }, + { + "epoch": 0.3699890170236134, + "grad_norm": 0.3614024817943573, + "learning_rate": 1.8377677192646625e-05, + "loss": 0.4945, + "step": 13475 + }, + { + "epoch": 0.3700164744645799, + "grad_norm": 0.3626757562160492, + "learning_rate": 1.8377441359464408e-05, + "loss": 0.5217, + "step": 13476 + }, + { + "epoch": 0.3700439319055464, + "grad_norm": 0.38755473494529724, + "learning_rate": 1.8377205510655586e-05, + "loss": 0.543, + "step": 13477 + }, + { + "epoch": 0.3700713893465129, + "grad_norm": 0.3552868366241455, + "learning_rate": 1.8376969646220598e-05, + "loss": 0.4732, + "step": 13478 + }, + { + "epoch": 0.3700988467874794, + "grad_norm": 0.3445885181427002, + "learning_rate": 1.8376733766159895e-05, + "loss": 0.4834, + "step": 13479 + }, + { + "epoch": 0.3701263042284459, + "grad_norm": 0.3257645070552826, + "learning_rate": 1.8376497870473908e-05, + "loss": 0.4803, + "step": 13480 + }, + { + "epoch": 0.3701537616694124, + "grad_norm": 0.5585044026374817, + "learning_rate": 1.8376261959163076e-05, + "loss": 0.5694, + "step": 13481 + }, + { + "epoch": 0.37018121911037893, + "grad_norm": 0.40360313653945923, + "learning_rate": 1.8376026032227845e-05, + "loss": 0.5159, + "step": 13482 + }, + { + "epoch": 0.37020867655134543, + "grad_norm": 0.3324277400970459, + "learning_rate": 1.837579008966865e-05, + "loss": 0.551, + "step": 13483 + }, + { + "epoch": 0.3702361339923119, + "grad_norm": 0.489388108253479, + "learning_rate": 1.837555413148594e-05, + "loss": 0.519, + "step": 13484 + }, + { + "epoch": 0.3702635914332784, + "grad_norm": 0.35615482926368713, + "learning_rate": 1.837531815768014e-05, + "loss": 0.4934, + "step": 13485 + }, + { + "epoch": 0.3702910488742449, + "grad_norm": 0.42173638939857483, + "learning_rate": 1.83750821682517e-05, + "loss": 0.504, + "step": 13486 + }, + { + "epoch": 0.3703185063152114, + "grad_norm": 0.3315696716308594, + "learning_rate": 1.837484616320106e-05, + "loss": 0.4793, + "step": 13487 + }, + { + "epoch": 0.3703459637561779, + "grad_norm": 0.34786510467529297, + "learning_rate": 1.8374610142528658e-05, + "loss": 0.4626, + "step": 13488 + }, + { + "epoch": 0.37037342119714445, + "grad_norm": 0.39985573291778564, + "learning_rate": 1.8374374106234937e-05, + "loss": 0.5236, + "step": 13489 + }, + { + "epoch": 0.37040087863811094, + "grad_norm": 0.3681473135948181, + "learning_rate": 1.8374138054320333e-05, + "loss": 0.4869, + "step": 13490 + }, + { + "epoch": 0.37042833607907744, + "grad_norm": 0.40194016695022583, + "learning_rate": 1.8373901986785292e-05, + "loss": 0.5491, + "step": 13491 + }, + { + "epoch": 0.37045579352004393, + "grad_norm": 0.4003845155239105, + "learning_rate": 1.837366590363025e-05, + "loss": 0.6287, + "step": 13492 + }, + { + "epoch": 0.3704832509610104, + "grad_norm": 0.3771802484989166, + "learning_rate": 1.837342980485565e-05, + "loss": 0.5069, + "step": 13493 + }, + { + "epoch": 0.3705107084019769, + "grad_norm": 0.3755689263343811, + "learning_rate": 1.837319369046193e-05, + "loss": 0.51, + "step": 13494 + }, + { + "epoch": 0.3705381658429434, + "grad_norm": 0.393771767616272, + "learning_rate": 1.837295756044953e-05, + "loss": 0.6057, + "step": 13495 + }, + { + "epoch": 0.37056562328390996, + "grad_norm": 0.32857391238212585, + "learning_rate": 1.837272141481889e-05, + "loss": 0.5052, + "step": 13496 + }, + { + "epoch": 0.37059308072487646, + "grad_norm": 0.38793322443962097, + "learning_rate": 1.8372485253570456e-05, + "loss": 0.4669, + "step": 13497 + }, + { + "epoch": 0.37062053816584295, + "grad_norm": 0.3856356143951416, + "learning_rate": 1.8372249076704665e-05, + "loss": 0.6104, + "step": 13498 + }, + { + "epoch": 0.37064799560680944, + "grad_norm": 0.3609086275100708, + "learning_rate": 1.8372012884221955e-05, + "loss": 0.5664, + "step": 13499 + }, + { + "epoch": 0.37067545304777594, + "grad_norm": 0.35234951972961426, + "learning_rate": 1.837177667612277e-05, + "loss": 0.5655, + "step": 13500 + }, + { + "epoch": 0.37070291048874243, + "grad_norm": 0.42199960350990295, + "learning_rate": 1.837154045240755e-05, + "loss": 0.453, + "step": 13501 + }, + { + "epoch": 0.3707303679297089, + "grad_norm": 0.33890193700790405, + "learning_rate": 1.8371304213076734e-05, + "loss": 0.4537, + "step": 13502 + }, + { + "epoch": 0.3707578253706755, + "grad_norm": 0.3710574805736542, + "learning_rate": 1.8371067958130765e-05, + "loss": 0.494, + "step": 13503 + }, + { + "epoch": 0.37078528281164197, + "grad_norm": 0.3753916621208191, + "learning_rate": 1.837083168757008e-05, + "loss": 0.527, + "step": 13504 + }, + { + "epoch": 0.37081274025260846, + "grad_norm": 0.39860716462135315, + "learning_rate": 1.8370595401395124e-05, + "loss": 0.5814, + "step": 13505 + }, + { + "epoch": 0.37084019769357496, + "grad_norm": 0.32664522528648376, + "learning_rate": 1.8370359099606335e-05, + "loss": 0.4786, + "step": 13506 + }, + { + "epoch": 0.37086765513454145, + "grad_norm": 0.33673277497291565, + "learning_rate": 1.8370122782204158e-05, + "loss": 0.5172, + "step": 13507 + }, + { + "epoch": 0.37089511257550795, + "grad_norm": 0.4607031047344208, + "learning_rate": 1.8369886449189026e-05, + "loss": 0.5134, + "step": 13508 + }, + { + "epoch": 0.37092257001647444, + "grad_norm": 0.31031668186187744, + "learning_rate": 1.836965010056139e-05, + "loss": 0.426, + "step": 13509 + }, + { + "epoch": 0.370950027457441, + "grad_norm": 0.5600857734680176, + "learning_rate": 1.8369413736321678e-05, + "loss": 0.552, + "step": 13510 + }, + { + "epoch": 0.3709774848984075, + "grad_norm": 0.4069803059101105, + "learning_rate": 1.8369177356470344e-05, + "loss": 0.537, + "step": 13511 + }, + { + "epoch": 0.371004942339374, + "grad_norm": 0.3489322066307068, + "learning_rate": 1.8368940961007823e-05, + "loss": 0.4271, + "step": 13512 + }, + { + "epoch": 0.37103239978034047, + "grad_norm": 0.41022416949272156, + "learning_rate": 1.8368704549934552e-05, + "loss": 0.5953, + "step": 13513 + }, + { + "epoch": 0.37105985722130697, + "grad_norm": 0.43324166536331177, + "learning_rate": 1.836846812325098e-05, + "loss": 0.5222, + "step": 13514 + }, + { + "epoch": 0.37108731466227346, + "grad_norm": 0.3590271472930908, + "learning_rate": 1.836823168095754e-05, + "loss": 0.5132, + "step": 13515 + }, + { + "epoch": 0.37111477210323995, + "grad_norm": 0.33972927927970886, + "learning_rate": 1.8367995223054682e-05, + "loss": 0.4821, + "step": 13516 + }, + { + "epoch": 0.3711422295442065, + "grad_norm": 0.4470710754394531, + "learning_rate": 1.8367758749542842e-05, + "loss": 0.5411, + "step": 13517 + }, + { + "epoch": 0.371169686985173, + "grad_norm": 0.35545089840888977, + "learning_rate": 1.8367522260422458e-05, + "loss": 0.4309, + "step": 13518 + }, + { + "epoch": 0.3711971444261395, + "grad_norm": 0.36455854773521423, + "learning_rate": 1.8367285755693975e-05, + "loss": 0.535, + "step": 13519 + }, + { + "epoch": 0.371224601867106, + "grad_norm": 0.3926265835762024, + "learning_rate": 1.836704923535783e-05, + "loss": 0.4792, + "step": 13520 + }, + { + "epoch": 0.3712520593080725, + "grad_norm": 0.3918059766292572, + "learning_rate": 1.8366812699414476e-05, + "loss": 0.4376, + "step": 13521 + }, + { + "epoch": 0.371279516749039, + "grad_norm": 0.33682382106781006, + "learning_rate": 1.836657614786434e-05, + "loss": 0.4591, + "step": 13522 + }, + { + "epoch": 0.37130697419000547, + "grad_norm": 0.6314610838890076, + "learning_rate": 1.8366339580707873e-05, + "loss": 0.4806, + "step": 13523 + }, + { + "epoch": 0.371334431630972, + "grad_norm": 0.39139148592948914, + "learning_rate": 1.836610299794551e-05, + "loss": 0.4873, + "step": 13524 + }, + { + "epoch": 0.3713618890719385, + "grad_norm": 0.3173617422580719, + "learning_rate": 1.8365866399577693e-05, + "loss": 0.5543, + "step": 13525 + }, + { + "epoch": 0.371389346512905, + "grad_norm": 0.36914023756980896, + "learning_rate": 1.836562978560487e-05, + "loss": 0.5502, + "step": 13526 + }, + { + "epoch": 0.3714168039538715, + "grad_norm": 0.4496302306652069, + "learning_rate": 1.8365393156027473e-05, + "loss": 0.5325, + "step": 13527 + }, + { + "epoch": 0.371444261394838, + "grad_norm": 0.3897566795349121, + "learning_rate": 1.8365156510845948e-05, + "loss": 0.4833, + "step": 13528 + }, + { + "epoch": 0.3714717188358045, + "grad_norm": 0.4587305188179016, + "learning_rate": 1.8364919850060737e-05, + "loss": 0.5332, + "step": 13529 + }, + { + "epoch": 0.371499176276771, + "grad_norm": 0.37378132343292236, + "learning_rate": 1.8364683173672282e-05, + "loss": 0.4551, + "step": 13530 + }, + { + "epoch": 0.37152663371773753, + "grad_norm": 0.3466116189956665, + "learning_rate": 1.8364446481681022e-05, + "loss": 0.4896, + "step": 13531 + }, + { + "epoch": 0.371554091158704, + "grad_norm": 0.3967624604701996, + "learning_rate": 1.8364209774087402e-05, + "loss": 0.5253, + "step": 13532 + }, + { + "epoch": 0.3715815485996705, + "grad_norm": 0.6458766460418701, + "learning_rate": 1.836397305089186e-05, + "loss": 0.5823, + "step": 13533 + }, + { + "epoch": 0.371609006040637, + "grad_norm": 0.3637692332267761, + "learning_rate": 1.8363736312094836e-05, + "loss": 0.5276, + "step": 13534 + }, + { + "epoch": 0.3716364634816035, + "grad_norm": 0.3686804175376892, + "learning_rate": 1.8363499557696777e-05, + "loss": 0.5979, + "step": 13535 + }, + { + "epoch": 0.37166392092257, + "grad_norm": 0.5284117460250854, + "learning_rate": 1.8363262787698124e-05, + "loss": 0.5848, + "step": 13536 + }, + { + "epoch": 0.3716913783635365, + "grad_norm": 0.365894079208374, + "learning_rate": 1.8363026002099315e-05, + "loss": 0.4886, + "step": 13537 + }, + { + "epoch": 0.37171883580450304, + "grad_norm": 0.3758604824542999, + "learning_rate": 1.836278920090079e-05, + "loss": 0.5221, + "step": 13538 + }, + { + "epoch": 0.37174629324546954, + "grad_norm": 0.36317819356918335, + "learning_rate": 1.8362552384102995e-05, + "loss": 0.5065, + "step": 13539 + }, + { + "epoch": 0.37177375068643603, + "grad_norm": 0.365349680185318, + "learning_rate": 1.8362315551706372e-05, + "loss": 0.5459, + "step": 13540 + }, + { + "epoch": 0.3718012081274025, + "grad_norm": 0.4305405616760254, + "learning_rate": 1.8362078703711366e-05, + "loss": 0.5543, + "step": 13541 + }, + { + "epoch": 0.371828665568369, + "grad_norm": 0.37639522552490234, + "learning_rate": 1.8361841840118407e-05, + "loss": 0.5227, + "step": 13542 + }, + { + "epoch": 0.3718561230093355, + "grad_norm": 0.4040382206439972, + "learning_rate": 1.836160496092795e-05, + "loss": 0.6356, + "step": 13543 + }, + { + "epoch": 0.371883580450302, + "grad_norm": 0.3789028823375702, + "learning_rate": 1.8361368066140428e-05, + "loss": 0.5378, + "step": 13544 + }, + { + "epoch": 0.37191103789126856, + "grad_norm": 0.3810126483440399, + "learning_rate": 1.8361131155756285e-05, + "loss": 0.4782, + "step": 13545 + }, + { + "epoch": 0.37193849533223505, + "grad_norm": 0.5209364295005798, + "learning_rate": 1.836089422977597e-05, + "loss": 0.5124, + "step": 13546 + }, + { + "epoch": 0.37196595277320155, + "grad_norm": 0.331025630235672, + "learning_rate": 1.8360657288199912e-05, + "loss": 0.5262, + "step": 13547 + }, + { + "epoch": 0.37199341021416804, + "grad_norm": 0.5741205811500549, + "learning_rate": 1.8360420331028563e-05, + "loss": 0.5602, + "step": 13548 + }, + { + "epoch": 0.37202086765513454, + "grad_norm": 0.36510753631591797, + "learning_rate": 1.836018335826236e-05, + "loss": 0.5406, + "step": 13549 + }, + { + "epoch": 0.37204832509610103, + "grad_norm": 0.5135376453399658, + "learning_rate": 1.8359946369901744e-05, + "loss": 0.5116, + "step": 13550 + }, + { + "epoch": 0.3720757825370675, + "grad_norm": 0.4119746685028076, + "learning_rate": 1.8359709365947166e-05, + "loss": 0.494, + "step": 13551 + }, + { + "epoch": 0.3721032399780341, + "grad_norm": 0.3640202581882477, + "learning_rate": 1.835947234639906e-05, + "loss": 0.5645, + "step": 13552 + }, + { + "epoch": 0.37213069741900057, + "grad_norm": 0.3495558202266693, + "learning_rate": 1.8359235311257867e-05, + "loss": 0.5596, + "step": 13553 + }, + { + "epoch": 0.37215815485996706, + "grad_norm": 0.4443800449371338, + "learning_rate": 1.8358998260524034e-05, + "loss": 0.4834, + "step": 13554 + }, + { + "epoch": 0.37218561230093355, + "grad_norm": 0.3818444013595581, + "learning_rate": 1.8358761194198e-05, + "loss": 0.4913, + "step": 13555 + }, + { + "epoch": 0.37221306974190005, + "grad_norm": 0.344837486743927, + "learning_rate": 1.835852411228021e-05, + "loss": 0.5345, + "step": 13556 + }, + { + "epoch": 0.37224052718286654, + "grad_norm": 0.3685751259326935, + "learning_rate": 1.8358287014771107e-05, + "loss": 0.5384, + "step": 13557 + }, + { + "epoch": 0.37226798462383304, + "grad_norm": 0.33291080594062805, + "learning_rate": 1.835804990167113e-05, + "loss": 0.4715, + "step": 13558 + }, + { + "epoch": 0.3722954420647996, + "grad_norm": 0.4233565032482147, + "learning_rate": 1.835781277298072e-05, + "loss": 0.5745, + "step": 13559 + }, + { + "epoch": 0.3723228995057661, + "grad_norm": 0.37394723296165466, + "learning_rate": 1.8357575628700325e-05, + "loss": 0.5363, + "step": 13560 + }, + { + "epoch": 0.3723503569467326, + "grad_norm": 0.3805095851421356, + "learning_rate": 1.835733846883038e-05, + "loss": 0.4837, + "step": 13561 + }, + { + "epoch": 0.37237781438769907, + "grad_norm": 0.3747875392436981, + "learning_rate": 1.8357101293371332e-05, + "loss": 0.4738, + "step": 13562 + }, + { + "epoch": 0.37240527182866556, + "grad_norm": 0.3979951739311218, + "learning_rate": 1.835686410232363e-05, + "loss": 0.4797, + "step": 13563 + }, + { + "epoch": 0.37243272926963206, + "grad_norm": 0.35897505283355713, + "learning_rate": 1.8356626895687703e-05, + "loss": 0.4499, + "step": 13564 + }, + { + "epoch": 0.37246018671059855, + "grad_norm": 0.32801464200019836, + "learning_rate": 1.8356389673464e-05, + "loss": 0.494, + "step": 13565 + }, + { + "epoch": 0.3724876441515651, + "grad_norm": 0.3646252453327179, + "learning_rate": 1.8356152435652962e-05, + "loss": 0.503, + "step": 13566 + }, + { + "epoch": 0.3725151015925316, + "grad_norm": 0.34575527906417847, + "learning_rate": 1.8355915182255035e-05, + "loss": 0.4632, + "step": 13567 + }, + { + "epoch": 0.3725425590334981, + "grad_norm": 0.35846036672592163, + "learning_rate": 1.8355677913270658e-05, + "loss": 0.5131, + "step": 13568 + }, + { + "epoch": 0.3725700164744646, + "grad_norm": 0.3653334975242615, + "learning_rate": 1.8355440628700278e-05, + "loss": 0.5606, + "step": 13569 + }, + { + "epoch": 0.3725974739154311, + "grad_norm": 0.36575305461883545, + "learning_rate": 1.835520332854433e-05, + "loss": 0.4491, + "step": 13570 + }, + { + "epoch": 0.37262493135639757, + "grad_norm": 0.3377826511859894, + "learning_rate": 1.8354966012803262e-05, + "loss": 0.453, + "step": 13571 + }, + { + "epoch": 0.37265238879736406, + "grad_norm": 0.4060770273208618, + "learning_rate": 1.8354728681477517e-05, + "loss": 0.6031, + "step": 13572 + }, + { + "epoch": 0.3726798462383306, + "grad_norm": 0.45115557312965393, + "learning_rate": 1.8354491334567535e-05, + "loss": 0.5014, + "step": 13573 + }, + { + "epoch": 0.3727073036792971, + "grad_norm": 0.34351640939712524, + "learning_rate": 1.8354253972073763e-05, + "loss": 0.456, + "step": 13574 + }, + { + "epoch": 0.3727347611202636, + "grad_norm": 0.3583424985408783, + "learning_rate": 1.835401659399664e-05, + "loss": 0.4888, + "step": 13575 + }, + { + "epoch": 0.3727622185612301, + "grad_norm": 0.41385048627853394, + "learning_rate": 1.8353779200336607e-05, + "loss": 0.545, + "step": 13576 + }, + { + "epoch": 0.3727896760021966, + "grad_norm": 0.44504550099372864, + "learning_rate": 1.8353541791094114e-05, + "loss": 0.5236, + "step": 13577 + }, + { + "epoch": 0.3728171334431631, + "grad_norm": 0.3668714761734009, + "learning_rate": 1.8353304366269596e-05, + "loss": 0.4663, + "step": 13578 + }, + { + "epoch": 0.3728445908841296, + "grad_norm": 0.3718351423740387, + "learning_rate": 1.8353066925863504e-05, + "loss": 0.5264, + "step": 13579 + }, + { + "epoch": 0.37287204832509613, + "grad_norm": 0.3916780650615692, + "learning_rate": 1.8352829469876268e-05, + "loss": 0.4821, + "step": 13580 + }, + { + "epoch": 0.3728995057660626, + "grad_norm": 0.36601608991622925, + "learning_rate": 1.8352591998308346e-05, + "loss": 0.5395, + "step": 13581 + }, + { + "epoch": 0.3729269632070291, + "grad_norm": 0.3421332836151123, + "learning_rate": 1.8352354511160174e-05, + "loss": 0.458, + "step": 13582 + }, + { + "epoch": 0.3729544206479956, + "grad_norm": 0.3327290713787079, + "learning_rate": 1.8352117008432193e-05, + "loss": 0.4733, + "step": 13583 + }, + { + "epoch": 0.3729818780889621, + "grad_norm": 0.3519177734851837, + "learning_rate": 1.8351879490124846e-05, + "loss": 0.5193, + "step": 13584 + }, + { + "epoch": 0.3730093355299286, + "grad_norm": 0.42011547088623047, + "learning_rate": 1.835164195623858e-05, + "loss": 0.6162, + "step": 13585 + }, + { + "epoch": 0.3730367929708951, + "grad_norm": 0.35859423875808716, + "learning_rate": 1.8351404406773837e-05, + "loss": 0.5022, + "step": 13586 + }, + { + "epoch": 0.37306425041186164, + "grad_norm": 0.38481977581977844, + "learning_rate": 1.835116684173106e-05, + "loss": 0.5267, + "step": 13587 + }, + { + "epoch": 0.37309170785282814, + "grad_norm": 0.44922056794166565, + "learning_rate": 1.835092926111069e-05, + "loss": 0.5257, + "step": 13588 + }, + { + "epoch": 0.37311916529379463, + "grad_norm": 0.43255722522735596, + "learning_rate": 1.835069166491317e-05, + "loss": 0.52, + "step": 13589 + }, + { + "epoch": 0.3731466227347611, + "grad_norm": 0.3337383270263672, + "learning_rate": 1.8350454053138945e-05, + "loss": 0.4295, + "step": 13590 + }, + { + "epoch": 0.3731740801757276, + "grad_norm": 0.4449613392353058, + "learning_rate": 1.8350216425788463e-05, + "loss": 0.5014, + "step": 13591 + }, + { + "epoch": 0.3732015376166941, + "grad_norm": 0.38105249404907227, + "learning_rate": 1.834997878286216e-05, + "loss": 0.4788, + "step": 13592 + }, + { + "epoch": 0.3732289950576606, + "grad_norm": 0.37735384702682495, + "learning_rate": 1.834974112436048e-05, + "loss": 0.5306, + "step": 13593 + }, + { + "epoch": 0.3732564524986271, + "grad_norm": 0.3525785505771637, + "learning_rate": 1.8349503450283866e-05, + "loss": 0.506, + "step": 13594 + }, + { + "epoch": 0.37328390993959365, + "grad_norm": 0.42406898736953735, + "learning_rate": 1.834926576063277e-05, + "loss": 0.5199, + "step": 13595 + }, + { + "epoch": 0.37331136738056014, + "grad_norm": 0.34691059589385986, + "learning_rate": 1.8349028055407623e-05, + "loss": 0.4972, + "step": 13596 + }, + { + "epoch": 0.37333882482152664, + "grad_norm": 0.3701878488063812, + "learning_rate": 1.8348790334608876e-05, + "loss": 0.5093, + "step": 13597 + }, + { + "epoch": 0.37336628226249313, + "grad_norm": 0.3630932867527008, + "learning_rate": 1.834855259823697e-05, + "loss": 0.4475, + "step": 13598 + }, + { + "epoch": 0.3733937397034596, + "grad_norm": 0.42347002029418945, + "learning_rate": 1.8348314846292346e-05, + "loss": 0.5745, + "step": 13599 + }, + { + "epoch": 0.3734211971444261, + "grad_norm": 0.3519357442855835, + "learning_rate": 1.834807707877545e-05, + "loss": 0.4855, + "step": 13600 + }, + { + "epoch": 0.3734486545853926, + "grad_norm": 0.34048527479171753, + "learning_rate": 1.834783929568673e-05, + "loss": 0.4465, + "step": 13601 + }, + { + "epoch": 0.37347611202635916, + "grad_norm": 0.4002440273761749, + "learning_rate": 1.8347601497026627e-05, + "loss": 0.5746, + "step": 13602 + }, + { + "epoch": 0.37350356946732566, + "grad_norm": 0.4054774045944214, + "learning_rate": 1.834736368279558e-05, + "loss": 0.4422, + "step": 13603 + }, + { + "epoch": 0.37353102690829215, + "grad_norm": 0.3621819019317627, + "learning_rate": 1.8347125852994034e-05, + "loss": 0.4875, + "step": 13604 + }, + { + "epoch": 0.37355848434925865, + "grad_norm": 0.3768141269683838, + "learning_rate": 1.8346888007622435e-05, + "loss": 0.5672, + "step": 13605 + }, + { + "epoch": 0.37358594179022514, + "grad_norm": 0.3424219489097595, + "learning_rate": 1.834665014668123e-05, + "loss": 0.4903, + "step": 13606 + }, + { + "epoch": 0.37361339923119163, + "grad_norm": 0.39286351203918457, + "learning_rate": 1.8346412270170853e-05, + "loss": 0.576, + "step": 13607 + }, + { + "epoch": 0.37364085667215813, + "grad_norm": 0.4168302118778229, + "learning_rate": 1.8346174378091756e-05, + "loss": 0.5432, + "step": 13608 + }, + { + "epoch": 0.3736683141131247, + "grad_norm": 0.38480257987976074, + "learning_rate": 1.834593647044438e-05, + "loss": 0.5046, + "step": 13609 + }, + { + "epoch": 0.37369577155409117, + "grad_norm": 0.46783673763275146, + "learning_rate": 1.8345698547229167e-05, + "loss": 0.5493, + "step": 13610 + }, + { + "epoch": 0.37372322899505767, + "grad_norm": 0.39057886600494385, + "learning_rate": 1.8345460608446564e-05, + "loss": 0.501, + "step": 13611 + }, + { + "epoch": 0.37375068643602416, + "grad_norm": 0.3717291057109833, + "learning_rate": 1.834522265409701e-05, + "loss": 0.4769, + "step": 13612 + }, + { + "epoch": 0.37377814387699065, + "grad_norm": 0.37747645378112793, + "learning_rate": 1.834498468418096e-05, + "loss": 0.5474, + "step": 13613 + }, + { + "epoch": 0.37380560131795715, + "grad_norm": 0.33693552017211914, + "learning_rate": 1.8344746698698843e-05, + "loss": 0.5204, + "step": 13614 + }, + { + "epoch": 0.37383305875892364, + "grad_norm": 0.40179744362831116, + "learning_rate": 1.8344508697651117e-05, + "loss": 0.5916, + "step": 13615 + }, + { + "epoch": 0.3738605161998902, + "grad_norm": 0.3203463554382324, + "learning_rate": 1.834427068103821e-05, + "loss": 0.5132, + "step": 13616 + }, + { + "epoch": 0.3738879736408567, + "grad_norm": 0.34138023853302, + "learning_rate": 1.834403264886058e-05, + "loss": 0.5146, + "step": 13617 + }, + { + "epoch": 0.3739154310818232, + "grad_norm": 0.36731094121932983, + "learning_rate": 1.8343794601118668e-05, + "loss": 0.5548, + "step": 13618 + }, + { + "epoch": 0.3739428885227897, + "grad_norm": 0.40143483877182007, + "learning_rate": 1.8343556537812916e-05, + "loss": 0.4933, + "step": 13619 + }, + { + "epoch": 0.37397034596375617, + "grad_norm": 0.402612566947937, + "learning_rate": 1.8343318458943762e-05, + "loss": 0.6493, + "step": 13620 + }, + { + "epoch": 0.37399780340472266, + "grad_norm": 0.35615062713623047, + "learning_rate": 1.834308036451166e-05, + "loss": 0.4543, + "step": 13621 + }, + { + "epoch": 0.37402526084568916, + "grad_norm": 0.3366524279117584, + "learning_rate": 1.834284225451705e-05, + "loss": 0.4641, + "step": 13622 + }, + { + "epoch": 0.3740527182866557, + "grad_norm": 0.3823081851005554, + "learning_rate": 1.834260412896038e-05, + "loss": 0.5218, + "step": 13623 + }, + { + "epoch": 0.3740801757276222, + "grad_norm": 0.601913571357727, + "learning_rate": 1.8342365987842085e-05, + "loss": 0.5449, + "step": 13624 + }, + { + "epoch": 0.3741076331685887, + "grad_norm": 0.5608243942260742, + "learning_rate": 1.834212783116262e-05, + "loss": 0.5353, + "step": 13625 + }, + { + "epoch": 0.3741350906095552, + "grad_norm": 0.3408958613872528, + "learning_rate": 1.834188965892242e-05, + "loss": 0.5415, + "step": 13626 + }, + { + "epoch": 0.3741625480505217, + "grad_norm": 0.600914478302002, + "learning_rate": 1.8341651471121935e-05, + "loss": 0.5485, + "step": 13627 + }, + { + "epoch": 0.3741900054914882, + "grad_norm": 0.3909434974193573, + "learning_rate": 1.8341413267761608e-05, + "loss": 0.5724, + "step": 13628 + }, + { + "epoch": 0.37421746293245467, + "grad_norm": 0.3877597451210022, + "learning_rate": 1.8341175048841883e-05, + "loss": 0.5566, + "step": 13629 + }, + { + "epoch": 0.3742449203734212, + "grad_norm": 0.3789316415786743, + "learning_rate": 1.8340936814363203e-05, + "loss": 0.5659, + "step": 13630 + }, + { + "epoch": 0.3742723778143877, + "grad_norm": 0.4058796465396881, + "learning_rate": 1.8340698564326014e-05, + "loss": 0.573, + "step": 13631 + }, + { + "epoch": 0.3742998352553542, + "grad_norm": 0.3654789328575134, + "learning_rate": 1.834046029873076e-05, + "loss": 0.5153, + "step": 13632 + }, + { + "epoch": 0.3743272926963207, + "grad_norm": 0.3973502218723297, + "learning_rate": 1.8340222017577886e-05, + "loss": 0.6001, + "step": 13633 + }, + { + "epoch": 0.3743547501372872, + "grad_norm": 0.4984470009803772, + "learning_rate": 1.8339983720867834e-05, + "loss": 0.5084, + "step": 13634 + }, + { + "epoch": 0.3743822075782537, + "grad_norm": 0.33285006880760193, + "learning_rate": 1.8339745408601057e-05, + "loss": 0.4862, + "step": 13635 + }, + { + "epoch": 0.3744096650192202, + "grad_norm": 0.4968203008174896, + "learning_rate": 1.8339507080777987e-05, + "loss": 0.567, + "step": 13636 + }, + { + "epoch": 0.37443712246018673, + "grad_norm": 0.3765037953853607, + "learning_rate": 1.8339268737399076e-05, + "loss": 0.5417, + "step": 13637 + }, + { + "epoch": 0.3744645799011532, + "grad_norm": 0.4549657106399536, + "learning_rate": 1.8339030378464767e-05, + "loss": 0.5379, + "step": 13638 + }, + { + "epoch": 0.3744920373421197, + "grad_norm": 0.3607883155345917, + "learning_rate": 1.8338792003975508e-05, + "loss": 0.428, + "step": 13639 + }, + { + "epoch": 0.3745194947830862, + "grad_norm": 0.42920708656311035, + "learning_rate": 1.833855361393174e-05, + "loss": 0.5118, + "step": 13640 + }, + { + "epoch": 0.3745469522240527, + "grad_norm": 0.3624459207057953, + "learning_rate": 1.8338315208333904e-05, + "loss": 0.4847, + "step": 13641 + }, + { + "epoch": 0.3745744096650192, + "grad_norm": 0.351351261138916, + "learning_rate": 1.833807678718245e-05, + "loss": 0.5052, + "step": 13642 + }, + { + "epoch": 0.3746018671059857, + "grad_norm": 0.3489241600036621, + "learning_rate": 1.8337838350477822e-05, + "loss": 0.4668, + "step": 13643 + }, + { + "epoch": 0.37462932454695225, + "grad_norm": 0.35177093744277954, + "learning_rate": 1.8337599898220466e-05, + "loss": 0.4491, + "step": 13644 + }, + { + "epoch": 0.37465678198791874, + "grad_norm": 0.42894965410232544, + "learning_rate": 1.8337361430410822e-05, + "loss": 0.5328, + "step": 13645 + }, + { + "epoch": 0.37468423942888524, + "grad_norm": 1.0149849653244019, + "learning_rate": 1.8337122947049342e-05, + "loss": 0.5272, + "step": 13646 + }, + { + "epoch": 0.37471169686985173, + "grad_norm": 1.2288599014282227, + "learning_rate": 1.8336884448136466e-05, + "loss": 0.5258, + "step": 13647 + }, + { + "epoch": 0.3747391543108182, + "grad_norm": 0.3333112299442291, + "learning_rate": 1.8336645933672637e-05, + "loss": 0.5264, + "step": 13648 + }, + { + "epoch": 0.3747666117517847, + "grad_norm": 0.35462716221809387, + "learning_rate": 1.8336407403658307e-05, + "loss": 0.5786, + "step": 13649 + }, + { + "epoch": 0.3747940691927512, + "grad_norm": 0.35489848256111145, + "learning_rate": 1.833616885809391e-05, + "loss": 0.4705, + "step": 13650 + }, + { + "epoch": 0.37482152663371776, + "grad_norm": 0.33283761143684387, + "learning_rate": 1.8335930296979903e-05, + "loss": 0.4588, + "step": 13651 + }, + { + "epoch": 0.37484898407468425, + "grad_norm": 0.3648979067802429, + "learning_rate": 1.8335691720316723e-05, + "loss": 0.497, + "step": 13652 + }, + { + "epoch": 0.37487644151565075, + "grad_norm": 0.4289059340953827, + "learning_rate": 1.8335453128104818e-05, + "loss": 0.5956, + "step": 13653 + }, + { + "epoch": 0.37490389895661724, + "grad_norm": 0.40145841240882874, + "learning_rate": 1.8335214520344634e-05, + "loss": 0.6205, + "step": 13654 + }, + { + "epoch": 0.37493135639758374, + "grad_norm": 0.3786064088344574, + "learning_rate": 1.833497589703661e-05, + "loss": 0.5545, + "step": 13655 + }, + { + "epoch": 0.37495881383855023, + "grad_norm": 0.38580256700515747, + "learning_rate": 1.8334737258181203e-05, + "loss": 0.5043, + "step": 13656 + }, + { + "epoch": 0.3749862712795167, + "grad_norm": 0.39813482761383057, + "learning_rate": 1.8334498603778844e-05, + "loss": 0.5518, + "step": 13657 + }, + { + "epoch": 0.3750137287204833, + "grad_norm": 0.5074609518051147, + "learning_rate": 1.8334259933829988e-05, + "loss": 0.5085, + "step": 13658 + }, + { + "epoch": 0.37504118616144977, + "grad_norm": 0.3537602424621582, + "learning_rate": 1.8334021248335076e-05, + "loss": 0.5276, + "step": 13659 + }, + { + "epoch": 0.37506864360241626, + "grad_norm": 0.3575875461101532, + "learning_rate": 1.8333782547294555e-05, + "loss": 0.5754, + "step": 13660 + }, + { + "epoch": 0.37509610104338276, + "grad_norm": 0.37960201501846313, + "learning_rate": 1.833354383070887e-05, + "loss": 0.4578, + "step": 13661 + }, + { + "epoch": 0.37512355848434925, + "grad_norm": 0.35247349739074707, + "learning_rate": 1.833330509857847e-05, + "loss": 0.4847, + "step": 13662 + }, + { + "epoch": 0.37515101592531575, + "grad_norm": 0.3333853483200073, + "learning_rate": 1.833306635090379e-05, + "loss": 0.4591, + "step": 13663 + }, + { + "epoch": 0.37517847336628224, + "grad_norm": 0.3834823966026306, + "learning_rate": 1.8332827587685288e-05, + "loss": 0.4895, + "step": 13664 + }, + { + "epoch": 0.3752059308072488, + "grad_norm": 0.4257274866104126, + "learning_rate": 1.83325888089234e-05, + "loss": 0.5602, + "step": 13665 + }, + { + "epoch": 0.3752333882482153, + "grad_norm": 0.37803417444229126, + "learning_rate": 1.8332350014618573e-05, + "loss": 0.5855, + "step": 13666 + }, + { + "epoch": 0.3752608456891818, + "grad_norm": 0.35939860343933105, + "learning_rate": 1.8332111204771252e-05, + "loss": 0.4967, + "step": 13667 + }, + { + "epoch": 0.37528830313014827, + "grad_norm": 0.4147886633872986, + "learning_rate": 1.8331872379381886e-05, + "loss": 0.5377, + "step": 13668 + }, + { + "epoch": 0.37531576057111476, + "grad_norm": 0.3812200129032135, + "learning_rate": 1.8331633538450923e-05, + "loss": 0.5529, + "step": 13669 + }, + { + "epoch": 0.37534321801208126, + "grad_norm": 0.4146744906902313, + "learning_rate": 1.83313946819788e-05, + "loss": 0.5655, + "step": 13670 + }, + { + "epoch": 0.37537067545304775, + "grad_norm": 0.34130722284317017, + "learning_rate": 1.8331155809965967e-05, + "loss": 0.5698, + "step": 13671 + }, + { + "epoch": 0.3753981328940143, + "grad_norm": 0.33568617701530457, + "learning_rate": 1.833091692241287e-05, + "loss": 0.4654, + "step": 13672 + }, + { + "epoch": 0.3754255903349808, + "grad_norm": 0.36410287022590637, + "learning_rate": 1.8330678019319955e-05, + "loss": 0.5194, + "step": 13673 + }, + { + "epoch": 0.3754530477759473, + "grad_norm": 0.39468976855278015, + "learning_rate": 1.8330439100687667e-05, + "loss": 0.4904, + "step": 13674 + }, + { + "epoch": 0.3754805052169138, + "grad_norm": 0.4069713354110718, + "learning_rate": 1.8330200166516448e-05, + "loss": 0.493, + "step": 13675 + }, + { + "epoch": 0.3755079626578803, + "grad_norm": 0.3312029242515564, + "learning_rate": 1.8329961216806752e-05, + "loss": 0.4935, + "step": 13676 + }, + { + "epoch": 0.3755354200988468, + "grad_norm": 0.3970401883125305, + "learning_rate": 1.8329722251559016e-05, + "loss": 0.5809, + "step": 13677 + }, + { + "epoch": 0.37556287753981327, + "grad_norm": 0.3626049757003784, + "learning_rate": 1.832948327077369e-05, + "loss": 0.5469, + "step": 13678 + }, + { + "epoch": 0.3755903349807798, + "grad_norm": 0.37325718998908997, + "learning_rate": 1.832924427445122e-05, + "loss": 0.4604, + "step": 13679 + }, + { + "epoch": 0.3756177924217463, + "grad_norm": 0.39087650179862976, + "learning_rate": 1.832900526259205e-05, + "loss": 0.5751, + "step": 13680 + }, + { + "epoch": 0.3756452498627128, + "grad_norm": 0.3618304431438446, + "learning_rate": 1.8328766235196628e-05, + "loss": 0.4677, + "step": 13681 + }, + { + "epoch": 0.3756727073036793, + "grad_norm": 0.4601861536502838, + "learning_rate": 1.8328527192265396e-05, + "loss": 0.5857, + "step": 13682 + }, + { + "epoch": 0.3757001647446458, + "grad_norm": 0.37404394149780273, + "learning_rate": 1.8328288133798808e-05, + "loss": 0.5684, + "step": 13683 + }, + { + "epoch": 0.3757276221856123, + "grad_norm": 0.3630339205265045, + "learning_rate": 1.83280490597973e-05, + "loss": 0.5135, + "step": 13684 + }, + { + "epoch": 0.3757550796265788, + "grad_norm": 0.5023232102394104, + "learning_rate": 1.8327809970261325e-05, + "loss": 0.5391, + "step": 13685 + }, + { + "epoch": 0.37578253706754533, + "grad_norm": 0.3846082389354706, + "learning_rate": 1.8327570865191323e-05, + "loss": 0.5462, + "step": 13686 + }, + { + "epoch": 0.3758099945085118, + "grad_norm": 0.3777088522911072, + "learning_rate": 1.832733174458775e-05, + "loss": 0.4621, + "step": 13687 + }, + { + "epoch": 0.3758374519494783, + "grad_norm": 0.34557801485061646, + "learning_rate": 1.8327092608451038e-05, + "loss": 0.4571, + "step": 13688 + }, + { + "epoch": 0.3758649093904448, + "grad_norm": 0.37498417496681213, + "learning_rate": 1.8326853456781647e-05, + "loss": 0.4586, + "step": 13689 + }, + { + "epoch": 0.3758923668314113, + "grad_norm": 0.3447803854942322, + "learning_rate": 1.8326614289580012e-05, + "loss": 0.4999, + "step": 13690 + }, + { + "epoch": 0.3759198242723778, + "grad_norm": 0.33163172006607056, + "learning_rate": 1.8326375106846585e-05, + "loss": 0.4342, + "step": 13691 + }, + { + "epoch": 0.3759472817133443, + "grad_norm": 0.3422918915748596, + "learning_rate": 1.832613590858181e-05, + "loss": 0.4347, + "step": 13692 + }, + { + "epoch": 0.37597473915431084, + "grad_norm": 0.375461220741272, + "learning_rate": 1.832589669478614e-05, + "loss": 0.5111, + "step": 13693 + }, + { + "epoch": 0.37600219659527734, + "grad_norm": 0.3888751268386841, + "learning_rate": 1.832565746546001e-05, + "loss": 0.5852, + "step": 13694 + }, + { + "epoch": 0.37602965403624383, + "grad_norm": 0.476036936044693, + "learning_rate": 1.832541822060387e-05, + "loss": 0.53, + "step": 13695 + }, + { + "epoch": 0.3760571114772103, + "grad_norm": 0.35866236686706543, + "learning_rate": 1.832517896021817e-05, + "loss": 0.4801, + "step": 13696 + }, + { + "epoch": 0.3760845689181768, + "grad_norm": 0.35138964653015137, + "learning_rate": 1.8324939684303354e-05, + "loss": 0.5017, + "step": 13697 + }, + { + "epoch": 0.3761120263591433, + "grad_norm": 0.4006747305393219, + "learning_rate": 1.8324700392859872e-05, + "loss": 0.5287, + "step": 13698 + }, + { + "epoch": 0.3761394838001098, + "grad_norm": 0.3433343172073364, + "learning_rate": 1.8324461085888162e-05, + "loss": 0.5652, + "step": 13699 + }, + { + "epoch": 0.37616694124107636, + "grad_norm": 0.47992366552352905, + "learning_rate": 1.8324221763388678e-05, + "loss": 0.6073, + "step": 13700 + }, + { + "epoch": 0.37619439868204285, + "grad_norm": 0.3678651750087738, + "learning_rate": 1.8323982425361864e-05, + "loss": 0.4974, + "step": 13701 + }, + { + "epoch": 0.37622185612300935, + "grad_norm": 0.35094812512397766, + "learning_rate": 1.8323743071808163e-05, + "loss": 0.4943, + "step": 13702 + }, + { + "epoch": 0.37624931356397584, + "grad_norm": 0.4463077485561371, + "learning_rate": 1.8323503702728027e-05, + "loss": 0.5303, + "step": 13703 + }, + { + "epoch": 0.37627677100494233, + "grad_norm": 0.44660282135009766, + "learning_rate": 1.8323264318121898e-05, + "loss": 0.5485, + "step": 13704 + }, + { + "epoch": 0.37630422844590883, + "grad_norm": 0.4668506681919098, + "learning_rate": 1.832302491799023e-05, + "loss": 0.4704, + "step": 13705 + }, + { + "epoch": 0.3763316858868753, + "grad_norm": 0.3822377920150757, + "learning_rate": 1.8322785502333458e-05, + "loss": 0.5873, + "step": 13706 + }, + { + "epoch": 0.37635914332784187, + "grad_norm": 0.3679632842540741, + "learning_rate": 1.8322546071152037e-05, + "loss": 0.5189, + "step": 13707 + }, + { + "epoch": 0.37638660076880837, + "grad_norm": 0.37616217136383057, + "learning_rate": 1.832230662444641e-05, + "loss": 0.5226, + "step": 13708 + }, + { + "epoch": 0.37641405820977486, + "grad_norm": 0.37874022126197815, + "learning_rate": 1.8322067162217026e-05, + "loss": 0.5314, + "step": 13709 + }, + { + "epoch": 0.37644151565074135, + "grad_norm": 0.3716884255409241, + "learning_rate": 1.832182768446433e-05, + "loss": 0.5288, + "step": 13710 + }, + { + "epoch": 0.37646897309170785, + "grad_norm": 0.43205004930496216, + "learning_rate": 1.832158819118877e-05, + "loss": 0.5817, + "step": 13711 + }, + { + "epoch": 0.37649643053267434, + "grad_norm": 0.38665080070495605, + "learning_rate": 1.8321348682390794e-05, + "loss": 0.5458, + "step": 13712 + }, + { + "epoch": 0.37652388797364084, + "grad_norm": 0.3597867488861084, + "learning_rate": 1.8321109158070843e-05, + "loss": 0.4999, + "step": 13713 + }, + { + "epoch": 0.3765513454146074, + "grad_norm": 0.4103391468524933, + "learning_rate": 1.832086961822937e-05, + "loss": 0.5114, + "step": 13714 + }, + { + "epoch": 0.3765788028555739, + "grad_norm": 0.3207416236400604, + "learning_rate": 1.832063006286682e-05, + "loss": 0.4862, + "step": 13715 + }, + { + "epoch": 0.3766062602965404, + "grad_norm": 0.35100480914115906, + "learning_rate": 1.832039049198364e-05, + "loss": 0.5566, + "step": 13716 + }, + { + "epoch": 0.37663371773750687, + "grad_norm": 0.34482520818710327, + "learning_rate": 1.8320150905580272e-05, + "loss": 0.524, + "step": 13717 + }, + { + "epoch": 0.37666117517847336, + "grad_norm": 0.4472881555557251, + "learning_rate": 1.831991130365717e-05, + "loss": 0.5996, + "step": 13718 + }, + { + "epoch": 0.37668863261943986, + "grad_norm": 0.37280508875846863, + "learning_rate": 1.8319671686214778e-05, + "loss": 0.5024, + "step": 13719 + }, + { + "epoch": 0.37671609006040635, + "grad_norm": 0.3883019685745239, + "learning_rate": 1.8319432053253545e-05, + "loss": 0.5398, + "step": 13720 + }, + { + "epoch": 0.3767435475013729, + "grad_norm": 0.4250096380710602, + "learning_rate": 1.8319192404773912e-05, + "loss": 0.538, + "step": 13721 + }, + { + "epoch": 0.3767710049423394, + "grad_norm": 0.4090476632118225, + "learning_rate": 1.831895274077633e-05, + "loss": 0.5007, + "step": 13722 + }, + { + "epoch": 0.3767984623833059, + "grad_norm": 0.4028523862361908, + "learning_rate": 1.8318713061261248e-05, + "loss": 0.5148, + "step": 13723 + }, + { + "epoch": 0.3768259198242724, + "grad_norm": 0.36102262139320374, + "learning_rate": 1.831847336622911e-05, + "loss": 0.4788, + "step": 13724 + }, + { + "epoch": 0.3768533772652389, + "grad_norm": 0.3572014570236206, + "learning_rate": 1.8318233655680365e-05, + "loss": 0.5058, + "step": 13725 + }, + { + "epoch": 0.37688083470620537, + "grad_norm": 0.3868630528450012, + "learning_rate": 1.8317993929615462e-05, + "loss": 0.4662, + "step": 13726 + }, + { + "epoch": 0.37690829214717186, + "grad_norm": 0.38985690474510193, + "learning_rate": 1.831775418803484e-05, + "loss": 0.4921, + "step": 13727 + }, + { + "epoch": 0.37693574958813836, + "grad_norm": 0.36705881357192993, + "learning_rate": 1.8317514430938956e-05, + "loss": 0.4957, + "step": 13728 + }, + { + "epoch": 0.3769632070291049, + "grad_norm": 0.4421990215778351, + "learning_rate": 1.831727465832825e-05, + "loss": 0.5236, + "step": 13729 + }, + { + "epoch": 0.3769906644700714, + "grad_norm": 0.32195189595222473, + "learning_rate": 1.8317034870203175e-05, + "loss": 0.4051, + "step": 13730 + }, + { + "epoch": 0.3770181219110379, + "grad_norm": 0.3715932369232178, + "learning_rate": 1.8316795066564172e-05, + "loss": 0.4398, + "step": 13731 + }, + { + "epoch": 0.3770455793520044, + "grad_norm": 0.38214507699012756, + "learning_rate": 1.8316555247411697e-05, + "loss": 0.5176, + "step": 13732 + }, + { + "epoch": 0.3770730367929709, + "grad_norm": 0.3481929302215576, + "learning_rate": 1.831631541274619e-05, + "loss": 0.4478, + "step": 13733 + }, + { + "epoch": 0.3771004942339374, + "grad_norm": 0.4311850368976593, + "learning_rate": 1.8316075562568096e-05, + "loss": 0.5566, + "step": 13734 + }, + { + "epoch": 0.37712795167490387, + "grad_norm": 0.42929187417030334, + "learning_rate": 1.831583569687787e-05, + "loss": 0.5601, + "step": 13735 + }, + { + "epoch": 0.3771554091158704, + "grad_norm": 0.419512540102005, + "learning_rate": 1.8315595815675958e-05, + "loss": 0.5238, + "step": 13736 + }, + { + "epoch": 0.3771828665568369, + "grad_norm": 0.37914663553237915, + "learning_rate": 1.8315355918962803e-05, + "loss": 0.5192, + "step": 13737 + }, + { + "epoch": 0.3772103239978034, + "grad_norm": 0.4331400692462921, + "learning_rate": 1.831511600673886e-05, + "loss": 0.5302, + "step": 13738 + }, + { + "epoch": 0.3772377814387699, + "grad_norm": 0.35491684079170227, + "learning_rate": 1.831487607900457e-05, + "loss": 0.4751, + "step": 13739 + }, + { + "epoch": 0.3772652388797364, + "grad_norm": 0.3807246685028076, + "learning_rate": 1.831463613576038e-05, + "loss": 0.5499, + "step": 13740 + }, + { + "epoch": 0.3772926963207029, + "grad_norm": 0.4057830274105072, + "learning_rate": 1.831439617700674e-05, + "loss": 0.5762, + "step": 13741 + }, + { + "epoch": 0.3773201537616694, + "grad_norm": 0.3789764642715454, + "learning_rate": 1.8314156202744096e-05, + "loss": 0.4833, + "step": 13742 + }, + { + "epoch": 0.37734761120263594, + "grad_norm": 0.4038592576980591, + "learning_rate": 1.83139162129729e-05, + "loss": 0.5502, + "step": 13743 + }, + { + "epoch": 0.37737506864360243, + "grad_norm": 0.3741764724254608, + "learning_rate": 1.8313676207693595e-05, + "loss": 0.4961, + "step": 13744 + }, + { + "epoch": 0.3774025260845689, + "grad_norm": 0.3956778049468994, + "learning_rate": 1.8313436186906634e-05, + "loss": 0.5162, + "step": 13745 + }, + { + "epoch": 0.3774299835255354, + "grad_norm": 0.3458876311779022, + "learning_rate": 1.8313196150612458e-05, + "loss": 0.4656, + "step": 13746 + }, + { + "epoch": 0.3774574409665019, + "grad_norm": 0.36063453555107117, + "learning_rate": 1.8312956098811518e-05, + "loss": 0.5755, + "step": 13747 + }, + { + "epoch": 0.3774848984074684, + "grad_norm": 0.375021368265152, + "learning_rate": 1.8312716031504264e-05, + "loss": 0.5577, + "step": 13748 + }, + { + "epoch": 0.3775123558484349, + "grad_norm": 0.3624420762062073, + "learning_rate": 1.831247594869114e-05, + "loss": 0.5517, + "step": 13749 + }, + { + "epoch": 0.37753981328940145, + "grad_norm": 0.3998534083366394, + "learning_rate": 1.831223585037259e-05, + "loss": 0.5863, + "step": 13750 + }, + { + "epoch": 0.37756727073036794, + "grad_norm": 0.4463280141353607, + "learning_rate": 1.8311995736549076e-05, + "loss": 0.5406, + "step": 13751 + }, + { + "epoch": 0.37759472817133444, + "grad_norm": 0.4440414011478424, + "learning_rate": 1.8311755607221033e-05, + "loss": 0.5913, + "step": 13752 + }, + { + "epoch": 0.37762218561230093, + "grad_norm": 0.3355647623538971, + "learning_rate": 1.8311515462388913e-05, + "loss": 0.46, + "step": 13753 + }, + { + "epoch": 0.3776496430532674, + "grad_norm": 0.3777865767478943, + "learning_rate": 1.8311275302053166e-05, + "loss": 0.4793, + "step": 13754 + }, + { + "epoch": 0.3776771004942339, + "grad_norm": 0.36069706082344055, + "learning_rate": 1.8311035126214235e-05, + "loss": 0.5107, + "step": 13755 + }, + { + "epoch": 0.3777045579352004, + "grad_norm": 0.4785435199737549, + "learning_rate": 1.8310794934872572e-05, + "loss": 0.4713, + "step": 13756 + }, + { + "epoch": 0.37773201537616696, + "grad_norm": 0.4115641415119171, + "learning_rate": 1.8310554728028624e-05, + "loss": 0.5505, + "step": 13757 + }, + { + "epoch": 0.37775947281713346, + "grad_norm": 0.5262405276298523, + "learning_rate": 1.8310314505682842e-05, + "loss": 0.5945, + "step": 13758 + }, + { + "epoch": 0.37778693025809995, + "grad_norm": 0.37769246101379395, + "learning_rate": 1.831007426783567e-05, + "loss": 0.5276, + "step": 13759 + }, + { + "epoch": 0.37781438769906645, + "grad_norm": 0.37171003222465515, + "learning_rate": 1.830983401448755e-05, + "loss": 0.4441, + "step": 13760 + }, + { + "epoch": 0.37784184514003294, + "grad_norm": 0.4143519699573517, + "learning_rate": 1.8309593745638945e-05, + "loss": 0.6698, + "step": 13761 + }, + { + "epoch": 0.37786930258099943, + "grad_norm": 0.4104626774787903, + "learning_rate": 1.8309353461290293e-05, + "loss": 0.5241, + "step": 13762 + }, + { + "epoch": 0.3778967600219659, + "grad_norm": 0.41879019141197205, + "learning_rate": 1.8309113161442043e-05, + "loss": 0.5456, + "step": 13763 + }, + { + "epoch": 0.3779242174629325, + "grad_norm": 0.591071367263794, + "learning_rate": 1.830887284609465e-05, + "loss": 0.4948, + "step": 13764 + }, + { + "epoch": 0.37795167490389897, + "grad_norm": 0.6019638180732727, + "learning_rate": 1.8308632515248553e-05, + "loss": 0.5714, + "step": 13765 + }, + { + "epoch": 0.37797913234486546, + "grad_norm": 0.5351055860519409, + "learning_rate": 1.8308392168904205e-05, + "loss": 0.6151, + "step": 13766 + }, + { + "epoch": 0.37800658978583196, + "grad_norm": 0.37898558378219604, + "learning_rate": 1.8308151807062057e-05, + "loss": 0.4675, + "step": 13767 + }, + { + "epoch": 0.37803404722679845, + "grad_norm": 0.33782288432121277, + "learning_rate": 1.830791142972255e-05, + "loss": 0.5295, + "step": 13768 + }, + { + "epoch": 0.37806150466776495, + "grad_norm": 0.34968432784080505, + "learning_rate": 1.8307671036886142e-05, + "loss": 0.5673, + "step": 13769 + }, + { + "epoch": 0.37808896210873144, + "grad_norm": 0.3685080409049988, + "learning_rate": 1.8307430628553273e-05, + "loss": 0.4932, + "step": 13770 + }, + { + "epoch": 0.378116419549698, + "grad_norm": 0.3498368561267853, + "learning_rate": 1.8307190204724393e-05, + "loss": 0.5004, + "step": 13771 + }, + { + "epoch": 0.3781438769906645, + "grad_norm": 0.35040590167045593, + "learning_rate": 1.830694976539995e-05, + "loss": 0.5532, + "step": 13772 + }, + { + "epoch": 0.378171334431631, + "grad_norm": 0.46094202995300293, + "learning_rate": 1.83067093105804e-05, + "loss": 0.5126, + "step": 13773 + }, + { + "epoch": 0.3781987918725975, + "grad_norm": 0.35522404313087463, + "learning_rate": 1.830646884026618e-05, + "loss": 0.4716, + "step": 13774 + }, + { + "epoch": 0.37822624931356397, + "grad_norm": 0.3499138653278351, + "learning_rate": 1.830622835445775e-05, + "loss": 0.5419, + "step": 13775 + }, + { + "epoch": 0.37825370675453046, + "grad_norm": 0.3815102279186249, + "learning_rate": 1.830598785315555e-05, + "loss": 0.5275, + "step": 13776 + }, + { + "epoch": 0.37828116419549696, + "grad_norm": 0.34564128518104553, + "learning_rate": 1.8305747336360034e-05, + "loss": 0.4466, + "step": 13777 + }, + { + "epoch": 0.3783086216364635, + "grad_norm": 0.3671666085720062, + "learning_rate": 1.8305506804071645e-05, + "loss": 0.498, + "step": 13778 + }, + { + "epoch": 0.37833607907743, + "grad_norm": 0.35796022415161133, + "learning_rate": 1.8305266256290838e-05, + "loss": 0.5484, + "step": 13779 + }, + { + "epoch": 0.3783635365183965, + "grad_norm": 0.35656628012657166, + "learning_rate": 1.8305025693018058e-05, + "loss": 0.54, + "step": 13780 + }, + { + "epoch": 0.378390993959363, + "grad_norm": 0.4042435884475708, + "learning_rate": 1.8304785114253757e-05, + "loss": 0.5974, + "step": 13781 + }, + { + "epoch": 0.3784184514003295, + "grad_norm": 0.37099510431289673, + "learning_rate": 1.8304544519998376e-05, + "loss": 0.5037, + "step": 13782 + }, + { + "epoch": 0.378445908841296, + "grad_norm": 0.44777950644493103, + "learning_rate": 1.830430391025237e-05, + "loss": 0.6092, + "step": 13783 + }, + { + "epoch": 0.37847336628226247, + "grad_norm": 0.3814082145690918, + "learning_rate": 1.830406328501619e-05, + "loss": 0.5229, + "step": 13784 + }, + { + "epoch": 0.378500823723229, + "grad_norm": 0.4603172540664673, + "learning_rate": 1.8303822644290278e-05, + "loss": 0.543, + "step": 13785 + }, + { + "epoch": 0.3785282811641955, + "grad_norm": 0.3858430087566376, + "learning_rate": 1.830358198807509e-05, + "loss": 0.5969, + "step": 13786 + }, + { + "epoch": 0.378555738605162, + "grad_norm": 0.38362088799476624, + "learning_rate": 1.8303341316371068e-05, + "loss": 0.5464, + "step": 13787 + }, + { + "epoch": 0.3785831960461285, + "grad_norm": 0.4237290918827057, + "learning_rate": 1.8303100629178666e-05, + "loss": 0.4442, + "step": 13788 + }, + { + "epoch": 0.378610653487095, + "grad_norm": 0.36064085364341736, + "learning_rate": 1.830285992649833e-05, + "loss": 0.524, + "step": 13789 + }, + { + "epoch": 0.3786381109280615, + "grad_norm": 0.4329741299152374, + "learning_rate": 1.8302619208330513e-05, + "loss": 0.6093, + "step": 13790 + }, + { + "epoch": 0.378665568369028, + "grad_norm": 0.37747669219970703, + "learning_rate": 1.830237847467566e-05, + "loss": 0.4857, + "step": 13791 + }, + { + "epoch": 0.37869302580999453, + "grad_norm": 0.38027170300483704, + "learning_rate": 1.8302137725534223e-05, + "loss": 0.5112, + "step": 13792 + }, + { + "epoch": 0.378720483250961, + "grad_norm": 0.4061524569988251, + "learning_rate": 1.8301896960906648e-05, + "loss": 0.5197, + "step": 13793 + }, + { + "epoch": 0.3787479406919275, + "grad_norm": 0.38775455951690674, + "learning_rate": 1.830165618079338e-05, + "loss": 0.4315, + "step": 13794 + }, + { + "epoch": 0.378775398132894, + "grad_norm": 0.48909252882003784, + "learning_rate": 1.8301415385194882e-05, + "loss": 0.5184, + "step": 13795 + }, + { + "epoch": 0.3788028555738605, + "grad_norm": 1.6780592203140259, + "learning_rate": 1.8301174574111592e-05, + "loss": 0.5745, + "step": 13796 + }, + { + "epoch": 0.378830313014827, + "grad_norm": 0.3711349368095398, + "learning_rate": 1.8300933747543957e-05, + "loss": 0.4913, + "step": 13797 + }, + { + "epoch": 0.3788577704557935, + "grad_norm": 0.3502753674983978, + "learning_rate": 1.8300692905492438e-05, + "loss": 0.4641, + "step": 13798 + }, + { + "epoch": 0.37888522789676005, + "grad_norm": 0.44958019256591797, + "learning_rate": 1.8300452047957478e-05, + "loss": 0.5263, + "step": 13799 + }, + { + "epoch": 0.37891268533772654, + "grad_norm": 0.36767876148223877, + "learning_rate": 1.830021117493952e-05, + "loss": 0.4953, + "step": 13800 + }, + { + "epoch": 0.37894014277869303, + "grad_norm": 0.37956705689430237, + "learning_rate": 1.8299970286439023e-05, + "loss": 0.692, + "step": 13801 + }, + { + "epoch": 0.37896760021965953, + "grad_norm": 0.39017900824546814, + "learning_rate": 1.829972938245643e-05, + "loss": 0.5399, + "step": 13802 + }, + { + "epoch": 0.378995057660626, + "grad_norm": 0.3339664936065674, + "learning_rate": 1.8299488462992197e-05, + "loss": 0.4303, + "step": 13803 + }, + { + "epoch": 0.3790225151015925, + "grad_norm": 0.3564557731151581, + "learning_rate": 1.8299247528046764e-05, + "loss": 0.5023, + "step": 13804 + }, + { + "epoch": 0.379049972542559, + "grad_norm": 0.32645806670188904, + "learning_rate": 1.8299006577620585e-05, + "loss": 0.555, + "step": 13805 + }, + { + "epoch": 0.37907742998352556, + "grad_norm": 0.4376627206802368, + "learning_rate": 1.8298765611714116e-05, + "loss": 0.4829, + "step": 13806 + }, + { + "epoch": 0.37910488742449205, + "grad_norm": 0.4172261357307434, + "learning_rate": 1.8298524630327798e-05, + "loss": 0.569, + "step": 13807 + }, + { + "epoch": 0.37913234486545855, + "grad_norm": 0.3637852668762207, + "learning_rate": 1.829828363346208e-05, + "loss": 0.5444, + "step": 13808 + }, + { + "epoch": 0.37915980230642504, + "grad_norm": 0.3990805447101593, + "learning_rate": 1.8298042621117417e-05, + "loss": 0.5135, + "step": 13809 + }, + { + "epoch": 0.37918725974739154, + "grad_norm": 0.3700745403766632, + "learning_rate": 1.8297801593294256e-05, + "loss": 0.5305, + "step": 13810 + }, + { + "epoch": 0.37921471718835803, + "grad_norm": 0.5437168478965759, + "learning_rate": 1.8297560549993044e-05, + "loss": 0.5261, + "step": 13811 + }, + { + "epoch": 0.3792421746293245, + "grad_norm": 0.3400055766105652, + "learning_rate": 1.8297319491214237e-05, + "loss": 0.5335, + "step": 13812 + }, + { + "epoch": 0.3792696320702911, + "grad_norm": 0.3455132246017456, + "learning_rate": 1.829707841695828e-05, + "loss": 0.5365, + "step": 13813 + }, + { + "epoch": 0.37929708951125757, + "grad_norm": 0.32821378111839294, + "learning_rate": 1.8296837327225624e-05, + "loss": 0.4527, + "step": 13814 + }, + { + "epoch": 0.37932454695222406, + "grad_norm": 0.3797248303890228, + "learning_rate": 1.8296596222016717e-05, + "loss": 0.521, + "step": 13815 + }, + { + "epoch": 0.37935200439319056, + "grad_norm": 0.35051530599594116, + "learning_rate": 1.8296355101332012e-05, + "loss": 0.5434, + "step": 13816 + }, + { + "epoch": 0.37937946183415705, + "grad_norm": 0.3556916415691376, + "learning_rate": 1.8296113965171954e-05, + "loss": 0.4998, + "step": 13817 + }, + { + "epoch": 0.37940691927512354, + "grad_norm": 0.47228267788887024, + "learning_rate": 1.8295872813536998e-05, + "loss": 0.5363, + "step": 13818 + }, + { + "epoch": 0.37943437671609004, + "grad_norm": 0.3994893431663513, + "learning_rate": 1.829563164642759e-05, + "loss": 0.496, + "step": 13819 + }, + { + "epoch": 0.3794618341570566, + "grad_norm": 0.3593752682209015, + "learning_rate": 1.8295390463844184e-05, + "loss": 0.5136, + "step": 13820 + }, + { + "epoch": 0.3794892915980231, + "grad_norm": 0.35013872385025024, + "learning_rate": 1.8295149265787224e-05, + "loss": 0.5363, + "step": 13821 + }, + { + "epoch": 0.3795167490389896, + "grad_norm": 0.4385868310928345, + "learning_rate": 1.8294908052257164e-05, + "loss": 0.5925, + "step": 13822 + }, + { + "epoch": 0.37954420647995607, + "grad_norm": 0.3450391888618469, + "learning_rate": 1.8294666823254452e-05, + "loss": 0.5212, + "step": 13823 + }, + { + "epoch": 0.37957166392092256, + "grad_norm": 0.4294957220554352, + "learning_rate": 1.829442557877954e-05, + "loss": 0.5014, + "step": 13824 + }, + { + "epoch": 0.37959912136188906, + "grad_norm": 0.317129522562027, + "learning_rate": 1.829418431883288e-05, + "loss": 0.3968, + "step": 13825 + }, + { + "epoch": 0.37962657880285555, + "grad_norm": 0.4086359441280365, + "learning_rate": 1.8293943043414913e-05, + "loss": 0.5317, + "step": 13826 + }, + { + "epoch": 0.3796540362438221, + "grad_norm": 0.325891375541687, + "learning_rate": 1.82937017525261e-05, + "loss": 0.4868, + "step": 13827 + }, + { + "epoch": 0.3796814936847886, + "grad_norm": 0.3927406966686249, + "learning_rate": 1.8293460446166884e-05, + "loss": 0.5368, + "step": 13828 + }, + { + "epoch": 0.3797089511257551, + "grad_norm": 0.533870279788971, + "learning_rate": 1.829321912433772e-05, + "loss": 0.4927, + "step": 13829 + }, + { + "epoch": 0.3797364085667216, + "grad_norm": 0.3444902002811432, + "learning_rate": 1.8292977787039053e-05, + "loss": 0.508, + "step": 13830 + }, + { + "epoch": 0.3797638660076881, + "grad_norm": 0.3405660092830658, + "learning_rate": 1.8292736434271336e-05, + "loss": 0.4414, + "step": 13831 + }, + { + "epoch": 0.37979132344865457, + "grad_norm": 0.43637871742248535, + "learning_rate": 1.829249506603502e-05, + "loss": 0.5539, + "step": 13832 + }, + { + "epoch": 0.37981878088962107, + "grad_norm": 1.0683155059814453, + "learning_rate": 1.829225368233055e-05, + "loss": 0.3956, + "step": 13833 + }, + { + "epoch": 0.3798462383305876, + "grad_norm": 0.3641483187675476, + "learning_rate": 1.8292012283158384e-05, + "loss": 0.4792, + "step": 13834 + }, + { + "epoch": 0.3798736957715541, + "grad_norm": 0.3743440508842468, + "learning_rate": 1.829177086851897e-05, + "loss": 0.5113, + "step": 13835 + }, + { + "epoch": 0.3799011532125206, + "grad_norm": 0.37791576981544495, + "learning_rate": 1.8291529438412753e-05, + "loss": 0.5443, + "step": 13836 + }, + { + "epoch": 0.3799286106534871, + "grad_norm": 0.3418792486190796, + "learning_rate": 1.829128799284019e-05, + "loss": 0.5972, + "step": 13837 + }, + { + "epoch": 0.3799560680944536, + "grad_norm": 0.47639182209968567, + "learning_rate": 1.829104653180173e-05, + "loss": 0.4946, + "step": 13838 + }, + { + "epoch": 0.3799835255354201, + "grad_norm": 0.3716491460800171, + "learning_rate": 1.8290805055297816e-05, + "loss": 0.4612, + "step": 13839 + }, + { + "epoch": 0.3800109829763866, + "grad_norm": 0.4136045277118683, + "learning_rate": 1.829056356332891e-05, + "loss": 0.6134, + "step": 13840 + }, + { + "epoch": 0.38003844041735313, + "grad_norm": 0.36192354559898376, + "learning_rate": 1.8290322055895454e-05, + "loss": 0.5571, + "step": 13841 + }, + { + "epoch": 0.3800658978583196, + "grad_norm": 0.4953792691230774, + "learning_rate": 1.82900805329979e-05, + "loss": 0.6025, + "step": 13842 + }, + { + "epoch": 0.3800933552992861, + "grad_norm": 0.3811550736427307, + "learning_rate": 1.8289838994636705e-05, + "loss": 0.5277, + "step": 13843 + }, + { + "epoch": 0.3801208127402526, + "grad_norm": 0.3509882688522339, + "learning_rate": 1.828959744081231e-05, + "loss": 0.501, + "step": 13844 + }, + { + "epoch": 0.3801482701812191, + "grad_norm": 0.36605462431907654, + "learning_rate": 1.8289355871525174e-05, + "loss": 0.5052, + "step": 13845 + }, + { + "epoch": 0.3801757276221856, + "grad_norm": 0.48164623975753784, + "learning_rate": 1.828911428677574e-05, + "loss": 0.5333, + "step": 13846 + }, + { + "epoch": 0.3802031850631521, + "grad_norm": 0.41701173782348633, + "learning_rate": 1.8288872686564463e-05, + "loss": 0.5299, + "step": 13847 + }, + { + "epoch": 0.38023064250411864, + "grad_norm": 0.4387149512767792, + "learning_rate": 1.8288631070891797e-05, + "loss": 0.4693, + "step": 13848 + }, + { + "epoch": 0.38025809994508514, + "grad_norm": 0.3895706832408905, + "learning_rate": 1.8288389439758184e-05, + "loss": 0.4672, + "step": 13849 + }, + { + "epoch": 0.38028555738605163, + "grad_norm": 0.3437942564487457, + "learning_rate": 1.8288147793164078e-05, + "loss": 0.4924, + "step": 13850 + }, + { + "epoch": 0.3803130148270181, + "grad_norm": 0.3893894851207733, + "learning_rate": 1.8287906131109934e-05, + "loss": 0.6084, + "step": 13851 + }, + { + "epoch": 0.3803404722679846, + "grad_norm": 0.3889264166355133, + "learning_rate": 1.8287664453596202e-05, + "loss": 0.5275, + "step": 13852 + }, + { + "epoch": 0.3803679297089511, + "grad_norm": 0.34500807523727417, + "learning_rate": 1.828742276062333e-05, + "loss": 0.4904, + "step": 13853 + }, + { + "epoch": 0.3803953871499176, + "grad_norm": 0.45810258388519287, + "learning_rate": 1.8287181052191766e-05, + "loss": 0.6142, + "step": 13854 + }, + { + "epoch": 0.38042284459088416, + "grad_norm": 0.3453022837638855, + "learning_rate": 1.8286939328301967e-05, + "loss": 0.4724, + "step": 13855 + }, + { + "epoch": 0.38045030203185065, + "grad_norm": 0.3395344614982605, + "learning_rate": 1.828669758895438e-05, + "loss": 0.4971, + "step": 13856 + }, + { + "epoch": 0.38047775947281715, + "grad_norm": 0.4008251130580902, + "learning_rate": 1.8286455834149456e-05, + "loss": 0.5027, + "step": 13857 + }, + { + "epoch": 0.38050521691378364, + "grad_norm": 0.39034906029701233, + "learning_rate": 1.828621406388765e-05, + "loss": 0.6209, + "step": 13858 + }, + { + "epoch": 0.38053267435475013, + "grad_norm": 0.41206830739974976, + "learning_rate": 1.828597227816941e-05, + "loss": 0.5874, + "step": 13859 + }, + { + "epoch": 0.3805601317957166, + "grad_norm": 0.4211733341217041, + "learning_rate": 1.828573047699518e-05, + "loss": 0.5405, + "step": 13860 + }, + { + "epoch": 0.3805875892366831, + "grad_norm": 0.4897020757198334, + "learning_rate": 1.828548866036543e-05, + "loss": 0.5716, + "step": 13861 + }, + { + "epoch": 0.3806150466776496, + "grad_norm": 0.37768909335136414, + "learning_rate": 1.828524682828059e-05, + "loss": 0.5444, + "step": 13862 + }, + { + "epoch": 0.38064250411861617, + "grad_norm": 0.3970952033996582, + "learning_rate": 1.8285004980741126e-05, + "loss": 0.5229, + "step": 13863 + }, + { + "epoch": 0.38066996155958266, + "grad_norm": 0.3401690721511841, + "learning_rate": 1.828476311774748e-05, + "loss": 0.547, + "step": 13864 + }, + { + "epoch": 0.38069741900054915, + "grad_norm": 0.38028448820114136, + "learning_rate": 1.8284521239300108e-05, + "loss": 0.5722, + "step": 13865 + }, + { + "epoch": 0.38072487644151565, + "grad_norm": 0.4574624300003052, + "learning_rate": 1.828427934539946e-05, + "loss": 0.4947, + "step": 13866 + }, + { + "epoch": 0.38075233388248214, + "grad_norm": 0.4043523371219635, + "learning_rate": 1.8284037436045986e-05, + "loss": 0.5845, + "step": 13867 + }, + { + "epoch": 0.38077979132344864, + "grad_norm": 0.3510778248310089, + "learning_rate": 1.828379551124014e-05, + "loss": 0.4833, + "step": 13868 + }, + { + "epoch": 0.38080724876441513, + "grad_norm": 0.384405255317688, + "learning_rate": 1.8283553570982372e-05, + "loss": 0.5664, + "step": 13869 + }, + { + "epoch": 0.3808347062053817, + "grad_norm": 0.48133954405784607, + "learning_rate": 1.828331161527313e-05, + "loss": 0.6205, + "step": 13870 + }, + { + "epoch": 0.3808621636463482, + "grad_norm": 0.387666255235672, + "learning_rate": 1.828306964411287e-05, + "loss": 0.4264, + "step": 13871 + }, + { + "epoch": 0.38088962108731467, + "grad_norm": 0.37103021144866943, + "learning_rate": 1.8282827657502042e-05, + "loss": 0.4947, + "step": 13872 + }, + { + "epoch": 0.38091707852828116, + "grad_norm": 0.36014223098754883, + "learning_rate": 1.8282585655441094e-05, + "loss": 0.5068, + "step": 13873 + }, + { + "epoch": 0.38094453596924766, + "grad_norm": 0.40497809648513794, + "learning_rate": 1.828234363793048e-05, + "loss": 0.6456, + "step": 13874 + }, + { + "epoch": 0.38097199341021415, + "grad_norm": 0.4754997789859772, + "learning_rate": 1.8282101604970655e-05, + "loss": 0.5316, + "step": 13875 + }, + { + "epoch": 0.38099945085118064, + "grad_norm": 0.3719891309738159, + "learning_rate": 1.8281859556562063e-05, + "loss": 0.4907, + "step": 13876 + }, + { + "epoch": 0.3810269082921472, + "grad_norm": 0.35601556301116943, + "learning_rate": 1.828161749270516e-05, + "loss": 0.4763, + "step": 13877 + }, + { + "epoch": 0.3810543657331137, + "grad_norm": 0.33887535333633423, + "learning_rate": 1.8281375413400406e-05, + "loss": 0.5315, + "step": 13878 + }, + { + "epoch": 0.3810818231740802, + "grad_norm": 0.40073615312576294, + "learning_rate": 1.8281133318648235e-05, + "loss": 0.5426, + "step": 13879 + }, + { + "epoch": 0.3811092806150467, + "grad_norm": 0.35356661677360535, + "learning_rate": 1.828089120844911e-05, + "loss": 0.5358, + "step": 13880 + }, + { + "epoch": 0.38113673805601317, + "grad_norm": 0.3895391523838043, + "learning_rate": 1.8280649082803478e-05, + "loss": 0.5153, + "step": 13881 + }, + { + "epoch": 0.38116419549697966, + "grad_norm": 0.36865657567977905, + "learning_rate": 1.8280406941711795e-05, + "loss": 0.5019, + "step": 13882 + }, + { + "epoch": 0.38119165293794616, + "grad_norm": 0.5565499067306519, + "learning_rate": 1.828016478517451e-05, + "loss": 0.4671, + "step": 13883 + }, + { + "epoch": 0.3812191103789127, + "grad_norm": 0.3717564344406128, + "learning_rate": 1.8279922613192075e-05, + "loss": 0.5273, + "step": 13884 + }, + { + "epoch": 0.3812465678198792, + "grad_norm": 0.3725970983505249, + "learning_rate": 1.827968042576494e-05, + "loss": 0.5265, + "step": 13885 + }, + { + "epoch": 0.3812740252608457, + "grad_norm": 0.36449384689331055, + "learning_rate": 1.8279438222893556e-05, + "loss": 0.5094, + "step": 13886 + }, + { + "epoch": 0.3813014827018122, + "grad_norm": 0.39664626121520996, + "learning_rate": 1.8279196004578384e-05, + "loss": 0.5491, + "step": 13887 + }, + { + "epoch": 0.3813289401427787, + "grad_norm": 0.3828599154949188, + "learning_rate": 1.8278953770819864e-05, + "loss": 0.5368, + "step": 13888 + }, + { + "epoch": 0.3813563975837452, + "grad_norm": 0.3700379729270935, + "learning_rate": 1.8278711521618456e-05, + "loss": 0.514, + "step": 13889 + }, + { + "epoch": 0.38138385502471167, + "grad_norm": 0.3430037498474121, + "learning_rate": 1.8278469256974607e-05, + "loss": 0.5059, + "step": 13890 + }, + { + "epoch": 0.3814113124656782, + "grad_norm": 0.35493800044059753, + "learning_rate": 1.8278226976888768e-05, + "loss": 0.4991, + "step": 13891 + }, + { + "epoch": 0.3814387699066447, + "grad_norm": 0.3717474639415741, + "learning_rate": 1.8277984681361397e-05, + "loss": 0.5731, + "step": 13892 + }, + { + "epoch": 0.3814662273476112, + "grad_norm": 0.35460329055786133, + "learning_rate": 1.8277742370392943e-05, + "loss": 0.5226, + "step": 13893 + }, + { + "epoch": 0.3814936847885777, + "grad_norm": 0.3527994453907013, + "learning_rate": 1.827750004398386e-05, + "loss": 0.4094, + "step": 13894 + }, + { + "epoch": 0.3815211422295442, + "grad_norm": 0.38936564326286316, + "learning_rate": 1.827725770213459e-05, + "loss": 0.4847, + "step": 13895 + }, + { + "epoch": 0.3815485996705107, + "grad_norm": 0.33073586225509644, + "learning_rate": 1.8277015344845597e-05, + "loss": 0.5038, + "step": 13896 + }, + { + "epoch": 0.3815760571114772, + "grad_norm": 0.36350539326667786, + "learning_rate": 1.827677297211733e-05, + "loss": 0.4847, + "step": 13897 + }, + { + "epoch": 0.38160351455244373, + "grad_norm": 0.3488752245903015, + "learning_rate": 1.827653058395024e-05, + "loss": 0.477, + "step": 13898 + }, + { + "epoch": 0.38163097199341023, + "grad_norm": 0.35129135847091675, + "learning_rate": 1.8276288180344773e-05, + "loss": 0.4928, + "step": 13899 + }, + { + "epoch": 0.3816584294343767, + "grad_norm": 0.3267246186733246, + "learning_rate": 1.8276045761301394e-05, + "loss": 0.4782, + "step": 13900 + }, + { + "epoch": 0.3816858868753432, + "grad_norm": 0.43150362372398376, + "learning_rate": 1.8275803326820545e-05, + "loss": 0.4833, + "step": 13901 + }, + { + "epoch": 0.3817133443163097, + "grad_norm": 0.37292107939720154, + "learning_rate": 1.8275560876902682e-05, + "loss": 0.5014, + "step": 13902 + }, + { + "epoch": 0.3817408017572762, + "grad_norm": 0.3498885929584503, + "learning_rate": 1.8275318411548254e-05, + "loss": 0.4885, + "step": 13903 + }, + { + "epoch": 0.3817682591982427, + "grad_norm": 0.40131014585494995, + "learning_rate": 1.827507593075772e-05, + "loss": 0.4835, + "step": 13904 + }, + { + "epoch": 0.38179571663920925, + "grad_norm": 0.37820082902908325, + "learning_rate": 1.8274833434531527e-05, + "loss": 0.5764, + "step": 13905 + }, + { + "epoch": 0.38182317408017574, + "grad_norm": 0.3251263201236725, + "learning_rate": 1.827459092287013e-05, + "loss": 0.5124, + "step": 13906 + }, + { + "epoch": 0.38185063152114224, + "grad_norm": 0.39515992999076843, + "learning_rate": 1.8274348395773976e-05, + "loss": 0.5622, + "step": 13907 + }, + { + "epoch": 0.38187808896210873, + "grad_norm": 0.3760525584220886, + "learning_rate": 1.8274105853243524e-05, + "loss": 0.4205, + "step": 13908 + }, + { + "epoch": 0.3819055464030752, + "grad_norm": 0.36672553420066833, + "learning_rate": 1.8273863295279223e-05, + "loss": 0.5298, + "step": 13909 + }, + { + "epoch": 0.3819330038440417, + "grad_norm": 0.3371449112892151, + "learning_rate": 1.8273620721881527e-05, + "loss": 0.4396, + "step": 13910 + }, + { + "epoch": 0.3819604612850082, + "grad_norm": 0.4214079976081848, + "learning_rate": 1.827337813305089e-05, + "loss": 0.5577, + "step": 13911 + }, + { + "epoch": 0.38198791872597476, + "grad_norm": 0.3772503435611725, + "learning_rate": 1.827313552878776e-05, + "loss": 0.5273, + "step": 13912 + }, + { + "epoch": 0.38201537616694126, + "grad_norm": 0.4505605697631836, + "learning_rate": 1.827289290909259e-05, + "loss": 0.5291, + "step": 13913 + }, + { + "epoch": 0.38204283360790775, + "grad_norm": 0.41761326789855957, + "learning_rate": 1.8272650273965836e-05, + "loss": 0.5467, + "step": 13914 + }, + { + "epoch": 0.38207029104887424, + "grad_norm": 0.3570132851600647, + "learning_rate": 1.827240762340795e-05, + "loss": 0.6023, + "step": 13915 + }, + { + "epoch": 0.38209774848984074, + "grad_norm": 0.4497723877429962, + "learning_rate": 1.827216495741938e-05, + "loss": 0.5705, + "step": 13916 + }, + { + "epoch": 0.38212520593080723, + "grad_norm": 0.36400407552719116, + "learning_rate": 1.8271922276000587e-05, + "loss": 0.4924, + "step": 13917 + }, + { + "epoch": 0.3821526633717737, + "grad_norm": 0.3898507356643677, + "learning_rate": 1.8271679579152014e-05, + "loss": 0.498, + "step": 13918 + }, + { + "epoch": 0.3821801208127403, + "grad_norm": 0.35017484426498413, + "learning_rate": 1.8271436866874123e-05, + "loss": 0.4883, + "step": 13919 + }, + { + "epoch": 0.38220757825370677, + "grad_norm": 0.37291964888572693, + "learning_rate": 1.827119413916736e-05, + "loss": 0.5296, + "step": 13920 + }, + { + "epoch": 0.38223503569467326, + "grad_norm": 0.3607848882675171, + "learning_rate": 1.827095139603218e-05, + "loss": 0.4777, + "step": 13921 + }, + { + "epoch": 0.38226249313563976, + "grad_norm": 0.3584233522415161, + "learning_rate": 1.8270708637469034e-05, + "loss": 0.4104, + "step": 13922 + }, + { + "epoch": 0.38228995057660625, + "grad_norm": 0.3595774173736572, + "learning_rate": 1.827046586347838e-05, + "loss": 0.4619, + "step": 13923 + }, + { + "epoch": 0.38231740801757275, + "grad_norm": 0.3315858840942383, + "learning_rate": 1.8270223074060668e-05, + "loss": 0.3786, + "step": 13924 + }, + { + "epoch": 0.38234486545853924, + "grad_norm": 0.30800914764404297, + "learning_rate": 1.826998026921635e-05, + "loss": 0.4011, + "step": 13925 + }, + { + "epoch": 0.3823723228995058, + "grad_norm": 0.38222214579582214, + "learning_rate": 1.8269737448945877e-05, + "loss": 0.5894, + "step": 13926 + }, + { + "epoch": 0.3823997803404723, + "grad_norm": 0.3308667838573456, + "learning_rate": 1.8269494613249704e-05, + "loss": 0.4619, + "step": 13927 + }, + { + "epoch": 0.3824272377814388, + "grad_norm": 0.3982369303703308, + "learning_rate": 1.8269251762128286e-05, + "loss": 0.4722, + "step": 13928 + }, + { + "epoch": 0.38245469522240527, + "grad_norm": 0.3653436303138733, + "learning_rate": 1.8269008895582073e-05, + "loss": 0.482, + "step": 13929 + }, + { + "epoch": 0.38248215266337177, + "grad_norm": 0.31632134318351746, + "learning_rate": 1.8268766013611522e-05, + "loss": 0.4787, + "step": 13930 + }, + { + "epoch": 0.38250961010433826, + "grad_norm": 0.3835001587867737, + "learning_rate": 1.826852311621708e-05, + "loss": 0.3859, + "step": 13931 + }, + { + "epoch": 0.38253706754530475, + "grad_norm": 0.3605281412601471, + "learning_rate": 1.8268280203399205e-05, + "loss": 0.5087, + "step": 13932 + }, + { + "epoch": 0.3825645249862713, + "grad_norm": 0.3690866529941559, + "learning_rate": 1.8268037275158348e-05, + "loss": 0.5236, + "step": 13933 + }, + { + "epoch": 0.3825919824272378, + "grad_norm": 0.3921816051006317, + "learning_rate": 1.8267794331494965e-05, + "loss": 0.4789, + "step": 13934 + }, + { + "epoch": 0.3826194398682043, + "grad_norm": 0.34866631031036377, + "learning_rate": 1.82675513724095e-05, + "loss": 0.5621, + "step": 13935 + }, + { + "epoch": 0.3826468973091708, + "grad_norm": 0.3998859226703644, + "learning_rate": 1.826730839790242e-05, + "loss": 0.5026, + "step": 13936 + }, + { + "epoch": 0.3826743547501373, + "grad_norm": 0.3465539515018463, + "learning_rate": 1.826706540797417e-05, + "loss": 0.445, + "step": 13937 + }, + { + "epoch": 0.3827018121911038, + "grad_norm": 0.4993263781070709, + "learning_rate": 1.8266822402625202e-05, + "loss": 0.5743, + "step": 13938 + }, + { + "epoch": 0.38272926963207027, + "grad_norm": 0.408836305141449, + "learning_rate": 1.8266579381855973e-05, + "loss": 0.5285, + "step": 13939 + }, + { + "epoch": 0.3827567270730368, + "grad_norm": 0.33840101957321167, + "learning_rate": 1.8266336345666936e-05, + "loss": 0.4905, + "step": 13940 + }, + { + "epoch": 0.3827841845140033, + "grad_norm": 0.39024287462234497, + "learning_rate": 1.8266093294058542e-05, + "loss": 0.4985, + "step": 13941 + }, + { + "epoch": 0.3828116419549698, + "grad_norm": 0.4047720432281494, + "learning_rate": 1.8265850227031248e-05, + "loss": 0.5125, + "step": 13942 + }, + { + "epoch": 0.3828390993959363, + "grad_norm": 0.37581437826156616, + "learning_rate": 1.8265607144585504e-05, + "loss": 0.5274, + "step": 13943 + }, + { + "epoch": 0.3828665568369028, + "grad_norm": 0.3401198387145996, + "learning_rate": 1.8265364046721763e-05, + "loss": 0.5265, + "step": 13944 + }, + { + "epoch": 0.3828940142778693, + "grad_norm": 0.4069547951221466, + "learning_rate": 1.8265120933440483e-05, + "loss": 0.4694, + "step": 13945 + }, + { + "epoch": 0.3829214717188358, + "grad_norm": 0.3465431034564972, + "learning_rate": 1.8264877804742112e-05, + "loss": 0.5206, + "step": 13946 + }, + { + "epoch": 0.38294892915980233, + "grad_norm": 0.3165905475616455, + "learning_rate": 1.826463466062711e-05, + "loss": 0.4064, + "step": 13947 + }, + { + "epoch": 0.3829763866007688, + "grad_norm": 0.3559388518333435, + "learning_rate": 1.826439150109592e-05, + "loss": 0.585, + "step": 13948 + }, + { + "epoch": 0.3830038440417353, + "grad_norm": 0.43184831738471985, + "learning_rate": 1.8264148326149004e-05, + "loss": 0.5302, + "step": 13949 + }, + { + "epoch": 0.3830313014827018, + "grad_norm": 0.3486242890357971, + "learning_rate": 1.826390513578682e-05, + "loss": 0.56, + "step": 13950 + }, + { + "epoch": 0.3830587589236683, + "grad_norm": 0.4300706386566162, + "learning_rate": 1.826366193000981e-05, + "loss": 0.4843, + "step": 13951 + }, + { + "epoch": 0.3830862163646348, + "grad_norm": 0.39146852493286133, + "learning_rate": 1.826341870881843e-05, + "loss": 0.5198, + "step": 13952 + }, + { + "epoch": 0.3831136738056013, + "grad_norm": 0.4085060954093933, + "learning_rate": 1.8263175472213143e-05, + "loss": 0.4467, + "step": 13953 + }, + { + "epoch": 0.38314113124656785, + "grad_norm": 0.37857067584991455, + "learning_rate": 1.8262932220194392e-05, + "loss": 0.4888, + "step": 13954 + }, + { + "epoch": 0.38316858868753434, + "grad_norm": 0.36319029331207275, + "learning_rate": 1.8262688952762637e-05, + "loss": 0.5147, + "step": 13955 + }, + { + "epoch": 0.38319604612850083, + "grad_norm": 0.3573656380176544, + "learning_rate": 1.826244566991833e-05, + "loss": 0.4803, + "step": 13956 + }, + { + "epoch": 0.3832235035694673, + "grad_norm": 0.3512285351753235, + "learning_rate": 1.8262202371661925e-05, + "loss": 0.5616, + "step": 13957 + }, + { + "epoch": 0.3832509610104338, + "grad_norm": 0.42042943835258484, + "learning_rate": 1.8261959057993876e-05, + "loss": 0.6351, + "step": 13958 + }, + { + "epoch": 0.3832784184514003, + "grad_norm": 0.4108961820602417, + "learning_rate": 1.8261715728914636e-05, + "loss": 0.5151, + "step": 13959 + }, + { + "epoch": 0.3833058758923668, + "grad_norm": 0.3968958556652069, + "learning_rate": 1.826147238442466e-05, + "loss": 0.552, + "step": 13960 + }, + { + "epoch": 0.38333333333333336, + "grad_norm": 0.39028382301330566, + "learning_rate": 1.82612290245244e-05, + "loss": 0.543, + "step": 13961 + }, + { + "epoch": 0.38336079077429985, + "grad_norm": 0.3289088010787964, + "learning_rate": 1.826098564921431e-05, + "loss": 0.3926, + "step": 13962 + }, + { + "epoch": 0.38338824821526635, + "grad_norm": 0.3756379187107086, + "learning_rate": 1.826074225849485e-05, + "loss": 0.5692, + "step": 13963 + }, + { + "epoch": 0.38341570565623284, + "grad_norm": 0.3858208954334259, + "learning_rate": 1.8260498852366467e-05, + "loss": 0.5092, + "step": 13964 + }, + { + "epoch": 0.38344316309719934, + "grad_norm": 0.41005489230155945, + "learning_rate": 1.8260255430829613e-05, + "loss": 0.4794, + "step": 13965 + }, + { + "epoch": 0.38347062053816583, + "grad_norm": 0.43410614132881165, + "learning_rate": 1.8260011993884748e-05, + "loss": 0.5918, + "step": 13966 + }, + { + "epoch": 0.3834980779791323, + "grad_norm": 0.4048735201358795, + "learning_rate": 1.825976854153233e-05, + "loss": 0.5153, + "step": 13967 + }, + { + "epoch": 0.3835255354200989, + "grad_norm": 0.36828282475471497, + "learning_rate": 1.8259525073772804e-05, + "loss": 0.5631, + "step": 13968 + }, + { + "epoch": 0.38355299286106537, + "grad_norm": 0.3681231737136841, + "learning_rate": 1.8259281590606622e-05, + "loss": 0.417, + "step": 13969 + }, + { + "epoch": 0.38358045030203186, + "grad_norm": 0.3890265226364136, + "learning_rate": 1.8259038092034253e-05, + "loss": 0.4867, + "step": 13970 + }, + { + "epoch": 0.38360790774299836, + "grad_norm": 0.360311359167099, + "learning_rate": 1.8258794578056136e-05, + "loss": 0.4867, + "step": 13971 + }, + { + "epoch": 0.38363536518396485, + "grad_norm": 0.34107324481010437, + "learning_rate": 1.8258551048672733e-05, + "loss": 0.5005, + "step": 13972 + }, + { + "epoch": 0.38366282262493134, + "grad_norm": 0.3347267806529999, + "learning_rate": 1.8258307503884495e-05, + "loss": 0.4645, + "step": 13973 + }, + { + "epoch": 0.38369028006589784, + "grad_norm": 0.39427459239959717, + "learning_rate": 1.825806394369188e-05, + "loss": 0.5398, + "step": 13974 + }, + { + "epoch": 0.3837177375068644, + "grad_norm": 0.38120797276496887, + "learning_rate": 1.825782036809534e-05, + "loss": 0.5797, + "step": 13975 + }, + { + "epoch": 0.3837451949478309, + "grad_norm": 0.483855277299881, + "learning_rate": 1.8257576777095328e-05, + "loss": 0.4858, + "step": 13976 + }, + { + "epoch": 0.3837726523887974, + "grad_norm": 0.38435935974121094, + "learning_rate": 1.8257333170692303e-05, + "loss": 0.4475, + "step": 13977 + }, + { + "epoch": 0.38380010982976387, + "grad_norm": 0.3731548488140106, + "learning_rate": 1.825708954888671e-05, + "loss": 0.5471, + "step": 13978 + }, + { + "epoch": 0.38382756727073036, + "grad_norm": 0.343902587890625, + "learning_rate": 1.8256845911679014e-05, + "loss": 0.4471, + "step": 13979 + }, + { + "epoch": 0.38385502471169686, + "grad_norm": 0.31429341435432434, + "learning_rate": 1.8256602259069665e-05, + "loss": 0.4512, + "step": 13980 + }, + { + "epoch": 0.38388248215266335, + "grad_norm": 0.36044129729270935, + "learning_rate": 1.8256358591059115e-05, + "loss": 0.4701, + "step": 13981 + }, + { + "epoch": 0.3839099395936299, + "grad_norm": 0.3851945996284485, + "learning_rate": 1.8256114907647826e-05, + "loss": 0.5746, + "step": 13982 + }, + { + "epoch": 0.3839373970345964, + "grad_norm": 0.4410184621810913, + "learning_rate": 1.825587120883624e-05, + "loss": 0.6042, + "step": 13983 + }, + { + "epoch": 0.3839648544755629, + "grad_norm": 0.38871657848358154, + "learning_rate": 1.8255627494624824e-05, + "loss": 0.4139, + "step": 13984 + }, + { + "epoch": 0.3839923119165294, + "grad_norm": 0.4199775159358978, + "learning_rate": 1.825538376501403e-05, + "loss": 0.4382, + "step": 13985 + }, + { + "epoch": 0.3840197693574959, + "grad_norm": 0.36846691370010376, + "learning_rate": 1.8255140020004306e-05, + "loss": 0.5365, + "step": 13986 + }, + { + "epoch": 0.38404722679846237, + "grad_norm": 0.40677642822265625, + "learning_rate": 1.8254896259596116e-05, + "loss": 0.5233, + "step": 13987 + }, + { + "epoch": 0.38407468423942887, + "grad_norm": 0.38567695021629333, + "learning_rate": 1.8254652483789907e-05, + "loss": 0.5867, + "step": 13988 + }, + { + "epoch": 0.3841021416803954, + "grad_norm": 0.3708449602127075, + "learning_rate": 1.8254408692586136e-05, + "loss": 0.5614, + "step": 13989 + }, + { + "epoch": 0.3841295991213619, + "grad_norm": 0.418364018201828, + "learning_rate": 1.825416488598526e-05, + "loss": 0.4937, + "step": 13990 + }, + { + "epoch": 0.3841570565623284, + "grad_norm": 0.41072723269462585, + "learning_rate": 1.825392106398773e-05, + "loss": 0.5583, + "step": 13991 + }, + { + "epoch": 0.3841845140032949, + "grad_norm": 0.40427446365356445, + "learning_rate": 1.8253677226594003e-05, + "loss": 0.5418, + "step": 13992 + }, + { + "epoch": 0.3842119714442614, + "grad_norm": 0.3257812261581421, + "learning_rate": 1.8253433373804538e-05, + "loss": 0.4247, + "step": 13993 + }, + { + "epoch": 0.3842394288852279, + "grad_norm": 0.7855621576309204, + "learning_rate": 1.825318950561978e-05, + "loss": 0.4454, + "step": 13994 + }, + { + "epoch": 0.3842668863261944, + "grad_norm": 0.35842397809028625, + "learning_rate": 1.8252945622040192e-05, + "loss": 0.4693, + "step": 13995 + }, + { + "epoch": 0.3842943437671609, + "grad_norm": 0.43894949555397034, + "learning_rate": 1.8252701723066225e-05, + "loss": 0.5711, + "step": 13996 + }, + { + "epoch": 0.3843218012081274, + "grad_norm": 0.3998887240886688, + "learning_rate": 1.8252457808698338e-05, + "loss": 0.609, + "step": 13997 + }, + { + "epoch": 0.3843492586490939, + "grad_norm": 0.3589972257614136, + "learning_rate": 1.8252213878936982e-05, + "loss": 0.5141, + "step": 13998 + }, + { + "epoch": 0.3843767160900604, + "grad_norm": 0.32889533042907715, + "learning_rate": 1.8251969933782612e-05, + "loss": 0.4457, + "step": 13999 + }, + { + "epoch": 0.3844041735310269, + "grad_norm": 0.4249653220176697, + "learning_rate": 1.8251725973235685e-05, + "loss": 0.558, + "step": 14000 + }, + { + "epoch": 0.3844316309719934, + "grad_norm": 0.3567918837070465, + "learning_rate": 1.8251481997296654e-05, + "loss": 0.4887, + "step": 14001 + }, + { + "epoch": 0.3844590884129599, + "grad_norm": 0.3720819056034088, + "learning_rate": 1.8251238005965978e-05, + "loss": 0.5025, + "step": 14002 + }, + { + "epoch": 0.3844865458539264, + "grad_norm": 0.39419111609458923, + "learning_rate": 1.825099399924411e-05, + "loss": 0.5973, + "step": 14003 + }, + { + "epoch": 0.38451400329489294, + "grad_norm": 0.3126358389854431, + "learning_rate": 1.8250749977131504e-05, + "loss": 0.4178, + "step": 14004 + }, + { + "epoch": 0.38454146073585943, + "grad_norm": 0.3296810984611511, + "learning_rate": 1.8250505939628615e-05, + "loss": 0.5636, + "step": 14005 + }, + { + "epoch": 0.3845689181768259, + "grad_norm": 0.5792973637580872, + "learning_rate": 1.82502618867359e-05, + "loss": 0.4358, + "step": 14006 + }, + { + "epoch": 0.3845963756177924, + "grad_norm": 0.3696569502353668, + "learning_rate": 1.8250017818453815e-05, + "loss": 0.4272, + "step": 14007 + }, + { + "epoch": 0.3846238330587589, + "grad_norm": 0.39589837193489075, + "learning_rate": 1.824977373478281e-05, + "loss": 0.572, + "step": 14008 + }, + { + "epoch": 0.3846512904997254, + "grad_norm": 0.3693767488002777, + "learning_rate": 1.8249529635723348e-05, + "loss": 0.4989, + "step": 14009 + }, + { + "epoch": 0.3846787479406919, + "grad_norm": 0.3434186279773712, + "learning_rate": 1.8249285521275877e-05, + "loss": 0.401, + "step": 14010 + }, + { + "epoch": 0.38470620538165845, + "grad_norm": 0.36127105355262756, + "learning_rate": 1.8249041391440854e-05, + "loss": 0.4969, + "step": 14011 + }, + { + "epoch": 0.38473366282262494, + "grad_norm": 0.4253843426704407, + "learning_rate": 1.8248797246218738e-05, + "loss": 0.4871, + "step": 14012 + }, + { + "epoch": 0.38476112026359144, + "grad_norm": 0.40651535987854004, + "learning_rate": 1.8248553085609984e-05, + "loss": 0.4897, + "step": 14013 + }, + { + "epoch": 0.38478857770455793, + "grad_norm": 0.34978434443473816, + "learning_rate": 1.8248308909615046e-05, + "loss": 0.4496, + "step": 14014 + }, + { + "epoch": 0.3848160351455244, + "grad_norm": 0.4662056565284729, + "learning_rate": 1.8248064718234378e-05, + "loss": 0.5444, + "step": 14015 + }, + { + "epoch": 0.3848434925864909, + "grad_norm": 0.4043571352958679, + "learning_rate": 1.8247820511468438e-05, + "loss": 0.5548, + "step": 14016 + }, + { + "epoch": 0.3848709500274574, + "grad_norm": 0.38807183504104614, + "learning_rate": 1.8247576289317678e-05, + "loss": 0.5701, + "step": 14017 + }, + { + "epoch": 0.38489840746842396, + "grad_norm": 0.33410945534706116, + "learning_rate": 1.8247332051782558e-05, + "loss": 0.5059, + "step": 14018 + }, + { + "epoch": 0.38492586490939046, + "grad_norm": 0.37202829122543335, + "learning_rate": 1.8247087798863533e-05, + "loss": 0.5459, + "step": 14019 + }, + { + "epoch": 0.38495332235035695, + "grad_norm": 0.3552018702030182, + "learning_rate": 1.8246843530561052e-05, + "loss": 0.5432, + "step": 14020 + }, + { + "epoch": 0.38498077979132345, + "grad_norm": 0.3733099400997162, + "learning_rate": 1.824659924687558e-05, + "loss": 0.5477, + "step": 14021 + }, + { + "epoch": 0.38500823723228994, + "grad_norm": 0.3672376275062561, + "learning_rate": 1.8246354947807567e-05, + "loss": 0.5702, + "step": 14022 + }, + { + "epoch": 0.38503569467325643, + "grad_norm": 0.3447495698928833, + "learning_rate": 1.824611063335747e-05, + "loss": 0.5636, + "step": 14023 + }, + { + "epoch": 0.38506315211422293, + "grad_norm": 0.5106338858604431, + "learning_rate": 1.8245866303525744e-05, + "loss": 0.4314, + "step": 14024 + }, + { + "epoch": 0.3850906095551895, + "grad_norm": 0.33148735761642456, + "learning_rate": 1.824562195831285e-05, + "loss": 0.4857, + "step": 14025 + }, + { + "epoch": 0.38511806699615597, + "grad_norm": 0.6828351020812988, + "learning_rate": 1.8245377597719235e-05, + "loss": 0.4843, + "step": 14026 + }, + { + "epoch": 0.38514552443712247, + "grad_norm": 0.3644402325153351, + "learning_rate": 1.8245133221745358e-05, + "loss": 0.5431, + "step": 14027 + }, + { + "epoch": 0.38517298187808896, + "grad_norm": 0.3804607093334198, + "learning_rate": 1.824488883039168e-05, + "loss": 0.557, + "step": 14028 + }, + { + "epoch": 0.38520043931905545, + "grad_norm": 0.39939379692077637, + "learning_rate": 1.8244644423658648e-05, + "loss": 0.5242, + "step": 14029 + }, + { + "epoch": 0.38522789676002195, + "grad_norm": 0.40380755066871643, + "learning_rate": 1.8244400001546727e-05, + "loss": 0.5214, + "step": 14030 + }, + { + "epoch": 0.38525535420098844, + "grad_norm": 0.4032784402370453, + "learning_rate": 1.8244155564056367e-05, + "loss": 0.5471, + "step": 14031 + }, + { + "epoch": 0.385282811641955, + "grad_norm": 0.36109986901283264, + "learning_rate": 1.8243911111188026e-05, + "loss": 0.5066, + "step": 14032 + }, + { + "epoch": 0.3853102690829215, + "grad_norm": 0.3474540412425995, + "learning_rate": 1.824366664294216e-05, + "loss": 0.5161, + "step": 14033 + }, + { + "epoch": 0.385337726523888, + "grad_norm": 0.4094393253326416, + "learning_rate": 1.8243422159319226e-05, + "loss": 0.5935, + "step": 14034 + }, + { + "epoch": 0.3853651839648545, + "grad_norm": 0.3866804838180542, + "learning_rate": 1.8243177660319674e-05, + "loss": 0.4964, + "step": 14035 + }, + { + "epoch": 0.38539264140582097, + "grad_norm": 0.3495941460132599, + "learning_rate": 1.8242933145943968e-05, + "loss": 0.5254, + "step": 14036 + }, + { + "epoch": 0.38542009884678746, + "grad_norm": 0.375085711479187, + "learning_rate": 1.824268861619256e-05, + "loss": 0.488, + "step": 14037 + }, + { + "epoch": 0.38544755628775396, + "grad_norm": 0.381561815738678, + "learning_rate": 1.824244407106591e-05, + "loss": 0.5985, + "step": 14038 + }, + { + "epoch": 0.3854750137287205, + "grad_norm": 0.33077648282051086, + "learning_rate": 1.8242199510564468e-05, + "loss": 0.3744, + "step": 14039 + }, + { + "epoch": 0.385502471169687, + "grad_norm": 0.33609816431999207, + "learning_rate": 1.8241954934688694e-05, + "loss": 0.3931, + "step": 14040 + }, + { + "epoch": 0.3855299286106535, + "grad_norm": 0.3833978474140167, + "learning_rate": 1.8241710343439042e-05, + "loss": 0.7034, + "step": 14041 + }, + { + "epoch": 0.38555738605162, + "grad_norm": 0.39826369285583496, + "learning_rate": 1.8241465736815972e-05, + "loss": 0.4482, + "step": 14042 + }, + { + "epoch": 0.3855848434925865, + "grad_norm": 0.3782382309436798, + "learning_rate": 1.824122111481994e-05, + "loss": 0.4914, + "step": 14043 + }, + { + "epoch": 0.385612300933553, + "grad_norm": 0.3173300623893738, + "learning_rate": 1.8240976477451396e-05, + "loss": 0.4391, + "step": 14044 + }, + { + "epoch": 0.38563975837451947, + "grad_norm": 0.39536771178245544, + "learning_rate": 1.82407318247108e-05, + "loss": 0.4865, + "step": 14045 + }, + { + "epoch": 0.385667215815486, + "grad_norm": 0.353209525346756, + "learning_rate": 1.824048715659861e-05, + "loss": 0.514, + "step": 14046 + }, + { + "epoch": 0.3856946732564525, + "grad_norm": 0.3811784088611603, + "learning_rate": 1.8240242473115288e-05, + "loss": 0.4977, + "step": 14047 + }, + { + "epoch": 0.385722130697419, + "grad_norm": 0.36604616045951843, + "learning_rate": 1.8239997774261277e-05, + "loss": 0.5675, + "step": 14048 + }, + { + "epoch": 0.3857495881383855, + "grad_norm": 0.33414775133132935, + "learning_rate": 1.823975306003704e-05, + "loss": 0.4361, + "step": 14049 + }, + { + "epoch": 0.385777045579352, + "grad_norm": 0.5045976638793945, + "learning_rate": 1.8239508330443035e-05, + "loss": 0.4098, + "step": 14050 + }, + { + "epoch": 0.3858045030203185, + "grad_norm": 0.390389084815979, + "learning_rate": 1.823926358547972e-05, + "loss": 0.5387, + "step": 14051 + }, + { + "epoch": 0.385831960461285, + "grad_norm": 0.3621509373188019, + "learning_rate": 1.8239018825147546e-05, + "loss": 0.428, + "step": 14052 + }, + { + "epoch": 0.38585941790225153, + "grad_norm": 0.37463808059692383, + "learning_rate": 1.8238774049446972e-05, + "loss": 0.52, + "step": 14053 + }, + { + "epoch": 0.385886875343218, + "grad_norm": 0.41150549054145813, + "learning_rate": 1.8238529258378457e-05, + "loss": 0.4896, + "step": 14054 + }, + { + "epoch": 0.3859143327841845, + "grad_norm": 0.339849054813385, + "learning_rate": 1.8238284451942453e-05, + "loss": 0.4447, + "step": 14055 + }, + { + "epoch": 0.385941790225151, + "grad_norm": 0.7891032695770264, + "learning_rate": 1.8238039630139418e-05, + "loss": 0.572, + "step": 14056 + }, + { + "epoch": 0.3859692476661175, + "grad_norm": 0.3172597885131836, + "learning_rate": 1.8237794792969813e-05, + "loss": 0.5291, + "step": 14057 + }, + { + "epoch": 0.385996705107084, + "grad_norm": 0.36595073342323303, + "learning_rate": 1.823754994043409e-05, + "loss": 0.4992, + "step": 14058 + }, + { + "epoch": 0.3860241625480505, + "grad_norm": 0.39051225781440735, + "learning_rate": 1.8237305072532708e-05, + "loss": 0.5249, + "step": 14059 + }, + { + "epoch": 0.38605161998901705, + "grad_norm": 0.4220277965068817, + "learning_rate": 1.8237060189266123e-05, + "loss": 0.5976, + "step": 14060 + }, + { + "epoch": 0.38607907742998354, + "grad_norm": 0.37225541472435, + "learning_rate": 1.8236815290634794e-05, + "loss": 0.4884, + "step": 14061 + }, + { + "epoch": 0.38610653487095004, + "grad_norm": 0.3839455544948578, + "learning_rate": 1.8236570376639172e-05, + "loss": 0.5519, + "step": 14062 + }, + { + "epoch": 0.38613399231191653, + "grad_norm": 0.3510797619819641, + "learning_rate": 1.823632544727972e-05, + "loss": 0.4345, + "step": 14063 + }, + { + "epoch": 0.386161449752883, + "grad_norm": 0.4299132823944092, + "learning_rate": 1.823608050255689e-05, + "loss": 0.5302, + "step": 14064 + }, + { + "epoch": 0.3861889071938495, + "grad_norm": 0.36782678961753845, + "learning_rate": 1.8235835542471143e-05, + "loss": 0.5627, + "step": 14065 + }, + { + "epoch": 0.386216364634816, + "grad_norm": 0.39651867747306824, + "learning_rate": 1.823559056702293e-05, + "loss": 0.5497, + "step": 14066 + }, + { + "epoch": 0.38624382207578256, + "grad_norm": 0.35798338055610657, + "learning_rate": 1.823534557621272e-05, + "loss": 0.5119, + "step": 14067 + }, + { + "epoch": 0.38627127951674906, + "grad_norm": 0.36801934242248535, + "learning_rate": 1.8235100570040957e-05, + "loss": 0.5283, + "step": 14068 + }, + { + "epoch": 0.38629873695771555, + "grad_norm": 0.40801045298576355, + "learning_rate": 1.8234855548508102e-05, + "loss": 0.5833, + "step": 14069 + }, + { + "epoch": 0.38632619439868204, + "grad_norm": 0.405068963766098, + "learning_rate": 1.8234610511614616e-05, + "loss": 0.5606, + "step": 14070 + }, + { + "epoch": 0.38635365183964854, + "grad_norm": 0.3675159513950348, + "learning_rate": 1.8234365459360953e-05, + "loss": 0.4817, + "step": 14071 + }, + { + "epoch": 0.38638110928061503, + "grad_norm": 0.39243021607398987, + "learning_rate": 1.8234120391747568e-05, + "loss": 0.5133, + "step": 14072 + }, + { + "epoch": 0.3864085667215815, + "grad_norm": 0.393812894821167, + "learning_rate": 1.823387530877492e-05, + "loss": 0.5517, + "step": 14073 + }, + { + "epoch": 0.3864360241625481, + "grad_norm": 0.427926629781723, + "learning_rate": 1.823363021044347e-05, + "loss": 0.56, + "step": 14074 + }, + { + "epoch": 0.38646348160351457, + "grad_norm": 0.3570818603038788, + "learning_rate": 1.823338509675367e-05, + "loss": 0.5352, + "step": 14075 + }, + { + "epoch": 0.38649093904448106, + "grad_norm": 0.43916141986846924, + "learning_rate": 1.823313996770598e-05, + "loss": 0.5472, + "step": 14076 + }, + { + "epoch": 0.38651839648544756, + "grad_norm": 0.3785596787929535, + "learning_rate": 1.8232894823300852e-05, + "loss": 0.4988, + "step": 14077 + }, + { + "epoch": 0.38654585392641405, + "grad_norm": 0.40699413418769836, + "learning_rate": 1.8232649663538753e-05, + "loss": 0.5369, + "step": 14078 + }, + { + "epoch": 0.38657331136738055, + "grad_norm": 0.3654366135597229, + "learning_rate": 1.8232404488420132e-05, + "loss": 0.4853, + "step": 14079 + }, + { + "epoch": 0.38660076880834704, + "grad_norm": 0.3845691382884979, + "learning_rate": 1.8232159297945448e-05, + "loss": 0.5628, + "step": 14080 + }, + { + "epoch": 0.3866282262493136, + "grad_norm": 0.4137239456176758, + "learning_rate": 1.8231914092115164e-05, + "loss": 0.4861, + "step": 14081 + }, + { + "epoch": 0.3866556836902801, + "grad_norm": 0.3792054355144501, + "learning_rate": 1.8231668870929728e-05, + "loss": 0.5647, + "step": 14082 + }, + { + "epoch": 0.3866831411312466, + "grad_norm": 0.3637066185474396, + "learning_rate": 1.8231423634389608e-05, + "loss": 0.4797, + "step": 14083 + }, + { + "epoch": 0.38671059857221307, + "grad_norm": 0.35625526309013367, + "learning_rate": 1.823117838249525e-05, + "loss": 0.4935, + "step": 14084 + }, + { + "epoch": 0.38673805601317957, + "grad_norm": 0.3656194508075714, + "learning_rate": 1.823093311524712e-05, + "loss": 0.5389, + "step": 14085 + }, + { + "epoch": 0.38676551345414606, + "grad_norm": 0.44352519512176514, + "learning_rate": 1.8230687832645672e-05, + "loss": 0.5604, + "step": 14086 + }, + { + "epoch": 0.38679297089511255, + "grad_norm": 0.44022423028945923, + "learning_rate": 1.8230442534691363e-05, + "loss": 0.4034, + "step": 14087 + }, + { + "epoch": 0.3868204283360791, + "grad_norm": 0.3629774749279022, + "learning_rate": 1.8230197221384657e-05, + "loss": 0.4299, + "step": 14088 + }, + { + "epoch": 0.3868478857770456, + "grad_norm": 0.31954848766326904, + "learning_rate": 1.8229951892726e-05, + "loss": 0.4094, + "step": 14089 + }, + { + "epoch": 0.3868753432180121, + "grad_norm": 0.3402932584285736, + "learning_rate": 1.8229706548715857e-05, + "loss": 0.5206, + "step": 14090 + }, + { + "epoch": 0.3869028006589786, + "grad_norm": 0.3694913387298584, + "learning_rate": 1.822946118935469e-05, + "loss": 0.5409, + "step": 14091 + }, + { + "epoch": 0.3869302580999451, + "grad_norm": 0.40915629267692566, + "learning_rate": 1.8229215814642945e-05, + "loss": 0.5873, + "step": 14092 + }, + { + "epoch": 0.3869577155409116, + "grad_norm": 0.3574916124343872, + "learning_rate": 1.8228970424581088e-05, + "loss": 0.4856, + "step": 14093 + }, + { + "epoch": 0.38698517298187807, + "grad_norm": 0.42402222752571106, + "learning_rate": 1.8228725019169578e-05, + "loss": 0.4935, + "step": 14094 + }, + { + "epoch": 0.3870126304228446, + "grad_norm": 0.34526070952415466, + "learning_rate": 1.8228479598408868e-05, + "loss": 0.5298, + "step": 14095 + }, + { + "epoch": 0.3870400878638111, + "grad_norm": 0.41104555130004883, + "learning_rate": 1.8228234162299413e-05, + "loss": 0.5465, + "step": 14096 + }, + { + "epoch": 0.3870675453047776, + "grad_norm": 0.3349936306476593, + "learning_rate": 1.8227988710841684e-05, + "loss": 0.493, + "step": 14097 + }, + { + "epoch": 0.3870950027457441, + "grad_norm": 0.3800337314605713, + "learning_rate": 1.822774324403612e-05, + "loss": 0.5017, + "step": 14098 + }, + { + "epoch": 0.3871224601867106, + "grad_norm": 0.4985032379627228, + "learning_rate": 1.8227497761883195e-05, + "loss": 0.4953, + "step": 14099 + }, + { + "epoch": 0.3871499176276771, + "grad_norm": 8.311792373657227, + "learning_rate": 1.822725226438336e-05, + "loss": 0.4752, + "step": 14100 + }, + { + "epoch": 0.3871773750686436, + "grad_norm": 0.35975801944732666, + "learning_rate": 1.822700675153707e-05, + "loss": 0.4602, + "step": 14101 + }, + { + "epoch": 0.38720483250961013, + "grad_norm": 0.38360512256622314, + "learning_rate": 1.822676122334479e-05, + "loss": 0.4896, + "step": 14102 + }, + { + "epoch": 0.3872322899505766, + "grad_norm": 0.3947080969810486, + "learning_rate": 1.8226515679806976e-05, + "loss": 0.6012, + "step": 14103 + }, + { + "epoch": 0.3872597473915431, + "grad_norm": 0.4513695538043976, + "learning_rate": 1.8226270120924084e-05, + "loss": 0.5103, + "step": 14104 + }, + { + "epoch": 0.3872872048325096, + "grad_norm": 0.3797403872013092, + "learning_rate": 1.822602454669657e-05, + "loss": 0.5841, + "step": 14105 + }, + { + "epoch": 0.3873146622734761, + "grad_norm": 0.37784725427627563, + "learning_rate": 1.8225778957124897e-05, + "loss": 0.5177, + "step": 14106 + }, + { + "epoch": 0.3873421197144426, + "grad_norm": 0.44778233766555786, + "learning_rate": 1.822553335220952e-05, + "loss": 0.5326, + "step": 14107 + }, + { + "epoch": 0.3873695771554091, + "grad_norm": 0.588991641998291, + "learning_rate": 1.8225287731950898e-05, + "loss": 0.5185, + "step": 14108 + }, + { + "epoch": 0.38739703459637564, + "grad_norm": 0.4092925190925598, + "learning_rate": 1.822504209634949e-05, + "loss": 0.5627, + "step": 14109 + }, + { + "epoch": 0.38742449203734214, + "grad_norm": 0.8498042821884155, + "learning_rate": 1.822479644540575e-05, + "loss": 0.5154, + "step": 14110 + }, + { + "epoch": 0.38745194947830863, + "grad_norm": 0.36591655015945435, + "learning_rate": 1.8224550779120144e-05, + "loss": 0.5236, + "step": 14111 + }, + { + "epoch": 0.3874794069192751, + "grad_norm": 0.3656807541847229, + "learning_rate": 1.8224305097493124e-05, + "loss": 0.5845, + "step": 14112 + }, + { + "epoch": 0.3875068643602416, + "grad_norm": 0.423927903175354, + "learning_rate": 1.822405940052515e-05, + "loss": 0.6141, + "step": 14113 + }, + { + "epoch": 0.3875343218012081, + "grad_norm": 0.4219048321247101, + "learning_rate": 1.822381368821668e-05, + "loss": 0.5482, + "step": 14114 + }, + { + "epoch": 0.3875617792421746, + "grad_norm": 0.3731614053249359, + "learning_rate": 1.8223567960568175e-05, + "loss": 0.4444, + "step": 14115 + }, + { + "epoch": 0.38758923668314116, + "grad_norm": 0.362409770488739, + "learning_rate": 1.822332221758009e-05, + "loss": 0.5831, + "step": 14116 + }, + { + "epoch": 0.38761669412410765, + "grad_norm": 0.3474118709564209, + "learning_rate": 1.8223076459252885e-05, + "loss": 0.4717, + "step": 14117 + }, + { + "epoch": 0.38764415156507415, + "grad_norm": 0.44740843772888184, + "learning_rate": 1.8222830685587017e-05, + "loss": 0.4837, + "step": 14118 + }, + { + "epoch": 0.38767160900604064, + "grad_norm": 0.3600844740867615, + "learning_rate": 1.8222584896582942e-05, + "loss": 0.5199, + "step": 14119 + }, + { + "epoch": 0.38769906644700713, + "grad_norm": 0.37647369503974915, + "learning_rate": 1.8222339092241126e-05, + "loss": 0.6043, + "step": 14120 + }, + { + "epoch": 0.38772652388797363, + "grad_norm": 0.5159639716148376, + "learning_rate": 1.8222093272562023e-05, + "loss": 0.5815, + "step": 14121 + }, + { + "epoch": 0.3877539813289401, + "grad_norm": 0.35462674498558044, + "learning_rate": 1.822184743754609e-05, + "loss": 0.4952, + "step": 14122 + }, + { + "epoch": 0.3877814387699067, + "grad_norm": 0.3894962966442108, + "learning_rate": 1.8221601587193788e-05, + "loss": 0.4966, + "step": 14123 + }, + { + "epoch": 0.38780889621087317, + "grad_norm": 0.36336007714271545, + "learning_rate": 1.8221355721505577e-05, + "loss": 0.568, + "step": 14124 + }, + { + "epoch": 0.38783635365183966, + "grad_norm": 0.36869803071022034, + "learning_rate": 1.8221109840481912e-05, + "loss": 0.556, + "step": 14125 + }, + { + "epoch": 0.38786381109280615, + "grad_norm": 0.3675497770309448, + "learning_rate": 1.8220863944123254e-05, + "loss": 0.5077, + "step": 14126 + }, + { + "epoch": 0.38789126853377265, + "grad_norm": 0.3234081268310547, + "learning_rate": 1.822061803243006e-05, + "loss": 0.5265, + "step": 14127 + }, + { + "epoch": 0.38791872597473914, + "grad_norm": 0.3902517855167389, + "learning_rate": 1.822037210540279e-05, + "loss": 0.5284, + "step": 14128 + }, + { + "epoch": 0.38794618341570564, + "grad_norm": 0.3330402374267578, + "learning_rate": 1.8220126163041906e-05, + "loss": 0.5158, + "step": 14129 + }, + { + "epoch": 0.38797364085667213, + "grad_norm": 0.379728227853775, + "learning_rate": 1.8219880205347858e-05, + "loss": 0.4775, + "step": 14130 + }, + { + "epoch": 0.3880010982976387, + "grad_norm": 0.3282727003097534, + "learning_rate": 1.821963423232111e-05, + "loss": 0.5201, + "step": 14131 + }, + { + "epoch": 0.3880285557386052, + "grad_norm": 0.40717265009880066, + "learning_rate": 1.8219388243962122e-05, + "loss": 0.5493, + "step": 14132 + }, + { + "epoch": 0.38805601317957167, + "grad_norm": 0.39037635922431946, + "learning_rate": 1.8219142240271353e-05, + "loss": 0.5555, + "step": 14133 + }, + { + "epoch": 0.38808347062053816, + "grad_norm": 0.3914187252521515, + "learning_rate": 1.821889622124926e-05, + "loss": 0.5109, + "step": 14134 + }, + { + "epoch": 0.38811092806150466, + "grad_norm": 0.36696669459342957, + "learning_rate": 1.82186501868963e-05, + "loss": 0.5048, + "step": 14135 + }, + { + "epoch": 0.38813838550247115, + "grad_norm": 0.36073067784309387, + "learning_rate": 1.8218404137212936e-05, + "loss": 0.544, + "step": 14136 + }, + { + "epoch": 0.38816584294343764, + "grad_norm": 0.35066846013069153, + "learning_rate": 1.8218158072199624e-05, + "loss": 0.4626, + "step": 14137 + }, + { + "epoch": 0.3881933003844042, + "grad_norm": 0.37658631801605225, + "learning_rate": 1.821791199185683e-05, + "loss": 0.5338, + "step": 14138 + }, + { + "epoch": 0.3882207578253707, + "grad_norm": 0.36394035816192627, + "learning_rate": 1.8217665896185003e-05, + "loss": 0.5462, + "step": 14139 + }, + { + "epoch": 0.3882482152663372, + "grad_norm": 0.38717687129974365, + "learning_rate": 1.8217419785184605e-05, + "loss": 0.6125, + "step": 14140 + }, + { + "epoch": 0.3882756727073037, + "grad_norm": 0.3724403381347656, + "learning_rate": 1.82171736588561e-05, + "loss": 0.5848, + "step": 14141 + }, + { + "epoch": 0.38830313014827017, + "grad_norm": 0.38526830077171326, + "learning_rate": 1.821692751719994e-05, + "loss": 0.5591, + "step": 14142 + }, + { + "epoch": 0.38833058758923666, + "grad_norm": 0.3456844389438629, + "learning_rate": 1.821668136021659e-05, + "loss": 0.5172, + "step": 14143 + }, + { + "epoch": 0.38835804503020316, + "grad_norm": 0.3599540591239929, + "learning_rate": 1.8216435187906504e-05, + "loss": 0.4683, + "step": 14144 + }, + { + "epoch": 0.3883855024711697, + "grad_norm": 0.3907967805862427, + "learning_rate": 1.8216189000270143e-05, + "loss": 0.5158, + "step": 14145 + }, + { + "epoch": 0.3884129599121362, + "grad_norm": 0.4056791663169861, + "learning_rate": 1.821594279730797e-05, + "loss": 0.5442, + "step": 14146 + }, + { + "epoch": 0.3884404173531027, + "grad_norm": 0.359477698802948, + "learning_rate": 1.8215696579020442e-05, + "loss": 0.4188, + "step": 14147 + }, + { + "epoch": 0.3884678747940692, + "grad_norm": 0.3789295256137848, + "learning_rate": 1.821545034540802e-05, + "loss": 0.5662, + "step": 14148 + }, + { + "epoch": 0.3884953322350357, + "grad_norm": 0.3442433774471283, + "learning_rate": 1.8215204096471156e-05, + "loss": 0.4989, + "step": 14149 + }, + { + "epoch": 0.3885227896760022, + "grad_norm": 0.40632107853889465, + "learning_rate": 1.8214957832210318e-05, + "loss": 0.5195, + "step": 14150 + }, + { + "epoch": 0.38855024711696867, + "grad_norm": 0.3592703938484192, + "learning_rate": 1.821471155262596e-05, + "loss": 0.5224, + "step": 14151 + }, + { + "epoch": 0.3885777045579352, + "grad_norm": 0.3774462640285492, + "learning_rate": 1.821446525771854e-05, + "loss": 0.4692, + "step": 14152 + }, + { + "epoch": 0.3886051619989017, + "grad_norm": 0.8145847916603088, + "learning_rate": 1.8214218947488526e-05, + "loss": 0.5129, + "step": 14153 + }, + { + "epoch": 0.3886326194398682, + "grad_norm": 0.6568920612335205, + "learning_rate": 1.821397262193637e-05, + "loss": 0.5257, + "step": 14154 + }, + { + "epoch": 0.3886600768808347, + "grad_norm": 0.3524383306503296, + "learning_rate": 1.8213726281062533e-05, + "loss": 0.4985, + "step": 14155 + }, + { + "epoch": 0.3886875343218012, + "grad_norm": 0.3843335211277008, + "learning_rate": 1.8213479924867476e-05, + "loss": 0.488, + "step": 14156 + }, + { + "epoch": 0.3887149917627677, + "grad_norm": 0.3518824577331543, + "learning_rate": 1.8213233553351656e-05, + "loss": 0.4793, + "step": 14157 + }, + { + "epoch": 0.3887424492037342, + "grad_norm": 0.33451294898986816, + "learning_rate": 1.8212987166515535e-05, + "loss": 0.5018, + "step": 14158 + }, + { + "epoch": 0.38876990664470074, + "grad_norm": 0.4087715446949005, + "learning_rate": 1.8212740764359572e-05, + "loss": 0.5195, + "step": 14159 + }, + { + "epoch": 0.38879736408566723, + "grad_norm": 0.4526727497577667, + "learning_rate": 1.8212494346884226e-05, + "loss": 0.5279, + "step": 14160 + }, + { + "epoch": 0.3888248215266337, + "grad_norm": 0.40942421555519104, + "learning_rate": 1.8212247914089954e-05, + "loss": 0.4722, + "step": 14161 + }, + { + "epoch": 0.3888522789676002, + "grad_norm": 0.35815927386283875, + "learning_rate": 1.8212001465977223e-05, + "loss": 0.4689, + "step": 14162 + }, + { + "epoch": 0.3888797364085667, + "grad_norm": 0.36330166459083557, + "learning_rate": 1.8211755002546487e-05, + "loss": 0.5281, + "step": 14163 + }, + { + "epoch": 0.3889071938495332, + "grad_norm": 0.3240397870540619, + "learning_rate": 1.8211508523798207e-05, + "loss": 0.4056, + "step": 14164 + }, + { + "epoch": 0.3889346512904997, + "grad_norm": 0.36651650071144104, + "learning_rate": 1.821126202973284e-05, + "loss": 0.5555, + "step": 14165 + }, + { + "epoch": 0.38896210873146625, + "grad_norm": 0.4208066165447235, + "learning_rate": 1.8211015520350856e-05, + "loss": 0.5504, + "step": 14166 + }, + { + "epoch": 0.38898956617243274, + "grad_norm": 0.3900386691093445, + "learning_rate": 1.8210768995652702e-05, + "loss": 0.5147, + "step": 14167 + }, + { + "epoch": 0.38901702361339924, + "grad_norm": 0.4119853079319, + "learning_rate": 1.821052245563884e-05, + "loss": 0.5198, + "step": 14168 + }, + { + "epoch": 0.38904448105436573, + "grad_norm": 0.31523963809013367, + "learning_rate": 1.8210275900309736e-05, + "loss": 0.4919, + "step": 14169 + }, + { + "epoch": 0.3890719384953322, + "grad_norm": 0.3643524944782257, + "learning_rate": 1.821002932966585e-05, + "loss": 0.4924, + "step": 14170 + }, + { + "epoch": 0.3890993959362987, + "grad_norm": 0.3701658248901367, + "learning_rate": 1.8209782743707637e-05, + "loss": 0.5746, + "step": 14171 + }, + { + "epoch": 0.3891268533772652, + "grad_norm": 0.3582409620285034, + "learning_rate": 1.820953614243556e-05, + "loss": 0.4729, + "step": 14172 + }, + { + "epoch": 0.38915431081823176, + "grad_norm": 0.38151058554649353, + "learning_rate": 1.8209289525850073e-05, + "loss": 0.5201, + "step": 14173 + }, + { + "epoch": 0.38918176825919826, + "grad_norm": 0.4196951389312744, + "learning_rate": 1.8209042893951647e-05, + "loss": 0.6073, + "step": 14174 + }, + { + "epoch": 0.38920922570016475, + "grad_norm": 0.3833915889263153, + "learning_rate": 1.820879624674073e-05, + "loss": 0.5417, + "step": 14175 + }, + { + "epoch": 0.38923668314113125, + "grad_norm": 0.3679640591144562, + "learning_rate": 1.8208549584217795e-05, + "loss": 0.4772, + "step": 14176 + }, + { + "epoch": 0.38926414058209774, + "grad_norm": 0.6714296936988831, + "learning_rate": 1.820830290638329e-05, + "loss": 0.5586, + "step": 14177 + }, + { + "epoch": 0.38929159802306423, + "grad_norm": 0.3289254903793335, + "learning_rate": 1.820805621323768e-05, + "loss": 0.4315, + "step": 14178 + }, + { + "epoch": 0.3893190554640307, + "grad_norm": 0.4000771641731262, + "learning_rate": 1.820780950478143e-05, + "loss": 0.4719, + "step": 14179 + }, + { + "epoch": 0.3893465129049973, + "grad_norm": 0.38257360458374023, + "learning_rate": 1.8207562781014997e-05, + "loss": 0.4819, + "step": 14180 + }, + { + "epoch": 0.38937397034596377, + "grad_norm": 0.4019245505332947, + "learning_rate": 1.8207316041938835e-05, + "loss": 0.4197, + "step": 14181 + }, + { + "epoch": 0.38940142778693027, + "grad_norm": 0.44085603952407837, + "learning_rate": 1.8207069287553407e-05, + "loss": 0.6315, + "step": 14182 + }, + { + "epoch": 0.38942888522789676, + "grad_norm": 0.3328128755092621, + "learning_rate": 1.820682251785918e-05, + "loss": 0.4699, + "step": 14183 + }, + { + "epoch": 0.38945634266886325, + "grad_norm": 0.3373732268810272, + "learning_rate": 1.8206575732856607e-05, + "loss": 0.5531, + "step": 14184 + }, + { + "epoch": 0.38948380010982975, + "grad_norm": 0.39614737033843994, + "learning_rate": 1.8206328932546153e-05, + "loss": 0.4916, + "step": 14185 + }, + { + "epoch": 0.38951125755079624, + "grad_norm": 0.3504573702812195, + "learning_rate": 1.8206082116928273e-05, + "loss": 0.4965, + "step": 14186 + }, + { + "epoch": 0.3895387149917628, + "grad_norm": 0.4065588414669037, + "learning_rate": 1.8205835286003436e-05, + "loss": 0.5832, + "step": 14187 + }, + { + "epoch": 0.3895661724327293, + "grad_norm": 0.373182088136673, + "learning_rate": 1.8205588439772094e-05, + "loss": 0.574, + "step": 14188 + }, + { + "epoch": 0.3895936298736958, + "grad_norm": 0.3479503393173218, + "learning_rate": 1.820534157823471e-05, + "loss": 0.4707, + "step": 14189 + }, + { + "epoch": 0.3896210873146623, + "grad_norm": 0.37380242347717285, + "learning_rate": 1.8205094701391747e-05, + "loss": 0.4957, + "step": 14190 + }, + { + "epoch": 0.38964854475562877, + "grad_norm": 0.34749823808670044, + "learning_rate": 1.8204847809243664e-05, + "loss": 0.4963, + "step": 14191 + }, + { + "epoch": 0.38967600219659526, + "grad_norm": 0.38759130239486694, + "learning_rate": 1.8204600901790922e-05, + "loss": 0.4887, + "step": 14192 + }, + { + "epoch": 0.38970345963756176, + "grad_norm": 0.3784238398075104, + "learning_rate": 1.820435397903398e-05, + "loss": 0.5379, + "step": 14193 + }, + { + "epoch": 0.3897309170785283, + "grad_norm": 0.35628509521484375, + "learning_rate": 1.8204107040973294e-05, + "loss": 0.468, + "step": 14194 + }, + { + "epoch": 0.3897583745194948, + "grad_norm": 0.3369622826576233, + "learning_rate": 1.8203860087609334e-05, + "loss": 0.4564, + "step": 14195 + }, + { + "epoch": 0.3897858319604613, + "grad_norm": 0.3466031849384308, + "learning_rate": 1.8203613118942557e-05, + "loss": 0.4104, + "step": 14196 + }, + { + "epoch": 0.3898132894014278, + "grad_norm": 0.341572642326355, + "learning_rate": 1.8203366134973422e-05, + "loss": 0.4953, + "step": 14197 + }, + { + "epoch": 0.3898407468423943, + "grad_norm": 0.34590843319892883, + "learning_rate": 1.8203119135702393e-05, + "loss": 0.4402, + "step": 14198 + }, + { + "epoch": 0.3898682042833608, + "grad_norm": 0.43109604716300964, + "learning_rate": 1.8202872121129928e-05, + "loss": 0.5447, + "step": 14199 + }, + { + "epoch": 0.38989566172432727, + "grad_norm": 0.3146997392177582, + "learning_rate": 1.8202625091256486e-05, + "loss": 0.463, + "step": 14200 + }, + { + "epoch": 0.3899231191652938, + "grad_norm": 0.35704031586647034, + "learning_rate": 1.8202378046082533e-05, + "loss": 0.4974, + "step": 14201 + }, + { + "epoch": 0.3899505766062603, + "grad_norm": 0.37320858240127563, + "learning_rate": 1.8202130985608525e-05, + "loss": 0.5793, + "step": 14202 + }, + { + "epoch": 0.3899780340472268, + "grad_norm": 0.36564984917640686, + "learning_rate": 1.8201883909834927e-05, + "loss": 0.5987, + "step": 14203 + }, + { + "epoch": 0.3900054914881933, + "grad_norm": 0.3436274528503418, + "learning_rate": 1.8201636818762197e-05, + "loss": 0.5451, + "step": 14204 + }, + { + "epoch": 0.3900329489291598, + "grad_norm": 0.4244338572025299, + "learning_rate": 1.8201389712390797e-05, + "loss": 0.6275, + "step": 14205 + }, + { + "epoch": 0.3900604063701263, + "grad_norm": 0.36793252825737, + "learning_rate": 1.8201142590721187e-05, + "loss": 0.5598, + "step": 14206 + }, + { + "epoch": 0.3900878638110928, + "grad_norm": 0.3964211642742157, + "learning_rate": 1.820089545375383e-05, + "loss": 0.4938, + "step": 14207 + }, + { + "epoch": 0.39011532125205933, + "grad_norm": 0.41055482625961304, + "learning_rate": 1.8200648301489185e-05, + "loss": 0.5165, + "step": 14208 + }, + { + "epoch": 0.3901427786930258, + "grad_norm": 0.4416419565677643, + "learning_rate": 1.820040113392771e-05, + "loss": 0.5274, + "step": 14209 + }, + { + "epoch": 0.3901702361339923, + "grad_norm": 0.38212892413139343, + "learning_rate": 1.8200153951069874e-05, + "loss": 0.5191, + "step": 14210 + }, + { + "epoch": 0.3901976935749588, + "grad_norm": 0.3509543240070343, + "learning_rate": 1.8199906752916133e-05, + "loss": 0.4577, + "step": 14211 + }, + { + "epoch": 0.3902251510159253, + "grad_norm": 0.3406324088573456, + "learning_rate": 1.8199659539466945e-05, + "loss": 0.4757, + "step": 14212 + }, + { + "epoch": 0.3902526084568918, + "grad_norm": 0.3732936978340149, + "learning_rate": 1.8199412310722778e-05, + "loss": 0.5589, + "step": 14213 + }, + { + "epoch": 0.3902800658978583, + "grad_norm": 0.3693386912345886, + "learning_rate": 1.819916506668409e-05, + "loss": 0.5525, + "step": 14214 + }, + { + "epoch": 0.39030752333882485, + "grad_norm": 0.33651870489120483, + "learning_rate": 1.819891780735134e-05, + "loss": 0.5016, + "step": 14215 + }, + { + "epoch": 0.39033498077979134, + "grad_norm": 0.36061784625053406, + "learning_rate": 1.8198670532724998e-05, + "loss": 0.4646, + "step": 14216 + }, + { + "epoch": 0.39036243822075783, + "grad_norm": 0.33554673194885254, + "learning_rate": 1.8198423242805513e-05, + "loss": 0.5279, + "step": 14217 + }, + { + "epoch": 0.39038989566172433, + "grad_norm": 0.34567615389823914, + "learning_rate": 1.8198175937593356e-05, + "loss": 0.5831, + "step": 14218 + }, + { + "epoch": 0.3904173531026908, + "grad_norm": 0.4101155698299408, + "learning_rate": 1.8197928617088978e-05, + "loss": 0.5026, + "step": 14219 + }, + { + "epoch": 0.3904448105436573, + "grad_norm": 0.3647191822528839, + "learning_rate": 1.8197681281292854e-05, + "loss": 0.6059, + "step": 14220 + }, + { + "epoch": 0.3904722679846238, + "grad_norm": 0.3935253322124481, + "learning_rate": 1.8197433930205433e-05, + "loss": 0.5325, + "step": 14221 + }, + { + "epoch": 0.39049972542559036, + "grad_norm": 0.35056453943252563, + "learning_rate": 1.8197186563827184e-05, + "loss": 0.5034, + "step": 14222 + }, + { + "epoch": 0.39052718286655685, + "grad_norm": 0.3785375654697418, + "learning_rate": 1.8196939182158564e-05, + "loss": 0.4699, + "step": 14223 + }, + { + "epoch": 0.39055464030752335, + "grad_norm": 0.4354499876499176, + "learning_rate": 1.8196691785200037e-05, + "loss": 0.616, + "step": 14224 + }, + { + "epoch": 0.39058209774848984, + "grad_norm": 0.34960412979125977, + "learning_rate": 1.8196444372952064e-05, + "loss": 0.4675, + "step": 14225 + }, + { + "epoch": 0.39060955518945634, + "grad_norm": 0.3778553605079651, + "learning_rate": 1.8196196945415105e-05, + "loss": 0.5421, + "step": 14226 + }, + { + "epoch": 0.39063701263042283, + "grad_norm": 0.396759569644928, + "learning_rate": 1.8195949502589622e-05, + "loss": 0.5779, + "step": 14227 + }, + { + "epoch": 0.3906644700713893, + "grad_norm": 0.3644416928291321, + "learning_rate": 1.819570204447608e-05, + "loss": 0.497, + "step": 14228 + }, + { + "epoch": 0.3906919275123559, + "grad_norm": 0.49630051851272583, + "learning_rate": 1.819545457107494e-05, + "loss": 0.5339, + "step": 14229 + }, + { + "epoch": 0.39071938495332237, + "grad_norm": 0.3265737295150757, + "learning_rate": 1.8195207082386657e-05, + "loss": 0.4796, + "step": 14230 + }, + { + "epoch": 0.39074684239428886, + "grad_norm": 0.3725236654281616, + "learning_rate": 1.81949595784117e-05, + "loss": 0.4888, + "step": 14231 + }, + { + "epoch": 0.39077429983525536, + "grad_norm": 0.4259030222892761, + "learning_rate": 1.8194712059150523e-05, + "loss": 0.5132, + "step": 14232 + }, + { + "epoch": 0.39080175727622185, + "grad_norm": 0.33178651332855225, + "learning_rate": 1.8194464524603596e-05, + "loss": 0.3965, + "step": 14233 + }, + { + "epoch": 0.39082921471718834, + "grad_norm": 0.38900455832481384, + "learning_rate": 1.819421697477138e-05, + "loss": 0.5611, + "step": 14234 + }, + { + "epoch": 0.39085667215815484, + "grad_norm": 0.4552185535430908, + "learning_rate": 1.819396940965433e-05, + "loss": 0.4966, + "step": 14235 + }, + { + "epoch": 0.3908841295991214, + "grad_norm": 0.5107800960540771, + "learning_rate": 1.8193721829252915e-05, + "loss": 0.5086, + "step": 14236 + }, + { + "epoch": 0.3909115870400879, + "grad_norm": 0.4319400489330292, + "learning_rate": 1.819347423356759e-05, + "loss": 0.5333, + "step": 14237 + }, + { + "epoch": 0.3909390444810544, + "grad_norm": 0.337201863527298, + "learning_rate": 1.8193226622598823e-05, + "loss": 0.4275, + "step": 14238 + }, + { + "epoch": 0.39096650192202087, + "grad_norm": 0.4444050192832947, + "learning_rate": 1.8192978996347075e-05, + "loss": 0.6, + "step": 14239 + }, + { + "epoch": 0.39099395936298736, + "grad_norm": 0.351843923330307, + "learning_rate": 1.8192731354812802e-05, + "loss": 0.4171, + "step": 14240 + }, + { + "epoch": 0.39102141680395386, + "grad_norm": 0.43709656596183777, + "learning_rate": 1.8192483697996473e-05, + "loss": 0.5885, + "step": 14241 + }, + { + "epoch": 0.39104887424492035, + "grad_norm": 0.36837807297706604, + "learning_rate": 1.8192236025898548e-05, + "loss": 0.4684, + "step": 14242 + }, + { + "epoch": 0.3910763316858869, + "grad_norm": 0.389503538608551, + "learning_rate": 1.8191988338519487e-05, + "loss": 0.532, + "step": 14243 + }, + { + "epoch": 0.3911037891268534, + "grad_norm": 0.4001576602458954, + "learning_rate": 1.8191740635859753e-05, + "loss": 0.5561, + "step": 14244 + }, + { + "epoch": 0.3911312465678199, + "grad_norm": 0.39867258071899414, + "learning_rate": 1.8191492917919805e-05, + "loss": 0.4813, + "step": 14245 + }, + { + "epoch": 0.3911587040087864, + "grad_norm": 0.3352884352207184, + "learning_rate": 1.8191245184700114e-05, + "loss": 0.4904, + "step": 14246 + }, + { + "epoch": 0.3911861614497529, + "grad_norm": 0.36604851484298706, + "learning_rate": 1.8190997436201132e-05, + "loss": 0.552, + "step": 14247 + }, + { + "epoch": 0.3912136188907194, + "grad_norm": 0.40263667702674866, + "learning_rate": 1.819074967242333e-05, + "loss": 0.5472, + "step": 14248 + }, + { + "epoch": 0.39124107633168587, + "grad_norm": 0.37671059370040894, + "learning_rate": 1.819050189336716e-05, + "loss": 0.4621, + "step": 14249 + }, + { + "epoch": 0.3912685337726524, + "grad_norm": 0.4031587243080139, + "learning_rate": 1.8190254099033095e-05, + "loss": 0.5587, + "step": 14250 + }, + { + "epoch": 0.3912959912136189, + "grad_norm": 0.3886023163795471, + "learning_rate": 1.819000628942159e-05, + "loss": 0.4756, + "step": 14251 + }, + { + "epoch": 0.3913234486545854, + "grad_norm": 0.41531938314437866, + "learning_rate": 1.8189758464533107e-05, + "loss": 0.4642, + "step": 14252 + }, + { + "epoch": 0.3913509060955519, + "grad_norm": 0.34660154581069946, + "learning_rate": 1.818951062436811e-05, + "loss": 0.5333, + "step": 14253 + }, + { + "epoch": 0.3913783635365184, + "grad_norm": 0.3744528591632843, + "learning_rate": 1.818926276892707e-05, + "loss": 0.5384, + "step": 14254 + }, + { + "epoch": 0.3914058209774849, + "grad_norm": 0.4529491066932678, + "learning_rate": 1.8189014898210434e-05, + "loss": 0.6175, + "step": 14255 + }, + { + "epoch": 0.3914332784184514, + "grad_norm": 0.40804523229599, + "learning_rate": 1.8188767012218675e-05, + "loss": 0.6277, + "step": 14256 + }, + { + "epoch": 0.39146073585941793, + "grad_norm": 0.40339669585227966, + "learning_rate": 1.818851911095225e-05, + "loss": 0.5888, + "step": 14257 + }, + { + "epoch": 0.3914881933003844, + "grad_norm": 0.34468919038772583, + "learning_rate": 1.8188271194411623e-05, + "loss": 0.5356, + "step": 14258 + }, + { + "epoch": 0.3915156507413509, + "grad_norm": 0.3805680572986603, + "learning_rate": 1.8188023262597255e-05, + "loss": 0.5675, + "step": 14259 + }, + { + "epoch": 0.3915431081823174, + "grad_norm": 0.3082553446292877, + "learning_rate": 1.8187775315509616e-05, + "loss": 0.4374, + "step": 14260 + }, + { + "epoch": 0.3915705656232839, + "grad_norm": 0.3613046705722809, + "learning_rate": 1.818752735314916e-05, + "loss": 0.5064, + "step": 14261 + }, + { + "epoch": 0.3915980230642504, + "grad_norm": 0.33887192606925964, + "learning_rate": 1.818727937551635e-05, + "loss": 0.5095, + "step": 14262 + }, + { + "epoch": 0.3916254805052169, + "grad_norm": 0.43501752614974976, + "learning_rate": 1.8187031382611655e-05, + "loss": 0.5368, + "step": 14263 + }, + { + "epoch": 0.3916529379461834, + "grad_norm": 0.3109287619590759, + "learning_rate": 1.818678337443553e-05, + "loss": 0.5208, + "step": 14264 + }, + { + "epoch": 0.39168039538714994, + "grad_norm": 0.34201422333717346, + "learning_rate": 1.8186535350988443e-05, + "loss": 0.4768, + "step": 14265 + }, + { + "epoch": 0.39170785282811643, + "grad_norm": 0.355509877204895, + "learning_rate": 1.8186287312270853e-05, + "loss": 0.5513, + "step": 14266 + }, + { + "epoch": 0.3917353102690829, + "grad_norm": 0.3723170757293701, + "learning_rate": 1.8186039258283224e-05, + "loss": 0.4946, + "step": 14267 + }, + { + "epoch": 0.3917627677100494, + "grad_norm": 0.39129167795181274, + "learning_rate": 1.8185791189026024e-05, + "loss": 0.4845, + "step": 14268 + }, + { + "epoch": 0.3917902251510159, + "grad_norm": 0.3755074143409729, + "learning_rate": 1.8185543104499707e-05, + "loss": 0.4643, + "step": 14269 + }, + { + "epoch": 0.3918176825919824, + "grad_norm": 0.4093831777572632, + "learning_rate": 1.818529500470474e-05, + "loss": 0.6231, + "step": 14270 + }, + { + "epoch": 0.3918451400329489, + "grad_norm": 0.396809458732605, + "learning_rate": 1.8185046889641586e-05, + "loss": 0.5317, + "step": 14271 + }, + { + "epoch": 0.39187259747391545, + "grad_norm": 0.35425686836242676, + "learning_rate": 1.8184798759310705e-05, + "loss": 0.5118, + "step": 14272 + }, + { + "epoch": 0.39190005491488195, + "grad_norm": 0.36968156695365906, + "learning_rate": 1.8184550613712567e-05, + "loss": 0.5982, + "step": 14273 + }, + { + "epoch": 0.39192751235584844, + "grad_norm": 0.5451480150222778, + "learning_rate": 1.8184302452847626e-05, + "loss": 0.5875, + "step": 14274 + }, + { + "epoch": 0.39195496979681493, + "grad_norm": 0.5284992456436157, + "learning_rate": 1.8184054276716348e-05, + "loss": 0.5075, + "step": 14275 + }, + { + "epoch": 0.39198242723778143, + "grad_norm": 0.44914931058883667, + "learning_rate": 1.81838060853192e-05, + "loss": 0.5543, + "step": 14276 + }, + { + "epoch": 0.3920098846787479, + "grad_norm": 0.4895966947078705, + "learning_rate": 1.818355787865664e-05, + "loss": 0.5727, + "step": 14277 + }, + { + "epoch": 0.3920373421197144, + "grad_norm": 0.38273563981056213, + "learning_rate": 1.8183309656729134e-05, + "loss": 0.5476, + "step": 14278 + }, + { + "epoch": 0.39206479956068097, + "grad_norm": 0.4048324525356293, + "learning_rate": 1.818306141953714e-05, + "loss": 0.5421, + "step": 14279 + }, + { + "epoch": 0.39209225700164746, + "grad_norm": 0.40851259231567383, + "learning_rate": 1.8182813167081125e-05, + "loss": 0.5588, + "step": 14280 + }, + { + "epoch": 0.39211971444261395, + "grad_norm": 0.39047983288764954, + "learning_rate": 1.8182564899361558e-05, + "loss": 0.529, + "step": 14281 + }, + { + "epoch": 0.39214717188358045, + "grad_norm": 0.34100568294525146, + "learning_rate": 1.818231661637889e-05, + "loss": 0.5665, + "step": 14282 + }, + { + "epoch": 0.39217462932454694, + "grad_norm": 0.3686695098876953, + "learning_rate": 1.8182068318133594e-05, + "loss": 0.5028, + "step": 14283 + }, + { + "epoch": 0.39220208676551344, + "grad_norm": 0.3856965899467468, + "learning_rate": 1.8181820004626125e-05, + "loss": 0.6032, + "step": 14284 + }, + { + "epoch": 0.39222954420647993, + "grad_norm": 0.38735201954841614, + "learning_rate": 1.818157167585695e-05, + "loss": 0.4909, + "step": 14285 + }, + { + "epoch": 0.3922570016474465, + "grad_norm": 0.38801878690719604, + "learning_rate": 1.8181323331826534e-05, + "loss": 0.4859, + "step": 14286 + }, + { + "epoch": 0.392284459088413, + "grad_norm": 0.33168232440948486, + "learning_rate": 1.818107497253534e-05, + "loss": 0.4551, + "step": 14287 + }, + { + "epoch": 0.39231191652937947, + "grad_norm": 0.3752933740615845, + "learning_rate": 1.8180826597983832e-05, + "loss": 0.4787, + "step": 14288 + }, + { + "epoch": 0.39233937397034596, + "grad_norm": 0.36348283290863037, + "learning_rate": 1.818057820817247e-05, + "loss": 0.4885, + "step": 14289 + }, + { + "epoch": 0.39236683141131246, + "grad_norm": 0.41049525141716003, + "learning_rate": 1.8180329803101714e-05, + "loss": 0.4348, + "step": 14290 + }, + { + "epoch": 0.39239428885227895, + "grad_norm": 0.3690605163574219, + "learning_rate": 1.8180081382772038e-05, + "loss": 0.488, + "step": 14291 + }, + { + "epoch": 0.39242174629324544, + "grad_norm": 0.371503084897995, + "learning_rate": 1.8179832947183896e-05, + "loss": 0.5223, + "step": 14292 + }, + { + "epoch": 0.392449203734212, + "grad_norm": 0.38965025544166565, + "learning_rate": 1.8179584496337757e-05, + "loss": 0.6257, + "step": 14293 + }, + { + "epoch": 0.3924766611751785, + "grad_norm": 0.40255874395370483, + "learning_rate": 1.817933603023408e-05, + "loss": 0.6039, + "step": 14294 + }, + { + "epoch": 0.392504118616145, + "grad_norm": 3.228938102722168, + "learning_rate": 1.8179087548873333e-05, + "loss": 0.6341, + "step": 14295 + }, + { + "epoch": 0.3925315760571115, + "grad_norm": 0.34678196907043457, + "learning_rate": 1.817883905225598e-05, + "loss": 0.5029, + "step": 14296 + }, + { + "epoch": 0.39255903349807797, + "grad_norm": 0.40346604585647583, + "learning_rate": 1.8178590540382475e-05, + "loss": 0.5979, + "step": 14297 + }, + { + "epoch": 0.39258649093904446, + "grad_norm": 0.33120250701904297, + "learning_rate": 1.817834201325329e-05, + "loss": 0.496, + "step": 14298 + }, + { + "epoch": 0.39261394838001096, + "grad_norm": 0.4096700847148895, + "learning_rate": 1.817809347086889e-05, + "loss": 0.5772, + "step": 14299 + }, + { + "epoch": 0.3926414058209775, + "grad_norm": 0.4056386947631836, + "learning_rate": 1.8177844913229735e-05, + "loss": 0.5148, + "step": 14300 + }, + { + "epoch": 0.392668863261944, + "grad_norm": 0.41495776176452637, + "learning_rate": 1.817759634033629e-05, + "loss": 0.4818, + "step": 14301 + }, + { + "epoch": 0.3926963207029105, + "grad_norm": 0.3829416036605835, + "learning_rate": 1.8177347752189013e-05, + "loss": 0.4766, + "step": 14302 + }, + { + "epoch": 0.392723778143877, + "grad_norm": 0.36880138516426086, + "learning_rate": 1.8177099148788374e-05, + "loss": 0.4803, + "step": 14303 + }, + { + "epoch": 0.3927512355848435, + "grad_norm": 0.3927137553691864, + "learning_rate": 1.817685053013484e-05, + "loss": 0.461, + "step": 14304 + }, + { + "epoch": 0.39277869302581, + "grad_norm": 0.3460618257522583, + "learning_rate": 1.8176601896228866e-05, + "loss": 0.4733, + "step": 14305 + }, + { + "epoch": 0.39280615046677647, + "grad_norm": 0.3692206144332886, + "learning_rate": 1.817635324707092e-05, + "loss": 0.4661, + "step": 14306 + }, + { + "epoch": 0.392833607907743, + "grad_norm": 0.3588316738605499, + "learning_rate": 1.8176104582661467e-05, + "loss": 0.5325, + "step": 14307 + }, + { + "epoch": 0.3928610653487095, + "grad_norm": 0.3750323951244354, + "learning_rate": 1.8175855903000968e-05, + "loss": 0.5282, + "step": 14308 + }, + { + "epoch": 0.392888522789676, + "grad_norm": 0.3777378499507904, + "learning_rate": 1.8175607208089888e-05, + "loss": 0.5075, + "step": 14309 + }, + { + "epoch": 0.3929159802306425, + "grad_norm": 0.35878852009773254, + "learning_rate": 1.8175358497928695e-05, + "loss": 0.5083, + "step": 14310 + }, + { + "epoch": 0.392943437671609, + "grad_norm": 0.4226461350917816, + "learning_rate": 1.8175109772517846e-05, + "loss": 0.5201, + "step": 14311 + }, + { + "epoch": 0.3929708951125755, + "grad_norm": 0.3282892107963562, + "learning_rate": 1.8174861031857808e-05, + "loss": 0.396, + "step": 14312 + }, + { + "epoch": 0.392998352553542, + "grad_norm": 0.3686719536781311, + "learning_rate": 1.8174612275949046e-05, + "loss": 0.4847, + "step": 14313 + }, + { + "epoch": 0.39302580999450853, + "grad_norm": 1.2552671432495117, + "learning_rate": 1.817436350479202e-05, + "loss": 0.5559, + "step": 14314 + }, + { + "epoch": 0.39305326743547503, + "grad_norm": 0.38067808747291565, + "learning_rate": 1.81741147183872e-05, + "loss": 0.5671, + "step": 14315 + }, + { + "epoch": 0.3930807248764415, + "grad_norm": 0.4018470346927643, + "learning_rate": 1.8173865916735048e-05, + "loss": 0.5639, + "step": 14316 + }, + { + "epoch": 0.393108182317408, + "grad_norm": 0.33744317293167114, + "learning_rate": 1.8173617099836024e-05, + "loss": 0.461, + "step": 14317 + }, + { + "epoch": 0.3931356397583745, + "grad_norm": 0.3522387146949768, + "learning_rate": 1.81733682676906e-05, + "loss": 0.4593, + "step": 14318 + }, + { + "epoch": 0.393163097199341, + "grad_norm": 0.33220893144607544, + "learning_rate": 1.817311942029923e-05, + "loss": 0.4607, + "step": 14319 + }, + { + "epoch": 0.3931905546403075, + "grad_norm": 0.3800939917564392, + "learning_rate": 1.8172870557662387e-05, + "loss": 0.5297, + "step": 14320 + }, + { + "epoch": 0.39321801208127405, + "grad_norm": 0.3762393295764923, + "learning_rate": 1.8172621679780532e-05, + "loss": 0.5406, + "step": 14321 + }, + { + "epoch": 0.39324546952224054, + "grad_norm": 0.7611905336380005, + "learning_rate": 1.817237278665413e-05, + "loss": 0.4957, + "step": 14322 + }, + { + "epoch": 0.39327292696320704, + "grad_norm": 0.36765310168266296, + "learning_rate": 1.8172123878283643e-05, + "loss": 0.5097, + "step": 14323 + }, + { + "epoch": 0.39330038440417353, + "grad_norm": 0.3736571669578552, + "learning_rate": 1.817187495466954e-05, + "loss": 0.517, + "step": 14324 + }, + { + "epoch": 0.39332784184514, + "grad_norm": 0.40405210852622986, + "learning_rate": 1.8171626015812277e-05, + "loss": 0.513, + "step": 14325 + }, + { + "epoch": 0.3933552992861065, + "grad_norm": 0.40092921257019043, + "learning_rate": 1.8171377061712324e-05, + "loss": 0.5402, + "step": 14326 + }, + { + "epoch": 0.393382756727073, + "grad_norm": 0.3487306833267212, + "learning_rate": 1.817112809237015e-05, + "loss": 0.4908, + "step": 14327 + }, + { + "epoch": 0.39341021416803956, + "grad_norm": 0.4664003551006317, + "learning_rate": 1.8170879107786208e-05, + "loss": 0.5174, + "step": 14328 + }, + { + "epoch": 0.39343767160900606, + "grad_norm": 0.3467338979244232, + "learning_rate": 1.817063010796097e-05, + "loss": 0.4567, + "step": 14329 + }, + { + "epoch": 0.39346512904997255, + "grad_norm": 0.36842775344848633, + "learning_rate": 1.8170381092894902e-05, + "loss": 0.5144, + "step": 14330 + }, + { + "epoch": 0.39349258649093904, + "grad_norm": 0.36066094040870667, + "learning_rate": 1.8170132062588462e-05, + "loss": 0.4883, + "step": 14331 + }, + { + "epoch": 0.39352004393190554, + "grad_norm": 0.528595507144928, + "learning_rate": 1.8169883017042124e-05, + "loss": 0.6018, + "step": 14332 + }, + { + "epoch": 0.39354750137287203, + "grad_norm": 0.3317814767360687, + "learning_rate": 1.816963395625634e-05, + "loss": 0.5174, + "step": 14333 + }, + { + "epoch": 0.3935749588138385, + "grad_norm": 0.3409140706062317, + "learning_rate": 1.8169384880231586e-05, + "loss": 0.4609, + "step": 14334 + }, + { + "epoch": 0.3936024162548051, + "grad_norm": 0.44564637541770935, + "learning_rate": 1.8169135788968318e-05, + "loss": 0.6392, + "step": 14335 + }, + { + "epoch": 0.39362987369577157, + "grad_norm": 0.3388068675994873, + "learning_rate": 1.8168886682467005e-05, + "loss": 0.473, + "step": 14336 + }, + { + "epoch": 0.39365733113673806, + "grad_norm": 0.38963058590888977, + "learning_rate": 1.8168637560728112e-05, + "loss": 0.5162, + "step": 14337 + }, + { + "epoch": 0.39368478857770456, + "grad_norm": 0.4068855345249176, + "learning_rate": 1.8168388423752102e-05, + "loss": 0.5184, + "step": 14338 + }, + { + "epoch": 0.39371224601867105, + "grad_norm": 0.3677029311656952, + "learning_rate": 1.8168139271539444e-05, + "loss": 0.5206, + "step": 14339 + }, + { + "epoch": 0.39373970345963755, + "grad_norm": 0.3854289948940277, + "learning_rate": 1.8167890104090597e-05, + "loss": 0.5086, + "step": 14340 + }, + { + "epoch": 0.39376716090060404, + "grad_norm": 0.41467520594596863, + "learning_rate": 1.8167640921406026e-05, + "loss": 0.6086, + "step": 14341 + }, + { + "epoch": 0.3937946183415706, + "grad_norm": 0.3660565912723541, + "learning_rate": 1.81673917234862e-05, + "loss": 0.4695, + "step": 14342 + }, + { + "epoch": 0.3938220757825371, + "grad_norm": 0.3693685531616211, + "learning_rate": 1.816714251033158e-05, + "loss": 0.4544, + "step": 14343 + }, + { + "epoch": 0.3938495332235036, + "grad_norm": 0.3218265771865845, + "learning_rate": 1.8166893281942636e-05, + "loss": 0.5674, + "step": 14344 + }, + { + "epoch": 0.3938769906644701, + "grad_norm": 0.3856382668018341, + "learning_rate": 1.816664403831983e-05, + "loss": 0.5532, + "step": 14345 + }, + { + "epoch": 0.39390444810543657, + "grad_norm": 0.3584349751472473, + "learning_rate": 1.816639477946362e-05, + "loss": 0.5516, + "step": 14346 + }, + { + "epoch": 0.39393190554640306, + "grad_norm": 0.3933478593826294, + "learning_rate": 1.816614550537448e-05, + "loss": 0.4937, + "step": 14347 + }, + { + "epoch": 0.39395936298736955, + "grad_norm": 0.4056166708469391, + "learning_rate": 1.8165896216052874e-05, + "loss": 0.5102, + "step": 14348 + }, + { + "epoch": 0.3939868204283361, + "grad_norm": 0.40892094373703003, + "learning_rate": 1.8165646911499266e-05, + "loss": 0.5459, + "step": 14349 + }, + { + "epoch": 0.3940142778693026, + "grad_norm": 0.3326300084590912, + "learning_rate": 1.816539759171412e-05, + "loss": 0.4805, + "step": 14350 + }, + { + "epoch": 0.3940417353102691, + "grad_norm": 0.3471738398075104, + "learning_rate": 1.81651482566979e-05, + "loss": 0.5342, + "step": 14351 + }, + { + "epoch": 0.3940691927512356, + "grad_norm": 0.3591236472129822, + "learning_rate": 1.8164898906451073e-05, + "loss": 0.5799, + "step": 14352 + }, + { + "epoch": 0.3940966501922021, + "grad_norm": 0.7422052025794983, + "learning_rate": 1.8164649540974105e-05, + "loss": 0.6046, + "step": 14353 + }, + { + "epoch": 0.3941241076331686, + "grad_norm": 0.33611729741096497, + "learning_rate": 1.8164400160267458e-05, + "loss": 0.4617, + "step": 14354 + }, + { + "epoch": 0.39415156507413507, + "grad_norm": 0.359722375869751, + "learning_rate": 1.8164150764331596e-05, + "loss": 0.5793, + "step": 14355 + }, + { + "epoch": 0.3941790225151016, + "grad_norm": 0.35249119997024536, + "learning_rate": 1.816390135316699e-05, + "loss": 0.4781, + "step": 14356 + }, + { + "epoch": 0.3942064799560681, + "grad_norm": 0.3764329254627228, + "learning_rate": 1.8163651926774106e-05, + "loss": 0.453, + "step": 14357 + }, + { + "epoch": 0.3942339373970346, + "grad_norm": 0.36782407760620117, + "learning_rate": 1.8163402485153403e-05, + "loss": 0.5797, + "step": 14358 + }, + { + "epoch": 0.3942613948380011, + "grad_norm": 0.3680437505245209, + "learning_rate": 1.8163153028305348e-05, + "loss": 0.4941, + "step": 14359 + }, + { + "epoch": 0.3942888522789676, + "grad_norm": 0.34286239743232727, + "learning_rate": 1.8162903556230406e-05, + "loss": 0.4518, + "step": 14360 + }, + { + "epoch": 0.3943163097199341, + "grad_norm": 0.35229504108428955, + "learning_rate": 1.8162654068929043e-05, + "loss": 0.6032, + "step": 14361 + }, + { + "epoch": 0.3943437671609006, + "grad_norm": 0.37516653537750244, + "learning_rate": 1.816240456640173e-05, + "loss": 0.5732, + "step": 14362 + }, + { + "epoch": 0.39437122460186713, + "grad_norm": 0.3398737907409668, + "learning_rate": 1.8162155048648925e-05, + "loss": 0.5044, + "step": 14363 + }, + { + "epoch": 0.3943986820428336, + "grad_norm": 0.378580242395401, + "learning_rate": 1.8161905515671094e-05, + "loss": 0.497, + "step": 14364 + }, + { + "epoch": 0.3944261394838001, + "grad_norm": 0.37471821904182434, + "learning_rate": 1.8161655967468706e-05, + "loss": 0.5287, + "step": 14365 + }, + { + "epoch": 0.3944535969247666, + "grad_norm": 0.43900078535079956, + "learning_rate": 1.8161406404042226e-05, + "loss": 0.5746, + "step": 14366 + }, + { + "epoch": 0.3944810543657331, + "grad_norm": 0.336305171251297, + "learning_rate": 1.8161156825392114e-05, + "loss": 0.5473, + "step": 14367 + }, + { + "epoch": 0.3945085118066996, + "grad_norm": 0.3502452075481415, + "learning_rate": 1.816090723151884e-05, + "loss": 0.4754, + "step": 14368 + }, + { + "epoch": 0.3945359692476661, + "grad_norm": 0.4074419438838959, + "learning_rate": 1.8160657622422872e-05, + "loss": 0.5292, + "step": 14369 + }, + { + "epoch": 0.39456342668863265, + "grad_norm": 0.376528263092041, + "learning_rate": 1.8160407998104674e-05, + "loss": 0.5427, + "step": 14370 + }, + { + "epoch": 0.39459088412959914, + "grad_norm": 0.32999691367149353, + "learning_rate": 1.816015835856471e-05, + "loss": 0.4294, + "step": 14371 + }, + { + "epoch": 0.39461834157056563, + "grad_norm": 0.37979549169540405, + "learning_rate": 1.8159908703803447e-05, + "loss": 0.5248, + "step": 14372 + }, + { + "epoch": 0.39464579901153213, + "grad_norm": 0.37842562794685364, + "learning_rate": 1.8159659033821344e-05, + "loss": 0.508, + "step": 14373 + }, + { + "epoch": 0.3946732564524986, + "grad_norm": 0.39701932668685913, + "learning_rate": 1.815940934861888e-05, + "loss": 0.6142, + "step": 14374 + }, + { + "epoch": 0.3947007138934651, + "grad_norm": 0.3511054515838623, + "learning_rate": 1.8159159648196508e-05, + "loss": 0.5042, + "step": 14375 + }, + { + "epoch": 0.3947281713344316, + "grad_norm": 0.48574700951576233, + "learning_rate": 1.8158909932554704e-05, + "loss": 0.5188, + "step": 14376 + }, + { + "epoch": 0.39475562877539816, + "grad_norm": 0.3460776209831238, + "learning_rate": 1.8158660201693924e-05, + "loss": 0.4909, + "step": 14377 + }, + { + "epoch": 0.39478308621636465, + "grad_norm": 0.3414953351020813, + "learning_rate": 1.815841045561464e-05, + "loss": 0.4974, + "step": 14378 + }, + { + "epoch": 0.39481054365733115, + "grad_norm": 0.3545130491256714, + "learning_rate": 1.815816069431732e-05, + "loss": 0.5413, + "step": 14379 + }, + { + "epoch": 0.39483800109829764, + "grad_norm": 0.3637526333332062, + "learning_rate": 1.8157910917802423e-05, + "loss": 0.4043, + "step": 14380 + }, + { + "epoch": 0.39486545853926414, + "grad_norm": 0.4308595359325409, + "learning_rate": 1.815766112607042e-05, + "loss": 0.5694, + "step": 14381 + }, + { + "epoch": 0.39489291598023063, + "grad_norm": 0.3780059218406677, + "learning_rate": 1.8157411319121774e-05, + "loss": 0.5452, + "step": 14382 + }, + { + "epoch": 0.3949203734211971, + "grad_norm": 0.3979055881500244, + "learning_rate": 1.8157161496956954e-05, + "loss": 0.5977, + "step": 14383 + }, + { + "epoch": 0.3949478308621637, + "grad_norm": 0.3748335540294647, + "learning_rate": 1.8156911659576424e-05, + "loss": 0.4832, + "step": 14384 + }, + { + "epoch": 0.39497528830313017, + "grad_norm": 0.3996511697769165, + "learning_rate": 1.8156661806980647e-05, + "loss": 0.5695, + "step": 14385 + }, + { + "epoch": 0.39500274574409666, + "grad_norm": 0.3419221341609955, + "learning_rate": 1.81564119391701e-05, + "loss": 0.3724, + "step": 14386 + }, + { + "epoch": 0.39503020318506316, + "grad_norm": 0.3960464894771576, + "learning_rate": 1.8156162056145237e-05, + "loss": 0.5139, + "step": 14387 + }, + { + "epoch": 0.39505766062602965, + "grad_norm": 0.3650602400302887, + "learning_rate": 1.8155912157906528e-05, + "loss": 0.4956, + "step": 14388 + }, + { + "epoch": 0.39508511806699614, + "grad_norm": 0.3952792286872864, + "learning_rate": 1.8155662244454443e-05, + "loss": 0.5511, + "step": 14389 + }, + { + "epoch": 0.39511257550796264, + "grad_norm": 0.4352703094482422, + "learning_rate": 1.815541231578944e-05, + "loss": 0.5206, + "step": 14390 + }, + { + "epoch": 0.3951400329489292, + "grad_norm": 0.9478187561035156, + "learning_rate": 1.8155162371911992e-05, + "loss": 0.4569, + "step": 14391 + }, + { + "epoch": 0.3951674903898957, + "grad_norm": 0.4123595654964447, + "learning_rate": 1.815491241282256e-05, + "loss": 0.5227, + "step": 14392 + }, + { + "epoch": 0.3951949478308622, + "grad_norm": 0.44079041481018066, + "learning_rate": 1.8154662438521622e-05, + "loss": 0.4743, + "step": 14393 + }, + { + "epoch": 0.39522240527182867, + "grad_norm": 0.488781213760376, + "learning_rate": 1.815441244900963e-05, + "loss": 0.6081, + "step": 14394 + }, + { + "epoch": 0.39524986271279516, + "grad_norm": 0.3972320556640625, + "learning_rate": 1.815416244428706e-05, + "loss": 0.4727, + "step": 14395 + }, + { + "epoch": 0.39527732015376166, + "grad_norm": 0.4054119884967804, + "learning_rate": 1.815391242435437e-05, + "loss": 0.4616, + "step": 14396 + }, + { + "epoch": 0.39530477759472815, + "grad_norm": 0.36461690068244934, + "learning_rate": 1.8153662389212034e-05, + "loss": 0.5851, + "step": 14397 + }, + { + "epoch": 0.39533223503569465, + "grad_norm": 0.3512401282787323, + "learning_rate": 1.8153412338860515e-05, + "loss": 0.4927, + "step": 14398 + }, + { + "epoch": 0.3953596924766612, + "grad_norm": 0.3708093762397766, + "learning_rate": 1.8153162273300277e-05, + "loss": 0.5878, + "step": 14399 + }, + { + "epoch": 0.3953871499176277, + "grad_norm": 0.3932078182697296, + "learning_rate": 1.815291219253179e-05, + "loss": 0.4735, + "step": 14400 + }, + { + "epoch": 0.3954146073585942, + "grad_norm": 0.34062668681144714, + "learning_rate": 1.815266209655552e-05, + "loss": 0.5021, + "step": 14401 + }, + { + "epoch": 0.3954420647995607, + "grad_norm": 0.36861753463745117, + "learning_rate": 1.815241198537194e-05, + "loss": 0.5499, + "step": 14402 + }, + { + "epoch": 0.39546952224052717, + "grad_norm": 0.3486413359642029, + "learning_rate": 1.81521618589815e-05, + "loss": 0.5062, + "step": 14403 + }, + { + "epoch": 0.39549697968149367, + "grad_norm": 0.3656655251979828, + "learning_rate": 1.815191171738468e-05, + "loss": 0.5098, + "step": 14404 + }, + { + "epoch": 0.39552443712246016, + "grad_norm": 0.3634564280509949, + "learning_rate": 1.8151661560581944e-05, + "loss": 0.5226, + "step": 14405 + }, + { + "epoch": 0.3955518945634267, + "grad_norm": 0.47357064485549927, + "learning_rate": 1.8151411388573756e-05, + "loss": 0.5656, + "step": 14406 + }, + { + "epoch": 0.3955793520043932, + "grad_norm": 0.4105850160121918, + "learning_rate": 1.8151161201360582e-05, + "loss": 0.5759, + "step": 14407 + }, + { + "epoch": 0.3956068094453597, + "grad_norm": 0.3494517505168915, + "learning_rate": 1.8150910998942895e-05, + "loss": 0.5045, + "step": 14408 + }, + { + "epoch": 0.3956342668863262, + "grad_norm": 0.4125193655490875, + "learning_rate": 1.8150660781321157e-05, + "loss": 0.513, + "step": 14409 + }, + { + "epoch": 0.3956617243272927, + "grad_norm": 0.3544021248817444, + "learning_rate": 1.815041054849583e-05, + "loss": 0.4412, + "step": 14410 + }, + { + "epoch": 0.3956891817682592, + "grad_norm": 0.4214726686477661, + "learning_rate": 1.8150160300467393e-05, + "loss": 0.6312, + "step": 14411 + }, + { + "epoch": 0.3957166392092257, + "grad_norm": 0.3551655411720276, + "learning_rate": 1.8149910037236305e-05, + "loss": 0.5684, + "step": 14412 + }, + { + "epoch": 0.3957440966501922, + "grad_norm": 0.33052247762680054, + "learning_rate": 1.814965975880303e-05, + "loss": 0.4319, + "step": 14413 + }, + { + "epoch": 0.3957715540911587, + "grad_norm": 0.37991657853126526, + "learning_rate": 1.814940946516804e-05, + "loss": 0.4395, + "step": 14414 + }, + { + "epoch": 0.3957990115321252, + "grad_norm": 0.3741739094257355, + "learning_rate": 1.8149159156331803e-05, + "loss": 0.5446, + "step": 14415 + }, + { + "epoch": 0.3958264689730917, + "grad_norm": 0.3910101652145386, + "learning_rate": 1.8148908832294777e-05, + "loss": 0.5051, + "step": 14416 + }, + { + "epoch": 0.3958539264140582, + "grad_norm": 0.35212814807891846, + "learning_rate": 1.8148658493057438e-05, + "loss": 0.5077, + "step": 14417 + }, + { + "epoch": 0.3958813838550247, + "grad_norm": 0.38807716965675354, + "learning_rate": 1.814840813862025e-05, + "loss": 0.5274, + "step": 14418 + }, + { + "epoch": 0.3959088412959912, + "grad_norm": 0.35686561465263367, + "learning_rate": 1.814815776898368e-05, + "loss": 0.4945, + "step": 14419 + }, + { + "epoch": 0.39593629873695774, + "grad_norm": 0.3364298641681671, + "learning_rate": 1.81479073841482e-05, + "loss": 0.5735, + "step": 14420 + }, + { + "epoch": 0.39596375617792423, + "grad_norm": 0.3943302035331726, + "learning_rate": 1.8147656984114268e-05, + "loss": 0.5547, + "step": 14421 + }, + { + "epoch": 0.3959912136188907, + "grad_norm": 0.3697696626186371, + "learning_rate": 1.8147406568882355e-05, + "loss": 0.5593, + "step": 14422 + }, + { + "epoch": 0.3960186710598572, + "grad_norm": 0.48685356974601746, + "learning_rate": 1.814715613845293e-05, + "loss": 0.4469, + "step": 14423 + }, + { + "epoch": 0.3960461285008237, + "grad_norm": 3.049593687057495, + "learning_rate": 1.8146905692826457e-05, + "loss": 0.6475, + "step": 14424 + }, + { + "epoch": 0.3960735859417902, + "grad_norm": 0.3993189334869385, + "learning_rate": 1.814665523200341e-05, + "loss": 0.5517, + "step": 14425 + }, + { + "epoch": 0.3961010433827567, + "grad_norm": 0.39891138672828674, + "learning_rate": 1.814640475598424e-05, + "loss": 0.4937, + "step": 14426 + }, + { + "epoch": 0.39612850082372325, + "grad_norm": 0.3261817395687103, + "learning_rate": 1.8146154264769435e-05, + "loss": 0.478, + "step": 14427 + }, + { + "epoch": 0.39615595826468974, + "grad_norm": 0.3889504671096802, + "learning_rate": 1.8145903758359447e-05, + "loss": 0.5594, + "step": 14428 + }, + { + "epoch": 0.39618341570565624, + "grad_norm": 0.4257969856262207, + "learning_rate": 1.814565323675475e-05, + "loss": 0.499, + "step": 14429 + }, + { + "epoch": 0.39621087314662273, + "grad_norm": 0.38312384486198425, + "learning_rate": 1.8145402699955813e-05, + "loss": 0.4914, + "step": 14430 + }, + { + "epoch": 0.3962383305875892, + "grad_norm": 0.3869914710521698, + "learning_rate": 1.8145152147963096e-05, + "loss": 0.5198, + "step": 14431 + }, + { + "epoch": 0.3962657880285557, + "grad_norm": 0.4019356071949005, + "learning_rate": 1.8144901580777074e-05, + "loss": 0.5216, + "step": 14432 + }, + { + "epoch": 0.3962932454695222, + "grad_norm": 0.3708321750164032, + "learning_rate": 1.814465099839821e-05, + "loss": 0.5015, + "step": 14433 + }, + { + "epoch": 0.39632070291048876, + "grad_norm": 0.3907548785209656, + "learning_rate": 1.814440040082697e-05, + "loss": 0.4888, + "step": 14434 + }, + { + "epoch": 0.39634816035145526, + "grad_norm": 0.3585502803325653, + "learning_rate": 1.8144149788063827e-05, + "loss": 0.4868, + "step": 14435 + }, + { + "epoch": 0.39637561779242175, + "grad_norm": 0.34310612082481384, + "learning_rate": 1.814389916010924e-05, + "loss": 0.4488, + "step": 14436 + }, + { + "epoch": 0.39640307523338825, + "grad_norm": 0.38141995668411255, + "learning_rate": 1.8143648516963684e-05, + "loss": 0.5288, + "step": 14437 + }, + { + "epoch": 0.39643053267435474, + "grad_norm": 0.3732544183731079, + "learning_rate": 1.814339785862763e-05, + "loss": 0.5142, + "step": 14438 + }, + { + "epoch": 0.39645799011532123, + "grad_norm": 0.3870595693588257, + "learning_rate": 1.814314718510153e-05, + "loss": 0.5375, + "step": 14439 + }, + { + "epoch": 0.39648544755628773, + "grad_norm": 0.3490780293941498, + "learning_rate": 1.8142896496385872e-05, + "loss": 0.5515, + "step": 14440 + }, + { + "epoch": 0.3965129049972543, + "grad_norm": 0.38211989402770996, + "learning_rate": 1.814264579248111e-05, + "loss": 0.5556, + "step": 14441 + }, + { + "epoch": 0.3965403624382208, + "grad_norm": 0.3723224103450775, + "learning_rate": 1.8142395073387714e-05, + "loss": 0.5053, + "step": 14442 + }, + { + "epoch": 0.39656781987918727, + "grad_norm": 0.37324294447898865, + "learning_rate": 1.8142144339106148e-05, + "loss": 0.5669, + "step": 14443 + }, + { + "epoch": 0.39659527732015376, + "grad_norm": 0.4912918210029602, + "learning_rate": 1.814189358963689e-05, + "loss": 0.597, + "step": 14444 + }, + { + "epoch": 0.39662273476112025, + "grad_norm": 0.34916189312934875, + "learning_rate": 1.8141642824980398e-05, + "loss": 0.4783, + "step": 14445 + }, + { + "epoch": 0.39665019220208675, + "grad_norm": 0.4050712287425995, + "learning_rate": 1.8141392045137146e-05, + "loss": 0.6448, + "step": 14446 + }, + { + "epoch": 0.39667764964305324, + "grad_norm": 0.37061649560928345, + "learning_rate": 1.81411412501076e-05, + "loss": 0.5585, + "step": 14447 + }, + { + "epoch": 0.3967051070840198, + "grad_norm": 0.3359757959842682, + "learning_rate": 1.8140890439892226e-05, + "loss": 0.4351, + "step": 14448 + }, + { + "epoch": 0.3967325645249863, + "grad_norm": 0.4002884328365326, + "learning_rate": 1.8140639614491494e-05, + "loss": 0.5637, + "step": 14449 + }, + { + "epoch": 0.3967600219659528, + "grad_norm": 0.36259621381759644, + "learning_rate": 1.814038877390587e-05, + "loss": 0.5238, + "step": 14450 + }, + { + "epoch": 0.3967874794069193, + "grad_norm": 0.33976393938064575, + "learning_rate": 1.814013791813582e-05, + "loss": 0.4088, + "step": 14451 + }, + { + "epoch": 0.39681493684788577, + "grad_norm": 0.38816940784454346, + "learning_rate": 1.813988704718182e-05, + "loss": 0.5231, + "step": 14452 + }, + { + "epoch": 0.39684239428885226, + "grad_norm": 0.3644917607307434, + "learning_rate": 1.8139636161044328e-05, + "loss": 0.5434, + "step": 14453 + }, + { + "epoch": 0.39686985172981876, + "grad_norm": 0.3634960949420929, + "learning_rate": 1.8139385259723822e-05, + "loss": 0.4427, + "step": 14454 + }, + { + "epoch": 0.3968973091707853, + "grad_norm": 0.3813188672065735, + "learning_rate": 1.813913434322076e-05, + "loss": 0.5732, + "step": 14455 + }, + { + "epoch": 0.3969247666117518, + "grad_norm": 0.43568918108940125, + "learning_rate": 1.8138883411535616e-05, + "loss": 0.5248, + "step": 14456 + }, + { + "epoch": 0.3969522240527183, + "grad_norm": 0.37514665722846985, + "learning_rate": 1.8138632464668858e-05, + "loss": 0.5785, + "step": 14457 + }, + { + "epoch": 0.3969796814936848, + "grad_norm": 0.3837616443634033, + "learning_rate": 1.813838150262095e-05, + "loss": 0.5016, + "step": 14458 + }, + { + "epoch": 0.3970071389346513, + "grad_norm": 0.3434537351131439, + "learning_rate": 1.8138130525392366e-05, + "loss": 0.5079, + "step": 14459 + }, + { + "epoch": 0.3970345963756178, + "grad_norm": 0.35390210151672363, + "learning_rate": 1.813787953298357e-05, + "loss": 0.4906, + "step": 14460 + }, + { + "epoch": 0.39706205381658427, + "grad_norm": 0.3842248022556305, + "learning_rate": 1.8137628525395032e-05, + "loss": 0.4683, + "step": 14461 + }, + { + "epoch": 0.3970895112575508, + "grad_norm": 0.3654112219810486, + "learning_rate": 1.813737750262722e-05, + "loss": 0.5776, + "step": 14462 + }, + { + "epoch": 0.3971169686985173, + "grad_norm": 0.3322075307369232, + "learning_rate": 1.81371264646806e-05, + "loss": 0.469, + "step": 14463 + }, + { + "epoch": 0.3971444261394838, + "grad_norm": 0.36738666892051697, + "learning_rate": 1.8136875411555644e-05, + "loss": 0.515, + "step": 14464 + }, + { + "epoch": 0.3971718835804503, + "grad_norm": 0.34361276030540466, + "learning_rate": 1.8136624343252815e-05, + "loss": 0.4605, + "step": 14465 + }, + { + "epoch": 0.3971993410214168, + "grad_norm": 0.3995765149593353, + "learning_rate": 1.8136373259772588e-05, + "loss": 0.5117, + "step": 14466 + }, + { + "epoch": 0.3972267984623833, + "grad_norm": 0.4049709141254425, + "learning_rate": 1.8136122161115425e-05, + "loss": 0.5081, + "step": 14467 + }, + { + "epoch": 0.3972542559033498, + "grad_norm": 0.3173080086708069, + "learning_rate": 1.81358710472818e-05, + "loss": 0.5112, + "step": 14468 + }, + { + "epoch": 0.39728171334431633, + "grad_norm": 0.36215972900390625, + "learning_rate": 1.8135619918272184e-05, + "loss": 0.5326, + "step": 14469 + }, + { + "epoch": 0.39730917078528283, + "grad_norm": 0.3517303466796875, + "learning_rate": 1.813536877408703e-05, + "loss": 0.5509, + "step": 14470 + }, + { + "epoch": 0.3973366282262493, + "grad_norm": 0.45490527153015137, + "learning_rate": 1.813511761472682e-05, + "loss": 0.5419, + "step": 14471 + }, + { + "epoch": 0.3973640856672158, + "grad_norm": 0.41371825337409973, + "learning_rate": 1.8134866440192023e-05, + "loss": 0.5035, + "step": 14472 + }, + { + "epoch": 0.3973915431081823, + "grad_norm": 0.4454978108406067, + "learning_rate": 1.81346152504831e-05, + "loss": 0.5101, + "step": 14473 + }, + { + "epoch": 0.3974190005491488, + "grad_norm": 1.0419477224349976, + "learning_rate": 1.8134364045600525e-05, + "loss": 0.5199, + "step": 14474 + }, + { + "epoch": 0.3974464579901153, + "grad_norm": 0.368676096200943, + "learning_rate": 1.8134112825544768e-05, + "loss": 0.5482, + "step": 14475 + }, + { + "epoch": 0.39747391543108185, + "grad_norm": 0.3917219340801239, + "learning_rate": 1.8133861590316288e-05, + "loss": 0.5852, + "step": 14476 + }, + { + "epoch": 0.39750137287204834, + "grad_norm": 0.4231390357017517, + "learning_rate": 1.8133610339915565e-05, + "loss": 0.5029, + "step": 14477 + }, + { + "epoch": 0.39752883031301484, + "grad_norm": 0.374780535697937, + "learning_rate": 1.813335907434306e-05, + "loss": 0.4725, + "step": 14478 + }, + { + "epoch": 0.39755628775398133, + "grad_norm": 0.3917389512062073, + "learning_rate": 1.8133107793599246e-05, + "loss": 0.5371, + "step": 14479 + }, + { + "epoch": 0.3975837451949478, + "grad_norm": 0.345502108335495, + "learning_rate": 1.8132856497684586e-05, + "loss": 0.4588, + "step": 14480 + }, + { + "epoch": 0.3976112026359143, + "grad_norm": 0.4269584119319916, + "learning_rate": 1.813260518659956e-05, + "loss": 0.3843, + "step": 14481 + }, + { + "epoch": 0.3976386600768808, + "grad_norm": 0.35465359687805176, + "learning_rate": 1.8132353860344623e-05, + "loss": 0.5306, + "step": 14482 + }, + { + "epoch": 0.39766611751784736, + "grad_norm": 0.38518092036247253, + "learning_rate": 1.8132102518920255e-05, + "loss": 0.5243, + "step": 14483 + }, + { + "epoch": 0.39769357495881386, + "grad_norm": 0.3390117585659027, + "learning_rate": 1.813185116232692e-05, + "loss": 0.5051, + "step": 14484 + }, + { + "epoch": 0.39772103239978035, + "grad_norm": 0.35954269766807556, + "learning_rate": 1.8131599790565087e-05, + "loss": 0.491, + "step": 14485 + }, + { + "epoch": 0.39774848984074684, + "grad_norm": 0.3492797315120697, + "learning_rate": 1.8131348403635223e-05, + "loss": 0.5076, + "step": 14486 + }, + { + "epoch": 0.39777594728171334, + "grad_norm": 0.424371600151062, + "learning_rate": 1.81310970015378e-05, + "loss": 0.5436, + "step": 14487 + }, + { + "epoch": 0.39780340472267983, + "grad_norm": 0.3581925332546234, + "learning_rate": 1.8130845584273287e-05, + "loss": 0.4728, + "step": 14488 + }, + { + "epoch": 0.3978308621636463, + "grad_norm": 0.3906252384185791, + "learning_rate": 1.813059415184215e-05, + "loss": 0.5833, + "step": 14489 + }, + { + "epoch": 0.3978583196046129, + "grad_norm": 0.3868602216243744, + "learning_rate": 1.8130342704244863e-05, + "loss": 0.5131, + "step": 14490 + }, + { + "epoch": 0.39788577704557937, + "grad_norm": 0.4076305627822876, + "learning_rate": 1.813009124148189e-05, + "loss": 0.5598, + "step": 14491 + }, + { + "epoch": 0.39791323448654586, + "grad_norm": 0.40997737646102905, + "learning_rate": 1.81298397635537e-05, + "loss": 0.5484, + "step": 14492 + }, + { + "epoch": 0.39794069192751236, + "grad_norm": 0.40395835041999817, + "learning_rate": 1.8129588270460768e-05, + "loss": 0.4636, + "step": 14493 + }, + { + "epoch": 0.39796814936847885, + "grad_norm": 0.4241991639137268, + "learning_rate": 1.8129336762203554e-05, + "loss": 0.5185, + "step": 14494 + }, + { + "epoch": 0.39799560680944535, + "grad_norm": 0.37198787927627563, + "learning_rate": 1.812908523878254e-05, + "loss": 0.5204, + "step": 14495 + }, + { + "epoch": 0.39802306425041184, + "grad_norm": 0.3696286678314209, + "learning_rate": 1.8128833700198182e-05, + "loss": 0.5049, + "step": 14496 + }, + { + "epoch": 0.3980505216913784, + "grad_norm": 0.39488378167152405, + "learning_rate": 1.8128582146450955e-05, + "loss": 0.5172, + "step": 14497 + }, + { + "epoch": 0.3980779791323449, + "grad_norm": 0.48609480261802673, + "learning_rate": 1.812833057754133e-05, + "loss": 0.6212, + "step": 14498 + }, + { + "epoch": 0.3981054365733114, + "grad_norm": 0.3461854159832001, + "learning_rate": 1.8128078993469772e-05, + "loss": 0.4556, + "step": 14499 + }, + { + "epoch": 0.39813289401427787, + "grad_norm": 0.5373040437698364, + "learning_rate": 1.8127827394236754e-05, + "loss": 0.6253, + "step": 14500 + }, + { + "epoch": 0.39816035145524437, + "grad_norm": 0.3424187898635864, + "learning_rate": 1.8127575779842744e-05, + "loss": 0.4815, + "step": 14501 + }, + { + "epoch": 0.39818780889621086, + "grad_norm": 0.3860345482826233, + "learning_rate": 1.812732415028821e-05, + "loss": 0.5505, + "step": 14502 + }, + { + "epoch": 0.39821526633717735, + "grad_norm": 0.387165904045105, + "learning_rate": 1.8127072505573622e-05, + "loss": 0.4657, + "step": 14503 + }, + { + "epoch": 0.3982427237781439, + "grad_norm": 0.43072494864463806, + "learning_rate": 1.8126820845699452e-05, + "loss": 0.5798, + "step": 14504 + }, + { + "epoch": 0.3982701812191104, + "grad_norm": 0.4204075336456299, + "learning_rate": 1.8126569170666167e-05, + "loss": 0.5313, + "step": 14505 + }, + { + "epoch": 0.3982976386600769, + "grad_norm": 0.40721285343170166, + "learning_rate": 1.8126317480474235e-05, + "loss": 0.5391, + "step": 14506 + }, + { + "epoch": 0.3983250961010434, + "grad_norm": 0.3725564777851105, + "learning_rate": 1.812606577512413e-05, + "loss": 0.515, + "step": 14507 + }, + { + "epoch": 0.3983525535420099, + "grad_norm": 0.3631134331226349, + "learning_rate": 1.812581405461632e-05, + "loss": 0.4796, + "step": 14508 + }, + { + "epoch": 0.3983800109829764, + "grad_norm": 0.35960710048675537, + "learning_rate": 1.8125562318951267e-05, + "loss": 0.4827, + "step": 14509 + }, + { + "epoch": 0.39840746842394287, + "grad_norm": 0.3299707770347595, + "learning_rate": 1.812531056812945e-05, + "loss": 0.4821, + "step": 14510 + }, + { + "epoch": 0.3984349258649094, + "grad_norm": 0.3916800320148468, + "learning_rate": 1.8125058802151337e-05, + "loss": 0.5145, + "step": 14511 + }, + { + "epoch": 0.3984623833058759, + "grad_norm": 0.37003663182258606, + "learning_rate": 1.8124807021017393e-05, + "loss": 0.5541, + "step": 14512 + }, + { + "epoch": 0.3984898407468424, + "grad_norm": 0.35234832763671875, + "learning_rate": 1.8124555224728096e-05, + "loss": 0.5012, + "step": 14513 + }, + { + "epoch": 0.3985172981878089, + "grad_norm": 0.3942378759384155, + "learning_rate": 1.8124303413283906e-05, + "loss": 0.4889, + "step": 14514 + }, + { + "epoch": 0.3985447556287754, + "grad_norm": 0.36064383387565613, + "learning_rate": 1.81240515866853e-05, + "loss": 0.597, + "step": 14515 + }, + { + "epoch": 0.3985722130697419, + "grad_norm": 0.37178730964660645, + "learning_rate": 1.8123799744932742e-05, + "loss": 0.5357, + "step": 14516 + }, + { + "epoch": 0.3985996705107084, + "grad_norm": 0.33967429399490356, + "learning_rate": 1.8123547888026705e-05, + "loss": 0.5376, + "step": 14517 + }, + { + "epoch": 0.39862712795167493, + "grad_norm": 0.37728002667427063, + "learning_rate": 1.812329601596766e-05, + "loss": 0.5087, + "step": 14518 + }, + { + "epoch": 0.3986545853926414, + "grad_norm": 0.36005493998527527, + "learning_rate": 1.8123044128756072e-05, + "loss": 0.5387, + "step": 14519 + }, + { + "epoch": 0.3986820428336079, + "grad_norm": 0.3660268485546112, + "learning_rate": 1.8122792226392422e-05, + "loss": 0.473, + "step": 14520 + }, + { + "epoch": 0.3987095002745744, + "grad_norm": 0.3849450349807739, + "learning_rate": 1.8122540308877165e-05, + "loss": 0.5186, + "step": 14521 + }, + { + "epoch": 0.3987369577155409, + "grad_norm": 0.39646607637405396, + "learning_rate": 1.812228837621078e-05, + "loss": 0.5681, + "step": 14522 + }, + { + "epoch": 0.3987644151565074, + "grad_norm": 0.4173043370246887, + "learning_rate": 1.8122036428393732e-05, + "loss": 0.5668, + "step": 14523 + }, + { + "epoch": 0.3987918725974739, + "grad_norm": 0.3815686106681824, + "learning_rate": 1.81217844654265e-05, + "loss": 0.5754, + "step": 14524 + }, + { + "epoch": 0.39881933003844044, + "grad_norm": 0.3157976269721985, + "learning_rate": 1.812153248730954e-05, + "loss": 0.4429, + "step": 14525 + }, + { + "epoch": 0.39884678747940694, + "grad_norm": 0.36779212951660156, + "learning_rate": 1.8121280494043338e-05, + "loss": 0.4494, + "step": 14526 + }, + { + "epoch": 0.39887424492037343, + "grad_norm": 0.42627009749412537, + "learning_rate": 1.812102848562835e-05, + "loss": 0.5596, + "step": 14527 + }, + { + "epoch": 0.3989017023613399, + "grad_norm": 0.3311494290828705, + "learning_rate": 1.8120776462065054e-05, + "loss": 0.5265, + "step": 14528 + }, + { + "epoch": 0.3989291598023064, + "grad_norm": 0.364096462726593, + "learning_rate": 1.8120524423353917e-05, + "loss": 0.5542, + "step": 14529 + }, + { + "epoch": 0.3989566172432729, + "grad_norm": 0.3950422704219818, + "learning_rate": 1.812027236949541e-05, + "loss": 0.6113, + "step": 14530 + }, + { + "epoch": 0.3989840746842394, + "grad_norm": 0.36956584453582764, + "learning_rate": 1.8120020300490007e-05, + "loss": 0.532, + "step": 14531 + }, + { + "epoch": 0.3990115321252059, + "grad_norm": 0.3593474328517914, + "learning_rate": 1.8119768216338172e-05, + "loss": 0.4592, + "step": 14532 + }, + { + "epoch": 0.39903898956617245, + "grad_norm": 0.3933591842651367, + "learning_rate": 1.8119516117040375e-05, + "loss": 0.4273, + "step": 14533 + }, + { + "epoch": 0.39906644700713895, + "grad_norm": 0.37855952978134155, + "learning_rate": 1.8119264002597094e-05, + "loss": 0.5175, + "step": 14534 + }, + { + "epoch": 0.39909390444810544, + "grad_norm": 0.3626176416873932, + "learning_rate": 1.8119011873008792e-05, + "loss": 0.5515, + "step": 14535 + }, + { + "epoch": 0.39912136188907194, + "grad_norm": 0.3726052939891815, + "learning_rate": 1.811875972827594e-05, + "loss": 0.6072, + "step": 14536 + }, + { + "epoch": 0.39914881933003843, + "grad_norm": 0.4318467974662781, + "learning_rate": 1.811850756839901e-05, + "loss": 0.5074, + "step": 14537 + }, + { + "epoch": 0.3991762767710049, + "grad_norm": 0.3993469476699829, + "learning_rate": 1.8118255393378478e-05, + "loss": 0.5401, + "step": 14538 + }, + { + "epoch": 0.3992037342119714, + "grad_norm": 0.41841697692871094, + "learning_rate": 1.8118003203214805e-05, + "loss": 0.5782, + "step": 14539 + }, + { + "epoch": 0.39923119165293797, + "grad_norm": 0.39724504947662354, + "learning_rate": 1.8117750997908463e-05, + "loss": 0.5093, + "step": 14540 + }, + { + "epoch": 0.39925864909390446, + "grad_norm": 0.3400951325893402, + "learning_rate": 1.8117498777459926e-05, + "loss": 0.548, + "step": 14541 + }, + { + "epoch": 0.39928610653487095, + "grad_norm": 0.4158440828323364, + "learning_rate": 1.8117246541869662e-05, + "loss": 0.5076, + "step": 14542 + }, + { + "epoch": 0.39931356397583745, + "grad_norm": 0.3467126786708832, + "learning_rate": 1.8116994291138143e-05, + "loss": 0.5161, + "step": 14543 + }, + { + "epoch": 0.39934102141680394, + "grad_norm": 0.34650006890296936, + "learning_rate": 1.811674202526584e-05, + "loss": 0.5382, + "step": 14544 + }, + { + "epoch": 0.39936847885777044, + "grad_norm": 0.35564664006233215, + "learning_rate": 1.8116489744253222e-05, + "loss": 0.5223, + "step": 14545 + }, + { + "epoch": 0.39939593629873693, + "grad_norm": 0.38355106115341187, + "learning_rate": 1.811623744810076e-05, + "loss": 0.5258, + "step": 14546 + }, + { + "epoch": 0.3994233937397035, + "grad_norm": 0.41693946719169617, + "learning_rate": 1.8115985136808923e-05, + "loss": 0.5383, + "step": 14547 + }, + { + "epoch": 0.39945085118067, + "grad_norm": 0.35699573159217834, + "learning_rate": 1.8115732810378187e-05, + "loss": 0.5691, + "step": 14548 + }, + { + "epoch": 0.39947830862163647, + "grad_norm": 0.3869537115097046, + "learning_rate": 1.8115480468809018e-05, + "loss": 0.522, + "step": 14549 + }, + { + "epoch": 0.39950576606260296, + "grad_norm": 0.3860298991203308, + "learning_rate": 1.8115228112101885e-05, + "loss": 0.5552, + "step": 14550 + }, + { + "epoch": 0.39953322350356946, + "grad_norm": 0.34042471647262573, + "learning_rate": 1.8114975740257265e-05, + "loss": 0.4784, + "step": 14551 + }, + { + "epoch": 0.39956068094453595, + "grad_norm": 0.5583789944648743, + "learning_rate": 1.8114723353275624e-05, + "loss": 0.5335, + "step": 14552 + }, + { + "epoch": 0.39958813838550244, + "grad_norm": 0.33001068234443665, + "learning_rate": 1.8114470951157432e-05, + "loss": 0.5381, + "step": 14553 + }, + { + "epoch": 0.399615595826469, + "grad_norm": 0.3628605604171753, + "learning_rate": 1.8114218533903165e-05, + "loss": 0.5554, + "step": 14554 + }, + { + "epoch": 0.3996430532674355, + "grad_norm": 0.3614124655723572, + "learning_rate": 1.811396610151329e-05, + "loss": 0.5385, + "step": 14555 + }, + { + "epoch": 0.399670510708402, + "grad_norm": 0.753084123134613, + "learning_rate": 1.8113713653988275e-05, + "loss": 0.5529, + "step": 14556 + }, + { + "epoch": 0.3996979681493685, + "grad_norm": 0.4159970283508301, + "learning_rate": 1.8113461191328597e-05, + "loss": 0.5547, + "step": 14557 + }, + { + "epoch": 0.39972542559033497, + "grad_norm": 0.347696453332901, + "learning_rate": 1.8113208713534727e-05, + "loss": 0.4588, + "step": 14558 + }, + { + "epoch": 0.39975288303130146, + "grad_norm": 0.32873183488845825, + "learning_rate": 1.8112956220607133e-05, + "loss": 0.4147, + "step": 14559 + }, + { + "epoch": 0.39978034047226796, + "grad_norm": 0.4027496576309204, + "learning_rate": 1.811270371254628e-05, + "loss": 0.5228, + "step": 14560 + }, + { + "epoch": 0.3998077979132345, + "grad_norm": 0.37907877564430237, + "learning_rate": 1.8112451189352653e-05, + "loss": 0.4891, + "step": 14561 + }, + { + "epoch": 0.399835255354201, + "grad_norm": 0.3620721399784088, + "learning_rate": 1.8112198651026708e-05, + "loss": 0.4832, + "step": 14562 + }, + { + "epoch": 0.3998627127951675, + "grad_norm": 0.4649926424026489, + "learning_rate": 1.811194609756893e-05, + "loss": 0.5398, + "step": 14563 + }, + { + "epoch": 0.399890170236134, + "grad_norm": 0.3576048016548157, + "learning_rate": 1.8111693528979782e-05, + "loss": 0.5137, + "step": 14564 + }, + { + "epoch": 0.3999176276771005, + "grad_norm": 0.33361539244651794, + "learning_rate": 1.8111440945259733e-05, + "loss": 0.4386, + "step": 14565 + }, + { + "epoch": 0.399945085118067, + "grad_norm": 0.3763444423675537, + "learning_rate": 1.811118834640926e-05, + "loss": 0.538, + "step": 14566 + }, + { + "epoch": 0.3999725425590335, + "grad_norm": 0.4196290969848633, + "learning_rate": 1.8110935732428833e-05, + "loss": 0.5631, + "step": 14567 + }, + { + "epoch": 0.4, + "grad_norm": 0.39718329906463623, + "learning_rate": 1.811068310331892e-05, + "loss": 0.5706, + "step": 14568 + }, + { + "epoch": 0.4000274574409665, + "grad_norm": 0.40439608693122864, + "learning_rate": 1.8110430459079996e-05, + "loss": 0.5172, + "step": 14569 + }, + { + "epoch": 0.400054914881933, + "grad_norm": 0.5045031309127808, + "learning_rate": 1.8110177799712532e-05, + "loss": 0.5214, + "step": 14570 + }, + { + "epoch": 0.4000823723228995, + "grad_norm": 0.32855746150016785, + "learning_rate": 1.8109925125216995e-05, + "loss": 0.5805, + "step": 14571 + }, + { + "epoch": 0.400109829763866, + "grad_norm": 0.36896461248397827, + "learning_rate": 1.810967243559386e-05, + "loss": 0.5011, + "step": 14572 + }, + { + "epoch": 0.4001372872048325, + "grad_norm": 0.4506534934043884, + "learning_rate": 1.8109419730843597e-05, + "loss": 0.5419, + "step": 14573 + }, + { + "epoch": 0.400164744645799, + "grad_norm": 0.3578657805919647, + "learning_rate": 1.810916701096668e-05, + "loss": 0.5084, + "step": 14574 + }, + { + "epoch": 0.40019220208676554, + "grad_norm": 0.351916640996933, + "learning_rate": 1.810891427596358e-05, + "loss": 0.55, + "step": 14575 + }, + { + "epoch": 0.40021965952773203, + "grad_norm": 0.3611028790473938, + "learning_rate": 1.8108661525834762e-05, + "loss": 0.4616, + "step": 14576 + }, + { + "epoch": 0.4002471169686985, + "grad_norm": 0.4008693993091583, + "learning_rate": 1.8108408760580705e-05, + "loss": 0.5085, + "step": 14577 + }, + { + "epoch": 0.400274574409665, + "grad_norm": 0.33283573389053345, + "learning_rate": 1.8108155980201876e-05, + "loss": 0.455, + "step": 14578 + }, + { + "epoch": 0.4003020318506315, + "grad_norm": 0.44301578402519226, + "learning_rate": 1.810790318469875e-05, + "loss": 0.58, + "step": 14579 + }, + { + "epoch": 0.400329489291598, + "grad_norm": 0.3604641854763031, + "learning_rate": 1.8107650374071795e-05, + "loss": 0.5088, + "step": 14580 + }, + { + "epoch": 0.4003569467325645, + "grad_norm": 0.3848716616630554, + "learning_rate": 1.8107397548321487e-05, + "loss": 0.5678, + "step": 14581 + }, + { + "epoch": 0.40038440417353105, + "grad_norm": 0.32435905933380127, + "learning_rate": 1.8107144707448296e-05, + "loss": 0.4134, + "step": 14582 + }, + { + "epoch": 0.40041186161449754, + "grad_norm": 0.36878836154937744, + "learning_rate": 1.8106891851452687e-05, + "loss": 0.5509, + "step": 14583 + }, + { + "epoch": 0.40043931905546404, + "grad_norm": 0.38231074810028076, + "learning_rate": 1.810663898033514e-05, + "loss": 0.5863, + "step": 14584 + }, + { + "epoch": 0.40046677649643053, + "grad_norm": 0.4346892535686493, + "learning_rate": 1.8106386094096126e-05, + "loss": 0.6265, + "step": 14585 + }, + { + "epoch": 0.400494233937397, + "grad_norm": 0.3891448974609375, + "learning_rate": 1.8106133192736113e-05, + "loss": 0.5226, + "step": 14586 + }, + { + "epoch": 0.4005216913783635, + "grad_norm": 0.32054242491722107, + "learning_rate": 1.8105880276255575e-05, + "loss": 0.428, + "step": 14587 + }, + { + "epoch": 0.40054914881933, + "grad_norm": 0.34748271107673645, + "learning_rate": 1.8105627344654984e-05, + "loss": 0.4694, + "step": 14588 + }, + { + "epoch": 0.40057660626029656, + "grad_norm": 0.3230980932712555, + "learning_rate": 1.8105374397934808e-05, + "loss": 0.5682, + "step": 14589 + }, + { + "epoch": 0.40060406370126306, + "grad_norm": 0.38302499055862427, + "learning_rate": 1.8105121436095524e-05, + "loss": 0.5033, + "step": 14590 + }, + { + "epoch": 0.40063152114222955, + "grad_norm": 0.3759732246398926, + "learning_rate": 1.81048684591376e-05, + "loss": 0.557, + "step": 14591 + }, + { + "epoch": 0.40065897858319605, + "grad_norm": 0.3411097526550293, + "learning_rate": 1.8104615467061513e-05, + "loss": 0.5231, + "step": 14592 + }, + { + "epoch": 0.40068643602416254, + "grad_norm": 0.3845718502998352, + "learning_rate": 1.810436245986773e-05, + "loss": 0.5494, + "step": 14593 + }, + { + "epoch": 0.40071389346512903, + "grad_norm": 0.4285094141960144, + "learning_rate": 1.8104109437556725e-05, + "loss": 0.5376, + "step": 14594 + }, + { + "epoch": 0.40074135090609553, + "grad_norm": 0.4668791592121124, + "learning_rate": 1.8103856400128967e-05, + "loss": 0.6033, + "step": 14595 + }, + { + "epoch": 0.4007688083470621, + "grad_norm": 0.362150102853775, + "learning_rate": 1.810360334758493e-05, + "loss": 0.5198, + "step": 14596 + }, + { + "epoch": 0.40079626578802857, + "grad_norm": 0.35687899589538574, + "learning_rate": 1.810335027992509e-05, + "loss": 0.5229, + "step": 14597 + }, + { + "epoch": 0.40082372322899507, + "grad_norm": 0.4366203844547272, + "learning_rate": 1.810309719714991e-05, + "loss": 0.5117, + "step": 14598 + }, + { + "epoch": 0.40085118066996156, + "grad_norm": 0.47704675793647766, + "learning_rate": 1.8102844099259874e-05, + "loss": 0.5685, + "step": 14599 + }, + { + "epoch": 0.40087863811092805, + "grad_norm": 0.3518785238265991, + "learning_rate": 1.810259098625544e-05, + "loss": 0.551, + "step": 14600 + }, + { + "epoch": 0.40090609555189455, + "grad_norm": 0.36356934905052185, + "learning_rate": 1.8102337858137094e-05, + "loss": 0.4711, + "step": 14601 + }, + { + "epoch": 0.40093355299286104, + "grad_norm": 0.3679013252258301, + "learning_rate": 1.81020847149053e-05, + "loss": 0.5702, + "step": 14602 + }, + { + "epoch": 0.4009610104338276, + "grad_norm": 0.38238468766212463, + "learning_rate": 1.8101831556560533e-05, + "loss": 0.5566, + "step": 14603 + }, + { + "epoch": 0.4009884678747941, + "grad_norm": 0.38162165880203247, + "learning_rate": 1.8101578383103264e-05, + "loss": 0.5227, + "step": 14604 + }, + { + "epoch": 0.4010159253157606, + "grad_norm": 0.38608041405677795, + "learning_rate": 1.8101325194533965e-05, + "loss": 0.5085, + "step": 14605 + }, + { + "epoch": 0.4010433827567271, + "grad_norm": 0.389178991317749, + "learning_rate": 1.810107199085311e-05, + "loss": 0.49, + "step": 14606 + }, + { + "epoch": 0.40107084019769357, + "grad_norm": 0.3352128267288208, + "learning_rate": 1.810081877206117e-05, + "loss": 0.4114, + "step": 14607 + }, + { + "epoch": 0.40109829763866006, + "grad_norm": 0.35472002625465393, + "learning_rate": 1.8100565538158617e-05, + "loss": 0.5123, + "step": 14608 + }, + { + "epoch": 0.40112575507962656, + "grad_norm": 0.4181106686592102, + "learning_rate": 1.810031228914592e-05, + "loss": 0.5225, + "step": 14609 + }, + { + "epoch": 0.4011532125205931, + "grad_norm": 0.3696853518486023, + "learning_rate": 1.8100059025023563e-05, + "loss": 0.5066, + "step": 14610 + }, + { + "epoch": 0.4011806699615596, + "grad_norm": 0.36421290040016174, + "learning_rate": 1.8099805745792004e-05, + "loss": 0.5232, + "step": 14611 + }, + { + "epoch": 0.4012081274025261, + "grad_norm": 0.42319098114967346, + "learning_rate": 1.8099552451451726e-05, + "loss": 0.5266, + "step": 14612 + }, + { + "epoch": 0.4012355848434926, + "grad_norm": 0.37401083111763, + "learning_rate": 1.8099299142003195e-05, + "loss": 0.5191, + "step": 14613 + }, + { + "epoch": 0.4012630422844591, + "grad_norm": 0.4107027053833008, + "learning_rate": 1.809904581744689e-05, + "loss": 0.575, + "step": 14614 + }, + { + "epoch": 0.4012904997254256, + "grad_norm": 0.38722971081733704, + "learning_rate": 1.8098792477783274e-05, + "loss": 0.5443, + "step": 14615 + }, + { + "epoch": 0.40131795716639207, + "grad_norm": 0.3746570646762848, + "learning_rate": 1.809853912301283e-05, + "loss": 0.5467, + "step": 14616 + }, + { + "epoch": 0.4013454146073586, + "grad_norm": 0.3936925530433655, + "learning_rate": 1.8098285753136023e-05, + "loss": 0.5053, + "step": 14617 + }, + { + "epoch": 0.4013728720483251, + "grad_norm": 0.3695125877857208, + "learning_rate": 1.809803236815333e-05, + "loss": 0.5375, + "step": 14618 + }, + { + "epoch": 0.4014003294892916, + "grad_norm": 0.3335988223552704, + "learning_rate": 1.8097778968065222e-05, + "loss": 0.5268, + "step": 14619 + }, + { + "epoch": 0.4014277869302581, + "grad_norm": 0.3471565842628479, + "learning_rate": 1.809752555287217e-05, + "loss": 0.5487, + "step": 14620 + }, + { + "epoch": 0.4014552443712246, + "grad_norm": 0.35443001985549927, + "learning_rate": 1.8097272122574653e-05, + "loss": 0.4857, + "step": 14621 + }, + { + "epoch": 0.4014827018121911, + "grad_norm": 0.3892938494682312, + "learning_rate": 1.8097018677173133e-05, + "loss": 0.5787, + "step": 14622 + }, + { + "epoch": 0.4015101592531576, + "grad_norm": 0.3583002984523773, + "learning_rate": 1.809676521666809e-05, + "loss": 0.4904, + "step": 14623 + }, + { + "epoch": 0.40153761669412413, + "grad_norm": 0.35209596157073975, + "learning_rate": 1.809651174106e-05, + "loss": 0.5368, + "step": 14624 + }, + { + "epoch": 0.4015650741350906, + "grad_norm": 0.37248530983924866, + "learning_rate": 1.809625825034933e-05, + "loss": 0.5593, + "step": 14625 + }, + { + "epoch": 0.4015925315760571, + "grad_norm": 0.3603897988796234, + "learning_rate": 1.809600474453655e-05, + "loss": 0.4755, + "step": 14626 + }, + { + "epoch": 0.4016199890170236, + "grad_norm": 0.35966944694519043, + "learning_rate": 1.809575122362214e-05, + "loss": 0.4805, + "step": 14627 + }, + { + "epoch": 0.4016474464579901, + "grad_norm": 0.3559662997722626, + "learning_rate": 1.8095497687606573e-05, + "loss": 0.5058, + "step": 14628 + }, + { + "epoch": 0.4016749038989566, + "grad_norm": 0.37940093874931335, + "learning_rate": 1.8095244136490316e-05, + "loss": 0.4732, + "step": 14629 + }, + { + "epoch": 0.4017023613399231, + "grad_norm": 0.3827895522117615, + "learning_rate": 1.8094990570273844e-05, + "loss": 0.5145, + "step": 14630 + }, + { + "epoch": 0.40172981878088965, + "grad_norm": 0.40670597553253174, + "learning_rate": 1.8094736988957634e-05, + "loss": 0.4797, + "step": 14631 + }, + { + "epoch": 0.40175727622185614, + "grad_norm": 0.39492595195770264, + "learning_rate": 1.8094483392542154e-05, + "loss": 0.5621, + "step": 14632 + }, + { + "epoch": 0.40178473366282264, + "grad_norm": 0.4619510769844055, + "learning_rate": 1.8094229781027877e-05, + "loss": 0.4798, + "step": 14633 + }, + { + "epoch": 0.40181219110378913, + "grad_norm": 0.3305439054965973, + "learning_rate": 1.8093976154415283e-05, + "loss": 0.4672, + "step": 14634 + }, + { + "epoch": 0.4018396485447556, + "grad_norm": 0.41355642676353455, + "learning_rate": 1.8093722512704838e-05, + "loss": 0.5364, + "step": 14635 + }, + { + "epoch": 0.4018671059857221, + "grad_norm": 0.43116313219070435, + "learning_rate": 1.8093468855897016e-05, + "loss": 0.4914, + "step": 14636 + }, + { + "epoch": 0.4018945634266886, + "grad_norm": 0.3630656898021698, + "learning_rate": 1.8093215183992293e-05, + "loss": 0.4595, + "step": 14637 + }, + { + "epoch": 0.40192202086765516, + "grad_norm": 0.3815681040287018, + "learning_rate": 1.8092961496991138e-05, + "loss": 0.5282, + "step": 14638 + }, + { + "epoch": 0.40194947830862165, + "grad_norm": 0.35625553131103516, + "learning_rate": 1.809270779489403e-05, + "loss": 0.4567, + "step": 14639 + }, + { + "epoch": 0.40197693574958815, + "grad_norm": 0.37330207228660583, + "learning_rate": 1.8092454077701437e-05, + "loss": 0.5838, + "step": 14640 + }, + { + "epoch": 0.40200439319055464, + "grad_norm": 0.37651076912879944, + "learning_rate": 1.809220034541384e-05, + "loss": 0.5914, + "step": 14641 + }, + { + "epoch": 0.40203185063152114, + "grad_norm": 0.40282702445983887, + "learning_rate": 1.8091946598031696e-05, + "loss": 0.5327, + "step": 14642 + }, + { + "epoch": 0.40205930807248763, + "grad_norm": 0.33519160747528076, + "learning_rate": 1.8091692835555498e-05, + "loss": 0.4848, + "step": 14643 + }, + { + "epoch": 0.4020867655134541, + "grad_norm": 0.3642624616622925, + "learning_rate": 1.8091439057985708e-05, + "loss": 0.5929, + "step": 14644 + }, + { + "epoch": 0.4021142229544207, + "grad_norm": 0.3314264416694641, + "learning_rate": 1.80911852653228e-05, + "loss": 0.5036, + "step": 14645 + }, + { + "epoch": 0.40214168039538717, + "grad_norm": 0.4152487814426422, + "learning_rate": 1.809093145756725e-05, + "loss": 0.5889, + "step": 14646 + }, + { + "epoch": 0.40216913783635366, + "grad_norm": 0.3866230845451355, + "learning_rate": 1.809067763471953e-05, + "loss": 0.4619, + "step": 14647 + }, + { + "epoch": 0.40219659527732016, + "grad_norm": 0.3414499759674072, + "learning_rate": 1.8090423796780114e-05, + "loss": 0.4552, + "step": 14648 + }, + { + "epoch": 0.40222405271828665, + "grad_norm": 0.4598245620727539, + "learning_rate": 1.8090169943749477e-05, + "loss": 0.5363, + "step": 14649 + }, + { + "epoch": 0.40225151015925315, + "grad_norm": 0.42448121309280396, + "learning_rate": 1.808991607562809e-05, + "loss": 0.6011, + "step": 14650 + }, + { + "epoch": 0.40227896760021964, + "grad_norm": 0.38276731967926025, + "learning_rate": 1.8089662192416426e-05, + "loss": 0.4952, + "step": 14651 + }, + { + "epoch": 0.4023064250411862, + "grad_norm": 0.34084275364875793, + "learning_rate": 1.8089408294114964e-05, + "loss": 0.5046, + "step": 14652 + }, + { + "epoch": 0.4023338824821527, + "grad_norm": 0.3555589020252228, + "learning_rate": 1.808915438072417e-05, + "loss": 0.6407, + "step": 14653 + }, + { + "epoch": 0.4023613399231192, + "grad_norm": 0.37718692421913147, + "learning_rate": 1.8088900452244523e-05, + "loss": 0.5538, + "step": 14654 + }, + { + "epoch": 0.40238879736408567, + "grad_norm": 0.40285661816596985, + "learning_rate": 1.808864650867649e-05, + "loss": 0.4983, + "step": 14655 + }, + { + "epoch": 0.40241625480505216, + "grad_norm": 0.357095330953598, + "learning_rate": 1.8088392550020556e-05, + "loss": 0.4466, + "step": 14656 + }, + { + "epoch": 0.40244371224601866, + "grad_norm": 0.350110799074173, + "learning_rate": 1.8088138576277186e-05, + "loss": 0.5806, + "step": 14657 + }, + { + "epoch": 0.40247116968698515, + "grad_norm": 0.39888471364974976, + "learning_rate": 1.808788458744686e-05, + "loss": 0.5911, + "step": 14658 + }, + { + "epoch": 0.4024986271279517, + "grad_norm": 0.40994754433631897, + "learning_rate": 1.8087630583530042e-05, + "loss": 0.6132, + "step": 14659 + }, + { + "epoch": 0.4025260845689182, + "grad_norm": 0.3647705316543579, + "learning_rate": 1.8087376564527215e-05, + "loss": 0.5736, + "step": 14660 + }, + { + "epoch": 0.4025535420098847, + "grad_norm": 0.3608563542366028, + "learning_rate": 1.8087122530438847e-05, + "loss": 0.5674, + "step": 14661 + }, + { + "epoch": 0.4025809994508512, + "grad_norm": 0.42957037687301636, + "learning_rate": 1.808686848126542e-05, + "loss": 0.5049, + "step": 14662 + }, + { + "epoch": 0.4026084568918177, + "grad_norm": 0.40946075320243835, + "learning_rate": 1.8086614417007397e-05, + "loss": 0.5047, + "step": 14663 + }, + { + "epoch": 0.4026359143327842, + "grad_norm": 0.352499395608902, + "learning_rate": 1.8086360337665257e-05, + "loss": 0.4078, + "step": 14664 + }, + { + "epoch": 0.40266337177375067, + "grad_norm": 0.32900241017341614, + "learning_rate": 1.8086106243239473e-05, + "loss": 0.475, + "step": 14665 + }, + { + "epoch": 0.40269082921471716, + "grad_norm": 0.3819458484649658, + "learning_rate": 1.8085852133730525e-05, + "loss": 0.5212, + "step": 14666 + }, + { + "epoch": 0.4027182866556837, + "grad_norm": 0.3217844069004059, + "learning_rate": 1.8085598009138882e-05, + "loss": 0.4655, + "step": 14667 + }, + { + "epoch": 0.4027457440966502, + "grad_norm": 0.37932631373405457, + "learning_rate": 1.8085343869465014e-05, + "loss": 0.5388, + "step": 14668 + }, + { + "epoch": 0.4027732015376167, + "grad_norm": 0.5394917726516724, + "learning_rate": 1.80850897147094e-05, + "loss": 0.5952, + "step": 14669 + }, + { + "epoch": 0.4028006589785832, + "grad_norm": 0.4047355055809021, + "learning_rate": 1.8084835544872513e-05, + "loss": 0.557, + "step": 14670 + }, + { + "epoch": 0.4028281164195497, + "grad_norm": 0.37440744042396545, + "learning_rate": 1.808458135995483e-05, + "loss": 0.5222, + "step": 14671 + }, + { + "epoch": 0.4028555738605162, + "grad_norm": 0.4280283451080322, + "learning_rate": 1.8084327159956822e-05, + "loss": 0.5065, + "step": 14672 + }, + { + "epoch": 0.4028830313014827, + "grad_norm": 0.41443192958831787, + "learning_rate": 1.808407294487896e-05, + "loss": 0.5886, + "step": 14673 + }, + { + "epoch": 0.4029104887424492, + "grad_norm": 0.34922876954078674, + "learning_rate": 1.8083818714721726e-05, + "loss": 0.529, + "step": 14674 + }, + { + "epoch": 0.4029379461834157, + "grad_norm": 0.37352287769317627, + "learning_rate": 1.8083564469485588e-05, + "loss": 0.5442, + "step": 14675 + }, + { + "epoch": 0.4029654036243822, + "grad_norm": 0.36386463046073914, + "learning_rate": 1.8083310209171023e-05, + "loss": 0.4846, + "step": 14676 + }, + { + "epoch": 0.4029928610653487, + "grad_norm": 0.39302533864974976, + "learning_rate": 1.8083055933778503e-05, + "loss": 0.6006, + "step": 14677 + }, + { + "epoch": 0.4030203185063152, + "grad_norm": 0.36087071895599365, + "learning_rate": 1.8082801643308505e-05, + "loss": 0.4786, + "step": 14678 + }, + { + "epoch": 0.4030477759472817, + "grad_norm": 0.3479931652545929, + "learning_rate": 1.8082547337761503e-05, + "loss": 0.5022, + "step": 14679 + }, + { + "epoch": 0.4030752333882482, + "grad_norm": 0.37048014998435974, + "learning_rate": 1.808229301713797e-05, + "loss": 0.4879, + "step": 14680 + }, + { + "epoch": 0.40310269082921474, + "grad_norm": 0.39467135071754456, + "learning_rate": 1.808203868143838e-05, + "loss": 0.5516, + "step": 14681 + }, + { + "epoch": 0.40313014827018123, + "grad_norm": 0.35756564140319824, + "learning_rate": 1.808178433066321e-05, + "loss": 0.4935, + "step": 14682 + }, + { + "epoch": 0.4031576057111477, + "grad_norm": 0.42653781175613403, + "learning_rate": 1.808152996481293e-05, + "loss": 0.5046, + "step": 14683 + }, + { + "epoch": 0.4031850631521142, + "grad_norm": 0.49832984805107117, + "learning_rate": 1.8081275583888024e-05, + "loss": 0.5692, + "step": 14684 + }, + { + "epoch": 0.4032125205930807, + "grad_norm": 0.37209516763687134, + "learning_rate": 1.8081021187888953e-05, + "loss": 0.6149, + "step": 14685 + }, + { + "epoch": 0.4032399780340472, + "grad_norm": 0.3182792365550995, + "learning_rate": 1.80807667768162e-05, + "loss": 0.43, + "step": 14686 + }, + { + "epoch": 0.4032674354750137, + "grad_norm": 0.3850499987602234, + "learning_rate": 1.8080512350670238e-05, + "loss": 0.5579, + "step": 14687 + }, + { + "epoch": 0.40329489291598025, + "grad_norm": 0.5896596312522888, + "learning_rate": 1.8080257909451543e-05, + "loss": 0.5545, + "step": 14688 + }, + { + "epoch": 0.40332235035694675, + "grad_norm": 0.3459538221359253, + "learning_rate": 1.8080003453160585e-05, + "loss": 0.5328, + "step": 14689 + }, + { + "epoch": 0.40334980779791324, + "grad_norm": 0.35975804924964905, + "learning_rate": 1.8079748981797844e-05, + "loss": 0.4849, + "step": 14690 + }, + { + "epoch": 0.40337726523887973, + "grad_norm": 0.40735894441604614, + "learning_rate": 1.8079494495363793e-05, + "loss": 0.4933, + "step": 14691 + }, + { + "epoch": 0.40340472267984623, + "grad_norm": 0.34842443466186523, + "learning_rate": 1.8079239993858903e-05, + "loss": 0.5368, + "step": 14692 + }, + { + "epoch": 0.4034321801208127, + "grad_norm": 0.35095924139022827, + "learning_rate": 1.807898547728366e-05, + "loss": 0.4492, + "step": 14693 + }, + { + "epoch": 0.4034596375617792, + "grad_norm": 0.38816022872924805, + "learning_rate": 1.807873094563852e-05, + "loss": 0.527, + "step": 14694 + }, + { + "epoch": 0.40348709500274577, + "grad_norm": 0.3750323951244354, + "learning_rate": 1.807847639892397e-05, + "loss": 0.5398, + "step": 14695 + }, + { + "epoch": 0.40351455244371226, + "grad_norm": 0.3734166920185089, + "learning_rate": 1.8078221837140487e-05, + "loss": 0.5275, + "step": 14696 + }, + { + "epoch": 0.40354200988467875, + "grad_norm": 0.4601304531097412, + "learning_rate": 1.8077967260288544e-05, + "loss": 0.5641, + "step": 14697 + }, + { + "epoch": 0.40356946732564525, + "grad_norm": 0.41426676511764526, + "learning_rate": 1.807771266836861e-05, + "loss": 0.5358, + "step": 14698 + }, + { + "epoch": 0.40359692476661174, + "grad_norm": 0.3694614768028259, + "learning_rate": 1.8077458061381167e-05, + "loss": 0.4691, + "step": 14699 + }, + { + "epoch": 0.40362438220757824, + "grad_norm": 0.37172043323516846, + "learning_rate": 1.807720343932668e-05, + "loss": 0.5042, + "step": 14700 + }, + { + "epoch": 0.40365183964854473, + "grad_norm": 0.38693997263908386, + "learning_rate": 1.8076948802205637e-05, + "loss": 0.5139, + "step": 14701 + }, + { + "epoch": 0.4036792970895113, + "grad_norm": 0.38801929354667664, + "learning_rate": 1.8076694150018506e-05, + "loss": 0.5034, + "step": 14702 + }, + { + "epoch": 0.4037067545304778, + "grad_norm": 0.3789730668067932, + "learning_rate": 1.807643948276576e-05, + "loss": 0.5463, + "step": 14703 + }, + { + "epoch": 0.40373421197144427, + "grad_norm": 0.3823707103729248, + "learning_rate": 1.8076184800447878e-05, + "loss": 0.5941, + "step": 14704 + }, + { + "epoch": 0.40376166941241076, + "grad_norm": 0.3918725550174713, + "learning_rate": 1.8075930103065332e-05, + "loss": 0.5635, + "step": 14705 + }, + { + "epoch": 0.40378912685337726, + "grad_norm": 0.5018916726112366, + "learning_rate": 1.80756753906186e-05, + "loss": 0.5068, + "step": 14706 + }, + { + "epoch": 0.40381658429434375, + "grad_norm": 0.3257279098033905, + "learning_rate": 1.807542066310816e-05, + "loss": 0.487, + "step": 14707 + }, + { + "epoch": 0.40384404173531024, + "grad_norm": 0.34055647253990173, + "learning_rate": 1.807516592053448e-05, + "loss": 0.4276, + "step": 14708 + }, + { + "epoch": 0.4038714991762768, + "grad_norm": 0.3674917221069336, + "learning_rate": 1.8074911162898034e-05, + "loss": 0.5285, + "step": 14709 + }, + { + "epoch": 0.4038989566172433, + "grad_norm": 0.4070485830307007, + "learning_rate": 1.8074656390199305e-05, + "loss": 0.5518, + "step": 14710 + }, + { + "epoch": 0.4039264140582098, + "grad_norm": 0.4247615933418274, + "learning_rate": 1.8074401602438767e-05, + "loss": 0.6084, + "step": 14711 + }, + { + "epoch": 0.4039538714991763, + "grad_norm": 0.36032289266586304, + "learning_rate": 1.807414679961689e-05, + "loss": 0.4712, + "step": 14712 + }, + { + "epoch": 0.40398132894014277, + "grad_norm": 0.39505571126937866, + "learning_rate": 1.8073891981734153e-05, + "loss": 0.4806, + "step": 14713 + }, + { + "epoch": 0.40400878638110926, + "grad_norm": 0.3583564758300781, + "learning_rate": 1.807363714879103e-05, + "loss": 0.4543, + "step": 14714 + }, + { + "epoch": 0.40403624382207576, + "grad_norm": 0.37262532114982605, + "learning_rate": 1.8073382300788e-05, + "loss": 0.5515, + "step": 14715 + }, + { + "epoch": 0.4040637012630423, + "grad_norm": 0.3515414297580719, + "learning_rate": 1.807312743772553e-05, + "loss": 0.5656, + "step": 14716 + }, + { + "epoch": 0.4040911587040088, + "grad_norm": 0.3217518627643585, + "learning_rate": 1.8072872559604105e-05, + "loss": 0.5052, + "step": 14717 + }, + { + "epoch": 0.4041186161449753, + "grad_norm": 0.38194459676742554, + "learning_rate": 1.8072617666424195e-05, + "loss": 0.5457, + "step": 14718 + }, + { + "epoch": 0.4041460735859418, + "grad_norm": 0.35004082322120667, + "learning_rate": 1.807236275818628e-05, + "loss": 0.4648, + "step": 14719 + }, + { + "epoch": 0.4041735310269083, + "grad_norm": 0.3920309543609619, + "learning_rate": 1.8072107834890823e-05, + "loss": 0.5016, + "step": 14720 + }, + { + "epoch": 0.4042009884678748, + "grad_norm": 0.4213228225708008, + "learning_rate": 1.8071852896538314e-05, + "loss": 0.5765, + "step": 14721 + }, + { + "epoch": 0.40422844590884127, + "grad_norm": 0.5486566424369812, + "learning_rate": 1.8071597943129225e-05, + "loss": 0.5685, + "step": 14722 + }, + { + "epoch": 0.4042559033498078, + "grad_norm": 0.4181772470474243, + "learning_rate": 1.8071342974664027e-05, + "loss": 0.4338, + "step": 14723 + }, + { + "epoch": 0.4042833607907743, + "grad_norm": 0.34769001603126526, + "learning_rate": 1.8071087991143198e-05, + "loss": 0.4945, + "step": 14724 + }, + { + "epoch": 0.4043108182317408, + "grad_norm": 0.33215898275375366, + "learning_rate": 1.8070832992567215e-05, + "loss": 0.5034, + "step": 14725 + }, + { + "epoch": 0.4043382756727073, + "grad_norm": 0.38020437955856323, + "learning_rate": 1.8070577978936553e-05, + "loss": 0.5375, + "step": 14726 + }, + { + "epoch": 0.4043657331136738, + "grad_norm": 0.3811856508255005, + "learning_rate": 1.8070322950251687e-05, + "loss": 0.5456, + "step": 14727 + }, + { + "epoch": 0.4043931905546403, + "grad_norm": 12.372997283935547, + "learning_rate": 1.8070067906513093e-05, + "loss": 0.7174, + "step": 14728 + }, + { + "epoch": 0.4044206479956068, + "grad_norm": 0.35053667426109314, + "learning_rate": 1.8069812847721247e-05, + "loss": 0.4778, + "step": 14729 + }, + { + "epoch": 0.40444810543657334, + "grad_norm": 0.31888505816459656, + "learning_rate": 1.8069557773876622e-05, + "loss": 0.4348, + "step": 14730 + }, + { + "epoch": 0.40447556287753983, + "grad_norm": 0.3730185627937317, + "learning_rate": 1.8069302684979697e-05, + "loss": 0.5085, + "step": 14731 + }, + { + "epoch": 0.4045030203185063, + "grad_norm": 0.350248247385025, + "learning_rate": 1.806904758103095e-05, + "loss": 0.493, + "step": 14732 + }, + { + "epoch": 0.4045304777594728, + "grad_norm": 0.33271321654319763, + "learning_rate": 1.8068792462030852e-05, + "loss": 0.4662, + "step": 14733 + }, + { + "epoch": 0.4045579352004393, + "grad_norm": 0.4063245356082916, + "learning_rate": 1.806853732797988e-05, + "loss": 0.5703, + "step": 14734 + }, + { + "epoch": 0.4045853926414058, + "grad_norm": 0.3604525625705719, + "learning_rate": 1.8068282178878514e-05, + "loss": 0.4553, + "step": 14735 + }, + { + "epoch": 0.4046128500823723, + "grad_norm": 0.43405142426490784, + "learning_rate": 1.806802701472722e-05, + "loss": 0.4396, + "step": 14736 + }, + { + "epoch": 0.40464030752333885, + "grad_norm": 0.37002667784690857, + "learning_rate": 1.8067771835526487e-05, + "loss": 0.5008, + "step": 14737 + }, + { + "epoch": 0.40466776496430534, + "grad_norm": 0.3984539806842804, + "learning_rate": 1.8067516641276782e-05, + "loss": 0.513, + "step": 14738 + }, + { + "epoch": 0.40469522240527184, + "grad_norm": 0.4028794765472412, + "learning_rate": 1.8067261431978586e-05, + "loss": 0.6076, + "step": 14739 + }, + { + "epoch": 0.40472267984623833, + "grad_norm": 0.45551371574401855, + "learning_rate": 1.8067006207632368e-05, + "loss": 0.5745, + "step": 14740 + }, + { + "epoch": 0.4047501372872048, + "grad_norm": 0.36363714933395386, + "learning_rate": 1.806675096823861e-05, + "loss": 0.4649, + "step": 14741 + }, + { + "epoch": 0.4047775947281713, + "grad_norm": 0.6082791686058044, + "learning_rate": 1.806649571379779e-05, + "loss": 0.5841, + "step": 14742 + }, + { + "epoch": 0.4048050521691378, + "grad_norm": 0.37922120094299316, + "learning_rate": 1.806624044431038e-05, + "loss": 0.5155, + "step": 14743 + }, + { + "epoch": 0.40483250961010436, + "grad_norm": 0.32244646549224854, + "learning_rate": 1.8065985159776857e-05, + "loss": 0.4229, + "step": 14744 + }, + { + "epoch": 0.40485996705107086, + "grad_norm": 0.3943827748298645, + "learning_rate": 1.8065729860197697e-05, + "loss": 0.4726, + "step": 14745 + }, + { + "epoch": 0.40488742449203735, + "grad_norm": 0.4028409421443939, + "learning_rate": 1.8065474545573373e-05, + "loss": 0.5841, + "step": 14746 + }, + { + "epoch": 0.40491488193300385, + "grad_norm": 0.3933042585849762, + "learning_rate": 1.8065219215904368e-05, + "loss": 0.6072, + "step": 14747 + }, + { + "epoch": 0.40494233937397034, + "grad_norm": 0.3126187324523926, + "learning_rate": 1.8064963871191156e-05, + "loss": 0.5227, + "step": 14748 + }, + { + "epoch": 0.40496979681493683, + "grad_norm": 0.3280591666698456, + "learning_rate": 1.8064708511434213e-05, + "loss": 0.517, + "step": 14749 + }, + { + "epoch": 0.4049972542559033, + "grad_norm": 0.33515962958335876, + "learning_rate": 1.806445313663401e-05, + "loss": 0.5742, + "step": 14750 + }, + { + "epoch": 0.4050247116968699, + "grad_norm": 0.39739400148391724, + "learning_rate": 1.8064197746791034e-05, + "loss": 0.6028, + "step": 14751 + }, + { + "epoch": 0.40505216913783637, + "grad_norm": 0.368132621049881, + "learning_rate": 1.806394234190575e-05, + "loss": 0.5248, + "step": 14752 + }, + { + "epoch": 0.40507962657880286, + "grad_norm": 0.3222810924053192, + "learning_rate": 1.806368692197864e-05, + "loss": 0.3576, + "step": 14753 + }, + { + "epoch": 0.40510708401976936, + "grad_norm": 0.38772234320640564, + "learning_rate": 1.8063431487010184e-05, + "loss": 0.5263, + "step": 14754 + }, + { + "epoch": 0.40513454146073585, + "grad_norm": 0.4019448459148407, + "learning_rate": 1.8063176037000853e-05, + "loss": 0.5117, + "step": 14755 + }, + { + "epoch": 0.40516199890170235, + "grad_norm": 0.34009474515914917, + "learning_rate": 1.8062920571951124e-05, + "loss": 0.4982, + "step": 14756 + }, + { + "epoch": 0.40518945634266884, + "grad_norm": 0.37580329179763794, + "learning_rate": 1.806266509186148e-05, + "loss": 0.4839, + "step": 14757 + }, + { + "epoch": 0.4052169137836354, + "grad_norm": 0.4985986053943634, + "learning_rate": 1.8062409596732387e-05, + "loss": 0.5585, + "step": 14758 + }, + { + "epoch": 0.4052443712246019, + "grad_norm": 0.353579044342041, + "learning_rate": 1.8062154086564327e-05, + "loss": 0.4769, + "step": 14759 + }, + { + "epoch": 0.4052718286655684, + "grad_norm": 0.4060033857822418, + "learning_rate": 1.806189856135778e-05, + "loss": 0.571, + "step": 14760 + }, + { + "epoch": 0.4052992861065349, + "grad_norm": 0.38128459453582764, + "learning_rate": 1.8061643021113215e-05, + "loss": 0.5477, + "step": 14761 + }, + { + "epoch": 0.40532674354750137, + "grad_norm": 0.366176038980484, + "learning_rate": 1.8061387465831117e-05, + "loss": 0.5598, + "step": 14762 + }, + { + "epoch": 0.40535420098846786, + "grad_norm": 0.3576662540435791, + "learning_rate": 1.8061131895511953e-05, + "loss": 0.5325, + "step": 14763 + }, + { + "epoch": 0.40538165842943436, + "grad_norm": 0.3693085014820099, + "learning_rate": 1.806087631015621e-05, + "loss": 0.5494, + "step": 14764 + }, + { + "epoch": 0.4054091158704009, + "grad_norm": 0.3811831474304199, + "learning_rate": 1.806062070976436e-05, + "loss": 0.4419, + "step": 14765 + }, + { + "epoch": 0.4054365733113674, + "grad_norm": 0.5193513035774231, + "learning_rate": 1.8060365094336877e-05, + "loss": 0.515, + "step": 14766 + }, + { + "epoch": 0.4054640307523339, + "grad_norm": 0.38256219029426575, + "learning_rate": 1.806010946387424e-05, + "loss": 0.4551, + "step": 14767 + }, + { + "epoch": 0.4054914881933004, + "grad_norm": 0.3178194761276245, + "learning_rate": 1.805985381837693e-05, + "loss": 0.3917, + "step": 14768 + }, + { + "epoch": 0.4055189456342669, + "grad_norm": 0.3867782652378082, + "learning_rate": 1.8059598157845413e-05, + "loss": 0.5637, + "step": 14769 + }, + { + "epoch": 0.4055464030752334, + "grad_norm": 0.362291157245636, + "learning_rate": 1.805934248228018e-05, + "loss": 0.5107, + "step": 14770 + }, + { + "epoch": 0.40557386051619987, + "grad_norm": 0.37956178188323975, + "learning_rate": 1.8059086791681702e-05, + "loss": 0.506, + "step": 14771 + }, + { + "epoch": 0.4056013179571664, + "grad_norm": 0.3656371533870697, + "learning_rate": 1.8058831086050452e-05, + "loss": 0.4423, + "step": 14772 + }, + { + "epoch": 0.4056287753981329, + "grad_norm": 0.39335089921951294, + "learning_rate": 1.805857536538691e-05, + "loss": 0.5167, + "step": 14773 + }, + { + "epoch": 0.4056562328390994, + "grad_norm": 0.4061412513256073, + "learning_rate": 1.8058319629691552e-05, + "loss": 0.5637, + "step": 14774 + }, + { + "epoch": 0.4056836902800659, + "grad_norm": 0.387127161026001, + "learning_rate": 1.8058063878964857e-05, + "loss": 0.4868, + "step": 14775 + }, + { + "epoch": 0.4057111477210324, + "grad_norm": 0.33942294120788574, + "learning_rate": 1.8057808113207302e-05, + "loss": 0.5166, + "step": 14776 + }, + { + "epoch": 0.4057386051619989, + "grad_norm": 0.38170599937438965, + "learning_rate": 1.805755233241936e-05, + "loss": 0.4661, + "step": 14777 + }, + { + "epoch": 0.4057660626029654, + "grad_norm": 0.37924861907958984, + "learning_rate": 1.8057296536601518e-05, + "loss": 0.4775, + "step": 14778 + }, + { + "epoch": 0.40579352004393193, + "grad_norm": 0.368163526058197, + "learning_rate": 1.805704072575424e-05, + "loss": 0.4737, + "step": 14779 + }, + { + "epoch": 0.4058209774848984, + "grad_norm": 0.3912263512611389, + "learning_rate": 1.805678489987801e-05, + "loss": 0.5155, + "step": 14780 + }, + { + "epoch": 0.4058484349258649, + "grad_norm": 0.36123356223106384, + "learning_rate": 1.805652905897331e-05, + "loss": 0.4784, + "step": 14781 + }, + { + "epoch": 0.4058758923668314, + "grad_norm": 0.3870141804218292, + "learning_rate": 1.8056273203040606e-05, + "loss": 0.6058, + "step": 14782 + }, + { + "epoch": 0.4059033498077979, + "grad_norm": 0.45444825291633606, + "learning_rate": 1.8056017332080383e-05, + "loss": 0.6091, + "step": 14783 + }, + { + "epoch": 0.4059308072487644, + "grad_norm": 0.33832305669784546, + "learning_rate": 1.8055761446093118e-05, + "loss": 0.4856, + "step": 14784 + }, + { + "epoch": 0.4059582646897309, + "grad_norm": 0.3368736505508423, + "learning_rate": 1.8055505545079285e-05, + "loss": 0.5075, + "step": 14785 + }, + { + "epoch": 0.40598572213069745, + "grad_norm": 0.35209953784942627, + "learning_rate": 1.8055249629039366e-05, + "loss": 0.4638, + "step": 14786 + }, + { + "epoch": 0.40601317957166394, + "grad_norm": 0.3681807219982147, + "learning_rate": 1.805499369797383e-05, + "loss": 0.4728, + "step": 14787 + }, + { + "epoch": 0.40604063701263043, + "grad_norm": 0.3845827877521515, + "learning_rate": 1.8054737751883164e-05, + "loss": 0.4864, + "step": 14788 + }, + { + "epoch": 0.40606809445359693, + "grad_norm": 0.39308351278305054, + "learning_rate": 1.8054481790767838e-05, + "loss": 0.4898, + "step": 14789 + }, + { + "epoch": 0.4060955518945634, + "grad_norm": 0.3448549211025238, + "learning_rate": 1.8054225814628336e-05, + "loss": 0.5768, + "step": 14790 + }, + { + "epoch": 0.4061230093355299, + "grad_norm": 0.35632315278053284, + "learning_rate": 1.8053969823465132e-05, + "loss": 0.5138, + "step": 14791 + }, + { + "epoch": 0.4061504667764964, + "grad_norm": 0.4095469117164612, + "learning_rate": 1.8053713817278704e-05, + "loss": 0.5001, + "step": 14792 + }, + { + "epoch": 0.40617792421746296, + "grad_norm": 0.5304561257362366, + "learning_rate": 1.8053457796069528e-05, + "loss": 0.5707, + "step": 14793 + }, + { + "epoch": 0.40620538165842945, + "grad_norm": 0.35167619585990906, + "learning_rate": 1.8053201759838083e-05, + "loss": 0.5274, + "step": 14794 + }, + { + "epoch": 0.40623283909939595, + "grad_norm": 0.37087327241897583, + "learning_rate": 1.8052945708584847e-05, + "loss": 0.5586, + "step": 14795 + }, + { + "epoch": 0.40626029654036244, + "grad_norm": 0.3480444550514221, + "learning_rate": 1.8052689642310296e-05, + "loss": 0.4627, + "step": 14796 + }, + { + "epoch": 0.40628775398132894, + "grad_norm": 0.36207038164138794, + "learning_rate": 1.805243356101491e-05, + "loss": 0.4812, + "step": 14797 + }, + { + "epoch": 0.40631521142229543, + "grad_norm": 0.34522387385368347, + "learning_rate": 1.8052177464699165e-05, + "loss": 0.5525, + "step": 14798 + }, + { + "epoch": 0.4063426688632619, + "grad_norm": 0.4199666976928711, + "learning_rate": 1.805192135336354e-05, + "loss": 0.5525, + "step": 14799 + }, + { + "epoch": 0.4063701263042284, + "grad_norm": 0.3506101667881012, + "learning_rate": 1.805166522700851e-05, + "loss": 0.5734, + "step": 14800 + }, + { + "epoch": 0.40639758374519497, + "grad_norm": 0.34345313906669617, + "learning_rate": 1.8051409085634556e-05, + "loss": 0.4338, + "step": 14801 + }, + { + "epoch": 0.40642504118616146, + "grad_norm": 0.46493247151374817, + "learning_rate": 1.8051152929242156e-05, + "loss": 0.592, + "step": 14802 + }, + { + "epoch": 0.40645249862712796, + "grad_norm": 0.3640405535697937, + "learning_rate": 1.805089675783178e-05, + "loss": 0.5784, + "step": 14803 + }, + { + "epoch": 0.40647995606809445, + "grad_norm": 0.42128583788871765, + "learning_rate": 1.805064057140392e-05, + "loss": 0.4946, + "step": 14804 + }, + { + "epoch": 0.40650741350906094, + "grad_norm": 0.39340364933013916, + "learning_rate": 1.805038436995904e-05, + "loss": 0.5172, + "step": 14805 + }, + { + "epoch": 0.40653487095002744, + "grad_norm": 0.4195333421230316, + "learning_rate": 1.8050128153497625e-05, + "loss": 0.5529, + "step": 14806 + }, + { + "epoch": 0.40656232839099393, + "grad_norm": 0.36465057730674744, + "learning_rate": 1.8049871922020153e-05, + "loss": 0.5356, + "step": 14807 + }, + { + "epoch": 0.4065897858319605, + "grad_norm": 0.38446545600891113, + "learning_rate": 1.80496156755271e-05, + "loss": 0.4939, + "step": 14808 + }, + { + "epoch": 0.406617243272927, + "grad_norm": 0.3240787386894226, + "learning_rate": 1.8049359414018942e-05, + "loss": 0.4618, + "step": 14809 + }, + { + "epoch": 0.40664470071389347, + "grad_norm": 0.523323655128479, + "learning_rate": 1.8049103137496166e-05, + "loss": 0.5408, + "step": 14810 + }, + { + "epoch": 0.40667215815485996, + "grad_norm": 0.3925125300884247, + "learning_rate": 1.804884684595924e-05, + "loss": 0.4855, + "step": 14811 + }, + { + "epoch": 0.40669961559582646, + "grad_norm": 0.35637539625167847, + "learning_rate": 1.8048590539408644e-05, + "loss": 0.4949, + "step": 14812 + }, + { + "epoch": 0.40672707303679295, + "grad_norm": 0.3661719858646393, + "learning_rate": 1.8048334217844857e-05, + "loss": 0.5132, + "step": 14813 + }, + { + "epoch": 0.40675453047775945, + "grad_norm": 0.31979912519454956, + "learning_rate": 1.8048077881268364e-05, + "loss": 0.4578, + "step": 14814 + }, + { + "epoch": 0.406781987918726, + "grad_norm": 0.47598007321357727, + "learning_rate": 1.8047821529679634e-05, + "loss": 0.5236, + "step": 14815 + }, + { + "epoch": 0.4068094453596925, + "grad_norm": 0.3691399395465851, + "learning_rate": 1.8047565163079146e-05, + "loss": 0.4885, + "step": 14816 + }, + { + "epoch": 0.406836902800659, + "grad_norm": 0.3480229675769806, + "learning_rate": 1.804730878146738e-05, + "loss": 0.5166, + "step": 14817 + }, + { + "epoch": 0.4068643602416255, + "grad_norm": 0.39590421319007874, + "learning_rate": 1.804705238484482e-05, + "loss": 0.4298, + "step": 14818 + }, + { + "epoch": 0.40689181768259197, + "grad_norm": 0.41145452857017517, + "learning_rate": 1.8046795973211933e-05, + "loss": 0.5628, + "step": 14819 + }, + { + "epoch": 0.40691927512355847, + "grad_norm": 0.3855990469455719, + "learning_rate": 1.804653954656921e-05, + "loss": 0.5482, + "step": 14820 + }, + { + "epoch": 0.40694673256452496, + "grad_norm": 0.7361854910850525, + "learning_rate": 1.8046283104917116e-05, + "loss": 0.4865, + "step": 14821 + }, + { + "epoch": 0.4069741900054915, + "grad_norm": 0.3213246464729309, + "learning_rate": 1.804602664825614e-05, + "loss": 0.4456, + "step": 14822 + }, + { + "epoch": 0.407001647446458, + "grad_norm": 0.381661593914032, + "learning_rate": 1.8045770176586753e-05, + "loss": 0.5176, + "step": 14823 + }, + { + "epoch": 0.4070291048874245, + "grad_norm": 0.40129345655441284, + "learning_rate": 1.804551368990944e-05, + "loss": 0.5538, + "step": 14824 + }, + { + "epoch": 0.407056562328391, + "grad_norm": 0.3722538948059082, + "learning_rate": 1.8045257188224674e-05, + "loss": 0.5262, + "step": 14825 + }, + { + "epoch": 0.4070840197693575, + "grad_norm": 0.36713552474975586, + "learning_rate": 1.8045000671532936e-05, + "loss": 0.5366, + "step": 14826 + }, + { + "epoch": 0.407111477210324, + "grad_norm": 0.349770724773407, + "learning_rate": 1.8044744139834708e-05, + "loss": 0.4978, + "step": 14827 + }, + { + "epoch": 0.4071389346512905, + "grad_norm": 0.3892853856086731, + "learning_rate": 1.8044487593130458e-05, + "loss": 0.4774, + "step": 14828 + }, + { + "epoch": 0.407166392092257, + "grad_norm": 0.38004061579704285, + "learning_rate": 1.8044231031420674e-05, + "loss": 0.4579, + "step": 14829 + }, + { + "epoch": 0.4071938495332235, + "grad_norm": 0.41190171241760254, + "learning_rate": 1.804397445470583e-05, + "loss": 0.5797, + "step": 14830 + }, + { + "epoch": 0.40722130697419, + "grad_norm": 0.3893198072910309, + "learning_rate": 1.804371786298641e-05, + "loss": 0.589, + "step": 14831 + }, + { + "epoch": 0.4072487644151565, + "grad_norm": 0.3601135015487671, + "learning_rate": 1.8043461256262887e-05, + "loss": 0.5285, + "step": 14832 + }, + { + "epoch": 0.407276221856123, + "grad_norm": 0.3108764886856079, + "learning_rate": 1.8043204634535743e-05, + "loss": 0.4146, + "step": 14833 + }, + { + "epoch": 0.4073036792970895, + "grad_norm": 0.38985463976860046, + "learning_rate": 1.8042947997805454e-05, + "loss": 0.5501, + "step": 14834 + }, + { + "epoch": 0.407331136738056, + "grad_norm": 0.34739187359809875, + "learning_rate": 1.80426913460725e-05, + "loss": 0.4597, + "step": 14835 + }, + { + "epoch": 0.40735859417902254, + "grad_norm": 0.39019495248794556, + "learning_rate": 1.804243467933736e-05, + "loss": 0.4936, + "step": 14836 + }, + { + "epoch": 0.40738605161998903, + "grad_norm": 0.3715498745441437, + "learning_rate": 1.8042177997600513e-05, + "loss": 0.4967, + "step": 14837 + }, + { + "epoch": 0.4074135090609555, + "grad_norm": 0.3712135851383209, + "learning_rate": 1.8041921300862435e-05, + "loss": 0.6033, + "step": 14838 + }, + { + "epoch": 0.407440966501922, + "grad_norm": 0.5873866081237793, + "learning_rate": 1.8041664589123607e-05, + "loss": 0.457, + "step": 14839 + }, + { + "epoch": 0.4074684239428885, + "grad_norm": 0.3611989915370941, + "learning_rate": 1.804140786238451e-05, + "loss": 0.4988, + "step": 14840 + }, + { + "epoch": 0.407495881383855, + "grad_norm": 0.4438962936401367, + "learning_rate": 1.8041151120645622e-05, + "loss": 0.5422, + "step": 14841 + }, + { + "epoch": 0.4075233388248215, + "grad_norm": 0.7328953146934509, + "learning_rate": 1.8040894363907417e-05, + "loss": 0.4726, + "step": 14842 + }, + { + "epoch": 0.40755079626578805, + "grad_norm": 0.40454646944999695, + "learning_rate": 1.804063759217038e-05, + "loss": 0.5508, + "step": 14843 + }, + { + "epoch": 0.40757825370675455, + "grad_norm": 0.42692846059799194, + "learning_rate": 1.8040380805434986e-05, + "loss": 0.5214, + "step": 14844 + }, + { + "epoch": 0.40760571114772104, + "grad_norm": 0.369985967874527, + "learning_rate": 1.8040124003701716e-05, + "loss": 0.4173, + "step": 14845 + }, + { + "epoch": 0.40763316858868753, + "grad_norm": 0.36094173789024353, + "learning_rate": 1.803986718697105e-05, + "loss": 0.4513, + "step": 14846 + }, + { + "epoch": 0.407660626029654, + "grad_norm": 0.37603121995925903, + "learning_rate": 1.8039610355243467e-05, + "loss": 0.4698, + "step": 14847 + }, + { + "epoch": 0.4076880834706205, + "grad_norm": 0.3871299922466278, + "learning_rate": 1.8039353508519442e-05, + "loss": 0.4944, + "step": 14848 + }, + { + "epoch": 0.407715540911587, + "grad_norm": 0.4270170331001282, + "learning_rate": 1.8039096646799458e-05, + "loss": 0.5012, + "step": 14849 + }, + { + "epoch": 0.40774299835255357, + "grad_norm": 0.3768533170223236, + "learning_rate": 1.8038839770083993e-05, + "loss": 0.6236, + "step": 14850 + }, + { + "epoch": 0.40777045579352006, + "grad_norm": 0.36142006516456604, + "learning_rate": 1.8038582878373527e-05, + "loss": 0.4272, + "step": 14851 + }, + { + "epoch": 0.40779791323448655, + "grad_norm": 0.641895055770874, + "learning_rate": 1.8038325971668534e-05, + "loss": 0.5417, + "step": 14852 + }, + { + "epoch": 0.40782537067545305, + "grad_norm": 0.3605983853340149, + "learning_rate": 1.8038069049969504e-05, + "loss": 0.55, + "step": 14853 + }, + { + "epoch": 0.40785282811641954, + "grad_norm": 0.5123260617256165, + "learning_rate": 1.8037812113276906e-05, + "loss": 0.5381, + "step": 14854 + }, + { + "epoch": 0.40788028555738604, + "grad_norm": 0.3892963230609894, + "learning_rate": 1.803755516159122e-05, + "loss": 0.4099, + "step": 14855 + }, + { + "epoch": 0.40790774299835253, + "grad_norm": 0.36296361684799194, + "learning_rate": 1.8037298194912932e-05, + "loss": 0.6043, + "step": 14856 + }, + { + "epoch": 0.4079352004393191, + "grad_norm": 0.40396344661712646, + "learning_rate": 1.803704121324252e-05, + "loss": 0.5029, + "step": 14857 + }, + { + "epoch": 0.4079626578802856, + "grad_norm": 0.3551046848297119, + "learning_rate": 1.803678421658046e-05, + "loss": 0.4832, + "step": 14858 + }, + { + "epoch": 0.40799011532125207, + "grad_norm": 0.432334303855896, + "learning_rate": 1.803652720492723e-05, + "loss": 0.577, + "step": 14859 + }, + { + "epoch": 0.40801757276221856, + "grad_norm": 0.355660080909729, + "learning_rate": 1.8036270178283312e-05, + "loss": 0.4906, + "step": 14860 + }, + { + "epoch": 0.40804503020318506, + "grad_norm": 0.40323567390441895, + "learning_rate": 1.8036013136649186e-05, + "loss": 0.5956, + "step": 14861 + }, + { + "epoch": 0.40807248764415155, + "grad_norm": 0.3822386562824249, + "learning_rate": 1.8035756080025332e-05, + "loss": 0.5398, + "step": 14862 + }, + { + "epoch": 0.40809994508511804, + "grad_norm": 0.4175046682357788, + "learning_rate": 1.8035499008412224e-05, + "loss": 0.558, + "step": 14863 + }, + { + "epoch": 0.4081274025260846, + "grad_norm": 0.39199790358543396, + "learning_rate": 1.8035241921810353e-05, + "loss": 0.515, + "step": 14864 + }, + { + "epoch": 0.4081548599670511, + "grad_norm": 0.3819971978664398, + "learning_rate": 1.8034984820220183e-05, + "loss": 0.5637, + "step": 14865 + }, + { + "epoch": 0.4081823174080176, + "grad_norm": 0.43272218108177185, + "learning_rate": 1.8034727703642208e-05, + "loss": 0.597, + "step": 14866 + }, + { + "epoch": 0.4082097748489841, + "grad_norm": 0.38267287611961365, + "learning_rate": 1.8034470572076897e-05, + "loss": 0.4929, + "step": 14867 + }, + { + "epoch": 0.40823723228995057, + "grad_norm": 0.36657601594924927, + "learning_rate": 1.8034213425524738e-05, + "loss": 0.5495, + "step": 14868 + }, + { + "epoch": 0.40826468973091706, + "grad_norm": 0.3921767473220825, + "learning_rate": 1.8033956263986204e-05, + "loss": 0.5001, + "step": 14869 + }, + { + "epoch": 0.40829214717188356, + "grad_norm": 0.4294326603412628, + "learning_rate": 1.8033699087461777e-05, + "loss": 0.5217, + "step": 14870 + }, + { + "epoch": 0.4083196046128501, + "grad_norm": 0.3309621512889862, + "learning_rate": 1.803344189595194e-05, + "loss": 0.4226, + "step": 14871 + }, + { + "epoch": 0.4083470620538166, + "grad_norm": 0.3394681513309479, + "learning_rate": 1.8033184689457167e-05, + "loss": 0.4917, + "step": 14872 + }, + { + "epoch": 0.4083745194947831, + "grad_norm": 0.3920499086380005, + "learning_rate": 1.8032927467977943e-05, + "loss": 0.5162, + "step": 14873 + }, + { + "epoch": 0.4084019769357496, + "grad_norm": 0.3825346827507019, + "learning_rate": 1.8032670231514747e-05, + "loss": 0.5342, + "step": 14874 + }, + { + "epoch": 0.4084294343767161, + "grad_norm": 0.3564504384994507, + "learning_rate": 1.8032412980068054e-05, + "loss": 0.492, + "step": 14875 + }, + { + "epoch": 0.4084568918176826, + "grad_norm": 0.3759433329105377, + "learning_rate": 1.8032155713638347e-05, + "loss": 0.5656, + "step": 14876 + }, + { + "epoch": 0.40848434925864907, + "grad_norm": 0.4075092375278473, + "learning_rate": 1.8031898432226107e-05, + "loss": 0.5701, + "step": 14877 + }, + { + "epoch": 0.4085118066996156, + "grad_norm": 0.4592485725879669, + "learning_rate": 1.8031641135831814e-05, + "loss": 0.5478, + "step": 14878 + }, + { + "epoch": 0.4085392641405821, + "grad_norm": 0.39568856358528137, + "learning_rate": 1.8031383824455943e-05, + "loss": 0.6091, + "step": 14879 + }, + { + "epoch": 0.4085667215815486, + "grad_norm": 0.3633610010147095, + "learning_rate": 1.803112649809898e-05, + "loss": 0.5463, + "step": 14880 + }, + { + "epoch": 0.4085941790225151, + "grad_norm": 0.369634211063385, + "learning_rate": 1.8030869156761403e-05, + "loss": 0.4715, + "step": 14881 + }, + { + "epoch": 0.4086216364634816, + "grad_norm": 0.4010128676891327, + "learning_rate": 1.8030611800443692e-05, + "loss": 0.5392, + "step": 14882 + }, + { + "epoch": 0.4086490939044481, + "grad_norm": 0.42297226190567017, + "learning_rate": 1.803035442914633e-05, + "loss": 0.5633, + "step": 14883 + }, + { + "epoch": 0.4086765513454146, + "grad_norm": 0.370386004447937, + "learning_rate": 1.8030097042869788e-05, + "loss": 0.5202, + "step": 14884 + }, + { + "epoch": 0.40870400878638113, + "grad_norm": 0.34938517212867737, + "learning_rate": 1.802983964161456e-05, + "loss": 0.5666, + "step": 14885 + }, + { + "epoch": 0.40873146622734763, + "grad_norm": 0.355739563703537, + "learning_rate": 1.802958222538111e-05, + "loss": 0.4501, + "step": 14886 + }, + { + "epoch": 0.4087589236683141, + "grad_norm": 0.35048630833625793, + "learning_rate": 1.8029324794169932e-05, + "loss": 0.4975, + "step": 14887 + }, + { + "epoch": 0.4087863811092806, + "grad_norm": 0.35882246494293213, + "learning_rate": 1.8029067347981497e-05, + "loss": 0.5137, + "step": 14888 + }, + { + "epoch": 0.4088138385502471, + "grad_norm": 0.39398396015167236, + "learning_rate": 1.8028809886816293e-05, + "loss": 0.5594, + "step": 14889 + }, + { + "epoch": 0.4088412959912136, + "grad_norm": 0.35467544198036194, + "learning_rate": 1.802855241067479e-05, + "loss": 0.5525, + "step": 14890 + }, + { + "epoch": 0.4088687534321801, + "grad_norm": 0.3748033046722412, + "learning_rate": 1.802829491955748e-05, + "loss": 0.5159, + "step": 14891 + }, + { + "epoch": 0.40889621087314665, + "grad_norm": 0.3947317600250244, + "learning_rate": 1.8028037413464834e-05, + "loss": 0.5118, + "step": 14892 + }, + { + "epoch": 0.40892366831411314, + "grad_norm": 0.41923508048057556, + "learning_rate": 1.8027779892397338e-05, + "loss": 0.6106, + "step": 14893 + }, + { + "epoch": 0.40895112575507964, + "grad_norm": 0.40595853328704834, + "learning_rate": 1.8027522356355472e-05, + "loss": 0.5796, + "step": 14894 + }, + { + "epoch": 0.40897858319604613, + "grad_norm": 0.7132221460342407, + "learning_rate": 1.8027264805339713e-05, + "loss": 0.4802, + "step": 14895 + }, + { + "epoch": 0.4090060406370126, + "grad_norm": 0.3711858093738556, + "learning_rate": 1.802700723935054e-05, + "loss": 0.5467, + "step": 14896 + }, + { + "epoch": 0.4090334980779791, + "grad_norm": 0.4466070532798767, + "learning_rate": 1.802674965838844e-05, + "loss": 0.494, + "step": 14897 + }, + { + "epoch": 0.4090609555189456, + "grad_norm": 0.381904661655426, + "learning_rate": 1.802649206245389e-05, + "loss": 0.4789, + "step": 14898 + }, + { + "epoch": 0.40908841295991216, + "grad_norm": 0.4134085476398468, + "learning_rate": 1.802623445154737e-05, + "loss": 0.5378, + "step": 14899 + }, + { + "epoch": 0.40911587040087866, + "grad_norm": 0.36785486340522766, + "learning_rate": 1.802597682566936e-05, + "loss": 0.5536, + "step": 14900 + }, + { + "epoch": 0.40914332784184515, + "grad_norm": 0.37199467420578003, + "learning_rate": 1.802571918482034e-05, + "loss": 0.5189, + "step": 14901 + }, + { + "epoch": 0.40917078528281164, + "grad_norm": 0.38149160146713257, + "learning_rate": 1.8025461529000796e-05, + "loss": 0.4629, + "step": 14902 + }, + { + "epoch": 0.40919824272377814, + "grad_norm": 0.3939107358455658, + "learning_rate": 1.8025203858211203e-05, + "loss": 0.53, + "step": 14903 + }, + { + "epoch": 0.40922570016474463, + "grad_norm": 0.3472651243209839, + "learning_rate": 1.8024946172452046e-05, + "loss": 0.531, + "step": 14904 + }, + { + "epoch": 0.4092531576057111, + "grad_norm": 0.39366745948791504, + "learning_rate": 1.8024688471723803e-05, + "loss": 0.5713, + "step": 14905 + }, + { + "epoch": 0.4092806150466777, + "grad_norm": 0.3902493715286255, + "learning_rate": 1.802443075602695e-05, + "loss": 0.4766, + "step": 14906 + }, + { + "epoch": 0.40930807248764417, + "grad_norm": 0.3360219895839691, + "learning_rate": 1.8024173025361978e-05, + "loss": 0.4521, + "step": 14907 + }, + { + "epoch": 0.40933552992861066, + "grad_norm": 0.34063348174095154, + "learning_rate": 1.8023915279729358e-05, + "loss": 0.4791, + "step": 14908 + }, + { + "epoch": 0.40936298736957716, + "grad_norm": 0.37399619817733765, + "learning_rate": 1.8023657519129577e-05, + "loss": 0.5386, + "step": 14909 + }, + { + "epoch": 0.40939044481054365, + "grad_norm": 0.37100523710250854, + "learning_rate": 1.8023399743563114e-05, + "loss": 0.5286, + "step": 14910 + }, + { + "epoch": 0.40941790225151015, + "grad_norm": 0.31943148374557495, + "learning_rate": 1.802314195303045e-05, + "loss": 0.4888, + "step": 14911 + }, + { + "epoch": 0.40944535969247664, + "grad_norm": 0.36010581254959106, + "learning_rate": 1.8022884147532065e-05, + "loss": 0.5405, + "step": 14912 + }, + { + "epoch": 0.4094728171334432, + "grad_norm": 0.39181894063949585, + "learning_rate": 1.8022626327068437e-05, + "loss": 0.4569, + "step": 14913 + }, + { + "epoch": 0.4095002745744097, + "grad_norm": 0.3485989272594452, + "learning_rate": 1.8022368491640055e-05, + "loss": 0.4924, + "step": 14914 + }, + { + "epoch": 0.4095277320153762, + "grad_norm": 0.36405041813850403, + "learning_rate": 1.8022110641247396e-05, + "loss": 0.4914, + "step": 14915 + }, + { + "epoch": 0.40955518945634267, + "grad_norm": 0.4030599892139435, + "learning_rate": 1.8021852775890938e-05, + "loss": 0.5319, + "step": 14916 + }, + { + "epoch": 0.40958264689730917, + "grad_norm": 0.33321407437324524, + "learning_rate": 1.8021594895571165e-05, + "loss": 0.491, + "step": 14917 + }, + { + "epoch": 0.40961010433827566, + "grad_norm": 0.37069597840309143, + "learning_rate": 1.8021337000288555e-05, + "loss": 0.5186, + "step": 14918 + }, + { + "epoch": 0.40963756177924215, + "grad_norm": 1.017444372177124, + "learning_rate": 1.8021079090043595e-05, + "loss": 0.557, + "step": 14919 + }, + { + "epoch": 0.4096650192202087, + "grad_norm": 0.3768616318702698, + "learning_rate": 1.802082116483676e-05, + "loss": 0.4982, + "step": 14920 + }, + { + "epoch": 0.4096924766611752, + "grad_norm": 0.38224297761917114, + "learning_rate": 1.8020563224668534e-05, + "loss": 0.5001, + "step": 14921 + }, + { + "epoch": 0.4097199341021417, + "grad_norm": 0.391998291015625, + "learning_rate": 1.8020305269539397e-05, + "loss": 0.5254, + "step": 14922 + }, + { + "epoch": 0.4097473915431082, + "grad_norm": 0.3757077753543854, + "learning_rate": 1.8020047299449835e-05, + "loss": 0.5295, + "step": 14923 + }, + { + "epoch": 0.4097748489840747, + "grad_norm": 0.4474014937877655, + "learning_rate": 1.801978931440032e-05, + "loss": 0.5046, + "step": 14924 + }, + { + "epoch": 0.4098023064250412, + "grad_norm": 0.39377737045288086, + "learning_rate": 1.801953131439134e-05, + "loss": 0.5348, + "step": 14925 + }, + { + "epoch": 0.40982976386600767, + "grad_norm": 0.3966543972492218, + "learning_rate": 1.8019273299423377e-05, + "loss": 0.5605, + "step": 14926 + }, + { + "epoch": 0.4098572213069742, + "grad_norm": 0.36608457565307617, + "learning_rate": 1.8019015269496908e-05, + "loss": 0.4768, + "step": 14927 + }, + { + "epoch": 0.4098846787479407, + "grad_norm": 0.3948429226875305, + "learning_rate": 1.8018757224612416e-05, + "loss": 0.4694, + "step": 14928 + }, + { + "epoch": 0.4099121361889072, + "grad_norm": 0.36695829033851624, + "learning_rate": 1.8018499164770383e-05, + "loss": 0.5066, + "step": 14929 + }, + { + "epoch": 0.4099395936298737, + "grad_norm": 0.3620828092098236, + "learning_rate": 1.8018241089971286e-05, + "loss": 0.4607, + "step": 14930 + }, + { + "epoch": 0.4099670510708402, + "grad_norm": 0.35110461711883545, + "learning_rate": 1.8017983000215617e-05, + "loss": 0.4851, + "step": 14931 + }, + { + "epoch": 0.4099945085118067, + "grad_norm": 0.3691723942756653, + "learning_rate": 1.8017724895503846e-05, + "loss": 0.3953, + "step": 14932 + }, + { + "epoch": 0.4100219659527732, + "grad_norm": 0.31375938653945923, + "learning_rate": 1.801746677583646e-05, + "loss": 0.431, + "step": 14933 + }, + { + "epoch": 0.4100494233937397, + "grad_norm": 0.35002169013023376, + "learning_rate": 1.801720864121394e-05, + "loss": 0.4849, + "step": 14934 + }, + { + "epoch": 0.4100768808347062, + "grad_norm": 0.4523935914039612, + "learning_rate": 1.8016950491636764e-05, + "loss": 0.5401, + "step": 14935 + }, + { + "epoch": 0.4101043382756727, + "grad_norm": 0.3678473234176636, + "learning_rate": 1.801669232710542e-05, + "loss": 0.6036, + "step": 14936 + }, + { + "epoch": 0.4101317957166392, + "grad_norm": 0.35240787267684937, + "learning_rate": 1.8016434147620386e-05, + "loss": 0.5077, + "step": 14937 + }, + { + "epoch": 0.4101592531576057, + "grad_norm": 0.332116037607193, + "learning_rate": 1.8016175953182144e-05, + "loss": 0.4532, + "step": 14938 + }, + { + "epoch": 0.4101867105985722, + "grad_norm": 0.3356744945049286, + "learning_rate": 1.8015917743791176e-05, + "loss": 0.4575, + "step": 14939 + }, + { + "epoch": 0.4102141680395387, + "grad_norm": 0.342543363571167, + "learning_rate": 1.801565951944796e-05, + "loss": 0.5761, + "step": 14940 + }, + { + "epoch": 0.4102416254805052, + "grad_norm": 0.40437307953834534, + "learning_rate": 1.8015401280152986e-05, + "loss": 0.4899, + "step": 14941 + }, + { + "epoch": 0.41026908292147174, + "grad_norm": 0.40330877900123596, + "learning_rate": 1.8015143025906724e-05, + "loss": 0.4823, + "step": 14942 + }, + { + "epoch": 0.41029654036243823, + "grad_norm": 0.38051944971084595, + "learning_rate": 1.801488475670967e-05, + "loss": 0.5441, + "step": 14943 + }, + { + "epoch": 0.4103239978034047, + "grad_norm": 0.4266375005245209, + "learning_rate": 1.801462647256229e-05, + "loss": 0.5115, + "step": 14944 + }, + { + "epoch": 0.4103514552443712, + "grad_norm": 0.35357171297073364, + "learning_rate": 1.8014368173465074e-05, + "loss": 0.4545, + "step": 14945 + }, + { + "epoch": 0.4103789126853377, + "grad_norm": 0.33500656485557556, + "learning_rate": 1.8014109859418506e-05, + "loss": 0.4705, + "step": 14946 + }, + { + "epoch": 0.4104063701263042, + "grad_norm": 0.353322297334671, + "learning_rate": 1.8013851530423066e-05, + "loss": 0.5872, + "step": 14947 + }, + { + "epoch": 0.4104338275672707, + "grad_norm": 0.35315221548080444, + "learning_rate": 1.8013593186479237e-05, + "loss": 0.5184, + "step": 14948 + }, + { + "epoch": 0.41046128500823725, + "grad_norm": 0.3813753128051758, + "learning_rate": 1.8013334827587495e-05, + "loss": 0.5124, + "step": 14949 + }, + { + "epoch": 0.41048874244920375, + "grad_norm": 0.3620849847793579, + "learning_rate": 1.801307645374833e-05, + "loss": 0.4822, + "step": 14950 + }, + { + "epoch": 0.41051619989017024, + "grad_norm": 0.4730587899684906, + "learning_rate": 1.8012818064962217e-05, + "loss": 0.4484, + "step": 14951 + }, + { + "epoch": 0.41054365733113674, + "grad_norm": 0.3624492287635803, + "learning_rate": 1.8012559661229642e-05, + "loss": 0.4477, + "step": 14952 + }, + { + "epoch": 0.41057111477210323, + "grad_norm": 0.3689165413379669, + "learning_rate": 1.8012301242551086e-05, + "loss": 0.4524, + "step": 14953 + }, + { + "epoch": 0.4105985722130697, + "grad_norm": 0.38393163681030273, + "learning_rate": 1.8012042808927027e-05, + "loss": 0.4552, + "step": 14954 + }, + { + "epoch": 0.4106260296540362, + "grad_norm": 0.3271533250808716, + "learning_rate": 1.8011784360357955e-05, + "loss": 0.475, + "step": 14955 + }, + { + "epoch": 0.41065348709500277, + "grad_norm": 0.3902164399623871, + "learning_rate": 1.801152589684435e-05, + "loss": 0.4658, + "step": 14956 + }, + { + "epoch": 0.41068094453596926, + "grad_norm": 0.3856469392776489, + "learning_rate": 1.8011267418386687e-05, + "loss": 0.5716, + "step": 14957 + }, + { + "epoch": 0.41070840197693576, + "grad_norm": 0.38177722692489624, + "learning_rate": 1.8011008924985457e-05, + "loss": 0.4535, + "step": 14958 + }, + { + "epoch": 0.41073585941790225, + "grad_norm": 0.3581541180610657, + "learning_rate": 1.8010750416641138e-05, + "loss": 0.5152, + "step": 14959 + }, + { + "epoch": 0.41076331685886874, + "grad_norm": 0.379116952419281, + "learning_rate": 1.8010491893354213e-05, + "loss": 0.4961, + "step": 14960 + }, + { + "epoch": 0.41079077429983524, + "grad_norm": 0.37521880865097046, + "learning_rate": 1.8010233355125163e-05, + "loss": 0.4764, + "step": 14961 + }, + { + "epoch": 0.41081823174080173, + "grad_norm": 0.3584449887275696, + "learning_rate": 1.800997480195447e-05, + "loss": 0.5392, + "step": 14962 + }, + { + "epoch": 0.4108456891817683, + "grad_norm": 0.34422430396080017, + "learning_rate": 1.800971623384262e-05, + "loss": 0.4945, + "step": 14963 + }, + { + "epoch": 0.4108731466227348, + "grad_norm": 0.40402641892433167, + "learning_rate": 1.800945765079009e-05, + "loss": 0.5783, + "step": 14964 + }, + { + "epoch": 0.41090060406370127, + "grad_norm": 0.4081943929195404, + "learning_rate": 1.800919905279737e-05, + "loss": 0.5113, + "step": 14965 + }, + { + "epoch": 0.41092806150466776, + "grad_norm": 0.49681979417800903, + "learning_rate": 1.8008940439864934e-05, + "loss": 0.5715, + "step": 14966 + }, + { + "epoch": 0.41095551894563426, + "grad_norm": 0.9535386562347412, + "learning_rate": 1.800868181199327e-05, + "loss": 0.6052, + "step": 14967 + }, + { + "epoch": 0.41098297638660075, + "grad_norm": 0.3501814603805542, + "learning_rate": 1.8008423169182856e-05, + "loss": 0.4313, + "step": 14968 + }, + { + "epoch": 0.41101043382756725, + "grad_norm": 0.38390710949897766, + "learning_rate": 1.8008164511434177e-05, + "loss": 0.6066, + "step": 14969 + }, + { + "epoch": 0.4110378912685338, + "grad_norm": 0.3468882441520691, + "learning_rate": 1.8007905838747716e-05, + "loss": 0.5032, + "step": 14970 + }, + { + "epoch": 0.4110653487095003, + "grad_norm": 0.34227868914604187, + "learning_rate": 1.8007647151123955e-05, + "loss": 0.5229, + "step": 14971 + }, + { + "epoch": 0.4110928061504668, + "grad_norm": 0.344760000705719, + "learning_rate": 1.8007388448563374e-05, + "loss": 0.4993, + "step": 14972 + }, + { + "epoch": 0.4111202635914333, + "grad_norm": 0.3503035306930542, + "learning_rate": 1.8007129731066464e-05, + "loss": 0.4702, + "step": 14973 + }, + { + "epoch": 0.41114772103239977, + "grad_norm": 0.37122756242752075, + "learning_rate": 1.8006870998633698e-05, + "loss": 0.4972, + "step": 14974 + }, + { + "epoch": 0.41117517847336627, + "grad_norm": 0.41246047616004944, + "learning_rate": 1.800661225126556e-05, + "loss": 0.5563, + "step": 14975 + }, + { + "epoch": 0.41120263591433276, + "grad_norm": 0.35467013716697693, + "learning_rate": 1.8006353488962534e-05, + "loss": 0.4985, + "step": 14976 + }, + { + "epoch": 0.4112300933552993, + "grad_norm": 0.3738156259059906, + "learning_rate": 1.8006094711725106e-05, + "loss": 0.5118, + "step": 14977 + }, + { + "epoch": 0.4112575507962658, + "grad_norm": 0.39219215512275696, + "learning_rate": 1.8005835919553755e-05, + "loss": 0.5322, + "step": 14978 + }, + { + "epoch": 0.4112850082372323, + "grad_norm": 0.36336153745651245, + "learning_rate": 1.800557711244897e-05, + "loss": 0.5171, + "step": 14979 + }, + { + "epoch": 0.4113124656781988, + "grad_norm": 0.3850153088569641, + "learning_rate": 1.800531829041122e-05, + "loss": 0.518, + "step": 14980 + }, + { + "epoch": 0.4113399231191653, + "grad_norm": 0.36123451590538025, + "learning_rate": 1.8005059453441002e-05, + "loss": 0.6024, + "step": 14981 + }, + { + "epoch": 0.4113673805601318, + "grad_norm": 0.4330444931983948, + "learning_rate": 1.800480060153879e-05, + "loss": 0.5002, + "step": 14982 + }, + { + "epoch": 0.4113948380010983, + "grad_norm": 2.0935049057006836, + "learning_rate": 1.800454173470507e-05, + "loss": 0.5635, + "step": 14983 + }, + { + "epoch": 0.4114222954420648, + "grad_norm": 0.3442435562610626, + "learning_rate": 1.8004282852940332e-05, + "loss": 0.4739, + "step": 14984 + }, + { + "epoch": 0.4114497528830313, + "grad_norm": 0.359579473733902, + "learning_rate": 1.8004023956245044e-05, + "loss": 0.4641, + "step": 14985 + }, + { + "epoch": 0.4114772103239978, + "grad_norm": 0.44316166639328003, + "learning_rate": 1.80037650446197e-05, + "loss": 0.4923, + "step": 14986 + }, + { + "epoch": 0.4115046677649643, + "grad_norm": 0.34696292877197266, + "learning_rate": 1.800350611806478e-05, + "loss": 0.5147, + "step": 14987 + }, + { + "epoch": 0.4115321252059308, + "grad_norm": 0.37955164909362793, + "learning_rate": 1.8003247176580765e-05, + "loss": 0.4411, + "step": 14988 + }, + { + "epoch": 0.4115595826468973, + "grad_norm": 0.339478075504303, + "learning_rate": 1.800298822016814e-05, + "loss": 0.5398, + "step": 14989 + }, + { + "epoch": 0.4115870400878638, + "grad_norm": 0.3913586735725403, + "learning_rate": 1.8002729248827386e-05, + "loss": 0.554, + "step": 14990 + }, + { + "epoch": 0.41161449752883034, + "grad_norm": 0.411346971988678, + "learning_rate": 1.800247026255899e-05, + "loss": 0.5509, + "step": 14991 + }, + { + "epoch": 0.41164195496979683, + "grad_norm": 0.38574495911598206, + "learning_rate": 1.8002211261363432e-05, + "loss": 0.5025, + "step": 14992 + }, + { + "epoch": 0.4116694124107633, + "grad_norm": 0.39728978276252747, + "learning_rate": 1.80019522452412e-05, + "loss": 0.4718, + "step": 14993 + }, + { + "epoch": 0.4116968698517298, + "grad_norm": 0.34998294711112976, + "learning_rate": 1.800169321419277e-05, + "loss": 0.4728, + "step": 14994 + }, + { + "epoch": 0.4117243272926963, + "grad_norm": 0.377285897731781, + "learning_rate": 1.800143416821863e-05, + "loss": 0.5281, + "step": 14995 + }, + { + "epoch": 0.4117517847336628, + "grad_norm": 0.3662301003932953, + "learning_rate": 1.8001175107319255e-05, + "loss": 0.5106, + "step": 14996 + }, + { + "epoch": 0.4117792421746293, + "grad_norm": 0.40407758951187134, + "learning_rate": 1.800091603149514e-05, + "loss": 0.5803, + "step": 14997 + }, + { + "epoch": 0.41180669961559585, + "grad_norm": 0.4000510573387146, + "learning_rate": 1.8000656940746763e-05, + "loss": 0.6477, + "step": 14998 + }, + { + "epoch": 0.41183415705656234, + "grad_norm": 0.3954298198223114, + "learning_rate": 1.8000397835074607e-05, + "loss": 0.5729, + "step": 14999 + }, + { + "epoch": 0.41186161449752884, + "grad_norm": 0.3695518970489502, + "learning_rate": 1.8000138714479157e-05, + "loss": 0.5006, + "step": 15000 + }, + { + "epoch": 0.41188907193849533, + "grad_norm": 0.4085278809070587, + "learning_rate": 1.799987957896089e-05, + "loss": 0.5519, + "step": 15001 + }, + { + "epoch": 0.4119165293794618, + "grad_norm": 0.35721802711486816, + "learning_rate": 1.7999620428520298e-05, + "loss": 0.5789, + "step": 15002 + }, + { + "epoch": 0.4119439868204283, + "grad_norm": 0.3643236756324768, + "learning_rate": 1.7999361263157862e-05, + "loss": 0.4836, + "step": 15003 + }, + { + "epoch": 0.4119714442613948, + "grad_norm": 0.36840301752090454, + "learning_rate": 1.7999102082874063e-05, + "loss": 0.5299, + "step": 15004 + }, + { + "epoch": 0.41199890170236136, + "grad_norm": 0.36411091685295105, + "learning_rate": 1.7998842887669383e-05, + "loss": 0.4696, + "step": 15005 + }, + { + "epoch": 0.41202635914332786, + "grad_norm": 0.4620400667190552, + "learning_rate": 1.7998583677544312e-05, + "loss": 0.5299, + "step": 15006 + }, + { + "epoch": 0.41205381658429435, + "grad_norm": 0.3748687505722046, + "learning_rate": 1.7998324452499325e-05, + "loss": 0.572, + "step": 15007 + }, + { + "epoch": 0.41208127402526085, + "grad_norm": 0.36257603764533997, + "learning_rate": 1.7998065212534916e-05, + "loss": 0.5519, + "step": 15008 + }, + { + "epoch": 0.41210873146622734, + "grad_norm": 0.34694191813468933, + "learning_rate": 1.7997805957651557e-05, + "loss": 0.4771, + "step": 15009 + }, + { + "epoch": 0.41213618890719383, + "grad_norm": 0.37165603041648865, + "learning_rate": 1.799754668784974e-05, + "loss": 0.462, + "step": 15010 + }, + { + "epoch": 0.41216364634816033, + "grad_norm": 0.40199095010757446, + "learning_rate": 1.7997287403129945e-05, + "loss": 0.4449, + "step": 15011 + }, + { + "epoch": 0.4121911037891269, + "grad_norm": 0.3486410975456238, + "learning_rate": 1.799702810349266e-05, + "loss": 0.4606, + "step": 15012 + }, + { + "epoch": 0.41221856123009337, + "grad_norm": 0.4198271930217743, + "learning_rate": 1.7996768788938362e-05, + "loss": 0.5556, + "step": 15013 + }, + { + "epoch": 0.41224601867105987, + "grad_norm": 0.7209258675575256, + "learning_rate": 1.7996509459467538e-05, + "loss": 0.5187, + "step": 15014 + }, + { + "epoch": 0.41227347611202636, + "grad_norm": 0.38599756360054016, + "learning_rate": 1.799625011508067e-05, + "loss": 0.4754, + "step": 15015 + }, + { + "epoch": 0.41230093355299285, + "grad_norm": 0.38290077447891235, + "learning_rate": 1.7995990755778247e-05, + "loss": 0.5558, + "step": 15016 + }, + { + "epoch": 0.41232839099395935, + "grad_norm": 0.42361727356910706, + "learning_rate": 1.799573138156075e-05, + "loss": 0.5006, + "step": 15017 + }, + { + "epoch": 0.41235584843492584, + "grad_norm": 0.36995482444763184, + "learning_rate": 1.7995471992428658e-05, + "loss": 0.5467, + "step": 15018 + }, + { + "epoch": 0.4123833058758924, + "grad_norm": 0.3529278337955475, + "learning_rate": 1.799521258838246e-05, + "loss": 0.5272, + "step": 15019 + }, + { + "epoch": 0.4124107633168589, + "grad_norm": 0.3259970247745514, + "learning_rate": 1.799495316942264e-05, + "loss": 0.4275, + "step": 15020 + }, + { + "epoch": 0.4124382207578254, + "grad_norm": 0.3615996837615967, + "learning_rate": 1.799469373554968e-05, + "loss": 0.4703, + "step": 15021 + }, + { + "epoch": 0.4124656781987919, + "grad_norm": 0.33804556727409363, + "learning_rate": 1.7994434286764063e-05, + "loss": 0.5011, + "step": 15022 + }, + { + "epoch": 0.41249313563975837, + "grad_norm": 0.3930164873600006, + "learning_rate": 1.7994174823066276e-05, + "loss": 0.5447, + "step": 15023 + }, + { + "epoch": 0.41252059308072486, + "grad_norm": 0.3955042064189911, + "learning_rate": 1.7993915344456802e-05, + "loss": 0.4748, + "step": 15024 + }, + { + "epoch": 0.41254805052169136, + "grad_norm": 0.3855774998664856, + "learning_rate": 1.7993655850936126e-05, + "loss": 0.484, + "step": 15025 + }, + { + "epoch": 0.4125755079626579, + "grad_norm": 0.41312840580940247, + "learning_rate": 1.7993396342504727e-05, + "loss": 0.5481, + "step": 15026 + }, + { + "epoch": 0.4126029654036244, + "grad_norm": 0.3739473223686218, + "learning_rate": 1.7993136819163094e-05, + "loss": 0.5009, + "step": 15027 + }, + { + "epoch": 0.4126304228445909, + "grad_norm": 0.446560800075531, + "learning_rate": 1.799287728091171e-05, + "loss": 0.5383, + "step": 15028 + }, + { + "epoch": 0.4126578802855574, + "grad_norm": 0.35191380977630615, + "learning_rate": 1.7992617727751058e-05, + "loss": 0.5401, + "step": 15029 + }, + { + "epoch": 0.4126853377265239, + "grad_norm": 0.34725046157836914, + "learning_rate": 1.7992358159681624e-05, + "loss": 0.5707, + "step": 15030 + }, + { + "epoch": 0.4127127951674904, + "grad_norm": 0.3396152853965759, + "learning_rate": 1.7992098576703895e-05, + "loss": 0.4215, + "step": 15031 + }, + { + "epoch": 0.41274025260845687, + "grad_norm": 0.40499258041381836, + "learning_rate": 1.7991838978818345e-05, + "loss": 0.5106, + "step": 15032 + }, + { + "epoch": 0.4127677100494234, + "grad_norm": 0.422358900308609, + "learning_rate": 1.7991579366025466e-05, + "loss": 0.6443, + "step": 15033 + }, + { + "epoch": 0.4127951674903899, + "grad_norm": 0.36296772956848145, + "learning_rate": 1.7991319738325742e-05, + "loss": 0.4889, + "step": 15034 + }, + { + "epoch": 0.4128226249313564, + "grad_norm": 0.3764576017856598, + "learning_rate": 1.7991060095719658e-05, + "loss": 0.5543, + "step": 15035 + }, + { + "epoch": 0.4128500823723229, + "grad_norm": 0.3656993508338928, + "learning_rate": 1.7990800438207695e-05, + "loss": 0.5061, + "step": 15036 + }, + { + "epoch": 0.4128775398132894, + "grad_norm": 0.35426440834999084, + "learning_rate": 1.7990540765790337e-05, + "loss": 0.4909, + "step": 15037 + }, + { + "epoch": 0.4129049972542559, + "grad_norm": 0.36149582266807556, + "learning_rate": 1.799028107846807e-05, + "loss": 0.4946, + "step": 15038 + }, + { + "epoch": 0.4129324546952224, + "grad_norm": 0.3987847566604614, + "learning_rate": 1.799002137624138e-05, + "loss": 0.508, + "step": 15039 + }, + { + "epoch": 0.41295991213618893, + "grad_norm": 0.3373507559299469, + "learning_rate": 1.798976165911075e-05, + "loss": 0.535, + "step": 15040 + }, + { + "epoch": 0.4129873695771554, + "grad_norm": 0.3186223804950714, + "learning_rate": 1.7989501927076663e-05, + "loss": 0.4513, + "step": 15041 + }, + { + "epoch": 0.4130148270181219, + "grad_norm": 0.38160842657089233, + "learning_rate": 1.7989242180139607e-05, + "loss": 0.5278, + "step": 15042 + }, + { + "epoch": 0.4130422844590884, + "grad_norm": 0.3726808726787567, + "learning_rate": 1.7988982418300066e-05, + "loss": 0.435, + "step": 15043 + }, + { + "epoch": 0.4130697419000549, + "grad_norm": 0.40342631936073303, + "learning_rate": 1.7988722641558522e-05, + "loss": 0.5356, + "step": 15044 + }, + { + "epoch": 0.4130971993410214, + "grad_norm": 0.36245793104171753, + "learning_rate": 1.7988462849915456e-05, + "loss": 0.5342, + "step": 15045 + }, + { + "epoch": 0.4131246567819879, + "grad_norm": 0.3491043746471405, + "learning_rate": 1.7988203043371363e-05, + "loss": 0.5066, + "step": 15046 + }, + { + "epoch": 0.41315211422295445, + "grad_norm": 0.37365448474884033, + "learning_rate": 1.798794322192672e-05, + "loss": 0.5032, + "step": 15047 + }, + { + "epoch": 0.41317957166392094, + "grad_norm": 0.33374086022377014, + "learning_rate": 1.7987683385582012e-05, + "loss": 0.4711, + "step": 15048 + }, + { + "epoch": 0.41320702910488744, + "grad_norm": 0.37682196497917175, + "learning_rate": 1.7987423534337727e-05, + "loss": 0.5006, + "step": 15049 + }, + { + "epoch": 0.41323448654585393, + "grad_norm": 0.42380237579345703, + "learning_rate": 1.7987163668194346e-05, + "loss": 0.4874, + "step": 15050 + }, + { + "epoch": 0.4132619439868204, + "grad_norm": 0.38004785776138306, + "learning_rate": 1.798690378715236e-05, + "loss": 0.4429, + "step": 15051 + }, + { + "epoch": 0.4132894014277869, + "grad_norm": 0.3915775716304779, + "learning_rate": 1.7986643891212244e-05, + "loss": 0.6003, + "step": 15052 + }, + { + "epoch": 0.4133168588687534, + "grad_norm": 0.4075359106063843, + "learning_rate": 1.7986383980374488e-05, + "loss": 0.4597, + "step": 15053 + }, + { + "epoch": 0.41334431630971996, + "grad_norm": 0.32012638449668884, + "learning_rate": 1.798612405463958e-05, + "loss": 0.4796, + "step": 15054 + }, + { + "epoch": 0.41337177375068646, + "grad_norm": 0.4112090766429901, + "learning_rate": 1.7985864114008e-05, + "loss": 0.593, + "step": 15055 + }, + { + "epoch": 0.41339923119165295, + "grad_norm": 0.3807457387447357, + "learning_rate": 1.7985604158480234e-05, + "loss": 0.5002, + "step": 15056 + }, + { + "epoch": 0.41342668863261944, + "grad_norm": 0.36276352405548096, + "learning_rate": 1.798534418805677e-05, + "loss": 0.6136, + "step": 15057 + }, + { + "epoch": 0.41345414607358594, + "grad_norm": 0.30379223823547363, + "learning_rate": 1.798508420273809e-05, + "loss": 0.3847, + "step": 15058 + }, + { + "epoch": 0.41348160351455243, + "grad_norm": 0.37754058837890625, + "learning_rate": 1.7984824202524676e-05, + "loss": 0.5658, + "step": 15059 + }, + { + "epoch": 0.4135090609555189, + "grad_norm": 0.4127773940563202, + "learning_rate": 1.7984564187417023e-05, + "loss": 0.5506, + "step": 15060 + }, + { + "epoch": 0.4135365183964855, + "grad_norm": 0.4121566712856293, + "learning_rate": 1.7984304157415602e-05, + "loss": 0.5488, + "step": 15061 + }, + { + "epoch": 0.41356397583745197, + "grad_norm": 0.3987113833427429, + "learning_rate": 1.798404411252091e-05, + "loss": 0.5066, + "step": 15062 + }, + { + "epoch": 0.41359143327841846, + "grad_norm": 0.38142672181129456, + "learning_rate": 1.7983784052733427e-05, + "loss": 0.5759, + "step": 15063 + }, + { + "epoch": 0.41361889071938496, + "grad_norm": 0.42130959033966064, + "learning_rate": 1.7983523978053638e-05, + "loss": 0.5886, + "step": 15064 + }, + { + "epoch": 0.41364634816035145, + "grad_norm": 0.4252500534057617, + "learning_rate": 1.798326388848203e-05, + "loss": 0.3582, + "step": 15065 + }, + { + "epoch": 0.41367380560131795, + "grad_norm": 0.3746863007545471, + "learning_rate": 1.7983003784019085e-05, + "loss": 0.5456, + "step": 15066 + }, + { + "epoch": 0.41370126304228444, + "grad_norm": 0.3952763080596924, + "learning_rate": 1.798274366466529e-05, + "loss": 0.5155, + "step": 15067 + }, + { + "epoch": 0.41372872048325093, + "grad_norm": 0.4074193239212036, + "learning_rate": 1.798248353042113e-05, + "loss": 0.5189, + "step": 15068 + }, + { + "epoch": 0.4137561779242175, + "grad_norm": 0.4592621326446533, + "learning_rate": 1.7982223381287092e-05, + "loss": 0.4905, + "step": 15069 + }, + { + "epoch": 0.413783635365184, + "grad_norm": 0.3510452210903168, + "learning_rate": 1.798196321726366e-05, + "loss": 0.469, + "step": 15070 + }, + { + "epoch": 0.41381109280615047, + "grad_norm": 0.4013993442058563, + "learning_rate": 1.7981703038351317e-05, + "loss": 0.5862, + "step": 15071 + }, + { + "epoch": 0.41383855024711697, + "grad_norm": 0.3407565951347351, + "learning_rate": 1.7981442844550553e-05, + "loss": 0.516, + "step": 15072 + }, + { + "epoch": 0.41386600768808346, + "grad_norm": 0.3507640063762665, + "learning_rate": 1.798118263586185e-05, + "loss": 0.4762, + "step": 15073 + }, + { + "epoch": 0.41389346512904995, + "grad_norm": 0.39844363927841187, + "learning_rate": 1.7980922412285694e-05, + "loss": 0.5272, + "step": 15074 + }, + { + "epoch": 0.41392092257001645, + "grad_norm": 0.3646073341369629, + "learning_rate": 1.7980662173822567e-05, + "loss": 0.4746, + "step": 15075 + }, + { + "epoch": 0.413948380010983, + "grad_norm": 0.6399762630462646, + "learning_rate": 1.7980401920472965e-05, + "loss": 0.5321, + "step": 15076 + }, + { + "epoch": 0.4139758374519495, + "grad_norm": 0.37406185269355774, + "learning_rate": 1.7980141652237358e-05, + "loss": 0.5278, + "step": 15077 + }, + { + "epoch": 0.414003294892916, + "grad_norm": 0.33969220519065857, + "learning_rate": 1.7979881369116245e-05, + "loss": 0.4594, + "step": 15078 + }, + { + "epoch": 0.4140307523338825, + "grad_norm": 0.4435085654258728, + "learning_rate": 1.7979621071110104e-05, + "loss": 0.7447, + "step": 15079 + }, + { + "epoch": 0.414058209774849, + "grad_norm": 0.3593395948410034, + "learning_rate": 1.7979360758219425e-05, + "loss": 0.4995, + "step": 15080 + }, + { + "epoch": 0.41408566721581547, + "grad_norm": 0.39058804512023926, + "learning_rate": 1.797910043044469e-05, + "loss": 0.6328, + "step": 15081 + }, + { + "epoch": 0.41411312465678196, + "grad_norm": 0.4026370346546173, + "learning_rate": 1.797884008778639e-05, + "loss": 0.5121, + "step": 15082 + }, + { + "epoch": 0.4141405820977485, + "grad_norm": 0.37211495637893677, + "learning_rate": 1.7978579730245004e-05, + "loss": 0.462, + "step": 15083 + }, + { + "epoch": 0.414168039538715, + "grad_norm": 0.3663313388824463, + "learning_rate": 1.7978319357821017e-05, + "loss": 0.462, + "step": 15084 + }, + { + "epoch": 0.4141954969796815, + "grad_norm": 0.3696273863315582, + "learning_rate": 1.7978058970514923e-05, + "loss": 0.5022, + "step": 15085 + }, + { + "epoch": 0.414222954420648, + "grad_norm": 0.37834489345550537, + "learning_rate": 1.79777985683272e-05, + "loss": 0.4825, + "step": 15086 + }, + { + "epoch": 0.4142504118616145, + "grad_norm": 0.36541667580604553, + "learning_rate": 1.797753815125834e-05, + "loss": 0.5099, + "step": 15087 + }, + { + "epoch": 0.414277869302581, + "grad_norm": 0.37876564264297485, + "learning_rate": 1.7977277719308817e-05, + "loss": 0.4972, + "step": 15088 + }, + { + "epoch": 0.4143053267435475, + "grad_norm": 0.3967914879322052, + "learning_rate": 1.7977017272479133e-05, + "loss": 0.4504, + "step": 15089 + }, + { + "epoch": 0.414332784184514, + "grad_norm": 0.3591483533382416, + "learning_rate": 1.7976756810769763e-05, + "loss": 0.4715, + "step": 15090 + }, + { + "epoch": 0.4143602416254805, + "grad_norm": 0.37334051728248596, + "learning_rate": 1.7976496334181195e-05, + "loss": 0.6164, + "step": 15091 + }, + { + "epoch": 0.414387699066447, + "grad_norm": 0.42060375213623047, + "learning_rate": 1.7976235842713918e-05, + "loss": 0.5063, + "step": 15092 + }, + { + "epoch": 0.4144151565074135, + "grad_norm": 0.3622444272041321, + "learning_rate": 1.7975975336368412e-05, + "loss": 0.4853, + "step": 15093 + }, + { + "epoch": 0.41444261394838, + "grad_norm": 0.4200735092163086, + "learning_rate": 1.797571481514517e-05, + "loss": 0.5449, + "step": 15094 + }, + { + "epoch": 0.4144700713893465, + "grad_norm": 0.3511117398738861, + "learning_rate": 1.797545427904467e-05, + "loss": 0.5939, + "step": 15095 + }, + { + "epoch": 0.414497528830313, + "grad_norm": 0.4344049096107483, + "learning_rate": 1.7975193728067405e-05, + "loss": 0.5597, + "step": 15096 + }, + { + "epoch": 0.41452498627127954, + "grad_norm": 0.32005149126052856, + "learning_rate": 1.797493316221386e-05, + "loss": 0.5572, + "step": 15097 + }, + { + "epoch": 0.41455244371224603, + "grad_norm": 0.3578105568885803, + "learning_rate": 1.7974672581484516e-05, + "loss": 0.4952, + "step": 15098 + }, + { + "epoch": 0.4145799011532125, + "grad_norm": 0.3380603790283203, + "learning_rate": 1.7974411985879864e-05, + "loss": 0.5176, + "step": 15099 + }, + { + "epoch": 0.414607358594179, + "grad_norm": 0.32448604702949524, + "learning_rate": 1.797415137540039e-05, + "loss": 0.4692, + "step": 15100 + }, + { + "epoch": 0.4146348160351455, + "grad_norm": 0.37053239345550537, + "learning_rate": 1.7973890750046574e-05, + "loss": 0.5067, + "step": 15101 + }, + { + "epoch": 0.414662273476112, + "grad_norm": 0.41565102338790894, + "learning_rate": 1.7973630109818908e-05, + "loss": 0.5645, + "step": 15102 + }, + { + "epoch": 0.4146897309170785, + "grad_norm": 0.40012621879577637, + "learning_rate": 1.7973369454717883e-05, + "loss": 0.5731, + "step": 15103 + }, + { + "epoch": 0.41471718835804505, + "grad_norm": 0.33103978633880615, + "learning_rate": 1.7973108784743975e-05, + "loss": 0.4633, + "step": 15104 + }, + { + "epoch": 0.41474464579901155, + "grad_norm": 0.3613058030605316, + "learning_rate": 1.7972848099897673e-05, + "loss": 0.5286, + "step": 15105 + }, + { + "epoch": 0.41477210323997804, + "grad_norm": 0.35736632347106934, + "learning_rate": 1.7972587400179465e-05, + "loss": 0.4935, + "step": 15106 + }, + { + "epoch": 0.41479956068094453, + "grad_norm": 0.38830623030662537, + "learning_rate": 1.7972326685589838e-05, + "loss": 0.4799, + "step": 15107 + }, + { + "epoch": 0.41482701812191103, + "grad_norm": 0.4238798916339874, + "learning_rate": 1.7972065956129278e-05, + "loss": 0.4638, + "step": 15108 + }, + { + "epoch": 0.4148544755628775, + "grad_norm": 0.4025736153125763, + "learning_rate": 1.797180521179827e-05, + "loss": 0.468, + "step": 15109 + }, + { + "epoch": 0.414881933003844, + "grad_norm": 0.3550979793071747, + "learning_rate": 1.79715444525973e-05, + "loss": 0.5031, + "step": 15110 + }, + { + "epoch": 0.41490939044481057, + "grad_norm": 0.3712541460990906, + "learning_rate": 1.7971283678526858e-05, + "loss": 0.4998, + "step": 15111 + }, + { + "epoch": 0.41493684788577706, + "grad_norm": 0.40286174416542053, + "learning_rate": 1.7971022889587423e-05, + "loss": 0.5007, + "step": 15112 + }, + { + "epoch": 0.41496430532674355, + "grad_norm": 0.332398921251297, + "learning_rate": 1.797076208577949e-05, + "loss": 0.4015, + "step": 15113 + }, + { + "epoch": 0.41499176276771005, + "grad_norm": 0.3508703410625458, + "learning_rate": 1.797050126710354e-05, + "loss": 0.4766, + "step": 15114 + }, + { + "epoch": 0.41501922020867654, + "grad_norm": 0.38119110465049744, + "learning_rate": 1.797024043356006e-05, + "loss": 0.5674, + "step": 15115 + }, + { + "epoch": 0.41504667764964304, + "grad_norm": 0.3987010717391968, + "learning_rate": 1.7969979585149542e-05, + "loss": 0.4906, + "step": 15116 + }, + { + "epoch": 0.41507413509060953, + "grad_norm": 0.3541288673877716, + "learning_rate": 1.7969718721872467e-05, + "loss": 0.4735, + "step": 15117 + }, + { + "epoch": 0.4151015925315761, + "grad_norm": 0.39629483222961426, + "learning_rate": 1.7969457843729323e-05, + "loss": 0.5043, + "step": 15118 + }, + { + "epoch": 0.4151290499725426, + "grad_norm": 0.39659783244132996, + "learning_rate": 1.7969196950720595e-05, + "loss": 0.4362, + "step": 15119 + }, + { + "epoch": 0.41515650741350907, + "grad_norm": 0.3451426029205322, + "learning_rate": 1.796893604284677e-05, + "loss": 0.4712, + "step": 15120 + }, + { + "epoch": 0.41518396485447556, + "grad_norm": 0.3320528268814087, + "learning_rate": 1.7968675120108338e-05, + "loss": 0.4179, + "step": 15121 + }, + { + "epoch": 0.41521142229544206, + "grad_norm": 0.3702262043952942, + "learning_rate": 1.7968414182505785e-05, + "loss": 0.5016, + "step": 15122 + }, + { + "epoch": 0.41523887973640855, + "grad_norm": 0.38519778847694397, + "learning_rate": 1.7968153230039593e-05, + "loss": 0.5651, + "step": 15123 + }, + { + "epoch": 0.41526633717737504, + "grad_norm": 0.3897005021572113, + "learning_rate": 1.7967892262710256e-05, + "loss": 0.4803, + "step": 15124 + }, + { + "epoch": 0.4152937946183416, + "grad_norm": 0.36023688316345215, + "learning_rate": 1.7967631280518253e-05, + "loss": 0.6076, + "step": 15125 + }, + { + "epoch": 0.4153212520593081, + "grad_norm": 0.35873493552207947, + "learning_rate": 1.7967370283464077e-05, + "loss": 0.4671, + "step": 15126 + }, + { + "epoch": 0.4153487095002746, + "grad_norm": 0.39464670419692993, + "learning_rate": 1.796710927154821e-05, + "loss": 0.5261, + "step": 15127 + }, + { + "epoch": 0.4153761669412411, + "grad_norm": 0.37927767634391785, + "learning_rate": 1.7966848244771146e-05, + "loss": 0.4719, + "step": 15128 + }, + { + "epoch": 0.41540362438220757, + "grad_norm": 0.40275925397872925, + "learning_rate": 1.7966587203133364e-05, + "loss": 0.5557, + "step": 15129 + }, + { + "epoch": 0.41543108182317406, + "grad_norm": 0.4543324112892151, + "learning_rate": 1.7966326146635354e-05, + "loss": 0.5647, + "step": 15130 + }, + { + "epoch": 0.41545853926414056, + "grad_norm": 0.4163845479488373, + "learning_rate": 1.7966065075277605e-05, + "loss": 0.6178, + "step": 15131 + }, + { + "epoch": 0.4154859967051071, + "grad_norm": 0.35727325081825256, + "learning_rate": 1.79658039890606e-05, + "loss": 0.4937, + "step": 15132 + }, + { + "epoch": 0.4155134541460736, + "grad_norm": 0.32168424129486084, + "learning_rate": 1.796554288798483e-05, + "loss": 0.3873, + "step": 15133 + }, + { + "epoch": 0.4155409115870401, + "grad_norm": 0.389689177274704, + "learning_rate": 1.796528177205078e-05, + "loss": 0.579, + "step": 15134 + }, + { + "epoch": 0.4155683690280066, + "grad_norm": 0.3675808310508728, + "learning_rate": 1.7965020641258937e-05, + "loss": 0.544, + "step": 15135 + }, + { + "epoch": 0.4155958264689731, + "grad_norm": 0.3441985547542572, + "learning_rate": 1.7964759495609788e-05, + "loss": 0.5373, + "step": 15136 + }, + { + "epoch": 0.4156232839099396, + "grad_norm": 0.3805503845214844, + "learning_rate": 1.796449833510382e-05, + "loss": 0.4976, + "step": 15137 + }, + { + "epoch": 0.41565074135090607, + "grad_norm": 0.3442896604537964, + "learning_rate": 1.796423715974152e-05, + "loss": 0.5546, + "step": 15138 + }, + { + "epoch": 0.4156781987918726, + "grad_norm": 0.3551958203315735, + "learning_rate": 1.796397596952338e-05, + "loss": 0.5508, + "step": 15139 + }, + { + "epoch": 0.4157056562328391, + "grad_norm": 0.35242903232574463, + "learning_rate": 1.7963714764449878e-05, + "loss": 0.4451, + "step": 15140 + }, + { + "epoch": 0.4157331136738056, + "grad_norm": 0.43241074681282043, + "learning_rate": 1.796345354452151e-05, + "loss": 0.611, + "step": 15141 + }, + { + "epoch": 0.4157605711147721, + "grad_norm": 0.43856722116470337, + "learning_rate": 1.796319230973876e-05, + "loss": 0.5704, + "step": 15142 + }, + { + "epoch": 0.4157880285557386, + "grad_norm": 0.3902216851711273, + "learning_rate": 1.796293106010211e-05, + "loss": 0.532, + "step": 15143 + }, + { + "epoch": 0.4158154859967051, + "grad_norm": 0.3251062333583832, + "learning_rate": 1.7962669795612056e-05, + "loss": 0.4372, + "step": 15144 + }, + { + "epoch": 0.4158429434376716, + "grad_norm": 0.3818252682685852, + "learning_rate": 1.796240851626908e-05, + "loss": 0.5255, + "step": 15145 + }, + { + "epoch": 0.41587040087863814, + "grad_norm": 0.3514412045478821, + "learning_rate": 1.796214722207367e-05, + "loss": 0.4287, + "step": 15146 + }, + { + "epoch": 0.41589785831960463, + "grad_norm": 0.35643914341926575, + "learning_rate": 1.7961885913026315e-05, + "loss": 0.5597, + "step": 15147 + }, + { + "epoch": 0.4159253157605711, + "grad_norm": 0.3706134259700775, + "learning_rate": 1.79616245891275e-05, + "loss": 0.4681, + "step": 15148 + }, + { + "epoch": 0.4159527732015376, + "grad_norm": 0.345575213432312, + "learning_rate": 1.7961363250377717e-05, + "loss": 0.4781, + "step": 15149 + }, + { + "epoch": 0.4159802306425041, + "grad_norm": 0.3620861768722534, + "learning_rate": 1.796110189677745e-05, + "loss": 0.5826, + "step": 15150 + }, + { + "epoch": 0.4160076880834706, + "grad_norm": 0.38967716693878174, + "learning_rate": 1.7960840528327187e-05, + "loss": 0.5056, + "step": 15151 + }, + { + "epoch": 0.4160351455244371, + "grad_norm": 0.346583753824234, + "learning_rate": 1.7960579145027414e-05, + "loss": 0.5446, + "step": 15152 + }, + { + "epoch": 0.41606260296540365, + "grad_norm": 0.392593652009964, + "learning_rate": 1.796031774687862e-05, + "loss": 0.5156, + "step": 15153 + }, + { + "epoch": 0.41609006040637014, + "grad_norm": 0.3864503800868988, + "learning_rate": 1.7960056333881296e-05, + "loss": 0.5244, + "step": 15154 + }, + { + "epoch": 0.41611751784733664, + "grad_norm": 0.4476706087589264, + "learning_rate": 1.7959794906035925e-05, + "loss": 0.4858, + "step": 15155 + }, + { + "epoch": 0.41614497528830313, + "grad_norm": 0.37372276186943054, + "learning_rate": 1.7959533463342994e-05, + "loss": 0.5523, + "step": 15156 + }, + { + "epoch": 0.4161724327292696, + "grad_norm": 0.3652246594429016, + "learning_rate": 1.7959272005803e-05, + "loss": 0.4661, + "step": 15157 + }, + { + "epoch": 0.4161998901702361, + "grad_norm": 0.3733341693878174, + "learning_rate": 1.7959010533416413e-05, + "loss": 0.5243, + "step": 15158 + }, + { + "epoch": 0.4162273476112026, + "grad_norm": 0.407382071018219, + "learning_rate": 1.7958749046183735e-05, + "loss": 0.5717, + "step": 15159 + }, + { + "epoch": 0.41625480505216916, + "grad_norm": 0.44936129450798035, + "learning_rate": 1.7958487544105453e-05, + "loss": 0.4908, + "step": 15160 + }, + { + "epoch": 0.41628226249313566, + "grad_norm": 0.42208752036094666, + "learning_rate": 1.795822602718205e-05, + "loss": 0.4351, + "step": 15161 + }, + { + "epoch": 0.41630971993410215, + "grad_norm": 0.37937131524086, + "learning_rate": 1.7957964495414014e-05, + "loss": 0.5183, + "step": 15162 + }, + { + "epoch": 0.41633717737506865, + "grad_norm": 0.31449344754219055, + "learning_rate": 1.7957702948801836e-05, + "loss": 0.461, + "step": 15163 + }, + { + "epoch": 0.41636463481603514, + "grad_norm": 0.3396669328212738, + "learning_rate": 1.7957441387346e-05, + "loss": 0.479, + "step": 15164 + }, + { + "epoch": 0.41639209225700163, + "grad_norm": 0.4005783498287201, + "learning_rate": 1.7957179811046997e-05, + "loss": 0.5654, + "step": 15165 + }, + { + "epoch": 0.4164195496979681, + "grad_norm": 0.32353830337524414, + "learning_rate": 1.7956918219905315e-05, + "loss": 0.4461, + "step": 15166 + }, + { + "epoch": 0.4164470071389347, + "grad_norm": 0.35457631945610046, + "learning_rate": 1.795665661392144e-05, + "loss": 0.5446, + "step": 15167 + }, + { + "epoch": 0.41647446457990117, + "grad_norm": 0.3853802978992462, + "learning_rate": 1.795639499309586e-05, + "loss": 0.5339, + "step": 15168 + }, + { + "epoch": 0.41650192202086767, + "grad_norm": 0.3242754340171814, + "learning_rate": 1.795613335742907e-05, + "loss": 0.4759, + "step": 15169 + }, + { + "epoch": 0.41652937946183416, + "grad_norm": 0.3706827759742737, + "learning_rate": 1.7955871706921547e-05, + "loss": 0.5446, + "step": 15170 + }, + { + "epoch": 0.41655683690280065, + "grad_norm": 0.37988412380218506, + "learning_rate": 1.7955610041573782e-05, + "loss": 0.4575, + "step": 15171 + }, + { + "epoch": 0.41658429434376715, + "grad_norm": 0.3997329771518707, + "learning_rate": 1.795534836138627e-05, + "loss": 0.5489, + "step": 15172 + }, + { + "epoch": 0.41661175178473364, + "grad_norm": 0.35064855217933655, + "learning_rate": 1.795508666635949e-05, + "loss": 0.4891, + "step": 15173 + }, + { + "epoch": 0.4166392092257002, + "grad_norm": 0.39843207597732544, + "learning_rate": 1.7954824956493936e-05, + "loss": 0.4961, + "step": 15174 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.39532947540283203, + "learning_rate": 1.7954563231790096e-05, + "loss": 0.5738, + "step": 15175 + }, + { + "epoch": 0.4166941241076332, + "grad_norm": 0.4294738471508026, + "learning_rate": 1.7954301492248455e-05, + "loss": 0.5237, + "step": 15176 + }, + { + "epoch": 0.4167215815485997, + "grad_norm": 0.4093337655067444, + "learning_rate": 1.79540397378695e-05, + "loss": 0.5161, + "step": 15177 + }, + { + "epoch": 0.41674903898956617, + "grad_norm": 0.8251845836639404, + "learning_rate": 1.795377796865373e-05, + "loss": 0.5037, + "step": 15178 + }, + { + "epoch": 0.41677649643053266, + "grad_norm": 0.375183641910553, + "learning_rate": 1.795351618460162e-05, + "loss": 0.44, + "step": 15179 + }, + { + "epoch": 0.41680395387149916, + "grad_norm": 0.5054689049720764, + "learning_rate": 1.7953254385713665e-05, + "loss": 0.559, + "step": 15180 + }, + { + "epoch": 0.4168314113124657, + "grad_norm": 0.34898144006729126, + "learning_rate": 1.7952992571990352e-05, + "loss": 0.4531, + "step": 15181 + }, + { + "epoch": 0.4168588687534322, + "grad_norm": 0.3627343475818634, + "learning_rate": 1.795273074343217e-05, + "loss": 0.4641, + "step": 15182 + }, + { + "epoch": 0.4168863261943987, + "grad_norm": 0.40318411588668823, + "learning_rate": 1.795246890003961e-05, + "loss": 0.4618, + "step": 15183 + }, + { + "epoch": 0.4169137836353652, + "grad_norm": 0.37886038422584534, + "learning_rate": 1.795220704181315e-05, + "loss": 0.527, + "step": 15184 + }, + { + "epoch": 0.4169412410763317, + "grad_norm": 0.3279079496860504, + "learning_rate": 1.795194516875329e-05, + "loss": 0.5095, + "step": 15185 + }, + { + "epoch": 0.4169686985172982, + "grad_norm": 0.3722805380821228, + "learning_rate": 1.7951683280860516e-05, + "loss": 0.5185, + "step": 15186 + }, + { + "epoch": 0.41699615595826467, + "grad_norm": 0.35024121403694153, + "learning_rate": 1.795142137813531e-05, + "loss": 0.5393, + "step": 15187 + }, + { + "epoch": 0.4170236133992312, + "grad_norm": 0.3796120882034302, + "learning_rate": 1.7951159460578173e-05, + "loss": 0.4707, + "step": 15188 + }, + { + "epoch": 0.4170510708401977, + "grad_norm": 0.32420870661735535, + "learning_rate": 1.795089752818958e-05, + "loss": 0.3998, + "step": 15189 + }, + { + "epoch": 0.4170785282811642, + "grad_norm": 0.35028010606765747, + "learning_rate": 1.7950635580970025e-05, + "loss": 0.4372, + "step": 15190 + }, + { + "epoch": 0.4171059857221307, + "grad_norm": 0.3392666280269623, + "learning_rate": 1.795037361892e-05, + "loss": 0.4134, + "step": 15191 + }, + { + "epoch": 0.4171334431630972, + "grad_norm": 0.3706549406051636, + "learning_rate": 1.7950111642039993e-05, + "loss": 0.5169, + "step": 15192 + }, + { + "epoch": 0.4171609006040637, + "grad_norm": 0.3842490017414093, + "learning_rate": 1.7949849650330483e-05, + "loss": 0.5704, + "step": 15193 + }, + { + "epoch": 0.4171883580450302, + "grad_norm": 0.346832275390625, + "learning_rate": 1.7949587643791972e-05, + "loss": 0.5316, + "step": 15194 + }, + { + "epoch": 0.41721581548599673, + "grad_norm": 0.4100469648838043, + "learning_rate": 1.7949325622424942e-05, + "loss": 0.6389, + "step": 15195 + }, + { + "epoch": 0.4172432729269632, + "grad_norm": 0.40443432331085205, + "learning_rate": 1.794906358622988e-05, + "loss": 0.4804, + "step": 15196 + }, + { + "epoch": 0.4172707303679297, + "grad_norm": 0.4335372745990753, + "learning_rate": 1.7948801535207278e-05, + "loss": 0.4891, + "step": 15197 + }, + { + "epoch": 0.4172981878088962, + "grad_norm": 0.4176918864250183, + "learning_rate": 1.7948539469357625e-05, + "loss": 0.6109, + "step": 15198 + }, + { + "epoch": 0.4173256452498627, + "grad_norm": 0.3630468249320984, + "learning_rate": 1.794827738868141e-05, + "loss": 0.4856, + "step": 15199 + }, + { + "epoch": 0.4173531026908292, + "grad_norm": 0.3530628979206085, + "learning_rate": 1.794801529317912e-05, + "loss": 0.4325, + "step": 15200 + }, + { + "epoch": 0.4173805601317957, + "grad_norm": 0.4143669903278351, + "learning_rate": 1.7947753182851248e-05, + "loss": 0.5734, + "step": 15201 + }, + { + "epoch": 0.4174080175727622, + "grad_norm": 0.3700553774833679, + "learning_rate": 1.794749105769827e-05, + "loss": 0.5355, + "step": 15202 + }, + { + "epoch": 0.41743547501372874, + "grad_norm": 0.38821202516555786, + "learning_rate": 1.7947228917720693e-05, + "loss": 0.4863, + "step": 15203 + }, + { + "epoch": 0.41746293245469523, + "grad_norm": 0.4122658967971802, + "learning_rate": 1.7946966762918997e-05, + "loss": 0.5516, + "step": 15204 + }, + { + "epoch": 0.41749038989566173, + "grad_norm": 0.49512141942977905, + "learning_rate": 1.794670459329367e-05, + "loss": 0.6327, + "step": 15205 + }, + { + "epoch": 0.4175178473366282, + "grad_norm": 0.2973226010799408, + "learning_rate": 1.7946442408845205e-05, + "loss": 0.4343, + "step": 15206 + }, + { + "epoch": 0.4175453047775947, + "grad_norm": 0.38532623648643494, + "learning_rate": 1.7946180209574084e-05, + "loss": 0.4863, + "step": 15207 + }, + { + "epoch": 0.4175727622185612, + "grad_norm": 0.31764155626296997, + "learning_rate": 1.7945917995480804e-05, + "loss": 0.4439, + "step": 15208 + }, + { + "epoch": 0.4176002196595277, + "grad_norm": 0.34586647152900696, + "learning_rate": 1.794565576656585e-05, + "loss": 0.457, + "step": 15209 + }, + { + "epoch": 0.41762767710049425, + "grad_norm": 0.3692816495895386, + "learning_rate": 1.794539352282971e-05, + "loss": 0.4893, + "step": 15210 + }, + { + "epoch": 0.41765513454146075, + "grad_norm": 0.41298091411590576, + "learning_rate": 1.794513126427288e-05, + "loss": 0.5939, + "step": 15211 + }, + { + "epoch": 0.41768259198242724, + "grad_norm": 0.37909144163131714, + "learning_rate": 1.7944868990895843e-05, + "loss": 0.5114, + "step": 15212 + }, + { + "epoch": 0.41771004942339374, + "grad_norm": 0.37444770336151123, + "learning_rate": 1.794460670269909e-05, + "loss": 0.5066, + "step": 15213 + }, + { + "epoch": 0.41773750686436023, + "grad_norm": 0.3567681908607483, + "learning_rate": 1.794434439968311e-05, + "loss": 0.4792, + "step": 15214 + }, + { + "epoch": 0.4177649643053267, + "grad_norm": 0.3916453719139099, + "learning_rate": 1.7944082081848388e-05, + "loss": 0.578, + "step": 15215 + }, + { + "epoch": 0.4177924217462932, + "grad_norm": 0.3343442380428314, + "learning_rate": 1.794381974919542e-05, + "loss": 0.4567, + "step": 15216 + }, + { + "epoch": 0.41781987918725977, + "grad_norm": 0.33129844069480896, + "learning_rate": 1.7943557401724692e-05, + "loss": 0.4132, + "step": 15217 + }, + { + "epoch": 0.41784733662822626, + "grad_norm": 0.3678651750087738, + "learning_rate": 1.7943295039436693e-05, + "loss": 0.574, + "step": 15218 + }, + { + "epoch": 0.41787479406919276, + "grad_norm": 0.36911168694496155, + "learning_rate": 1.794303266233192e-05, + "loss": 0.4939, + "step": 15219 + }, + { + "epoch": 0.41790225151015925, + "grad_norm": 0.38076600432395935, + "learning_rate": 1.794277027041085e-05, + "loss": 0.5287, + "step": 15220 + }, + { + "epoch": 0.41792970895112574, + "grad_norm": 0.3355116844177246, + "learning_rate": 1.794250786367398e-05, + "loss": 0.4739, + "step": 15221 + }, + { + "epoch": 0.41795716639209224, + "grad_norm": 0.3640083372592926, + "learning_rate": 1.79422454421218e-05, + "loss": 0.5097, + "step": 15222 + }, + { + "epoch": 0.41798462383305873, + "grad_norm": 0.3776264190673828, + "learning_rate": 1.794198300575479e-05, + "loss": 0.6441, + "step": 15223 + }, + { + "epoch": 0.4180120812740253, + "grad_norm": 0.3572663962841034, + "learning_rate": 1.7941720554573453e-05, + "loss": 0.4427, + "step": 15224 + }, + { + "epoch": 0.4180395387149918, + "grad_norm": 0.31871452927589417, + "learning_rate": 1.7941458088578273e-05, + "loss": 0.5184, + "step": 15225 + }, + { + "epoch": 0.41806699615595827, + "grad_norm": 0.3544771075248718, + "learning_rate": 1.7941195607769738e-05, + "loss": 0.5256, + "step": 15226 + }, + { + "epoch": 0.41809445359692476, + "grad_norm": 0.505176305770874, + "learning_rate": 1.7940933112148338e-05, + "loss": 0.443, + "step": 15227 + }, + { + "epoch": 0.41812191103789126, + "grad_norm": 0.33331599831581116, + "learning_rate": 1.7940670601714564e-05, + "loss": 0.4649, + "step": 15228 + }, + { + "epoch": 0.41814936847885775, + "grad_norm": 0.4302249550819397, + "learning_rate": 1.79404080764689e-05, + "loss": 0.5332, + "step": 15229 + }, + { + "epoch": 0.41817682591982425, + "grad_norm": 0.35126766562461853, + "learning_rate": 1.7940145536411848e-05, + "loss": 0.5168, + "step": 15230 + }, + { + "epoch": 0.4182042833607908, + "grad_norm": 0.3519653379917145, + "learning_rate": 1.7939882981543888e-05, + "loss": 0.4752, + "step": 15231 + }, + { + "epoch": 0.4182317408017573, + "grad_norm": 0.43141913414001465, + "learning_rate": 1.7939620411865513e-05, + "loss": 0.5442, + "step": 15232 + }, + { + "epoch": 0.4182591982427238, + "grad_norm": 0.3695811629295349, + "learning_rate": 1.7939357827377208e-05, + "loss": 0.4541, + "step": 15233 + }, + { + "epoch": 0.4182866556836903, + "grad_norm": 0.5064337253570557, + "learning_rate": 1.793909522807947e-05, + "loss": 0.5272, + "step": 15234 + }, + { + "epoch": 0.4183141131246568, + "grad_norm": 0.5421488881111145, + "learning_rate": 1.7938832613972787e-05, + "loss": 0.4435, + "step": 15235 + }, + { + "epoch": 0.41834157056562327, + "grad_norm": 0.3841310739517212, + "learning_rate": 1.7938569985057643e-05, + "loss": 0.5321, + "step": 15236 + }, + { + "epoch": 0.41836902800658976, + "grad_norm": 0.38323256373405457, + "learning_rate": 1.7938307341334535e-05, + "loss": 0.484, + "step": 15237 + }, + { + "epoch": 0.4183964854475563, + "grad_norm": 0.436128705739975, + "learning_rate": 1.793804468280395e-05, + "loss": 0.4946, + "step": 15238 + }, + { + "epoch": 0.4184239428885228, + "grad_norm": 0.34347254037857056, + "learning_rate": 1.793778200946638e-05, + "loss": 0.4633, + "step": 15239 + }, + { + "epoch": 0.4184514003294893, + "grad_norm": 0.4188711643218994, + "learning_rate": 1.793751932132231e-05, + "loss": 0.5629, + "step": 15240 + }, + { + "epoch": 0.4184788577704558, + "grad_norm": 0.4226456880569458, + "learning_rate": 1.793725661837223e-05, + "loss": 0.5321, + "step": 15241 + }, + { + "epoch": 0.4185063152114223, + "grad_norm": 0.37490054965019226, + "learning_rate": 1.793699390061664e-05, + "loss": 0.5362, + "step": 15242 + }, + { + "epoch": 0.4185337726523888, + "grad_norm": 0.4089394211769104, + "learning_rate": 1.7936731168056018e-05, + "loss": 0.4764, + "step": 15243 + }, + { + "epoch": 0.4185612300933553, + "grad_norm": 0.3971954882144928, + "learning_rate": 1.7936468420690866e-05, + "loss": 0.6009, + "step": 15244 + }, + { + "epoch": 0.4185886875343218, + "grad_norm": 0.4038858413696289, + "learning_rate": 1.7936205658521658e-05, + "loss": 0.4817, + "step": 15245 + }, + { + "epoch": 0.4186161449752883, + "grad_norm": 1.6984504461288452, + "learning_rate": 1.79359428815489e-05, + "loss": 0.5065, + "step": 15246 + }, + { + "epoch": 0.4186436024162548, + "grad_norm": 0.34362056851387024, + "learning_rate": 1.7935680089773075e-05, + "loss": 0.4995, + "step": 15247 + }, + { + "epoch": 0.4186710598572213, + "grad_norm": 0.39992356300354004, + "learning_rate": 1.7935417283194672e-05, + "loss": 0.5487, + "step": 15248 + }, + { + "epoch": 0.4186985172981878, + "grad_norm": 0.3988901674747467, + "learning_rate": 1.7935154461814186e-05, + "loss": 0.5087, + "step": 15249 + }, + { + "epoch": 0.4187259747391543, + "grad_norm": 0.41481512784957886, + "learning_rate": 1.7934891625632103e-05, + "loss": 0.5348, + "step": 15250 + }, + { + "epoch": 0.4187534321801208, + "grad_norm": 0.40403103828430176, + "learning_rate": 1.7934628774648913e-05, + "loss": 0.4787, + "step": 15251 + }, + { + "epoch": 0.41878088962108734, + "grad_norm": 0.4502447545528412, + "learning_rate": 1.793436590886511e-05, + "loss": 0.4711, + "step": 15252 + }, + { + "epoch": 0.41880834706205383, + "grad_norm": 0.4025229215621948, + "learning_rate": 1.793410302828118e-05, + "loss": 0.6034, + "step": 15253 + }, + { + "epoch": 0.4188358045030203, + "grad_norm": 0.33284130692481995, + "learning_rate": 1.7933840132897617e-05, + "loss": 0.472, + "step": 15254 + }, + { + "epoch": 0.4188632619439868, + "grad_norm": 0.37839657068252563, + "learning_rate": 1.7933577222714907e-05, + "loss": 0.479, + "step": 15255 + }, + { + "epoch": 0.4188907193849533, + "grad_norm": 0.35313835740089417, + "learning_rate": 1.793331429773355e-05, + "loss": 0.5265, + "step": 15256 + }, + { + "epoch": 0.4189181768259198, + "grad_norm": 0.42182794213294983, + "learning_rate": 1.7933051357954025e-05, + "loss": 0.5942, + "step": 15257 + }, + { + "epoch": 0.4189456342668863, + "grad_norm": 0.4152265191078186, + "learning_rate": 1.7932788403376825e-05, + "loss": 0.5921, + "step": 15258 + }, + { + "epoch": 0.41897309170785285, + "grad_norm": 0.32745838165283203, + "learning_rate": 1.7932525434002447e-05, + "loss": 0.4748, + "step": 15259 + }, + { + "epoch": 0.41900054914881935, + "grad_norm": 0.38718685507774353, + "learning_rate": 1.7932262449831375e-05, + "loss": 0.4928, + "step": 15260 + }, + { + "epoch": 0.41902800658978584, + "grad_norm": 0.37362417578697205, + "learning_rate": 1.7931999450864106e-05, + "loss": 0.5379, + "step": 15261 + }, + { + "epoch": 0.41905546403075233, + "grad_norm": 0.3261251151561737, + "learning_rate": 1.7931736437101125e-05, + "loss": 0.4674, + "step": 15262 + }, + { + "epoch": 0.41908292147171883, + "grad_norm": 0.37144577503204346, + "learning_rate": 1.793147340854292e-05, + "loss": 0.5437, + "step": 15263 + }, + { + "epoch": 0.4191103789126853, + "grad_norm": 0.3454795181751251, + "learning_rate": 1.7931210365189988e-05, + "loss": 0.4516, + "step": 15264 + }, + { + "epoch": 0.4191378363536518, + "grad_norm": 0.3380047082901001, + "learning_rate": 1.7930947307042816e-05, + "loss": 0.4582, + "step": 15265 + }, + { + "epoch": 0.41916529379461837, + "grad_norm": 0.36208897829055786, + "learning_rate": 1.79306842341019e-05, + "loss": 0.5467, + "step": 15266 + }, + { + "epoch": 0.41919275123558486, + "grad_norm": 0.40777358412742615, + "learning_rate": 1.7930421146367725e-05, + "loss": 0.4365, + "step": 15267 + }, + { + "epoch": 0.41922020867655135, + "grad_norm": 0.36325693130493164, + "learning_rate": 1.793015804384078e-05, + "loss": 0.5694, + "step": 15268 + }, + { + "epoch": 0.41924766611751785, + "grad_norm": 0.4155294895172119, + "learning_rate": 1.792989492652156e-05, + "loss": 0.6737, + "step": 15269 + }, + { + "epoch": 0.41927512355848434, + "grad_norm": 0.3594723641872406, + "learning_rate": 1.792963179441056e-05, + "loss": 0.5172, + "step": 15270 + }, + { + "epoch": 0.41930258099945084, + "grad_norm": 0.35906028747558594, + "learning_rate": 1.7929368647508262e-05, + "loss": 0.5083, + "step": 15271 + }, + { + "epoch": 0.41933003844041733, + "grad_norm": 0.35050269961357117, + "learning_rate": 1.7929105485815162e-05, + "loss": 0.5137, + "step": 15272 + }, + { + "epoch": 0.4193574958813839, + "grad_norm": 0.32556068897247314, + "learning_rate": 1.792884230933175e-05, + "loss": 0.4946, + "step": 15273 + }, + { + "epoch": 0.4193849533223504, + "grad_norm": 0.34760379791259766, + "learning_rate": 1.7928579118058516e-05, + "loss": 0.4721, + "step": 15274 + }, + { + "epoch": 0.41941241076331687, + "grad_norm": 0.3533351421356201, + "learning_rate": 1.792831591199595e-05, + "loss": 0.4822, + "step": 15275 + }, + { + "epoch": 0.41943986820428336, + "grad_norm": 0.39869123697280884, + "learning_rate": 1.7928052691144544e-05, + "loss": 0.528, + "step": 15276 + }, + { + "epoch": 0.41946732564524986, + "grad_norm": 0.40949746966362, + "learning_rate": 1.7927789455504793e-05, + "loss": 0.4982, + "step": 15277 + }, + { + "epoch": 0.41949478308621635, + "grad_norm": 0.36607569456100464, + "learning_rate": 1.792752620507718e-05, + "loss": 0.4927, + "step": 15278 + }, + { + "epoch": 0.41952224052718284, + "grad_norm": 0.5324241518974304, + "learning_rate": 1.7927262939862205e-05, + "loss": 0.5616, + "step": 15279 + }, + { + "epoch": 0.4195496979681494, + "grad_norm": 0.3106774687767029, + "learning_rate": 1.792699965986035e-05, + "loss": 0.4386, + "step": 15280 + }, + { + "epoch": 0.4195771554091159, + "grad_norm": 0.5006164908409119, + "learning_rate": 1.7926736365072116e-05, + "loss": 0.5248, + "step": 15281 + }, + { + "epoch": 0.4196046128500824, + "grad_norm": 0.45002618432044983, + "learning_rate": 1.7926473055497983e-05, + "loss": 0.5205, + "step": 15282 + }, + { + "epoch": 0.4196320702910489, + "grad_norm": 0.41641536355018616, + "learning_rate": 1.792620973113845e-05, + "loss": 0.5341, + "step": 15283 + }, + { + "epoch": 0.41965952773201537, + "grad_norm": 0.35450318455696106, + "learning_rate": 1.792594639199401e-05, + "loss": 0.5257, + "step": 15284 + }, + { + "epoch": 0.41968698517298186, + "grad_norm": 0.34146779775619507, + "learning_rate": 1.7925683038065146e-05, + "loss": 0.4896, + "step": 15285 + }, + { + "epoch": 0.41971444261394836, + "grad_norm": 0.39708206057548523, + "learning_rate": 1.7925419669352352e-05, + "loss": 0.5104, + "step": 15286 + }, + { + "epoch": 0.4197419000549149, + "grad_norm": 0.5250860452651978, + "learning_rate": 1.7925156285856125e-05, + "loss": 0.4735, + "step": 15287 + }, + { + "epoch": 0.4197693574958814, + "grad_norm": 0.40989360213279724, + "learning_rate": 1.792489288757695e-05, + "loss": 0.55, + "step": 15288 + }, + { + "epoch": 0.4197968149368479, + "grad_norm": 0.37427303194999695, + "learning_rate": 1.792462947451532e-05, + "loss": 0.4791, + "step": 15289 + }, + { + "epoch": 0.4198242723778144, + "grad_norm": 0.4145068824291229, + "learning_rate": 1.792436604667173e-05, + "loss": 0.572, + "step": 15290 + }, + { + "epoch": 0.4198517298187809, + "grad_norm": 0.33887937664985657, + "learning_rate": 1.7924102604046664e-05, + "loss": 0.4886, + "step": 15291 + }, + { + "epoch": 0.4198791872597474, + "grad_norm": 0.3504195511341095, + "learning_rate": 1.792383914664062e-05, + "loss": 0.5077, + "step": 15292 + }, + { + "epoch": 0.41990664470071387, + "grad_norm": 0.3542267680168152, + "learning_rate": 1.7923575674454084e-05, + "loss": 0.453, + "step": 15293 + }, + { + "epoch": 0.4199341021416804, + "grad_norm": 0.35250815749168396, + "learning_rate": 1.7923312187487556e-05, + "loss": 0.4122, + "step": 15294 + }, + { + "epoch": 0.4199615595826469, + "grad_norm": 0.3564620018005371, + "learning_rate": 1.792304868574152e-05, + "loss": 0.4968, + "step": 15295 + }, + { + "epoch": 0.4199890170236134, + "grad_norm": 0.3526124358177185, + "learning_rate": 1.7922785169216464e-05, + "loss": 0.4621, + "step": 15296 + }, + { + "epoch": 0.4200164744645799, + "grad_norm": 0.41022413969039917, + "learning_rate": 1.792252163791289e-05, + "loss": 0.5179, + "step": 15297 + }, + { + "epoch": 0.4200439319055464, + "grad_norm": 0.417562335729599, + "learning_rate": 1.7922258091831285e-05, + "loss": 0.4451, + "step": 15298 + }, + { + "epoch": 0.4200713893465129, + "grad_norm": 0.3519323170185089, + "learning_rate": 1.7921994530972136e-05, + "loss": 0.4995, + "step": 15299 + }, + { + "epoch": 0.4200988467874794, + "grad_norm": 0.3477555513381958, + "learning_rate": 1.7921730955335946e-05, + "loss": 0.474, + "step": 15300 + }, + { + "epoch": 0.42012630422844593, + "grad_norm": 0.32916495203971863, + "learning_rate": 1.7921467364923194e-05, + "loss": 0.405, + "step": 15301 + }, + { + "epoch": 0.42015376166941243, + "grad_norm": 0.3810907304286957, + "learning_rate": 1.792120375973438e-05, + "loss": 0.5376, + "step": 15302 + }, + { + "epoch": 0.4201812191103789, + "grad_norm": 0.40240195393562317, + "learning_rate": 1.792094013976999e-05, + "loss": 0.5426, + "step": 15303 + }, + { + "epoch": 0.4202086765513454, + "grad_norm": 0.41851550340652466, + "learning_rate": 1.792067650503052e-05, + "loss": 0.5311, + "step": 15304 + }, + { + "epoch": 0.4202361339923119, + "grad_norm": 0.3339415490627289, + "learning_rate": 1.7920412855516458e-05, + "loss": 0.4778, + "step": 15305 + }, + { + "epoch": 0.4202635914332784, + "grad_norm": 0.38420525193214417, + "learning_rate": 1.7920149191228298e-05, + "loss": 0.4601, + "step": 15306 + }, + { + "epoch": 0.4202910488742449, + "grad_norm": 0.383918434381485, + "learning_rate": 1.791988551216654e-05, + "loss": 0.5169, + "step": 15307 + }, + { + "epoch": 0.42031850631521145, + "grad_norm": 0.376121461391449, + "learning_rate": 1.7919621818331657e-05, + "loss": 0.5187, + "step": 15308 + }, + { + "epoch": 0.42034596375617794, + "grad_norm": 0.46004611253738403, + "learning_rate": 1.7919358109724155e-05, + "loss": 0.548, + "step": 15309 + }, + { + "epoch": 0.42037342119714444, + "grad_norm": 0.3626701235771179, + "learning_rate": 1.7919094386344527e-05, + "loss": 0.5422, + "step": 15310 + }, + { + "epoch": 0.42040087863811093, + "grad_norm": 0.4068359136581421, + "learning_rate": 1.7918830648193258e-05, + "loss": 0.5432, + "step": 15311 + }, + { + "epoch": 0.4204283360790774, + "grad_norm": 0.4663786292076111, + "learning_rate": 1.7918566895270842e-05, + "loss": 0.5329, + "step": 15312 + }, + { + "epoch": 0.4204557935200439, + "grad_norm": 0.38895300030708313, + "learning_rate": 1.791830312757777e-05, + "loss": 0.4918, + "step": 15313 + }, + { + "epoch": 0.4204832509610104, + "grad_norm": 0.3841363489627838, + "learning_rate": 1.791803934511454e-05, + "loss": 0.5713, + "step": 15314 + }, + { + "epoch": 0.42051070840197696, + "grad_norm": 0.5920829772949219, + "learning_rate": 1.7917775547881634e-05, + "loss": 0.554, + "step": 15315 + }, + { + "epoch": 0.42053816584294346, + "grad_norm": 0.34538766741752625, + "learning_rate": 1.7917511735879554e-05, + "loss": 0.5213, + "step": 15316 + }, + { + "epoch": 0.42056562328390995, + "grad_norm": 0.6428372859954834, + "learning_rate": 1.7917247909108784e-05, + "loss": 0.5185, + "step": 15317 + }, + { + "epoch": 0.42059308072487644, + "grad_norm": 0.39903077483177185, + "learning_rate": 1.791698406756982e-05, + "loss": 0.4288, + "step": 15318 + }, + { + "epoch": 0.42062053816584294, + "grad_norm": 0.3894887864589691, + "learning_rate": 1.7916720211263155e-05, + "loss": 0.5335, + "step": 15319 + }, + { + "epoch": 0.42064799560680943, + "grad_norm": 0.3702535331249237, + "learning_rate": 1.7916456340189282e-05, + "loss": 0.5531, + "step": 15320 + }, + { + "epoch": 0.4206754530477759, + "grad_norm": 0.434081107378006, + "learning_rate": 1.7916192454348687e-05, + "loss": 0.4826, + "step": 15321 + }, + { + "epoch": 0.4207029104887425, + "grad_norm": 0.4079924523830414, + "learning_rate": 1.7915928553741873e-05, + "loss": 0.5603, + "step": 15322 + }, + { + "epoch": 0.42073036792970897, + "grad_norm": 0.38439348340034485, + "learning_rate": 1.7915664638369322e-05, + "loss": 0.5684, + "step": 15323 + }, + { + "epoch": 0.42075782537067546, + "grad_norm": 0.350631982088089, + "learning_rate": 1.7915400708231526e-05, + "loss": 0.4964, + "step": 15324 + }, + { + "epoch": 0.42078528281164196, + "grad_norm": 0.3776085376739502, + "learning_rate": 1.7915136763328988e-05, + "loss": 0.5277, + "step": 15325 + }, + { + "epoch": 0.42081274025260845, + "grad_norm": 0.3424610197544098, + "learning_rate": 1.7914872803662187e-05, + "loss": 0.5034, + "step": 15326 + }, + { + "epoch": 0.42084019769357495, + "grad_norm": 0.3800703287124634, + "learning_rate": 1.791460882923163e-05, + "loss": 0.475, + "step": 15327 + }, + { + "epoch": 0.42086765513454144, + "grad_norm": 0.36962002515792847, + "learning_rate": 1.7914344840037796e-05, + "loss": 0.5914, + "step": 15328 + }, + { + "epoch": 0.420895112575508, + "grad_norm": 0.4176870882511139, + "learning_rate": 1.7914080836081184e-05, + "loss": 0.4664, + "step": 15329 + }, + { + "epoch": 0.4209225700164745, + "grad_norm": 0.6321670413017273, + "learning_rate": 1.791381681736229e-05, + "loss": 0.5973, + "step": 15330 + }, + { + "epoch": 0.420950027457441, + "grad_norm": 0.38666069507598877, + "learning_rate": 1.7913552783881596e-05, + "loss": 0.5608, + "step": 15331 + }, + { + "epoch": 0.4209774848984075, + "grad_norm": 0.3821503221988678, + "learning_rate": 1.79132887356396e-05, + "loss": 0.5837, + "step": 15332 + }, + { + "epoch": 0.42100494233937397, + "grad_norm": 0.3400202691555023, + "learning_rate": 1.7913024672636797e-05, + "loss": 0.4959, + "step": 15333 + }, + { + "epoch": 0.42103239978034046, + "grad_norm": 0.4021459221839905, + "learning_rate": 1.7912760594873678e-05, + "loss": 0.6059, + "step": 15334 + }, + { + "epoch": 0.42105985722130695, + "grad_norm": 0.3987073302268982, + "learning_rate": 1.7912496502350735e-05, + "loss": 0.5928, + "step": 15335 + }, + { + "epoch": 0.42108731466227345, + "grad_norm": 0.3892855644226074, + "learning_rate": 1.791223239506846e-05, + "loss": 0.558, + "step": 15336 + }, + { + "epoch": 0.42111477210324, + "grad_norm": 0.4224264323711395, + "learning_rate": 1.791196827302735e-05, + "loss": 0.5957, + "step": 15337 + }, + { + "epoch": 0.4211422295442065, + "grad_norm": 0.35344892740249634, + "learning_rate": 1.7911704136227886e-05, + "loss": 0.5151, + "step": 15338 + }, + { + "epoch": 0.421169686985173, + "grad_norm": 0.3787522614002228, + "learning_rate": 1.7911439984670573e-05, + "loss": 0.5328, + "step": 15339 + }, + { + "epoch": 0.4211971444261395, + "grad_norm": 0.3843385577201843, + "learning_rate": 1.79111758183559e-05, + "loss": 0.4545, + "step": 15340 + }, + { + "epoch": 0.421224601867106, + "grad_norm": 0.3930096924304962, + "learning_rate": 1.791091163728436e-05, + "loss": 0.5094, + "step": 15341 + }, + { + "epoch": 0.42125205930807247, + "grad_norm": 0.3682202696800232, + "learning_rate": 1.7910647441456445e-05, + "loss": 0.4227, + "step": 15342 + }, + { + "epoch": 0.42127951674903896, + "grad_norm": 0.323743611574173, + "learning_rate": 1.7910383230872644e-05, + "loss": 0.4881, + "step": 15343 + }, + { + "epoch": 0.4213069741900055, + "grad_norm": 0.4034160077571869, + "learning_rate": 1.7910119005533455e-05, + "loss": 0.4424, + "step": 15344 + }, + { + "epoch": 0.421334431630972, + "grad_norm": 0.37191087007522583, + "learning_rate": 1.7909854765439368e-05, + "loss": 0.5566, + "step": 15345 + }, + { + "epoch": 0.4213618890719385, + "grad_norm": 0.39881840348243713, + "learning_rate": 1.7909590510590883e-05, + "loss": 0.5544, + "step": 15346 + }, + { + "epoch": 0.421389346512905, + "grad_norm": 0.37077686190605164, + "learning_rate": 1.7909326240988482e-05, + "loss": 0.5477, + "step": 15347 + }, + { + "epoch": 0.4214168039538715, + "grad_norm": 0.3782655596733093, + "learning_rate": 1.7909061956632666e-05, + "loss": 0.5419, + "step": 15348 + }, + { + "epoch": 0.421444261394838, + "grad_norm": 0.36843228340148926, + "learning_rate": 1.7908797657523924e-05, + "loss": 0.5064, + "step": 15349 + }, + { + "epoch": 0.4214717188358045, + "grad_norm": 0.40659844875335693, + "learning_rate": 1.790853334366275e-05, + "loss": 0.5379, + "step": 15350 + }, + { + "epoch": 0.421499176276771, + "grad_norm": 0.47023528814315796, + "learning_rate": 1.790826901504964e-05, + "loss": 0.4699, + "step": 15351 + }, + { + "epoch": 0.4215266337177375, + "grad_norm": 0.4117908775806427, + "learning_rate": 1.790800467168508e-05, + "loss": 0.4703, + "step": 15352 + }, + { + "epoch": 0.421554091158704, + "grad_norm": 0.36200663447380066, + "learning_rate": 1.790774031356957e-05, + "loss": 0.5392, + "step": 15353 + }, + { + "epoch": 0.4215815485996705, + "grad_norm": 0.38265615701675415, + "learning_rate": 1.79074759407036e-05, + "loss": 0.4552, + "step": 15354 + }, + { + "epoch": 0.421609006040637, + "grad_norm": 0.42888346314430237, + "learning_rate": 1.790721155308766e-05, + "loss": 0.566, + "step": 15355 + }, + { + "epoch": 0.4216364634816035, + "grad_norm": 0.4274563789367676, + "learning_rate": 1.790694715072225e-05, + "loss": 0.4981, + "step": 15356 + }, + { + "epoch": 0.42166392092257, + "grad_norm": 0.40366658568382263, + "learning_rate": 1.790668273360786e-05, + "loss": 0.5369, + "step": 15357 + }, + { + "epoch": 0.42169137836353654, + "grad_norm": 0.32469499111175537, + "learning_rate": 1.7906418301744982e-05, + "loss": 0.4649, + "step": 15358 + }, + { + "epoch": 0.42171883580450303, + "grad_norm": 0.42720773816108704, + "learning_rate": 1.7906153855134116e-05, + "loss": 0.5071, + "step": 15359 + }, + { + "epoch": 0.42174629324546953, + "grad_norm": 0.3534640669822693, + "learning_rate": 1.790588939377574e-05, + "loss": 0.4536, + "step": 15360 + }, + { + "epoch": 0.421773750686436, + "grad_norm": 0.38092005252838135, + "learning_rate": 1.7905624917670365e-05, + "loss": 0.4192, + "step": 15361 + }, + { + "epoch": 0.4218012081274025, + "grad_norm": 0.38518914580345154, + "learning_rate": 1.7905360426818476e-05, + "loss": 0.5292, + "step": 15362 + }, + { + "epoch": 0.421828665568369, + "grad_norm": 0.3618248999118805, + "learning_rate": 1.790509592122056e-05, + "loss": 0.4919, + "step": 15363 + }, + { + "epoch": 0.4218561230093355, + "grad_norm": 0.36720922589302063, + "learning_rate": 1.7904831400877123e-05, + "loss": 0.4811, + "step": 15364 + }, + { + "epoch": 0.42188358045030205, + "grad_norm": 0.3925595283508301, + "learning_rate": 1.790456686578865e-05, + "loss": 0.5223, + "step": 15365 + }, + { + "epoch": 0.42191103789126855, + "grad_norm": 0.35651329159736633, + "learning_rate": 1.790430231595564e-05, + "loss": 0.4053, + "step": 15366 + }, + { + "epoch": 0.42193849533223504, + "grad_norm": 0.42494189739227295, + "learning_rate": 1.790403775137858e-05, + "loss": 0.6033, + "step": 15367 + }, + { + "epoch": 0.42196595277320154, + "grad_norm": 0.4613994359970093, + "learning_rate": 1.7903773172057968e-05, + "loss": 0.485, + "step": 15368 + }, + { + "epoch": 0.42199341021416803, + "grad_norm": 0.45521706342697144, + "learning_rate": 1.7903508577994295e-05, + "loss": 0.4903, + "step": 15369 + }, + { + "epoch": 0.4220208676551345, + "grad_norm": 0.3662005662918091, + "learning_rate": 1.790324396918806e-05, + "loss": 0.5266, + "step": 15370 + }, + { + "epoch": 0.422048325096101, + "grad_norm": 0.3900262117385864, + "learning_rate": 1.790297934563975e-05, + "loss": 0.5122, + "step": 15371 + }, + { + "epoch": 0.42207578253706757, + "grad_norm": 0.38697460293769836, + "learning_rate": 1.790271470734986e-05, + "loss": 0.4911, + "step": 15372 + }, + { + "epoch": 0.42210323997803406, + "grad_norm": 0.35743212699890137, + "learning_rate": 1.7902450054318888e-05, + "loss": 0.4832, + "step": 15373 + }, + { + "epoch": 0.42213069741900056, + "grad_norm": 0.34955212473869324, + "learning_rate": 1.7902185386547323e-05, + "loss": 0.5833, + "step": 15374 + }, + { + "epoch": 0.42215815485996705, + "grad_norm": 0.37907874584198, + "learning_rate": 1.790192070403566e-05, + "loss": 0.5617, + "step": 15375 + }, + { + "epoch": 0.42218561230093354, + "grad_norm": 0.32797345519065857, + "learning_rate": 1.7901656006784393e-05, + "loss": 0.5617, + "step": 15376 + }, + { + "epoch": 0.42221306974190004, + "grad_norm": 0.3716447055339813, + "learning_rate": 1.7901391294794015e-05, + "loss": 0.6241, + "step": 15377 + }, + { + "epoch": 0.42224052718286653, + "grad_norm": 0.3724316954612732, + "learning_rate": 1.790112656806502e-05, + "loss": 0.5477, + "step": 15378 + }, + { + "epoch": 0.4222679846238331, + "grad_norm": 0.3421581983566284, + "learning_rate": 1.79008618265979e-05, + "loss": 0.4287, + "step": 15379 + }, + { + "epoch": 0.4222954420647996, + "grad_norm": 0.43447670340538025, + "learning_rate": 1.7900597070393155e-05, + "loss": 0.5452, + "step": 15380 + }, + { + "epoch": 0.42232289950576607, + "grad_norm": 0.4265252351760864, + "learning_rate": 1.7900332299451276e-05, + "loss": 0.5786, + "step": 15381 + }, + { + "epoch": 0.42235035694673256, + "grad_norm": 0.33777397871017456, + "learning_rate": 1.790006751377275e-05, + "loss": 0.504, + "step": 15382 + }, + { + "epoch": 0.42237781438769906, + "grad_norm": 0.35328271985054016, + "learning_rate": 1.7899802713358083e-05, + "loss": 0.5487, + "step": 15383 + }, + { + "epoch": 0.42240527182866555, + "grad_norm": 0.4235985279083252, + "learning_rate": 1.789953789820776e-05, + "loss": 0.4682, + "step": 15384 + }, + { + "epoch": 0.42243272926963205, + "grad_norm": 0.3709211051464081, + "learning_rate": 1.7899273068322277e-05, + "loss": 0.5537, + "step": 15385 + }, + { + "epoch": 0.4224601867105986, + "grad_norm": 0.3916857838630676, + "learning_rate": 1.7899008223702128e-05, + "loss": 0.5705, + "step": 15386 + }, + { + "epoch": 0.4224876441515651, + "grad_norm": 0.29270896315574646, + "learning_rate": 1.789874336434781e-05, + "loss": 0.4497, + "step": 15387 + }, + { + "epoch": 0.4225151015925316, + "grad_norm": 0.35953375697135925, + "learning_rate": 1.789847849025981e-05, + "loss": 0.5651, + "step": 15388 + }, + { + "epoch": 0.4225425590334981, + "grad_norm": 0.3887077271938324, + "learning_rate": 1.789821360143863e-05, + "loss": 0.5316, + "step": 15389 + }, + { + "epoch": 0.42257001647446457, + "grad_norm": 0.33636271953582764, + "learning_rate": 1.7897948697884765e-05, + "loss": 0.4817, + "step": 15390 + }, + { + "epoch": 0.42259747391543107, + "grad_norm": 0.3465147316455841, + "learning_rate": 1.78976837795987e-05, + "loss": 0.5053, + "step": 15391 + }, + { + "epoch": 0.42262493135639756, + "grad_norm": 0.3718602657318115, + "learning_rate": 1.7897418846580935e-05, + "loss": 0.5897, + "step": 15392 + }, + { + "epoch": 0.4226523887973641, + "grad_norm": 0.3765242397785187, + "learning_rate": 1.7897153898831963e-05, + "loss": 0.5821, + "step": 15393 + }, + { + "epoch": 0.4226798462383306, + "grad_norm": 0.43361595273017883, + "learning_rate": 1.7896888936352275e-05, + "loss": 0.5992, + "step": 15394 + }, + { + "epoch": 0.4227073036792971, + "grad_norm": 0.3090726137161255, + "learning_rate": 1.7896623959142373e-05, + "loss": 0.4644, + "step": 15395 + }, + { + "epoch": 0.4227347611202636, + "grad_norm": 0.3916003406047821, + "learning_rate": 1.789635896720275e-05, + "loss": 0.5432, + "step": 15396 + }, + { + "epoch": 0.4227622185612301, + "grad_norm": 0.38945290446281433, + "learning_rate": 1.7896093960533892e-05, + "loss": 0.5448, + "step": 15397 + }, + { + "epoch": 0.4227896760021966, + "grad_norm": 0.3650817275047302, + "learning_rate": 1.78958289391363e-05, + "loss": 0.483, + "step": 15398 + }, + { + "epoch": 0.4228171334431631, + "grad_norm": 0.4364616274833679, + "learning_rate": 1.789556390301047e-05, + "loss": 0.4326, + "step": 15399 + }, + { + "epoch": 0.4228445908841296, + "grad_norm": 0.3795132040977478, + "learning_rate": 1.789529885215689e-05, + "loss": 0.4673, + "step": 15400 + }, + { + "epoch": 0.4228720483250961, + "grad_norm": 0.3504631817340851, + "learning_rate": 1.7895033786576056e-05, + "loss": 0.5546, + "step": 15401 + }, + { + "epoch": 0.4228995057660626, + "grad_norm": 0.4262174367904663, + "learning_rate": 1.7894768706268468e-05, + "loss": 0.5341, + "step": 15402 + }, + { + "epoch": 0.4229269632070291, + "grad_norm": 0.38668495416641235, + "learning_rate": 1.7894503611234615e-05, + "loss": 0.5562, + "step": 15403 + }, + { + "epoch": 0.4229544206479956, + "grad_norm": 0.3437403738498688, + "learning_rate": 1.7894238501474994e-05, + "loss": 0.4314, + "step": 15404 + }, + { + "epoch": 0.4229818780889621, + "grad_norm": 0.37566953897476196, + "learning_rate": 1.78939733769901e-05, + "loss": 0.4944, + "step": 15405 + }, + { + "epoch": 0.4230093355299286, + "grad_norm": 0.368822306394577, + "learning_rate": 1.7893708237780424e-05, + "loss": 0.5117, + "step": 15406 + }, + { + "epoch": 0.42303679297089514, + "grad_norm": 0.34816229343414307, + "learning_rate": 1.789344308384646e-05, + "loss": 0.5311, + "step": 15407 + }, + { + "epoch": 0.42306425041186163, + "grad_norm": 0.34994837641716003, + "learning_rate": 1.7893177915188712e-05, + "loss": 0.5649, + "step": 15408 + }, + { + "epoch": 0.4230917078528281, + "grad_norm": 0.3711960017681122, + "learning_rate": 1.7892912731807663e-05, + "loss": 0.5147, + "step": 15409 + }, + { + "epoch": 0.4231191652937946, + "grad_norm": 0.3749971091747284, + "learning_rate": 1.7892647533703812e-05, + "loss": 0.4766, + "step": 15410 + }, + { + "epoch": 0.4231466227347611, + "grad_norm": 0.3443308472633362, + "learning_rate": 1.789238232087766e-05, + "loss": 0.4547, + "step": 15411 + }, + { + "epoch": 0.4231740801757276, + "grad_norm": 0.3372957706451416, + "learning_rate": 1.789211709332969e-05, + "loss": 0.4915, + "step": 15412 + }, + { + "epoch": 0.4232015376166941, + "grad_norm": 0.33935970067977905, + "learning_rate": 1.7891851851060406e-05, + "loss": 0.4741, + "step": 15413 + }, + { + "epoch": 0.42322899505766065, + "grad_norm": 0.3685728907585144, + "learning_rate": 1.7891586594070296e-05, + "loss": 0.5237, + "step": 15414 + }, + { + "epoch": 0.42325645249862714, + "grad_norm": 0.48300641775131226, + "learning_rate": 1.7891321322359862e-05, + "loss": 0.4438, + "step": 15415 + }, + { + "epoch": 0.42328390993959364, + "grad_norm": 0.3664702773094177, + "learning_rate": 1.789105603592959e-05, + "loss": 0.5254, + "step": 15416 + }, + { + "epoch": 0.42331136738056013, + "grad_norm": 0.34579116106033325, + "learning_rate": 1.7890790734779985e-05, + "loss": 0.4631, + "step": 15417 + }, + { + "epoch": 0.4233388248215266, + "grad_norm": 0.3677965998649597, + "learning_rate": 1.7890525418911538e-05, + "loss": 0.477, + "step": 15418 + }, + { + "epoch": 0.4233662822624931, + "grad_norm": 0.3905978202819824, + "learning_rate": 1.789026008832474e-05, + "loss": 0.5296, + "step": 15419 + }, + { + "epoch": 0.4233937397034596, + "grad_norm": 0.44650667905807495, + "learning_rate": 1.788999474302009e-05, + "loss": 0.5284, + "step": 15420 + }, + { + "epoch": 0.42342119714442616, + "grad_norm": 0.4412221610546112, + "learning_rate": 1.788972938299808e-05, + "loss": 0.5425, + "step": 15421 + }, + { + "epoch": 0.42344865458539266, + "grad_norm": 0.3906874358654022, + "learning_rate": 1.788946400825921e-05, + "loss": 0.5561, + "step": 15422 + }, + { + "epoch": 0.42347611202635915, + "grad_norm": 0.34105947613716125, + "learning_rate": 1.7889198618803964e-05, + "loss": 0.5347, + "step": 15423 + }, + { + "epoch": 0.42350356946732565, + "grad_norm": 0.3608778715133667, + "learning_rate": 1.7888933214632854e-05, + "loss": 0.5923, + "step": 15424 + }, + { + "epoch": 0.42353102690829214, + "grad_norm": 0.3894387483596802, + "learning_rate": 1.7888667795746356e-05, + "loss": 0.4861, + "step": 15425 + }, + { + "epoch": 0.42355848434925863, + "grad_norm": 0.34145066142082214, + "learning_rate": 1.7888402362144985e-05, + "loss": 0.4628, + "step": 15426 + }, + { + "epoch": 0.42358594179022513, + "grad_norm": 0.3708588778972626, + "learning_rate": 1.788813691382922e-05, + "loss": 0.5084, + "step": 15427 + }, + { + "epoch": 0.4236133992311917, + "grad_norm": 0.334555059671402, + "learning_rate": 1.7887871450799563e-05, + "loss": 0.4958, + "step": 15428 + }, + { + "epoch": 0.4236408566721582, + "grad_norm": 0.37389659881591797, + "learning_rate": 1.7887605973056506e-05, + "loss": 0.4481, + "step": 15429 + }, + { + "epoch": 0.42366831411312467, + "grad_norm": 0.3574800193309784, + "learning_rate": 1.788734048060055e-05, + "loss": 0.5238, + "step": 15430 + }, + { + "epoch": 0.42369577155409116, + "grad_norm": 0.4371650218963623, + "learning_rate": 1.7887074973432187e-05, + "loss": 0.452, + "step": 15431 + }, + { + "epoch": 0.42372322899505765, + "grad_norm": 0.4858699440956116, + "learning_rate": 1.788680945155191e-05, + "loss": 0.5582, + "step": 15432 + }, + { + "epoch": 0.42375068643602415, + "grad_norm": 0.35705533623695374, + "learning_rate": 1.7886543914960216e-05, + "loss": 0.4727, + "step": 15433 + }, + { + "epoch": 0.42377814387699064, + "grad_norm": 0.5150899291038513, + "learning_rate": 1.78862783636576e-05, + "loss": 0.5286, + "step": 15434 + }, + { + "epoch": 0.4238056013179572, + "grad_norm": 0.37268877029418945, + "learning_rate": 1.7886012797644557e-05, + "loss": 0.5306, + "step": 15435 + }, + { + "epoch": 0.4238330587589237, + "grad_norm": 0.38457512855529785, + "learning_rate": 1.7885747216921586e-05, + "loss": 0.5432, + "step": 15436 + }, + { + "epoch": 0.4238605161998902, + "grad_norm": 0.3875664472579956, + "learning_rate": 1.788548162148918e-05, + "loss": 0.5192, + "step": 15437 + }, + { + "epoch": 0.4238879736408567, + "grad_norm": 0.3989471197128296, + "learning_rate": 1.7885216011347832e-05, + "loss": 0.5489, + "step": 15438 + }, + { + "epoch": 0.42391543108182317, + "grad_norm": 0.34630337357521057, + "learning_rate": 1.7884950386498043e-05, + "loss": 0.5238, + "step": 15439 + }, + { + "epoch": 0.42394288852278966, + "grad_norm": 0.39135169982910156, + "learning_rate": 1.78846847469403e-05, + "loss": 0.5607, + "step": 15440 + }, + { + "epoch": 0.42397034596375616, + "grad_norm": 0.41511270403862, + "learning_rate": 1.7884419092675104e-05, + "loss": 0.5035, + "step": 15441 + }, + { + "epoch": 0.4239978034047227, + "grad_norm": 0.38577836751937866, + "learning_rate": 1.7884153423702952e-05, + "loss": 0.5471, + "step": 15442 + }, + { + "epoch": 0.4240252608456892, + "grad_norm": 0.3693135678768158, + "learning_rate": 1.788388774002434e-05, + "loss": 0.5565, + "step": 15443 + }, + { + "epoch": 0.4240527182866557, + "grad_norm": 0.3750303387641907, + "learning_rate": 1.7883622041639753e-05, + "loss": 0.5957, + "step": 15444 + }, + { + "epoch": 0.4240801757276222, + "grad_norm": 0.3833070695400238, + "learning_rate": 1.7883356328549703e-05, + "loss": 0.5274, + "step": 15445 + }, + { + "epoch": 0.4241076331685887, + "grad_norm": 0.3558944761753082, + "learning_rate": 1.7883090600754673e-05, + "loss": 0.5873, + "step": 15446 + }, + { + "epoch": 0.4241350906095552, + "grad_norm": 0.3661985397338867, + "learning_rate": 1.7882824858255165e-05, + "loss": 0.5233, + "step": 15447 + }, + { + "epoch": 0.42416254805052167, + "grad_norm": 0.34880223870277405, + "learning_rate": 1.788255910105167e-05, + "loss": 0.5122, + "step": 15448 + }, + { + "epoch": 0.4241900054914882, + "grad_norm": 0.34481850266456604, + "learning_rate": 1.7882293329144688e-05, + "loss": 0.5121, + "step": 15449 + }, + { + "epoch": 0.4242174629324547, + "grad_norm": 0.511839747428894, + "learning_rate": 1.7882027542534716e-05, + "loss": 0.5459, + "step": 15450 + }, + { + "epoch": 0.4242449203734212, + "grad_norm": 0.3557758033275604, + "learning_rate": 1.7881761741222244e-05, + "loss": 0.4409, + "step": 15451 + }, + { + "epoch": 0.4242723778143877, + "grad_norm": 0.3952370882034302, + "learning_rate": 1.7881495925207774e-05, + "loss": 0.4947, + "step": 15452 + }, + { + "epoch": 0.4242998352553542, + "grad_norm": 0.3689156174659729, + "learning_rate": 1.7881230094491793e-05, + "loss": 0.4936, + "step": 15453 + }, + { + "epoch": 0.4243272926963207, + "grad_norm": 0.3925451636314392, + "learning_rate": 1.7880964249074807e-05, + "loss": 0.5144, + "step": 15454 + }, + { + "epoch": 0.4243547501372872, + "grad_norm": 0.42030656337738037, + "learning_rate": 1.7880698388957303e-05, + "loss": 0.5426, + "step": 15455 + }, + { + "epoch": 0.42438220757825373, + "grad_norm": 0.4391491115093231, + "learning_rate": 1.7880432514139787e-05, + "loss": 0.5934, + "step": 15456 + }, + { + "epoch": 0.42440966501922023, + "grad_norm": 0.34903261065483093, + "learning_rate": 1.7880166624622745e-05, + "loss": 0.5526, + "step": 15457 + }, + { + "epoch": 0.4244371224601867, + "grad_norm": 0.4167464077472687, + "learning_rate": 1.787990072040668e-05, + "loss": 0.5524, + "step": 15458 + }, + { + "epoch": 0.4244645799011532, + "grad_norm": 0.39602577686309814, + "learning_rate": 1.7879634801492082e-05, + "loss": 0.5683, + "step": 15459 + }, + { + "epoch": 0.4244920373421197, + "grad_norm": 0.39758631587028503, + "learning_rate": 1.787936886787945e-05, + "loss": 0.5902, + "step": 15460 + }, + { + "epoch": 0.4245194947830862, + "grad_norm": 0.3783600926399231, + "learning_rate": 1.7879102919569283e-05, + "loss": 0.5631, + "step": 15461 + }, + { + "epoch": 0.4245469522240527, + "grad_norm": 0.3531072735786438, + "learning_rate": 1.7878836956562077e-05, + "loss": 0.4457, + "step": 15462 + }, + { + "epoch": 0.42457440966501925, + "grad_norm": 0.37754878401756287, + "learning_rate": 1.787857097885832e-05, + "loss": 0.6168, + "step": 15463 + }, + { + "epoch": 0.42460186710598574, + "grad_norm": 0.41106197237968445, + "learning_rate": 1.7878304986458513e-05, + "loss": 0.3917, + "step": 15464 + }, + { + "epoch": 0.42462932454695224, + "grad_norm": 0.37918969988822937, + "learning_rate": 1.7878038979363158e-05, + "loss": 0.4776, + "step": 15465 + }, + { + "epoch": 0.42465678198791873, + "grad_norm": 0.38154786825180054, + "learning_rate": 1.7877772957572743e-05, + "loss": 0.4492, + "step": 15466 + }, + { + "epoch": 0.4246842394288852, + "grad_norm": 0.365256130695343, + "learning_rate": 1.7877506921087767e-05, + "loss": 0.4682, + "step": 15467 + }, + { + "epoch": 0.4247116968698517, + "grad_norm": 0.3869714140892029, + "learning_rate": 1.7877240869908727e-05, + "loss": 0.5545, + "step": 15468 + }, + { + "epoch": 0.4247391543108182, + "grad_norm": 0.3317021429538727, + "learning_rate": 1.787697480403612e-05, + "loss": 0.5378, + "step": 15469 + }, + { + "epoch": 0.4247666117517847, + "grad_norm": 0.3870697617530823, + "learning_rate": 1.787670872347044e-05, + "loss": 0.4851, + "step": 15470 + }, + { + "epoch": 0.42479406919275126, + "grad_norm": 0.39599868655204773, + "learning_rate": 1.787644262821218e-05, + "loss": 0.4983, + "step": 15471 + }, + { + "epoch": 0.42482152663371775, + "grad_norm": 0.3547077476978302, + "learning_rate": 1.7876176518261846e-05, + "loss": 0.4915, + "step": 15472 + }, + { + "epoch": 0.42484898407468424, + "grad_norm": 0.41535645723342896, + "learning_rate": 1.7875910393619927e-05, + "loss": 0.5171, + "step": 15473 + }, + { + "epoch": 0.42487644151565074, + "grad_norm": 0.3791908025741577, + "learning_rate": 1.787564425428692e-05, + "loss": 0.5711, + "step": 15474 + }, + { + "epoch": 0.42490389895661723, + "grad_norm": 0.413862943649292, + "learning_rate": 1.787537810026333e-05, + "loss": 0.4639, + "step": 15475 + }, + { + "epoch": 0.4249313563975837, + "grad_norm": 0.9719947576522827, + "learning_rate": 1.7875111931549635e-05, + "loss": 0.5075, + "step": 15476 + }, + { + "epoch": 0.4249588138385502, + "grad_norm": 0.39916470646858215, + "learning_rate": 1.7874845748146354e-05, + "loss": 0.5914, + "step": 15477 + }, + { + "epoch": 0.42498627127951677, + "grad_norm": 0.35332438349723816, + "learning_rate": 1.7874579550053962e-05, + "loss": 0.4748, + "step": 15478 + }, + { + "epoch": 0.42501372872048326, + "grad_norm": 0.31516095995903015, + "learning_rate": 1.7874313337272973e-05, + "loss": 0.47, + "step": 15479 + }, + { + "epoch": 0.42504118616144976, + "grad_norm": 0.34321728348731995, + "learning_rate": 1.7874047109803873e-05, + "loss": 0.4548, + "step": 15480 + }, + { + "epoch": 0.42506864360241625, + "grad_norm": 0.38090986013412476, + "learning_rate": 1.7873780867647165e-05, + "loss": 0.59, + "step": 15481 + }, + { + "epoch": 0.42509610104338275, + "grad_norm": 0.36997613310813904, + "learning_rate": 1.7873514610803338e-05, + "loss": 0.5494, + "step": 15482 + }, + { + "epoch": 0.42512355848434924, + "grad_norm": 0.6323895454406738, + "learning_rate": 1.78732483392729e-05, + "loss": 0.6484, + "step": 15483 + }, + { + "epoch": 0.42515101592531573, + "grad_norm": 0.36001527309417725, + "learning_rate": 1.7872982053056335e-05, + "loss": 0.4752, + "step": 15484 + }, + { + "epoch": 0.4251784733662823, + "grad_norm": 0.35151728987693787, + "learning_rate": 1.787271575215415e-05, + "loss": 0.5044, + "step": 15485 + }, + { + "epoch": 0.4252059308072488, + "grad_norm": 0.38568857312202454, + "learning_rate": 1.7872449436566835e-05, + "loss": 0.5656, + "step": 15486 + }, + { + "epoch": 0.42523338824821527, + "grad_norm": 0.37312862277030945, + "learning_rate": 1.7872183106294888e-05, + "loss": 0.4892, + "step": 15487 + }, + { + "epoch": 0.42526084568918177, + "grad_norm": 0.3548831343650818, + "learning_rate": 1.787191676133881e-05, + "loss": 0.5054, + "step": 15488 + }, + { + "epoch": 0.42528830313014826, + "grad_norm": 0.34941452741622925, + "learning_rate": 1.787165040169909e-05, + "loss": 0.5537, + "step": 15489 + }, + { + "epoch": 0.42531576057111475, + "grad_norm": 0.363538920879364, + "learning_rate": 1.7871384027376236e-05, + "loss": 0.5248, + "step": 15490 + }, + { + "epoch": 0.42534321801208125, + "grad_norm": 0.3919290602207184, + "learning_rate": 1.7871117638370737e-05, + "loss": 0.4593, + "step": 15491 + }, + { + "epoch": 0.4253706754530478, + "grad_norm": 0.4524669945240021, + "learning_rate": 1.787085123468309e-05, + "loss": 0.5695, + "step": 15492 + }, + { + "epoch": 0.4253981328940143, + "grad_norm": 0.641028642654419, + "learning_rate": 1.787058481631379e-05, + "loss": 0.5674, + "step": 15493 + }, + { + "epoch": 0.4254255903349808, + "grad_norm": 0.370451956987381, + "learning_rate": 1.787031838326334e-05, + "loss": 0.5705, + "step": 15494 + }, + { + "epoch": 0.4254530477759473, + "grad_norm": 0.38285014033317566, + "learning_rate": 1.7870051935532237e-05, + "loss": 0.4993, + "step": 15495 + }, + { + "epoch": 0.4254805052169138, + "grad_norm": 0.35659530758857727, + "learning_rate": 1.7869785473120975e-05, + "loss": 0.4735, + "step": 15496 + }, + { + "epoch": 0.42550796265788027, + "grad_norm": 0.30287298560142517, + "learning_rate": 1.786951899603005e-05, + "loss": 0.4148, + "step": 15497 + }, + { + "epoch": 0.42553542009884676, + "grad_norm": 0.30575618147850037, + "learning_rate": 1.7869252504259958e-05, + "loss": 0.4075, + "step": 15498 + }, + { + "epoch": 0.4255628775398133, + "grad_norm": 0.3632853031158447, + "learning_rate": 1.7868985997811202e-05, + "loss": 0.5725, + "step": 15499 + }, + { + "epoch": 0.4255903349807798, + "grad_norm": 0.338909387588501, + "learning_rate": 1.7868719476684273e-05, + "loss": 0.499, + "step": 15500 + }, + { + "epoch": 0.4256177924217463, + "grad_norm": 0.38204896450042725, + "learning_rate": 1.7868452940879673e-05, + "loss": 0.583, + "step": 15501 + }, + { + "epoch": 0.4256452498627128, + "grad_norm": 0.358659952878952, + "learning_rate": 1.7868186390397893e-05, + "loss": 0.5472, + "step": 15502 + }, + { + "epoch": 0.4256727073036793, + "grad_norm": 0.37160035967826843, + "learning_rate": 1.786791982523944e-05, + "loss": 0.5612, + "step": 15503 + }, + { + "epoch": 0.4257001647446458, + "grad_norm": 0.3632577955722809, + "learning_rate": 1.7867653245404804e-05, + "loss": 0.4379, + "step": 15504 + }, + { + "epoch": 0.4257276221856123, + "grad_norm": 0.3386332094669342, + "learning_rate": 1.7867386650894482e-05, + "loss": 0.5031, + "step": 15505 + }, + { + "epoch": 0.4257550796265788, + "grad_norm": 0.4070073664188385, + "learning_rate": 1.786712004170897e-05, + "loss": 0.4057, + "step": 15506 + }, + { + "epoch": 0.4257825370675453, + "grad_norm": 0.5327192544937134, + "learning_rate": 1.7866853417848774e-05, + "loss": 0.5336, + "step": 15507 + }, + { + "epoch": 0.4258099945085118, + "grad_norm": 0.37817007303237915, + "learning_rate": 1.7866586779314382e-05, + "loss": 0.4843, + "step": 15508 + }, + { + "epoch": 0.4258374519494783, + "grad_norm": 0.39681151509284973, + "learning_rate": 1.7866320126106296e-05, + "loss": 0.4492, + "step": 15509 + }, + { + "epoch": 0.4258649093904448, + "grad_norm": 0.3777090311050415, + "learning_rate": 1.7866053458225014e-05, + "loss": 0.5529, + "step": 15510 + }, + { + "epoch": 0.4258923668314113, + "grad_norm": 0.3981437683105469, + "learning_rate": 1.7865786775671027e-05, + "loss": 0.5088, + "step": 15511 + }, + { + "epoch": 0.4259198242723778, + "grad_norm": 0.3897170126438141, + "learning_rate": 1.786552007844484e-05, + "loss": 0.5161, + "step": 15512 + }, + { + "epoch": 0.42594728171334434, + "grad_norm": 0.3950134515762329, + "learning_rate": 1.786525336654695e-05, + "loss": 0.6138, + "step": 15513 + }, + { + "epoch": 0.42597473915431083, + "grad_norm": 0.35477912425994873, + "learning_rate": 1.7864986639977848e-05, + "loss": 0.5591, + "step": 15514 + }, + { + "epoch": 0.4260021965952773, + "grad_norm": 0.3761753737926483, + "learning_rate": 1.7864719898738038e-05, + "loss": 0.4484, + "step": 15515 + }, + { + "epoch": 0.4260296540362438, + "grad_norm": 0.3998357355594635, + "learning_rate": 1.7864453142828017e-05, + "loss": 0.5599, + "step": 15516 + }, + { + "epoch": 0.4260571114772103, + "grad_norm": 0.4130476713180542, + "learning_rate": 1.7864186372248276e-05, + "loss": 0.5833, + "step": 15517 + }, + { + "epoch": 0.4260845689181768, + "grad_norm": 0.3810631334781647, + "learning_rate": 1.786391958699932e-05, + "loss": 0.5305, + "step": 15518 + }, + { + "epoch": 0.4261120263591433, + "grad_norm": 0.35093677043914795, + "learning_rate": 1.7863652787081648e-05, + "loss": 0.516, + "step": 15519 + }, + { + "epoch": 0.42613948380010985, + "grad_norm": 0.4066230058670044, + "learning_rate": 1.7863385972495747e-05, + "loss": 0.589, + "step": 15520 + }, + { + "epoch": 0.42616694124107635, + "grad_norm": 0.33948323130607605, + "learning_rate": 1.7863119143242125e-05, + "loss": 0.4913, + "step": 15521 + }, + { + "epoch": 0.42619439868204284, + "grad_norm": 0.4075924754142761, + "learning_rate": 1.7862852299321277e-05, + "loss": 0.5275, + "step": 15522 + }, + { + "epoch": 0.42622185612300933, + "grad_norm": 0.3457423150539398, + "learning_rate": 1.78625854407337e-05, + "loss": 0.4625, + "step": 15523 + }, + { + "epoch": 0.42624931356397583, + "grad_norm": 23.693344116210938, + "learning_rate": 1.7862318567479887e-05, + "loss": 1.7582, + "step": 15524 + }, + { + "epoch": 0.4262767710049423, + "grad_norm": 0.3927377462387085, + "learning_rate": 1.7862051679560343e-05, + "loss": 0.6889, + "step": 15525 + }, + { + "epoch": 0.4263042284459088, + "grad_norm": 0.4061179459095001, + "learning_rate": 1.7861784776975562e-05, + "loss": 0.5745, + "step": 15526 + }, + { + "epoch": 0.42633168588687537, + "grad_norm": 0.4117131531238556, + "learning_rate": 1.7861517859726046e-05, + "loss": 0.4809, + "step": 15527 + }, + { + "epoch": 0.42635914332784186, + "grad_norm": 0.5080301761627197, + "learning_rate": 1.7861250927812292e-05, + "loss": 0.3935, + "step": 15528 + }, + { + "epoch": 0.42638660076880835, + "grad_norm": 0.3831770718097687, + "learning_rate": 1.7860983981234795e-05, + "loss": 0.5158, + "step": 15529 + }, + { + "epoch": 0.42641405820977485, + "grad_norm": 0.4029556214809418, + "learning_rate": 1.7860717019994053e-05, + "loss": 0.4868, + "step": 15530 + }, + { + "epoch": 0.42644151565074134, + "grad_norm": 0.3776521384716034, + "learning_rate": 1.786045004409056e-05, + "loss": 0.4885, + "step": 15531 + }, + { + "epoch": 0.42646897309170784, + "grad_norm": 0.3726821541786194, + "learning_rate": 1.7860183053524824e-05, + "loss": 0.542, + "step": 15532 + }, + { + "epoch": 0.42649643053267433, + "grad_norm": 0.3104356527328491, + "learning_rate": 1.785991604829734e-05, + "loss": 0.4587, + "step": 15533 + }, + { + "epoch": 0.4265238879736409, + "grad_norm": 0.38669925928115845, + "learning_rate": 1.78596490284086e-05, + "loss": 0.515, + "step": 15534 + }, + { + "epoch": 0.4265513454146074, + "grad_norm": 0.3674103617668152, + "learning_rate": 1.7859381993859107e-05, + "loss": 0.5832, + "step": 15535 + }, + { + "epoch": 0.42657880285557387, + "grad_norm": 0.46413689851760864, + "learning_rate": 1.785911494464936e-05, + "loss": 0.5874, + "step": 15536 + }, + { + "epoch": 0.42660626029654036, + "grad_norm": 0.37777215242385864, + "learning_rate": 1.7858847880779852e-05, + "loss": 0.537, + "step": 15537 + }, + { + "epoch": 0.42663371773750686, + "grad_norm": 0.40580570697784424, + "learning_rate": 1.7858580802251085e-05, + "loss": 0.5329, + "step": 15538 + }, + { + "epoch": 0.42666117517847335, + "grad_norm": 0.39893338084220886, + "learning_rate": 1.785831370906356e-05, + "loss": 0.7214, + "step": 15539 + }, + { + "epoch": 0.42668863261943984, + "grad_norm": 0.3465839624404907, + "learning_rate": 1.7858046601217765e-05, + "loss": 0.5092, + "step": 15540 + }, + { + "epoch": 0.4267160900604064, + "grad_norm": 0.3527071177959442, + "learning_rate": 1.7857779478714215e-05, + "loss": 0.5211, + "step": 15541 + }, + { + "epoch": 0.4267435475013729, + "grad_norm": 0.3720036447048187, + "learning_rate": 1.785751234155339e-05, + "loss": 0.5585, + "step": 15542 + }, + { + "epoch": 0.4267710049423394, + "grad_norm": 0.3364792466163635, + "learning_rate": 1.7857245189735803e-05, + "loss": 0.4701, + "step": 15543 + }, + { + "epoch": 0.4267984623833059, + "grad_norm": 0.4178813397884369, + "learning_rate": 1.785697802326194e-05, + "loss": 0.548, + "step": 15544 + }, + { + "epoch": 0.42682591982427237, + "grad_norm": 0.35511258244514465, + "learning_rate": 1.785671084213231e-05, + "loss": 0.5645, + "step": 15545 + }, + { + "epoch": 0.42685337726523886, + "grad_norm": 0.33630019426345825, + "learning_rate": 1.7856443646347405e-05, + "loss": 0.4947, + "step": 15546 + }, + { + "epoch": 0.42688083470620536, + "grad_norm": 0.33837372064590454, + "learning_rate": 1.7856176435907723e-05, + "loss": 0.4678, + "step": 15547 + }, + { + "epoch": 0.4269082921471719, + "grad_norm": 0.38518592715263367, + "learning_rate": 1.7855909210813765e-05, + "loss": 0.4959, + "step": 15548 + }, + { + "epoch": 0.4269357495881384, + "grad_norm": 0.4091208279132843, + "learning_rate": 1.7855641971066033e-05, + "loss": 0.5571, + "step": 15549 + }, + { + "epoch": 0.4269632070291049, + "grad_norm": 0.329557329416275, + "learning_rate": 1.7855374716665016e-05, + "loss": 0.4602, + "step": 15550 + }, + { + "epoch": 0.4269906644700714, + "grad_norm": 0.3863663971424103, + "learning_rate": 1.785510744761122e-05, + "loss": 0.5546, + "step": 15551 + }, + { + "epoch": 0.4270181219110379, + "grad_norm": 0.4092973470687866, + "learning_rate": 1.7854840163905144e-05, + "loss": 0.5138, + "step": 15552 + }, + { + "epoch": 0.4270455793520044, + "grad_norm": 0.3504412770271301, + "learning_rate": 1.7854572865547282e-05, + "loss": 0.4939, + "step": 15553 + }, + { + "epoch": 0.4270730367929709, + "grad_norm": 0.3976462185382843, + "learning_rate": 1.7854305552538135e-05, + "loss": 0.5249, + "step": 15554 + }, + { + "epoch": 0.4271004942339374, + "grad_norm": 0.41781318187713623, + "learning_rate": 1.7854038224878204e-05, + "loss": 0.5382, + "step": 15555 + }, + { + "epoch": 0.4271279516749039, + "grad_norm": 0.390360563993454, + "learning_rate": 1.785377088256798e-05, + "loss": 0.4549, + "step": 15556 + }, + { + "epoch": 0.4271554091158704, + "grad_norm": 0.3617633283138275, + "learning_rate": 1.785350352560797e-05, + "loss": 0.5615, + "step": 15557 + }, + { + "epoch": 0.4271828665568369, + "grad_norm": 0.37597525119781494, + "learning_rate": 1.785323615399867e-05, + "loss": 0.5104, + "step": 15558 + }, + { + "epoch": 0.4272103239978034, + "grad_norm": 0.3892720341682434, + "learning_rate": 1.7852968767740576e-05, + "loss": 0.5601, + "step": 15559 + }, + { + "epoch": 0.4272377814387699, + "grad_norm": 0.35516464710235596, + "learning_rate": 1.785270136683419e-05, + "loss": 0.5918, + "step": 15560 + }, + { + "epoch": 0.4272652388797364, + "grad_norm": 0.39645999670028687, + "learning_rate": 1.7852433951280012e-05, + "loss": 0.4935, + "step": 15561 + }, + { + "epoch": 0.42729269632070294, + "grad_norm": 0.35321134328842163, + "learning_rate": 1.7852166521078533e-05, + "loss": 0.5262, + "step": 15562 + }, + { + "epoch": 0.42732015376166943, + "grad_norm": 0.3558225929737091, + "learning_rate": 1.785189907623026e-05, + "loss": 0.5069, + "step": 15563 + }, + { + "epoch": 0.4273476112026359, + "grad_norm": 0.3179214894771576, + "learning_rate": 1.785163161673569e-05, + "loss": 0.4557, + "step": 15564 + }, + { + "epoch": 0.4273750686436024, + "grad_norm": 0.40835005044937134, + "learning_rate": 1.785136414259532e-05, + "loss": 0.4868, + "step": 15565 + }, + { + "epoch": 0.4274025260845689, + "grad_norm": 0.3481159806251526, + "learning_rate": 1.785109665380965e-05, + "loss": 0.5348, + "step": 15566 + }, + { + "epoch": 0.4274299835255354, + "grad_norm": 0.3252103626728058, + "learning_rate": 1.785082915037918e-05, + "loss": 0.4683, + "step": 15567 + }, + { + "epoch": 0.4274574409665019, + "grad_norm": 0.4563353955745697, + "learning_rate": 1.7850561632304405e-05, + "loss": 0.5595, + "step": 15568 + }, + { + "epoch": 0.42748489840746845, + "grad_norm": 0.3681945204734802, + "learning_rate": 1.785029409958583e-05, + "loss": 0.5617, + "step": 15569 + }, + { + "epoch": 0.42751235584843494, + "grad_norm": 0.4106462001800537, + "learning_rate": 1.785002655222395e-05, + "loss": 0.5231, + "step": 15570 + }, + { + "epoch": 0.42753981328940144, + "grad_norm": 0.3568187355995178, + "learning_rate": 1.7849758990219267e-05, + "loss": 0.4958, + "step": 15571 + }, + { + "epoch": 0.42756727073036793, + "grad_norm": 0.4265635013580322, + "learning_rate": 1.7849491413572274e-05, + "loss": 0.6026, + "step": 15572 + }, + { + "epoch": 0.4275947281713344, + "grad_norm": 0.3427497148513794, + "learning_rate": 1.7849223822283476e-05, + "loss": 0.4668, + "step": 15573 + }, + { + "epoch": 0.4276221856123009, + "grad_norm": 0.39538654685020447, + "learning_rate": 1.784895621635337e-05, + "loss": 0.4363, + "step": 15574 + }, + { + "epoch": 0.4276496430532674, + "grad_norm": 0.34898945689201355, + "learning_rate": 1.784868859578246e-05, + "loss": 0.5734, + "step": 15575 + }, + { + "epoch": 0.42767710049423396, + "grad_norm": 0.37825992703437805, + "learning_rate": 1.7848420960571237e-05, + "loss": 0.5426, + "step": 15576 + }, + { + "epoch": 0.42770455793520046, + "grad_norm": 0.358203649520874, + "learning_rate": 1.78481533107202e-05, + "loss": 0.4538, + "step": 15577 + }, + { + "epoch": 0.42773201537616695, + "grad_norm": 0.43523499369621277, + "learning_rate": 1.784788564622986e-05, + "loss": 0.5345, + "step": 15578 + }, + { + "epoch": 0.42775947281713345, + "grad_norm": 0.3684293031692505, + "learning_rate": 1.7847617967100706e-05, + "loss": 0.5107, + "step": 15579 + }, + { + "epoch": 0.42778693025809994, + "grad_norm": 0.3635784387588501, + "learning_rate": 1.7847350273333237e-05, + "loss": 0.5002, + "step": 15580 + }, + { + "epoch": 0.42781438769906643, + "grad_norm": 0.351823091506958, + "learning_rate": 1.7847082564927958e-05, + "loss": 0.5423, + "step": 15581 + }, + { + "epoch": 0.42784184514003293, + "grad_norm": 0.3754054009914398, + "learning_rate": 1.7846814841885364e-05, + "loss": 0.5409, + "step": 15582 + }, + { + "epoch": 0.4278693025809995, + "grad_norm": 0.3931616544723511, + "learning_rate": 1.7846547104205955e-05, + "loss": 0.5188, + "step": 15583 + }, + { + "epoch": 0.42789676002196597, + "grad_norm": 0.32828494906425476, + "learning_rate": 1.7846279351890234e-05, + "loss": 0.4684, + "step": 15584 + }, + { + "epoch": 0.42792421746293247, + "grad_norm": 0.4146119952201843, + "learning_rate": 1.7846011584938695e-05, + "loss": 0.5103, + "step": 15585 + }, + { + "epoch": 0.42795167490389896, + "grad_norm": 0.37571293115615845, + "learning_rate": 1.7845743803351846e-05, + "loss": 0.5181, + "step": 15586 + }, + { + "epoch": 0.42797913234486545, + "grad_norm": 0.36555659770965576, + "learning_rate": 1.7845476007130176e-05, + "loss": 0.4412, + "step": 15587 + }, + { + "epoch": 0.42800658978583195, + "grad_norm": 0.45493435859680176, + "learning_rate": 1.7845208196274188e-05, + "loss": 0.5366, + "step": 15588 + }, + { + "epoch": 0.42803404722679844, + "grad_norm": 0.40772348642349243, + "learning_rate": 1.7844940370784385e-05, + "loss": 0.5096, + "step": 15589 + }, + { + "epoch": 0.428061504667765, + "grad_norm": 0.338485449552536, + "learning_rate": 1.7844672530661265e-05, + "loss": 0.49, + "step": 15590 + }, + { + "epoch": 0.4280889621087315, + "grad_norm": 0.3944374620914459, + "learning_rate": 1.7844404675905325e-05, + "loss": 0.433, + "step": 15591 + }, + { + "epoch": 0.428116419549698, + "grad_norm": 0.380371630191803, + "learning_rate": 1.7844136806517067e-05, + "loss": 0.5572, + "step": 15592 + }, + { + "epoch": 0.4281438769906645, + "grad_norm": 0.35681915283203125, + "learning_rate": 1.7843868922496992e-05, + "loss": 0.5443, + "step": 15593 + }, + { + "epoch": 0.42817133443163097, + "grad_norm": 0.35569679737091064, + "learning_rate": 1.7843601023845596e-05, + "loss": 0.5328, + "step": 15594 + }, + { + "epoch": 0.42819879187259746, + "grad_norm": 0.34454721212387085, + "learning_rate": 1.7843333110563383e-05, + "loss": 0.524, + "step": 15595 + }, + { + "epoch": 0.42822624931356396, + "grad_norm": 0.3646050691604614, + "learning_rate": 1.7843065182650847e-05, + "loss": 0.5225, + "step": 15596 + }, + { + "epoch": 0.4282537067545305, + "grad_norm": 0.36839067935943604, + "learning_rate": 1.7842797240108494e-05, + "loss": 0.4887, + "step": 15597 + }, + { + "epoch": 0.428281164195497, + "grad_norm": 0.37263861298561096, + "learning_rate": 1.784252928293682e-05, + "loss": 0.4894, + "step": 15598 + }, + { + "epoch": 0.4283086216364635, + "grad_norm": 0.363040953874588, + "learning_rate": 1.7842261311136327e-05, + "loss": 0.4799, + "step": 15599 + }, + { + "epoch": 0.42833607907743, + "grad_norm": 0.4820467233657837, + "learning_rate": 1.7841993324707513e-05, + "loss": 0.5081, + "step": 15600 + }, + { + "epoch": 0.4283635365183965, + "grad_norm": 0.33750930428504944, + "learning_rate": 1.7841725323650876e-05, + "loss": 0.4539, + "step": 15601 + }, + { + "epoch": 0.428390993959363, + "grad_norm": 0.3770262897014618, + "learning_rate": 1.7841457307966923e-05, + "loss": 0.5165, + "step": 15602 + }, + { + "epoch": 0.42841845140032947, + "grad_norm": 0.405598908662796, + "learning_rate": 1.7841189277656144e-05, + "loss": 0.5542, + "step": 15603 + }, + { + "epoch": 0.42844590884129596, + "grad_norm": 0.3619769215583801, + "learning_rate": 1.784092123271905e-05, + "loss": 0.5529, + "step": 15604 + }, + { + "epoch": 0.4284733662822625, + "grad_norm": 0.3692227303981781, + "learning_rate": 1.7840653173156135e-05, + "loss": 0.4853, + "step": 15605 + }, + { + "epoch": 0.428500823723229, + "grad_norm": 0.3580462634563446, + "learning_rate": 1.7840385098967897e-05, + "loss": 0.5003, + "step": 15606 + }, + { + "epoch": 0.4285282811641955, + "grad_norm": 0.3320994973182678, + "learning_rate": 1.784011701015484e-05, + "loss": 0.5899, + "step": 15607 + }, + { + "epoch": 0.428555738605162, + "grad_norm": 0.401951402425766, + "learning_rate": 1.783984890671746e-05, + "loss": 0.526, + "step": 15608 + }, + { + "epoch": 0.4285831960461285, + "grad_norm": 0.3477945923805237, + "learning_rate": 1.783958078865626e-05, + "loss": 0.5004, + "step": 15609 + }, + { + "epoch": 0.428610653487095, + "grad_norm": 0.3977945148944855, + "learning_rate": 1.783931265597174e-05, + "loss": 0.5186, + "step": 15610 + }, + { + "epoch": 0.4286381109280615, + "grad_norm": 0.5047829747200012, + "learning_rate": 1.78390445086644e-05, + "loss": 0.5397, + "step": 15611 + }, + { + "epoch": 0.428665568369028, + "grad_norm": 0.3687898814678192, + "learning_rate": 1.7838776346734743e-05, + "loss": 0.5076, + "step": 15612 + }, + { + "epoch": 0.4286930258099945, + "grad_norm": 0.36475813388824463, + "learning_rate": 1.783850817018326e-05, + "loss": 0.4385, + "step": 15613 + }, + { + "epoch": 0.428720483250961, + "grad_norm": 0.42715758085250854, + "learning_rate": 1.7838239979010463e-05, + "loss": 0.5348, + "step": 15614 + }, + { + "epoch": 0.4287479406919275, + "grad_norm": 0.3834131062030792, + "learning_rate": 1.7837971773216846e-05, + "loss": 0.4934, + "step": 15615 + }, + { + "epoch": 0.428775398132894, + "grad_norm": 0.4326578378677368, + "learning_rate": 1.783770355280291e-05, + "loss": 0.4872, + "step": 15616 + }, + { + "epoch": 0.4288028555738605, + "grad_norm": 0.36710941791534424, + "learning_rate": 1.7837435317769152e-05, + "loss": 0.5424, + "step": 15617 + }, + { + "epoch": 0.428830313014827, + "grad_norm": 0.42682603001594543, + "learning_rate": 1.783716706811608e-05, + "loss": 0.5265, + "step": 15618 + }, + { + "epoch": 0.42885777045579354, + "grad_norm": 0.39651522040367126, + "learning_rate": 1.783689880384419e-05, + "loss": 0.5145, + "step": 15619 + }, + { + "epoch": 0.42888522789676004, + "grad_norm": 0.40778475999832153, + "learning_rate": 1.7836630524953978e-05, + "loss": 0.4944, + "step": 15620 + }, + { + "epoch": 0.42891268533772653, + "grad_norm": 0.3755621314048767, + "learning_rate": 1.7836362231445953e-05, + "loss": 0.5253, + "step": 15621 + }, + { + "epoch": 0.428940142778693, + "grad_norm": 0.35571184754371643, + "learning_rate": 1.7836093923320608e-05, + "loss": 0.517, + "step": 15622 + }, + { + "epoch": 0.4289676002196595, + "grad_norm": 0.3492415249347687, + "learning_rate": 1.783582560057845e-05, + "loss": 0.4927, + "step": 15623 + }, + { + "epoch": 0.428995057660626, + "grad_norm": 0.3760034739971161, + "learning_rate": 1.7835557263219976e-05, + "loss": 0.5559, + "step": 15624 + }, + { + "epoch": 0.4290225151015925, + "grad_norm": 0.41185203194618225, + "learning_rate": 1.7835288911245687e-05, + "loss": 0.5259, + "step": 15625 + }, + { + "epoch": 0.42904997254255905, + "grad_norm": 0.4010952413082123, + "learning_rate": 1.7835020544656082e-05, + "loss": 0.5477, + "step": 15626 + }, + { + "epoch": 0.42907742998352555, + "grad_norm": 0.37991026043891907, + "learning_rate": 1.7834752163451666e-05, + "loss": 0.5669, + "step": 15627 + }, + { + "epoch": 0.42910488742449204, + "grad_norm": 0.39822471141815186, + "learning_rate": 1.7834483767632933e-05, + "loss": 0.4211, + "step": 15628 + }, + { + "epoch": 0.42913234486545854, + "grad_norm": 0.4352516233921051, + "learning_rate": 1.7834215357200388e-05, + "loss": 0.5102, + "step": 15629 + }, + { + "epoch": 0.42915980230642503, + "grad_norm": 0.3892457187175751, + "learning_rate": 1.7833946932154532e-05, + "loss": 0.5131, + "step": 15630 + }, + { + "epoch": 0.4291872597473915, + "grad_norm": 0.3551495671272278, + "learning_rate": 1.7833678492495864e-05, + "loss": 0.4878, + "step": 15631 + }, + { + "epoch": 0.429214717188358, + "grad_norm": 0.3664412498474121, + "learning_rate": 1.7833410038224888e-05, + "loss": 0.5307, + "step": 15632 + }, + { + "epoch": 0.42924217462932457, + "grad_norm": 0.38042640686035156, + "learning_rate": 1.7833141569342095e-05, + "loss": 0.4434, + "step": 15633 + }, + { + "epoch": 0.42926963207029106, + "grad_norm": 0.34326887130737305, + "learning_rate": 1.7832873085848e-05, + "loss": 0.4484, + "step": 15634 + }, + { + "epoch": 0.42929708951125756, + "grad_norm": 0.42982205748558044, + "learning_rate": 1.7832604587743098e-05, + "loss": 0.4555, + "step": 15635 + }, + { + "epoch": 0.42932454695222405, + "grad_norm": 0.40658050775527954, + "learning_rate": 1.7832336075027882e-05, + "loss": 0.5462, + "step": 15636 + }, + { + "epoch": 0.42935200439319054, + "grad_norm": 0.39834654331207275, + "learning_rate": 1.7832067547702862e-05, + "loss": 0.6259, + "step": 15637 + }, + { + "epoch": 0.42937946183415704, + "grad_norm": 0.397270530462265, + "learning_rate": 1.7831799005768536e-05, + "loss": 0.5267, + "step": 15638 + }, + { + "epoch": 0.42940691927512353, + "grad_norm": 0.3909752666950226, + "learning_rate": 1.783153044922541e-05, + "loss": 0.6079, + "step": 15639 + }, + { + "epoch": 0.4294343767160901, + "grad_norm": 0.3312550187110901, + "learning_rate": 1.7831261878073974e-05, + "loss": 0.5022, + "step": 15640 + }, + { + "epoch": 0.4294618341570566, + "grad_norm": 0.4100955128669739, + "learning_rate": 1.7830993292314736e-05, + "loss": 0.4716, + "step": 15641 + }, + { + "epoch": 0.42948929159802307, + "grad_norm": 0.41584843397140503, + "learning_rate": 1.78307246919482e-05, + "loss": 0.4369, + "step": 15642 + }, + { + "epoch": 0.42951674903898956, + "grad_norm": 0.40486595034599304, + "learning_rate": 1.783045607697486e-05, + "loss": 0.5571, + "step": 15643 + }, + { + "epoch": 0.42954420647995606, + "grad_norm": 0.34573739767074585, + "learning_rate": 1.783018744739522e-05, + "loss": 0.5268, + "step": 15644 + }, + { + "epoch": 0.42957166392092255, + "grad_norm": 0.3542966842651367, + "learning_rate": 1.7829918803209783e-05, + "loss": 0.4583, + "step": 15645 + }, + { + "epoch": 0.42959912136188905, + "grad_norm": 0.33474448323249817, + "learning_rate": 1.7829650144419046e-05, + "loss": 0.5061, + "step": 15646 + }, + { + "epoch": 0.4296265788028556, + "grad_norm": 0.3493189513683319, + "learning_rate": 1.7829381471023516e-05, + "loss": 0.4287, + "step": 15647 + }, + { + "epoch": 0.4296540362438221, + "grad_norm": 0.3732379674911499, + "learning_rate": 1.782911278302369e-05, + "loss": 0.4615, + "step": 15648 + }, + { + "epoch": 0.4296814936847886, + "grad_norm": 0.361259400844574, + "learning_rate": 1.7828844080420067e-05, + "loss": 0.4837, + "step": 15649 + }, + { + "epoch": 0.4297089511257551, + "grad_norm": 0.36637619137763977, + "learning_rate": 1.782857536321315e-05, + "loss": 0.4921, + "step": 15650 + }, + { + "epoch": 0.4297364085667216, + "grad_norm": 0.37268027663230896, + "learning_rate": 1.7828306631403448e-05, + "loss": 0.5531, + "step": 15651 + }, + { + "epoch": 0.42976386600768807, + "grad_norm": 0.43603235483169556, + "learning_rate": 1.7828037884991452e-05, + "loss": 0.5553, + "step": 15652 + }, + { + "epoch": 0.42979132344865456, + "grad_norm": 0.372663289308548, + "learning_rate": 1.7827769123977665e-05, + "loss": 0.4901, + "step": 15653 + }, + { + "epoch": 0.4298187808896211, + "grad_norm": 0.3810310363769531, + "learning_rate": 1.7827500348362593e-05, + "loss": 0.5234, + "step": 15654 + }, + { + "epoch": 0.4298462383305876, + "grad_norm": 0.352780818939209, + "learning_rate": 1.7827231558146732e-05, + "loss": 0.5417, + "step": 15655 + }, + { + "epoch": 0.4298736957715541, + "grad_norm": 0.4100407660007477, + "learning_rate": 1.7826962753330588e-05, + "loss": 0.5434, + "step": 15656 + }, + { + "epoch": 0.4299011532125206, + "grad_norm": 0.38979974389076233, + "learning_rate": 1.782669393391466e-05, + "loss": 0.5606, + "step": 15657 + }, + { + "epoch": 0.4299286106534871, + "grad_norm": 0.363499253988266, + "learning_rate": 1.7826425099899453e-05, + "loss": 0.496, + "step": 15658 + }, + { + "epoch": 0.4299560680944536, + "grad_norm": 0.3653847277164459, + "learning_rate": 1.7826156251285458e-05, + "loss": 0.5111, + "step": 15659 + }, + { + "epoch": 0.4299835255354201, + "grad_norm": 0.37519705295562744, + "learning_rate": 1.782588738807319e-05, + "loss": 0.5995, + "step": 15660 + }, + { + "epoch": 0.4300109829763866, + "grad_norm": 0.3726786971092224, + "learning_rate": 1.7825618510263144e-05, + "loss": 0.5448, + "step": 15661 + }, + { + "epoch": 0.4300384404173531, + "grad_norm": 0.3909270763397217, + "learning_rate": 1.7825349617855818e-05, + "loss": 0.5069, + "step": 15662 + }, + { + "epoch": 0.4300658978583196, + "grad_norm": 0.37019577622413635, + "learning_rate": 1.7825080710851718e-05, + "loss": 0.6331, + "step": 15663 + }, + { + "epoch": 0.4300933552992861, + "grad_norm": 0.3984910249710083, + "learning_rate": 1.7824811789251347e-05, + "loss": 0.537, + "step": 15664 + }, + { + "epoch": 0.4301208127402526, + "grad_norm": 0.3523688316345215, + "learning_rate": 1.7824542853055203e-05, + "loss": 0.4739, + "step": 15665 + }, + { + "epoch": 0.4301482701812191, + "grad_norm": 0.3890755772590637, + "learning_rate": 1.7824273902263792e-05, + "loss": 0.5956, + "step": 15666 + }, + { + "epoch": 0.4301757276221856, + "grad_norm": 0.38757261633872986, + "learning_rate": 1.7824004936877612e-05, + "loss": 0.4991, + "step": 15667 + }, + { + "epoch": 0.43020318506315214, + "grad_norm": 0.3760043978691101, + "learning_rate": 1.7823735956897165e-05, + "loss": 0.4655, + "step": 15668 + }, + { + "epoch": 0.43023064250411863, + "grad_norm": 0.3279905617237091, + "learning_rate": 1.782346696232295e-05, + "loss": 0.4742, + "step": 15669 + }, + { + "epoch": 0.4302580999450851, + "grad_norm": 0.5020060539245605, + "learning_rate": 1.7823197953155477e-05, + "loss": 0.5759, + "step": 15670 + }, + { + "epoch": 0.4302855573860516, + "grad_norm": 0.3679308593273163, + "learning_rate": 1.782292892939524e-05, + "loss": 0.5334, + "step": 15671 + }, + { + "epoch": 0.4303130148270181, + "grad_norm": 0.3719729781150818, + "learning_rate": 1.782265989104275e-05, + "loss": 0.4177, + "step": 15672 + }, + { + "epoch": 0.4303404722679846, + "grad_norm": 0.3461589217185974, + "learning_rate": 1.7822390838098495e-05, + "loss": 0.5041, + "step": 15673 + }, + { + "epoch": 0.4303679297089511, + "grad_norm": 0.33373895287513733, + "learning_rate": 1.7822121770562986e-05, + "loss": 0.5332, + "step": 15674 + }, + { + "epoch": 0.43039538714991765, + "grad_norm": 0.3466026782989502, + "learning_rate": 1.7821852688436726e-05, + "loss": 0.5046, + "step": 15675 + }, + { + "epoch": 0.43042284459088415, + "grad_norm": 0.3929496705532074, + "learning_rate": 1.782158359172021e-05, + "loss": 0.5747, + "step": 15676 + }, + { + "epoch": 0.43045030203185064, + "grad_norm": 0.603589653968811, + "learning_rate": 1.782131448041395e-05, + "loss": 0.4462, + "step": 15677 + }, + { + "epoch": 0.43047775947281713, + "grad_norm": 0.4259941279888153, + "learning_rate": 1.7821045354518438e-05, + "loss": 0.4838, + "step": 15678 + }, + { + "epoch": 0.43050521691378363, + "grad_norm": 0.35709646344184875, + "learning_rate": 1.7820776214034182e-05, + "loss": 0.5316, + "step": 15679 + }, + { + "epoch": 0.4305326743547501, + "grad_norm": 0.41204413771629333, + "learning_rate": 1.782050705896168e-05, + "loss": 0.6177, + "step": 15680 + }, + { + "epoch": 0.4305601317957166, + "grad_norm": 0.38293275237083435, + "learning_rate": 1.782023788930144e-05, + "loss": 0.5499, + "step": 15681 + }, + { + "epoch": 0.43058758923668317, + "grad_norm": 0.4198997914791107, + "learning_rate": 1.7819968705053957e-05, + "loss": 0.4566, + "step": 15682 + }, + { + "epoch": 0.43061504667764966, + "grad_norm": 0.3612712025642395, + "learning_rate": 1.781969950621974e-05, + "loss": 0.5657, + "step": 15683 + }, + { + "epoch": 0.43064250411861615, + "grad_norm": 0.3902244567871094, + "learning_rate": 1.7819430292799286e-05, + "loss": 0.555, + "step": 15684 + }, + { + "epoch": 0.43066996155958265, + "grad_norm": 0.3809455931186676, + "learning_rate": 1.7819161064793098e-05, + "loss": 0.5291, + "step": 15685 + }, + { + "epoch": 0.43069741900054914, + "grad_norm": 0.46708962321281433, + "learning_rate": 1.781889182220168e-05, + "loss": 0.5517, + "step": 15686 + }, + { + "epoch": 0.43072487644151564, + "grad_norm": 0.3488490879535675, + "learning_rate": 1.7818622565025534e-05, + "loss": 0.5213, + "step": 15687 + }, + { + "epoch": 0.43075233388248213, + "grad_norm": 0.41209137439727783, + "learning_rate": 1.7818353293265158e-05, + "loss": 0.4224, + "step": 15688 + }, + { + "epoch": 0.4307797913234487, + "grad_norm": 0.45707350969314575, + "learning_rate": 1.7818084006921063e-05, + "loss": 0.6717, + "step": 15689 + }, + { + "epoch": 0.4308072487644152, + "grad_norm": 0.3382502794265747, + "learning_rate": 1.7817814705993738e-05, + "loss": 0.4591, + "step": 15690 + }, + { + "epoch": 0.43083470620538167, + "grad_norm": 0.3718818426132202, + "learning_rate": 1.7817545390483703e-05, + "loss": 0.5394, + "step": 15691 + }, + { + "epoch": 0.43086216364634816, + "grad_norm": 0.33813679218292236, + "learning_rate": 1.7817276060391444e-05, + "loss": 0.5399, + "step": 15692 + }, + { + "epoch": 0.43088962108731466, + "grad_norm": 0.45246621966362, + "learning_rate": 1.7817006715717472e-05, + "loss": 0.5457, + "step": 15693 + }, + { + "epoch": 0.43091707852828115, + "grad_norm": 0.38487163186073303, + "learning_rate": 1.781673735646229e-05, + "loss": 0.4991, + "step": 15694 + }, + { + "epoch": 0.43094453596924764, + "grad_norm": 0.41845324635505676, + "learning_rate": 1.7816467982626396e-05, + "loss": 0.5384, + "step": 15695 + }, + { + "epoch": 0.4309719934102142, + "grad_norm": 0.34164661169052124, + "learning_rate": 1.7816198594210294e-05, + "loss": 0.5523, + "step": 15696 + }, + { + "epoch": 0.4309994508511807, + "grad_norm": 0.37607812881469727, + "learning_rate": 1.781592919121449e-05, + "loss": 0.4913, + "step": 15697 + }, + { + "epoch": 0.4310269082921472, + "grad_norm": 0.3684045374393463, + "learning_rate": 1.7815659773639478e-05, + "loss": 0.5275, + "step": 15698 + }, + { + "epoch": 0.4310543657331137, + "grad_norm": 0.44977059960365295, + "learning_rate": 1.781539034148577e-05, + "loss": 0.515, + "step": 15699 + }, + { + "epoch": 0.43108182317408017, + "grad_norm": 0.3925391435623169, + "learning_rate": 1.7815120894753865e-05, + "loss": 0.5727, + "step": 15700 + }, + { + "epoch": 0.43110928061504666, + "grad_norm": 0.4505382478237152, + "learning_rate": 1.7814851433444263e-05, + "loss": 0.5379, + "step": 15701 + }, + { + "epoch": 0.43113673805601316, + "grad_norm": 0.3827342092990875, + "learning_rate": 1.781458195755747e-05, + "loss": 0.4273, + "step": 15702 + }, + { + "epoch": 0.4311641954969797, + "grad_norm": 0.39571887254714966, + "learning_rate": 1.7814312467093987e-05, + "loss": 0.491, + "step": 15703 + }, + { + "epoch": 0.4311916529379462, + "grad_norm": 0.3637155592441559, + "learning_rate": 1.7814042962054317e-05, + "loss": 0.5778, + "step": 15704 + }, + { + "epoch": 0.4312191103789127, + "grad_norm": 0.34734776616096497, + "learning_rate": 1.7813773442438968e-05, + "loss": 0.4339, + "step": 15705 + }, + { + "epoch": 0.4312465678198792, + "grad_norm": 0.39142775535583496, + "learning_rate": 1.781350390824843e-05, + "loss": 0.5682, + "step": 15706 + }, + { + "epoch": 0.4312740252608457, + "grad_norm": 0.4007240831851959, + "learning_rate": 1.7813234359483216e-05, + "loss": 0.5592, + "step": 15707 + }, + { + "epoch": 0.4313014827018122, + "grad_norm": 0.378548800945282, + "learning_rate": 1.7812964796143826e-05, + "loss": 0.5796, + "step": 15708 + }, + { + "epoch": 0.43132894014277867, + "grad_norm": 0.3980883061885834, + "learning_rate": 1.7812695218230764e-05, + "loss": 0.4332, + "step": 15709 + }, + { + "epoch": 0.4313563975837452, + "grad_norm": 0.3982264995574951, + "learning_rate": 1.781242562574453e-05, + "loss": 0.5949, + "step": 15710 + }, + { + "epoch": 0.4313838550247117, + "grad_norm": 0.3674687147140503, + "learning_rate": 1.7812156018685633e-05, + "loss": 0.5322, + "step": 15711 + }, + { + "epoch": 0.4314113124656782, + "grad_norm": 0.35201963782310486, + "learning_rate": 1.781188639705457e-05, + "loss": 0.5392, + "step": 15712 + }, + { + "epoch": 0.4314387699066447, + "grad_norm": 0.33073943853378296, + "learning_rate": 1.7811616760851845e-05, + "loss": 0.5223, + "step": 15713 + }, + { + "epoch": 0.4314662273476112, + "grad_norm": 0.35256484150886536, + "learning_rate": 1.781134711007796e-05, + "loss": 0.4886, + "step": 15714 + }, + { + "epoch": 0.4314936847885777, + "grad_norm": 0.39341309666633606, + "learning_rate": 1.7811077444733422e-05, + "loss": 0.5896, + "step": 15715 + }, + { + "epoch": 0.4315211422295442, + "grad_norm": 0.33381661772727966, + "learning_rate": 1.7810807764818728e-05, + "loss": 0.4581, + "step": 15716 + }, + { + "epoch": 0.43154859967051074, + "grad_norm": 0.3790580928325653, + "learning_rate": 1.781053807033439e-05, + "loss": 0.5242, + "step": 15717 + }, + { + "epoch": 0.43157605711147723, + "grad_norm": 0.3390536904335022, + "learning_rate": 1.78102683612809e-05, + "loss": 0.4294, + "step": 15718 + }, + { + "epoch": 0.4316035145524437, + "grad_norm": 0.3827509582042694, + "learning_rate": 1.780999863765877e-05, + "loss": 0.5301, + "step": 15719 + }, + { + "epoch": 0.4316309719934102, + "grad_norm": 0.41508498787879944, + "learning_rate": 1.78097288994685e-05, + "loss": 0.5467, + "step": 15720 + }, + { + "epoch": 0.4316584294343767, + "grad_norm": 0.3679489493370056, + "learning_rate": 1.7809459146710596e-05, + "loss": 0.4673, + "step": 15721 + }, + { + "epoch": 0.4316858868753432, + "grad_norm": 0.3585484027862549, + "learning_rate": 1.780918937938555e-05, + "loss": 0.5203, + "step": 15722 + }, + { + "epoch": 0.4317133443163097, + "grad_norm": 0.3760419189929962, + "learning_rate": 1.780891959749388e-05, + "loss": 0.5436, + "step": 15723 + }, + { + "epoch": 0.43174080175727625, + "grad_norm": 0.3591921031475067, + "learning_rate": 1.780864980103608e-05, + "loss": 0.4432, + "step": 15724 + }, + { + "epoch": 0.43176825919824274, + "grad_norm": 0.3487963080406189, + "learning_rate": 1.7808379990012657e-05, + "loss": 0.4889, + "step": 15725 + }, + { + "epoch": 0.43179571663920924, + "grad_norm": 0.3573169708251953, + "learning_rate": 1.7808110164424115e-05, + "loss": 0.4482, + "step": 15726 + }, + { + "epoch": 0.43182317408017573, + "grad_norm": 0.4218449294567108, + "learning_rate": 1.7807840324270953e-05, + "loss": 0.4779, + "step": 15727 + }, + { + "epoch": 0.4318506315211422, + "grad_norm": 0.4103866219520569, + "learning_rate": 1.7807570469553677e-05, + "loss": 0.58, + "step": 15728 + }, + { + "epoch": 0.4318780889621087, + "grad_norm": 1.0443987846374512, + "learning_rate": 1.780730060027279e-05, + "loss": 0.5899, + "step": 15729 + }, + { + "epoch": 0.4319055464030752, + "grad_norm": 0.43774178624153137, + "learning_rate": 1.7807030716428794e-05, + "loss": 0.5366, + "step": 15730 + }, + { + "epoch": 0.43193300384404176, + "grad_norm": 0.3651607036590576, + "learning_rate": 1.78067608180222e-05, + "loss": 0.56, + "step": 15731 + }, + { + "epoch": 0.43196046128500826, + "grad_norm": 0.37602555751800537, + "learning_rate": 1.78064909050535e-05, + "loss": 0.5628, + "step": 15732 + }, + { + "epoch": 0.43198791872597475, + "grad_norm": 0.3664279282093048, + "learning_rate": 1.7806220977523205e-05, + "loss": 0.5677, + "step": 15733 + }, + { + "epoch": 0.43201537616694125, + "grad_norm": 0.36824747920036316, + "learning_rate": 1.7805951035431813e-05, + "loss": 0.4528, + "step": 15734 + }, + { + "epoch": 0.43204283360790774, + "grad_norm": 0.5187387466430664, + "learning_rate": 1.7805681078779833e-05, + "loss": 0.4766, + "step": 15735 + }, + { + "epoch": 0.43207029104887423, + "grad_norm": 0.348093718290329, + "learning_rate": 1.7805411107567767e-05, + "loss": 0.5126, + "step": 15736 + }, + { + "epoch": 0.4320977484898407, + "grad_norm": 0.3847460448741913, + "learning_rate": 1.780514112179612e-05, + "loss": 0.5325, + "step": 15737 + }, + { + "epoch": 0.4321252059308072, + "grad_norm": 0.40005144476890564, + "learning_rate": 1.7804871121465392e-05, + "loss": 0.4937, + "step": 15738 + }, + { + "epoch": 0.43215266337177377, + "grad_norm": 0.37596601247787476, + "learning_rate": 1.7804601106576086e-05, + "loss": 0.4866, + "step": 15739 + }, + { + "epoch": 0.43218012081274026, + "grad_norm": 0.33173397183418274, + "learning_rate": 1.7804331077128712e-05, + "loss": 0.4568, + "step": 15740 + }, + { + "epoch": 0.43220757825370676, + "grad_norm": 0.376644492149353, + "learning_rate": 1.7804061033123767e-05, + "loss": 0.5061, + "step": 15741 + }, + { + "epoch": 0.43223503569467325, + "grad_norm": 0.3960675597190857, + "learning_rate": 1.780379097456176e-05, + "loss": 0.5436, + "step": 15742 + }, + { + "epoch": 0.43226249313563975, + "grad_norm": 0.39394718408584595, + "learning_rate": 1.780352090144319e-05, + "loss": 0.5307, + "step": 15743 + }, + { + "epoch": 0.43228995057660624, + "grad_norm": 0.4712206721305847, + "learning_rate": 1.7803250813768563e-05, + "loss": 0.5065, + "step": 15744 + }, + { + "epoch": 0.43231740801757274, + "grad_norm": 0.44236811995506287, + "learning_rate": 1.7802980711538383e-05, + "loss": 0.6437, + "step": 15745 + }, + { + "epoch": 0.4323448654585393, + "grad_norm": 0.4044110178947449, + "learning_rate": 1.7802710594753155e-05, + "loss": 0.5749, + "step": 15746 + }, + { + "epoch": 0.4323723228995058, + "grad_norm": 0.3664276599884033, + "learning_rate": 1.7802440463413376e-05, + "loss": 0.4981, + "step": 15747 + }, + { + "epoch": 0.4323997803404723, + "grad_norm": 0.4061742424964905, + "learning_rate": 1.7802170317519562e-05, + "loss": 0.5459, + "step": 15748 + }, + { + "epoch": 0.43242723778143877, + "grad_norm": 0.4023115038871765, + "learning_rate": 1.7801900157072207e-05, + "loss": 0.5721, + "step": 15749 + }, + { + "epoch": 0.43245469522240526, + "grad_norm": 0.4223284423351288, + "learning_rate": 1.7801629982071817e-05, + "loss": 0.4946, + "step": 15750 + }, + { + "epoch": 0.43248215266337175, + "grad_norm": 0.40260612964630127, + "learning_rate": 1.78013597925189e-05, + "loss": 0.5203, + "step": 15751 + }, + { + "epoch": 0.43250961010433825, + "grad_norm": 0.38852378726005554, + "learning_rate": 1.7801089588413956e-05, + "loss": 0.5835, + "step": 15752 + }, + { + "epoch": 0.4325370675453048, + "grad_norm": 0.3788832128047943, + "learning_rate": 1.780081936975749e-05, + "loss": 0.5258, + "step": 15753 + }, + { + "epoch": 0.4325645249862713, + "grad_norm": 0.39573419094085693, + "learning_rate": 1.7800549136550006e-05, + "loss": 0.4873, + "step": 15754 + }, + { + "epoch": 0.4325919824272378, + "grad_norm": 0.35904091596603394, + "learning_rate": 1.780027888879201e-05, + "loss": 0.5215, + "step": 15755 + }, + { + "epoch": 0.4326194398682043, + "grad_norm": 0.40415456891059875, + "learning_rate": 1.7800008626484004e-05, + "loss": 0.5075, + "step": 15756 + }, + { + "epoch": 0.4326468973091708, + "grad_norm": 0.4173988699913025, + "learning_rate": 1.779973834962649e-05, + "loss": 0.6431, + "step": 15757 + }, + { + "epoch": 0.43267435475013727, + "grad_norm": 0.4011816084384918, + "learning_rate": 1.7799468058219973e-05, + "loss": 0.4507, + "step": 15758 + }, + { + "epoch": 0.43270181219110376, + "grad_norm": 0.36061280965805054, + "learning_rate": 1.7799197752264962e-05, + "loss": 0.4138, + "step": 15759 + }, + { + "epoch": 0.4327292696320703, + "grad_norm": 0.4089283347129822, + "learning_rate": 1.7798927431761957e-05, + "loss": 0.4733, + "step": 15760 + }, + { + "epoch": 0.4327567270730368, + "grad_norm": 0.3595152795314789, + "learning_rate": 1.7798657096711466e-05, + "loss": 0.5674, + "step": 15761 + }, + { + "epoch": 0.4327841845140033, + "grad_norm": 0.3656414747238159, + "learning_rate": 1.7798386747113987e-05, + "loss": 0.5041, + "step": 15762 + }, + { + "epoch": 0.4328116419549698, + "grad_norm": 0.3366455137729645, + "learning_rate": 1.7798116382970028e-05, + "loss": 0.4678, + "step": 15763 + }, + { + "epoch": 0.4328390993959363, + "grad_norm": 0.37512555718421936, + "learning_rate": 1.779784600428009e-05, + "loss": 0.5195, + "step": 15764 + }, + { + "epoch": 0.4328665568369028, + "grad_norm": 0.3332734704017639, + "learning_rate": 1.7797575611044688e-05, + "loss": 0.5018, + "step": 15765 + }, + { + "epoch": 0.4328940142778693, + "grad_norm": 0.31853818893432617, + "learning_rate": 1.7797305203264316e-05, + "loss": 0.4782, + "step": 15766 + }, + { + "epoch": 0.4329214717188358, + "grad_norm": 0.4094507098197937, + "learning_rate": 1.7797034780939477e-05, + "loss": 0.5251, + "step": 15767 + }, + { + "epoch": 0.4329489291598023, + "grad_norm": 0.37258896231651306, + "learning_rate": 1.7796764344070686e-05, + "loss": 0.4758, + "step": 15768 + }, + { + "epoch": 0.4329763866007688, + "grad_norm": 0.3203381299972534, + "learning_rate": 1.7796493892658437e-05, + "loss": 0.4703, + "step": 15769 + }, + { + "epoch": 0.4330038440417353, + "grad_norm": 0.38322409987449646, + "learning_rate": 1.779622342670324e-05, + "loss": 0.4961, + "step": 15770 + }, + { + "epoch": 0.4330313014827018, + "grad_norm": 0.36165711283683777, + "learning_rate": 1.7795952946205597e-05, + "loss": 0.486, + "step": 15771 + }, + { + "epoch": 0.4330587589236683, + "grad_norm": 0.41877514123916626, + "learning_rate": 1.779568245116601e-05, + "loss": 0.4868, + "step": 15772 + }, + { + "epoch": 0.4330862163646348, + "grad_norm": 0.3353801965713501, + "learning_rate": 1.7795411941584995e-05, + "loss": 0.4253, + "step": 15773 + }, + { + "epoch": 0.43311367380560134, + "grad_norm": 0.3681931495666504, + "learning_rate": 1.7795141417463045e-05, + "loss": 0.4948, + "step": 15774 + }, + { + "epoch": 0.43314113124656783, + "grad_norm": 0.38289928436279297, + "learning_rate": 1.7794870878800666e-05, + "loss": 0.5024, + "step": 15775 + }, + { + "epoch": 0.43316858868753433, + "grad_norm": 0.3968794047832489, + "learning_rate": 1.779460032559837e-05, + "loss": 0.4581, + "step": 15776 + }, + { + "epoch": 0.4331960461285008, + "grad_norm": 0.360524445772171, + "learning_rate": 1.779432975785665e-05, + "loss": 0.4894, + "step": 15777 + }, + { + "epoch": 0.4332235035694673, + "grad_norm": 0.4136369824409485, + "learning_rate": 1.7794059175576022e-05, + "loss": 0.5057, + "step": 15778 + }, + { + "epoch": 0.4332509610104338, + "grad_norm": 0.34836655855178833, + "learning_rate": 1.779378857875699e-05, + "loss": 0.5076, + "step": 15779 + }, + { + "epoch": 0.4332784184514003, + "grad_norm": 0.537964940071106, + "learning_rate": 1.779351796740005e-05, + "loss": 0.4526, + "step": 15780 + }, + { + "epoch": 0.43330587589236685, + "grad_norm": 0.4050541818141937, + "learning_rate": 1.779324734150571e-05, + "loss": 0.5703, + "step": 15781 + }, + { + "epoch": 0.43333333333333335, + "grad_norm": 0.41144153475761414, + "learning_rate": 1.7792976701074478e-05, + "loss": 0.6084, + "step": 15782 + }, + { + "epoch": 0.43336079077429984, + "grad_norm": 0.3459450900554657, + "learning_rate": 1.7792706046106856e-05, + "loss": 0.4806, + "step": 15783 + }, + { + "epoch": 0.43338824821526634, + "grad_norm": 0.4050538241863251, + "learning_rate": 1.7792435376603352e-05, + "loss": 0.4685, + "step": 15784 + }, + { + "epoch": 0.43341570565623283, + "grad_norm": 0.3708988428115845, + "learning_rate": 1.779216469256447e-05, + "loss": 0.5081, + "step": 15785 + }, + { + "epoch": 0.4334431630971993, + "grad_norm": 0.38712260127067566, + "learning_rate": 1.779189399399071e-05, + "loss": 0.5654, + "step": 15786 + }, + { + "epoch": 0.4334706205381658, + "grad_norm": 0.37014782428741455, + "learning_rate": 1.7791623280882584e-05, + "loss": 0.4849, + "step": 15787 + }, + { + "epoch": 0.43349807797913237, + "grad_norm": 0.34224873781204224, + "learning_rate": 1.7791352553240592e-05, + "loss": 0.5033, + "step": 15788 + }, + { + "epoch": 0.43352553542009886, + "grad_norm": 0.36535584926605225, + "learning_rate": 1.7791081811065243e-05, + "loss": 0.5386, + "step": 15789 + }, + { + "epoch": 0.43355299286106536, + "grad_norm": 0.36339256167411804, + "learning_rate": 1.7790811054357038e-05, + "loss": 0.5052, + "step": 15790 + }, + { + "epoch": 0.43358045030203185, + "grad_norm": 0.366231232881546, + "learning_rate": 1.7790540283116485e-05, + "loss": 0.437, + "step": 15791 + }, + { + "epoch": 0.43360790774299834, + "grad_norm": 0.3615324795246124, + "learning_rate": 1.7790269497344085e-05, + "loss": 0.494, + "step": 15792 + }, + { + "epoch": 0.43363536518396484, + "grad_norm": 0.43290185928344727, + "learning_rate": 1.7789998697040348e-05, + "loss": 0.5271, + "step": 15793 + }, + { + "epoch": 0.43366282262493133, + "grad_norm": 0.39158308506011963, + "learning_rate": 1.7789727882205778e-05, + "loss": 0.5931, + "step": 15794 + }, + { + "epoch": 0.4336902800658979, + "grad_norm": 0.3939250111579895, + "learning_rate": 1.778945705284088e-05, + "loss": 0.5306, + "step": 15795 + }, + { + "epoch": 0.4337177375068644, + "grad_norm": 0.3552763760089874, + "learning_rate": 1.7789186208946156e-05, + "loss": 0.5332, + "step": 15796 + }, + { + "epoch": 0.43374519494783087, + "grad_norm": 0.42389288544654846, + "learning_rate": 1.7788915350522115e-05, + "loss": 0.5166, + "step": 15797 + }, + { + "epoch": 0.43377265238879736, + "grad_norm": 0.34257790446281433, + "learning_rate": 1.7788644477569263e-05, + "loss": 0.532, + "step": 15798 + }, + { + "epoch": 0.43380010982976386, + "grad_norm": 0.4072161018848419, + "learning_rate": 1.7788373590088098e-05, + "loss": 0.58, + "step": 15799 + }, + { + "epoch": 0.43382756727073035, + "grad_norm": 0.3685208261013031, + "learning_rate": 1.7788102688079133e-05, + "loss": 0.5027, + "step": 15800 + }, + { + "epoch": 0.43385502471169685, + "grad_norm": 0.3383324444293976, + "learning_rate": 1.778783177154287e-05, + "loss": 0.5142, + "step": 15801 + }, + { + "epoch": 0.4338824821526634, + "grad_norm": 0.3743751347064972, + "learning_rate": 1.7787560840479822e-05, + "loss": 0.5313, + "step": 15802 + }, + { + "epoch": 0.4339099395936299, + "grad_norm": 0.3608703911304474, + "learning_rate": 1.778728989489048e-05, + "loss": 0.5412, + "step": 15803 + }, + { + "epoch": 0.4339373970345964, + "grad_norm": 0.4003990590572357, + "learning_rate": 1.778701893477536e-05, + "loss": 0.4596, + "step": 15804 + }, + { + "epoch": 0.4339648544755629, + "grad_norm": 0.34891417622566223, + "learning_rate": 1.7786747960134962e-05, + "loss": 0.5037, + "step": 15805 + }, + { + "epoch": 0.43399231191652937, + "grad_norm": 0.3308875858783722, + "learning_rate": 1.7786476970969796e-05, + "loss": 0.5438, + "step": 15806 + }, + { + "epoch": 0.43401976935749587, + "grad_norm": 0.4000769555568695, + "learning_rate": 1.7786205967280363e-05, + "loss": 0.5221, + "step": 15807 + }, + { + "epoch": 0.43404722679846236, + "grad_norm": 0.3394167721271515, + "learning_rate": 1.7785934949067175e-05, + "loss": 0.4068, + "step": 15808 + }, + { + "epoch": 0.4340746842394289, + "grad_norm": 0.42152225971221924, + "learning_rate": 1.778566391633073e-05, + "loss": 0.483, + "step": 15809 + }, + { + "epoch": 0.4341021416803954, + "grad_norm": 0.3226282000541687, + "learning_rate": 1.7785392869071537e-05, + "loss": 0.4935, + "step": 15810 + }, + { + "epoch": 0.4341295991213619, + "grad_norm": 0.41348370909690857, + "learning_rate": 1.77851218072901e-05, + "loss": 0.656, + "step": 15811 + }, + { + "epoch": 0.4341570565623284, + "grad_norm": 0.4105367660522461, + "learning_rate": 1.778485073098693e-05, + "loss": 0.499, + "step": 15812 + }, + { + "epoch": 0.4341845140032949, + "grad_norm": 0.35402336716651917, + "learning_rate": 1.7784579640162526e-05, + "loss": 0.4048, + "step": 15813 + }, + { + "epoch": 0.4342119714442614, + "grad_norm": 0.3282882571220398, + "learning_rate": 1.7784308534817396e-05, + "loss": 0.5468, + "step": 15814 + }, + { + "epoch": 0.4342394288852279, + "grad_norm": 0.37348800897598267, + "learning_rate": 1.7784037414952048e-05, + "loss": 0.5173, + "step": 15815 + }, + { + "epoch": 0.4342668863261944, + "grad_norm": 0.3790699541568756, + "learning_rate": 1.7783766280566986e-05, + "loss": 0.5527, + "step": 15816 + }, + { + "epoch": 0.4342943437671609, + "grad_norm": 0.4281144142150879, + "learning_rate": 1.7783495131662713e-05, + "loss": 0.6128, + "step": 15817 + }, + { + "epoch": 0.4343218012081274, + "grad_norm": 0.3457939624786377, + "learning_rate": 1.778322396823974e-05, + "loss": 0.497, + "step": 15818 + }, + { + "epoch": 0.4343492586490939, + "grad_norm": 0.3558986783027649, + "learning_rate": 1.778295279029857e-05, + "loss": 0.5087, + "step": 15819 + }, + { + "epoch": 0.4343767160900604, + "grad_norm": 0.3898905813694, + "learning_rate": 1.778268159783971e-05, + "loss": 0.5652, + "step": 15820 + }, + { + "epoch": 0.4344041735310269, + "grad_norm": 0.4207228422164917, + "learning_rate": 1.7782410390863664e-05, + "loss": 0.542, + "step": 15821 + }, + { + "epoch": 0.4344316309719934, + "grad_norm": 0.3639105558395386, + "learning_rate": 1.7782139169370937e-05, + "loss": 0.5587, + "step": 15822 + }, + { + "epoch": 0.43445908841295994, + "grad_norm": 0.3443910777568817, + "learning_rate": 1.7781867933362037e-05, + "loss": 0.4582, + "step": 15823 + }, + { + "epoch": 0.43448654585392643, + "grad_norm": 0.3996908366680145, + "learning_rate": 1.778159668283747e-05, + "loss": 0.5949, + "step": 15824 + }, + { + "epoch": 0.4345140032948929, + "grad_norm": 0.3983134329319, + "learning_rate": 1.778132541779774e-05, + "loss": 0.4831, + "step": 15825 + }, + { + "epoch": 0.4345414607358594, + "grad_norm": 0.5080505013465881, + "learning_rate": 1.778105413824336e-05, + "loss": 0.5284, + "step": 15826 + }, + { + "epoch": 0.4345689181768259, + "grad_norm": 0.3386547267436981, + "learning_rate": 1.7780782844174823e-05, + "loss": 0.4783, + "step": 15827 + }, + { + "epoch": 0.4345963756177924, + "grad_norm": 0.3632126748561859, + "learning_rate": 1.7780511535592648e-05, + "loss": 0.5375, + "step": 15828 + }, + { + "epoch": 0.4346238330587589, + "grad_norm": 0.42568525671958923, + "learning_rate": 1.7780240212497336e-05, + "loss": 0.4829, + "step": 15829 + }, + { + "epoch": 0.43465129049972545, + "grad_norm": 0.3632119297981262, + "learning_rate": 1.777996887488939e-05, + "loss": 0.4436, + "step": 15830 + }, + { + "epoch": 0.43467874794069195, + "grad_norm": 0.3579495847225189, + "learning_rate": 1.777969752276932e-05, + "loss": 0.5076, + "step": 15831 + }, + { + "epoch": 0.43470620538165844, + "grad_norm": 0.4616347849369049, + "learning_rate": 1.777942615613763e-05, + "loss": 0.4859, + "step": 15832 + }, + { + "epoch": 0.43473366282262493, + "grad_norm": 0.3953580856323242, + "learning_rate": 1.777915477499483e-05, + "loss": 0.4798, + "step": 15833 + }, + { + "epoch": 0.4347611202635914, + "grad_norm": 0.4218102693557739, + "learning_rate": 1.7778883379341422e-05, + "loss": 0.5308, + "step": 15834 + }, + { + "epoch": 0.4347885777045579, + "grad_norm": 0.43501290678977966, + "learning_rate": 1.7778611969177913e-05, + "loss": 0.5512, + "step": 15835 + }, + { + "epoch": 0.4348160351455244, + "grad_norm": 0.35984867811203003, + "learning_rate": 1.777834054450481e-05, + "loss": 0.5118, + "step": 15836 + }, + { + "epoch": 0.43484349258649097, + "grad_norm": 0.3327392339706421, + "learning_rate": 1.777806910532262e-05, + "loss": 0.507, + "step": 15837 + }, + { + "epoch": 0.43487095002745746, + "grad_norm": 0.36942344903945923, + "learning_rate": 1.777779765163185e-05, + "loss": 0.5255, + "step": 15838 + }, + { + "epoch": 0.43489840746842395, + "grad_norm": 0.3470667600631714, + "learning_rate": 1.7777526183433006e-05, + "loss": 0.534, + "step": 15839 + }, + { + "epoch": 0.43492586490939045, + "grad_norm": 0.3204260468482971, + "learning_rate": 1.777725470072659e-05, + "loss": 0.3954, + "step": 15840 + }, + { + "epoch": 0.43495332235035694, + "grad_norm": 0.382302850484848, + "learning_rate": 1.7776983203513113e-05, + "loss": 0.5311, + "step": 15841 + }, + { + "epoch": 0.43498077979132344, + "grad_norm": 0.37130507826805115, + "learning_rate": 1.7776711691793084e-05, + "loss": 0.594, + "step": 15842 + }, + { + "epoch": 0.43500823723228993, + "grad_norm": 0.38654929399490356, + "learning_rate": 1.7776440165567e-05, + "loss": 0.5264, + "step": 15843 + }, + { + "epoch": 0.4350356946732565, + "grad_norm": 0.3921875059604645, + "learning_rate": 1.7776168624835375e-05, + "loss": 0.454, + "step": 15844 + }, + { + "epoch": 0.435063152114223, + "grad_norm": 0.35898104310035706, + "learning_rate": 1.7775897069598714e-05, + "loss": 0.5018, + "step": 15845 + }, + { + "epoch": 0.43509060955518947, + "grad_norm": 0.39647793769836426, + "learning_rate": 1.7775625499857522e-05, + "loss": 0.5282, + "step": 15846 + }, + { + "epoch": 0.43511806699615596, + "grad_norm": 0.41702693700790405, + "learning_rate": 1.7775353915612306e-05, + "loss": 0.6002, + "step": 15847 + }, + { + "epoch": 0.43514552443712246, + "grad_norm": 0.3622141182422638, + "learning_rate": 1.777508231686358e-05, + "loss": 0.5222, + "step": 15848 + }, + { + "epoch": 0.43517298187808895, + "grad_norm": 0.3850437104701996, + "learning_rate": 1.7774810703611836e-05, + "loss": 0.5773, + "step": 15849 + }, + { + "epoch": 0.43520043931905544, + "grad_norm": 0.34381839632987976, + "learning_rate": 1.777453907585759e-05, + "loss": 0.478, + "step": 15850 + }, + { + "epoch": 0.435227896760022, + "grad_norm": 0.3511781394481659, + "learning_rate": 1.777426743360135e-05, + "loss": 0.5042, + "step": 15851 + }, + { + "epoch": 0.4352553542009885, + "grad_norm": 0.3984534740447998, + "learning_rate": 1.777399577684362e-05, + "loss": 0.5013, + "step": 15852 + }, + { + "epoch": 0.435282811641955, + "grad_norm": 0.4096635580062866, + "learning_rate": 1.7773724105584908e-05, + "loss": 0.5068, + "step": 15853 + }, + { + "epoch": 0.4353102690829215, + "grad_norm": 0.4613991677761078, + "learning_rate": 1.7773452419825714e-05, + "loss": 0.5379, + "step": 15854 + }, + { + "epoch": 0.43533772652388797, + "grad_norm": 0.4078718423843384, + "learning_rate": 1.7773180719566554e-05, + "loss": 0.5974, + "step": 15855 + }, + { + "epoch": 0.43536518396485446, + "grad_norm": 0.3718605041503906, + "learning_rate": 1.777290900480793e-05, + "loss": 0.4965, + "step": 15856 + }, + { + "epoch": 0.43539264140582096, + "grad_norm": 0.3643260896205902, + "learning_rate": 1.777263727555035e-05, + "loss": 0.5187, + "step": 15857 + }, + { + "epoch": 0.4354200988467875, + "grad_norm": 0.36632901430130005, + "learning_rate": 1.777236553179432e-05, + "loss": 0.5217, + "step": 15858 + }, + { + "epoch": 0.435447556287754, + "grad_norm": 0.3820452094078064, + "learning_rate": 1.777209377354035e-05, + "loss": 0.4462, + "step": 15859 + }, + { + "epoch": 0.4354750137287205, + "grad_norm": 0.42883503437042236, + "learning_rate": 1.7771822000788942e-05, + "loss": 0.4884, + "step": 15860 + }, + { + "epoch": 0.435502471169687, + "grad_norm": 0.42689090967178345, + "learning_rate": 1.777155021354061e-05, + "loss": 0.4985, + "step": 15861 + }, + { + "epoch": 0.4355299286106535, + "grad_norm": 0.34809309244155884, + "learning_rate": 1.777127841179585e-05, + "loss": 0.4945, + "step": 15862 + }, + { + "epoch": 0.43555738605162, + "grad_norm": 0.3639553189277649, + "learning_rate": 1.7771006595555177e-05, + "loss": 0.4359, + "step": 15863 + }, + { + "epoch": 0.43558484349258647, + "grad_norm": 0.3971739113330841, + "learning_rate": 1.77707347648191e-05, + "loss": 0.6052, + "step": 15864 + }, + { + "epoch": 0.435612300933553, + "grad_norm": 0.39819633960723877, + "learning_rate": 1.777046291958812e-05, + "loss": 0.5129, + "step": 15865 + }, + { + "epoch": 0.4356397583745195, + "grad_norm": 0.3583029508590698, + "learning_rate": 1.7770191059862746e-05, + "loss": 0.4252, + "step": 15866 + }, + { + "epoch": 0.435667215815486, + "grad_norm": 0.32921040058135986, + "learning_rate": 1.776991918564349e-05, + "loss": 0.4538, + "step": 15867 + }, + { + "epoch": 0.4356946732564525, + "grad_norm": 0.4123268723487854, + "learning_rate": 1.776964729693085e-05, + "loss": 0.5082, + "step": 15868 + }, + { + "epoch": 0.435722130697419, + "grad_norm": 0.37465164065361023, + "learning_rate": 1.776937539372534e-05, + "loss": 0.5876, + "step": 15869 + }, + { + "epoch": 0.4357495881383855, + "grad_norm": 0.3566734194755554, + "learning_rate": 1.7769103476027465e-05, + "loss": 0.487, + "step": 15870 + }, + { + "epoch": 0.435777045579352, + "grad_norm": 0.35565823316574097, + "learning_rate": 1.7768831543837734e-05, + "loss": 0.5222, + "step": 15871 + }, + { + "epoch": 0.4358045030203185, + "grad_norm": 0.3792523145675659, + "learning_rate": 1.7768559597156648e-05, + "loss": 0.4595, + "step": 15872 + }, + { + "epoch": 0.43583196046128503, + "grad_norm": 0.33798009157180786, + "learning_rate": 1.7768287635984722e-05, + "loss": 0.5186, + "step": 15873 + }, + { + "epoch": 0.4358594179022515, + "grad_norm": 0.38798460364341736, + "learning_rate": 1.776801566032246e-05, + "loss": 0.5179, + "step": 15874 + }, + { + "epoch": 0.435886875343218, + "grad_norm": 0.4192604124546051, + "learning_rate": 1.776774367017037e-05, + "loss": 0.5442, + "step": 15875 + }, + { + "epoch": 0.4359143327841845, + "grad_norm": 0.4031027853488922, + "learning_rate": 1.7767471665528958e-05, + "loss": 0.4791, + "step": 15876 + }, + { + "epoch": 0.435941790225151, + "grad_norm": 0.3764681816101074, + "learning_rate": 1.7767199646398735e-05, + "loss": 0.5617, + "step": 15877 + }, + { + "epoch": 0.4359692476661175, + "grad_norm": 0.5068613886833191, + "learning_rate": 1.7766927612780204e-05, + "loss": 0.5039, + "step": 15878 + }, + { + "epoch": 0.435996705107084, + "grad_norm": 0.34902673959732056, + "learning_rate": 1.7766655564673875e-05, + "loss": 0.5218, + "step": 15879 + }, + { + "epoch": 0.43602416254805054, + "grad_norm": 0.3677148222923279, + "learning_rate": 1.776638350208025e-05, + "loss": 0.533, + "step": 15880 + }, + { + "epoch": 0.43605161998901704, + "grad_norm": 0.3835904598236084, + "learning_rate": 1.7766111424999844e-05, + "loss": 0.4955, + "step": 15881 + }, + { + "epoch": 0.43607907742998353, + "grad_norm": 0.3737671673297882, + "learning_rate": 1.7765839333433163e-05, + "loss": 0.4889, + "step": 15882 + }, + { + "epoch": 0.43610653487095, + "grad_norm": 0.379873663187027, + "learning_rate": 1.776556722738071e-05, + "loss": 0.5123, + "step": 15883 + }, + { + "epoch": 0.4361339923119165, + "grad_norm": 0.3733636736869812, + "learning_rate": 1.7765295106843e-05, + "loss": 0.6575, + "step": 15884 + }, + { + "epoch": 0.436161449752883, + "grad_norm": 0.3931286633014679, + "learning_rate": 1.776502297182053e-05, + "loss": 0.4399, + "step": 15885 + }, + { + "epoch": 0.4361889071938495, + "grad_norm": 0.3562195897102356, + "learning_rate": 1.776475082231382e-05, + "loss": 0.5434, + "step": 15886 + }, + { + "epoch": 0.43621636463481606, + "grad_norm": 0.37582406401634216, + "learning_rate": 1.7764478658323367e-05, + "loss": 0.48, + "step": 15887 + }, + { + "epoch": 0.43624382207578255, + "grad_norm": 0.38222822546958923, + "learning_rate": 1.776420647984969e-05, + "loss": 0.448, + "step": 15888 + }, + { + "epoch": 0.43627127951674904, + "grad_norm": 0.3018167018890381, + "learning_rate": 1.7763934286893282e-05, + "loss": 0.4426, + "step": 15889 + }, + { + "epoch": 0.43629873695771554, + "grad_norm": 0.41410377621650696, + "learning_rate": 1.7763662079454662e-05, + "loss": 0.5309, + "step": 15890 + }, + { + "epoch": 0.43632619439868203, + "grad_norm": 0.39254194498062134, + "learning_rate": 1.7763389857534333e-05, + "loss": 0.632, + "step": 15891 + }, + { + "epoch": 0.4363536518396485, + "grad_norm": 0.35128727555274963, + "learning_rate": 1.7763117621132803e-05, + "loss": 0.6042, + "step": 15892 + }, + { + "epoch": 0.436381109280615, + "grad_norm": 0.3649732172489166, + "learning_rate": 1.7762845370250585e-05, + "loss": 0.5337, + "step": 15893 + }, + { + "epoch": 0.43640856672158157, + "grad_norm": 0.3686985373497009, + "learning_rate": 1.7762573104888182e-05, + "loss": 0.5088, + "step": 15894 + }, + { + "epoch": 0.43643602416254806, + "grad_norm": 0.3927343487739563, + "learning_rate": 1.77623008250461e-05, + "loss": 0.5556, + "step": 15895 + }, + { + "epoch": 0.43646348160351456, + "grad_norm": 0.4350680410861969, + "learning_rate": 1.776202853072485e-05, + "loss": 0.5117, + "step": 15896 + }, + { + "epoch": 0.43649093904448105, + "grad_norm": 0.44280120730400085, + "learning_rate": 1.7761756221924942e-05, + "loss": 0.5189, + "step": 15897 + }, + { + "epoch": 0.43651839648544755, + "grad_norm": 0.3498659133911133, + "learning_rate": 1.776148389864688e-05, + "loss": 0.4527, + "step": 15898 + }, + { + "epoch": 0.43654585392641404, + "grad_norm": 0.4022500514984131, + "learning_rate": 1.7761211560891173e-05, + "loss": 0.5029, + "step": 15899 + }, + { + "epoch": 0.43657331136738053, + "grad_norm": 0.35254722833633423, + "learning_rate": 1.776093920865833e-05, + "loss": 0.5989, + "step": 15900 + }, + { + "epoch": 0.4366007688083471, + "grad_norm": 0.3519185483455658, + "learning_rate": 1.7760666841948857e-05, + "loss": 0.5858, + "step": 15901 + }, + { + "epoch": 0.4366282262493136, + "grad_norm": 0.37374967336654663, + "learning_rate": 1.7760394460763264e-05, + "loss": 0.4929, + "step": 15902 + }, + { + "epoch": 0.43665568369028007, + "grad_norm": 0.37551864981651306, + "learning_rate": 1.7760122065102058e-05, + "loss": 0.5283, + "step": 15903 + }, + { + "epoch": 0.43668314113124657, + "grad_norm": 0.341462641954422, + "learning_rate": 1.775984965496575e-05, + "loss": 0.4672, + "step": 15904 + }, + { + "epoch": 0.43671059857221306, + "grad_norm": 0.4056234359741211, + "learning_rate": 1.7759577230354844e-05, + "loss": 0.5502, + "step": 15905 + }, + { + "epoch": 0.43673805601317955, + "grad_norm": 0.36891016364097595, + "learning_rate": 1.775930479126985e-05, + "loss": 0.4921, + "step": 15906 + }, + { + "epoch": 0.43676551345414605, + "grad_norm": 0.36526918411254883, + "learning_rate": 1.7759032337711275e-05, + "loss": 0.519, + "step": 15907 + }, + { + "epoch": 0.4367929708951126, + "grad_norm": 0.31434789299964905, + "learning_rate": 1.7758759869679628e-05, + "loss": 0.4556, + "step": 15908 + }, + { + "epoch": 0.4368204283360791, + "grad_norm": 0.40769606828689575, + "learning_rate": 1.775848738717542e-05, + "loss": 0.5344, + "step": 15909 + }, + { + "epoch": 0.4368478857770456, + "grad_norm": 0.3707984685897827, + "learning_rate": 1.7758214890199156e-05, + "loss": 0.5645, + "step": 15910 + }, + { + "epoch": 0.4368753432180121, + "grad_norm": 0.3611975908279419, + "learning_rate": 1.7757942378751345e-05, + "loss": 0.5132, + "step": 15911 + }, + { + "epoch": 0.4369028006589786, + "grad_norm": 0.3599609136581421, + "learning_rate": 1.7757669852832493e-05, + "loss": 0.5169, + "step": 15912 + }, + { + "epoch": 0.43693025809994507, + "grad_norm": 0.33238449692726135, + "learning_rate": 1.7757397312443118e-05, + "loss": 0.4484, + "step": 15913 + }, + { + "epoch": 0.43695771554091156, + "grad_norm": 0.37518590688705444, + "learning_rate": 1.7757124757583712e-05, + "loss": 0.5403, + "step": 15914 + }, + { + "epoch": 0.4369851729818781, + "grad_norm": 0.5720561146736145, + "learning_rate": 1.77568521882548e-05, + "loss": 0.4525, + "step": 15915 + }, + { + "epoch": 0.4370126304228446, + "grad_norm": 0.3296835124492645, + "learning_rate": 1.7756579604456878e-05, + "loss": 0.5188, + "step": 15916 + }, + { + "epoch": 0.4370400878638111, + "grad_norm": 0.3669154644012451, + "learning_rate": 1.775630700619046e-05, + "loss": 0.5832, + "step": 15917 + }, + { + "epoch": 0.4370675453047776, + "grad_norm": 0.4243949055671692, + "learning_rate": 1.7756034393456057e-05, + "loss": 0.5369, + "step": 15918 + }, + { + "epoch": 0.4370950027457441, + "grad_norm": 0.3647055923938751, + "learning_rate": 1.775576176625417e-05, + "loss": 0.4694, + "step": 15919 + }, + { + "epoch": 0.4371224601867106, + "grad_norm": 0.3840551972389221, + "learning_rate": 1.7755489124585315e-05, + "loss": 0.6116, + "step": 15920 + }, + { + "epoch": 0.4371499176276771, + "grad_norm": 0.4129382371902466, + "learning_rate": 1.7755216468449995e-05, + "loss": 0.5035, + "step": 15921 + }, + { + "epoch": 0.4371773750686436, + "grad_norm": 0.40011605620384216, + "learning_rate": 1.7754943797848725e-05, + "loss": 0.5375, + "step": 15922 + }, + { + "epoch": 0.4372048325096101, + "grad_norm": 0.408597856760025, + "learning_rate": 1.775467111278201e-05, + "loss": 0.5139, + "step": 15923 + }, + { + "epoch": 0.4372322899505766, + "grad_norm": 0.40908995270729065, + "learning_rate": 1.7754398413250355e-05, + "loss": 0.5248, + "step": 15924 + }, + { + "epoch": 0.4372597473915431, + "grad_norm": 0.362725168466568, + "learning_rate": 1.7754125699254272e-05, + "loss": 0.4925, + "step": 15925 + }, + { + "epoch": 0.4372872048325096, + "grad_norm": 0.3752734363079071, + "learning_rate": 1.775385297079427e-05, + "loss": 0.4853, + "step": 15926 + }, + { + "epoch": 0.4373146622734761, + "grad_norm": 0.3731890916824341, + "learning_rate": 1.7753580227870858e-05, + "loss": 0.4801, + "step": 15927 + }, + { + "epoch": 0.4373421197144426, + "grad_norm": 0.379780650138855, + "learning_rate": 1.7753307470484543e-05, + "loss": 0.5181, + "step": 15928 + }, + { + "epoch": 0.43736957715540914, + "grad_norm": 0.3538089096546173, + "learning_rate": 1.775303469863584e-05, + "loss": 0.418, + "step": 15929 + }, + { + "epoch": 0.43739703459637563, + "grad_norm": 0.35898011922836304, + "learning_rate": 1.7752761912325246e-05, + "loss": 0.5556, + "step": 15930 + }, + { + "epoch": 0.4374244920373421, + "grad_norm": 0.4202660322189331, + "learning_rate": 1.775248911155328e-05, + "loss": 0.6093, + "step": 15931 + }, + { + "epoch": 0.4374519494783086, + "grad_norm": 0.3578537702560425, + "learning_rate": 1.775221629632045e-05, + "loss": 0.4687, + "step": 15932 + }, + { + "epoch": 0.4374794069192751, + "grad_norm": 0.34583160281181335, + "learning_rate": 1.775194346662726e-05, + "loss": 0.4549, + "step": 15933 + }, + { + "epoch": 0.4375068643602416, + "grad_norm": 0.34837228059768677, + "learning_rate": 1.775167062247422e-05, + "loss": 0.4639, + "step": 15934 + }, + { + "epoch": 0.4375343218012081, + "grad_norm": 0.37006428837776184, + "learning_rate": 1.7751397763861843e-05, + "loss": 0.571, + "step": 15935 + }, + { + "epoch": 0.43756177924217465, + "grad_norm": 0.36324918270111084, + "learning_rate": 1.775112489079063e-05, + "loss": 0.5134, + "step": 15936 + }, + { + "epoch": 0.43758923668314115, + "grad_norm": 0.3556821942329407, + "learning_rate": 1.7750852003261102e-05, + "loss": 0.4784, + "step": 15937 + }, + { + "epoch": 0.43761669412410764, + "grad_norm": 0.7487929463386536, + "learning_rate": 1.7750579101273758e-05, + "loss": 0.5224, + "step": 15938 + }, + { + "epoch": 0.43764415156507414, + "grad_norm": 0.36051034927368164, + "learning_rate": 1.775030618482911e-05, + "loss": 0.509, + "step": 15939 + }, + { + "epoch": 0.43767160900604063, + "grad_norm": 0.3660929501056671, + "learning_rate": 1.7750033253927665e-05, + "loss": 0.5167, + "step": 15940 + }, + { + "epoch": 0.4376990664470071, + "grad_norm": 0.3967372179031372, + "learning_rate": 1.774976030856994e-05, + "loss": 0.5214, + "step": 15941 + }, + { + "epoch": 0.4377265238879736, + "grad_norm": 0.35566696524620056, + "learning_rate": 1.7749487348756432e-05, + "loss": 0.4693, + "step": 15942 + }, + { + "epoch": 0.43775398132894017, + "grad_norm": 0.34724199771881104, + "learning_rate": 1.7749214374487663e-05, + "loss": 0.4744, + "step": 15943 + }, + { + "epoch": 0.43778143876990666, + "grad_norm": 0.41620585322380066, + "learning_rate": 1.7748941385764135e-05, + "loss": 0.5457, + "step": 15944 + }, + { + "epoch": 0.43780889621087316, + "grad_norm": 0.34973692893981934, + "learning_rate": 1.7748668382586355e-05, + "loss": 0.5638, + "step": 15945 + }, + { + "epoch": 0.43783635365183965, + "grad_norm": 0.35186755657196045, + "learning_rate": 1.7748395364954836e-05, + "loss": 0.4097, + "step": 15946 + }, + { + "epoch": 0.43786381109280614, + "grad_norm": 0.381716787815094, + "learning_rate": 1.7748122332870088e-05, + "loss": 0.5121, + "step": 15947 + }, + { + "epoch": 0.43789126853377264, + "grad_norm": 0.3242526352405548, + "learning_rate": 1.774784928633262e-05, + "loss": 0.4819, + "step": 15948 + }, + { + "epoch": 0.43791872597473913, + "grad_norm": 0.3681618869304657, + "learning_rate": 1.7747576225342938e-05, + "loss": 0.4791, + "step": 15949 + }, + { + "epoch": 0.4379461834157057, + "grad_norm": 0.35470667481422424, + "learning_rate": 1.7747303149901552e-05, + "loss": 0.4978, + "step": 15950 + }, + { + "epoch": 0.4379736408566722, + "grad_norm": 0.3981340825557709, + "learning_rate": 1.7747030060008978e-05, + "loss": 0.5466, + "step": 15951 + }, + { + "epoch": 0.43800109829763867, + "grad_norm": 0.5080363750457764, + "learning_rate": 1.7746756955665716e-05, + "loss": 0.6029, + "step": 15952 + }, + { + "epoch": 0.43802855573860516, + "grad_norm": 0.40614375472068787, + "learning_rate": 1.7746483836872283e-05, + "loss": 0.6181, + "step": 15953 + }, + { + "epoch": 0.43805601317957166, + "grad_norm": 0.32994258403778076, + "learning_rate": 1.7746210703629182e-05, + "loss": 0.4073, + "step": 15954 + }, + { + "epoch": 0.43808347062053815, + "grad_norm": 0.36442553997039795, + "learning_rate": 1.7745937555936926e-05, + "loss": 0.4854, + "step": 15955 + }, + { + "epoch": 0.43811092806150465, + "grad_norm": 0.375436395406723, + "learning_rate": 1.7745664393796026e-05, + "loss": 0.4908, + "step": 15956 + }, + { + "epoch": 0.4381383855024712, + "grad_norm": 0.3867364227771759, + "learning_rate": 1.774539121720699e-05, + "loss": 0.5117, + "step": 15957 + }, + { + "epoch": 0.4381658429434377, + "grad_norm": 0.36884868144989014, + "learning_rate": 1.7745118026170326e-05, + "loss": 0.4815, + "step": 15958 + }, + { + "epoch": 0.4381933003844042, + "grad_norm": 0.36027681827545166, + "learning_rate": 1.7744844820686547e-05, + "loss": 0.4714, + "step": 15959 + }, + { + "epoch": 0.4382207578253707, + "grad_norm": 0.3996996581554413, + "learning_rate": 1.774457160075616e-05, + "loss": 0.6128, + "step": 15960 + }, + { + "epoch": 0.43824821526633717, + "grad_norm": 0.35728779435157776, + "learning_rate": 1.7744298366379673e-05, + "loss": 0.5347, + "step": 15961 + }, + { + "epoch": 0.43827567270730367, + "grad_norm": 0.363992840051651, + "learning_rate": 1.77440251175576e-05, + "loss": 0.5158, + "step": 15962 + }, + { + "epoch": 0.43830313014827016, + "grad_norm": 0.3694213926792145, + "learning_rate": 1.7743751854290448e-05, + "loss": 0.5596, + "step": 15963 + }, + { + "epoch": 0.4383305875892367, + "grad_norm": 0.4645237326622009, + "learning_rate": 1.774347857657873e-05, + "loss": 0.6765, + "step": 15964 + }, + { + "epoch": 0.4383580450302032, + "grad_norm": 0.4396706223487854, + "learning_rate": 1.7743205284422947e-05, + "loss": 0.5916, + "step": 15965 + }, + { + "epoch": 0.4383855024711697, + "grad_norm": 0.4184701144695282, + "learning_rate": 1.7742931977823617e-05, + "loss": 0.5544, + "step": 15966 + }, + { + "epoch": 0.4384129599121362, + "grad_norm": 0.34663859009742737, + "learning_rate": 1.774265865678125e-05, + "loss": 0.4803, + "step": 15967 + }, + { + "epoch": 0.4384404173531027, + "grad_norm": 0.4119214117527008, + "learning_rate": 1.774238532129635e-05, + "loss": 0.5347, + "step": 15968 + }, + { + "epoch": 0.4384678747940692, + "grad_norm": 0.3629450798034668, + "learning_rate": 1.7742111971369436e-05, + "loss": 0.4938, + "step": 15969 + }, + { + "epoch": 0.4384953322350357, + "grad_norm": 0.354852557182312, + "learning_rate": 1.7741838607001006e-05, + "loss": 0.4066, + "step": 15970 + }, + { + "epoch": 0.4385227896760022, + "grad_norm": 0.3971484899520874, + "learning_rate": 1.774156522819158e-05, + "loss": 0.498, + "step": 15971 + }, + { + "epoch": 0.4385502471169687, + "grad_norm": 0.405958354473114, + "learning_rate": 1.774129183494166e-05, + "loss": 0.5127, + "step": 15972 + }, + { + "epoch": 0.4385777045579352, + "grad_norm": 0.34912818670272827, + "learning_rate": 1.7741018427251765e-05, + "loss": 0.5524, + "step": 15973 + }, + { + "epoch": 0.4386051619989017, + "grad_norm": 0.35008862614631653, + "learning_rate": 1.7740745005122395e-05, + "loss": 0.4608, + "step": 15974 + }, + { + "epoch": 0.4386326194398682, + "grad_norm": 0.35633668303489685, + "learning_rate": 1.774047156855407e-05, + "loss": 0.4098, + "step": 15975 + }, + { + "epoch": 0.4386600768808347, + "grad_norm": 0.36366939544677734, + "learning_rate": 1.7740198117547293e-05, + "loss": 0.4346, + "step": 15976 + }, + { + "epoch": 0.4386875343218012, + "grad_norm": 0.3880147337913513, + "learning_rate": 1.7739924652102573e-05, + "loss": 0.5519, + "step": 15977 + }, + { + "epoch": 0.43871499176276774, + "grad_norm": 0.38407576084136963, + "learning_rate": 1.7739651172220428e-05, + "loss": 0.496, + "step": 15978 + }, + { + "epoch": 0.43874244920373423, + "grad_norm": 0.3575894832611084, + "learning_rate": 1.773937767790136e-05, + "loss": 0.5439, + "step": 15979 + }, + { + "epoch": 0.4387699066447007, + "grad_norm": 0.44234228134155273, + "learning_rate": 1.7739104169145888e-05, + "loss": 0.5525, + "step": 15980 + }, + { + "epoch": 0.4387973640856672, + "grad_norm": 0.4053306579589844, + "learning_rate": 1.773883064595451e-05, + "loss": 0.5838, + "step": 15981 + }, + { + "epoch": 0.4388248215266337, + "grad_norm": 0.37242254614830017, + "learning_rate": 1.7738557108327744e-05, + "loss": 0.4766, + "step": 15982 + }, + { + "epoch": 0.4388522789676002, + "grad_norm": 0.4313550293445587, + "learning_rate": 1.77382835562661e-05, + "loss": 0.5848, + "step": 15983 + }, + { + "epoch": 0.4388797364085667, + "grad_norm": 0.37778764963150024, + "learning_rate": 1.7738009989770088e-05, + "loss": 0.5486, + "step": 15984 + }, + { + "epoch": 0.43890719384953325, + "grad_norm": 0.3821971118450165, + "learning_rate": 1.7737736408840218e-05, + "loss": 0.5471, + "step": 15985 + }, + { + "epoch": 0.43893465129049974, + "grad_norm": 0.3401161730289459, + "learning_rate": 1.7737462813477e-05, + "loss": 0.513, + "step": 15986 + }, + { + "epoch": 0.43896210873146624, + "grad_norm": 0.46170490980148315, + "learning_rate": 1.7737189203680944e-05, + "loss": 0.5863, + "step": 15987 + }, + { + "epoch": 0.43898956617243273, + "grad_norm": 0.42784059047698975, + "learning_rate": 1.7736915579452563e-05, + "loss": 0.5263, + "step": 15988 + }, + { + "epoch": 0.4390170236133992, + "grad_norm": 0.3759987950325012, + "learning_rate": 1.773664194079236e-05, + "loss": 0.4963, + "step": 15989 + }, + { + "epoch": 0.4390444810543657, + "grad_norm": 0.3591609001159668, + "learning_rate": 1.7736368287700854e-05, + "loss": 0.5078, + "step": 15990 + }, + { + "epoch": 0.4390719384953322, + "grad_norm": 0.3699112832546234, + "learning_rate": 1.7736094620178552e-05, + "loss": 0.5625, + "step": 15991 + }, + { + "epoch": 0.43909939593629876, + "grad_norm": 0.374315083026886, + "learning_rate": 1.7735820938225963e-05, + "loss": 0.4263, + "step": 15992 + }, + { + "epoch": 0.43912685337726526, + "grad_norm": 0.37277668714523315, + "learning_rate": 1.77355472418436e-05, + "loss": 0.5318, + "step": 15993 + }, + { + "epoch": 0.43915431081823175, + "grad_norm": 0.34824442863464355, + "learning_rate": 1.773527353103197e-05, + "loss": 0.4746, + "step": 15994 + }, + { + "epoch": 0.43918176825919825, + "grad_norm": 0.33013543486595154, + "learning_rate": 1.773499980579159e-05, + "loss": 0.4901, + "step": 15995 + }, + { + "epoch": 0.43920922570016474, + "grad_norm": 0.41612404584884644, + "learning_rate": 1.7734726066122966e-05, + "loss": 0.5448, + "step": 15996 + }, + { + "epoch": 0.43923668314113123, + "grad_norm": 0.3917672336101532, + "learning_rate": 1.773445231202661e-05, + "loss": 0.521, + "step": 15997 + }, + { + "epoch": 0.43926414058209773, + "grad_norm": 0.39520263671875, + "learning_rate": 1.7734178543503028e-05, + "loss": 0.5653, + "step": 15998 + }, + { + "epoch": 0.4392915980230643, + "grad_norm": 0.3708845376968384, + "learning_rate": 1.7733904760552737e-05, + "loss": 0.5165, + "step": 15999 + }, + { + "epoch": 0.43931905546403077, + "grad_norm": 0.3643205463886261, + "learning_rate": 1.7733630963176246e-05, + "loss": 0.5219, + "step": 16000 + }, + { + "epoch": 0.43934651290499727, + "grad_norm": 0.4000820815563202, + "learning_rate": 1.7733357151374062e-05, + "loss": 0.5394, + "step": 16001 + }, + { + "epoch": 0.43937397034596376, + "grad_norm": 0.3798181414604187, + "learning_rate": 1.7733083325146704e-05, + "loss": 0.5084, + "step": 16002 + }, + { + "epoch": 0.43940142778693025, + "grad_norm": 0.41463640332221985, + "learning_rate": 1.7732809484494672e-05, + "loss": 0.5425, + "step": 16003 + }, + { + "epoch": 0.43942888522789675, + "grad_norm": 0.4158388078212738, + "learning_rate": 1.7732535629418483e-05, + "loss": 0.5312, + "step": 16004 + }, + { + "epoch": 0.43945634266886324, + "grad_norm": 0.40611496567726135, + "learning_rate": 1.773226175991865e-05, + "loss": 0.5462, + "step": 16005 + }, + { + "epoch": 0.43948380010982974, + "grad_norm": 0.3649519085884094, + "learning_rate": 1.7731987875995677e-05, + "loss": 0.5693, + "step": 16006 + }, + { + "epoch": 0.4395112575507963, + "grad_norm": 0.37972137331962585, + "learning_rate": 1.7731713977650084e-05, + "loss": 0.506, + "step": 16007 + }, + { + "epoch": 0.4395387149917628, + "grad_norm": 0.34527742862701416, + "learning_rate": 1.7731440064882372e-05, + "loss": 0.5338, + "step": 16008 + }, + { + "epoch": 0.4395661724327293, + "grad_norm": 0.3539807200431824, + "learning_rate": 1.773116613769306e-05, + "loss": 0.4615, + "step": 16009 + }, + { + "epoch": 0.43959362987369577, + "grad_norm": 0.621793270111084, + "learning_rate": 1.7730892196082653e-05, + "loss": 0.564, + "step": 16010 + }, + { + "epoch": 0.43962108731466226, + "grad_norm": 0.4323682487010956, + "learning_rate": 1.7730618240051667e-05, + "loss": 0.4779, + "step": 16011 + }, + { + "epoch": 0.43964854475562876, + "grad_norm": 0.3663260042667389, + "learning_rate": 1.7730344269600608e-05, + "loss": 0.5115, + "step": 16012 + }, + { + "epoch": 0.43967600219659525, + "grad_norm": 0.33200037479400635, + "learning_rate": 1.7730070284729993e-05, + "loss": 0.5014, + "step": 16013 + }, + { + "epoch": 0.4397034596375618, + "grad_norm": 0.3666228950023651, + "learning_rate": 1.772979628544033e-05, + "loss": 0.4506, + "step": 16014 + }, + { + "epoch": 0.4397309170785283, + "grad_norm": 0.37638646364212036, + "learning_rate": 1.7729522271732124e-05, + "loss": 0.4647, + "step": 16015 + }, + { + "epoch": 0.4397583745194948, + "grad_norm": 0.33108824491500854, + "learning_rate": 1.7729248243605897e-05, + "loss": 0.5169, + "step": 16016 + }, + { + "epoch": 0.4397858319604613, + "grad_norm": 0.393625408411026, + "learning_rate": 1.7728974201062153e-05, + "loss": 0.4905, + "step": 16017 + }, + { + "epoch": 0.4398132894014278, + "grad_norm": 0.9037589430809021, + "learning_rate": 1.7728700144101407e-05, + "loss": 0.5634, + "step": 16018 + }, + { + "epoch": 0.43984074684239427, + "grad_norm": 0.36234351992607117, + "learning_rate": 1.7728426072724168e-05, + "loss": 0.4974, + "step": 16019 + }, + { + "epoch": 0.43986820428336076, + "grad_norm": 0.35893815755844116, + "learning_rate": 1.772815198693095e-05, + "loss": 0.5594, + "step": 16020 + }, + { + "epoch": 0.4398956617243273, + "grad_norm": 0.3755025565624237, + "learning_rate": 1.772787788672226e-05, + "loss": 0.5783, + "step": 16021 + }, + { + "epoch": 0.4399231191652938, + "grad_norm": 0.3486843705177307, + "learning_rate": 1.7727603772098607e-05, + "loss": 0.5039, + "step": 16022 + }, + { + "epoch": 0.4399505766062603, + "grad_norm": 0.4275088608264923, + "learning_rate": 1.772732964306051e-05, + "loss": 0.5082, + "step": 16023 + }, + { + "epoch": 0.4399780340472268, + "grad_norm": 0.35277193784713745, + "learning_rate": 1.7727055499608478e-05, + "loss": 0.5262, + "step": 16024 + }, + { + "epoch": 0.4400054914881933, + "grad_norm": 0.34900686144828796, + "learning_rate": 1.772678134174302e-05, + "loss": 0.5394, + "step": 16025 + }, + { + "epoch": 0.4400329489291598, + "grad_norm": 0.9462416172027588, + "learning_rate": 1.772650716946465e-05, + "loss": 0.5213, + "step": 16026 + }, + { + "epoch": 0.4400604063701263, + "grad_norm": 0.39672985672950745, + "learning_rate": 1.7726232982773877e-05, + "loss": 0.5738, + "step": 16027 + }, + { + "epoch": 0.4400878638110928, + "grad_norm": 0.4004582464694977, + "learning_rate": 1.7725958781671217e-05, + "loss": 0.5891, + "step": 16028 + }, + { + "epoch": 0.4401153212520593, + "grad_norm": 0.3605201244354248, + "learning_rate": 1.7725684566157176e-05, + "loss": 0.5586, + "step": 16029 + }, + { + "epoch": 0.4401427786930258, + "grad_norm": 0.3654342293739319, + "learning_rate": 1.7725410336232265e-05, + "loss": 0.5247, + "step": 16030 + }, + { + "epoch": 0.4401702361339923, + "grad_norm": 0.3668719530105591, + "learning_rate": 1.7725136091897002e-05, + "loss": 0.4921, + "step": 16031 + }, + { + "epoch": 0.4401976935749588, + "grad_norm": 0.40508851408958435, + "learning_rate": 1.772486183315189e-05, + "loss": 0.5492, + "step": 16032 + }, + { + "epoch": 0.4402251510159253, + "grad_norm": 0.40457937121391296, + "learning_rate": 1.772458755999745e-05, + "loss": 0.598, + "step": 16033 + }, + { + "epoch": 0.4402526084568918, + "grad_norm": 0.3663124740123749, + "learning_rate": 1.7724313272434183e-05, + "loss": 0.5028, + "step": 16034 + }, + { + "epoch": 0.44028006589785834, + "grad_norm": 0.3289175033569336, + "learning_rate": 1.772403897046261e-05, + "loss": 0.4817, + "step": 16035 + }, + { + "epoch": 0.44030752333882484, + "grad_norm": 0.38116955757141113, + "learning_rate": 1.772376465408324e-05, + "loss": 0.5333, + "step": 16036 + }, + { + "epoch": 0.44033498077979133, + "grad_norm": 0.5627240538597107, + "learning_rate": 1.7723490323296582e-05, + "loss": 0.5056, + "step": 16037 + }, + { + "epoch": 0.4403624382207578, + "grad_norm": 0.346038818359375, + "learning_rate": 1.772321597810315e-05, + "loss": 0.5113, + "step": 16038 + }, + { + "epoch": 0.4403898956617243, + "grad_norm": 0.35738298296928406, + "learning_rate": 1.7722941618503456e-05, + "loss": 0.5896, + "step": 16039 + }, + { + "epoch": 0.4404173531026908, + "grad_norm": 0.3827212154865265, + "learning_rate": 1.772266724449801e-05, + "loss": 0.4865, + "step": 16040 + }, + { + "epoch": 0.4404448105436573, + "grad_norm": 0.45579320192337036, + "learning_rate": 1.7722392856087326e-05, + "loss": 0.5915, + "step": 16041 + }, + { + "epoch": 0.44047226798462386, + "grad_norm": 0.3681230843067169, + "learning_rate": 1.7722118453271915e-05, + "loss": 0.5357, + "step": 16042 + }, + { + "epoch": 0.44049972542559035, + "grad_norm": 0.36544346809387207, + "learning_rate": 1.7721844036052288e-05, + "loss": 0.4919, + "step": 16043 + }, + { + "epoch": 0.44052718286655684, + "grad_norm": 0.4000301659107208, + "learning_rate": 1.7721569604428954e-05, + "loss": 0.5949, + "step": 16044 + }, + { + "epoch": 0.44055464030752334, + "grad_norm": 0.38843491673469543, + "learning_rate": 1.772129515840243e-05, + "loss": 0.4496, + "step": 16045 + }, + { + "epoch": 0.44058209774848983, + "grad_norm": 0.38684719800949097, + "learning_rate": 1.7721020697973228e-05, + "loss": 0.5477, + "step": 16046 + }, + { + "epoch": 0.4406095551894563, + "grad_norm": 0.416536420583725, + "learning_rate": 1.772074622314186e-05, + "loss": 0.5375, + "step": 16047 + }, + { + "epoch": 0.4406370126304228, + "grad_norm": 0.35261958837509155, + "learning_rate": 1.772047173390883e-05, + "loss": 0.5088, + "step": 16048 + }, + { + "epoch": 0.44066447007138937, + "grad_norm": 0.3693728744983673, + "learning_rate": 1.772019723027466e-05, + "loss": 0.5793, + "step": 16049 + }, + { + "epoch": 0.44069192751235586, + "grad_norm": 0.35269519686698914, + "learning_rate": 1.7719922712239857e-05, + "loss": 0.4535, + "step": 16050 + }, + { + "epoch": 0.44071938495332236, + "grad_norm": 0.44733014702796936, + "learning_rate": 1.7719648179804936e-05, + "loss": 0.5316, + "step": 16051 + }, + { + "epoch": 0.44074684239428885, + "grad_norm": 0.395713746547699, + "learning_rate": 1.7719373632970406e-05, + "loss": 0.5874, + "step": 16052 + }, + { + "epoch": 0.44077429983525535, + "grad_norm": 0.5107450485229492, + "learning_rate": 1.771909907173678e-05, + "loss": 0.6614, + "step": 16053 + }, + { + "epoch": 0.44080175727622184, + "grad_norm": 0.36749371886253357, + "learning_rate": 1.771882449610457e-05, + "loss": 0.4905, + "step": 16054 + }, + { + "epoch": 0.44082921471718833, + "grad_norm": 0.4176290035247803, + "learning_rate": 1.7718549906074292e-05, + "loss": 0.5163, + "step": 16055 + }, + { + "epoch": 0.4408566721581549, + "grad_norm": 0.3652890622615814, + "learning_rate": 1.771827530164645e-05, + "loss": 0.5173, + "step": 16056 + }, + { + "epoch": 0.4408841295991214, + "grad_norm": 0.3827417492866516, + "learning_rate": 1.771800068282157e-05, + "loss": 0.5292, + "step": 16057 + }, + { + "epoch": 0.44091158704008787, + "grad_norm": 0.3801558315753937, + "learning_rate": 1.7717726049600145e-05, + "loss": 0.5208, + "step": 16058 + }, + { + "epoch": 0.44093904448105437, + "grad_norm": 0.4223015308380127, + "learning_rate": 1.7717451401982704e-05, + "loss": 0.5899, + "step": 16059 + }, + { + "epoch": 0.44096650192202086, + "grad_norm": 0.3582773208618164, + "learning_rate": 1.771717673996975e-05, + "loss": 0.3916, + "step": 16060 + }, + { + "epoch": 0.44099395936298735, + "grad_norm": 0.4195508658885956, + "learning_rate": 1.7716902063561798e-05, + "loss": 0.4862, + "step": 16061 + }, + { + "epoch": 0.44102141680395385, + "grad_norm": 0.33842170238494873, + "learning_rate": 1.7716627372759366e-05, + "loss": 0.49, + "step": 16062 + }, + { + "epoch": 0.4410488742449204, + "grad_norm": 0.46936091780662537, + "learning_rate": 1.7716352667562957e-05, + "loss": 0.5023, + "step": 16063 + }, + { + "epoch": 0.4410763316858869, + "grad_norm": 0.331992506980896, + "learning_rate": 1.7716077947973086e-05, + "loss": 0.4759, + "step": 16064 + }, + { + "epoch": 0.4411037891268534, + "grad_norm": 0.3947388231754303, + "learning_rate": 1.771580321399027e-05, + "loss": 0.5671, + "step": 16065 + }, + { + "epoch": 0.4411312465678199, + "grad_norm": 0.4215451776981354, + "learning_rate": 1.7715528465615017e-05, + "loss": 0.4746, + "step": 16066 + }, + { + "epoch": 0.4411587040087864, + "grad_norm": 0.3842114806175232, + "learning_rate": 1.771525370284784e-05, + "loss": 0.5236, + "step": 16067 + }, + { + "epoch": 0.44118616144975287, + "grad_norm": 0.36023566126823425, + "learning_rate": 1.7714978925689256e-05, + "loss": 0.4682, + "step": 16068 + }, + { + "epoch": 0.44121361889071936, + "grad_norm": 0.3463267982006073, + "learning_rate": 1.771470413413977e-05, + "loss": 0.5249, + "step": 16069 + }, + { + "epoch": 0.4412410763316859, + "grad_norm": 0.355824738740921, + "learning_rate": 1.7714429328199897e-05, + "loss": 0.5355, + "step": 16070 + }, + { + "epoch": 0.4412685337726524, + "grad_norm": 0.43725666403770447, + "learning_rate": 1.7714154507870155e-05, + "loss": 0.5354, + "step": 16071 + }, + { + "epoch": 0.4412959912136189, + "grad_norm": 0.3928440809249878, + "learning_rate": 1.771387967315105e-05, + "loss": 0.4906, + "step": 16072 + }, + { + "epoch": 0.4413234486545854, + "grad_norm": 0.40828660130500793, + "learning_rate": 1.7713604824043104e-05, + "loss": 0.4607, + "step": 16073 + }, + { + "epoch": 0.4413509060955519, + "grad_norm": 0.3941815197467804, + "learning_rate": 1.7713329960546815e-05, + "loss": 0.5703, + "step": 16074 + }, + { + "epoch": 0.4413783635365184, + "grad_norm": 0.4187332093715668, + "learning_rate": 1.771305508266271e-05, + "loss": 0.4597, + "step": 16075 + }, + { + "epoch": 0.4414058209774849, + "grad_norm": 0.34143298864364624, + "learning_rate": 1.771278019039129e-05, + "loss": 0.4331, + "step": 16076 + }, + { + "epoch": 0.4414332784184514, + "grad_norm": 0.36700600385665894, + "learning_rate": 1.771250528373308e-05, + "loss": 0.553, + "step": 16077 + }, + { + "epoch": 0.4414607358594179, + "grad_norm": 0.41106295585632324, + "learning_rate": 1.771223036268858e-05, + "loss": 0.4853, + "step": 16078 + }, + { + "epoch": 0.4414881933003844, + "grad_norm": 0.39432722330093384, + "learning_rate": 1.7711955427258312e-05, + "loss": 0.5155, + "step": 16079 + }, + { + "epoch": 0.4415156507413509, + "grad_norm": 0.3491683900356293, + "learning_rate": 1.7711680477442783e-05, + "loss": 0.5097, + "step": 16080 + }, + { + "epoch": 0.4415431081823174, + "grad_norm": 0.3833048641681671, + "learning_rate": 1.7711405513242513e-05, + "loss": 0.5377, + "step": 16081 + }, + { + "epoch": 0.4415705656232839, + "grad_norm": 0.6803582310676575, + "learning_rate": 1.7711130534658007e-05, + "loss": 0.663, + "step": 16082 + }, + { + "epoch": 0.4415980230642504, + "grad_norm": 0.3410532474517822, + "learning_rate": 1.771085554168978e-05, + "loss": 0.4344, + "step": 16083 + }, + { + "epoch": 0.44162548050521694, + "grad_norm": 0.3486756682395935, + "learning_rate": 1.7710580534338353e-05, + "loss": 0.4403, + "step": 16084 + }, + { + "epoch": 0.44165293794618343, + "grad_norm": 0.34327831864356995, + "learning_rate": 1.7710305512604227e-05, + "loss": 0.4837, + "step": 16085 + }, + { + "epoch": 0.4416803953871499, + "grad_norm": 0.39238032698631287, + "learning_rate": 1.7710030476487922e-05, + "loss": 0.4501, + "step": 16086 + }, + { + "epoch": 0.4417078528281164, + "grad_norm": 0.4031986594200134, + "learning_rate": 1.7709755425989947e-05, + "loss": 0.51, + "step": 16087 + }, + { + "epoch": 0.4417353102690829, + "grad_norm": 0.36213165521621704, + "learning_rate": 1.770948036111082e-05, + "loss": 0.4201, + "step": 16088 + }, + { + "epoch": 0.4417627677100494, + "grad_norm": 0.38540422916412354, + "learning_rate": 1.770920528185105e-05, + "loss": 0.4802, + "step": 16089 + }, + { + "epoch": 0.4417902251510159, + "grad_norm": 0.3580580949783325, + "learning_rate": 1.7708930188211156e-05, + "loss": 0.5284, + "step": 16090 + }, + { + "epoch": 0.44181768259198245, + "grad_norm": 0.4043821096420288, + "learning_rate": 1.7708655080191644e-05, + "loss": 0.5318, + "step": 16091 + }, + { + "epoch": 0.44184514003294895, + "grad_norm": 0.3658545911312103, + "learning_rate": 1.770837995779303e-05, + "loss": 0.5321, + "step": 16092 + }, + { + "epoch": 0.44187259747391544, + "grad_norm": 0.36862120032310486, + "learning_rate": 1.7708104821015824e-05, + "loss": 0.4958, + "step": 16093 + }, + { + "epoch": 0.44190005491488193, + "grad_norm": 0.36862418055534363, + "learning_rate": 1.7707829669860546e-05, + "loss": 0.5338, + "step": 16094 + }, + { + "epoch": 0.44192751235584843, + "grad_norm": 0.36742526292800903, + "learning_rate": 1.7707554504327703e-05, + "loss": 0.4951, + "step": 16095 + }, + { + "epoch": 0.4419549697968149, + "grad_norm": 0.4655337333679199, + "learning_rate": 1.7707279324417815e-05, + "loss": 0.5614, + "step": 16096 + }, + { + "epoch": 0.4419824272377814, + "grad_norm": 0.41464704275131226, + "learning_rate": 1.7707004130131385e-05, + "loss": 0.5716, + "step": 16097 + }, + { + "epoch": 0.44200988467874797, + "grad_norm": 0.3828180134296417, + "learning_rate": 1.770672892146894e-05, + "loss": 0.539, + "step": 16098 + }, + { + "epoch": 0.44203734211971446, + "grad_norm": 0.33615949749946594, + "learning_rate": 1.770645369843098e-05, + "loss": 0.4822, + "step": 16099 + }, + { + "epoch": 0.44206479956068095, + "grad_norm": 0.3751169741153717, + "learning_rate": 1.7706178461018027e-05, + "loss": 0.576, + "step": 16100 + }, + { + "epoch": 0.44209225700164745, + "grad_norm": 0.37492918968200684, + "learning_rate": 1.770590320923059e-05, + "loss": 0.5691, + "step": 16101 + }, + { + "epoch": 0.44211971444261394, + "grad_norm": 0.37807121872901917, + "learning_rate": 1.7705627943069186e-05, + "loss": 0.6017, + "step": 16102 + }, + { + "epoch": 0.44214717188358044, + "grad_norm": 0.34820759296417236, + "learning_rate": 1.7705352662534325e-05, + "loss": 0.4913, + "step": 16103 + }, + { + "epoch": 0.44217462932454693, + "grad_norm": 0.3840310573577881, + "learning_rate": 1.770507736762652e-05, + "loss": 0.4882, + "step": 16104 + }, + { + "epoch": 0.4422020867655135, + "grad_norm": 0.3873975872993469, + "learning_rate": 1.770480205834629e-05, + "loss": 0.5834, + "step": 16105 + }, + { + "epoch": 0.44222954420648, + "grad_norm": 0.35591810941696167, + "learning_rate": 1.7704526734694146e-05, + "loss": 0.5236, + "step": 16106 + }, + { + "epoch": 0.44225700164744647, + "grad_norm": 0.4110228419303894, + "learning_rate": 1.7704251396670596e-05, + "loss": 0.5723, + "step": 16107 + }, + { + "epoch": 0.44228445908841296, + "grad_norm": 0.3800068497657776, + "learning_rate": 1.7703976044276167e-05, + "loss": 0.5783, + "step": 16108 + }, + { + "epoch": 0.44231191652937946, + "grad_norm": 0.4323784112930298, + "learning_rate": 1.7703700677511356e-05, + "loss": 0.5162, + "step": 16109 + }, + { + "epoch": 0.44233937397034595, + "grad_norm": 0.3532456159591675, + "learning_rate": 1.7703425296376685e-05, + "loss": 0.5206, + "step": 16110 + }, + { + "epoch": 0.44236683141131244, + "grad_norm": 0.3582749664783478, + "learning_rate": 1.770314990087267e-05, + "loss": 0.4802, + "step": 16111 + }, + { + "epoch": 0.442394288852279, + "grad_norm": 0.4507274627685547, + "learning_rate": 1.770287449099982e-05, + "loss": 0.5662, + "step": 16112 + }, + { + "epoch": 0.4424217462932455, + "grad_norm": 0.39153197407722473, + "learning_rate": 1.770259906675865e-05, + "loss": 0.6087, + "step": 16113 + }, + { + "epoch": 0.442449203734212, + "grad_norm": 0.36976945400238037, + "learning_rate": 1.7702323628149677e-05, + "loss": 0.4115, + "step": 16114 + }, + { + "epoch": 0.4424766611751785, + "grad_norm": 0.3431207537651062, + "learning_rate": 1.770204817517341e-05, + "loss": 0.5089, + "step": 16115 + }, + { + "epoch": 0.44250411861614497, + "grad_norm": 0.3672163188457489, + "learning_rate": 1.7701772707830364e-05, + "loss": 0.4928, + "step": 16116 + }, + { + "epoch": 0.44253157605711146, + "grad_norm": 0.7632066607475281, + "learning_rate": 1.7701497226121058e-05, + "loss": 0.6196, + "step": 16117 + }, + { + "epoch": 0.44255903349807796, + "grad_norm": 0.4191618859767914, + "learning_rate": 1.7701221730045998e-05, + "loss": 0.4831, + "step": 16118 + }, + { + "epoch": 0.4425864909390445, + "grad_norm": 0.36553719639778137, + "learning_rate": 1.77009462196057e-05, + "loss": 0.4359, + "step": 16119 + }, + { + "epoch": 0.442613948380011, + "grad_norm": 0.40466609597206116, + "learning_rate": 1.7700670694800682e-05, + "loss": 0.5705, + "step": 16120 + }, + { + "epoch": 0.4426414058209775, + "grad_norm": 0.42002734541893005, + "learning_rate": 1.7700395155631456e-05, + "loss": 0.4483, + "step": 16121 + }, + { + "epoch": 0.442668863261944, + "grad_norm": 0.4822810888290405, + "learning_rate": 1.7700119602098536e-05, + "loss": 0.5338, + "step": 16122 + }, + { + "epoch": 0.4426963207029105, + "grad_norm": 0.36181890964508057, + "learning_rate": 1.769984403420243e-05, + "loss": 0.5017, + "step": 16123 + }, + { + "epoch": 0.442723778143877, + "grad_norm": 0.35915908217430115, + "learning_rate": 1.7699568451943663e-05, + "loss": 0.436, + "step": 16124 + }, + { + "epoch": 0.44275123558484347, + "grad_norm": 0.3797287940979004, + "learning_rate": 1.7699292855322743e-05, + "loss": 0.4695, + "step": 16125 + }, + { + "epoch": 0.44277869302581, + "grad_norm": 0.3674972653388977, + "learning_rate": 1.7699017244340183e-05, + "loss": 0.5474, + "step": 16126 + }, + { + "epoch": 0.4428061504667765, + "grad_norm": 0.31156811118125916, + "learning_rate": 1.7698741618996498e-05, + "loss": 0.4717, + "step": 16127 + }, + { + "epoch": 0.442833607907743, + "grad_norm": 0.3621383607387543, + "learning_rate": 1.7698465979292203e-05, + "loss": 0.5196, + "step": 16128 + }, + { + "epoch": 0.4428610653487095, + "grad_norm": 0.3658513128757477, + "learning_rate": 1.7698190325227816e-05, + "loss": 0.5065, + "step": 16129 + }, + { + "epoch": 0.442888522789676, + "grad_norm": 0.39195987582206726, + "learning_rate": 1.7697914656803842e-05, + "loss": 0.5713, + "step": 16130 + }, + { + "epoch": 0.4429159802306425, + "grad_norm": 0.3768465220928192, + "learning_rate": 1.76976389740208e-05, + "loss": 0.506, + "step": 16131 + }, + { + "epoch": 0.442943437671609, + "grad_norm": 0.4017414152622223, + "learning_rate": 1.7697363276879207e-05, + "loss": 0.4919, + "step": 16132 + }, + { + "epoch": 0.44297089511257554, + "grad_norm": 0.3649878203868866, + "learning_rate": 1.7697087565379572e-05, + "loss": 0.4927, + "step": 16133 + }, + { + "epoch": 0.44299835255354203, + "grad_norm": 0.3686313331127167, + "learning_rate": 1.7696811839522418e-05, + "loss": 0.5682, + "step": 16134 + }, + { + "epoch": 0.4430258099945085, + "grad_norm": 0.350781112909317, + "learning_rate": 1.7696536099308248e-05, + "loss": 0.6492, + "step": 16135 + }, + { + "epoch": 0.443053267435475, + "grad_norm": 0.35362258553504944, + "learning_rate": 1.7696260344737584e-05, + "loss": 0.4782, + "step": 16136 + }, + { + "epoch": 0.4430807248764415, + "grad_norm": 0.3760995864868164, + "learning_rate": 1.7695984575810935e-05, + "loss": 0.5567, + "step": 16137 + }, + { + "epoch": 0.443108182317408, + "grad_norm": 0.3840887248516083, + "learning_rate": 1.769570879252882e-05, + "loss": 0.5861, + "step": 16138 + }, + { + "epoch": 0.4431356397583745, + "grad_norm": 1.1924145221710205, + "learning_rate": 1.7695432994891755e-05, + "loss": 0.6142, + "step": 16139 + }, + { + "epoch": 0.443163097199341, + "grad_norm": 0.34570831060409546, + "learning_rate": 1.769515718290025e-05, + "loss": 0.531, + "step": 16140 + }, + { + "epoch": 0.44319055464030754, + "grad_norm": 0.3442162573337555, + "learning_rate": 1.769488135655482e-05, + "loss": 0.5362, + "step": 16141 + }, + { + "epoch": 0.44321801208127404, + "grad_norm": 0.3471300005912781, + "learning_rate": 1.769460551585598e-05, + "loss": 0.5045, + "step": 16142 + }, + { + "epoch": 0.44324546952224053, + "grad_norm": 0.3458541929721832, + "learning_rate": 1.7694329660804243e-05, + "loss": 0.4815, + "step": 16143 + }, + { + "epoch": 0.443272926963207, + "grad_norm": 0.39499083161354065, + "learning_rate": 1.7694053791400127e-05, + "loss": 0.5356, + "step": 16144 + }, + { + "epoch": 0.4433003844041735, + "grad_norm": 0.3441265821456909, + "learning_rate": 1.769377790764415e-05, + "loss": 0.4906, + "step": 16145 + }, + { + "epoch": 0.44332784184514, + "grad_norm": 0.3962032198905945, + "learning_rate": 1.7693502009536817e-05, + "loss": 0.5878, + "step": 16146 + }, + { + "epoch": 0.4433552992861065, + "grad_norm": 0.37307220697402954, + "learning_rate": 1.7693226097078646e-05, + "loss": 0.5527, + "step": 16147 + }, + { + "epoch": 0.44338275672707306, + "grad_norm": 0.3970467150211334, + "learning_rate": 1.7692950170270155e-05, + "loss": 0.4801, + "step": 16148 + }, + { + "epoch": 0.44341021416803955, + "grad_norm": 0.4585181474685669, + "learning_rate": 1.7692674229111855e-05, + "loss": 0.5949, + "step": 16149 + }, + { + "epoch": 0.44343767160900605, + "grad_norm": 0.3821594715118408, + "learning_rate": 1.7692398273604264e-05, + "loss": 0.5545, + "step": 16150 + }, + { + "epoch": 0.44346512904997254, + "grad_norm": 0.37809842824935913, + "learning_rate": 1.7692122303747895e-05, + "loss": 0.5014, + "step": 16151 + }, + { + "epoch": 0.44349258649093903, + "grad_norm": 0.42849352955818176, + "learning_rate": 1.7691846319543262e-05, + "loss": 0.5843, + "step": 16152 + }, + { + "epoch": 0.4435200439319055, + "grad_norm": 0.34685197472572327, + "learning_rate": 1.769157032099088e-05, + "loss": 0.5099, + "step": 16153 + }, + { + "epoch": 0.443547501372872, + "grad_norm": 0.5483766794204712, + "learning_rate": 1.7691294308091268e-05, + "loss": 0.4879, + "step": 16154 + }, + { + "epoch": 0.44357495881383857, + "grad_norm": 0.3368704915046692, + "learning_rate": 1.769101828084493e-05, + "loss": 0.4746, + "step": 16155 + }, + { + "epoch": 0.44360241625480507, + "grad_norm": 0.3965184688568115, + "learning_rate": 1.7690742239252396e-05, + "loss": 0.4848, + "step": 16156 + }, + { + "epoch": 0.44362987369577156, + "grad_norm": 0.45751336216926575, + "learning_rate": 1.7690466183314172e-05, + "loss": 0.5505, + "step": 16157 + }, + { + "epoch": 0.44365733113673805, + "grad_norm": 0.3538562059402466, + "learning_rate": 1.7690190113030768e-05, + "loss": 0.4348, + "step": 16158 + }, + { + "epoch": 0.44368478857770455, + "grad_norm": 0.3939152657985687, + "learning_rate": 1.7689914028402712e-05, + "loss": 0.5203, + "step": 16159 + }, + { + "epoch": 0.44371224601867104, + "grad_norm": 0.38007253408432007, + "learning_rate": 1.768963792943051e-05, + "loss": 0.4956, + "step": 16160 + }, + { + "epoch": 0.44373970345963754, + "grad_norm": 0.3676116168498993, + "learning_rate": 1.768936181611468e-05, + "loss": 0.4464, + "step": 16161 + }, + { + "epoch": 0.4437671609006041, + "grad_norm": 0.44770869612693787, + "learning_rate": 1.768908568845573e-05, + "loss": 0.5426, + "step": 16162 + }, + { + "epoch": 0.4437946183415706, + "grad_norm": 0.3889642059803009, + "learning_rate": 1.7688809546454185e-05, + "loss": 0.5029, + "step": 16163 + }, + { + "epoch": 0.4438220757825371, + "grad_norm": 0.4090629816055298, + "learning_rate": 1.7688533390110557e-05, + "loss": 0.6285, + "step": 16164 + }, + { + "epoch": 0.44384953322350357, + "grad_norm": 0.41927003860473633, + "learning_rate": 1.7688257219425363e-05, + "loss": 0.5276, + "step": 16165 + }, + { + "epoch": 0.44387699066447006, + "grad_norm": 0.38014259934425354, + "learning_rate": 1.7687981034399115e-05, + "loss": 0.4902, + "step": 16166 + }, + { + "epoch": 0.44390444810543656, + "grad_norm": 0.41481706500053406, + "learning_rate": 1.768770483503232e-05, + "loss": 0.5298, + "step": 16167 + }, + { + "epoch": 0.44393190554640305, + "grad_norm": 0.36542627215385437, + "learning_rate": 1.768742862132551e-05, + "loss": 0.4866, + "step": 16168 + }, + { + "epoch": 0.4439593629873696, + "grad_norm": 0.38311904668807983, + "learning_rate": 1.768715239327919e-05, + "loss": 0.5188, + "step": 16169 + }, + { + "epoch": 0.4439868204283361, + "grad_norm": 0.35593292117118835, + "learning_rate": 1.768687615089388e-05, + "loss": 0.5022, + "step": 16170 + }, + { + "epoch": 0.4440142778693026, + "grad_norm": 0.39767372608184814, + "learning_rate": 1.768659989417009e-05, + "loss": 0.5877, + "step": 16171 + }, + { + "epoch": 0.4440417353102691, + "grad_norm": 0.4220234751701355, + "learning_rate": 1.7686323623108336e-05, + "loss": 0.5661, + "step": 16172 + }, + { + "epoch": 0.4440691927512356, + "grad_norm": 0.400147408246994, + "learning_rate": 1.7686047337709137e-05, + "loss": 0.5115, + "step": 16173 + }, + { + "epoch": 0.44409665019220207, + "grad_norm": 0.36150652170181274, + "learning_rate": 1.768577103797301e-05, + "loss": 0.5075, + "step": 16174 + }, + { + "epoch": 0.44412410763316856, + "grad_norm": 0.38955724239349365, + "learning_rate": 1.768549472390046e-05, + "loss": 0.4574, + "step": 16175 + }, + { + "epoch": 0.4441515650741351, + "grad_norm": 0.43840545415878296, + "learning_rate": 1.7685218395492016e-05, + "loss": 0.6293, + "step": 16176 + }, + { + "epoch": 0.4441790225151016, + "grad_norm": 0.3827981948852539, + "learning_rate": 1.7684942052748183e-05, + "loss": 0.5715, + "step": 16177 + }, + { + "epoch": 0.4442064799560681, + "grad_norm": 0.3449585735797882, + "learning_rate": 1.7684665695669482e-05, + "loss": 0.5107, + "step": 16178 + }, + { + "epoch": 0.4442339373970346, + "grad_norm": 0.343605637550354, + "learning_rate": 1.7684389324256426e-05, + "loss": 0.4787, + "step": 16179 + }, + { + "epoch": 0.4442613948380011, + "grad_norm": 0.3180278241634369, + "learning_rate": 1.768411293850953e-05, + "loss": 0.4046, + "step": 16180 + }, + { + "epoch": 0.4442888522789676, + "grad_norm": 0.361375629901886, + "learning_rate": 1.7683836538429314e-05, + "loss": 0.5961, + "step": 16181 + }, + { + "epoch": 0.4443163097199341, + "grad_norm": 0.4050460457801819, + "learning_rate": 1.768356012401629e-05, + "loss": 0.4457, + "step": 16182 + }, + { + "epoch": 0.4443437671609006, + "grad_norm": 0.33976760506629944, + "learning_rate": 1.7683283695270973e-05, + "loss": 0.4215, + "step": 16183 + }, + { + "epoch": 0.4443712246018671, + "grad_norm": 0.3804028332233429, + "learning_rate": 1.7683007252193878e-05, + "loss": 0.5192, + "step": 16184 + }, + { + "epoch": 0.4443986820428336, + "grad_norm": 0.38831809163093567, + "learning_rate": 1.7682730794785524e-05, + "loss": 0.5415, + "step": 16185 + }, + { + "epoch": 0.4444261394838001, + "grad_norm": 0.3560028076171875, + "learning_rate": 1.7682454323046426e-05, + "loss": 0.5036, + "step": 16186 + }, + { + "epoch": 0.4444535969247666, + "grad_norm": 0.49475836753845215, + "learning_rate": 1.7682177836977096e-05, + "loss": 0.5972, + "step": 16187 + }, + { + "epoch": 0.4444810543657331, + "grad_norm": 0.3918502926826477, + "learning_rate": 1.7681901336578056e-05, + "loss": 0.4851, + "step": 16188 + }, + { + "epoch": 0.4445085118066996, + "grad_norm": 0.33554813265800476, + "learning_rate": 1.7681624821849817e-05, + "loss": 0.4604, + "step": 16189 + }, + { + "epoch": 0.44453596924766614, + "grad_norm": 0.3732165992259979, + "learning_rate": 1.7681348292792894e-05, + "loss": 0.5247, + "step": 16190 + }, + { + "epoch": 0.44456342668863263, + "grad_norm": 0.37881898880004883, + "learning_rate": 1.768107174940781e-05, + "loss": 0.5832, + "step": 16191 + }, + { + "epoch": 0.44459088412959913, + "grad_norm": 0.34582021832466125, + "learning_rate": 1.7680795191695067e-05, + "loss": 0.4623, + "step": 16192 + }, + { + "epoch": 0.4446183415705656, + "grad_norm": 0.35325780510902405, + "learning_rate": 1.76805186196552e-05, + "loss": 0.4293, + "step": 16193 + }, + { + "epoch": 0.4446457990115321, + "grad_norm": 0.39461180567741394, + "learning_rate": 1.7680242033288705e-05, + "loss": 0.6124, + "step": 16194 + }, + { + "epoch": 0.4446732564524986, + "grad_norm": 0.43209999799728394, + "learning_rate": 1.7679965432596112e-05, + "loss": 0.5031, + "step": 16195 + }, + { + "epoch": 0.4447007138934651, + "grad_norm": 0.36966854333877563, + "learning_rate": 1.7679688817577932e-05, + "loss": 0.439, + "step": 16196 + }, + { + "epoch": 0.44472817133443165, + "grad_norm": 0.3481385409832001, + "learning_rate": 1.767941218823468e-05, + "loss": 0.5069, + "step": 16197 + }, + { + "epoch": 0.44475562877539815, + "grad_norm": 0.3758752942085266, + "learning_rate": 1.7679135544566874e-05, + "loss": 0.4603, + "step": 16198 + }, + { + "epoch": 0.44478308621636464, + "grad_norm": 0.3594307005405426, + "learning_rate": 1.767885888657503e-05, + "loss": 0.5164, + "step": 16199 + }, + { + "epoch": 0.44481054365733114, + "grad_norm": 0.37687569856643677, + "learning_rate": 1.7678582214259662e-05, + "loss": 0.474, + "step": 16200 + }, + { + "epoch": 0.44483800109829763, + "grad_norm": 0.3937789499759674, + "learning_rate": 1.7678305527621292e-05, + "loss": 0.5442, + "step": 16201 + }, + { + "epoch": 0.4448654585392641, + "grad_norm": 0.42549073696136475, + "learning_rate": 1.767802882666043e-05, + "loss": 0.5092, + "step": 16202 + }, + { + "epoch": 0.4448929159802306, + "grad_norm": 0.3591281771659851, + "learning_rate": 1.767775211137759e-05, + "loss": 0.4268, + "step": 16203 + }, + { + "epoch": 0.44492037342119717, + "grad_norm": 0.32215529680252075, + "learning_rate": 1.767747538177329e-05, + "loss": 0.4439, + "step": 16204 + }, + { + "epoch": 0.44494783086216366, + "grad_norm": 0.41082727909088135, + "learning_rate": 1.7677198637848053e-05, + "loss": 0.5418, + "step": 16205 + }, + { + "epoch": 0.44497528830313016, + "grad_norm": 0.44698724150657654, + "learning_rate": 1.7676921879602388e-05, + "loss": 0.4789, + "step": 16206 + }, + { + "epoch": 0.44500274574409665, + "grad_norm": 0.38168463110923767, + "learning_rate": 1.7676645107036815e-05, + "loss": 0.6087, + "step": 16207 + }, + { + "epoch": 0.44503020318506314, + "grad_norm": 0.369469553232193, + "learning_rate": 1.767636832015185e-05, + "loss": 0.5152, + "step": 16208 + }, + { + "epoch": 0.44505766062602964, + "grad_norm": 0.3668217957019806, + "learning_rate": 1.7676091518948003e-05, + "loss": 0.4783, + "step": 16209 + }, + { + "epoch": 0.44508511806699613, + "grad_norm": 0.32934874296188354, + "learning_rate": 1.7675814703425798e-05, + "loss": 0.5009, + "step": 16210 + }, + { + "epoch": 0.4451125755079627, + "grad_norm": 0.40517526865005493, + "learning_rate": 1.767553787358575e-05, + "loss": 0.5101, + "step": 16211 + }, + { + "epoch": 0.4451400329489292, + "grad_norm": 0.41313445568084717, + "learning_rate": 1.767526102942837e-05, + "loss": 0.5107, + "step": 16212 + }, + { + "epoch": 0.44516749038989567, + "grad_norm": 0.4035915732383728, + "learning_rate": 1.7674984170954185e-05, + "loss": 0.5529, + "step": 16213 + }, + { + "epoch": 0.44519494783086216, + "grad_norm": 0.35228946805000305, + "learning_rate": 1.7674707298163703e-05, + "loss": 0.5109, + "step": 16214 + }, + { + "epoch": 0.44522240527182866, + "grad_norm": 0.44823500514030457, + "learning_rate": 1.767443041105744e-05, + "loss": 0.5309, + "step": 16215 + }, + { + "epoch": 0.44524986271279515, + "grad_norm": 0.33598530292510986, + "learning_rate": 1.767415350963592e-05, + "loss": 0.5263, + "step": 16216 + }, + { + "epoch": 0.44527732015376165, + "grad_norm": 0.3530315160751343, + "learning_rate": 1.767387659389965e-05, + "loss": 0.467, + "step": 16217 + }, + { + "epoch": 0.4453047775947282, + "grad_norm": 3.568052291870117, + "learning_rate": 1.7673599663849147e-05, + "loss": 0.5451, + "step": 16218 + }, + { + "epoch": 0.4453322350356947, + "grad_norm": 0.3755253553390503, + "learning_rate": 1.7673322719484936e-05, + "loss": 0.4366, + "step": 16219 + }, + { + "epoch": 0.4453596924766612, + "grad_norm": 0.4278234839439392, + "learning_rate": 1.767304576080753e-05, + "loss": 0.4535, + "step": 16220 + }, + { + "epoch": 0.4453871499176277, + "grad_norm": 0.3449489176273346, + "learning_rate": 1.7672768787817444e-05, + "loss": 0.4796, + "step": 16221 + }, + { + "epoch": 0.4454146073585942, + "grad_norm": 0.38805052638053894, + "learning_rate": 1.7672491800515198e-05, + "loss": 0.5628, + "step": 16222 + }, + { + "epoch": 0.44544206479956067, + "grad_norm": 0.34644490480422974, + "learning_rate": 1.76722147989013e-05, + "loss": 0.4583, + "step": 16223 + }, + { + "epoch": 0.44546952224052716, + "grad_norm": 0.3869039714336395, + "learning_rate": 1.7671937782976278e-05, + "loss": 0.507, + "step": 16224 + }, + { + "epoch": 0.4454969796814937, + "grad_norm": 0.4172741770744324, + "learning_rate": 1.767166075274064e-05, + "loss": 0.4705, + "step": 16225 + }, + { + "epoch": 0.4455244371224602, + "grad_norm": 0.3480086326599121, + "learning_rate": 1.7671383708194908e-05, + "loss": 0.5207, + "step": 16226 + }, + { + "epoch": 0.4455518945634267, + "grad_norm": 0.3607298731803894, + "learning_rate": 1.7671106649339595e-05, + "loss": 0.6085, + "step": 16227 + }, + { + "epoch": 0.4455793520043932, + "grad_norm": 0.5689302682876587, + "learning_rate": 1.7670829576175222e-05, + "loss": 0.5208, + "step": 16228 + }, + { + "epoch": 0.4456068094453597, + "grad_norm": 0.34528982639312744, + "learning_rate": 1.7670552488702305e-05, + "loss": 0.4352, + "step": 16229 + }, + { + "epoch": 0.4456342668863262, + "grad_norm": 0.34856030344963074, + "learning_rate": 1.7670275386921354e-05, + "loss": 0.5256, + "step": 16230 + }, + { + "epoch": 0.4456617243272927, + "grad_norm": 0.35272538661956787, + "learning_rate": 1.7669998270832896e-05, + "loss": 0.4831, + "step": 16231 + }, + { + "epoch": 0.4456891817682592, + "grad_norm": 0.3707428574562073, + "learning_rate": 1.766972114043744e-05, + "loss": 0.5278, + "step": 16232 + }, + { + "epoch": 0.4457166392092257, + "grad_norm": 0.3768356442451477, + "learning_rate": 1.766944399573551e-05, + "loss": 0.5009, + "step": 16233 + }, + { + "epoch": 0.4457440966501922, + "grad_norm": 0.3978661596775055, + "learning_rate": 1.7669166836727616e-05, + "loss": 0.4154, + "step": 16234 + }, + { + "epoch": 0.4457715540911587, + "grad_norm": 0.3842734694480896, + "learning_rate": 1.766888966341428e-05, + "loss": 0.5915, + "step": 16235 + }, + { + "epoch": 0.4457990115321252, + "grad_norm": 0.3481404781341553, + "learning_rate": 1.7668612475796013e-05, + "loss": 0.561, + "step": 16236 + }, + { + "epoch": 0.4458264689730917, + "grad_norm": 0.40750834345817566, + "learning_rate": 1.7668335273873342e-05, + "loss": 0.4909, + "step": 16237 + }, + { + "epoch": 0.4458539264140582, + "grad_norm": 0.40168800950050354, + "learning_rate": 1.7668058057646775e-05, + "loss": 0.4593, + "step": 16238 + }, + { + "epoch": 0.44588138385502474, + "grad_norm": 0.3796750009059906, + "learning_rate": 1.7667780827116833e-05, + "loss": 0.5066, + "step": 16239 + }, + { + "epoch": 0.44590884129599123, + "grad_norm": 0.42957910895347595, + "learning_rate": 1.7667503582284036e-05, + "loss": 0.5139, + "step": 16240 + }, + { + "epoch": 0.4459362987369577, + "grad_norm": 0.38503462076187134, + "learning_rate": 1.7667226323148894e-05, + "loss": 0.468, + "step": 16241 + }, + { + "epoch": 0.4459637561779242, + "grad_norm": 0.47551414370536804, + "learning_rate": 1.766694904971193e-05, + "loss": 0.5744, + "step": 16242 + }, + { + "epoch": 0.4459912136188907, + "grad_norm": 0.3701879680156708, + "learning_rate": 1.7666671761973654e-05, + "loss": 0.4785, + "step": 16243 + }, + { + "epoch": 0.4460186710598572, + "grad_norm": 0.37971368432044983, + "learning_rate": 1.7666394459934592e-05, + "loss": 0.4618, + "step": 16244 + }, + { + "epoch": 0.4460461285008237, + "grad_norm": 0.40801090002059937, + "learning_rate": 1.7666117143595256e-05, + "loss": 0.4549, + "step": 16245 + }, + { + "epoch": 0.44607358594179025, + "grad_norm": 0.3757406771183014, + "learning_rate": 1.7665839812956168e-05, + "loss": 0.4706, + "step": 16246 + }, + { + "epoch": 0.44610104338275675, + "grad_norm": 0.3197157680988312, + "learning_rate": 1.7665562468017838e-05, + "loss": 0.4485, + "step": 16247 + }, + { + "epoch": 0.44612850082372324, + "grad_norm": 0.3615194857120514, + "learning_rate": 1.766528510878079e-05, + "loss": 0.4598, + "step": 16248 + }, + { + "epoch": 0.44615595826468973, + "grad_norm": 0.3849776089191437, + "learning_rate": 1.7665007735245537e-05, + "loss": 0.4806, + "step": 16249 + }, + { + "epoch": 0.44618341570565623, + "grad_norm": 0.39970749616622925, + "learning_rate": 1.7664730347412602e-05, + "loss": 0.5201, + "step": 16250 + }, + { + "epoch": 0.4462108731466227, + "grad_norm": 0.37025195360183716, + "learning_rate": 1.7664452945282496e-05, + "loss": 0.5385, + "step": 16251 + }, + { + "epoch": 0.4462383305875892, + "grad_norm": 0.38511526584625244, + "learning_rate": 1.7664175528855735e-05, + "loss": 0.5565, + "step": 16252 + }, + { + "epoch": 0.44626578802855577, + "grad_norm": 0.36795148253440857, + "learning_rate": 1.7663898098132846e-05, + "loss": 0.5326, + "step": 16253 + }, + { + "epoch": 0.44629324546952226, + "grad_norm": 0.4097501337528229, + "learning_rate": 1.7663620653114342e-05, + "loss": 0.5579, + "step": 16254 + }, + { + "epoch": 0.44632070291048875, + "grad_norm": 0.35103651881217957, + "learning_rate": 1.7663343193800734e-05, + "loss": 0.532, + "step": 16255 + }, + { + "epoch": 0.44634816035145525, + "grad_norm": 0.5811704397201538, + "learning_rate": 1.766306572019255e-05, + "loss": 0.5144, + "step": 16256 + }, + { + "epoch": 0.44637561779242174, + "grad_norm": 0.3651686906814575, + "learning_rate": 1.76627882322903e-05, + "loss": 0.5173, + "step": 16257 + }, + { + "epoch": 0.44640307523338824, + "grad_norm": 0.35018861293792725, + "learning_rate": 1.7662510730094506e-05, + "loss": 0.5519, + "step": 16258 + }, + { + "epoch": 0.44643053267435473, + "grad_norm": 0.3284205496311188, + "learning_rate": 1.7662233213605682e-05, + "loss": 0.4564, + "step": 16259 + }, + { + "epoch": 0.4464579901153213, + "grad_norm": 0.38832947611808777, + "learning_rate": 1.7661955682824346e-05, + "loss": 0.5222, + "step": 16260 + }, + { + "epoch": 0.4464854475562878, + "grad_norm": 0.3543276786804199, + "learning_rate": 1.766167813775102e-05, + "loss": 0.5202, + "step": 16261 + }, + { + "epoch": 0.44651290499725427, + "grad_norm": 0.3370468020439148, + "learning_rate": 1.766140057838622e-05, + "loss": 0.4582, + "step": 16262 + }, + { + "epoch": 0.44654036243822076, + "grad_norm": 0.3501153588294983, + "learning_rate": 1.766112300473046e-05, + "loss": 0.4786, + "step": 16263 + }, + { + "epoch": 0.44656781987918726, + "grad_norm": 0.44563642144203186, + "learning_rate": 1.7660845416784264e-05, + "loss": 0.6021, + "step": 16264 + }, + { + "epoch": 0.44659527732015375, + "grad_norm": 0.5630598664283752, + "learning_rate": 1.7660567814548144e-05, + "loss": 0.568, + "step": 16265 + }, + { + "epoch": 0.44662273476112024, + "grad_norm": 0.5015395283699036, + "learning_rate": 1.766029019802262e-05, + "loss": 0.6243, + "step": 16266 + }, + { + "epoch": 0.4466501922020868, + "grad_norm": 0.4601952135562897, + "learning_rate": 1.766001256720821e-05, + "loss": 0.6387, + "step": 16267 + }, + { + "epoch": 0.4466776496430533, + "grad_norm": 0.378411203622818, + "learning_rate": 1.7659734922105433e-05, + "loss": 0.4411, + "step": 16268 + }, + { + "epoch": 0.4467051070840198, + "grad_norm": 0.35130318999290466, + "learning_rate": 1.7659457262714802e-05, + "loss": 0.4549, + "step": 16269 + }, + { + "epoch": 0.4467325645249863, + "grad_norm": 0.37039053440093994, + "learning_rate": 1.765917958903684e-05, + "loss": 0.5502, + "step": 16270 + }, + { + "epoch": 0.44676002196595277, + "grad_norm": 0.30707189440727234, + "learning_rate": 1.7658901901072068e-05, + "loss": 0.3914, + "step": 16271 + }, + { + "epoch": 0.44678747940691926, + "grad_norm": 0.331959068775177, + "learning_rate": 1.7658624198820994e-05, + "loss": 0.553, + "step": 16272 + }, + { + "epoch": 0.44681493684788576, + "grad_norm": 0.3585236668586731, + "learning_rate": 1.7658346482284144e-05, + "loss": 0.4691, + "step": 16273 + }, + { + "epoch": 0.44684239428885225, + "grad_norm": 0.3942684233188629, + "learning_rate": 1.7658068751462032e-05, + "loss": 0.4986, + "step": 16274 + }, + { + "epoch": 0.4468698517298188, + "grad_norm": 0.46478286385536194, + "learning_rate": 1.765779100635518e-05, + "loss": 0.6143, + "step": 16275 + }, + { + "epoch": 0.4468973091707853, + "grad_norm": 0.3873024880886078, + "learning_rate": 1.76575132469641e-05, + "loss": 0.5387, + "step": 16276 + }, + { + "epoch": 0.4469247666117518, + "grad_norm": 0.3873789310455322, + "learning_rate": 1.765723547328932e-05, + "loss": 0.5833, + "step": 16277 + }, + { + "epoch": 0.4469522240527183, + "grad_norm": 0.31765511631965637, + "learning_rate": 1.7656957685331345e-05, + "loss": 0.4492, + "step": 16278 + }, + { + "epoch": 0.4469796814936848, + "grad_norm": 0.3743450343608856, + "learning_rate": 1.7656679883090705e-05, + "loss": 0.5575, + "step": 16279 + }, + { + "epoch": 0.44700713893465127, + "grad_norm": 0.37586691975593567, + "learning_rate": 1.7656402066567914e-05, + "loss": 0.5527, + "step": 16280 + }, + { + "epoch": 0.44703459637561777, + "grad_norm": 0.4324176609516144, + "learning_rate": 1.7656124235763487e-05, + "loss": 0.5686, + "step": 16281 + }, + { + "epoch": 0.4470620538165843, + "grad_norm": 0.6613055467605591, + "learning_rate": 1.7655846390677945e-05, + "loss": 0.5365, + "step": 16282 + }, + { + "epoch": 0.4470895112575508, + "grad_norm": 0.3653600215911865, + "learning_rate": 1.7655568531311804e-05, + "loss": 0.5201, + "step": 16283 + }, + { + "epoch": 0.4471169686985173, + "grad_norm": 0.5308765172958374, + "learning_rate": 1.7655290657665585e-05, + "loss": 0.5979, + "step": 16284 + }, + { + "epoch": 0.4471444261394838, + "grad_norm": 0.4483291506767273, + "learning_rate": 1.7655012769739807e-05, + "loss": 0.615, + "step": 16285 + }, + { + "epoch": 0.4471718835804503, + "grad_norm": 0.40119820833206177, + "learning_rate": 1.765473486753499e-05, + "loss": 0.5195, + "step": 16286 + }, + { + "epoch": 0.4471993410214168, + "grad_norm": 0.38622960448265076, + "learning_rate": 1.7654456951051644e-05, + "loss": 0.515, + "step": 16287 + }, + { + "epoch": 0.4472267984623833, + "grad_norm": 0.39907050132751465, + "learning_rate": 1.7654179020290295e-05, + "loss": 0.5029, + "step": 16288 + }, + { + "epoch": 0.44725425590334983, + "grad_norm": 0.46260976791381836, + "learning_rate": 1.7653901075251457e-05, + "loss": 0.4661, + "step": 16289 + }, + { + "epoch": 0.4472817133443163, + "grad_norm": 0.34621086716651917, + "learning_rate": 1.7653623115935655e-05, + "loss": 0.4999, + "step": 16290 + }, + { + "epoch": 0.4473091707852828, + "grad_norm": 0.3318263292312622, + "learning_rate": 1.76533451423434e-05, + "loss": 0.505, + "step": 16291 + }, + { + "epoch": 0.4473366282262493, + "grad_norm": 0.47858351469039917, + "learning_rate": 1.7653067154475216e-05, + "loss": 0.6846, + "step": 16292 + }, + { + "epoch": 0.4473640856672158, + "grad_norm": 0.4143407642841339, + "learning_rate": 1.7652789152331616e-05, + "loss": 0.5102, + "step": 16293 + }, + { + "epoch": 0.4473915431081823, + "grad_norm": 0.3776983320713043, + "learning_rate": 1.7652511135913125e-05, + "loss": 0.6003, + "step": 16294 + }, + { + "epoch": 0.4474190005491488, + "grad_norm": 0.38930270075798035, + "learning_rate": 1.765223310522026e-05, + "loss": 0.5305, + "step": 16295 + }, + { + "epoch": 0.44744645799011534, + "grad_norm": 0.35578814148902893, + "learning_rate": 1.7651955060253533e-05, + "loss": 0.4775, + "step": 16296 + }, + { + "epoch": 0.44747391543108184, + "grad_norm": 0.3677821457386017, + "learning_rate": 1.7651677001013468e-05, + "loss": 0.5214, + "step": 16297 + }, + { + "epoch": 0.44750137287204833, + "grad_norm": 0.37774255871772766, + "learning_rate": 1.7651398927500586e-05, + "loss": 0.4977, + "step": 16298 + }, + { + "epoch": 0.4475288303130148, + "grad_norm": 1.4527891874313354, + "learning_rate": 1.7651120839715398e-05, + "loss": 0.4528, + "step": 16299 + }, + { + "epoch": 0.4475562877539813, + "grad_norm": 0.3749931752681732, + "learning_rate": 1.7650842737658434e-05, + "loss": 0.5544, + "step": 16300 + }, + { + "epoch": 0.4475837451949478, + "grad_norm": 0.3598018288612366, + "learning_rate": 1.7650564621330202e-05, + "loss": 0.475, + "step": 16301 + }, + { + "epoch": 0.4476112026359143, + "grad_norm": 0.3201737403869629, + "learning_rate": 1.7650286490731226e-05, + "loss": 0.441, + "step": 16302 + }, + { + "epoch": 0.44763866007688086, + "grad_norm": 0.3528090715408325, + "learning_rate": 1.7650008345862025e-05, + "loss": 0.4561, + "step": 16303 + }, + { + "epoch": 0.44766611751784735, + "grad_norm": 0.3508578836917877, + "learning_rate": 1.7649730186723118e-05, + "loss": 0.4942, + "step": 16304 + }, + { + "epoch": 0.44769357495881384, + "grad_norm": 0.3582995533943176, + "learning_rate": 1.7649452013315022e-05, + "loss": 0.4798, + "step": 16305 + }, + { + "epoch": 0.44772103239978034, + "grad_norm": 0.3848623037338257, + "learning_rate": 1.7649173825638253e-05, + "loss": 0.4964, + "step": 16306 + }, + { + "epoch": 0.44774848984074683, + "grad_norm": 0.40599629282951355, + "learning_rate": 1.764889562369334e-05, + "loss": 0.4903, + "step": 16307 + }, + { + "epoch": 0.4477759472817133, + "grad_norm": 0.3705848455429077, + "learning_rate": 1.764861740748079e-05, + "loss": 0.4842, + "step": 16308 + }, + { + "epoch": 0.4478034047226798, + "grad_norm": 0.4040386974811554, + "learning_rate": 1.764833917700113e-05, + "loss": 0.6046, + "step": 16309 + }, + { + "epoch": 0.44783086216364637, + "grad_norm": 0.3588997423648834, + "learning_rate": 1.7648060932254875e-05, + "loss": 0.4552, + "step": 16310 + }, + { + "epoch": 0.44785831960461286, + "grad_norm": 0.45170530676841736, + "learning_rate": 1.7647782673242547e-05, + "loss": 0.5861, + "step": 16311 + }, + { + "epoch": 0.44788577704557936, + "grad_norm": 0.41263341903686523, + "learning_rate": 1.7647504399964663e-05, + "loss": 0.4992, + "step": 16312 + }, + { + "epoch": 0.44791323448654585, + "grad_norm": 0.581620991230011, + "learning_rate": 1.764722611242174e-05, + "loss": 0.5214, + "step": 16313 + }, + { + "epoch": 0.44794069192751235, + "grad_norm": 0.351747989654541, + "learning_rate": 1.7646947810614302e-05, + "loss": 0.5327, + "step": 16314 + }, + { + "epoch": 0.44796814936847884, + "grad_norm": 0.35103926062583923, + "learning_rate": 1.7646669494542866e-05, + "loss": 0.5819, + "step": 16315 + }, + { + "epoch": 0.44799560680944533, + "grad_norm": 0.35802045464515686, + "learning_rate": 1.7646391164207948e-05, + "loss": 0.4569, + "step": 16316 + }, + { + "epoch": 0.4480230642504119, + "grad_norm": 0.3239123523235321, + "learning_rate": 1.7646112819610072e-05, + "loss": 0.4572, + "step": 16317 + }, + { + "epoch": 0.4480505216913784, + "grad_norm": 0.4031168222427368, + "learning_rate": 1.7645834460749758e-05, + "loss": 0.5162, + "step": 16318 + }, + { + "epoch": 0.4480779791323449, + "grad_norm": 0.3830220699310303, + "learning_rate": 1.764555608762752e-05, + "loss": 0.6239, + "step": 16319 + }, + { + "epoch": 0.44810543657331137, + "grad_norm": 0.6425554156303406, + "learning_rate": 1.7645277700243878e-05, + "loss": 0.5061, + "step": 16320 + }, + { + "epoch": 0.44813289401427786, + "grad_norm": 0.3940119743347168, + "learning_rate": 1.7644999298599353e-05, + "loss": 0.5286, + "step": 16321 + }, + { + "epoch": 0.44816035145524435, + "grad_norm": 0.4258232116699219, + "learning_rate": 1.7644720882694466e-05, + "loss": 0.569, + "step": 16322 + }, + { + "epoch": 0.44818780889621085, + "grad_norm": 0.36462703347206116, + "learning_rate": 1.7644442452529736e-05, + "loss": 0.4462, + "step": 16323 + }, + { + "epoch": 0.4482152663371774, + "grad_norm": 0.37443259358406067, + "learning_rate": 1.7644164008105676e-05, + "loss": 0.576, + "step": 16324 + }, + { + "epoch": 0.4482427237781439, + "grad_norm": 0.4350542426109314, + "learning_rate": 1.764388554942282e-05, + "loss": 0.4763, + "step": 16325 + }, + { + "epoch": 0.4482701812191104, + "grad_norm": 0.3826397955417633, + "learning_rate": 1.764360707648167e-05, + "loss": 0.5288, + "step": 16326 + }, + { + "epoch": 0.4482976386600769, + "grad_norm": 0.38793954253196716, + "learning_rate": 1.7643328589282754e-05, + "loss": 0.4606, + "step": 16327 + }, + { + "epoch": 0.4483250961010434, + "grad_norm": 0.38546791672706604, + "learning_rate": 1.764305008782659e-05, + "loss": 0.5234, + "step": 16328 + }, + { + "epoch": 0.44835255354200987, + "grad_norm": 0.35235854983329773, + "learning_rate": 1.76427715721137e-05, + "loss": 0.5573, + "step": 16329 + }, + { + "epoch": 0.44838001098297636, + "grad_norm": 0.3640965223312378, + "learning_rate": 1.7642493042144605e-05, + "loss": 0.5417, + "step": 16330 + }, + { + "epoch": 0.4484074684239429, + "grad_norm": 0.36048293113708496, + "learning_rate": 1.7642214497919817e-05, + "loss": 0.5115, + "step": 16331 + }, + { + "epoch": 0.4484349258649094, + "grad_norm": 0.4432981610298157, + "learning_rate": 1.7641935939439858e-05, + "loss": 0.5573, + "step": 16332 + }, + { + "epoch": 0.4484623833058759, + "grad_norm": 0.40263521671295166, + "learning_rate": 1.7641657366705252e-05, + "loss": 0.46, + "step": 16333 + }, + { + "epoch": 0.4484898407468424, + "grad_norm": 0.3744945228099823, + "learning_rate": 1.7641378779716518e-05, + "loss": 0.5329, + "step": 16334 + }, + { + "epoch": 0.4485172981878089, + "grad_norm": 0.4188179075717926, + "learning_rate": 1.764110017847417e-05, + "loss": 0.484, + "step": 16335 + }, + { + "epoch": 0.4485447556287754, + "grad_norm": 0.38507163524627686, + "learning_rate": 1.7640821562978737e-05, + "loss": 0.4475, + "step": 16336 + }, + { + "epoch": 0.4485722130697419, + "grad_norm": 0.42068207263946533, + "learning_rate": 1.764054293323073e-05, + "loss": 0.4505, + "step": 16337 + }, + { + "epoch": 0.4485996705107084, + "grad_norm": 0.4947984516620636, + "learning_rate": 1.7640264289230668e-05, + "loss": 0.5289, + "step": 16338 + }, + { + "epoch": 0.4486271279516749, + "grad_norm": 0.3511262834072113, + "learning_rate": 1.763998563097908e-05, + "loss": 0.4485, + "step": 16339 + }, + { + "epoch": 0.4486545853926414, + "grad_norm": 0.3525216281414032, + "learning_rate": 1.7639706958476475e-05, + "loss": 0.525, + "step": 16340 + }, + { + "epoch": 0.4486820428336079, + "grad_norm": 0.42244017124176025, + "learning_rate": 1.7639428271723385e-05, + "loss": 0.5467, + "step": 16341 + }, + { + "epoch": 0.4487095002745744, + "grad_norm": 0.4891397953033447, + "learning_rate": 1.7639149570720316e-05, + "loss": 0.4652, + "step": 16342 + }, + { + "epoch": 0.4487369577155409, + "grad_norm": 0.3815208971500397, + "learning_rate": 1.76388708554678e-05, + "loss": 0.5407, + "step": 16343 + }, + { + "epoch": 0.4487644151565074, + "grad_norm": 0.43269941210746765, + "learning_rate": 1.763859212596635e-05, + "loss": 0.5679, + "step": 16344 + }, + { + "epoch": 0.44879187259747394, + "grad_norm": 0.3986944556236267, + "learning_rate": 1.763831338221649e-05, + "loss": 0.5428, + "step": 16345 + }, + { + "epoch": 0.44881933003844043, + "grad_norm": 0.4186573624610901, + "learning_rate": 1.7638034624218738e-05, + "loss": 0.4772, + "step": 16346 + }, + { + "epoch": 0.44884678747940693, + "grad_norm": 0.3791615664958954, + "learning_rate": 1.763775585197361e-05, + "loss": 0.6196, + "step": 16347 + }, + { + "epoch": 0.4488742449203734, + "grad_norm": 0.38311004638671875, + "learning_rate": 1.763747706548163e-05, + "loss": 0.4991, + "step": 16348 + }, + { + "epoch": 0.4489017023613399, + "grad_norm": 0.3693498969078064, + "learning_rate": 1.7637198264743318e-05, + "loss": 0.5746, + "step": 16349 + }, + { + "epoch": 0.4489291598023064, + "grad_norm": 0.45124709606170654, + "learning_rate": 1.7636919449759197e-05, + "loss": 0.5092, + "step": 16350 + }, + { + "epoch": 0.4489566172432729, + "grad_norm": 0.35207122564315796, + "learning_rate": 1.763664062052978e-05, + "loss": 0.4291, + "step": 16351 + }, + { + "epoch": 0.44898407468423945, + "grad_norm": 0.3303559720516205, + "learning_rate": 1.7636361777055594e-05, + "loss": 0.4881, + "step": 16352 + }, + { + "epoch": 0.44901153212520595, + "grad_norm": 0.35786306858062744, + "learning_rate": 1.7636082919337153e-05, + "loss": 0.5943, + "step": 16353 + }, + { + "epoch": 0.44903898956617244, + "grad_norm": 0.3418938219547272, + "learning_rate": 1.763580404737498e-05, + "loss": 0.4484, + "step": 16354 + }, + { + "epoch": 0.44906644700713894, + "grad_norm": 0.3868178129196167, + "learning_rate": 1.7635525161169596e-05, + "loss": 0.5722, + "step": 16355 + }, + { + "epoch": 0.44909390444810543, + "grad_norm": 0.4371788799762726, + "learning_rate": 1.7635246260721525e-05, + "loss": 0.462, + "step": 16356 + }, + { + "epoch": 0.4491213618890719, + "grad_norm": 0.36801877617836, + "learning_rate": 1.7634967346031278e-05, + "loss": 0.5276, + "step": 16357 + }, + { + "epoch": 0.4491488193300384, + "grad_norm": 0.3286571502685547, + "learning_rate": 1.763468841709938e-05, + "loss": 0.5339, + "step": 16358 + }, + { + "epoch": 0.44917627677100497, + "grad_norm": 0.3956201672554016, + "learning_rate": 1.7634409473926352e-05, + "loss": 0.5911, + "step": 16359 + }, + { + "epoch": 0.44920373421197146, + "grad_norm": 0.3090289533138275, + "learning_rate": 1.7634130516512715e-05, + "loss": 0.484, + "step": 16360 + }, + { + "epoch": 0.44923119165293796, + "grad_norm": 0.4280528724193573, + "learning_rate": 1.7633851544858988e-05, + "loss": 0.6085, + "step": 16361 + }, + { + "epoch": 0.44925864909390445, + "grad_norm": 1.4353727102279663, + "learning_rate": 1.7633572558965692e-05, + "loss": 0.4605, + "step": 16362 + }, + { + "epoch": 0.44928610653487094, + "grad_norm": 0.3505908250808716, + "learning_rate": 1.7633293558833345e-05, + "loss": 0.4974, + "step": 16363 + }, + { + "epoch": 0.44931356397583744, + "grad_norm": 0.36935925483703613, + "learning_rate": 1.763301454446247e-05, + "loss": 0.5498, + "step": 16364 + }, + { + "epoch": 0.44934102141680393, + "grad_norm": 0.41407841444015503, + "learning_rate": 1.763273551585359e-05, + "loss": 0.4967, + "step": 16365 + }, + { + "epoch": 0.4493684788577705, + "grad_norm": 0.3528873026371002, + "learning_rate": 1.7632456473007217e-05, + "loss": 0.4289, + "step": 16366 + }, + { + "epoch": 0.449395936298737, + "grad_norm": 0.41084054112434387, + "learning_rate": 1.7632177415923878e-05, + "loss": 0.5812, + "step": 16367 + }, + { + "epoch": 0.44942339373970347, + "grad_norm": 0.3799944221973419, + "learning_rate": 1.763189834460409e-05, + "loss": 0.5829, + "step": 16368 + }, + { + "epoch": 0.44945085118066996, + "grad_norm": 0.3394142687320709, + "learning_rate": 1.763161925904838e-05, + "loss": 0.5174, + "step": 16369 + }, + { + "epoch": 0.44947830862163646, + "grad_norm": 0.3679940104484558, + "learning_rate": 1.7631340159257262e-05, + "loss": 0.559, + "step": 16370 + }, + { + "epoch": 0.44950576606260295, + "grad_norm": 0.360752671957016, + "learning_rate": 1.763106104523126e-05, + "loss": 0.5027, + "step": 16371 + }, + { + "epoch": 0.44953322350356945, + "grad_norm": 0.4499969482421875, + "learning_rate": 1.7630781916970894e-05, + "loss": 0.4975, + "step": 16372 + }, + { + "epoch": 0.449560680944536, + "grad_norm": 0.41656696796417236, + "learning_rate": 1.7630502774476683e-05, + "loss": 0.5876, + "step": 16373 + }, + { + "epoch": 0.4495881383855025, + "grad_norm": 0.3844637870788574, + "learning_rate": 1.763022361774915e-05, + "loss": 0.5468, + "step": 16374 + }, + { + "epoch": 0.449615595826469, + "grad_norm": 0.37302976846694946, + "learning_rate": 1.7629944446788815e-05, + "loss": 0.4634, + "step": 16375 + }, + { + "epoch": 0.4496430532674355, + "grad_norm": 0.33911624550819397, + "learning_rate": 1.7629665261596198e-05, + "loss": 0.4856, + "step": 16376 + }, + { + "epoch": 0.44967051070840197, + "grad_norm": 0.3796495795249939, + "learning_rate": 1.762938606217182e-05, + "loss": 0.516, + "step": 16377 + }, + { + "epoch": 0.44969796814936847, + "grad_norm": 0.40785104036331177, + "learning_rate": 1.7629106848516204e-05, + "loss": 0.5102, + "step": 16378 + }, + { + "epoch": 0.44972542559033496, + "grad_norm": 0.3765966296195984, + "learning_rate": 1.7628827620629863e-05, + "loss": 0.5434, + "step": 16379 + }, + { + "epoch": 0.4497528830313015, + "grad_norm": 0.4066828489303589, + "learning_rate": 1.762854837851333e-05, + "loss": 0.5474, + "step": 16380 + }, + { + "epoch": 0.449780340472268, + "grad_norm": 0.44063472747802734, + "learning_rate": 1.7628269122167115e-05, + "loss": 0.4619, + "step": 16381 + }, + { + "epoch": 0.4498077979132345, + "grad_norm": 0.4751841723918915, + "learning_rate": 1.7627989851591744e-05, + "loss": 0.4975, + "step": 16382 + }, + { + "epoch": 0.449835255354201, + "grad_norm": 0.38785117864608765, + "learning_rate": 1.762771056678774e-05, + "loss": 0.5455, + "step": 16383 + }, + { + "epoch": 0.4498627127951675, + "grad_norm": 0.404065877199173, + "learning_rate": 1.762743126775562e-05, + "loss": 0.5912, + "step": 16384 + }, + { + "epoch": 0.449890170236134, + "grad_norm": 0.372230589389801, + "learning_rate": 1.7627151954495904e-05, + "loss": 0.4384, + "step": 16385 + }, + { + "epoch": 0.4499176276771005, + "grad_norm": 0.42826223373413086, + "learning_rate": 1.7626872627009118e-05, + "loss": 0.5441, + "step": 16386 + }, + { + "epoch": 0.449945085118067, + "grad_norm": 0.35649630427360535, + "learning_rate": 1.762659328529578e-05, + "loss": 0.5179, + "step": 16387 + }, + { + "epoch": 0.4499725425590335, + "grad_norm": 0.31712231040000916, + "learning_rate": 1.7626313929356415e-05, + "loss": 0.4624, + "step": 16388 + }, + { + "epoch": 0.45, + "grad_norm": 0.3887706995010376, + "learning_rate": 1.7626034559191534e-05, + "loss": 0.5044, + "step": 16389 + }, + { + "epoch": 0.4500274574409665, + "grad_norm": 0.34697288274765015, + "learning_rate": 1.7625755174801668e-05, + "loss": 0.5219, + "step": 16390 + }, + { + "epoch": 0.450054914881933, + "grad_norm": 0.37770694494247437, + "learning_rate": 1.7625475776187333e-05, + "loss": 0.5232, + "step": 16391 + }, + { + "epoch": 0.4500823723228995, + "grad_norm": 0.4143514931201935, + "learning_rate": 1.7625196363349055e-05, + "loss": 0.5276, + "step": 16392 + }, + { + "epoch": 0.450109829763866, + "grad_norm": 0.3799136281013489, + "learning_rate": 1.762491693628735e-05, + "loss": 0.4993, + "step": 16393 + }, + { + "epoch": 0.45013728720483254, + "grad_norm": 0.41576096415519714, + "learning_rate": 1.762463749500274e-05, + "loss": 0.5654, + "step": 16394 + }, + { + "epoch": 0.45016474464579903, + "grad_norm": 0.4133335053920746, + "learning_rate": 1.762435803949575e-05, + "loss": 0.5191, + "step": 16395 + }, + { + "epoch": 0.4501922020867655, + "grad_norm": 0.34034594893455505, + "learning_rate": 1.76240785697669e-05, + "loss": 0.5233, + "step": 16396 + }, + { + "epoch": 0.450219659527732, + "grad_norm": 0.414831280708313, + "learning_rate": 1.7623799085816705e-05, + "loss": 0.6038, + "step": 16397 + }, + { + "epoch": 0.4502471169686985, + "grad_norm": 0.36482420563697815, + "learning_rate": 1.7623519587645696e-05, + "loss": 0.5843, + "step": 16398 + }, + { + "epoch": 0.450274574409665, + "grad_norm": 0.3563075661659241, + "learning_rate": 1.7623240075254385e-05, + "loss": 0.5435, + "step": 16399 + }, + { + "epoch": 0.4503020318506315, + "grad_norm": 0.359744668006897, + "learning_rate": 1.7622960548643302e-05, + "loss": 0.5668, + "step": 16400 + }, + { + "epoch": 0.45032948929159805, + "grad_norm": 0.4468838572502136, + "learning_rate": 1.7622681007812965e-05, + "loss": 0.4241, + "step": 16401 + }, + { + "epoch": 0.45035694673256454, + "grad_norm": 0.461291640996933, + "learning_rate": 1.7622401452763896e-05, + "loss": 0.5636, + "step": 16402 + }, + { + "epoch": 0.45038440417353104, + "grad_norm": 0.38259997963905334, + "learning_rate": 1.7622121883496614e-05, + "loss": 0.5486, + "step": 16403 + }, + { + "epoch": 0.45041186161449753, + "grad_norm": 0.3538258671760559, + "learning_rate": 1.762184230001164e-05, + "loss": 0.5426, + "step": 16404 + }, + { + "epoch": 0.450439319055464, + "grad_norm": 0.3682671785354614, + "learning_rate": 1.7621562702309498e-05, + "loss": 0.5223, + "step": 16405 + }, + { + "epoch": 0.4504667764964305, + "grad_norm": 0.38607680797576904, + "learning_rate": 1.762128309039071e-05, + "loss": 0.5431, + "step": 16406 + }, + { + "epoch": 0.450494233937397, + "grad_norm": 0.34689515829086304, + "learning_rate": 1.7621003464255798e-05, + "loss": 0.4496, + "step": 16407 + }, + { + "epoch": 0.4505216913783635, + "grad_norm": 0.38456472754478455, + "learning_rate": 1.762072382390528e-05, + "loss": 0.5704, + "step": 16408 + }, + { + "epoch": 0.45054914881933006, + "grad_norm": 0.3719666004180908, + "learning_rate": 1.7620444169339683e-05, + "loss": 0.5503, + "step": 16409 + }, + { + "epoch": 0.45057660626029655, + "grad_norm": 0.3527688682079315, + "learning_rate": 1.7620164500559524e-05, + "loss": 0.5336, + "step": 16410 + }, + { + "epoch": 0.45060406370126305, + "grad_norm": 0.3233591318130493, + "learning_rate": 1.761988481756532e-05, + "loss": 0.4437, + "step": 16411 + }, + { + "epoch": 0.45063152114222954, + "grad_norm": 0.37388744950294495, + "learning_rate": 1.761960512035761e-05, + "loss": 0.5335, + "step": 16412 + }, + { + "epoch": 0.45065897858319603, + "grad_norm": 0.7181567549705505, + "learning_rate": 1.7619325408936896e-05, + "loss": 0.5282, + "step": 16413 + }, + { + "epoch": 0.45068643602416253, + "grad_norm": 0.46058544516563416, + "learning_rate": 1.761904568330371e-05, + "loss": 0.5384, + "step": 16414 + }, + { + "epoch": 0.450713893465129, + "grad_norm": 0.4352286159992218, + "learning_rate": 1.7618765943458577e-05, + "loss": 0.5966, + "step": 16415 + }, + { + "epoch": 0.4507413509060956, + "grad_norm": 0.42245474457740784, + "learning_rate": 1.7618486189402007e-05, + "loss": 0.5201, + "step": 16416 + }, + { + "epoch": 0.45076880834706207, + "grad_norm": 0.36213329434394836, + "learning_rate": 1.7618206421134532e-05, + "loss": 0.5418, + "step": 16417 + }, + { + "epoch": 0.45079626578802856, + "grad_norm": 0.43807539343833923, + "learning_rate": 1.761792663865667e-05, + "loss": 0.4667, + "step": 16418 + }, + { + "epoch": 0.45082372322899505, + "grad_norm": 0.3651374578475952, + "learning_rate": 1.7617646841968942e-05, + "loss": 0.513, + "step": 16419 + }, + { + "epoch": 0.45085118066996155, + "grad_norm": 0.3257370591163635, + "learning_rate": 1.7617367031071874e-05, + "loss": 0.5543, + "step": 16420 + }, + { + "epoch": 0.45087863811092804, + "grad_norm": 0.39248254895210266, + "learning_rate": 1.7617087205965987e-05, + "loss": 0.5061, + "step": 16421 + }, + { + "epoch": 0.45090609555189454, + "grad_norm": 0.34287458658218384, + "learning_rate": 1.7616807366651797e-05, + "loss": 0.5245, + "step": 16422 + }, + { + "epoch": 0.4509335529928611, + "grad_norm": 0.41643720865249634, + "learning_rate": 1.7616527513129832e-05, + "loss": 0.5041, + "step": 16423 + }, + { + "epoch": 0.4509610104338276, + "grad_norm": 0.4319380819797516, + "learning_rate": 1.761624764540061e-05, + "loss": 0.5755, + "step": 16424 + }, + { + "epoch": 0.4509884678747941, + "grad_norm": 0.39449480175971985, + "learning_rate": 1.7615967763464658e-05, + "loss": 0.5994, + "step": 16425 + }, + { + "epoch": 0.45101592531576057, + "grad_norm": 0.47950389981269836, + "learning_rate": 1.7615687867322496e-05, + "loss": 0.4804, + "step": 16426 + }, + { + "epoch": 0.45104338275672706, + "grad_norm": 0.37265294790267944, + "learning_rate": 1.7615407956974646e-05, + "loss": 0.5347, + "step": 16427 + }, + { + "epoch": 0.45107084019769356, + "grad_norm": 0.3874069154262543, + "learning_rate": 1.761512803242163e-05, + "loss": 0.5433, + "step": 16428 + }, + { + "epoch": 0.45109829763866005, + "grad_norm": 0.3805796205997467, + "learning_rate": 1.7614848093663966e-05, + "loss": 0.5563, + "step": 16429 + }, + { + "epoch": 0.4511257550796266, + "grad_norm": 0.43778395652770996, + "learning_rate": 1.761456814070218e-05, + "loss": 0.5776, + "step": 16430 + }, + { + "epoch": 0.4511532125205931, + "grad_norm": 0.4236373007297516, + "learning_rate": 1.7614288173536796e-05, + "loss": 0.5399, + "step": 16431 + }, + { + "epoch": 0.4511806699615596, + "grad_norm": 0.3807057738304138, + "learning_rate": 1.7614008192168336e-05, + "loss": 0.5802, + "step": 16432 + }, + { + "epoch": 0.4512081274025261, + "grad_norm": 0.38247713446617126, + "learning_rate": 1.761372819659732e-05, + "loss": 0.4717, + "step": 16433 + }, + { + "epoch": 0.4512355848434926, + "grad_norm": 0.40510234236717224, + "learning_rate": 1.7613448186824268e-05, + "loss": 0.5246, + "step": 16434 + }, + { + "epoch": 0.45126304228445907, + "grad_norm": 0.3878079950809479, + "learning_rate": 1.7613168162849706e-05, + "loss": 0.484, + "step": 16435 + }, + { + "epoch": 0.45129049972542556, + "grad_norm": 0.4531189203262329, + "learning_rate": 1.7612888124674155e-05, + "loss": 0.4557, + "step": 16436 + }, + { + "epoch": 0.4513179571663921, + "grad_norm": 0.4005299508571625, + "learning_rate": 1.761260807229814e-05, + "loss": 0.5528, + "step": 16437 + }, + { + "epoch": 0.4513454146073586, + "grad_norm": 0.3891911804676056, + "learning_rate": 1.7612328005722183e-05, + "loss": 0.5397, + "step": 16438 + }, + { + "epoch": 0.4513728720483251, + "grad_norm": 0.3791523873806, + "learning_rate": 1.76120479249468e-05, + "loss": 0.5335, + "step": 16439 + }, + { + "epoch": 0.4514003294892916, + "grad_norm": 0.3671433925628662, + "learning_rate": 1.761176782997252e-05, + "loss": 0.5041, + "step": 16440 + }, + { + "epoch": 0.4514277869302581, + "grad_norm": 0.36536648869514465, + "learning_rate": 1.7611487720799868e-05, + "loss": 0.4742, + "step": 16441 + }, + { + "epoch": 0.4514552443712246, + "grad_norm": 0.40077856183052063, + "learning_rate": 1.7611207597429357e-05, + "loss": 0.5251, + "step": 16442 + }, + { + "epoch": 0.4514827018121911, + "grad_norm": 0.4081714451313019, + "learning_rate": 1.7610927459861514e-05, + "loss": 0.5526, + "step": 16443 + }, + { + "epoch": 0.45151015925315763, + "grad_norm": 0.3407379388809204, + "learning_rate": 1.7610647308096863e-05, + "loss": 0.4684, + "step": 16444 + }, + { + "epoch": 0.4515376166941241, + "grad_norm": 0.336406409740448, + "learning_rate": 1.761036714213593e-05, + "loss": 0.4811, + "step": 16445 + }, + { + "epoch": 0.4515650741350906, + "grad_norm": 0.34787511825561523, + "learning_rate": 1.761008696197923e-05, + "loss": 0.4534, + "step": 16446 + }, + { + "epoch": 0.4515925315760571, + "grad_norm": 0.35643380880355835, + "learning_rate": 1.7609806767627286e-05, + "loss": 0.4762, + "step": 16447 + }, + { + "epoch": 0.4516199890170236, + "grad_norm": 0.39195793867111206, + "learning_rate": 1.7609526559080627e-05, + "loss": 0.4229, + "step": 16448 + }, + { + "epoch": 0.4516474464579901, + "grad_norm": 0.3406163454055786, + "learning_rate": 1.7609246336339775e-05, + "loss": 0.4972, + "step": 16449 + }, + { + "epoch": 0.4516749038989566, + "grad_norm": 0.43508389592170715, + "learning_rate": 1.7608966099405246e-05, + "loss": 0.5158, + "step": 16450 + }, + { + "epoch": 0.45170236133992314, + "grad_norm": 0.3196220397949219, + "learning_rate": 1.7608685848277566e-05, + "loss": 0.4222, + "step": 16451 + }, + { + "epoch": 0.45172981878088964, + "grad_norm": 0.3754172921180725, + "learning_rate": 1.7608405582957262e-05, + "loss": 0.5944, + "step": 16452 + }, + { + "epoch": 0.45175727622185613, + "grad_norm": 0.4495798647403717, + "learning_rate": 1.760812530344485e-05, + "loss": 0.5269, + "step": 16453 + }, + { + "epoch": 0.4517847336628226, + "grad_norm": 0.3724677264690399, + "learning_rate": 1.760784500974086e-05, + "loss": 0.5034, + "step": 16454 + }, + { + "epoch": 0.4518121911037891, + "grad_norm": 0.3524262607097626, + "learning_rate": 1.7607564701845807e-05, + "loss": 0.4793, + "step": 16455 + }, + { + "epoch": 0.4518396485447556, + "grad_norm": 0.3692380487918854, + "learning_rate": 1.7607284379760218e-05, + "loss": 0.6116, + "step": 16456 + }, + { + "epoch": 0.4518671059857221, + "grad_norm": 0.40111252665519714, + "learning_rate": 1.7607004043484615e-05, + "loss": 0.4631, + "step": 16457 + }, + { + "epoch": 0.45189456342668866, + "grad_norm": 0.38076698780059814, + "learning_rate": 1.7606723693019526e-05, + "loss": 0.5746, + "step": 16458 + }, + { + "epoch": 0.45192202086765515, + "grad_norm": 0.39562347531318665, + "learning_rate": 1.7606443328365467e-05, + "loss": 0.5699, + "step": 16459 + }, + { + "epoch": 0.45194947830862164, + "grad_norm": 0.32691484689712524, + "learning_rate": 1.7606162949522962e-05, + "loss": 0.5291, + "step": 16460 + }, + { + "epoch": 0.45197693574958814, + "grad_norm": 0.3756413757801056, + "learning_rate": 1.7605882556492536e-05, + "loss": 0.5387, + "step": 16461 + }, + { + "epoch": 0.45200439319055463, + "grad_norm": 0.42511433362960815, + "learning_rate": 1.7605602149274715e-05, + "loss": 0.5201, + "step": 16462 + }, + { + "epoch": 0.4520318506315211, + "grad_norm": 0.3948748707771301, + "learning_rate": 1.7605321727870014e-05, + "loss": 0.4637, + "step": 16463 + }, + { + "epoch": 0.4520593080724876, + "grad_norm": 0.3371366560459137, + "learning_rate": 1.7605041292278965e-05, + "loss": 0.5317, + "step": 16464 + }, + { + "epoch": 0.45208676551345417, + "grad_norm": 0.4076662063598633, + "learning_rate": 1.7604760842502082e-05, + "loss": 0.5597, + "step": 16465 + }, + { + "epoch": 0.45211422295442066, + "grad_norm": 0.33447667956352234, + "learning_rate": 1.7604480378539897e-05, + "loss": 0.396, + "step": 16466 + }, + { + "epoch": 0.45214168039538716, + "grad_norm": 0.4558180868625641, + "learning_rate": 1.7604199900392927e-05, + "loss": 0.5404, + "step": 16467 + }, + { + "epoch": 0.45216913783635365, + "grad_norm": 0.45525142550468445, + "learning_rate": 1.7603919408061698e-05, + "loss": 0.4513, + "step": 16468 + }, + { + "epoch": 0.45219659527732015, + "grad_norm": 0.36419957876205444, + "learning_rate": 1.760363890154673e-05, + "loss": 0.508, + "step": 16469 + }, + { + "epoch": 0.45222405271828664, + "grad_norm": 0.3514192998409271, + "learning_rate": 1.760335838084855e-05, + "loss": 0.5151, + "step": 16470 + }, + { + "epoch": 0.45225151015925313, + "grad_norm": 0.3583473265171051, + "learning_rate": 1.7603077845967683e-05, + "loss": 0.4706, + "step": 16471 + }, + { + "epoch": 0.4522789676002197, + "grad_norm": 0.36676880717277527, + "learning_rate": 1.7602797296904647e-05, + "loss": 0.4781, + "step": 16472 + }, + { + "epoch": 0.4523064250411862, + "grad_norm": 0.41546207666397095, + "learning_rate": 1.7602516733659966e-05, + "loss": 0.5038, + "step": 16473 + }, + { + "epoch": 0.45233388248215267, + "grad_norm": 0.4245785176753998, + "learning_rate": 1.7602236156234164e-05, + "loss": 0.5652, + "step": 16474 + }, + { + "epoch": 0.45236133992311917, + "grad_norm": 0.3646193742752075, + "learning_rate": 1.7601955564627767e-05, + "loss": 0.6295, + "step": 16475 + }, + { + "epoch": 0.45238879736408566, + "grad_norm": 0.3531426787376404, + "learning_rate": 1.76016749588413e-05, + "loss": 0.5023, + "step": 16476 + }, + { + "epoch": 0.45241625480505215, + "grad_norm": 0.41452303528785706, + "learning_rate": 1.760139433887528e-05, + "loss": 0.5779, + "step": 16477 + }, + { + "epoch": 0.45244371224601865, + "grad_norm": 0.39359986782073975, + "learning_rate": 1.7601113704730232e-05, + "loss": 0.5835, + "step": 16478 + }, + { + "epoch": 0.4524711696869852, + "grad_norm": 0.4056164622306824, + "learning_rate": 1.760083305640668e-05, + "loss": 0.5205, + "step": 16479 + }, + { + "epoch": 0.4524986271279517, + "grad_norm": 0.39417484402656555, + "learning_rate": 1.760055239390515e-05, + "loss": 0.5561, + "step": 16480 + }, + { + "epoch": 0.4525260845689182, + "grad_norm": 0.3654676377773285, + "learning_rate": 1.7600271717226167e-05, + "loss": 0.5203, + "step": 16481 + }, + { + "epoch": 0.4525535420098847, + "grad_norm": 0.3931303322315216, + "learning_rate": 1.759999102637025e-05, + "loss": 0.4846, + "step": 16482 + }, + { + "epoch": 0.4525809994508512, + "grad_norm": 0.37459105253219604, + "learning_rate": 1.7599710321337925e-05, + "loss": 0.5199, + "step": 16483 + }, + { + "epoch": 0.45260845689181767, + "grad_norm": 0.7336270809173584, + "learning_rate": 1.759942960212971e-05, + "loss": 0.596, + "step": 16484 + }, + { + "epoch": 0.45263591433278416, + "grad_norm": 0.33560794591903687, + "learning_rate": 1.7599148868746136e-05, + "loss": 0.5198, + "step": 16485 + }, + { + "epoch": 0.4526633717737507, + "grad_norm": 0.40384554862976074, + "learning_rate": 1.7598868121187725e-05, + "loss": 0.5007, + "step": 16486 + }, + { + "epoch": 0.4526908292147172, + "grad_norm": 0.40007176995277405, + "learning_rate": 1.7598587359454998e-05, + "loss": 0.571, + "step": 16487 + }, + { + "epoch": 0.4527182866556837, + "grad_norm": 0.36748895049095154, + "learning_rate": 1.7598306583548482e-05, + "loss": 0.5234, + "step": 16488 + }, + { + "epoch": 0.4527457440966502, + "grad_norm": 0.35370126366615295, + "learning_rate": 1.75980257934687e-05, + "loss": 0.4468, + "step": 16489 + }, + { + "epoch": 0.4527732015376167, + "grad_norm": 0.32824206352233887, + "learning_rate": 1.7597744989216172e-05, + "loss": 0.4697, + "step": 16490 + }, + { + "epoch": 0.4528006589785832, + "grad_norm": 0.4980722665786743, + "learning_rate": 1.7597464170791428e-05, + "loss": 0.5322, + "step": 16491 + }, + { + "epoch": 0.4528281164195497, + "grad_norm": 0.3525618612766266, + "learning_rate": 1.7597183338194985e-05, + "loss": 0.471, + "step": 16492 + }, + { + "epoch": 0.4528555738605162, + "grad_norm": 0.34973886609077454, + "learning_rate": 1.7596902491427374e-05, + "loss": 0.4754, + "step": 16493 + }, + { + "epoch": 0.4528830313014827, + "grad_norm": 0.38278862833976746, + "learning_rate": 1.759662163048911e-05, + "loss": 0.5587, + "step": 16494 + }, + { + "epoch": 0.4529104887424492, + "grad_norm": 0.34794360399246216, + "learning_rate": 1.7596340755380725e-05, + "loss": 0.5122, + "step": 16495 + }, + { + "epoch": 0.4529379461834157, + "grad_norm": 0.3971276879310608, + "learning_rate": 1.7596059866102742e-05, + "loss": 0.4915, + "step": 16496 + }, + { + "epoch": 0.4529654036243822, + "grad_norm": 0.34436362981796265, + "learning_rate": 1.759577896265568e-05, + "loss": 0.5522, + "step": 16497 + }, + { + "epoch": 0.4529928610653487, + "grad_norm": 0.39027896523475647, + "learning_rate": 1.7595498045040068e-05, + "loss": 0.5039, + "step": 16498 + }, + { + "epoch": 0.4530203185063152, + "grad_norm": 0.446169912815094, + "learning_rate": 1.7595217113256427e-05, + "loss": 0.5018, + "step": 16499 + }, + { + "epoch": 0.45304777594728174, + "grad_norm": 0.41163384914398193, + "learning_rate": 1.7594936167305282e-05, + "loss": 0.5874, + "step": 16500 + }, + { + "epoch": 0.45307523338824823, + "grad_norm": 0.4378408193588257, + "learning_rate": 1.7594655207187155e-05, + "loss": 0.6295, + "step": 16501 + }, + { + "epoch": 0.4531026908292147, + "grad_norm": 0.3744196593761444, + "learning_rate": 1.7594374232902576e-05, + "loss": 0.5828, + "step": 16502 + }, + { + "epoch": 0.4531301482701812, + "grad_norm": 0.38281193375587463, + "learning_rate": 1.759409324445206e-05, + "loss": 0.5031, + "step": 16503 + }, + { + "epoch": 0.4531576057111477, + "grad_norm": 0.47974494099617004, + "learning_rate": 1.7593812241836143e-05, + "loss": 0.6065, + "step": 16504 + }, + { + "epoch": 0.4531850631521142, + "grad_norm": 0.3786256015300751, + "learning_rate": 1.7593531225055338e-05, + "loss": 0.5128, + "step": 16505 + }, + { + "epoch": 0.4532125205930807, + "grad_norm": 0.352970689535141, + "learning_rate": 1.7593250194110173e-05, + "loss": 0.5215, + "step": 16506 + }, + { + "epoch": 0.45323997803404725, + "grad_norm": 0.3822009861469269, + "learning_rate": 1.7592969149001177e-05, + "loss": 0.5164, + "step": 16507 + }, + { + "epoch": 0.45326743547501375, + "grad_norm": 0.35719236731529236, + "learning_rate": 1.759268808972887e-05, + "loss": 0.5537, + "step": 16508 + }, + { + "epoch": 0.45329489291598024, + "grad_norm": 0.348567396402359, + "learning_rate": 1.7592407016293773e-05, + "loss": 0.5002, + "step": 16509 + }, + { + "epoch": 0.45332235035694673, + "grad_norm": 0.40390071272850037, + "learning_rate": 1.7592125928696416e-05, + "loss": 0.4902, + "step": 16510 + }, + { + "epoch": 0.45334980779791323, + "grad_norm": 0.38037532567977905, + "learning_rate": 1.759184482693732e-05, + "loss": 0.4783, + "step": 16511 + }, + { + "epoch": 0.4533772652388797, + "grad_norm": 0.3663862645626068, + "learning_rate": 1.759156371101701e-05, + "loss": 0.4507, + "step": 16512 + }, + { + "epoch": 0.4534047226798462, + "grad_norm": 0.4120646119117737, + "learning_rate": 1.759128258093601e-05, + "loss": 0.4852, + "step": 16513 + }, + { + "epoch": 0.45343218012081277, + "grad_norm": 0.3911692798137665, + "learning_rate": 1.7591001436694847e-05, + "loss": 0.57, + "step": 16514 + }, + { + "epoch": 0.45345963756177926, + "grad_norm": 0.34604448080062866, + "learning_rate": 1.7590720278294046e-05, + "loss": 0.5579, + "step": 16515 + }, + { + "epoch": 0.45348709500274575, + "grad_norm": 0.4464185833930969, + "learning_rate": 1.7590439105734124e-05, + "loss": 0.4766, + "step": 16516 + }, + { + "epoch": 0.45351455244371225, + "grad_norm": 0.37113016843795776, + "learning_rate": 1.7590157919015612e-05, + "loss": 0.6062, + "step": 16517 + }, + { + "epoch": 0.45354200988467874, + "grad_norm": 0.374452143907547, + "learning_rate": 1.7589876718139032e-05, + "loss": 0.5737, + "step": 16518 + }, + { + "epoch": 0.45356946732564524, + "grad_norm": 0.40791723132133484, + "learning_rate": 1.758959550310491e-05, + "loss": 0.4911, + "step": 16519 + }, + { + "epoch": 0.45359692476661173, + "grad_norm": 0.8731909394264221, + "learning_rate": 1.7589314273913775e-05, + "loss": 0.4884, + "step": 16520 + }, + { + "epoch": 0.4536243822075783, + "grad_norm": 0.47034579515457153, + "learning_rate": 1.758903303056614e-05, + "loss": 0.6221, + "step": 16521 + }, + { + "epoch": 0.4536518396485448, + "grad_norm": 0.33338698744773865, + "learning_rate": 1.758875177306254e-05, + "loss": 0.5129, + "step": 16522 + }, + { + "epoch": 0.45367929708951127, + "grad_norm": 0.3691624402999878, + "learning_rate": 1.7588470501403495e-05, + "loss": 0.501, + "step": 16523 + }, + { + "epoch": 0.45370675453047776, + "grad_norm": 0.3896225392818451, + "learning_rate": 1.758818921558953e-05, + "loss": 0.5427, + "step": 16524 + }, + { + "epoch": 0.45373421197144426, + "grad_norm": 0.3702406585216522, + "learning_rate": 1.7587907915621173e-05, + "loss": 0.5763, + "step": 16525 + }, + { + "epoch": 0.45376166941241075, + "grad_norm": 0.36843329668045044, + "learning_rate": 1.7587626601498938e-05, + "loss": 0.5177, + "step": 16526 + }, + { + "epoch": 0.45378912685337724, + "grad_norm": 0.3660006821155548, + "learning_rate": 1.7587345273223366e-05, + "loss": 0.5179, + "step": 16527 + }, + { + "epoch": 0.4538165842943438, + "grad_norm": 0.3529936373233795, + "learning_rate": 1.758706393079497e-05, + "loss": 0.506, + "step": 16528 + }, + { + "epoch": 0.4538440417353103, + "grad_norm": 0.43203458189964294, + "learning_rate": 1.758678257421428e-05, + "loss": 0.4945, + "step": 16529 + }, + { + "epoch": 0.4538714991762768, + "grad_norm": 0.36017531156539917, + "learning_rate": 1.7586501203481815e-05, + "loss": 0.5482, + "step": 16530 + }, + { + "epoch": 0.4538989566172433, + "grad_norm": 0.36587652564048767, + "learning_rate": 1.758621981859811e-05, + "loss": 0.425, + "step": 16531 + }, + { + "epoch": 0.45392641405820977, + "grad_norm": 0.3845268785953522, + "learning_rate": 1.758593841956368e-05, + "loss": 0.5048, + "step": 16532 + }, + { + "epoch": 0.45395387149917626, + "grad_norm": 0.3753984868526459, + "learning_rate": 1.7585657006379055e-05, + "loss": 0.4884, + "step": 16533 + }, + { + "epoch": 0.45398132894014276, + "grad_norm": 0.4043693244457245, + "learning_rate": 1.7585375579044758e-05, + "loss": 0.5196, + "step": 16534 + }, + { + "epoch": 0.4540087863811093, + "grad_norm": 0.3651009500026703, + "learning_rate": 1.7585094137561314e-05, + "loss": 0.4542, + "step": 16535 + }, + { + "epoch": 0.4540362438220758, + "grad_norm": 0.3863637447357178, + "learning_rate": 1.7584812681929252e-05, + "loss": 0.5231, + "step": 16536 + }, + { + "epoch": 0.4540637012630423, + "grad_norm": 0.3809734582901001, + "learning_rate": 1.7584531212149087e-05, + "loss": 0.5055, + "step": 16537 + }, + { + "epoch": 0.4540911587040088, + "grad_norm": 0.37339386343955994, + "learning_rate": 1.7584249728221358e-05, + "loss": 0.5225, + "step": 16538 + }, + { + "epoch": 0.4541186161449753, + "grad_norm": 0.3471454679965973, + "learning_rate": 1.758396823014658e-05, + "loss": 0.4743, + "step": 16539 + }, + { + "epoch": 0.4541460735859418, + "grad_norm": 0.38942334055900574, + "learning_rate": 1.7583686717925276e-05, + "loss": 0.5299, + "step": 16540 + }, + { + "epoch": 0.4541735310269083, + "grad_norm": 0.34102708101272583, + "learning_rate": 1.758340519155798e-05, + "loss": 0.4184, + "step": 16541 + }, + { + "epoch": 0.45420098846787477, + "grad_norm": 0.3690069317817688, + "learning_rate": 1.7583123651045214e-05, + "loss": 0.454, + "step": 16542 + }, + { + "epoch": 0.4542284459088413, + "grad_norm": 0.3641393482685089, + "learning_rate": 1.75828420963875e-05, + "loss": 0.4798, + "step": 16543 + }, + { + "epoch": 0.4542559033498078, + "grad_norm": 0.3586338758468628, + "learning_rate": 1.7582560527585365e-05, + "loss": 0.5407, + "step": 16544 + }, + { + "epoch": 0.4542833607907743, + "grad_norm": 0.37158146500587463, + "learning_rate": 1.7582278944639335e-05, + "loss": 0.5927, + "step": 16545 + }, + { + "epoch": 0.4543108182317408, + "grad_norm": 0.42106330394744873, + "learning_rate": 1.7581997347549935e-05, + "loss": 0.5533, + "step": 16546 + }, + { + "epoch": 0.4543382756727073, + "grad_norm": 0.40784454345703125, + "learning_rate": 1.7581715736317693e-05, + "loss": 0.5134, + "step": 16547 + }, + { + "epoch": 0.4543657331136738, + "grad_norm": 0.3454005718231201, + "learning_rate": 1.758143411094313e-05, + "loss": 0.4547, + "step": 16548 + }, + { + "epoch": 0.4543931905546403, + "grad_norm": 0.3688507080078125, + "learning_rate": 1.7581152471426767e-05, + "loss": 0.4753, + "step": 16549 + }, + { + "epoch": 0.45442064799560683, + "grad_norm": 0.3741554021835327, + "learning_rate": 1.758087081776914e-05, + "loss": 0.5003, + "step": 16550 + }, + { + "epoch": 0.4544481054365733, + "grad_norm": 0.43804216384887695, + "learning_rate": 1.758058914997077e-05, + "loss": 0.5459, + "step": 16551 + }, + { + "epoch": 0.4544755628775398, + "grad_norm": 0.3778623044490814, + "learning_rate": 1.758030746803218e-05, + "loss": 0.5833, + "step": 16552 + }, + { + "epoch": 0.4545030203185063, + "grad_norm": 0.3499540090560913, + "learning_rate": 1.7580025771953898e-05, + "loss": 0.481, + "step": 16553 + }, + { + "epoch": 0.4545304777594728, + "grad_norm": 0.31239765882492065, + "learning_rate": 1.7579744061736448e-05, + "loss": 0.4267, + "step": 16554 + }, + { + "epoch": 0.4545579352004393, + "grad_norm": 0.3770048916339874, + "learning_rate": 1.7579462337380358e-05, + "loss": 0.5068, + "step": 16555 + }, + { + "epoch": 0.4545853926414058, + "grad_norm": 0.43326085805892944, + "learning_rate": 1.757918059888615e-05, + "loss": 0.5871, + "step": 16556 + }, + { + "epoch": 0.45461285008237234, + "grad_norm": 0.40523549914360046, + "learning_rate": 1.757889884625435e-05, + "loss": 0.5912, + "step": 16557 + }, + { + "epoch": 0.45464030752333884, + "grad_norm": 0.38553765416145325, + "learning_rate": 1.757861707948549e-05, + "loss": 0.611, + "step": 16558 + }, + { + "epoch": 0.45466776496430533, + "grad_norm": 0.34674856066703796, + "learning_rate": 1.7578335298580086e-05, + "loss": 0.5222, + "step": 16559 + }, + { + "epoch": 0.4546952224052718, + "grad_norm": 0.4021919369697571, + "learning_rate": 1.7578053503538666e-05, + "loss": 0.5343, + "step": 16560 + }, + { + "epoch": 0.4547226798462383, + "grad_norm": 0.466279000043869, + "learning_rate": 1.757777169436176e-05, + "loss": 0.557, + "step": 16561 + }, + { + "epoch": 0.4547501372872048, + "grad_norm": 0.498821884393692, + "learning_rate": 1.757748987104989e-05, + "loss": 0.4803, + "step": 16562 + }, + { + "epoch": 0.4547775947281713, + "grad_norm": 0.3816353380680084, + "learning_rate": 1.7577208033603586e-05, + "loss": 0.4876, + "step": 16563 + }, + { + "epoch": 0.45480505216913786, + "grad_norm": 0.3880798816680908, + "learning_rate": 1.7576926182023368e-05, + "loss": 0.4954, + "step": 16564 + }, + { + "epoch": 0.45483250961010435, + "grad_norm": 0.4021984934806824, + "learning_rate": 1.7576644316309764e-05, + "loss": 0.5995, + "step": 16565 + }, + { + "epoch": 0.45485996705107085, + "grad_norm": 0.42004838585853577, + "learning_rate": 1.7576362436463304e-05, + "loss": 0.5183, + "step": 16566 + }, + { + "epoch": 0.45488742449203734, + "grad_norm": 0.4238894283771515, + "learning_rate": 1.7576080542484505e-05, + "loss": 0.5577, + "step": 16567 + }, + { + "epoch": 0.45491488193300383, + "grad_norm": 0.4555741250514984, + "learning_rate": 1.75757986343739e-05, + "loss": 0.6249, + "step": 16568 + }, + { + "epoch": 0.45494233937397033, + "grad_norm": 0.5429349541664124, + "learning_rate": 1.7575516712132015e-05, + "loss": 0.4369, + "step": 16569 + }, + { + "epoch": 0.4549697968149368, + "grad_norm": 0.3127163350582123, + "learning_rate": 1.757523477575937e-05, + "loss": 0.427, + "step": 16570 + }, + { + "epoch": 0.45499725425590337, + "grad_norm": 0.39038074016571045, + "learning_rate": 1.7574952825256498e-05, + "loss": 0.5468, + "step": 16571 + }, + { + "epoch": 0.45502471169686987, + "grad_norm": 0.5138571858406067, + "learning_rate": 1.7574670860623917e-05, + "loss": 0.5059, + "step": 16572 + }, + { + "epoch": 0.45505216913783636, + "grad_norm": 0.41994452476501465, + "learning_rate": 1.7574388881862162e-05, + "loss": 0.4652, + "step": 16573 + }, + { + "epoch": 0.45507962657880285, + "grad_norm": 0.44227197766304016, + "learning_rate": 1.7574106888971752e-05, + "loss": 0.5353, + "step": 16574 + }, + { + "epoch": 0.45510708401976935, + "grad_norm": 0.345552921295166, + "learning_rate": 1.7573824881953215e-05, + "loss": 0.5109, + "step": 16575 + }, + { + "epoch": 0.45513454146073584, + "grad_norm": 0.3799777626991272, + "learning_rate": 1.7573542860807077e-05, + "loss": 0.5686, + "step": 16576 + }, + { + "epoch": 0.45516199890170234, + "grad_norm": 0.40512946248054504, + "learning_rate": 1.7573260825533866e-05, + "loss": 0.4583, + "step": 16577 + }, + { + "epoch": 0.4551894563426689, + "grad_norm": 0.38737088441848755, + "learning_rate": 1.7572978776134103e-05, + "loss": 0.4859, + "step": 16578 + }, + { + "epoch": 0.4552169137836354, + "grad_norm": 0.46383780241012573, + "learning_rate": 1.7572696712608318e-05, + "loss": 0.54, + "step": 16579 + }, + { + "epoch": 0.4552443712246019, + "grad_norm": 0.3988005816936493, + "learning_rate": 1.757241463495704e-05, + "loss": 0.516, + "step": 16580 + }, + { + "epoch": 0.45527182866556837, + "grad_norm": 0.35547006130218506, + "learning_rate": 1.7572132543180787e-05, + "loss": 0.5222, + "step": 16581 + }, + { + "epoch": 0.45529928610653486, + "grad_norm": 0.35235831141471863, + "learning_rate": 1.7571850437280095e-05, + "loss": 0.3366, + "step": 16582 + }, + { + "epoch": 0.45532674354750136, + "grad_norm": 0.3560246527194977, + "learning_rate": 1.7571568317255484e-05, + "loss": 0.5256, + "step": 16583 + }, + { + "epoch": 0.45535420098846785, + "grad_norm": 0.36116281151771545, + "learning_rate": 1.757128618310748e-05, + "loss": 0.4665, + "step": 16584 + }, + { + "epoch": 0.4553816584294344, + "grad_norm": 0.3748455047607422, + "learning_rate": 1.757100403483661e-05, + "loss": 0.6031, + "step": 16585 + }, + { + "epoch": 0.4554091158704009, + "grad_norm": 0.39700865745544434, + "learning_rate": 1.7570721872443406e-05, + "loss": 0.4209, + "step": 16586 + }, + { + "epoch": 0.4554365733113674, + "grad_norm": 0.5671699047088623, + "learning_rate": 1.7570439695928385e-05, + "loss": 0.5576, + "step": 16587 + }, + { + "epoch": 0.4554640307523339, + "grad_norm": 0.3827188014984131, + "learning_rate": 1.757015750529208e-05, + "loss": 0.5963, + "step": 16588 + }, + { + "epoch": 0.4554914881933004, + "grad_norm": 0.3311350345611572, + "learning_rate": 1.7569875300535016e-05, + "loss": 0.5518, + "step": 16589 + }, + { + "epoch": 0.45551894563426687, + "grad_norm": 0.3538949191570282, + "learning_rate": 1.7569593081657713e-05, + "loss": 0.5305, + "step": 16590 + }, + { + "epoch": 0.45554640307523336, + "grad_norm": 0.4465828537940979, + "learning_rate": 1.756931084866071e-05, + "loss": 0.487, + "step": 16591 + }, + { + "epoch": 0.4555738605161999, + "grad_norm": 0.42497649788856506, + "learning_rate": 1.756902860154452e-05, + "loss": 0.5926, + "step": 16592 + }, + { + "epoch": 0.4556013179571664, + "grad_norm": 0.46656814217567444, + "learning_rate": 1.7568746340309682e-05, + "loss": 0.5881, + "step": 16593 + }, + { + "epoch": 0.4556287753981329, + "grad_norm": 0.33339646458625793, + "learning_rate": 1.7568464064956715e-05, + "loss": 0.5144, + "step": 16594 + }, + { + "epoch": 0.4556562328390994, + "grad_norm": 0.3568938374519348, + "learning_rate": 1.7568181775486145e-05, + "loss": 0.5803, + "step": 16595 + }, + { + "epoch": 0.4556836902800659, + "grad_norm": 0.38381120562553406, + "learning_rate": 1.75678994718985e-05, + "loss": 0.6159, + "step": 16596 + }, + { + "epoch": 0.4557111477210324, + "grad_norm": 0.41803234815597534, + "learning_rate": 1.756761715419431e-05, + "loss": 0.5217, + "step": 16597 + }, + { + "epoch": 0.4557386051619989, + "grad_norm": 0.39402201771736145, + "learning_rate": 1.7567334822374094e-05, + "loss": 0.5001, + "step": 16598 + }, + { + "epoch": 0.4557660626029654, + "grad_norm": 0.36497536301612854, + "learning_rate": 1.7567052476438386e-05, + "loss": 0.4662, + "step": 16599 + }, + { + "epoch": 0.4557935200439319, + "grad_norm": 0.48700135946273804, + "learning_rate": 1.756677011638771e-05, + "loss": 0.5115, + "step": 16600 + }, + { + "epoch": 0.4558209774848984, + "grad_norm": 0.4429991543292999, + "learning_rate": 1.7566487742222594e-05, + "loss": 0.4987, + "step": 16601 + }, + { + "epoch": 0.4558484349258649, + "grad_norm": 0.34190091490745544, + "learning_rate": 1.756620535394356e-05, + "loss": 0.509, + "step": 16602 + }, + { + "epoch": 0.4558758923668314, + "grad_norm": 0.3770783245563507, + "learning_rate": 1.7565922951551142e-05, + "loss": 0.5336, + "step": 16603 + }, + { + "epoch": 0.4559033498077979, + "grad_norm": 0.37652865052223206, + "learning_rate": 1.7565640535045864e-05, + "loss": 0.4614, + "step": 16604 + }, + { + "epoch": 0.4559308072487644, + "grad_norm": 0.3874792158603668, + "learning_rate": 1.7565358104428247e-05, + "loss": 0.4834, + "step": 16605 + }, + { + "epoch": 0.45595826468973094, + "grad_norm": 0.3723720908164978, + "learning_rate": 1.7565075659698825e-05, + "loss": 0.4479, + "step": 16606 + }, + { + "epoch": 0.45598572213069744, + "grad_norm": 0.3963593542575836, + "learning_rate": 1.756479320085812e-05, + "loss": 0.6025, + "step": 16607 + }, + { + "epoch": 0.45601317957166393, + "grad_norm": 0.48961296677589417, + "learning_rate": 1.7564510727906666e-05, + "loss": 0.5453, + "step": 16608 + }, + { + "epoch": 0.4560406370126304, + "grad_norm": 0.3994031548500061, + "learning_rate": 1.7564228240844983e-05, + "loss": 0.5269, + "step": 16609 + }, + { + "epoch": 0.4560680944535969, + "grad_norm": 0.3720681369304657, + "learning_rate": 1.7563945739673598e-05, + "loss": 0.509, + "step": 16610 + }, + { + "epoch": 0.4560955518945634, + "grad_norm": 0.4138161540031433, + "learning_rate": 1.7563663224393042e-05, + "loss": 0.5282, + "step": 16611 + }, + { + "epoch": 0.4561230093355299, + "grad_norm": 0.3703373670578003, + "learning_rate": 1.756338069500384e-05, + "loss": 0.4826, + "step": 16612 + }, + { + "epoch": 0.45615046677649645, + "grad_norm": 0.4052475094795227, + "learning_rate": 1.756309815150652e-05, + "loss": 0.4918, + "step": 16613 + }, + { + "epoch": 0.45617792421746295, + "grad_norm": 0.36041557788848877, + "learning_rate": 1.7562815593901605e-05, + "loss": 0.5902, + "step": 16614 + }, + { + "epoch": 0.45620538165842944, + "grad_norm": 0.35031044483184814, + "learning_rate": 1.7562533022189628e-05, + "loss": 0.4645, + "step": 16615 + }, + { + "epoch": 0.45623283909939594, + "grad_norm": 0.3535565733909607, + "learning_rate": 1.756225043637111e-05, + "loss": 0.5314, + "step": 16616 + }, + { + "epoch": 0.45626029654036243, + "grad_norm": 0.3753267824649811, + "learning_rate": 1.7561967836446586e-05, + "loss": 0.5159, + "step": 16617 + }, + { + "epoch": 0.4562877539813289, + "grad_norm": 0.34340569376945496, + "learning_rate": 1.7561685222416575e-05, + "loss": 0.4581, + "step": 16618 + }, + { + "epoch": 0.4563152114222954, + "grad_norm": 0.3571034371852875, + "learning_rate": 1.756140259428161e-05, + "loss": 0.5259, + "step": 16619 + }, + { + "epoch": 0.45634266886326197, + "grad_norm": 0.3868033289909363, + "learning_rate": 1.7561119952042213e-05, + "loss": 0.5184, + "step": 16620 + }, + { + "epoch": 0.45637012630422846, + "grad_norm": 0.3761453926563263, + "learning_rate": 1.7560837295698915e-05, + "loss": 0.5033, + "step": 16621 + }, + { + "epoch": 0.45639758374519496, + "grad_norm": 0.3405040502548218, + "learning_rate": 1.7560554625252244e-05, + "loss": 0.4424, + "step": 16622 + }, + { + "epoch": 0.45642504118616145, + "grad_norm": 0.35442841053009033, + "learning_rate": 1.7560271940702725e-05, + "loss": 0.524, + "step": 16623 + }, + { + "epoch": 0.45645249862712794, + "grad_norm": 0.344390869140625, + "learning_rate": 1.7559989242050884e-05, + "loss": 0.4655, + "step": 16624 + }, + { + "epoch": 0.45647995606809444, + "grad_norm": 0.47831660509109497, + "learning_rate": 1.7559706529297252e-05, + "loss": 0.5965, + "step": 16625 + }, + { + "epoch": 0.45650741350906093, + "grad_norm": 0.37052714824676514, + "learning_rate": 1.7559423802442354e-05, + "loss": 0.6006, + "step": 16626 + }, + { + "epoch": 0.4565348709500275, + "grad_norm": 0.38625040650367737, + "learning_rate": 1.755914106148672e-05, + "loss": 0.5618, + "step": 16627 + }, + { + "epoch": 0.456562328390994, + "grad_norm": 0.35605087876319885, + "learning_rate": 1.7558858306430873e-05, + "loss": 0.446, + "step": 16628 + }, + { + "epoch": 0.45658978583196047, + "grad_norm": 0.4145413637161255, + "learning_rate": 1.7558575537275343e-05, + "loss": 0.5476, + "step": 16629 + }, + { + "epoch": 0.45661724327292696, + "grad_norm": 0.37305736541748047, + "learning_rate": 1.7558292754020655e-05, + "loss": 0.6043, + "step": 16630 + }, + { + "epoch": 0.45664470071389346, + "grad_norm": 0.3537141978740692, + "learning_rate": 1.7558009956667344e-05, + "loss": 0.5077, + "step": 16631 + }, + { + "epoch": 0.45667215815485995, + "grad_norm": 0.36483174562454224, + "learning_rate": 1.755772714521593e-05, + "loss": 0.5514, + "step": 16632 + }, + { + "epoch": 0.45669961559582645, + "grad_norm": 0.35622337460517883, + "learning_rate": 1.755744431966694e-05, + "loss": 0.6007, + "step": 16633 + }, + { + "epoch": 0.456727073036793, + "grad_norm": 0.8003289699554443, + "learning_rate": 1.7557161480020905e-05, + "loss": 0.4634, + "step": 16634 + }, + { + "epoch": 0.4567545304777595, + "grad_norm": 0.4081186354160309, + "learning_rate": 1.7556878626278354e-05, + "loss": 0.526, + "step": 16635 + }, + { + "epoch": 0.456781987918726, + "grad_norm": 0.5972845554351807, + "learning_rate": 1.7556595758439813e-05, + "loss": 0.5012, + "step": 16636 + }, + { + "epoch": 0.4568094453596925, + "grad_norm": 0.35675859451293945, + "learning_rate": 1.7556312876505807e-05, + "loss": 0.4866, + "step": 16637 + }, + { + "epoch": 0.456836902800659, + "grad_norm": 0.3735601007938385, + "learning_rate": 1.7556029980476864e-05, + "loss": 0.5557, + "step": 16638 + }, + { + "epoch": 0.45686436024162547, + "grad_norm": 0.353425532579422, + "learning_rate": 1.7555747070353517e-05, + "loss": 0.5167, + "step": 16639 + }, + { + "epoch": 0.45689181768259196, + "grad_norm": 0.4468083381652832, + "learning_rate": 1.755546414613629e-05, + "loss": 0.5285, + "step": 16640 + }, + { + "epoch": 0.4569192751235585, + "grad_norm": 0.3692243695259094, + "learning_rate": 1.7555181207825712e-05, + "loss": 0.5675, + "step": 16641 + }, + { + "epoch": 0.456946732564525, + "grad_norm": 0.33781954646110535, + "learning_rate": 1.7554898255422306e-05, + "loss": 0.4514, + "step": 16642 + }, + { + "epoch": 0.4569741900054915, + "grad_norm": 0.38140323758125305, + "learning_rate": 1.7554615288926605e-05, + "loss": 0.4385, + "step": 16643 + }, + { + "epoch": 0.457001647446458, + "grad_norm": 0.35141217708587646, + "learning_rate": 1.7554332308339136e-05, + "loss": 0.4929, + "step": 16644 + }, + { + "epoch": 0.4570291048874245, + "grad_norm": 0.3962617516517639, + "learning_rate": 1.7554049313660426e-05, + "loss": 0.478, + "step": 16645 + }, + { + "epoch": 0.457056562328391, + "grad_norm": 0.3716096878051758, + "learning_rate": 1.7553766304891002e-05, + "loss": 0.5303, + "step": 16646 + }, + { + "epoch": 0.4570840197693575, + "grad_norm": 0.41047346591949463, + "learning_rate": 1.7553483282031398e-05, + "loss": 0.4643, + "step": 16647 + }, + { + "epoch": 0.457111477210324, + "grad_norm": 0.3691604733467102, + "learning_rate": 1.755320024508213e-05, + "loss": 0.5111, + "step": 16648 + }, + { + "epoch": 0.4571389346512905, + "grad_norm": 0.4332333505153656, + "learning_rate": 1.7552917194043734e-05, + "loss": 0.6241, + "step": 16649 + }, + { + "epoch": 0.457166392092257, + "grad_norm": 0.38561490178108215, + "learning_rate": 1.755263412891674e-05, + "loss": 0.4974, + "step": 16650 + }, + { + "epoch": 0.4571938495332235, + "grad_norm": 0.343030720949173, + "learning_rate": 1.755235104970167e-05, + "loss": 0.5167, + "step": 16651 + }, + { + "epoch": 0.45722130697419, + "grad_norm": 0.3809076249599457, + "learning_rate": 1.7552067956399057e-05, + "loss": 0.4581, + "step": 16652 + }, + { + "epoch": 0.4572487644151565, + "grad_norm": 0.40163013339042664, + "learning_rate": 1.7551784849009424e-05, + "loss": 0.5214, + "step": 16653 + }, + { + "epoch": 0.457276221856123, + "grad_norm": 1.0973079204559326, + "learning_rate": 1.7551501727533304e-05, + "loss": 0.4283, + "step": 16654 + }, + { + "epoch": 0.45730367929708954, + "grad_norm": 0.39215803146362305, + "learning_rate": 1.7551218591971223e-05, + "loss": 0.4807, + "step": 16655 + }, + { + "epoch": 0.45733113673805603, + "grad_norm": 0.37104201316833496, + "learning_rate": 1.7550935442323713e-05, + "loss": 0.5496, + "step": 16656 + }, + { + "epoch": 0.4573585941790225, + "grad_norm": 0.32840776443481445, + "learning_rate": 1.755065227859129e-05, + "loss": 0.421, + "step": 16657 + }, + { + "epoch": 0.457386051619989, + "grad_norm": 0.30561700463294983, + "learning_rate": 1.75503691007745e-05, + "loss": 0.472, + "step": 16658 + }, + { + "epoch": 0.4574135090609555, + "grad_norm": 0.37705928087234497, + "learning_rate": 1.7550085908873856e-05, + "loss": 0.5483, + "step": 16659 + }, + { + "epoch": 0.457440966501922, + "grad_norm": 0.3470211625099182, + "learning_rate": 1.7549802702889893e-05, + "loss": 0.4864, + "step": 16660 + }, + { + "epoch": 0.4574684239428885, + "grad_norm": 0.3672449588775635, + "learning_rate": 1.754951948282314e-05, + "loss": 0.5152, + "step": 16661 + }, + { + "epoch": 0.45749588138385505, + "grad_norm": 0.3696678578853607, + "learning_rate": 1.754923624867412e-05, + "loss": 0.4842, + "step": 16662 + }, + { + "epoch": 0.45752333882482155, + "grad_norm": 0.3849492073059082, + "learning_rate": 1.754895300044337e-05, + "loss": 0.5216, + "step": 16663 + }, + { + "epoch": 0.45755079626578804, + "grad_norm": 0.4267268478870392, + "learning_rate": 1.754866973813141e-05, + "loss": 0.5594, + "step": 16664 + }, + { + "epoch": 0.45757825370675453, + "grad_norm": 0.3731401562690735, + "learning_rate": 1.7548386461738772e-05, + "loss": 0.5424, + "step": 16665 + }, + { + "epoch": 0.45760571114772103, + "grad_norm": 0.4468056559562683, + "learning_rate": 1.7548103171265983e-05, + "loss": 0.4999, + "step": 16666 + }, + { + "epoch": 0.4576331685886875, + "grad_norm": 0.37811264395713806, + "learning_rate": 1.7547819866713575e-05, + "loss": 0.4929, + "step": 16667 + }, + { + "epoch": 0.457660626029654, + "grad_norm": 0.4378097951412201, + "learning_rate": 1.7547536548082073e-05, + "loss": 0.6317, + "step": 16668 + }, + { + "epoch": 0.45768808347062057, + "grad_norm": 0.3596460819244385, + "learning_rate": 1.7547253215372007e-05, + "loss": 0.5302, + "step": 16669 + }, + { + "epoch": 0.45771554091158706, + "grad_norm": 0.39432626962661743, + "learning_rate": 1.7546969868583903e-05, + "loss": 0.5539, + "step": 16670 + }, + { + "epoch": 0.45774299835255355, + "grad_norm": 0.38308092951774597, + "learning_rate": 1.7546686507718292e-05, + "loss": 0.486, + "step": 16671 + }, + { + "epoch": 0.45777045579352005, + "grad_norm": 0.38907116651535034, + "learning_rate": 1.7546403132775704e-05, + "loss": 0.4817, + "step": 16672 + }, + { + "epoch": 0.45779791323448654, + "grad_norm": 0.3749037981033325, + "learning_rate": 1.7546119743756663e-05, + "loss": 0.5016, + "step": 16673 + }, + { + "epoch": 0.45782537067545304, + "grad_norm": 0.4085845351219177, + "learning_rate": 1.7545836340661702e-05, + "loss": 0.5157, + "step": 16674 + }, + { + "epoch": 0.45785282811641953, + "grad_norm": 0.4375322163105011, + "learning_rate": 1.7545552923491348e-05, + "loss": 0.5177, + "step": 16675 + }, + { + "epoch": 0.457880285557386, + "grad_norm": 0.38231489062309265, + "learning_rate": 1.7545269492246128e-05, + "loss": 0.5856, + "step": 16676 + }, + { + "epoch": 0.4579077429983526, + "grad_norm": 0.3738904595375061, + "learning_rate": 1.7544986046926575e-05, + "loss": 0.4724, + "step": 16677 + }, + { + "epoch": 0.45793520043931907, + "grad_norm": 0.5630139708518982, + "learning_rate": 1.754470258753321e-05, + "loss": 0.5109, + "step": 16678 + }, + { + "epoch": 0.45796265788028556, + "grad_norm": 0.34499862790107727, + "learning_rate": 1.7544419114066572e-05, + "loss": 0.4394, + "step": 16679 + }, + { + "epoch": 0.45799011532125206, + "grad_norm": 0.35807371139526367, + "learning_rate": 1.7544135626527184e-05, + "loss": 0.438, + "step": 16680 + }, + { + "epoch": 0.45801757276221855, + "grad_norm": 0.37431856989860535, + "learning_rate": 1.754385212491557e-05, + "loss": 0.5276, + "step": 16681 + }, + { + "epoch": 0.45804503020318504, + "grad_norm": 0.4551090598106384, + "learning_rate": 1.7543568609232267e-05, + "loss": 0.5146, + "step": 16682 + }, + { + "epoch": 0.45807248764415154, + "grad_norm": 0.371967077255249, + "learning_rate": 1.7543285079477804e-05, + "loss": 0.4934, + "step": 16683 + }, + { + "epoch": 0.4580999450851181, + "grad_norm": 0.3527933955192566, + "learning_rate": 1.7543001535652706e-05, + "loss": 0.4255, + "step": 16684 + }, + { + "epoch": 0.4581274025260846, + "grad_norm": 0.3506869971752167, + "learning_rate": 1.75427179777575e-05, + "loss": 0.5534, + "step": 16685 + }, + { + "epoch": 0.4581548599670511, + "grad_norm": 0.41786202788352966, + "learning_rate": 1.7542434405792715e-05, + "loss": 0.5485, + "step": 16686 + }, + { + "epoch": 0.45818231740801757, + "grad_norm": 0.3686217963695526, + "learning_rate": 1.754215081975889e-05, + "loss": 0.488, + "step": 16687 + }, + { + "epoch": 0.45820977484898406, + "grad_norm": 0.33872222900390625, + "learning_rate": 1.7541867219656543e-05, + "loss": 0.5612, + "step": 16688 + }, + { + "epoch": 0.45823723228995056, + "grad_norm": 0.3853532671928406, + "learning_rate": 1.7541583605486207e-05, + "loss": 0.5288, + "step": 16689 + }, + { + "epoch": 0.45826468973091705, + "grad_norm": 0.905916154384613, + "learning_rate": 1.7541299977248408e-05, + "loss": 0.5022, + "step": 16690 + }, + { + "epoch": 0.4582921471718836, + "grad_norm": 0.31432631611824036, + "learning_rate": 1.754101633494368e-05, + "loss": 0.4364, + "step": 16691 + }, + { + "epoch": 0.4583196046128501, + "grad_norm": 0.37017613649368286, + "learning_rate": 1.754073267857255e-05, + "loss": 0.5579, + "step": 16692 + }, + { + "epoch": 0.4583470620538166, + "grad_norm": 0.4064299464225769, + "learning_rate": 1.7540449008135544e-05, + "loss": 0.5189, + "step": 16693 + }, + { + "epoch": 0.4583745194947831, + "grad_norm": 0.34623581171035767, + "learning_rate": 1.75401653236332e-05, + "loss": 0.5296, + "step": 16694 + }, + { + "epoch": 0.4584019769357496, + "grad_norm": 0.4108019471168518, + "learning_rate": 1.7539881625066035e-05, + "loss": 0.5246, + "step": 16695 + }, + { + "epoch": 0.45842943437671607, + "grad_norm": 0.3541775941848755, + "learning_rate": 1.7539597912434586e-05, + "loss": 0.5397, + "step": 16696 + }, + { + "epoch": 0.45845689181768257, + "grad_norm": 0.41107672452926636, + "learning_rate": 1.753931418573938e-05, + "loss": 0.5178, + "step": 16697 + }, + { + "epoch": 0.4584843492586491, + "grad_norm": 0.3717249929904938, + "learning_rate": 1.753903044498095e-05, + "loss": 0.5166, + "step": 16698 + }, + { + "epoch": 0.4585118066996156, + "grad_norm": 0.4021792709827423, + "learning_rate": 1.753874669015982e-05, + "loss": 0.5156, + "step": 16699 + }, + { + "epoch": 0.4585392641405821, + "grad_norm": 0.41659197211265564, + "learning_rate": 1.753846292127652e-05, + "loss": 0.5074, + "step": 16700 + }, + { + "epoch": 0.4585667215815486, + "grad_norm": 0.3796684145927429, + "learning_rate": 1.753817913833158e-05, + "loss": 0.5701, + "step": 16701 + }, + { + "epoch": 0.4585941790225151, + "grad_norm": 0.3828224241733551, + "learning_rate": 1.7537895341325533e-05, + "loss": 0.5344, + "step": 16702 + }, + { + "epoch": 0.4586216364634816, + "grad_norm": 0.3642633855342865, + "learning_rate": 1.7537611530258905e-05, + "loss": 0.4574, + "step": 16703 + }, + { + "epoch": 0.4586490939044481, + "grad_norm": 0.3427603840827942, + "learning_rate": 1.7537327705132224e-05, + "loss": 0.4947, + "step": 16704 + }, + { + "epoch": 0.45867655134541463, + "grad_norm": 0.4845469892024994, + "learning_rate": 1.7537043865946023e-05, + "loss": 0.5604, + "step": 16705 + }, + { + "epoch": 0.4587040087863811, + "grad_norm": 0.3538109064102173, + "learning_rate": 1.7536760012700827e-05, + "loss": 0.4682, + "step": 16706 + }, + { + "epoch": 0.4587314662273476, + "grad_norm": 0.3690710663795471, + "learning_rate": 1.753647614539717e-05, + "loss": 0.5767, + "step": 16707 + }, + { + "epoch": 0.4587589236683141, + "grad_norm": 0.3553994596004486, + "learning_rate": 1.753619226403558e-05, + "loss": 0.4549, + "step": 16708 + }, + { + "epoch": 0.4587863811092806, + "grad_norm": 0.3812497854232788, + "learning_rate": 1.7535908368616587e-05, + "loss": 0.5672, + "step": 16709 + }, + { + "epoch": 0.4588138385502471, + "grad_norm": 0.354568213224411, + "learning_rate": 1.7535624459140715e-05, + "loss": 0.551, + "step": 16710 + }, + { + "epoch": 0.4588412959912136, + "grad_norm": 0.3369874656200409, + "learning_rate": 1.7535340535608505e-05, + "loss": 0.463, + "step": 16711 + }, + { + "epoch": 0.45886875343218014, + "grad_norm": 0.36775216460227966, + "learning_rate": 1.7535056598020477e-05, + "loss": 0.5142, + "step": 16712 + }, + { + "epoch": 0.45889621087314664, + "grad_norm": 0.3682059645652771, + "learning_rate": 1.753477264637716e-05, + "loss": 0.3859, + "step": 16713 + }, + { + "epoch": 0.45892366831411313, + "grad_norm": 0.34382978081703186, + "learning_rate": 1.7534488680679095e-05, + "loss": 0.4977, + "step": 16714 + }, + { + "epoch": 0.4589511257550796, + "grad_norm": 0.6165159344673157, + "learning_rate": 1.7534204700926797e-05, + "loss": 0.5074, + "step": 16715 + }, + { + "epoch": 0.4589785831960461, + "grad_norm": 0.3677917718887329, + "learning_rate": 1.7533920707120807e-05, + "loss": 0.5544, + "step": 16716 + }, + { + "epoch": 0.4590060406370126, + "grad_norm": 0.4692487418651581, + "learning_rate": 1.7533636699261645e-05, + "loss": 0.4814, + "step": 16717 + }, + { + "epoch": 0.4590334980779791, + "grad_norm": 0.52354896068573, + "learning_rate": 1.753335267734985e-05, + "loss": 0.5397, + "step": 16718 + }, + { + "epoch": 0.45906095551894566, + "grad_norm": 0.3549933433532715, + "learning_rate": 1.7533068641385947e-05, + "loss": 0.4893, + "step": 16719 + }, + { + "epoch": 0.45908841295991215, + "grad_norm": 0.3826676309108734, + "learning_rate": 1.7532784591370467e-05, + "loss": 0.5759, + "step": 16720 + }, + { + "epoch": 0.45911587040087865, + "grad_norm": 0.4681667685508728, + "learning_rate": 1.753250052730394e-05, + "loss": 0.5063, + "step": 16721 + }, + { + "epoch": 0.45914332784184514, + "grad_norm": 0.40760254859924316, + "learning_rate": 1.7532216449186896e-05, + "loss": 0.5342, + "step": 16722 + }, + { + "epoch": 0.45917078528281163, + "grad_norm": 0.3598197102546692, + "learning_rate": 1.7531932357019862e-05, + "loss": 0.4858, + "step": 16723 + }, + { + "epoch": 0.4591982427237781, + "grad_norm": 0.3490156829357147, + "learning_rate": 1.753164825080337e-05, + "loss": 0.4262, + "step": 16724 + }, + { + "epoch": 0.4592257001647446, + "grad_norm": 0.3888266086578369, + "learning_rate": 1.7531364130537953e-05, + "loss": 0.6043, + "step": 16725 + }, + { + "epoch": 0.45925315760571117, + "grad_norm": 0.4135594666004181, + "learning_rate": 1.7531079996224138e-05, + "loss": 0.5756, + "step": 16726 + }, + { + "epoch": 0.45928061504667766, + "grad_norm": 0.33224770426750183, + "learning_rate": 1.7530795847862455e-05, + "loss": 0.4661, + "step": 16727 + }, + { + "epoch": 0.45930807248764416, + "grad_norm": 0.3955075442790985, + "learning_rate": 1.753051168545343e-05, + "loss": 0.5325, + "step": 16728 + }, + { + "epoch": 0.45933552992861065, + "grad_norm": 0.3918803930282593, + "learning_rate": 1.7530227508997603e-05, + "loss": 0.4975, + "step": 16729 + }, + { + "epoch": 0.45936298736957715, + "grad_norm": 0.4016367495059967, + "learning_rate": 1.7529943318495495e-05, + "loss": 0.4978, + "step": 16730 + }, + { + "epoch": 0.45939044481054364, + "grad_norm": 0.808336615562439, + "learning_rate": 1.752965911394764e-05, + "loss": 0.4729, + "step": 16731 + }, + { + "epoch": 0.45941790225151014, + "grad_norm": 0.40157440304756165, + "learning_rate": 1.7529374895354565e-05, + "loss": 0.5586, + "step": 16732 + }, + { + "epoch": 0.4594453596924767, + "grad_norm": 0.40272584557533264, + "learning_rate": 1.7529090662716807e-05, + "loss": 0.5211, + "step": 16733 + }, + { + "epoch": 0.4594728171334432, + "grad_norm": 0.33085769414901733, + "learning_rate": 1.752880641603489e-05, + "loss": 0.5189, + "step": 16734 + }, + { + "epoch": 0.4595002745744097, + "grad_norm": 0.38534781336784363, + "learning_rate": 1.7528522155309346e-05, + "loss": 0.4922, + "step": 16735 + }, + { + "epoch": 0.45952773201537617, + "grad_norm": 0.38339370489120483, + "learning_rate": 1.7528237880540707e-05, + "loss": 0.5197, + "step": 16736 + }, + { + "epoch": 0.45955518945634266, + "grad_norm": 0.48465147614479065, + "learning_rate": 1.75279535917295e-05, + "loss": 0.5936, + "step": 16737 + }, + { + "epoch": 0.45958264689730915, + "grad_norm": 0.4105777144432068, + "learning_rate": 1.752766928887626e-05, + "loss": 0.5193, + "step": 16738 + }, + { + "epoch": 0.45961010433827565, + "grad_norm": 0.36207860708236694, + "learning_rate": 1.752738497198151e-05, + "loss": 0.5441, + "step": 16739 + }, + { + "epoch": 0.4596375617792422, + "grad_norm": 0.3734719753265381, + "learning_rate": 1.7527100641045783e-05, + "loss": 0.5642, + "step": 16740 + }, + { + "epoch": 0.4596650192202087, + "grad_norm": 0.354739785194397, + "learning_rate": 1.7526816296069616e-05, + "loss": 0.4664, + "step": 16741 + }, + { + "epoch": 0.4596924766611752, + "grad_norm": 0.37582337856292725, + "learning_rate": 1.7526531937053532e-05, + "loss": 0.4562, + "step": 16742 + }, + { + "epoch": 0.4597199341021417, + "grad_norm": 0.3974027931690216, + "learning_rate": 1.7526247563998064e-05, + "loss": 0.5608, + "step": 16743 + }, + { + "epoch": 0.4597473915431082, + "grad_norm": 0.37814322113990784, + "learning_rate": 1.7525963176903742e-05, + "loss": 0.4445, + "step": 16744 + }, + { + "epoch": 0.45977484898407467, + "grad_norm": 0.38701900839805603, + "learning_rate": 1.7525678775771097e-05, + "loss": 0.5904, + "step": 16745 + }, + { + "epoch": 0.45980230642504116, + "grad_norm": 0.38682299852371216, + "learning_rate": 1.752539436060066e-05, + "loss": 0.5112, + "step": 16746 + }, + { + "epoch": 0.4598297638660077, + "grad_norm": 0.3622158169746399, + "learning_rate": 1.7525109931392957e-05, + "loss": 0.5587, + "step": 16747 + }, + { + "epoch": 0.4598572213069742, + "grad_norm": 0.3827058672904968, + "learning_rate": 1.752482548814853e-05, + "loss": 0.4842, + "step": 16748 + }, + { + "epoch": 0.4598846787479407, + "grad_norm": 0.5879247188568115, + "learning_rate": 1.7524541030867893e-05, + "loss": 0.5222, + "step": 16749 + }, + { + "epoch": 0.4599121361889072, + "grad_norm": 0.36257269978523254, + "learning_rate": 1.752425655955159e-05, + "loss": 0.5449, + "step": 16750 + }, + { + "epoch": 0.4599395936298737, + "grad_norm": 0.37266722321510315, + "learning_rate": 1.7523972074200148e-05, + "loss": 0.496, + "step": 16751 + }, + { + "epoch": 0.4599670510708402, + "grad_norm": 0.35239923000335693, + "learning_rate": 1.7523687574814096e-05, + "loss": 0.563, + "step": 16752 + }, + { + "epoch": 0.4599945085118067, + "grad_norm": 0.3448857367038727, + "learning_rate": 1.752340306139396e-05, + "loss": 0.4717, + "step": 16753 + }, + { + "epoch": 0.4600219659527732, + "grad_norm": 0.3562699258327484, + "learning_rate": 1.752311853394028e-05, + "loss": 0.5533, + "step": 16754 + }, + { + "epoch": 0.4600494233937397, + "grad_norm": 0.42588892579078674, + "learning_rate": 1.7522833992453588e-05, + "loss": 0.5401, + "step": 16755 + }, + { + "epoch": 0.4600768808347062, + "grad_norm": 0.3418267071247101, + "learning_rate": 1.7522549436934405e-05, + "loss": 0.4017, + "step": 16756 + }, + { + "epoch": 0.4601043382756727, + "grad_norm": 0.3489398658275604, + "learning_rate": 1.7522264867383264e-05, + "loss": 0.5726, + "step": 16757 + }, + { + "epoch": 0.4601317957166392, + "grad_norm": 0.43054139614105225, + "learning_rate": 1.75219802838007e-05, + "loss": 0.537, + "step": 16758 + }, + { + "epoch": 0.4601592531576057, + "grad_norm": 0.31304270029067993, + "learning_rate": 1.7521695686187244e-05, + "loss": 0.5054, + "step": 16759 + }, + { + "epoch": 0.4601867105985722, + "grad_norm": 0.3848211169242859, + "learning_rate": 1.7521411074543424e-05, + "loss": 0.5323, + "step": 16760 + }, + { + "epoch": 0.46021416803953874, + "grad_norm": 0.4069370627403259, + "learning_rate": 1.752112644886977e-05, + "loss": 0.51, + "step": 16761 + }, + { + "epoch": 0.46024162548050523, + "grad_norm": 0.35784202814102173, + "learning_rate": 1.7520841809166816e-05, + "loss": 0.4229, + "step": 16762 + }, + { + "epoch": 0.46026908292147173, + "grad_norm": 0.38886719942092896, + "learning_rate": 1.752055715543509e-05, + "loss": 0.5118, + "step": 16763 + }, + { + "epoch": 0.4602965403624382, + "grad_norm": 0.40310996770858765, + "learning_rate": 1.752027248767513e-05, + "loss": 0.5681, + "step": 16764 + }, + { + "epoch": 0.4603239978034047, + "grad_norm": 0.3980056345462799, + "learning_rate": 1.7519987805887457e-05, + "loss": 0.4813, + "step": 16765 + }, + { + "epoch": 0.4603514552443712, + "grad_norm": 0.38054293394088745, + "learning_rate": 1.7519703110072605e-05, + "loss": 0.4898, + "step": 16766 + }, + { + "epoch": 0.4603789126853377, + "grad_norm": 0.3632095754146576, + "learning_rate": 1.751941840023111e-05, + "loss": 0.516, + "step": 16767 + }, + { + "epoch": 0.46040637012630425, + "grad_norm": 0.36414584517478943, + "learning_rate": 1.75191336763635e-05, + "loss": 0.46, + "step": 16768 + }, + { + "epoch": 0.46043382756727075, + "grad_norm": 0.3761109709739685, + "learning_rate": 1.7518848938470303e-05, + "loss": 0.555, + "step": 16769 + }, + { + "epoch": 0.46046128500823724, + "grad_norm": 0.3865005373954773, + "learning_rate": 1.7518564186552058e-05, + "loss": 0.4395, + "step": 16770 + }, + { + "epoch": 0.46048874244920374, + "grad_norm": 0.4563003182411194, + "learning_rate": 1.7518279420609284e-05, + "loss": 0.5633, + "step": 16771 + }, + { + "epoch": 0.46051619989017023, + "grad_norm": 0.36994031071662903, + "learning_rate": 1.7517994640642525e-05, + "loss": 0.5266, + "step": 16772 + }, + { + "epoch": 0.4605436573311367, + "grad_norm": 0.45069026947021484, + "learning_rate": 1.7517709846652306e-05, + "loss": 0.5147, + "step": 16773 + }, + { + "epoch": 0.4605711147721032, + "grad_norm": 0.3383224308490753, + "learning_rate": 1.7517425038639157e-05, + "loss": 0.5143, + "step": 16774 + }, + { + "epoch": 0.46059857221306977, + "grad_norm": 0.3929903507232666, + "learning_rate": 1.751714021660361e-05, + "loss": 0.548, + "step": 16775 + }, + { + "epoch": 0.46062602965403626, + "grad_norm": 0.3944860100746155, + "learning_rate": 1.75168553805462e-05, + "loss": 0.4374, + "step": 16776 + }, + { + "epoch": 0.46065348709500276, + "grad_norm": 0.3671780526638031, + "learning_rate": 1.7516570530467457e-05, + "loss": 0.5177, + "step": 16777 + }, + { + "epoch": 0.46068094453596925, + "grad_norm": 0.3906111419200897, + "learning_rate": 1.7516285666367907e-05, + "loss": 0.5124, + "step": 16778 + }, + { + "epoch": 0.46070840197693574, + "grad_norm": 0.49627116322517395, + "learning_rate": 1.7516000788248088e-05, + "loss": 0.4591, + "step": 16779 + }, + { + "epoch": 0.46073585941790224, + "grad_norm": 0.37780576944351196, + "learning_rate": 1.7515715896108525e-05, + "loss": 0.536, + "step": 16780 + }, + { + "epoch": 0.46076331685886873, + "grad_norm": 0.39213982224464417, + "learning_rate": 1.7515430989949754e-05, + "loss": 0.548, + "step": 16781 + }, + { + "epoch": 0.4607907742998353, + "grad_norm": 0.34261542558670044, + "learning_rate": 1.751514606977231e-05, + "loss": 0.4236, + "step": 16782 + }, + { + "epoch": 0.4608182317408018, + "grad_norm": 0.3767632246017456, + "learning_rate": 1.7514861135576718e-05, + "loss": 0.5187, + "step": 16783 + }, + { + "epoch": 0.46084568918176827, + "grad_norm": 0.410241037607193, + "learning_rate": 1.751457618736351e-05, + "loss": 0.468, + "step": 16784 + }, + { + "epoch": 0.46087314662273476, + "grad_norm": 0.35502946376800537, + "learning_rate": 1.7514291225133217e-05, + "loss": 0.4548, + "step": 16785 + }, + { + "epoch": 0.46090060406370126, + "grad_norm": 0.3606296181678772, + "learning_rate": 1.7514006248886376e-05, + "loss": 0.4148, + "step": 16786 + }, + { + "epoch": 0.46092806150466775, + "grad_norm": 0.3438888192176819, + "learning_rate": 1.7513721258623518e-05, + "loss": 0.4475, + "step": 16787 + }, + { + "epoch": 0.46095551894563425, + "grad_norm": 0.40374937653541565, + "learning_rate": 1.7513436254345165e-05, + "loss": 0.5647, + "step": 16788 + }, + { + "epoch": 0.4609829763866008, + "grad_norm": 0.4112730324268341, + "learning_rate": 1.751315123605186e-05, + "loss": 0.558, + "step": 16789 + }, + { + "epoch": 0.4610104338275673, + "grad_norm": 0.3497674763202667, + "learning_rate": 1.7512866203744126e-05, + "loss": 0.4261, + "step": 16790 + }, + { + "epoch": 0.4610378912685338, + "grad_norm": 0.3575612008571625, + "learning_rate": 1.7512581157422503e-05, + "loss": 0.5114, + "step": 16791 + }, + { + "epoch": 0.4610653487095003, + "grad_norm": 0.40974360704421997, + "learning_rate": 1.7512296097087517e-05, + "loss": 0.5366, + "step": 16792 + }, + { + "epoch": 0.46109280615046677, + "grad_norm": 0.36033526062965393, + "learning_rate": 1.7512011022739702e-05, + "loss": 0.5395, + "step": 16793 + }, + { + "epoch": 0.46112026359143327, + "grad_norm": 0.3374375104904175, + "learning_rate": 1.7511725934379587e-05, + "loss": 0.4715, + "step": 16794 + }, + { + "epoch": 0.46114772103239976, + "grad_norm": 0.36099907755851746, + "learning_rate": 1.7511440832007707e-05, + "loss": 0.5124, + "step": 16795 + }, + { + "epoch": 0.4611751784733663, + "grad_norm": 0.35933640599250793, + "learning_rate": 1.7511155715624594e-05, + "loss": 0.5333, + "step": 16796 + }, + { + "epoch": 0.4612026359143328, + "grad_norm": 0.4124352037906647, + "learning_rate": 1.7510870585230774e-05, + "loss": 0.4312, + "step": 16797 + }, + { + "epoch": 0.4612300933552993, + "grad_norm": 0.3248565196990967, + "learning_rate": 1.7510585440826785e-05, + "loss": 0.4607, + "step": 16798 + }, + { + "epoch": 0.4612575507962658, + "grad_norm": 0.400428831577301, + "learning_rate": 1.751030028241316e-05, + "loss": 0.5708, + "step": 16799 + }, + { + "epoch": 0.4612850082372323, + "grad_norm": 0.3571847081184387, + "learning_rate": 1.7510015109990424e-05, + "loss": 0.5049, + "step": 16800 + }, + { + "epoch": 0.4613124656781988, + "grad_norm": 0.36050134897232056, + "learning_rate": 1.7509729923559113e-05, + "loss": 0.485, + "step": 16801 + }, + { + "epoch": 0.4613399231191653, + "grad_norm": 0.42100104689598083, + "learning_rate": 1.7509444723119758e-05, + "loss": 0.4477, + "step": 16802 + }, + { + "epoch": 0.4613673805601318, + "grad_norm": 0.34857067465782166, + "learning_rate": 1.7509159508672897e-05, + "loss": 0.6103, + "step": 16803 + }, + { + "epoch": 0.4613948380010983, + "grad_norm": 0.9561535716056824, + "learning_rate": 1.750887428021905e-05, + "loss": 0.506, + "step": 16804 + }, + { + "epoch": 0.4614222954420648, + "grad_norm": 0.41193318367004395, + "learning_rate": 1.750858903775876e-05, + "loss": 0.472, + "step": 16805 + }, + { + "epoch": 0.4614497528830313, + "grad_norm": 0.398209810256958, + "learning_rate": 1.7508303781292556e-05, + "loss": 0.5108, + "step": 16806 + }, + { + "epoch": 0.4614772103239978, + "grad_norm": 0.3357483148574829, + "learning_rate": 1.750801851082097e-05, + "loss": 0.4024, + "step": 16807 + }, + { + "epoch": 0.4615046677649643, + "grad_norm": 0.3751375079154968, + "learning_rate": 1.750773322634453e-05, + "loss": 0.4384, + "step": 16808 + }, + { + "epoch": 0.4615321252059308, + "grad_norm": 0.36944738030433655, + "learning_rate": 1.7507447927863773e-05, + "loss": 0.5253, + "step": 16809 + }, + { + "epoch": 0.4615595826468973, + "grad_norm": 0.3787977993488312, + "learning_rate": 1.7507162615379228e-05, + "loss": 0.5318, + "step": 16810 + }, + { + "epoch": 0.46158704008786383, + "grad_norm": 0.3563523590564728, + "learning_rate": 1.750687728889143e-05, + "loss": 0.5303, + "step": 16811 + }, + { + "epoch": 0.4616144975288303, + "grad_norm": 0.37688148021698, + "learning_rate": 1.7506591948400912e-05, + "loss": 0.4775, + "step": 16812 + }, + { + "epoch": 0.4616419549697968, + "grad_norm": 0.4257352948188782, + "learning_rate": 1.75063065939082e-05, + "loss": 0.5303, + "step": 16813 + }, + { + "epoch": 0.4616694124107633, + "grad_norm": 0.41676396131515503, + "learning_rate": 1.7506021225413833e-05, + "loss": 0.4791, + "step": 16814 + }, + { + "epoch": 0.4616968698517298, + "grad_norm": 0.3048132061958313, + "learning_rate": 1.750573584291834e-05, + "loss": 0.4278, + "step": 16815 + }, + { + "epoch": 0.4617243272926963, + "grad_norm": 0.36794623732566833, + "learning_rate": 1.750545044642225e-05, + "loss": 0.5332, + "step": 16816 + }, + { + "epoch": 0.4617517847336628, + "grad_norm": 0.3846539855003357, + "learning_rate": 1.7505165035926107e-05, + "loss": 0.5538, + "step": 16817 + }, + { + "epoch": 0.46177924217462935, + "grad_norm": 0.3650066554546356, + "learning_rate": 1.7504879611430433e-05, + "loss": 0.4931, + "step": 16818 + }, + { + "epoch": 0.46180669961559584, + "grad_norm": 0.38273653388023376, + "learning_rate": 1.750459417293576e-05, + "loss": 0.4998, + "step": 16819 + }, + { + "epoch": 0.46183415705656233, + "grad_norm": 0.39580708742141724, + "learning_rate": 1.7504308720442626e-05, + "loss": 0.4672, + "step": 16820 + }, + { + "epoch": 0.4618616144975288, + "grad_norm": 0.3511931002140045, + "learning_rate": 1.750402325395156e-05, + "loss": 0.4962, + "step": 16821 + }, + { + "epoch": 0.4618890719384953, + "grad_norm": 0.3633881211280823, + "learning_rate": 1.75037377734631e-05, + "loss": 0.4512, + "step": 16822 + }, + { + "epoch": 0.4619165293794618, + "grad_norm": 0.3536127209663391, + "learning_rate": 1.750345227897777e-05, + "loss": 0.4467, + "step": 16823 + }, + { + "epoch": 0.4619439868204283, + "grad_norm": 0.35829076170921326, + "learning_rate": 1.750316677049611e-05, + "loss": 0.5636, + "step": 16824 + }, + { + "epoch": 0.46197144426139486, + "grad_norm": 0.39390814304351807, + "learning_rate": 1.7502881248018643e-05, + "loss": 0.5137, + "step": 16825 + }, + { + "epoch": 0.46199890170236135, + "grad_norm": 0.3566391170024872, + "learning_rate": 1.7502595711545915e-05, + "loss": 0.4717, + "step": 16826 + }, + { + "epoch": 0.46202635914332785, + "grad_norm": 0.37286216020584106, + "learning_rate": 1.7502310161078447e-05, + "loss": 0.4786, + "step": 16827 + }, + { + "epoch": 0.46205381658429434, + "grad_norm": 0.35542032122612, + "learning_rate": 1.750202459661678e-05, + "loss": 0.4233, + "step": 16828 + }, + { + "epoch": 0.46208127402526084, + "grad_norm": 0.3472912907600403, + "learning_rate": 1.750173901816144e-05, + "loss": 0.4868, + "step": 16829 + }, + { + "epoch": 0.46210873146622733, + "grad_norm": 0.3682062327861786, + "learning_rate": 1.7501453425712962e-05, + "loss": 0.5481, + "step": 16830 + }, + { + "epoch": 0.4621361889071938, + "grad_norm": 0.3861781060695648, + "learning_rate": 1.7501167819271877e-05, + "loss": 0.5119, + "step": 16831 + }, + { + "epoch": 0.4621636463481604, + "grad_norm": 0.3630113899707794, + "learning_rate": 1.7500882198838726e-05, + "loss": 0.449, + "step": 16832 + }, + { + "epoch": 0.46219110378912687, + "grad_norm": 0.37886691093444824, + "learning_rate": 1.7500596564414034e-05, + "loss": 0.517, + "step": 16833 + }, + { + "epoch": 0.46221856123009336, + "grad_norm": 0.4012962281703949, + "learning_rate": 1.7500310915998332e-05, + "loss": 0.5408, + "step": 16834 + }, + { + "epoch": 0.46224601867105986, + "grad_norm": 0.3696017861366272, + "learning_rate": 1.7500025253592158e-05, + "loss": 0.4565, + "step": 16835 + }, + { + "epoch": 0.46227347611202635, + "grad_norm": 0.34557557106018066, + "learning_rate": 1.7499739577196044e-05, + "loss": 0.5124, + "step": 16836 + }, + { + "epoch": 0.46230093355299284, + "grad_norm": 0.3859192430973053, + "learning_rate": 1.749945388681052e-05, + "loss": 0.5362, + "step": 16837 + }, + { + "epoch": 0.46232839099395934, + "grad_norm": 0.35960614681243896, + "learning_rate": 1.7499168182436124e-05, + "loss": 0.5242, + "step": 16838 + }, + { + "epoch": 0.4623558484349259, + "grad_norm": 0.36309146881103516, + "learning_rate": 1.7498882464073384e-05, + "loss": 0.5479, + "step": 16839 + }, + { + "epoch": 0.4623833058758924, + "grad_norm": 0.37063369154930115, + "learning_rate": 1.7498596731722833e-05, + "loss": 0.5123, + "step": 16840 + }, + { + "epoch": 0.4624107633168589, + "grad_norm": 0.34388142824172974, + "learning_rate": 1.749831098538501e-05, + "loss": 0.4754, + "step": 16841 + }, + { + "epoch": 0.46243822075782537, + "grad_norm": 0.4301467835903168, + "learning_rate": 1.749802522506044e-05, + "loss": 0.5208, + "step": 16842 + }, + { + "epoch": 0.46246567819879186, + "grad_norm": 0.4151974022388458, + "learning_rate": 1.7497739450749664e-05, + "loss": 0.5694, + "step": 16843 + }, + { + "epoch": 0.46249313563975836, + "grad_norm": 0.3697217106819153, + "learning_rate": 1.7497453662453207e-05, + "loss": 0.4461, + "step": 16844 + }, + { + "epoch": 0.46252059308072485, + "grad_norm": 0.3955533802509308, + "learning_rate": 1.749716786017161e-05, + "loss": 0.4794, + "step": 16845 + }, + { + "epoch": 0.4625480505216914, + "grad_norm": 0.3472885489463806, + "learning_rate": 1.74968820439054e-05, + "loss": 0.4487, + "step": 16846 + }, + { + "epoch": 0.4625755079626579, + "grad_norm": 0.36240869760513306, + "learning_rate": 1.749659621365511e-05, + "loss": 0.5615, + "step": 16847 + }, + { + "epoch": 0.4626029654036244, + "grad_norm": 0.3584919571876526, + "learning_rate": 1.7496310369421278e-05, + "loss": 0.539, + "step": 16848 + }, + { + "epoch": 0.4626304228445909, + "grad_norm": 0.38911494612693787, + "learning_rate": 1.7496024511204433e-05, + "loss": 0.5602, + "step": 16849 + }, + { + "epoch": 0.4626578802855574, + "grad_norm": 0.4066694378852844, + "learning_rate": 1.749573863900511e-05, + "loss": 0.5408, + "step": 16850 + }, + { + "epoch": 0.46268533772652387, + "grad_norm": 0.43434837460517883, + "learning_rate": 1.7495452752823846e-05, + "loss": 0.56, + "step": 16851 + }, + { + "epoch": 0.46271279516749036, + "grad_norm": 0.3808327317237854, + "learning_rate": 1.7495166852661168e-05, + "loss": 0.5396, + "step": 16852 + }, + { + "epoch": 0.4627402526084569, + "grad_norm": 0.416063129901886, + "learning_rate": 1.7494880938517608e-05, + "loss": 0.5674, + "step": 16853 + }, + { + "epoch": 0.4627677100494234, + "grad_norm": 0.37894684076309204, + "learning_rate": 1.7494595010393707e-05, + "loss": 0.5168, + "step": 16854 + }, + { + "epoch": 0.4627951674903899, + "grad_norm": 0.34506964683532715, + "learning_rate": 1.7494309068289993e-05, + "loss": 0.4749, + "step": 16855 + }, + { + "epoch": 0.4628226249313564, + "grad_norm": 0.42959165573120117, + "learning_rate": 1.7494023112207e-05, + "loss": 0.5954, + "step": 16856 + }, + { + "epoch": 0.4628500823723229, + "grad_norm": 0.3894118070602417, + "learning_rate": 1.7493737142145264e-05, + "loss": 0.5206, + "step": 16857 + }, + { + "epoch": 0.4628775398132894, + "grad_norm": 0.3563203811645508, + "learning_rate": 1.7493451158105314e-05, + "loss": 0.4956, + "step": 16858 + }, + { + "epoch": 0.4629049972542559, + "grad_norm": 0.3687094449996948, + "learning_rate": 1.7493165160087687e-05, + "loss": 0.5392, + "step": 16859 + }, + { + "epoch": 0.46293245469522243, + "grad_norm": 0.4027034342288971, + "learning_rate": 1.749287914809292e-05, + "loss": 0.4506, + "step": 16860 + }, + { + "epoch": 0.4629599121361889, + "grad_norm": 0.4208536744117737, + "learning_rate": 1.7492593122121534e-05, + "loss": 0.5461, + "step": 16861 + }, + { + "epoch": 0.4629873695771554, + "grad_norm": 0.39534875750541687, + "learning_rate": 1.7492307082174076e-05, + "loss": 0.5556, + "step": 16862 + }, + { + "epoch": 0.4630148270181219, + "grad_norm": 0.42969900369644165, + "learning_rate": 1.749202102825107e-05, + "loss": 0.5124, + "step": 16863 + }, + { + "epoch": 0.4630422844590884, + "grad_norm": 0.3934958875179291, + "learning_rate": 1.7491734960353057e-05, + "loss": 0.563, + "step": 16864 + }, + { + "epoch": 0.4630697419000549, + "grad_norm": 0.32671916484832764, + "learning_rate": 1.7491448878480568e-05, + "loss": 0.4948, + "step": 16865 + }, + { + "epoch": 0.4630971993410214, + "grad_norm": 0.347296804189682, + "learning_rate": 1.7491162782634132e-05, + "loss": 0.4806, + "step": 16866 + }, + { + "epoch": 0.46312465678198794, + "grad_norm": 0.3363370895385742, + "learning_rate": 1.749087667281429e-05, + "loss": 0.554, + "step": 16867 + }, + { + "epoch": 0.46315211422295444, + "grad_norm": 0.37550386786460876, + "learning_rate": 1.749059054902157e-05, + "loss": 0.5572, + "step": 16868 + }, + { + "epoch": 0.46317957166392093, + "grad_norm": 0.3542129099369049, + "learning_rate": 1.7490304411256507e-05, + "loss": 0.4829, + "step": 16869 + }, + { + "epoch": 0.4632070291048874, + "grad_norm": 0.39087581634521484, + "learning_rate": 1.749001825951964e-05, + "loss": 0.5121, + "step": 16870 + }, + { + "epoch": 0.4632344865458539, + "grad_norm": 0.3955361545085907, + "learning_rate": 1.7489732093811495e-05, + "loss": 0.5387, + "step": 16871 + }, + { + "epoch": 0.4632619439868204, + "grad_norm": 0.3747192621231079, + "learning_rate": 1.748944591413261e-05, + "loss": 0.5457, + "step": 16872 + }, + { + "epoch": 0.4632894014277869, + "grad_norm": 0.387796014547348, + "learning_rate": 1.7489159720483518e-05, + "loss": 0.4478, + "step": 16873 + }, + { + "epoch": 0.46331685886875346, + "grad_norm": 0.4140513837337494, + "learning_rate": 1.748887351286475e-05, + "loss": 0.5409, + "step": 16874 + }, + { + "epoch": 0.46334431630971995, + "grad_norm": 0.3534620702266693, + "learning_rate": 1.7488587291276848e-05, + "loss": 0.5053, + "step": 16875 + }, + { + "epoch": 0.46337177375068644, + "grad_norm": 0.369500070810318, + "learning_rate": 1.7488301055720337e-05, + "loss": 0.579, + "step": 16876 + }, + { + "epoch": 0.46339923119165294, + "grad_norm": 0.3907381594181061, + "learning_rate": 1.7488014806195755e-05, + "loss": 0.4658, + "step": 16877 + }, + { + "epoch": 0.46342668863261943, + "grad_norm": 0.40527015924453735, + "learning_rate": 1.7487728542703637e-05, + "loss": 0.5886, + "step": 16878 + }, + { + "epoch": 0.4634541460735859, + "grad_norm": 0.3770305812358856, + "learning_rate": 1.7487442265244518e-05, + "loss": 0.4667, + "step": 16879 + }, + { + "epoch": 0.4634816035145524, + "grad_norm": 0.33930259943008423, + "learning_rate": 1.7487155973818924e-05, + "loss": 0.5368, + "step": 16880 + }, + { + "epoch": 0.46350906095551897, + "grad_norm": 0.3857060372829437, + "learning_rate": 1.7486869668427396e-05, + "loss": 0.4756, + "step": 16881 + }, + { + "epoch": 0.46353651839648546, + "grad_norm": 0.36525505781173706, + "learning_rate": 1.7486583349070466e-05, + "loss": 0.5236, + "step": 16882 + }, + { + "epoch": 0.46356397583745196, + "grad_norm": 0.4278118908405304, + "learning_rate": 1.748629701574867e-05, + "loss": 0.5805, + "step": 16883 + }, + { + "epoch": 0.46359143327841845, + "grad_norm": 0.42358213663101196, + "learning_rate": 1.748601066846254e-05, + "loss": 0.454, + "step": 16884 + }, + { + "epoch": 0.46361889071938495, + "grad_norm": 0.5455300807952881, + "learning_rate": 1.7485724307212612e-05, + "loss": 0.5106, + "step": 16885 + }, + { + "epoch": 0.46364634816035144, + "grad_norm": 0.35954588651657104, + "learning_rate": 1.7485437931999417e-05, + "loss": 0.4567, + "step": 16886 + }, + { + "epoch": 0.46367380560131793, + "grad_norm": 0.4351702332496643, + "learning_rate": 1.7485151542823493e-05, + "loss": 0.5292, + "step": 16887 + }, + { + "epoch": 0.4637012630422845, + "grad_norm": 0.39526185393333435, + "learning_rate": 1.7484865139685372e-05, + "loss": 0.5565, + "step": 16888 + }, + { + "epoch": 0.463728720483251, + "grad_norm": 0.32998332381248474, + "learning_rate": 1.748457872258559e-05, + "loss": 0.4119, + "step": 16889 + }, + { + "epoch": 0.46375617792421747, + "grad_norm": 0.3972926437854767, + "learning_rate": 1.7484292291524677e-05, + "loss": 0.5431, + "step": 16890 + }, + { + "epoch": 0.46378363536518397, + "grad_norm": 0.5110490918159485, + "learning_rate": 1.748400584650317e-05, + "loss": 0.3907, + "step": 16891 + }, + { + "epoch": 0.46381109280615046, + "grad_norm": 0.4238927662372589, + "learning_rate": 1.7483719387521607e-05, + "loss": 0.4693, + "step": 16892 + }, + { + "epoch": 0.46383855024711695, + "grad_norm": 0.3512466549873352, + "learning_rate": 1.7483432914580515e-05, + "loss": 0.5358, + "step": 16893 + }, + { + "epoch": 0.46386600768808345, + "grad_norm": 0.34300950169563293, + "learning_rate": 1.7483146427680435e-05, + "loss": 0.5605, + "step": 16894 + }, + { + "epoch": 0.46389346512905, + "grad_norm": 0.37212440371513367, + "learning_rate": 1.7482859926821895e-05, + "loss": 0.4435, + "step": 16895 + }, + { + "epoch": 0.4639209225700165, + "grad_norm": 0.41316771507263184, + "learning_rate": 1.7482573412005435e-05, + "loss": 0.5916, + "step": 16896 + }, + { + "epoch": 0.463948380010983, + "grad_norm": 0.36366701126098633, + "learning_rate": 1.748228688323159e-05, + "loss": 0.5128, + "step": 16897 + }, + { + "epoch": 0.4639758374519495, + "grad_norm": 0.36917686462402344, + "learning_rate": 1.7482000340500892e-05, + "loss": 0.5045, + "step": 16898 + }, + { + "epoch": 0.464003294892916, + "grad_norm": 0.3510574400424957, + "learning_rate": 1.7481713783813872e-05, + "loss": 0.5248, + "step": 16899 + }, + { + "epoch": 0.46403075233388247, + "grad_norm": 0.38627496361732483, + "learning_rate": 1.748142721317107e-05, + "loss": 0.5203, + "step": 16900 + }, + { + "epoch": 0.46405820977484896, + "grad_norm": 0.3708861172199249, + "learning_rate": 1.7481140628573017e-05, + "loss": 0.5187, + "step": 16901 + }, + { + "epoch": 0.4640856672158155, + "grad_norm": 0.4005656838417053, + "learning_rate": 1.748085403002025e-05, + "loss": 0.548, + "step": 16902 + }, + { + "epoch": 0.464113124656782, + "grad_norm": 0.3799499571323395, + "learning_rate": 1.74805674175133e-05, + "loss": 0.5023, + "step": 16903 + }, + { + "epoch": 0.4641405820977485, + "grad_norm": 0.32577183842658997, + "learning_rate": 1.7480280791052707e-05, + "loss": 0.4986, + "step": 16904 + }, + { + "epoch": 0.464168039538715, + "grad_norm": 0.3593858778476715, + "learning_rate": 1.7479994150639005e-05, + "loss": 0.5637, + "step": 16905 + }, + { + "epoch": 0.4641954969796815, + "grad_norm": 0.36932694911956787, + "learning_rate": 1.747970749627272e-05, + "loss": 0.5814, + "step": 16906 + }, + { + "epoch": 0.464222954420648, + "grad_norm": 0.35311880707740784, + "learning_rate": 1.74794208279544e-05, + "loss": 0.5058, + "step": 16907 + }, + { + "epoch": 0.4642504118616145, + "grad_norm": 0.38506314158439636, + "learning_rate": 1.747913414568457e-05, + "loss": 0.4921, + "step": 16908 + }, + { + "epoch": 0.464277869302581, + "grad_norm": 0.33022457361221313, + "learning_rate": 1.7478847449463767e-05, + "loss": 0.46, + "step": 16909 + }, + { + "epoch": 0.4643053267435475, + "grad_norm": 0.3791036903858185, + "learning_rate": 1.7478560739292528e-05, + "loss": 0.5254, + "step": 16910 + }, + { + "epoch": 0.464332784184514, + "grad_norm": 0.36364808678627014, + "learning_rate": 1.7478274015171387e-05, + "loss": 0.4532, + "step": 16911 + }, + { + "epoch": 0.4643602416254805, + "grad_norm": 0.3688960671424866, + "learning_rate": 1.7477987277100878e-05, + "loss": 0.4948, + "step": 16912 + }, + { + "epoch": 0.464387699066447, + "grad_norm": 0.3501456379890442, + "learning_rate": 1.7477700525081535e-05, + "loss": 0.4685, + "step": 16913 + }, + { + "epoch": 0.4644151565074135, + "grad_norm": 0.37293192744255066, + "learning_rate": 1.7477413759113894e-05, + "loss": 0.5603, + "step": 16914 + }, + { + "epoch": 0.46444261394838, + "grad_norm": 0.3926141560077667, + "learning_rate": 1.7477126979198487e-05, + "loss": 0.5315, + "step": 16915 + }, + { + "epoch": 0.46447007138934654, + "grad_norm": 0.3564329445362091, + "learning_rate": 1.7476840185335855e-05, + "loss": 0.5437, + "step": 16916 + }, + { + "epoch": 0.46449752883031303, + "grad_norm": 0.35250774025917053, + "learning_rate": 1.747655337752653e-05, + "loss": 0.4839, + "step": 16917 + }, + { + "epoch": 0.4645249862712795, + "grad_norm": 0.3680199980735779, + "learning_rate": 1.7476266555771047e-05, + "loss": 0.5269, + "step": 16918 + }, + { + "epoch": 0.464552443712246, + "grad_norm": 0.311867892742157, + "learning_rate": 1.747597972006994e-05, + "loss": 0.4694, + "step": 16919 + }, + { + "epoch": 0.4645799011532125, + "grad_norm": 0.3477252423763275, + "learning_rate": 1.7475692870423743e-05, + "loss": 0.4901, + "step": 16920 + }, + { + "epoch": 0.464607358594179, + "grad_norm": 0.45639994740486145, + "learning_rate": 1.7475406006832996e-05, + "loss": 0.5554, + "step": 16921 + }, + { + "epoch": 0.4646348160351455, + "grad_norm": 0.4128105640411377, + "learning_rate": 1.7475119129298228e-05, + "loss": 0.5161, + "step": 16922 + }, + { + "epoch": 0.46466227347611205, + "grad_norm": 0.3596931993961334, + "learning_rate": 1.747483223781998e-05, + "loss": 0.4753, + "step": 16923 + }, + { + "epoch": 0.46468973091707855, + "grad_norm": 0.6810435056686401, + "learning_rate": 1.747454533239878e-05, + "loss": 0.5572, + "step": 16924 + }, + { + "epoch": 0.46471718835804504, + "grad_norm": 0.33691033720970154, + "learning_rate": 1.747425841303517e-05, + "loss": 0.4882, + "step": 16925 + }, + { + "epoch": 0.46474464579901154, + "grad_norm": 0.3709375560283661, + "learning_rate": 1.7473971479729683e-05, + "loss": 0.4873, + "step": 16926 + }, + { + "epoch": 0.46477210323997803, + "grad_norm": 0.407829225063324, + "learning_rate": 1.7473684532482852e-05, + "loss": 0.5208, + "step": 16927 + }, + { + "epoch": 0.4647995606809445, + "grad_norm": 0.4466405212879181, + "learning_rate": 1.7473397571295215e-05, + "loss": 0.4939, + "step": 16928 + }, + { + "epoch": 0.464827018121911, + "grad_norm": 0.35260090231895447, + "learning_rate": 1.7473110596167305e-05, + "loss": 0.3674, + "step": 16929 + }, + { + "epoch": 0.46485447556287757, + "grad_norm": 0.3948887884616852, + "learning_rate": 1.7472823607099663e-05, + "loss": 0.4387, + "step": 16930 + }, + { + "epoch": 0.46488193300384406, + "grad_norm": 0.41016730666160583, + "learning_rate": 1.7472536604092816e-05, + "loss": 0.5005, + "step": 16931 + }, + { + "epoch": 0.46490939044481056, + "grad_norm": 0.4022945463657379, + "learning_rate": 1.7472249587147304e-05, + "loss": 0.5541, + "step": 16932 + }, + { + "epoch": 0.46493684788577705, + "grad_norm": 0.3817179799079895, + "learning_rate": 1.747196255626366e-05, + "loss": 0.5131, + "step": 16933 + }, + { + "epoch": 0.46496430532674354, + "grad_norm": 0.34984561800956726, + "learning_rate": 1.7471675511442426e-05, + "loss": 0.5223, + "step": 16934 + }, + { + "epoch": 0.46499176276771004, + "grad_norm": 0.3803809583187103, + "learning_rate": 1.747138845268413e-05, + "loss": 0.4469, + "step": 16935 + }, + { + "epoch": 0.46501922020867653, + "grad_norm": 0.3400759696960449, + "learning_rate": 1.747110137998931e-05, + "loss": 0.3834, + "step": 16936 + }, + { + "epoch": 0.4650466776496431, + "grad_norm": 0.370835542678833, + "learning_rate": 1.74708142933585e-05, + "loss": 0.5243, + "step": 16937 + }, + { + "epoch": 0.4650741350906096, + "grad_norm": 0.3897378146648407, + "learning_rate": 1.7470527192792236e-05, + "loss": 0.5223, + "step": 16938 + }, + { + "epoch": 0.46510159253157607, + "grad_norm": 0.3241879940032959, + "learning_rate": 1.7470240078291056e-05, + "loss": 0.4534, + "step": 16939 + }, + { + "epoch": 0.46512904997254256, + "grad_norm": 0.3836115002632141, + "learning_rate": 1.7469952949855496e-05, + "loss": 0.4817, + "step": 16940 + }, + { + "epoch": 0.46515650741350906, + "grad_norm": 0.35602566599845886, + "learning_rate": 1.7469665807486088e-05, + "loss": 0.416, + "step": 16941 + }, + { + "epoch": 0.46518396485447555, + "grad_norm": 0.3718385100364685, + "learning_rate": 1.7469378651183367e-05, + "loss": 0.57, + "step": 16942 + }, + { + "epoch": 0.46521142229544205, + "grad_norm": 0.3564950227737427, + "learning_rate": 1.7469091480947876e-05, + "loss": 0.4983, + "step": 16943 + }, + { + "epoch": 0.46523887973640854, + "grad_norm": 0.39068007469177246, + "learning_rate": 1.7468804296780143e-05, + "loss": 0.4702, + "step": 16944 + }, + { + "epoch": 0.4652663371773751, + "grad_norm": 0.4072388708591461, + "learning_rate": 1.7468517098680704e-05, + "loss": 0.4962, + "step": 16945 + }, + { + "epoch": 0.4652937946183416, + "grad_norm": 0.36905354261398315, + "learning_rate": 1.74682298866501e-05, + "loss": 0.4121, + "step": 16946 + }, + { + "epoch": 0.4653212520593081, + "grad_norm": 0.35261309146881104, + "learning_rate": 1.746794266068886e-05, + "loss": 0.5765, + "step": 16947 + }, + { + "epoch": 0.46534870950027457, + "grad_norm": 0.3956209719181061, + "learning_rate": 1.7467655420797527e-05, + "loss": 0.4802, + "step": 16948 + }, + { + "epoch": 0.46537616694124107, + "grad_norm": 0.4002109467983246, + "learning_rate": 1.7467368166976632e-05, + "loss": 0.5069, + "step": 16949 + }, + { + "epoch": 0.46540362438220756, + "grad_norm": 0.40009215474128723, + "learning_rate": 1.746708089922671e-05, + "loss": 0.4971, + "step": 16950 + }, + { + "epoch": 0.46543108182317405, + "grad_norm": 0.4305175244808197, + "learning_rate": 1.74667936175483e-05, + "loss": 0.4911, + "step": 16951 + }, + { + "epoch": 0.4654585392641406, + "grad_norm": 0.3981086015701294, + "learning_rate": 1.7466506321941942e-05, + "loss": 0.4692, + "step": 16952 + }, + { + "epoch": 0.4654859967051071, + "grad_norm": 0.3695143759250641, + "learning_rate": 1.746621901240816e-05, + "loss": 0.5526, + "step": 16953 + }, + { + "epoch": 0.4655134541460736, + "grad_norm": 0.36620327830314636, + "learning_rate": 1.7465931688947502e-05, + "loss": 0.5559, + "step": 16954 + }, + { + "epoch": 0.4655409115870401, + "grad_norm": 0.3817724883556366, + "learning_rate": 1.7465644351560494e-05, + "loss": 0.6128, + "step": 16955 + }, + { + "epoch": 0.4655683690280066, + "grad_norm": 0.3779057562351227, + "learning_rate": 1.746535700024768e-05, + "loss": 0.5136, + "step": 16956 + }, + { + "epoch": 0.4655958264689731, + "grad_norm": 0.38283276557922363, + "learning_rate": 1.7465069635009588e-05, + "loss": 0.5056, + "step": 16957 + }, + { + "epoch": 0.46562328390993957, + "grad_norm": 0.3471296429634094, + "learning_rate": 1.7464782255846762e-05, + "loss": 0.4745, + "step": 16958 + }, + { + "epoch": 0.4656507413509061, + "grad_norm": 0.3841201961040497, + "learning_rate": 1.7464494862759732e-05, + "loss": 0.5886, + "step": 16959 + }, + { + "epoch": 0.4656781987918726, + "grad_norm": 0.35044562816619873, + "learning_rate": 1.746420745574904e-05, + "loss": 0.5026, + "step": 16960 + }, + { + "epoch": 0.4657056562328391, + "grad_norm": 0.4019415080547333, + "learning_rate": 1.7463920034815214e-05, + "loss": 0.5566, + "step": 16961 + }, + { + "epoch": 0.4657331136738056, + "grad_norm": 0.34497979283332825, + "learning_rate": 1.74636325999588e-05, + "loss": 0.4795, + "step": 16962 + }, + { + "epoch": 0.4657605711147721, + "grad_norm": 1.4787840843200684, + "learning_rate": 1.746334515118033e-05, + "loss": 0.5336, + "step": 16963 + }, + { + "epoch": 0.4657880285557386, + "grad_norm": 0.37970170378685, + "learning_rate": 1.7463057688480338e-05, + "loss": 0.5058, + "step": 16964 + }, + { + "epoch": 0.4658154859967051, + "grad_norm": 0.3907909393310547, + "learning_rate": 1.746277021185936e-05, + "loss": 0.5677, + "step": 16965 + }, + { + "epoch": 0.46584294343767163, + "grad_norm": 0.3656117022037506, + "learning_rate": 1.746248272131793e-05, + "loss": 0.4848, + "step": 16966 + }, + { + "epoch": 0.4658704008786381, + "grad_norm": 0.39899101853370667, + "learning_rate": 1.7462195216856592e-05, + "loss": 0.5361, + "step": 16967 + }, + { + "epoch": 0.4658978583196046, + "grad_norm": 0.3936983048915863, + "learning_rate": 1.746190769847588e-05, + "loss": 0.552, + "step": 16968 + }, + { + "epoch": 0.4659253157605711, + "grad_norm": 0.37411943078041077, + "learning_rate": 1.7461620166176328e-05, + "loss": 0.5189, + "step": 16969 + }, + { + "epoch": 0.4659527732015376, + "grad_norm": 0.3574541509151459, + "learning_rate": 1.746133261995847e-05, + "loss": 0.4944, + "step": 16970 + }, + { + "epoch": 0.4659802306425041, + "grad_norm": 0.3404518961906433, + "learning_rate": 1.7461045059822845e-05, + "loss": 0.4202, + "step": 16971 + }, + { + "epoch": 0.4660076880834706, + "grad_norm": 0.37138357758522034, + "learning_rate": 1.7460757485769993e-05, + "loss": 0.5168, + "step": 16972 + }, + { + "epoch": 0.46603514552443714, + "grad_norm": 0.3662682771682739, + "learning_rate": 1.7460469897800445e-05, + "loss": 0.5283, + "step": 16973 + }, + { + "epoch": 0.46606260296540364, + "grad_norm": 0.36791056394577026, + "learning_rate": 1.746018229591474e-05, + "loss": 0.5588, + "step": 16974 + }, + { + "epoch": 0.46609006040637013, + "grad_norm": 0.38002684712409973, + "learning_rate": 1.7459894680113415e-05, + "loss": 0.5575, + "step": 16975 + }, + { + "epoch": 0.4661175178473366, + "grad_norm": 0.3701854944229126, + "learning_rate": 1.7459607050397004e-05, + "loss": 0.504, + "step": 16976 + }, + { + "epoch": 0.4661449752883031, + "grad_norm": 0.37627002596855164, + "learning_rate": 1.7459319406766047e-05, + "loss": 0.5113, + "step": 16977 + }, + { + "epoch": 0.4661724327292696, + "grad_norm": 0.3704990744590759, + "learning_rate": 1.7459031749221077e-05, + "loss": 0.5687, + "step": 16978 + }, + { + "epoch": 0.4661998901702361, + "grad_norm": 0.4129607677459717, + "learning_rate": 1.7458744077762632e-05, + "loss": 0.5296, + "step": 16979 + }, + { + "epoch": 0.46622734761120266, + "grad_norm": 0.3615747094154358, + "learning_rate": 1.745845639239125e-05, + "loss": 0.5095, + "step": 16980 + }, + { + "epoch": 0.46625480505216915, + "grad_norm": 0.5112725496292114, + "learning_rate": 1.7458168693107465e-05, + "loss": 0.5105, + "step": 16981 + }, + { + "epoch": 0.46628226249313565, + "grad_norm": 0.3190946877002716, + "learning_rate": 1.7457880979911816e-05, + "loss": 0.4477, + "step": 16982 + }, + { + "epoch": 0.46630971993410214, + "grad_norm": 0.354484885931015, + "learning_rate": 1.7457593252804838e-05, + "loss": 0.4583, + "step": 16983 + }, + { + "epoch": 0.46633717737506863, + "grad_norm": 0.3711962401866913, + "learning_rate": 1.7457305511787068e-05, + "loss": 0.537, + "step": 16984 + }, + { + "epoch": 0.46636463481603513, + "grad_norm": 0.37025657296180725, + "learning_rate": 1.745701775685904e-05, + "loss": 0.5478, + "step": 16985 + }, + { + "epoch": 0.4663920922570016, + "grad_norm": 0.3695226311683655, + "learning_rate": 1.74567299880213e-05, + "loss": 0.5184, + "step": 16986 + }, + { + "epoch": 0.46641954969796817, + "grad_norm": 0.34491240978240967, + "learning_rate": 1.745644220527438e-05, + "loss": 0.4835, + "step": 16987 + }, + { + "epoch": 0.46644700713893467, + "grad_norm": 0.4063764214515686, + "learning_rate": 1.745615440861881e-05, + "loss": 0.5507, + "step": 16988 + }, + { + "epoch": 0.46647446457990116, + "grad_norm": 0.36633872985839844, + "learning_rate": 1.7455866598055134e-05, + "loss": 0.491, + "step": 16989 + }, + { + "epoch": 0.46650192202086765, + "grad_norm": 0.3731537163257599, + "learning_rate": 1.7455578773583887e-05, + "loss": 0.5034, + "step": 16990 + }, + { + "epoch": 0.46652937946183415, + "grad_norm": 0.3913518488407135, + "learning_rate": 1.7455290935205606e-05, + "loss": 0.5587, + "step": 16991 + }, + { + "epoch": 0.46655683690280064, + "grad_norm": 0.326567679643631, + "learning_rate": 1.745500308292083e-05, + "loss": 0.5078, + "step": 16992 + }, + { + "epoch": 0.46658429434376714, + "grad_norm": 0.3894326984882355, + "learning_rate": 1.7454715216730088e-05, + "loss": 0.5606, + "step": 16993 + }, + { + "epoch": 0.4666117517847337, + "grad_norm": 0.4274847209453583, + "learning_rate": 1.7454427336633926e-05, + "loss": 0.6441, + "step": 16994 + }, + { + "epoch": 0.4666392092257002, + "grad_norm": 0.3944520652294159, + "learning_rate": 1.7454139442632882e-05, + "loss": 0.5051, + "step": 16995 + }, + { + "epoch": 0.4666666666666667, + "grad_norm": 0.38332846760749817, + "learning_rate": 1.7453851534727487e-05, + "loss": 0.573, + "step": 16996 + }, + { + "epoch": 0.46669412410763317, + "grad_norm": 0.4687936007976532, + "learning_rate": 1.7453563612918276e-05, + "loss": 0.5567, + "step": 16997 + }, + { + "epoch": 0.46672158154859966, + "grad_norm": 0.35550495982170105, + "learning_rate": 1.7453275677205792e-05, + "loss": 0.5016, + "step": 16998 + }, + { + "epoch": 0.46674903898956616, + "grad_norm": 0.34160637855529785, + "learning_rate": 1.745298772759057e-05, + "loss": 0.4392, + "step": 16999 + }, + { + "epoch": 0.46677649643053265, + "grad_norm": 0.3520333766937256, + "learning_rate": 1.7452699764073148e-05, + "loss": 0.4602, + "step": 17000 + }, + { + "epoch": 0.4668039538714992, + "grad_norm": 0.39728832244873047, + "learning_rate": 1.7452411786654063e-05, + "loss": 0.4297, + "step": 17001 + }, + { + "epoch": 0.4668314113124657, + "grad_norm": 0.3837659955024719, + "learning_rate": 1.7452123795333852e-05, + "loss": 0.4175, + "step": 17002 + }, + { + "epoch": 0.4668588687534322, + "grad_norm": 0.3627690374851227, + "learning_rate": 1.745183579011305e-05, + "loss": 0.5607, + "step": 17003 + }, + { + "epoch": 0.4668863261943987, + "grad_norm": 0.3546083867549896, + "learning_rate": 1.7451547770992193e-05, + "loss": 0.4899, + "step": 17004 + }, + { + "epoch": 0.4669137836353652, + "grad_norm": 0.4754144251346588, + "learning_rate": 1.7451259737971826e-05, + "loss": 0.4895, + "step": 17005 + }, + { + "epoch": 0.46694124107633167, + "grad_norm": 0.4153731167316437, + "learning_rate": 1.745097169105248e-05, + "loss": 0.512, + "step": 17006 + }, + { + "epoch": 0.46696869851729816, + "grad_norm": 0.35857516527175903, + "learning_rate": 1.7450683630234694e-05, + "loss": 0.4971, + "step": 17007 + }, + { + "epoch": 0.4669961559582647, + "grad_norm": 0.4314795732498169, + "learning_rate": 1.7450395555519004e-05, + "loss": 0.5285, + "step": 17008 + }, + { + "epoch": 0.4670236133992312, + "grad_norm": 0.3575865626335144, + "learning_rate": 1.7450107466905946e-05, + "loss": 0.402, + "step": 17009 + }, + { + "epoch": 0.4670510708401977, + "grad_norm": 0.4053703248500824, + "learning_rate": 1.7449819364396066e-05, + "loss": 0.4522, + "step": 17010 + }, + { + "epoch": 0.4670785282811642, + "grad_norm": 0.3791302740573883, + "learning_rate": 1.744953124798989e-05, + "loss": 0.5497, + "step": 17011 + }, + { + "epoch": 0.4671059857221307, + "grad_norm": 0.37876513600349426, + "learning_rate": 1.7449243117687964e-05, + "loss": 0.532, + "step": 17012 + }, + { + "epoch": 0.4671334431630972, + "grad_norm": 0.3893001079559326, + "learning_rate": 1.744895497349082e-05, + "loss": 0.5496, + "step": 17013 + }, + { + "epoch": 0.4671609006040637, + "grad_norm": 0.3640681803226471, + "learning_rate": 1.7448666815399e-05, + "loss": 0.499, + "step": 17014 + }, + { + "epoch": 0.4671883580450302, + "grad_norm": 0.5210261940956116, + "learning_rate": 1.7448378643413034e-05, + "loss": 0.5961, + "step": 17015 + }, + { + "epoch": 0.4672158154859967, + "grad_norm": 0.4215487241744995, + "learning_rate": 1.7448090457533465e-05, + "loss": 0.4838, + "step": 17016 + }, + { + "epoch": 0.4672432729269632, + "grad_norm": 0.3975925147533417, + "learning_rate": 1.7447802257760835e-05, + "loss": 0.5274, + "step": 17017 + }, + { + "epoch": 0.4672707303679297, + "grad_norm": 0.44974949955940247, + "learning_rate": 1.7447514044095675e-05, + "loss": 0.4845, + "step": 17018 + }, + { + "epoch": 0.4672981878088962, + "grad_norm": 0.43210074305534363, + "learning_rate": 1.744722581653852e-05, + "loss": 0.588, + "step": 17019 + }, + { + "epoch": 0.4673256452498627, + "grad_norm": 0.37265270948410034, + "learning_rate": 1.7446937575089918e-05, + "loss": 0.5303, + "step": 17020 + }, + { + "epoch": 0.4673531026908292, + "grad_norm": 0.38527217507362366, + "learning_rate": 1.74466493197504e-05, + "loss": 0.5507, + "step": 17021 + }, + { + "epoch": 0.46738056013179574, + "grad_norm": 0.3656613826751709, + "learning_rate": 1.74463610505205e-05, + "loss": 0.5041, + "step": 17022 + }, + { + "epoch": 0.46740801757276224, + "grad_norm": 0.3706596791744232, + "learning_rate": 1.7446072767400767e-05, + "loss": 0.5841, + "step": 17023 + }, + { + "epoch": 0.46743547501372873, + "grad_norm": 0.4103105366230011, + "learning_rate": 1.7445784470391725e-05, + "loss": 0.4717, + "step": 17024 + }, + { + "epoch": 0.4674629324546952, + "grad_norm": 0.31466102600097656, + "learning_rate": 1.7445496159493922e-05, + "loss": 0.4463, + "step": 17025 + }, + { + "epoch": 0.4674903898956617, + "grad_norm": 0.35096168518066406, + "learning_rate": 1.7445207834707892e-05, + "loss": 0.4729, + "step": 17026 + }, + { + "epoch": 0.4675178473366282, + "grad_norm": 0.42214953899383545, + "learning_rate": 1.7444919496034174e-05, + "loss": 0.5363, + "step": 17027 + }, + { + "epoch": 0.4675453047775947, + "grad_norm": 0.37205764651298523, + "learning_rate": 1.74446311434733e-05, + "loss": 0.5603, + "step": 17028 + }, + { + "epoch": 0.46757276221856126, + "grad_norm": 0.5457680821418762, + "learning_rate": 1.7444342777025816e-05, + "loss": 0.4764, + "step": 17029 + }, + { + "epoch": 0.46760021965952775, + "grad_norm": 0.3835238814353943, + "learning_rate": 1.744405439669226e-05, + "loss": 0.5535, + "step": 17030 + }, + { + "epoch": 0.46762767710049424, + "grad_norm": 0.35217994451522827, + "learning_rate": 1.7443766002473164e-05, + "loss": 0.5784, + "step": 17031 + }, + { + "epoch": 0.46765513454146074, + "grad_norm": 0.36157482862472534, + "learning_rate": 1.7443477594369067e-05, + "loss": 0.6066, + "step": 17032 + }, + { + "epoch": 0.46768259198242723, + "grad_norm": 0.38532060384750366, + "learning_rate": 1.744318917238051e-05, + "loss": 0.5051, + "step": 17033 + }, + { + "epoch": 0.4677100494233937, + "grad_norm": 0.35546156764030457, + "learning_rate": 1.744290073650803e-05, + "loss": 0.5136, + "step": 17034 + }, + { + "epoch": 0.4677375068643602, + "grad_norm": 0.39217129349708557, + "learning_rate": 1.7442612286752166e-05, + "loss": 0.5312, + "step": 17035 + }, + { + "epoch": 0.46776496430532677, + "grad_norm": 0.4183287024497986, + "learning_rate": 1.7442323823113452e-05, + "loss": 0.6398, + "step": 17036 + }, + { + "epoch": 0.46779242174629326, + "grad_norm": 0.3687008321285248, + "learning_rate": 1.744203534559243e-05, + "loss": 0.5007, + "step": 17037 + }, + { + "epoch": 0.46781987918725976, + "grad_norm": 0.3718061149120331, + "learning_rate": 1.7441746854189636e-05, + "loss": 0.4448, + "step": 17038 + }, + { + "epoch": 0.46784733662822625, + "grad_norm": 0.3340950608253479, + "learning_rate": 1.744145834890561e-05, + "loss": 0.4408, + "step": 17039 + }, + { + "epoch": 0.46787479406919275, + "grad_norm": 0.40376371145248413, + "learning_rate": 1.744116982974089e-05, + "loss": 0.5291, + "step": 17040 + }, + { + "epoch": 0.46790225151015924, + "grad_norm": 0.3274785578250885, + "learning_rate": 1.744088129669601e-05, + "loss": 0.5535, + "step": 17041 + }, + { + "epoch": 0.46792970895112573, + "grad_norm": 0.4073832631111145, + "learning_rate": 1.7440592749771513e-05, + "loss": 0.5216, + "step": 17042 + }, + { + "epoch": 0.4679571663920923, + "grad_norm": 0.3420123755931854, + "learning_rate": 1.744030418896794e-05, + "loss": 0.6042, + "step": 17043 + }, + { + "epoch": 0.4679846238330588, + "grad_norm": 0.3958752155303955, + "learning_rate": 1.744001561428582e-05, + "loss": 0.5853, + "step": 17044 + }, + { + "epoch": 0.46801208127402527, + "grad_norm": 0.3438078761100769, + "learning_rate": 1.7439727025725697e-05, + "loss": 0.537, + "step": 17045 + }, + { + "epoch": 0.46803953871499177, + "grad_norm": 0.38933736085891724, + "learning_rate": 1.7439438423288112e-05, + "loss": 0.5139, + "step": 17046 + }, + { + "epoch": 0.46806699615595826, + "grad_norm": 0.6319993734359741, + "learning_rate": 1.7439149806973596e-05, + "loss": 0.5205, + "step": 17047 + }, + { + "epoch": 0.46809445359692475, + "grad_norm": 0.33319899439811707, + "learning_rate": 1.743886117678269e-05, + "loss": 0.5282, + "step": 17048 + }, + { + "epoch": 0.46812191103789125, + "grad_norm": 0.4043929874897003, + "learning_rate": 1.7438572532715937e-05, + "loss": 0.4846, + "step": 17049 + }, + { + "epoch": 0.4681493684788578, + "grad_norm": 0.35247987508773804, + "learning_rate": 1.743828387477387e-05, + "loss": 0.4346, + "step": 17050 + }, + { + "epoch": 0.4681768259198243, + "grad_norm": 0.40560659766197205, + "learning_rate": 1.7437995202957034e-05, + "loss": 0.5689, + "step": 17051 + }, + { + "epoch": 0.4682042833607908, + "grad_norm": 0.3724038600921631, + "learning_rate": 1.743770651726596e-05, + "loss": 0.524, + "step": 17052 + }, + { + "epoch": 0.4682317408017573, + "grad_norm": 0.4546937644481659, + "learning_rate": 1.743741781770119e-05, + "loss": 0.4643, + "step": 17053 + }, + { + "epoch": 0.4682591982427238, + "grad_norm": 0.40008124709129333, + "learning_rate": 1.743712910426326e-05, + "loss": 0.5553, + "step": 17054 + }, + { + "epoch": 0.46828665568369027, + "grad_norm": 0.3586139976978302, + "learning_rate": 1.743684037695271e-05, + "loss": 0.416, + "step": 17055 + }, + { + "epoch": 0.46831411312465676, + "grad_norm": 0.35657480359077454, + "learning_rate": 1.7436551635770083e-05, + "loss": 0.5133, + "step": 17056 + }, + { + "epoch": 0.4683415705656233, + "grad_norm": 0.3596322536468506, + "learning_rate": 1.743626288071591e-05, + "loss": 0.4517, + "step": 17057 + }, + { + "epoch": 0.4683690280065898, + "grad_norm": 0.3723606467247009, + "learning_rate": 1.743597411179074e-05, + "loss": 0.4819, + "step": 17058 + }, + { + "epoch": 0.4683964854475563, + "grad_norm": 0.37674784660339355, + "learning_rate": 1.7435685328995097e-05, + "loss": 0.5572, + "step": 17059 + }, + { + "epoch": 0.4684239428885228, + "grad_norm": 0.3661544620990753, + "learning_rate": 1.7435396532329533e-05, + "loss": 0.5054, + "step": 17060 + }, + { + "epoch": 0.4684514003294893, + "grad_norm": 0.3502786457538605, + "learning_rate": 1.7435107721794577e-05, + "loss": 0.4988, + "step": 17061 + }, + { + "epoch": 0.4684788577704558, + "grad_norm": 0.33850374817848206, + "learning_rate": 1.7434818897390774e-05, + "loss": 0.5173, + "step": 17062 + }, + { + "epoch": 0.4685063152114223, + "grad_norm": 0.36681485176086426, + "learning_rate": 1.743453005911866e-05, + "loss": 0.4708, + "step": 17063 + }, + { + "epoch": 0.4685337726523888, + "grad_norm": 0.3799437880516052, + "learning_rate": 1.7434241206978778e-05, + "loss": 0.4842, + "step": 17064 + }, + { + "epoch": 0.4685612300933553, + "grad_norm": 0.3844027817249298, + "learning_rate": 1.7433952340971664e-05, + "loss": 0.5357, + "step": 17065 + }, + { + "epoch": 0.4685886875343218, + "grad_norm": 0.34639549255371094, + "learning_rate": 1.743366346109785e-05, + "loss": 0.5118, + "step": 17066 + }, + { + "epoch": 0.4686161449752883, + "grad_norm": 0.36602044105529785, + "learning_rate": 1.7433374567357885e-05, + "loss": 0.5176, + "step": 17067 + }, + { + "epoch": 0.4686436024162548, + "grad_norm": 0.39535290002822876, + "learning_rate": 1.7433085659752302e-05, + "loss": 0.5714, + "step": 17068 + }, + { + "epoch": 0.4686710598572213, + "grad_norm": 0.36868980526924133, + "learning_rate": 1.7432796738281644e-05, + "loss": 0.48, + "step": 17069 + }, + { + "epoch": 0.4686985172981878, + "grad_norm": 0.3926275074481964, + "learning_rate": 1.7432507802946446e-05, + "loss": 0.4301, + "step": 17070 + }, + { + "epoch": 0.46872597473915434, + "grad_norm": 0.4059324264526367, + "learning_rate": 1.7432218853747254e-05, + "loss": 0.5655, + "step": 17071 + }, + { + "epoch": 0.46875343218012083, + "grad_norm": 0.35474029183387756, + "learning_rate": 1.7431929890684595e-05, + "loss": 0.5338, + "step": 17072 + }, + { + "epoch": 0.4687808896210873, + "grad_norm": 0.33851268887519836, + "learning_rate": 1.743164091375902e-05, + "loss": 0.5497, + "step": 17073 + }, + { + "epoch": 0.4688083470620538, + "grad_norm": 0.41660940647125244, + "learning_rate": 1.743135192297106e-05, + "loss": 0.5281, + "step": 17074 + }, + { + "epoch": 0.4688358045030203, + "grad_norm": 0.36567676067352295, + "learning_rate": 1.743106291832126e-05, + "loss": 0.4765, + "step": 17075 + }, + { + "epoch": 0.4688632619439868, + "grad_norm": 0.3957929015159607, + "learning_rate": 1.743077389981015e-05, + "loss": 0.5363, + "step": 17076 + }, + { + "epoch": 0.4688907193849533, + "grad_norm": 0.3438754975795746, + "learning_rate": 1.7430484867438277e-05, + "loss": 0.5228, + "step": 17077 + }, + { + "epoch": 0.4689181768259198, + "grad_norm": 0.39070719480514526, + "learning_rate": 1.743019582120618e-05, + "loss": 0.5657, + "step": 17078 + }, + { + "epoch": 0.46894563426688635, + "grad_norm": 0.4292851984500885, + "learning_rate": 1.7429906761114398e-05, + "loss": 0.5352, + "step": 17079 + }, + { + "epoch": 0.46897309170785284, + "grad_norm": 0.3695099353790283, + "learning_rate": 1.7429617687163464e-05, + "loss": 0.5028, + "step": 17080 + }, + { + "epoch": 0.46900054914881933, + "grad_norm": 0.35412099957466125, + "learning_rate": 1.7429328599353926e-05, + "loss": 0.4933, + "step": 17081 + }, + { + "epoch": 0.46902800658978583, + "grad_norm": 0.33713799715042114, + "learning_rate": 1.7429039497686316e-05, + "loss": 0.4789, + "step": 17082 + }, + { + "epoch": 0.4690554640307523, + "grad_norm": 0.36816883087158203, + "learning_rate": 1.7428750382161176e-05, + "loss": 0.5654, + "step": 17083 + }, + { + "epoch": 0.4690829214717188, + "grad_norm": 0.34501931071281433, + "learning_rate": 1.7428461252779047e-05, + "loss": 0.4471, + "step": 17084 + }, + { + "epoch": 0.4691103789126853, + "grad_norm": 0.3594520688056946, + "learning_rate": 1.7428172109540466e-05, + "loss": 0.4965, + "step": 17085 + }, + { + "epoch": 0.46913783635365186, + "grad_norm": 0.35713744163513184, + "learning_rate": 1.7427882952445974e-05, + "loss": 0.4592, + "step": 17086 + }, + { + "epoch": 0.46916529379461835, + "grad_norm": 0.40955662727355957, + "learning_rate": 1.7427593781496107e-05, + "loss": 0.6714, + "step": 17087 + }, + { + "epoch": 0.46919275123558485, + "grad_norm": 0.39261043071746826, + "learning_rate": 1.7427304596691414e-05, + "loss": 0.5391, + "step": 17088 + }, + { + "epoch": 0.46922020867655134, + "grad_norm": 0.3557354509830475, + "learning_rate": 1.7427015398032422e-05, + "loss": 0.4875, + "step": 17089 + }, + { + "epoch": 0.46924766611751784, + "grad_norm": 0.35868677496910095, + "learning_rate": 1.7426726185519675e-05, + "loss": 0.5847, + "step": 17090 + }, + { + "epoch": 0.46927512355848433, + "grad_norm": 0.37853068113327026, + "learning_rate": 1.7426436959153714e-05, + "loss": 0.5713, + "step": 17091 + }, + { + "epoch": 0.4693025809994508, + "grad_norm": 0.36245396733283997, + "learning_rate": 1.742614771893508e-05, + "loss": 0.4608, + "step": 17092 + }, + { + "epoch": 0.4693300384404174, + "grad_norm": 0.37499722838401794, + "learning_rate": 1.742585846486431e-05, + "loss": 0.5165, + "step": 17093 + }, + { + "epoch": 0.46935749588138387, + "grad_norm": 0.3808550238609314, + "learning_rate": 1.7425569196941943e-05, + "loss": 0.5054, + "step": 17094 + }, + { + "epoch": 0.46938495332235036, + "grad_norm": 0.3840847313404083, + "learning_rate": 1.7425279915168518e-05, + "loss": 0.5066, + "step": 17095 + }, + { + "epoch": 0.46941241076331686, + "grad_norm": 0.38980793952941895, + "learning_rate": 1.742499061954458e-05, + "loss": 0.4451, + "step": 17096 + }, + { + "epoch": 0.46943986820428335, + "grad_norm": 0.3526729345321655, + "learning_rate": 1.7424701310070663e-05, + "loss": 0.4987, + "step": 17097 + }, + { + "epoch": 0.46946732564524984, + "grad_norm": 0.43383216857910156, + "learning_rate": 1.7424411986747304e-05, + "loss": 0.5346, + "step": 17098 + }, + { + "epoch": 0.46949478308621634, + "grad_norm": 0.38595640659332275, + "learning_rate": 1.7424122649575054e-05, + "loss": 0.4737, + "step": 17099 + }, + { + "epoch": 0.4695222405271829, + "grad_norm": 0.36706170439720154, + "learning_rate": 1.7423833298554443e-05, + "loss": 0.5451, + "step": 17100 + }, + { + "epoch": 0.4695496979681494, + "grad_norm": 0.3900626003742218, + "learning_rate": 1.742354393368601e-05, + "loss": 0.5214, + "step": 17101 + }, + { + "epoch": 0.4695771554091159, + "grad_norm": 0.3775525689125061, + "learning_rate": 1.7423254554970302e-05, + "loss": 0.4867, + "step": 17102 + }, + { + "epoch": 0.46960461285008237, + "grad_norm": 0.38418447971343994, + "learning_rate": 1.7422965162407854e-05, + "loss": 0.4668, + "step": 17103 + }, + { + "epoch": 0.46963207029104886, + "grad_norm": 0.530857503414154, + "learning_rate": 1.7422675755999206e-05, + "loss": 0.5614, + "step": 17104 + }, + { + "epoch": 0.46965952773201536, + "grad_norm": 0.41304031014442444, + "learning_rate": 1.7422386335744903e-05, + "loss": 0.475, + "step": 17105 + }, + { + "epoch": 0.46968698517298185, + "grad_norm": 0.39012306928634644, + "learning_rate": 1.7422096901645477e-05, + "loss": 0.4957, + "step": 17106 + }, + { + "epoch": 0.4697144426139484, + "grad_norm": 0.3557257652282715, + "learning_rate": 1.742180745370147e-05, + "loss": 0.4211, + "step": 17107 + }, + { + "epoch": 0.4697419000549149, + "grad_norm": 0.39167526364326477, + "learning_rate": 1.742151799191343e-05, + "loss": 0.5109, + "step": 17108 + }, + { + "epoch": 0.4697693574958814, + "grad_norm": 0.3547302484512329, + "learning_rate": 1.7421228516281884e-05, + "loss": 0.4138, + "step": 17109 + }, + { + "epoch": 0.4697968149368479, + "grad_norm": 0.37484145164489746, + "learning_rate": 1.742093902680738e-05, + "loss": 0.5001, + "step": 17110 + }, + { + "epoch": 0.4698242723778144, + "grad_norm": 0.3967234194278717, + "learning_rate": 1.7420649523490452e-05, + "loss": 0.4571, + "step": 17111 + }, + { + "epoch": 0.46985172981878087, + "grad_norm": 0.3972424268722534, + "learning_rate": 1.742036000633165e-05, + "loss": 0.4868, + "step": 17112 + }, + { + "epoch": 0.46987918725974737, + "grad_norm": 0.37020254135131836, + "learning_rate": 1.7420070475331506e-05, + "loss": 0.53, + "step": 17113 + }, + { + "epoch": 0.4699066447007139, + "grad_norm": 0.36313048005104065, + "learning_rate": 1.7419780930490562e-05, + "loss": 0.4602, + "step": 17114 + }, + { + "epoch": 0.4699341021416804, + "grad_norm": 0.39895719289779663, + "learning_rate": 1.741949137180936e-05, + "loss": 0.4637, + "step": 17115 + }, + { + "epoch": 0.4699615595826469, + "grad_norm": 0.4086248278617859, + "learning_rate": 1.7419201799288438e-05, + "loss": 0.5815, + "step": 17116 + }, + { + "epoch": 0.4699890170236134, + "grad_norm": 0.386262983083725, + "learning_rate": 1.7418912212928335e-05, + "loss": 0.5744, + "step": 17117 + }, + { + "epoch": 0.4700164744645799, + "grad_norm": 0.38011878728866577, + "learning_rate": 1.7418622612729597e-05, + "loss": 0.5795, + "step": 17118 + }, + { + "epoch": 0.4700439319055464, + "grad_norm": 0.3548242449760437, + "learning_rate": 1.7418332998692756e-05, + "loss": 0.4951, + "step": 17119 + }, + { + "epoch": 0.4700713893465129, + "grad_norm": 0.4745754301548004, + "learning_rate": 1.741804337081836e-05, + "loss": 0.5378, + "step": 17120 + }, + { + "epoch": 0.47009884678747943, + "grad_norm": 0.6391884684562683, + "learning_rate": 1.741775372910694e-05, + "loss": 0.4535, + "step": 17121 + }, + { + "epoch": 0.4701263042284459, + "grad_norm": 0.43966150283813477, + "learning_rate": 1.7417464073559047e-05, + "loss": 0.4634, + "step": 17122 + }, + { + "epoch": 0.4701537616694124, + "grad_norm": 0.36903369426727295, + "learning_rate": 1.7417174404175214e-05, + "loss": 0.4907, + "step": 17123 + }, + { + "epoch": 0.4701812191103789, + "grad_norm": 0.35011449456214905, + "learning_rate": 1.741688472095598e-05, + "loss": 0.4668, + "step": 17124 + }, + { + "epoch": 0.4702086765513454, + "grad_norm": 0.36873772740364075, + "learning_rate": 1.7416595023901896e-05, + "loss": 0.4938, + "step": 17125 + }, + { + "epoch": 0.4702361339923119, + "grad_norm": 0.40376386046409607, + "learning_rate": 1.7416305313013492e-05, + "loss": 0.5829, + "step": 17126 + }, + { + "epoch": 0.4702635914332784, + "grad_norm": 0.35475465655326843, + "learning_rate": 1.741601558829131e-05, + "loss": 0.524, + "step": 17127 + }, + { + "epoch": 0.47029104887424494, + "grad_norm": 0.3583265542984009, + "learning_rate": 1.7415725849735895e-05, + "loss": 0.4652, + "step": 17128 + }, + { + "epoch": 0.47031850631521144, + "grad_norm": 0.3792871832847595, + "learning_rate": 1.741543609734778e-05, + "loss": 0.4661, + "step": 17129 + }, + { + "epoch": 0.47034596375617793, + "grad_norm": 0.4408613443374634, + "learning_rate": 1.741514633112751e-05, + "loss": 0.5244, + "step": 17130 + }, + { + "epoch": 0.4703734211971444, + "grad_norm": 0.41571420431137085, + "learning_rate": 1.7414856551075632e-05, + "loss": 0.4589, + "step": 17131 + }, + { + "epoch": 0.4704008786381109, + "grad_norm": 0.3950871527194977, + "learning_rate": 1.7414566757192677e-05, + "loss": 0.5385, + "step": 17132 + }, + { + "epoch": 0.4704283360790774, + "grad_norm": 0.5954047441482544, + "learning_rate": 1.7414276949479188e-05, + "loss": 0.5433, + "step": 17133 + }, + { + "epoch": 0.4704557935200439, + "grad_norm": 0.3341102600097656, + "learning_rate": 1.7413987127935705e-05, + "loss": 0.4754, + "step": 17134 + }, + { + "epoch": 0.47048325096101046, + "grad_norm": 0.355400025844574, + "learning_rate": 1.741369729256277e-05, + "loss": 0.4684, + "step": 17135 + }, + { + "epoch": 0.47051070840197695, + "grad_norm": 0.3946392238140106, + "learning_rate": 1.7413407443360925e-05, + "loss": 0.5206, + "step": 17136 + }, + { + "epoch": 0.47053816584294345, + "grad_norm": 0.44840216636657715, + "learning_rate": 1.7413117580330706e-05, + "loss": 0.5188, + "step": 17137 + }, + { + "epoch": 0.47056562328390994, + "grad_norm": 0.3565295934677124, + "learning_rate": 1.7412827703472658e-05, + "loss": 0.5274, + "step": 17138 + }, + { + "epoch": 0.47059308072487643, + "grad_norm": 0.4233197867870331, + "learning_rate": 1.741253781278732e-05, + "loss": 0.4912, + "step": 17139 + }, + { + "epoch": 0.4706205381658429, + "grad_norm": 0.37272974848747253, + "learning_rate": 1.7412247908275236e-05, + "loss": 0.4704, + "step": 17140 + }, + { + "epoch": 0.4706479956068094, + "grad_norm": 0.4280697703361511, + "learning_rate": 1.7411957989936944e-05, + "loss": 0.4893, + "step": 17141 + }, + { + "epoch": 0.47067545304777597, + "grad_norm": 0.3766244351863861, + "learning_rate": 1.741166805777298e-05, + "loss": 0.4785, + "step": 17142 + }, + { + "epoch": 0.47070291048874247, + "grad_norm": 0.4235098659992218, + "learning_rate": 1.741137811178389e-05, + "loss": 0.5092, + "step": 17143 + }, + { + "epoch": 0.47073036792970896, + "grad_norm": 0.3231426477432251, + "learning_rate": 1.741108815197022e-05, + "loss": 0.4401, + "step": 17144 + }, + { + "epoch": 0.47075782537067545, + "grad_norm": 0.42804139852523804, + "learning_rate": 1.74107981783325e-05, + "loss": 0.5619, + "step": 17145 + }, + { + "epoch": 0.47078528281164195, + "grad_norm": 0.3764135539531708, + "learning_rate": 1.741050819087128e-05, + "loss": 0.5209, + "step": 17146 + }, + { + "epoch": 0.47081274025260844, + "grad_norm": 0.44187411665916443, + "learning_rate": 1.7410218189587096e-05, + "loss": 0.5341, + "step": 17147 + }, + { + "epoch": 0.47084019769357494, + "grad_norm": 0.42096319794654846, + "learning_rate": 1.7409928174480487e-05, + "loss": 0.4891, + "step": 17148 + }, + { + "epoch": 0.4708676551345415, + "grad_norm": 0.4597965478897095, + "learning_rate": 1.7409638145552e-05, + "loss": 0.5254, + "step": 17149 + }, + { + "epoch": 0.470895112575508, + "grad_norm": 0.38269561529159546, + "learning_rate": 1.7409348102802174e-05, + "loss": 0.5232, + "step": 17150 + }, + { + "epoch": 0.4709225700164745, + "grad_norm": 0.34176668524742126, + "learning_rate": 1.7409058046231547e-05, + "loss": 0.4408, + "step": 17151 + }, + { + "epoch": 0.47095002745744097, + "grad_norm": 0.3873199224472046, + "learning_rate": 1.7408767975840663e-05, + "loss": 0.5479, + "step": 17152 + }, + { + "epoch": 0.47097748489840746, + "grad_norm": 0.364918977022171, + "learning_rate": 1.740847789163006e-05, + "loss": 0.4832, + "step": 17153 + }, + { + "epoch": 0.47100494233937396, + "grad_norm": 0.4034755527973175, + "learning_rate": 1.740818779360028e-05, + "loss": 0.4591, + "step": 17154 + }, + { + "epoch": 0.47103239978034045, + "grad_norm": 0.36387234926223755, + "learning_rate": 1.740789768175187e-05, + "loss": 0.5125, + "step": 17155 + }, + { + "epoch": 0.471059857221307, + "grad_norm": 0.40485408902168274, + "learning_rate": 1.7407607556085365e-05, + "loss": 0.5518, + "step": 17156 + }, + { + "epoch": 0.4710873146622735, + "grad_norm": 0.3466642200946808, + "learning_rate": 1.7407317416601305e-05, + "loss": 0.533, + "step": 17157 + }, + { + "epoch": 0.47111477210324, + "grad_norm": 0.4467039406299591, + "learning_rate": 1.7407027263300237e-05, + "loss": 0.4533, + "step": 17158 + }, + { + "epoch": 0.4711422295442065, + "grad_norm": 0.3722216486930847, + "learning_rate": 1.74067370961827e-05, + "loss": 0.5154, + "step": 17159 + }, + { + "epoch": 0.471169686985173, + "grad_norm": 0.38394734263420105, + "learning_rate": 1.7406446915249233e-05, + "loss": 0.4602, + "step": 17160 + }, + { + "epoch": 0.47119714442613947, + "grad_norm": 0.3970642685890198, + "learning_rate": 1.7406156720500376e-05, + "loss": 0.5399, + "step": 17161 + }, + { + "epoch": 0.47122460186710596, + "grad_norm": 0.3529106378555298, + "learning_rate": 1.7405866511936677e-05, + "loss": 0.5054, + "step": 17162 + }, + { + "epoch": 0.4712520593080725, + "grad_norm": 0.3469769060611725, + "learning_rate": 1.740557628955867e-05, + "loss": 0.4584, + "step": 17163 + }, + { + "epoch": 0.471279516749039, + "grad_norm": 0.3683535158634186, + "learning_rate": 1.74052860533669e-05, + "loss": 0.5395, + "step": 17164 + }, + { + "epoch": 0.4713069741900055, + "grad_norm": 0.43552398681640625, + "learning_rate": 1.740499580336191e-05, + "loss": 0.5815, + "step": 17165 + }, + { + "epoch": 0.471334431630972, + "grad_norm": 0.4493972361087799, + "learning_rate": 1.740470553954424e-05, + "loss": 0.5978, + "step": 17166 + }, + { + "epoch": 0.4713618890719385, + "grad_norm": 0.35988086462020874, + "learning_rate": 1.7404415261914425e-05, + "loss": 0.5215, + "step": 17167 + }, + { + "epoch": 0.471389346512905, + "grad_norm": 0.31829819083213806, + "learning_rate": 1.740412497047302e-05, + "loss": 0.3859, + "step": 17168 + }, + { + "epoch": 0.4714168039538715, + "grad_norm": 0.44183000922203064, + "learning_rate": 1.7403834665220555e-05, + "loss": 0.5121, + "step": 17169 + }, + { + "epoch": 0.471444261394838, + "grad_norm": 0.3646385967731476, + "learning_rate": 1.740354434615758e-05, + "loss": 0.4621, + "step": 17170 + }, + { + "epoch": 0.4714717188358045, + "grad_norm": 0.39171311259269714, + "learning_rate": 1.740325401328462e-05, + "loss": 0.5741, + "step": 17171 + }, + { + "epoch": 0.471499176276771, + "grad_norm": 0.362976610660553, + "learning_rate": 1.740296366660224e-05, + "loss": 0.4865, + "step": 17172 + }, + { + "epoch": 0.4715266337177375, + "grad_norm": 0.3351900577545166, + "learning_rate": 1.7402673306110966e-05, + "loss": 0.4377, + "step": 17173 + }, + { + "epoch": 0.471554091158704, + "grad_norm": 0.36703336238861084, + "learning_rate": 1.7402382931811342e-05, + "loss": 0.5317, + "step": 17174 + }, + { + "epoch": 0.4715815485996705, + "grad_norm": 0.3941352367401123, + "learning_rate": 1.7402092543703912e-05, + "loss": 0.5881, + "step": 17175 + }, + { + "epoch": 0.471609006040637, + "grad_norm": 0.33394235372543335, + "learning_rate": 1.7401802141789218e-05, + "loss": 0.5129, + "step": 17176 + }, + { + "epoch": 0.47163646348160354, + "grad_norm": 0.32967448234558105, + "learning_rate": 1.7401511726067802e-05, + "loss": 0.39, + "step": 17177 + }, + { + "epoch": 0.47166392092257003, + "grad_norm": 0.47130271792411804, + "learning_rate": 1.7401221296540203e-05, + "loss": 0.413, + "step": 17178 + }, + { + "epoch": 0.47169137836353653, + "grad_norm": 0.34721434116363525, + "learning_rate": 1.7400930853206964e-05, + "loss": 0.4424, + "step": 17179 + }, + { + "epoch": 0.471718835804503, + "grad_norm": 0.3731633424758911, + "learning_rate": 1.7400640396068625e-05, + "loss": 0.5283, + "step": 17180 + }, + { + "epoch": 0.4717462932454695, + "grad_norm": 0.36002546548843384, + "learning_rate": 1.7400349925125733e-05, + "loss": 0.4912, + "step": 17181 + }, + { + "epoch": 0.471773750686436, + "grad_norm": 0.37416812777519226, + "learning_rate": 1.7400059440378824e-05, + "loss": 0.509, + "step": 17182 + }, + { + "epoch": 0.4718012081274025, + "grad_norm": 0.36918312311172485, + "learning_rate": 1.7399768941828445e-05, + "loss": 0.5735, + "step": 17183 + }, + { + "epoch": 0.47182866556836905, + "grad_norm": 0.3621938228607178, + "learning_rate": 1.7399478429475132e-05, + "loss": 0.5517, + "step": 17184 + }, + { + "epoch": 0.47185612300933555, + "grad_norm": 0.37911492586135864, + "learning_rate": 1.739918790331943e-05, + "loss": 0.4181, + "step": 17185 + }, + { + "epoch": 0.47188358045030204, + "grad_norm": 0.3568498194217682, + "learning_rate": 1.7398897363361883e-05, + "loss": 0.4099, + "step": 17186 + }, + { + "epoch": 0.47191103789126854, + "grad_norm": 0.3792688548564911, + "learning_rate": 1.7398606809603032e-05, + "loss": 0.5262, + "step": 17187 + }, + { + "epoch": 0.47193849533223503, + "grad_norm": 0.3800898790359497, + "learning_rate": 1.7398316242043415e-05, + "loss": 0.5531, + "step": 17188 + }, + { + "epoch": 0.4719659527732015, + "grad_norm": 0.34380286931991577, + "learning_rate": 1.7398025660683578e-05, + "loss": 0.4272, + "step": 17189 + }, + { + "epoch": 0.471993410214168, + "grad_norm": 0.37594109773635864, + "learning_rate": 1.7397735065524063e-05, + "loss": 0.5275, + "step": 17190 + }, + { + "epoch": 0.47202086765513457, + "grad_norm": 0.6572182178497314, + "learning_rate": 1.739744445656541e-05, + "loss": 0.5887, + "step": 17191 + }, + { + "epoch": 0.47204832509610106, + "grad_norm": 0.37441498041152954, + "learning_rate": 1.7397153833808166e-05, + "loss": 0.536, + "step": 17192 + }, + { + "epoch": 0.47207578253706756, + "grad_norm": 0.3591223657131195, + "learning_rate": 1.7396863197252864e-05, + "loss": 0.4204, + "step": 17193 + }, + { + "epoch": 0.47210323997803405, + "grad_norm": 0.3884643018245697, + "learning_rate": 1.7396572546900052e-05, + "loss": 0.5997, + "step": 17194 + }, + { + "epoch": 0.47213069741900054, + "grad_norm": 0.367172509431839, + "learning_rate": 1.7396281882750273e-05, + "loss": 0.584, + "step": 17195 + }, + { + "epoch": 0.47215815485996704, + "grad_norm": 0.36013105511665344, + "learning_rate": 1.739599120480407e-05, + "loss": 0.4929, + "step": 17196 + }, + { + "epoch": 0.47218561230093353, + "grad_norm": 0.3591015338897705, + "learning_rate": 1.7395700513061978e-05, + "loss": 0.5317, + "step": 17197 + }, + { + "epoch": 0.4722130697419001, + "grad_norm": 0.35685884952545166, + "learning_rate": 1.7395409807524543e-05, + "loss": 0.4849, + "step": 17198 + }, + { + "epoch": 0.4722405271828666, + "grad_norm": 0.3693915903568268, + "learning_rate": 1.7395119088192312e-05, + "loss": 0.4857, + "step": 17199 + }, + { + "epoch": 0.47226798462383307, + "grad_norm": 0.41792190074920654, + "learning_rate": 1.7394828355065825e-05, + "loss": 0.4891, + "step": 17200 + }, + { + "epoch": 0.47229544206479956, + "grad_norm": 0.33272621035575867, + "learning_rate": 1.739453760814562e-05, + "loss": 0.4955, + "step": 17201 + }, + { + "epoch": 0.47232289950576606, + "grad_norm": 0.48918411135673523, + "learning_rate": 1.7394246847432245e-05, + "loss": 0.5484, + "step": 17202 + }, + { + "epoch": 0.47235035694673255, + "grad_norm": 0.34855470061302185, + "learning_rate": 1.7393956072926236e-05, + "loss": 0.4117, + "step": 17203 + }, + { + "epoch": 0.47237781438769905, + "grad_norm": 0.3952394425868988, + "learning_rate": 1.739366528462814e-05, + "loss": 0.5747, + "step": 17204 + }, + { + "epoch": 0.4724052718286656, + "grad_norm": 0.4538654685020447, + "learning_rate": 1.73933744825385e-05, + "loss": 0.6068, + "step": 17205 + }, + { + "epoch": 0.4724327292696321, + "grad_norm": 0.37930038571357727, + "learning_rate": 1.739308366665786e-05, + "loss": 0.6068, + "step": 17206 + }, + { + "epoch": 0.4724601867105986, + "grad_norm": 0.3464859426021576, + "learning_rate": 1.7392792836986752e-05, + "loss": 0.4583, + "step": 17207 + }, + { + "epoch": 0.4724876441515651, + "grad_norm": 0.3632785677909851, + "learning_rate": 1.739250199352573e-05, + "loss": 0.5256, + "step": 17208 + }, + { + "epoch": 0.4725151015925316, + "grad_norm": 0.3598712384700775, + "learning_rate": 1.7392211136275336e-05, + "loss": 0.429, + "step": 17209 + }, + { + "epoch": 0.47254255903349807, + "grad_norm": 0.40367990732192993, + "learning_rate": 1.7391920265236102e-05, + "loss": 0.5902, + "step": 17210 + }, + { + "epoch": 0.47257001647446456, + "grad_norm": 0.4128073453903198, + "learning_rate": 1.739162938040858e-05, + "loss": 0.481, + "step": 17211 + }, + { + "epoch": 0.47259747391543105, + "grad_norm": 0.3733974099159241, + "learning_rate": 1.739133848179331e-05, + "loss": 0.5262, + "step": 17212 + }, + { + "epoch": 0.4726249313563976, + "grad_norm": 0.42053601145744324, + "learning_rate": 1.7391047569390836e-05, + "loss": 0.5679, + "step": 17213 + }, + { + "epoch": 0.4726523887973641, + "grad_norm": 0.3420772850513458, + "learning_rate": 1.7390756643201698e-05, + "loss": 0.4654, + "step": 17214 + }, + { + "epoch": 0.4726798462383306, + "grad_norm": 0.37414565682411194, + "learning_rate": 1.739046570322644e-05, + "loss": 0.4965, + "step": 17215 + }, + { + "epoch": 0.4727073036792971, + "grad_norm": 0.3983024060726166, + "learning_rate": 1.7390174749465608e-05, + "loss": 0.6005, + "step": 17216 + }, + { + "epoch": 0.4727347611202636, + "grad_norm": 0.41155120730400085, + "learning_rate": 1.738988378191974e-05, + "loss": 0.4524, + "step": 17217 + }, + { + "epoch": 0.4727622185612301, + "grad_norm": 0.38649871945381165, + "learning_rate": 1.7389592800589374e-05, + "loss": 0.5498, + "step": 17218 + }, + { + "epoch": 0.47278967600219657, + "grad_norm": 0.36010658740997314, + "learning_rate": 1.7389301805475064e-05, + "loss": 0.5136, + "step": 17219 + }, + { + "epoch": 0.4728171334431631, + "grad_norm": 0.3788629472255707, + "learning_rate": 1.738901079657735e-05, + "loss": 0.6178, + "step": 17220 + }, + { + "epoch": 0.4728445908841296, + "grad_norm": 0.37550729513168335, + "learning_rate": 1.738871977389677e-05, + "loss": 0.5969, + "step": 17221 + }, + { + "epoch": 0.4728720483250961, + "grad_norm": 0.3857876658439636, + "learning_rate": 1.7388428737433868e-05, + "loss": 0.4896, + "step": 17222 + }, + { + "epoch": 0.4728995057660626, + "grad_norm": 0.3998032212257385, + "learning_rate": 1.7388137687189192e-05, + "loss": 0.5497, + "step": 17223 + }, + { + "epoch": 0.4729269632070291, + "grad_norm": 0.31837695837020874, + "learning_rate": 1.7387846623163277e-05, + "loss": 0.4098, + "step": 17224 + }, + { + "epoch": 0.4729544206479956, + "grad_norm": 0.43702268600463867, + "learning_rate": 1.738755554535667e-05, + "loss": 0.6094, + "step": 17225 + }, + { + "epoch": 0.4729818780889621, + "grad_norm": 0.3653499484062195, + "learning_rate": 1.738726445376992e-05, + "loss": 0.4394, + "step": 17226 + }, + { + "epoch": 0.47300933552992863, + "grad_norm": 0.4202346205711365, + "learning_rate": 1.7386973348403558e-05, + "loss": 0.4675, + "step": 17227 + }, + { + "epoch": 0.4730367929708951, + "grad_norm": 0.34099262952804565, + "learning_rate": 1.7386682229258136e-05, + "loss": 0.4607, + "step": 17228 + }, + { + "epoch": 0.4730642504118616, + "grad_norm": 0.3644240200519562, + "learning_rate": 1.7386391096334194e-05, + "loss": 0.5676, + "step": 17229 + }, + { + "epoch": 0.4730917078528281, + "grad_norm": 0.35538753867149353, + "learning_rate": 1.7386099949632273e-05, + "loss": 0.496, + "step": 17230 + }, + { + "epoch": 0.4731191652937946, + "grad_norm": 0.3731416165828705, + "learning_rate": 1.738580878915292e-05, + "loss": 0.4976, + "step": 17231 + }, + { + "epoch": 0.4731466227347611, + "grad_norm": 0.44499915838241577, + "learning_rate": 1.7385517614896678e-05, + "loss": 0.5091, + "step": 17232 + }, + { + "epoch": 0.4731740801757276, + "grad_norm": 0.3652336895465851, + "learning_rate": 1.7385226426864082e-05, + "loss": 0.475, + "step": 17233 + }, + { + "epoch": 0.47320153761669415, + "grad_norm": 0.3302925229072571, + "learning_rate": 1.7384935225055688e-05, + "loss": 0.4976, + "step": 17234 + }, + { + "epoch": 0.47322899505766064, + "grad_norm": 0.3312305808067322, + "learning_rate": 1.738464400947203e-05, + "loss": 0.458, + "step": 17235 + }, + { + "epoch": 0.47325645249862713, + "grad_norm": 0.4262823164463043, + "learning_rate": 1.7384352780113656e-05, + "loss": 0.5381, + "step": 17236 + }, + { + "epoch": 0.47328390993959363, + "grad_norm": 0.3880760669708252, + "learning_rate": 1.7384061536981106e-05, + "loss": 0.5175, + "step": 17237 + }, + { + "epoch": 0.4733113673805601, + "grad_norm": 0.35953396558761597, + "learning_rate": 1.7383770280074926e-05, + "loss": 0.5672, + "step": 17238 + }, + { + "epoch": 0.4733388248215266, + "grad_norm": 0.3842301368713379, + "learning_rate": 1.7383479009395655e-05, + "loss": 0.5227, + "step": 17239 + }, + { + "epoch": 0.4733662822624931, + "grad_norm": 0.332232266664505, + "learning_rate": 1.7383187724943842e-05, + "loss": 0.4854, + "step": 17240 + }, + { + "epoch": 0.47339373970345966, + "grad_norm": 0.4168449640274048, + "learning_rate": 1.7382896426720025e-05, + "loss": 0.5288, + "step": 17241 + }, + { + "epoch": 0.47342119714442615, + "grad_norm": 0.4474450647830963, + "learning_rate": 1.738260511472475e-05, + "loss": 0.503, + "step": 17242 + }, + { + "epoch": 0.47344865458539265, + "grad_norm": 0.404805064201355, + "learning_rate": 1.7382313788958563e-05, + "loss": 0.4929, + "step": 17243 + }, + { + "epoch": 0.47347611202635914, + "grad_norm": 0.4483509361743927, + "learning_rate": 1.7382022449422e-05, + "loss": 0.5267, + "step": 17244 + }, + { + "epoch": 0.47350356946732564, + "grad_norm": 0.4038238525390625, + "learning_rate": 1.7381731096115615e-05, + "loss": 0.5427, + "step": 17245 + }, + { + "epoch": 0.47353102690829213, + "grad_norm": 0.4074070155620575, + "learning_rate": 1.7381439729039942e-05, + "loss": 0.4967, + "step": 17246 + }, + { + "epoch": 0.4735584843492586, + "grad_norm": 0.3859017789363861, + "learning_rate": 1.7381148348195526e-05, + "loss": 0.5314, + "step": 17247 + }, + { + "epoch": 0.4735859417902252, + "grad_norm": 0.5953295826911926, + "learning_rate": 1.7380856953582917e-05, + "loss": 0.517, + "step": 17248 + }, + { + "epoch": 0.47361339923119167, + "grad_norm": 0.3646122217178345, + "learning_rate": 1.7380565545202652e-05, + "loss": 0.5108, + "step": 17249 + }, + { + "epoch": 0.47364085667215816, + "grad_norm": 0.4172845482826233, + "learning_rate": 1.7380274123055275e-05, + "loss": 0.5473, + "step": 17250 + }, + { + "epoch": 0.47366831411312466, + "grad_norm": 0.3587459325790405, + "learning_rate": 1.7379982687141333e-05, + "loss": 0.5128, + "step": 17251 + }, + { + "epoch": 0.47369577155409115, + "grad_norm": 0.35335248708724976, + "learning_rate": 1.7379691237461368e-05, + "loss": 0.5197, + "step": 17252 + }, + { + "epoch": 0.47372322899505764, + "grad_norm": 0.36500540375709534, + "learning_rate": 1.7379399774015924e-05, + "loss": 0.5641, + "step": 17253 + }, + { + "epoch": 0.47375068643602414, + "grad_norm": 0.35103997588157654, + "learning_rate": 1.7379108296805545e-05, + "loss": 0.55, + "step": 17254 + }, + { + "epoch": 0.4737781438769907, + "grad_norm": 0.40318673849105835, + "learning_rate": 1.737881680583077e-05, + "loss": 0.5044, + "step": 17255 + }, + { + "epoch": 0.4738056013179572, + "grad_norm": 0.4028772711753845, + "learning_rate": 1.737852530109215e-05, + "loss": 0.5394, + "step": 17256 + }, + { + "epoch": 0.4738330587589237, + "grad_norm": 0.4003382623195648, + "learning_rate": 1.7378233782590226e-05, + "loss": 0.5069, + "step": 17257 + }, + { + "epoch": 0.47386051619989017, + "grad_norm": 0.38466304540634155, + "learning_rate": 1.7377942250325537e-05, + "loss": 0.5049, + "step": 17258 + }, + { + "epoch": 0.47388797364085666, + "grad_norm": 0.40776029229164124, + "learning_rate": 1.7377650704298634e-05, + "loss": 0.5028, + "step": 17259 + }, + { + "epoch": 0.47391543108182316, + "grad_norm": 0.43944212794303894, + "learning_rate": 1.7377359144510057e-05, + "loss": 0.5289, + "step": 17260 + }, + { + "epoch": 0.47394288852278965, + "grad_norm": 0.4240504503250122, + "learning_rate": 1.7377067570960352e-05, + "loss": 0.4963, + "step": 17261 + }, + { + "epoch": 0.4739703459637562, + "grad_norm": 0.3660351037979126, + "learning_rate": 1.737677598365006e-05, + "loss": 0.4969, + "step": 17262 + }, + { + "epoch": 0.4739978034047227, + "grad_norm": 0.37510454654693604, + "learning_rate": 1.7376484382579725e-05, + "loss": 0.5447, + "step": 17263 + }, + { + "epoch": 0.4740252608456892, + "grad_norm": 0.38862836360931396, + "learning_rate": 1.7376192767749894e-05, + "loss": 0.4905, + "step": 17264 + }, + { + "epoch": 0.4740527182866557, + "grad_norm": 0.35776373744010925, + "learning_rate": 1.737590113916111e-05, + "loss": 0.4723, + "step": 17265 + }, + { + "epoch": 0.4740801757276222, + "grad_norm": 0.4024008512496948, + "learning_rate": 1.7375609496813916e-05, + "loss": 0.5367, + "step": 17266 + }, + { + "epoch": 0.47410763316858867, + "grad_norm": 0.3687393069267273, + "learning_rate": 1.7375317840708853e-05, + "loss": 0.4479, + "step": 17267 + }, + { + "epoch": 0.47413509060955517, + "grad_norm": 0.3861641585826874, + "learning_rate": 1.7375026170846474e-05, + "loss": 0.528, + "step": 17268 + }, + { + "epoch": 0.4741625480505217, + "grad_norm": 0.38521334528923035, + "learning_rate": 1.7374734487227315e-05, + "loss": 0.5967, + "step": 17269 + }, + { + "epoch": 0.4741900054914882, + "grad_norm": 0.4410783648490906, + "learning_rate": 1.737444278985192e-05, + "loss": 0.5497, + "step": 17270 + }, + { + "epoch": 0.4742174629324547, + "grad_norm": 0.42397835850715637, + "learning_rate": 1.7374151078720836e-05, + "loss": 0.5411, + "step": 17271 + }, + { + "epoch": 0.4742449203734212, + "grad_norm": 0.3946220874786377, + "learning_rate": 1.737385935383461e-05, + "loss": 0.5628, + "step": 17272 + }, + { + "epoch": 0.4742723778143877, + "grad_norm": 0.37670767307281494, + "learning_rate": 1.737356761519378e-05, + "loss": 0.5462, + "step": 17273 + }, + { + "epoch": 0.4742998352553542, + "grad_norm": 0.455321341753006, + "learning_rate": 1.7373275862798897e-05, + "loss": 0.588, + "step": 17274 + }, + { + "epoch": 0.4743272926963207, + "grad_norm": 0.38799625635147095, + "learning_rate": 1.7372984096650495e-05, + "loss": 0.5018, + "step": 17275 + }, + { + "epoch": 0.47435475013728723, + "grad_norm": 0.4008844196796417, + "learning_rate": 1.737269231674913e-05, + "loss": 0.542, + "step": 17276 + }, + { + "epoch": 0.4743822075782537, + "grad_norm": 0.4597949683666229, + "learning_rate": 1.7372400523095337e-05, + "loss": 0.6019, + "step": 17277 + }, + { + "epoch": 0.4744096650192202, + "grad_norm": 0.31671902537345886, + "learning_rate": 1.7372108715689665e-05, + "loss": 0.427, + "step": 17278 + }, + { + "epoch": 0.4744371224601867, + "grad_norm": 0.3999205231666565, + "learning_rate": 1.7371816894532657e-05, + "loss": 0.5337, + "step": 17279 + }, + { + "epoch": 0.4744645799011532, + "grad_norm": 0.3543127477169037, + "learning_rate": 1.737152505962486e-05, + "loss": 0.4671, + "step": 17280 + }, + { + "epoch": 0.4744920373421197, + "grad_norm": 0.36503762006759644, + "learning_rate": 1.7371233210966814e-05, + "loss": 0.4329, + "step": 17281 + }, + { + "epoch": 0.4745194947830862, + "grad_norm": 0.3354511857032776, + "learning_rate": 1.7370941348559068e-05, + "loss": 0.5202, + "step": 17282 + }, + { + "epoch": 0.47454695222405274, + "grad_norm": 0.3819139897823334, + "learning_rate": 1.737064947240216e-05, + "loss": 0.5134, + "step": 17283 + }, + { + "epoch": 0.47457440966501924, + "grad_norm": 0.3565080761909485, + "learning_rate": 1.737035758249664e-05, + "loss": 0.482, + "step": 17284 + }, + { + "epoch": 0.47460186710598573, + "grad_norm": 0.39283961057662964, + "learning_rate": 1.7370065678843052e-05, + "loss": 0.4679, + "step": 17285 + }, + { + "epoch": 0.4746293245469522, + "grad_norm": 0.35014277696609497, + "learning_rate": 1.736977376144194e-05, + "loss": 0.4961, + "step": 17286 + }, + { + "epoch": 0.4746567819879187, + "grad_norm": 0.37778040766716003, + "learning_rate": 1.7369481830293847e-05, + "loss": 0.516, + "step": 17287 + }, + { + "epoch": 0.4746842394288852, + "grad_norm": 0.3363319933414459, + "learning_rate": 1.7369189885399314e-05, + "loss": 0.4738, + "step": 17288 + }, + { + "epoch": 0.4747116968698517, + "grad_norm": 0.3324213922023773, + "learning_rate": 1.7368897926758894e-05, + "loss": 0.5213, + "step": 17289 + }, + { + "epoch": 0.47473915431081826, + "grad_norm": 0.4516817033290863, + "learning_rate": 1.7368605954373125e-05, + "loss": 0.6487, + "step": 17290 + }, + { + "epoch": 0.47476661175178475, + "grad_norm": 0.6064024567604065, + "learning_rate": 1.736831396824256e-05, + "loss": 0.472, + "step": 17291 + }, + { + "epoch": 0.47479406919275124, + "grad_norm": 0.3879409432411194, + "learning_rate": 1.736802196836773e-05, + "loss": 0.5094, + "step": 17292 + }, + { + "epoch": 0.47482152663371774, + "grad_norm": 0.3599814176559448, + "learning_rate": 1.7367729954749194e-05, + "loss": 0.4996, + "step": 17293 + }, + { + "epoch": 0.47484898407468423, + "grad_norm": 0.383217453956604, + "learning_rate": 1.7367437927387484e-05, + "loss": 0.4346, + "step": 17294 + }, + { + "epoch": 0.4748764415156507, + "grad_norm": 0.3607107102870941, + "learning_rate": 1.7367145886283155e-05, + "loss": 0.4242, + "step": 17295 + }, + { + "epoch": 0.4749038989566172, + "grad_norm": 0.3496772348880768, + "learning_rate": 1.7366853831436746e-05, + "loss": 0.4394, + "step": 17296 + }, + { + "epoch": 0.47493135639758377, + "grad_norm": 0.38314926624298096, + "learning_rate": 1.7366561762848803e-05, + "loss": 0.4963, + "step": 17297 + }, + { + "epoch": 0.47495881383855026, + "grad_norm": 0.3727840185165405, + "learning_rate": 1.7366269680519875e-05, + "loss": 0.5107, + "step": 17298 + }, + { + "epoch": 0.47498627127951676, + "grad_norm": 0.39647218585014343, + "learning_rate": 1.7365977584450497e-05, + "loss": 0.5597, + "step": 17299 + }, + { + "epoch": 0.47501372872048325, + "grad_norm": 0.6762796640396118, + "learning_rate": 1.7365685474641222e-05, + "loss": 0.465, + "step": 17300 + }, + { + "epoch": 0.47504118616144975, + "grad_norm": 0.4140743315219879, + "learning_rate": 1.7365393351092598e-05, + "loss": 0.5154, + "step": 17301 + }, + { + "epoch": 0.47506864360241624, + "grad_norm": 0.420852929353714, + "learning_rate": 1.7365101213805157e-05, + "loss": 0.5467, + "step": 17302 + }, + { + "epoch": 0.47509610104338273, + "grad_norm": 0.6223469972610474, + "learning_rate": 1.7364809062779454e-05, + "loss": 0.5541, + "step": 17303 + }, + { + "epoch": 0.4751235584843493, + "grad_norm": 0.4063449203968048, + "learning_rate": 1.7364516898016033e-05, + "loss": 0.4877, + "step": 17304 + }, + { + "epoch": 0.4751510159253158, + "grad_norm": 0.5202169418334961, + "learning_rate": 1.7364224719515438e-05, + "loss": 0.501, + "step": 17305 + }, + { + "epoch": 0.4751784733662823, + "grad_norm": 0.4030936658382416, + "learning_rate": 1.7363932527278212e-05, + "loss": 0.4689, + "step": 17306 + }, + { + "epoch": 0.47520593080724877, + "grad_norm": 0.3792634904384613, + "learning_rate": 1.7363640321304903e-05, + "loss": 0.5148, + "step": 17307 + }, + { + "epoch": 0.47523338824821526, + "grad_norm": 0.37605535984039307, + "learning_rate": 1.736334810159605e-05, + "loss": 0.4869, + "step": 17308 + }, + { + "epoch": 0.47526084568918175, + "grad_norm": 0.40993282198905945, + "learning_rate": 1.736305586815221e-05, + "loss": 0.5332, + "step": 17309 + }, + { + "epoch": 0.47528830313014825, + "grad_norm": 0.3890884518623352, + "learning_rate": 1.7362763620973916e-05, + "loss": 0.4945, + "step": 17310 + }, + { + "epoch": 0.4753157605711148, + "grad_norm": 0.34221163392066956, + "learning_rate": 1.7362471360061718e-05, + "loss": 0.5349, + "step": 17311 + }, + { + "epoch": 0.4753432180120813, + "grad_norm": 0.523823082447052, + "learning_rate": 1.736217908541616e-05, + "loss": 0.4289, + "step": 17312 + }, + { + "epoch": 0.4753706754530478, + "grad_norm": 0.355613112449646, + "learning_rate": 1.7361886797037795e-05, + "loss": 0.4693, + "step": 17313 + }, + { + "epoch": 0.4753981328940143, + "grad_norm": 0.42061594128608704, + "learning_rate": 1.7361594494927155e-05, + "loss": 0.6207, + "step": 17314 + }, + { + "epoch": 0.4754255903349808, + "grad_norm": 0.37371620535850525, + "learning_rate": 1.7361302179084796e-05, + "loss": 0.4454, + "step": 17315 + }, + { + "epoch": 0.47545304777594727, + "grad_norm": 0.39288410544395447, + "learning_rate": 1.7361009849511254e-05, + "loss": 0.5075, + "step": 17316 + }, + { + "epoch": 0.47548050521691376, + "grad_norm": 0.4103913903236389, + "learning_rate": 1.7360717506207084e-05, + "loss": 0.5251, + "step": 17317 + }, + { + "epoch": 0.4755079626578803, + "grad_norm": 0.3623792231082916, + "learning_rate": 1.736042514917282e-05, + "loss": 0.4756, + "step": 17318 + }, + { + "epoch": 0.4755354200988468, + "grad_norm": 0.340503454208374, + "learning_rate": 1.736013277840902e-05, + "loss": 0.4593, + "step": 17319 + }, + { + "epoch": 0.4755628775398133, + "grad_norm": 0.38084450364112854, + "learning_rate": 1.735984039391622e-05, + "loss": 0.5572, + "step": 17320 + }, + { + "epoch": 0.4755903349807798, + "grad_norm": 0.42625728249549866, + "learning_rate": 1.7359547995694975e-05, + "loss": 0.62, + "step": 17321 + }, + { + "epoch": 0.4756177924217463, + "grad_norm": 0.3708992600440979, + "learning_rate": 1.735925558374582e-05, + "loss": 0.5233, + "step": 17322 + }, + { + "epoch": 0.4756452498627128, + "grad_norm": 0.4071897566318512, + "learning_rate": 1.73589631580693e-05, + "loss": 0.5477, + "step": 17323 + }, + { + "epoch": 0.4756727073036793, + "grad_norm": 0.35251614451408386, + "learning_rate": 1.735867071866597e-05, + "loss": 0.4574, + "step": 17324 + }, + { + "epoch": 0.4757001647446458, + "grad_norm": 0.5738990306854248, + "learning_rate": 1.7358378265536368e-05, + "loss": 0.5683, + "step": 17325 + }, + { + "epoch": 0.4757276221856123, + "grad_norm": 0.43464571237564087, + "learning_rate": 1.7358085798681045e-05, + "loss": 0.4777, + "step": 17326 + }, + { + "epoch": 0.4757550796265788, + "grad_norm": 0.33663681149482727, + "learning_rate": 1.7357793318100543e-05, + "loss": 0.4766, + "step": 17327 + }, + { + "epoch": 0.4757825370675453, + "grad_norm": 0.4021676182746887, + "learning_rate": 1.7357500823795406e-05, + "loss": 0.5029, + "step": 17328 + }, + { + "epoch": 0.4758099945085118, + "grad_norm": 0.37662041187286377, + "learning_rate": 1.735720831576618e-05, + "loss": 0.4684, + "step": 17329 + }, + { + "epoch": 0.4758374519494783, + "grad_norm": 0.38047337532043457, + "learning_rate": 1.7356915794013415e-05, + "loss": 0.5406, + "step": 17330 + }, + { + "epoch": 0.4758649093904448, + "grad_norm": 0.4084271490573883, + "learning_rate": 1.7356623258537656e-05, + "loss": 0.5619, + "step": 17331 + }, + { + "epoch": 0.47589236683141134, + "grad_norm": 0.3907347023487091, + "learning_rate": 1.7356330709339445e-05, + "loss": 0.5044, + "step": 17332 + }, + { + "epoch": 0.47591982427237783, + "grad_norm": 0.3495807647705078, + "learning_rate": 1.735603814641933e-05, + "loss": 0.4672, + "step": 17333 + }, + { + "epoch": 0.47594728171334433, + "grad_norm": 0.35436978936195374, + "learning_rate": 1.7355745569777854e-05, + "loss": 0.4731, + "step": 17334 + }, + { + "epoch": 0.4759747391543108, + "grad_norm": 0.4180624783039093, + "learning_rate": 1.7355452979415566e-05, + "loss": 0.5568, + "step": 17335 + }, + { + "epoch": 0.4760021965952773, + "grad_norm": 0.3657536506652832, + "learning_rate": 1.735516037533301e-05, + "loss": 0.5173, + "step": 17336 + }, + { + "epoch": 0.4760296540362438, + "grad_norm": 0.4378049969673157, + "learning_rate": 1.7354867757530735e-05, + "loss": 0.6014, + "step": 17337 + }, + { + "epoch": 0.4760571114772103, + "grad_norm": 0.43598273396492004, + "learning_rate": 1.735457512600928e-05, + "loss": 0.5198, + "step": 17338 + }, + { + "epoch": 0.47608456891817685, + "grad_norm": 0.38626915216445923, + "learning_rate": 1.7354282480769197e-05, + "loss": 0.4536, + "step": 17339 + }, + { + "epoch": 0.47611202635914335, + "grad_norm": 0.36826831102371216, + "learning_rate": 1.7353989821811034e-05, + "loss": 0.4051, + "step": 17340 + }, + { + "epoch": 0.47613948380010984, + "grad_norm": 0.3723454177379608, + "learning_rate": 1.7353697149135327e-05, + "loss": 0.5006, + "step": 17341 + }, + { + "epoch": 0.47616694124107634, + "grad_norm": 0.3407902717590332, + "learning_rate": 1.735340446274263e-05, + "loss": 0.472, + "step": 17342 + }, + { + "epoch": 0.47619439868204283, + "grad_norm": 0.5272567868232727, + "learning_rate": 1.735311176263349e-05, + "loss": 0.5114, + "step": 17343 + }, + { + "epoch": 0.4762218561230093, + "grad_norm": 0.37096962332725525, + "learning_rate": 1.7352819048808444e-05, + "loss": 0.4979, + "step": 17344 + }, + { + "epoch": 0.4762493135639758, + "grad_norm": 0.4156540334224701, + "learning_rate": 1.7352526321268045e-05, + "loss": 0.5121, + "step": 17345 + }, + { + "epoch": 0.4762767710049423, + "grad_norm": 0.39967218041419983, + "learning_rate": 1.7352233580012838e-05, + "loss": 0.6195, + "step": 17346 + }, + { + "epoch": 0.47630422844590886, + "grad_norm": 0.37623417377471924, + "learning_rate": 1.735194082504337e-05, + "loss": 0.5032, + "step": 17347 + }, + { + "epoch": 0.47633168588687536, + "grad_norm": 0.3679143488407135, + "learning_rate": 1.7351648056360186e-05, + "loss": 0.5125, + "step": 17348 + }, + { + "epoch": 0.47635914332784185, + "grad_norm": 0.4195724427700043, + "learning_rate": 1.7351355273963833e-05, + "loss": 0.5978, + "step": 17349 + }, + { + "epoch": 0.47638660076880834, + "grad_norm": 0.3589080274105072, + "learning_rate": 1.7351062477854854e-05, + "loss": 0.3483, + "step": 17350 + }, + { + "epoch": 0.47641405820977484, + "grad_norm": 0.3734091520309448, + "learning_rate": 1.7350769668033796e-05, + "loss": 0.5217, + "step": 17351 + }, + { + "epoch": 0.47644151565074133, + "grad_norm": 0.34398144483566284, + "learning_rate": 1.735047684450121e-05, + "loss": 0.4923, + "step": 17352 + }, + { + "epoch": 0.4764689730917078, + "grad_norm": 0.3816189765930176, + "learning_rate": 1.7350184007257634e-05, + "loss": 0.4929, + "step": 17353 + }, + { + "epoch": 0.4764964305326744, + "grad_norm": 0.38596463203430176, + "learning_rate": 1.7349891156303623e-05, + "loss": 0.5035, + "step": 17354 + }, + { + "epoch": 0.47652388797364087, + "grad_norm": 0.3599514067173004, + "learning_rate": 1.734959829163972e-05, + "loss": 0.481, + "step": 17355 + }, + { + "epoch": 0.47655134541460736, + "grad_norm": 0.3589027225971222, + "learning_rate": 1.734930541326647e-05, + "loss": 0.5475, + "step": 17356 + }, + { + "epoch": 0.47657880285557386, + "grad_norm": 0.3633432984352112, + "learning_rate": 1.7349012521184416e-05, + "loss": 0.4721, + "step": 17357 + }, + { + "epoch": 0.47660626029654035, + "grad_norm": 0.37459900975227356, + "learning_rate": 1.7348719615394113e-05, + "loss": 0.5462, + "step": 17358 + }, + { + "epoch": 0.47663371773750685, + "grad_norm": 0.3886943459510803, + "learning_rate": 1.7348426695896102e-05, + "loss": 0.5796, + "step": 17359 + }, + { + "epoch": 0.47666117517847334, + "grad_norm": 0.49584949016571045, + "learning_rate": 1.7348133762690927e-05, + "loss": 0.5096, + "step": 17360 + }, + { + "epoch": 0.4766886326194399, + "grad_norm": 0.3323688507080078, + "learning_rate": 1.734784081577914e-05, + "loss": 0.4871, + "step": 17361 + }, + { + "epoch": 0.4767160900604064, + "grad_norm": 0.37866467237472534, + "learning_rate": 1.734754785516128e-05, + "loss": 0.5645, + "step": 17362 + }, + { + "epoch": 0.4767435475013729, + "grad_norm": 0.34868693351745605, + "learning_rate": 1.73472548808379e-05, + "loss": 0.4782, + "step": 17363 + }, + { + "epoch": 0.47677100494233937, + "grad_norm": 0.40447738766670227, + "learning_rate": 1.7346961892809547e-05, + "loss": 0.5112, + "step": 17364 + }, + { + "epoch": 0.47679846238330587, + "grad_norm": 0.357263445854187, + "learning_rate": 1.7346668891076763e-05, + "loss": 0.5571, + "step": 17365 + }, + { + "epoch": 0.47682591982427236, + "grad_norm": 0.34988224506378174, + "learning_rate": 1.7346375875640098e-05, + "loss": 0.4864, + "step": 17366 + }, + { + "epoch": 0.47685337726523885, + "grad_norm": 0.3967995047569275, + "learning_rate": 1.73460828465001e-05, + "loss": 0.5511, + "step": 17367 + }, + { + "epoch": 0.4768808347062054, + "grad_norm": 0.4132901132106781, + "learning_rate": 1.734578980365731e-05, + "loss": 0.6095, + "step": 17368 + }, + { + "epoch": 0.4769082921471719, + "grad_norm": 0.33256980776786804, + "learning_rate": 1.7345496747112276e-05, + "loss": 0.4923, + "step": 17369 + }, + { + "epoch": 0.4769357495881384, + "grad_norm": 0.40854552388191223, + "learning_rate": 1.734520367686555e-05, + "loss": 0.5656, + "step": 17370 + }, + { + "epoch": 0.4769632070291049, + "grad_norm": 0.44924187660217285, + "learning_rate": 1.734491059291767e-05, + "loss": 0.4898, + "step": 17371 + }, + { + "epoch": 0.4769906644700714, + "grad_norm": 0.42592760920524597, + "learning_rate": 1.734461749526919e-05, + "loss": 0.5275, + "step": 17372 + }, + { + "epoch": 0.4770181219110379, + "grad_norm": 0.5115145444869995, + "learning_rate": 1.7344324383920653e-05, + "loss": 0.4352, + "step": 17373 + }, + { + "epoch": 0.47704557935200437, + "grad_norm": 0.40516889095306396, + "learning_rate": 1.7344031258872608e-05, + "loss": 0.4788, + "step": 17374 + }, + { + "epoch": 0.4770730367929709, + "grad_norm": 0.41437679529190063, + "learning_rate": 1.73437381201256e-05, + "loss": 0.477, + "step": 17375 + }, + { + "epoch": 0.4771004942339374, + "grad_norm": 0.3685512840747833, + "learning_rate": 1.734344496768018e-05, + "loss": 0.4909, + "step": 17376 + }, + { + "epoch": 0.4771279516749039, + "grad_norm": 0.3972471058368683, + "learning_rate": 1.734315180153689e-05, + "loss": 0.5639, + "step": 17377 + }, + { + "epoch": 0.4771554091158704, + "grad_norm": 0.416051983833313, + "learning_rate": 1.7342858621696276e-05, + "loss": 0.6111, + "step": 17378 + }, + { + "epoch": 0.4771828665568369, + "grad_norm": 0.3596571683883667, + "learning_rate": 1.7342565428158888e-05, + "loss": 0.5325, + "step": 17379 + }, + { + "epoch": 0.4772103239978034, + "grad_norm": 0.38166409730911255, + "learning_rate": 1.7342272220925273e-05, + "loss": 0.5376, + "step": 17380 + }, + { + "epoch": 0.4772377814387699, + "grad_norm": 0.3678920567035675, + "learning_rate": 1.7341978999995975e-05, + "loss": 0.4626, + "step": 17381 + }, + { + "epoch": 0.47726523887973643, + "grad_norm": 0.35474836826324463, + "learning_rate": 1.7341685765371547e-05, + "loss": 0.513, + "step": 17382 + }, + { + "epoch": 0.4772926963207029, + "grad_norm": 0.38975921273231506, + "learning_rate": 1.7341392517052527e-05, + "loss": 0.4393, + "step": 17383 + }, + { + "epoch": 0.4773201537616694, + "grad_norm": 0.3774014413356781, + "learning_rate": 1.734109925503947e-05, + "loss": 0.496, + "step": 17384 + }, + { + "epoch": 0.4773476112026359, + "grad_norm": 0.319079726934433, + "learning_rate": 1.7340805979332916e-05, + "loss": 0.393, + "step": 17385 + }, + { + "epoch": 0.4773750686436024, + "grad_norm": 0.45210450887680054, + "learning_rate": 1.734051268993342e-05, + "loss": 0.4461, + "step": 17386 + }, + { + "epoch": 0.4774025260845689, + "grad_norm": 0.49747586250305176, + "learning_rate": 1.7340219386841525e-05, + "loss": 0.5088, + "step": 17387 + }, + { + "epoch": 0.4774299835255354, + "grad_norm": 0.35782289505004883, + "learning_rate": 1.7339926070057774e-05, + "loss": 0.5117, + "step": 17388 + }, + { + "epoch": 0.47745744096650194, + "grad_norm": 0.3840772211551666, + "learning_rate": 1.7339632739582725e-05, + "loss": 0.5206, + "step": 17389 + }, + { + "epoch": 0.47748489840746844, + "grad_norm": 0.4035729765892029, + "learning_rate": 1.7339339395416913e-05, + "loss": 0.4382, + "step": 17390 + }, + { + "epoch": 0.47751235584843493, + "grad_norm": 0.41780993342399597, + "learning_rate": 1.733904603756089e-05, + "loss": 0.5717, + "step": 17391 + }, + { + "epoch": 0.4775398132894014, + "grad_norm": 0.35625511407852173, + "learning_rate": 1.7338752666015206e-05, + "loss": 0.5464, + "step": 17392 + }, + { + "epoch": 0.4775672707303679, + "grad_norm": 0.37332481145858765, + "learning_rate": 1.7338459280780405e-05, + "loss": 0.5484, + "step": 17393 + }, + { + "epoch": 0.4775947281713344, + "grad_norm": 0.3746476471424103, + "learning_rate": 1.733816588185704e-05, + "loss": 0.447, + "step": 17394 + }, + { + "epoch": 0.4776221856123009, + "grad_norm": 0.35769957304000854, + "learning_rate": 1.7337872469245647e-05, + "loss": 0.4575, + "step": 17395 + }, + { + "epoch": 0.47764964305326746, + "grad_norm": 0.4278643727302551, + "learning_rate": 1.733757904294678e-05, + "loss": 0.5866, + "step": 17396 + }, + { + "epoch": 0.47767710049423395, + "grad_norm": 0.382748007774353, + "learning_rate": 1.7337285602960993e-05, + "loss": 0.5526, + "step": 17397 + }, + { + "epoch": 0.47770455793520045, + "grad_norm": 0.37162765860557556, + "learning_rate": 1.733699214928882e-05, + "loss": 0.5788, + "step": 17398 + }, + { + "epoch": 0.47773201537616694, + "grad_norm": 0.3568292260169983, + "learning_rate": 1.7336698681930817e-05, + "loss": 0.5862, + "step": 17399 + }, + { + "epoch": 0.47775947281713343, + "grad_norm": 0.6592757105827332, + "learning_rate": 1.733640520088753e-05, + "loss": 0.5302, + "step": 17400 + }, + { + "epoch": 0.47778693025809993, + "grad_norm": 0.3465222418308258, + "learning_rate": 1.7336111706159508e-05, + "loss": 0.4362, + "step": 17401 + }, + { + "epoch": 0.4778143876990664, + "grad_norm": 0.3710457384586334, + "learning_rate": 1.7335818197747292e-05, + "loss": 0.4709, + "step": 17402 + }, + { + "epoch": 0.477841845140033, + "grad_norm": 0.33137112855911255, + "learning_rate": 1.7335524675651435e-05, + "loss": 0.4214, + "step": 17403 + }, + { + "epoch": 0.47786930258099947, + "grad_norm": 0.35789304971694946, + "learning_rate": 1.7335231139872483e-05, + "loss": 0.5367, + "step": 17404 + }, + { + "epoch": 0.47789676002196596, + "grad_norm": 0.36171191930770874, + "learning_rate": 1.7334937590410984e-05, + "loss": 0.5059, + "step": 17405 + }, + { + "epoch": 0.47792421746293245, + "grad_norm": 0.36463484168052673, + "learning_rate": 1.7334644027267487e-05, + "loss": 0.433, + "step": 17406 + }, + { + "epoch": 0.47795167490389895, + "grad_norm": 0.3833870589733124, + "learning_rate": 1.7334350450442534e-05, + "loss": 0.4867, + "step": 17407 + }, + { + "epoch": 0.47797913234486544, + "grad_norm": 0.4165036976337433, + "learning_rate": 1.733405685993668e-05, + "loss": 0.5567, + "step": 17408 + }, + { + "epoch": 0.47800658978583194, + "grad_norm": 0.35925546288490295, + "learning_rate": 1.7333763255750467e-05, + "loss": 0.4963, + "step": 17409 + }, + { + "epoch": 0.4780340472267985, + "grad_norm": 0.3920578956604004, + "learning_rate": 1.7333469637884447e-05, + "loss": 0.5691, + "step": 17410 + }, + { + "epoch": 0.478061504667765, + "grad_norm": 0.4162949323654175, + "learning_rate": 1.7333176006339164e-05, + "loss": 0.5596, + "step": 17411 + }, + { + "epoch": 0.4780889621087315, + "grad_norm": 0.3206687271595001, + "learning_rate": 1.7332882361115172e-05, + "loss": 0.3999, + "step": 17412 + }, + { + "epoch": 0.47811641954969797, + "grad_norm": 0.4134502112865448, + "learning_rate": 1.7332588702213008e-05, + "loss": 0.5225, + "step": 17413 + }, + { + "epoch": 0.47814387699066446, + "grad_norm": 0.32541438937187195, + "learning_rate": 1.7332295029633227e-05, + "loss": 0.4886, + "step": 17414 + }, + { + "epoch": 0.47817133443163096, + "grad_norm": 0.7432602047920227, + "learning_rate": 1.7332001343376378e-05, + "loss": 0.3887, + "step": 17415 + }, + { + "epoch": 0.47819879187259745, + "grad_norm": 0.3295065462589264, + "learning_rate": 1.7331707643443003e-05, + "loss": 0.4282, + "step": 17416 + }, + { + "epoch": 0.478226249313564, + "grad_norm": 0.3790801465511322, + "learning_rate": 1.7331413929833657e-05, + "loss": 0.5246, + "step": 17417 + }, + { + "epoch": 0.4782537067545305, + "grad_norm": 0.40568724274635315, + "learning_rate": 1.733112020254888e-05, + "loss": 0.5278, + "step": 17418 + }, + { + "epoch": 0.478281164195497, + "grad_norm": 0.3514539301395416, + "learning_rate": 1.7330826461589227e-05, + "loss": 0.474, + "step": 17419 + }, + { + "epoch": 0.4783086216364635, + "grad_norm": 0.4348537027835846, + "learning_rate": 1.7330532706955243e-05, + "loss": 0.508, + "step": 17420 + }, + { + "epoch": 0.47833607907743, + "grad_norm": 0.48672258853912354, + "learning_rate": 1.7330238938647475e-05, + "loss": 0.5473, + "step": 17421 + }, + { + "epoch": 0.47836353651839647, + "grad_norm": 0.357158899307251, + "learning_rate": 1.732994515666647e-05, + "loss": 0.4893, + "step": 17422 + }, + { + "epoch": 0.47839099395936296, + "grad_norm": 0.3885211646556854, + "learning_rate": 1.732965136101278e-05, + "loss": 0.5622, + "step": 17423 + }, + { + "epoch": 0.4784184514003295, + "grad_norm": 0.3971414268016815, + "learning_rate": 1.732935755168695e-05, + "loss": 0.4967, + "step": 17424 + }, + { + "epoch": 0.478445908841296, + "grad_norm": 0.4435804784297943, + "learning_rate": 1.732906372868953e-05, + "loss": 0.5426, + "step": 17425 + }, + { + "epoch": 0.4784733662822625, + "grad_norm": 0.34197506308555603, + "learning_rate": 1.7328769892021067e-05, + "loss": 0.4665, + "step": 17426 + }, + { + "epoch": 0.478500823723229, + "grad_norm": 0.4920865297317505, + "learning_rate": 1.7328476041682108e-05, + "loss": 0.5275, + "step": 17427 + }, + { + "epoch": 0.4785282811641955, + "grad_norm": 0.3378271460533142, + "learning_rate": 1.7328182177673203e-05, + "loss": 0.4022, + "step": 17428 + }, + { + "epoch": 0.478555738605162, + "grad_norm": 0.37281525135040283, + "learning_rate": 1.73278882999949e-05, + "loss": 0.5112, + "step": 17429 + }, + { + "epoch": 0.4785831960461285, + "grad_norm": 0.3964836001396179, + "learning_rate": 1.7327594408647746e-05, + "loss": 0.4992, + "step": 17430 + }, + { + "epoch": 0.47861065348709503, + "grad_norm": 0.4037831127643585, + "learning_rate": 1.7327300503632288e-05, + "loss": 0.5196, + "step": 17431 + }, + { + "epoch": 0.4786381109280615, + "grad_norm": 0.35960671305656433, + "learning_rate": 1.7327006584949076e-05, + "loss": 0.5348, + "step": 17432 + }, + { + "epoch": 0.478665568369028, + "grad_norm": 0.4321916401386261, + "learning_rate": 1.732671265259866e-05, + "loss": 0.4439, + "step": 17433 + }, + { + "epoch": 0.4786930258099945, + "grad_norm": 0.4099774658679962, + "learning_rate": 1.7326418706581586e-05, + "loss": 0.5167, + "step": 17434 + }, + { + "epoch": 0.478720483250961, + "grad_norm": 0.3795802593231201, + "learning_rate": 1.7326124746898405e-05, + "loss": 0.6486, + "step": 17435 + }, + { + "epoch": 0.4787479406919275, + "grad_norm": 0.3593728244304657, + "learning_rate": 1.732583077354966e-05, + "loss": 0.4988, + "step": 17436 + }, + { + "epoch": 0.478775398132894, + "grad_norm": 0.40003809332847595, + "learning_rate": 1.73255367865359e-05, + "loss": 0.5522, + "step": 17437 + }, + { + "epoch": 0.47880285557386054, + "grad_norm": 0.34275415539741516, + "learning_rate": 1.732524278585768e-05, + "loss": 0.4467, + "step": 17438 + }, + { + "epoch": 0.47883031301482704, + "grad_norm": 0.37368786334991455, + "learning_rate": 1.7324948771515547e-05, + "loss": 0.4802, + "step": 17439 + }, + { + "epoch": 0.47885777045579353, + "grad_norm": 0.3622078001499176, + "learning_rate": 1.7324654743510042e-05, + "loss": 0.5243, + "step": 17440 + }, + { + "epoch": 0.47888522789676, + "grad_norm": 0.5485128164291382, + "learning_rate": 1.732436070184172e-05, + "loss": 0.5257, + "step": 17441 + }, + { + "epoch": 0.4789126853377265, + "grad_norm": 0.36230897903442383, + "learning_rate": 1.7324066646511127e-05, + "loss": 0.4803, + "step": 17442 + }, + { + "epoch": 0.478940142778693, + "grad_norm": 0.39385026693344116, + "learning_rate": 1.7323772577518812e-05, + "loss": 0.5651, + "step": 17443 + }, + { + "epoch": 0.4789676002196595, + "grad_norm": 0.3838667571544647, + "learning_rate": 1.7323478494865322e-05, + "loss": 0.4886, + "step": 17444 + }, + { + "epoch": 0.47899505766062606, + "grad_norm": 0.39377760887145996, + "learning_rate": 1.732318439855121e-05, + "loss": 0.552, + "step": 17445 + }, + { + "epoch": 0.47902251510159255, + "grad_norm": 0.33924782276153564, + "learning_rate": 1.732289028857702e-05, + "loss": 0.4368, + "step": 17446 + }, + { + "epoch": 0.47904997254255904, + "grad_norm": 0.3385504186153412, + "learning_rate": 1.7322596164943303e-05, + "loss": 0.5336, + "step": 17447 + }, + { + "epoch": 0.47907742998352554, + "grad_norm": 0.39383769035339355, + "learning_rate": 1.732230202765061e-05, + "loss": 0.5068, + "step": 17448 + }, + { + "epoch": 0.47910488742449203, + "grad_norm": 0.37099388241767883, + "learning_rate": 1.7322007876699484e-05, + "loss": 0.5869, + "step": 17449 + }, + { + "epoch": 0.4791323448654585, + "grad_norm": 0.3543555438518524, + "learning_rate": 1.7321713712090474e-05, + "loss": 0.4848, + "step": 17450 + }, + { + "epoch": 0.479159802306425, + "grad_norm": 0.35517990589141846, + "learning_rate": 1.7321419533824136e-05, + "loss": 0.4658, + "step": 17451 + }, + { + "epoch": 0.47918725974739157, + "grad_norm": 0.4681800305843353, + "learning_rate": 1.7321125341901008e-05, + "loss": 0.5598, + "step": 17452 + }, + { + "epoch": 0.47921471718835806, + "grad_norm": 0.379406601190567, + "learning_rate": 1.732083113632165e-05, + "loss": 0.4897, + "step": 17453 + }, + { + "epoch": 0.47924217462932456, + "grad_norm": 0.39931923151016235, + "learning_rate": 1.73205369170866e-05, + "loss": 0.4873, + "step": 17454 + }, + { + "epoch": 0.47926963207029105, + "grad_norm": 0.4038063585758209, + "learning_rate": 1.732024268419642e-05, + "loss": 0.5414, + "step": 17455 + }, + { + "epoch": 0.47929708951125755, + "grad_norm": 0.39987099170684814, + "learning_rate": 1.7319948437651642e-05, + "loss": 0.6193, + "step": 17456 + }, + { + "epoch": 0.47932454695222404, + "grad_norm": 0.4186311364173889, + "learning_rate": 1.731965417745283e-05, + "loss": 0.467, + "step": 17457 + }, + { + "epoch": 0.47935200439319053, + "grad_norm": 0.41336333751678467, + "learning_rate": 1.7319359903600525e-05, + "loss": 0.5531, + "step": 17458 + }, + { + "epoch": 0.4793794618341571, + "grad_norm": 0.34689077734947205, + "learning_rate": 1.7319065616095278e-05, + "loss": 0.452, + "step": 17459 + }, + { + "epoch": 0.4794069192751236, + "grad_norm": 0.40383651852607727, + "learning_rate": 1.731877131493764e-05, + "loss": 0.5256, + "step": 17460 + }, + { + "epoch": 0.47943437671609007, + "grad_norm": 0.3858388364315033, + "learning_rate": 1.7318477000128153e-05, + "loss": 0.5357, + "step": 17461 + }, + { + "epoch": 0.47946183415705657, + "grad_norm": 0.35135722160339355, + "learning_rate": 1.7318182671667373e-05, + "loss": 0.5068, + "step": 17462 + }, + { + "epoch": 0.47948929159802306, + "grad_norm": 0.3773167133331299, + "learning_rate": 1.7317888329555847e-05, + "loss": 0.5784, + "step": 17463 + }, + { + "epoch": 0.47951674903898955, + "grad_norm": 0.416190505027771, + "learning_rate": 1.7317593973794124e-05, + "loss": 0.5415, + "step": 17464 + }, + { + "epoch": 0.47954420647995605, + "grad_norm": 0.3321760892868042, + "learning_rate": 1.731729960438275e-05, + "loss": 0.4449, + "step": 17465 + }, + { + "epoch": 0.4795716639209226, + "grad_norm": 0.4396505653858185, + "learning_rate": 1.731700522132228e-05, + "loss": 0.5076, + "step": 17466 + }, + { + "epoch": 0.4795991213618891, + "grad_norm": 0.4887462556362152, + "learning_rate": 1.731671082461326e-05, + "loss": 0.4939, + "step": 17467 + }, + { + "epoch": 0.4796265788028556, + "grad_norm": 0.5533541440963745, + "learning_rate": 1.7316416414256238e-05, + "loss": 0.5278, + "step": 17468 + }, + { + "epoch": 0.4796540362438221, + "grad_norm": 0.376334547996521, + "learning_rate": 1.731612199025176e-05, + "loss": 0.5495, + "step": 17469 + }, + { + "epoch": 0.4796814936847886, + "grad_norm": 0.3600611090660095, + "learning_rate": 1.7315827552600387e-05, + "loss": 0.4328, + "step": 17470 + }, + { + "epoch": 0.47970895112575507, + "grad_norm": 0.4096277356147766, + "learning_rate": 1.7315533101302654e-05, + "loss": 0.5295, + "step": 17471 + }, + { + "epoch": 0.47973640856672156, + "grad_norm": 0.37855756282806396, + "learning_rate": 1.731523863635912e-05, + "loss": 0.526, + "step": 17472 + }, + { + "epoch": 0.4797638660076881, + "grad_norm": 0.33261558413505554, + "learning_rate": 1.731494415777033e-05, + "loss": 0.4864, + "step": 17473 + }, + { + "epoch": 0.4797913234486546, + "grad_norm": 0.452090322971344, + "learning_rate": 1.731464966553684e-05, + "loss": 0.56, + "step": 17474 + }, + { + "epoch": 0.4798187808896211, + "grad_norm": 0.39700597524642944, + "learning_rate": 1.731435515965919e-05, + "loss": 0.5507, + "step": 17475 + }, + { + "epoch": 0.4798462383305876, + "grad_norm": 0.440921813249588, + "learning_rate": 1.7314060640137928e-05, + "loss": 0.6252, + "step": 17476 + }, + { + "epoch": 0.4798736957715541, + "grad_norm": 0.3513808250427246, + "learning_rate": 1.7313766106973614e-05, + "loss": 0.488, + "step": 17477 + }, + { + "epoch": 0.4799011532125206, + "grad_norm": 0.3945593535900116, + "learning_rate": 1.731347156016679e-05, + "loss": 0.5702, + "step": 17478 + }, + { + "epoch": 0.4799286106534871, + "grad_norm": 0.3711896240711212, + "learning_rate": 1.731317699971801e-05, + "loss": 0.4465, + "step": 17479 + }, + { + "epoch": 0.47995606809445357, + "grad_norm": 1.2150377035140991, + "learning_rate": 1.731288242562782e-05, + "loss": 0.3888, + "step": 17480 + }, + { + "epoch": 0.4799835255354201, + "grad_norm": 0.3237757682800293, + "learning_rate": 1.7312587837896767e-05, + "loss": 0.5429, + "step": 17481 + }, + { + "epoch": 0.4800109829763866, + "grad_norm": 0.3663346767425537, + "learning_rate": 1.7312293236525407e-05, + "loss": 0.5199, + "step": 17482 + }, + { + "epoch": 0.4800384404173531, + "grad_norm": 0.4141944646835327, + "learning_rate": 1.731199862151429e-05, + "loss": 0.502, + "step": 17483 + }, + { + "epoch": 0.4800658978583196, + "grad_norm": 0.34636250138282776, + "learning_rate": 1.7311703992863954e-05, + "loss": 0.4012, + "step": 17484 + }, + { + "epoch": 0.4800933552992861, + "grad_norm": 0.378449022769928, + "learning_rate": 1.731140935057496e-05, + "loss": 0.5304, + "step": 17485 + }, + { + "epoch": 0.4801208127402526, + "grad_norm": 0.35368916392326355, + "learning_rate": 1.7311114694647853e-05, + "loss": 0.4437, + "step": 17486 + }, + { + "epoch": 0.4801482701812191, + "grad_norm": 0.3583977222442627, + "learning_rate": 1.7310820025083185e-05, + "loss": 0.5821, + "step": 17487 + }, + { + "epoch": 0.48017572762218563, + "grad_norm": 0.3363000452518463, + "learning_rate": 1.7310525341881503e-05, + "loss": 0.4685, + "step": 17488 + }, + { + "epoch": 0.4802031850631521, + "grad_norm": 0.3884904086589813, + "learning_rate": 1.731023064504336e-05, + "loss": 0.4743, + "step": 17489 + }, + { + "epoch": 0.4802306425041186, + "grad_norm": 0.40167930722236633, + "learning_rate": 1.73099359345693e-05, + "loss": 0.5354, + "step": 17490 + }, + { + "epoch": 0.4802580999450851, + "grad_norm": 0.3394714593887329, + "learning_rate": 1.730964121045988e-05, + "loss": 0.4821, + "step": 17491 + }, + { + "epoch": 0.4802855573860516, + "grad_norm": 0.4138287603855133, + "learning_rate": 1.7309346472715647e-05, + "loss": 0.5194, + "step": 17492 + }, + { + "epoch": 0.4803130148270181, + "grad_norm": 0.37954211235046387, + "learning_rate": 1.730905172133715e-05, + "loss": 0.4895, + "step": 17493 + }, + { + "epoch": 0.4803404722679846, + "grad_norm": 0.4558545649051666, + "learning_rate": 1.7308756956324933e-05, + "loss": 0.5496, + "step": 17494 + }, + { + "epoch": 0.48036792970895115, + "grad_norm": 0.35365408658981323, + "learning_rate": 1.730846217767956e-05, + "loss": 0.461, + "step": 17495 + }, + { + "epoch": 0.48039538714991764, + "grad_norm": 0.38356998562812805, + "learning_rate": 1.7308167385401568e-05, + "loss": 0.5622, + "step": 17496 + }, + { + "epoch": 0.48042284459088413, + "grad_norm": 0.4530021846294403, + "learning_rate": 1.730787257949151e-05, + "loss": 0.5308, + "step": 17497 + }, + { + "epoch": 0.48045030203185063, + "grad_norm": 0.38855478167533875, + "learning_rate": 1.7307577759949938e-05, + "loss": 0.5576, + "step": 17498 + }, + { + "epoch": 0.4804777594728171, + "grad_norm": 0.3981868624687195, + "learning_rate": 1.73072829267774e-05, + "loss": 0.5289, + "step": 17499 + }, + { + "epoch": 0.4805052169137836, + "grad_norm": 0.3578651547431946, + "learning_rate": 1.7306988079974448e-05, + "loss": 0.5111, + "step": 17500 + }, + { + "epoch": 0.4805326743547501, + "grad_norm": 0.7941781282424927, + "learning_rate": 1.7306693219541632e-05, + "loss": 0.5024, + "step": 17501 + }, + { + "epoch": 0.48056013179571666, + "grad_norm": 0.3841637670993805, + "learning_rate": 1.7306398345479503e-05, + "loss": 0.617, + "step": 17502 + }, + { + "epoch": 0.48058758923668315, + "grad_norm": 0.37433546781539917, + "learning_rate": 1.7306103457788608e-05, + "loss": 0.5372, + "step": 17503 + }, + { + "epoch": 0.48061504667764965, + "grad_norm": 0.3689453601837158, + "learning_rate": 1.7305808556469497e-05, + "loss": 0.5285, + "step": 17504 + }, + { + "epoch": 0.48064250411861614, + "grad_norm": 0.3881574869155884, + "learning_rate": 1.730551364152272e-05, + "loss": 0.5139, + "step": 17505 + }, + { + "epoch": 0.48066996155958264, + "grad_norm": 0.349735289812088, + "learning_rate": 1.7305218712948832e-05, + "loss": 0.4557, + "step": 17506 + }, + { + "epoch": 0.48069741900054913, + "grad_norm": 0.40166714787483215, + "learning_rate": 1.7304923770748378e-05, + "loss": 0.6314, + "step": 17507 + }, + { + "epoch": 0.4807248764415156, + "grad_norm": 0.3618466258049011, + "learning_rate": 1.7304628814921908e-05, + "loss": 0.5358, + "step": 17508 + }, + { + "epoch": 0.4807523338824822, + "grad_norm": 0.36432695388793945, + "learning_rate": 1.7304333845469975e-05, + "loss": 0.4864, + "step": 17509 + }, + { + "epoch": 0.48077979132344867, + "grad_norm": 0.34232115745544434, + "learning_rate": 1.7304038862393127e-05, + "loss": 0.4741, + "step": 17510 + }, + { + "epoch": 0.48080724876441516, + "grad_norm": 0.4414617717266083, + "learning_rate": 1.7303743865691917e-05, + "loss": 0.5352, + "step": 17511 + }, + { + "epoch": 0.48083470620538166, + "grad_norm": 0.41822654008865356, + "learning_rate": 1.7303448855366897e-05, + "loss": 0.4749, + "step": 17512 + }, + { + "epoch": 0.48086216364634815, + "grad_norm": 0.32531991600990295, + "learning_rate": 1.7303153831418605e-05, + "loss": 0.5071, + "step": 17513 + }, + { + "epoch": 0.48088962108731464, + "grad_norm": 0.3722848892211914, + "learning_rate": 1.7302858793847608e-05, + "loss": 0.4415, + "step": 17514 + }, + { + "epoch": 0.48091707852828114, + "grad_norm": 0.3323245048522949, + "learning_rate": 1.7302563742654444e-05, + "loss": 0.4227, + "step": 17515 + }, + { + "epoch": 0.4809445359692477, + "grad_norm": 0.36743345856666565, + "learning_rate": 1.7302268677839672e-05, + "loss": 0.4705, + "step": 17516 + }, + { + "epoch": 0.4809719934102142, + "grad_norm": 0.3477756083011627, + "learning_rate": 1.7301973599403835e-05, + "loss": 0.4857, + "step": 17517 + }, + { + "epoch": 0.4809994508511807, + "grad_norm": 0.42857837677001953, + "learning_rate": 1.7301678507347488e-05, + "loss": 0.4849, + "step": 17518 + }, + { + "epoch": 0.48102690829214717, + "grad_norm": 0.37254664301872253, + "learning_rate": 1.730138340167118e-05, + "loss": 0.5707, + "step": 17519 + }, + { + "epoch": 0.48105436573311366, + "grad_norm": 0.3649071753025055, + "learning_rate": 1.730108828237546e-05, + "loss": 0.4952, + "step": 17520 + }, + { + "epoch": 0.48108182317408016, + "grad_norm": 0.40212082862854004, + "learning_rate": 1.7300793149460884e-05, + "loss": 0.5023, + "step": 17521 + }, + { + "epoch": 0.48110928061504665, + "grad_norm": 0.39454036951065063, + "learning_rate": 1.7300498002927996e-05, + "loss": 0.5574, + "step": 17522 + }, + { + "epoch": 0.4811367380560132, + "grad_norm": 0.37781229615211487, + "learning_rate": 1.7300202842777348e-05, + "loss": 0.5258, + "step": 17523 + }, + { + "epoch": 0.4811641954969797, + "grad_norm": 0.3983345031738281, + "learning_rate": 1.7299907669009494e-05, + "loss": 0.4984, + "step": 17524 + }, + { + "epoch": 0.4811916529379462, + "grad_norm": 8.726655960083008, + "learning_rate": 1.7299612481624983e-05, + "loss": 0.5938, + "step": 17525 + }, + { + "epoch": 0.4812191103789127, + "grad_norm": 0.36584287881851196, + "learning_rate": 1.7299317280624362e-05, + "loss": 0.5196, + "step": 17526 + }, + { + "epoch": 0.4812465678198792, + "grad_norm": 0.36755555868148804, + "learning_rate": 1.729902206600819e-05, + "loss": 0.5884, + "step": 17527 + }, + { + "epoch": 0.4812740252608457, + "grad_norm": 0.45232316851615906, + "learning_rate": 1.7298726837777007e-05, + "loss": 0.5614, + "step": 17528 + }, + { + "epoch": 0.48130148270181217, + "grad_norm": 0.40691834688186646, + "learning_rate": 1.729843159593137e-05, + "loss": 0.5021, + "step": 17529 + }, + { + "epoch": 0.4813289401427787, + "grad_norm": 0.395847886800766, + "learning_rate": 1.7298136340471827e-05, + "loss": 0.5716, + "step": 17530 + }, + { + "epoch": 0.4813563975837452, + "grad_norm": 0.37636908888816833, + "learning_rate": 1.7297841071398936e-05, + "loss": 0.5358, + "step": 17531 + }, + { + "epoch": 0.4813838550247117, + "grad_norm": 0.378365159034729, + "learning_rate": 1.7297545788713236e-05, + "loss": 0.5223, + "step": 17532 + }, + { + "epoch": 0.4814113124656782, + "grad_norm": 0.32043442130088806, + "learning_rate": 1.7297250492415287e-05, + "loss": 0.4096, + "step": 17533 + }, + { + "epoch": 0.4814387699066447, + "grad_norm": 0.48982542753219604, + "learning_rate": 1.7296955182505636e-05, + "loss": 0.5413, + "step": 17534 + }, + { + "epoch": 0.4814662273476112, + "grad_norm": 0.42933884263038635, + "learning_rate": 1.7296659858984837e-05, + "loss": 0.5423, + "step": 17535 + }, + { + "epoch": 0.4814936847885777, + "grad_norm": 0.3843173086643219, + "learning_rate": 1.729636452185344e-05, + "loss": 0.495, + "step": 17536 + }, + { + "epoch": 0.48152114222954423, + "grad_norm": 0.37789106369018555, + "learning_rate": 1.7296069171111986e-05, + "loss": 0.4728, + "step": 17537 + }, + { + "epoch": 0.4815485996705107, + "grad_norm": 0.34102383255958557, + "learning_rate": 1.729577380676104e-05, + "loss": 0.4965, + "step": 17538 + }, + { + "epoch": 0.4815760571114772, + "grad_norm": 0.3841340243816376, + "learning_rate": 1.7295478428801147e-05, + "loss": 0.5327, + "step": 17539 + }, + { + "epoch": 0.4816035145524437, + "grad_norm": 0.39339905977249146, + "learning_rate": 1.7295183037232858e-05, + "loss": 0.5008, + "step": 17540 + }, + { + "epoch": 0.4816309719934102, + "grad_norm": 0.37303659319877625, + "learning_rate": 1.7294887632056724e-05, + "loss": 0.4647, + "step": 17541 + }, + { + "epoch": 0.4816584294343767, + "grad_norm": 0.38660871982574463, + "learning_rate": 1.7294592213273298e-05, + "loss": 0.5364, + "step": 17542 + }, + { + "epoch": 0.4816858868753432, + "grad_norm": 0.34620922803878784, + "learning_rate": 1.7294296780883127e-05, + "loss": 0.5384, + "step": 17543 + }, + { + "epoch": 0.48171334431630974, + "grad_norm": 0.35966548323631287, + "learning_rate": 1.7294001334886768e-05, + "loss": 0.5636, + "step": 17544 + }, + { + "epoch": 0.48174080175727624, + "grad_norm": 0.36454638838768005, + "learning_rate": 1.7293705875284766e-05, + "loss": 0.4932, + "step": 17545 + }, + { + "epoch": 0.48176825919824273, + "grad_norm": 0.33886945247650146, + "learning_rate": 1.7293410402077673e-05, + "loss": 0.5102, + "step": 17546 + }, + { + "epoch": 0.4817957166392092, + "grad_norm": 0.3629171848297119, + "learning_rate": 1.7293114915266046e-05, + "loss": 0.4913, + "step": 17547 + }, + { + "epoch": 0.4818231740801757, + "grad_norm": 0.35926923155784607, + "learning_rate": 1.729281941485043e-05, + "loss": 0.4538, + "step": 17548 + }, + { + "epoch": 0.4818506315211422, + "grad_norm": 0.4834381937980652, + "learning_rate": 1.7292523900831375e-05, + "loss": 0.5516, + "step": 17549 + }, + { + "epoch": 0.4818780889621087, + "grad_norm": 0.3769710063934326, + "learning_rate": 1.729222837320944e-05, + "loss": 0.5632, + "step": 17550 + }, + { + "epoch": 0.48190554640307526, + "grad_norm": 0.37136945128440857, + "learning_rate": 1.7291932831985167e-05, + "loss": 0.5031, + "step": 17551 + }, + { + "epoch": 0.48193300384404175, + "grad_norm": 0.4483974575996399, + "learning_rate": 1.7291637277159114e-05, + "loss": 0.518, + "step": 17552 + }, + { + "epoch": 0.48196046128500825, + "grad_norm": 0.3857955038547516, + "learning_rate": 1.7291341708731834e-05, + "loss": 0.4514, + "step": 17553 + }, + { + "epoch": 0.48198791872597474, + "grad_norm": 0.4026235044002533, + "learning_rate": 1.729104612670387e-05, + "loss": 0.4607, + "step": 17554 + }, + { + "epoch": 0.48201537616694123, + "grad_norm": 0.3762950599193573, + "learning_rate": 1.729075053107578e-05, + "loss": 0.5142, + "step": 17555 + }, + { + "epoch": 0.48204283360790773, + "grad_norm": 0.3551711142063141, + "learning_rate": 1.7290454921848116e-05, + "loss": 0.5151, + "step": 17556 + }, + { + "epoch": 0.4820702910488742, + "grad_norm": 0.47367215156555176, + "learning_rate": 1.729015929902142e-05, + "loss": 0.5051, + "step": 17557 + }, + { + "epoch": 0.48209774848984077, + "grad_norm": 0.36164391040802, + "learning_rate": 1.7289863662596254e-05, + "loss": 0.5294, + "step": 17558 + }, + { + "epoch": 0.48212520593080727, + "grad_norm": 0.3566454350948334, + "learning_rate": 1.7289568012573164e-05, + "loss": 0.4389, + "step": 17559 + }, + { + "epoch": 0.48215266337177376, + "grad_norm": 0.39334964752197266, + "learning_rate": 1.7289272348952706e-05, + "loss": 0.4705, + "step": 17560 + }, + { + "epoch": 0.48218012081274025, + "grad_norm": 0.39020371437072754, + "learning_rate": 1.7288976671735428e-05, + "loss": 0.4926, + "step": 17561 + }, + { + "epoch": 0.48220757825370675, + "grad_norm": 0.685217022895813, + "learning_rate": 1.728868098092188e-05, + "loss": 0.486, + "step": 17562 + }, + { + "epoch": 0.48223503569467324, + "grad_norm": 0.3806471526622772, + "learning_rate": 1.728838527651262e-05, + "loss": 0.5608, + "step": 17563 + }, + { + "epoch": 0.48226249313563974, + "grad_norm": 0.4207320213317871, + "learning_rate": 1.728808955850819e-05, + "loss": 0.5379, + "step": 17564 + }, + { + "epoch": 0.4822899505766063, + "grad_norm": 0.49942415952682495, + "learning_rate": 1.728779382690915e-05, + "loss": 0.5297, + "step": 17565 + }, + { + "epoch": 0.4823174080175728, + "grad_norm": 0.35278117656707764, + "learning_rate": 1.7287498081716048e-05, + "loss": 0.4864, + "step": 17566 + }, + { + "epoch": 0.4823448654585393, + "grad_norm": 0.3763061761856079, + "learning_rate": 1.7287202322929436e-05, + "loss": 0.4749, + "step": 17567 + }, + { + "epoch": 0.48237232289950577, + "grad_norm": 0.4010665714740753, + "learning_rate": 1.7286906550549866e-05, + "loss": 0.5812, + "step": 17568 + }, + { + "epoch": 0.48239978034047226, + "grad_norm": 0.38473519682884216, + "learning_rate": 1.7286610764577887e-05, + "loss": 0.5492, + "step": 17569 + }, + { + "epoch": 0.48242723778143876, + "grad_norm": 0.34626853466033936, + "learning_rate": 1.7286314965014055e-05, + "loss": 0.4982, + "step": 17570 + }, + { + "epoch": 0.48245469522240525, + "grad_norm": 0.34542596340179443, + "learning_rate": 1.7286019151858922e-05, + "loss": 0.4617, + "step": 17571 + }, + { + "epoch": 0.4824821526633718, + "grad_norm": 1.4327795505523682, + "learning_rate": 1.7285723325113037e-05, + "loss": 0.6346, + "step": 17572 + }, + { + "epoch": 0.4825096101043383, + "grad_norm": 0.3287796974182129, + "learning_rate": 1.7285427484776952e-05, + "loss": 0.4203, + "step": 17573 + }, + { + "epoch": 0.4825370675453048, + "grad_norm": 0.3998962640762329, + "learning_rate": 1.728513163085122e-05, + "loss": 0.5498, + "step": 17574 + }, + { + "epoch": 0.4825645249862713, + "grad_norm": 0.3500562906265259, + "learning_rate": 1.7284835763336392e-05, + "loss": 0.5026, + "step": 17575 + }, + { + "epoch": 0.4825919824272378, + "grad_norm": 0.366131991147995, + "learning_rate": 1.728453988223302e-05, + "loss": 0.4747, + "step": 17576 + }, + { + "epoch": 0.48261943986820427, + "grad_norm": 0.39842483401298523, + "learning_rate": 1.7284243987541656e-05, + "loss": 0.5342, + "step": 17577 + }, + { + "epoch": 0.48264689730917076, + "grad_norm": 0.43158096075057983, + "learning_rate": 1.7283948079262853e-05, + "loss": 0.4946, + "step": 17578 + }, + { + "epoch": 0.4826743547501373, + "grad_norm": 0.3594059348106384, + "learning_rate": 1.728365215739716e-05, + "loss": 0.5607, + "step": 17579 + }, + { + "epoch": 0.4827018121911038, + "grad_norm": 0.3587777614593506, + "learning_rate": 1.7283356221945134e-05, + "loss": 0.546, + "step": 17580 + }, + { + "epoch": 0.4827292696320703, + "grad_norm": 0.3895912766456604, + "learning_rate": 1.7283060272907322e-05, + "loss": 0.5415, + "step": 17581 + }, + { + "epoch": 0.4827567270730368, + "grad_norm": 0.39170390367507935, + "learning_rate": 1.728276431028428e-05, + "loss": 0.5401, + "step": 17582 + }, + { + "epoch": 0.4827841845140033, + "grad_norm": 0.3051326274871826, + "learning_rate": 1.7282468334076556e-05, + "loss": 0.4546, + "step": 17583 + }, + { + "epoch": 0.4828116419549698, + "grad_norm": 0.32393914461135864, + "learning_rate": 1.7282172344284706e-05, + "loss": 0.5166, + "step": 17584 + }, + { + "epoch": 0.4828390993959363, + "grad_norm": 0.38260024785995483, + "learning_rate": 1.728187634090928e-05, + "loss": 0.4789, + "step": 17585 + }, + { + "epoch": 0.4828665568369028, + "grad_norm": 0.3701643645763397, + "learning_rate": 1.728158032395083e-05, + "loss": 0.529, + "step": 17586 + }, + { + "epoch": 0.4828940142778693, + "grad_norm": 0.4043022692203522, + "learning_rate": 1.728128429340991e-05, + "loss": 0.5084, + "step": 17587 + }, + { + "epoch": 0.4829214717188358, + "grad_norm": 0.39740023016929626, + "learning_rate": 1.7280988249287068e-05, + "loss": 0.5231, + "step": 17588 + }, + { + "epoch": 0.4829489291598023, + "grad_norm": 0.3731479346752167, + "learning_rate": 1.728069219158286e-05, + "loss": 0.5639, + "step": 17589 + }, + { + "epoch": 0.4829763866007688, + "grad_norm": 0.31423524022102356, + "learning_rate": 1.728039612029784e-05, + "loss": 0.3872, + "step": 17590 + }, + { + "epoch": 0.4830038440417353, + "grad_norm": 0.35348615050315857, + "learning_rate": 1.7280100035432554e-05, + "loss": 0.4287, + "step": 17591 + }, + { + "epoch": 0.4830313014827018, + "grad_norm": 0.3880104422569275, + "learning_rate": 1.727980393698756e-05, + "loss": 0.5239, + "step": 17592 + }, + { + "epoch": 0.48305875892366834, + "grad_norm": 0.3838599622249603, + "learning_rate": 1.7279507824963408e-05, + "loss": 0.4896, + "step": 17593 + }, + { + "epoch": 0.48308621636463484, + "grad_norm": 0.3518800437450409, + "learning_rate": 1.727921169936065e-05, + "loss": 0.4628, + "step": 17594 + }, + { + "epoch": 0.48311367380560133, + "grad_norm": 0.3722374141216278, + "learning_rate": 1.7278915560179838e-05, + "loss": 0.5902, + "step": 17595 + }, + { + "epoch": 0.4831411312465678, + "grad_norm": 0.41177743673324585, + "learning_rate": 1.7278619407421526e-05, + "loss": 0.5467, + "step": 17596 + }, + { + "epoch": 0.4831685886875343, + "grad_norm": 0.4365810751914978, + "learning_rate": 1.7278323241086268e-05, + "loss": 0.5168, + "step": 17597 + }, + { + "epoch": 0.4831960461285008, + "grad_norm": 0.363593727350235, + "learning_rate": 1.727802706117461e-05, + "loss": 0.469, + "step": 17598 + }, + { + "epoch": 0.4832235035694673, + "grad_norm": 0.5995293259620667, + "learning_rate": 1.7277730867687115e-05, + "loss": 0.4623, + "step": 17599 + }, + { + "epoch": 0.48325096101043385, + "grad_norm": 0.35891056060791016, + "learning_rate": 1.7277434660624324e-05, + "loss": 0.5226, + "step": 17600 + }, + { + "epoch": 0.48327841845140035, + "grad_norm": 0.3373907804489136, + "learning_rate": 1.7277138439986797e-05, + "loss": 0.4773, + "step": 17601 + }, + { + "epoch": 0.48330587589236684, + "grad_norm": 0.38734203577041626, + "learning_rate": 1.7276842205775083e-05, + "loss": 0.4817, + "step": 17602 + }, + { + "epoch": 0.48333333333333334, + "grad_norm": 0.369619220495224, + "learning_rate": 1.7276545957989736e-05, + "loss": 0.504, + "step": 17603 + }, + { + "epoch": 0.48336079077429983, + "grad_norm": 0.4390208423137665, + "learning_rate": 1.727624969663131e-05, + "loss": 0.4843, + "step": 17604 + }, + { + "epoch": 0.4833882482152663, + "grad_norm": 0.35928797721862793, + "learning_rate": 1.727595342170035e-05, + "loss": 0.5454, + "step": 17605 + }, + { + "epoch": 0.4834157056562328, + "grad_norm": 0.4214595854282379, + "learning_rate": 1.7275657133197422e-05, + "loss": 0.4239, + "step": 17606 + }, + { + "epoch": 0.48344316309719937, + "grad_norm": 0.33992287516593933, + "learning_rate": 1.7275360831123068e-05, + "loss": 0.4196, + "step": 17607 + }, + { + "epoch": 0.48347062053816586, + "grad_norm": 0.399882972240448, + "learning_rate": 1.7275064515477843e-05, + "loss": 0.4567, + "step": 17608 + }, + { + "epoch": 0.48349807797913236, + "grad_norm": 0.38239866495132446, + "learning_rate": 1.7274768186262303e-05, + "loss": 0.5698, + "step": 17609 + }, + { + "epoch": 0.48352553542009885, + "grad_norm": 0.3959523141384125, + "learning_rate": 1.7274471843476997e-05, + "loss": 0.5248, + "step": 17610 + }, + { + "epoch": 0.48355299286106534, + "grad_norm": 0.7146145105361938, + "learning_rate": 1.727417548712248e-05, + "loss": 0.5416, + "step": 17611 + }, + { + "epoch": 0.48358045030203184, + "grad_norm": 0.3821927309036255, + "learning_rate": 1.7273879117199307e-05, + "loss": 0.442, + "step": 17612 + }, + { + "epoch": 0.48360790774299833, + "grad_norm": 0.30212587118148804, + "learning_rate": 1.7273582733708024e-05, + "loss": 0.4278, + "step": 17613 + }, + { + "epoch": 0.4836353651839648, + "grad_norm": 0.4030287265777588, + "learning_rate": 1.7273286336649187e-05, + "loss": 0.4813, + "step": 17614 + }, + { + "epoch": 0.4836628226249314, + "grad_norm": 0.35988548398017883, + "learning_rate": 1.7272989926023352e-05, + "loss": 0.5829, + "step": 17615 + }, + { + "epoch": 0.48369028006589787, + "grad_norm": 0.3900493085384369, + "learning_rate": 1.727269350183107e-05, + "loss": 0.5691, + "step": 17616 + }, + { + "epoch": 0.48371773750686436, + "grad_norm": 0.4267294108867645, + "learning_rate": 1.7272397064072892e-05, + "loss": 0.5188, + "step": 17617 + }, + { + "epoch": 0.48374519494783086, + "grad_norm": 0.369585782289505, + "learning_rate": 1.727210061274937e-05, + "loss": 0.484, + "step": 17618 + }, + { + "epoch": 0.48377265238879735, + "grad_norm": 0.36233434081077576, + "learning_rate": 1.7271804147861063e-05, + "loss": 0.5087, + "step": 17619 + }, + { + "epoch": 0.48380010982976385, + "grad_norm": 0.3509266972541809, + "learning_rate": 1.727150766940852e-05, + "loss": 0.4535, + "step": 17620 + }, + { + "epoch": 0.48382756727073034, + "grad_norm": 0.38026779890060425, + "learning_rate": 1.7271211177392296e-05, + "loss": 0.4999, + "step": 17621 + }, + { + "epoch": 0.4838550247116969, + "grad_norm": 0.36419108510017395, + "learning_rate": 1.7270914671812936e-05, + "loss": 0.5126, + "step": 17622 + }, + { + "epoch": 0.4838824821526634, + "grad_norm": 0.3929356634616852, + "learning_rate": 1.7270618152671007e-05, + "loss": 0.4982, + "step": 17623 + }, + { + "epoch": 0.4839099395936299, + "grad_norm": 0.3975479304790497, + "learning_rate": 1.727032161996705e-05, + "loss": 0.5078, + "step": 17624 + }, + { + "epoch": 0.4839373970345964, + "grad_norm": 0.3420872688293457, + "learning_rate": 1.7270025073701625e-05, + "loss": 0.4021, + "step": 17625 + }, + { + "epoch": 0.48396485447556287, + "grad_norm": 0.37143760919570923, + "learning_rate": 1.726972851387528e-05, + "loss": 0.4966, + "step": 17626 + }, + { + "epoch": 0.48399231191652936, + "grad_norm": 0.38254067301750183, + "learning_rate": 1.7269431940488575e-05, + "loss": 0.5179, + "step": 17627 + }, + { + "epoch": 0.48401976935749585, + "grad_norm": 0.461773157119751, + "learning_rate": 1.7269135353542058e-05, + "loss": 0.4837, + "step": 17628 + }, + { + "epoch": 0.4840472267984624, + "grad_norm": 0.3826406002044678, + "learning_rate": 1.7268838753036282e-05, + "loss": 0.628, + "step": 17629 + }, + { + "epoch": 0.4840746842394289, + "grad_norm": 0.3479669690132141, + "learning_rate": 1.7268542138971802e-05, + "loss": 0.4842, + "step": 17630 + }, + { + "epoch": 0.4841021416803954, + "grad_norm": 0.4209999740123749, + "learning_rate": 1.726824551134917e-05, + "loss": 0.4249, + "step": 17631 + }, + { + "epoch": 0.4841295991213619, + "grad_norm": 0.41264402866363525, + "learning_rate": 1.7267948870168943e-05, + "loss": 0.5495, + "step": 17632 + }, + { + "epoch": 0.4841570565623284, + "grad_norm": 0.5419906973838806, + "learning_rate": 1.7267652215431672e-05, + "loss": 0.6256, + "step": 17633 + }, + { + "epoch": 0.4841845140032949, + "grad_norm": 0.4007457196712494, + "learning_rate": 1.726735554713791e-05, + "loss": 0.5186, + "step": 17634 + }, + { + "epoch": 0.48421197144426137, + "grad_norm": 0.4351632297039032, + "learning_rate": 1.726705886528821e-05, + "loss": 0.5843, + "step": 17635 + }, + { + "epoch": 0.4842394288852279, + "grad_norm": 0.3462987542152405, + "learning_rate": 1.7266762169883125e-05, + "loss": 0.5435, + "step": 17636 + }, + { + "epoch": 0.4842668863261944, + "grad_norm": 0.3828832507133484, + "learning_rate": 1.726646546092321e-05, + "loss": 0.5138, + "step": 17637 + }, + { + "epoch": 0.4842943437671609, + "grad_norm": 0.36050868034362793, + "learning_rate": 1.7266168738409015e-05, + "loss": 0.4904, + "step": 17638 + }, + { + "epoch": 0.4843218012081274, + "grad_norm": 0.3506377041339874, + "learning_rate": 1.72658720023411e-05, + "loss": 0.6007, + "step": 17639 + }, + { + "epoch": 0.4843492586490939, + "grad_norm": 0.3758707642555237, + "learning_rate": 1.7265575252720013e-05, + "loss": 0.4562, + "step": 17640 + }, + { + "epoch": 0.4843767160900604, + "grad_norm": 0.3868147134780884, + "learning_rate": 1.726527848954631e-05, + "loss": 0.5278, + "step": 17641 + }, + { + "epoch": 0.4844041735310269, + "grad_norm": 0.38815340399742126, + "learning_rate": 1.7264981712820542e-05, + "loss": 0.4971, + "step": 17642 + }, + { + "epoch": 0.48443163097199343, + "grad_norm": 0.39664438366889954, + "learning_rate": 1.7264684922543265e-05, + "loss": 0.5281, + "step": 17643 + }, + { + "epoch": 0.4844590884129599, + "grad_norm": 0.35527917742729187, + "learning_rate": 1.7264388118715033e-05, + "loss": 0.4465, + "step": 17644 + }, + { + "epoch": 0.4844865458539264, + "grad_norm": 0.35456177592277527, + "learning_rate": 1.7264091301336398e-05, + "loss": 0.5876, + "step": 17645 + }, + { + "epoch": 0.4845140032948929, + "grad_norm": 0.417082816362381, + "learning_rate": 1.7263794470407915e-05, + "loss": 0.5318, + "step": 17646 + }, + { + "epoch": 0.4845414607358594, + "grad_norm": 0.36527732014656067, + "learning_rate": 1.7263497625930138e-05, + "loss": 0.4935, + "step": 17647 + }, + { + "epoch": 0.4845689181768259, + "grad_norm": 0.33819037675857544, + "learning_rate": 1.7263200767903615e-05, + "loss": 0.447, + "step": 17648 + }, + { + "epoch": 0.4845963756177924, + "grad_norm": 0.3693745732307434, + "learning_rate": 1.726290389632891e-05, + "loss": 0.5062, + "step": 17649 + }, + { + "epoch": 0.48462383305875895, + "grad_norm": 0.3844183087348938, + "learning_rate": 1.726260701120657e-05, + "loss": 0.5564, + "step": 17650 + }, + { + "epoch": 0.48465129049972544, + "grad_norm": 0.505126416683197, + "learning_rate": 1.726231011253715e-05, + "loss": 0.4717, + "step": 17651 + }, + { + "epoch": 0.48467874794069193, + "grad_norm": 0.4464876353740692, + "learning_rate": 1.7262013200321202e-05, + "loss": 0.4816, + "step": 17652 + }, + { + "epoch": 0.48470620538165843, + "grad_norm": 0.3749581277370453, + "learning_rate": 1.726171627455928e-05, + "loss": 0.4694, + "step": 17653 + }, + { + "epoch": 0.4847336628226249, + "grad_norm": 0.3258917033672333, + "learning_rate": 1.726141933525194e-05, + "loss": 0.4424, + "step": 17654 + }, + { + "epoch": 0.4847611202635914, + "grad_norm": 0.38427838683128357, + "learning_rate": 1.7261122382399736e-05, + "loss": 0.5642, + "step": 17655 + }, + { + "epoch": 0.4847885777045579, + "grad_norm": 0.39062902331352234, + "learning_rate": 1.7260825416003225e-05, + "loss": 0.5155, + "step": 17656 + }, + { + "epoch": 0.48481603514552446, + "grad_norm": 0.3522874414920807, + "learning_rate": 1.7260528436062954e-05, + "loss": 0.4645, + "step": 17657 + }, + { + "epoch": 0.48484349258649095, + "grad_norm": 0.3333744704723358, + "learning_rate": 1.726023144257948e-05, + "loss": 0.4059, + "step": 17658 + }, + { + "epoch": 0.48487095002745745, + "grad_norm": 0.35040608048439026, + "learning_rate": 1.725993443555336e-05, + "loss": 0.4422, + "step": 17659 + }, + { + "epoch": 0.48489840746842394, + "grad_norm": 0.38689297437667847, + "learning_rate": 1.7259637414985142e-05, + "loss": 0.4881, + "step": 17660 + }, + { + "epoch": 0.48492586490939044, + "grad_norm": 0.28546157479286194, + "learning_rate": 1.7259340380875384e-05, + "loss": 0.4163, + "step": 17661 + }, + { + "epoch": 0.48495332235035693, + "grad_norm": 0.37033697962760925, + "learning_rate": 1.7259043333224638e-05, + "loss": 0.4612, + "step": 17662 + }, + { + "epoch": 0.4849807797913234, + "grad_norm": 0.36841344833374023, + "learning_rate": 1.725874627203346e-05, + "loss": 0.4873, + "step": 17663 + }, + { + "epoch": 0.48500823723229, + "grad_norm": 0.3662201762199402, + "learning_rate": 1.7258449197302408e-05, + "loss": 0.4551, + "step": 17664 + }, + { + "epoch": 0.48503569467325647, + "grad_norm": 0.3655455708503723, + "learning_rate": 1.7258152109032028e-05, + "loss": 0.4261, + "step": 17665 + }, + { + "epoch": 0.48506315211422296, + "grad_norm": 0.3785841166973114, + "learning_rate": 1.7257855007222877e-05, + "loss": 0.4984, + "step": 17666 + }, + { + "epoch": 0.48509060955518946, + "grad_norm": 0.38041505217552185, + "learning_rate": 1.725755789187551e-05, + "loss": 0.5377, + "step": 17667 + }, + { + "epoch": 0.48511806699615595, + "grad_norm": 0.46540001034736633, + "learning_rate": 1.7257260762990483e-05, + "loss": 0.4513, + "step": 17668 + }, + { + "epoch": 0.48514552443712244, + "grad_norm": 0.4044991433620453, + "learning_rate": 1.7256963620568345e-05, + "loss": 0.5229, + "step": 17669 + }, + { + "epoch": 0.48517298187808894, + "grad_norm": 0.361749529838562, + "learning_rate": 1.7256666464609658e-05, + "loss": 0.5523, + "step": 17670 + }, + { + "epoch": 0.4852004393190555, + "grad_norm": 0.35886600613594055, + "learning_rate": 1.725636929511497e-05, + "loss": 0.4604, + "step": 17671 + }, + { + "epoch": 0.485227896760022, + "grad_norm": 0.37631893157958984, + "learning_rate": 1.725607211208484e-05, + "loss": 0.5914, + "step": 17672 + }, + { + "epoch": 0.4852553542009885, + "grad_norm": 0.4514726996421814, + "learning_rate": 1.7255774915519814e-05, + "loss": 0.6165, + "step": 17673 + }, + { + "epoch": 0.48528281164195497, + "grad_norm": 0.3857898414134979, + "learning_rate": 1.7255477705420455e-05, + "loss": 0.508, + "step": 17674 + }, + { + "epoch": 0.48531026908292146, + "grad_norm": 0.3787430226802826, + "learning_rate": 1.7255180481787315e-05, + "loss": 0.5334, + "step": 17675 + }, + { + "epoch": 0.48533772652388796, + "grad_norm": 0.37013018131256104, + "learning_rate": 1.7254883244620947e-05, + "loss": 0.4507, + "step": 17676 + }, + { + "epoch": 0.48536518396485445, + "grad_norm": 0.3900471329689026, + "learning_rate": 1.7254585993921906e-05, + "loss": 0.5595, + "step": 17677 + }, + { + "epoch": 0.485392641405821, + "grad_norm": 0.4384017884731293, + "learning_rate": 1.7254288729690748e-05, + "loss": 0.5558, + "step": 17678 + }, + { + "epoch": 0.4854200988467875, + "grad_norm": 0.39514613151550293, + "learning_rate": 1.7253991451928025e-05, + "loss": 0.5791, + "step": 17679 + }, + { + "epoch": 0.485447556287754, + "grad_norm": 0.41059666872024536, + "learning_rate": 1.7253694160634293e-05, + "loss": 0.5374, + "step": 17680 + }, + { + "epoch": 0.4854750137287205, + "grad_norm": 0.3740375339984894, + "learning_rate": 1.7253396855810108e-05, + "loss": 0.5159, + "step": 17681 + }, + { + "epoch": 0.485502471169687, + "grad_norm": 0.38929611444473267, + "learning_rate": 1.725309953745602e-05, + "loss": 0.5914, + "step": 17682 + }, + { + "epoch": 0.48552992861065347, + "grad_norm": 0.3967209458351135, + "learning_rate": 1.7252802205572586e-05, + "loss": 0.4576, + "step": 17683 + }, + { + "epoch": 0.48555738605161997, + "grad_norm": 0.36684665083885193, + "learning_rate": 1.7252504860160364e-05, + "loss": 0.4431, + "step": 17684 + }, + { + "epoch": 0.4855848434925865, + "grad_norm": 0.3648223280906677, + "learning_rate": 1.7252207501219905e-05, + "loss": 0.4704, + "step": 17685 + }, + { + "epoch": 0.485612300933553, + "grad_norm": 0.37341010570526123, + "learning_rate": 1.7251910128751763e-05, + "loss": 0.5593, + "step": 17686 + }, + { + "epoch": 0.4856397583745195, + "grad_norm": 0.3693113625049591, + "learning_rate": 1.7251612742756495e-05, + "loss": 0.4869, + "step": 17687 + }, + { + "epoch": 0.485667215815486, + "grad_norm": 0.4018973112106323, + "learning_rate": 1.7251315343234653e-05, + "loss": 0.5608, + "step": 17688 + }, + { + "epoch": 0.4856946732564525, + "grad_norm": 0.3755534291267395, + "learning_rate": 1.7251017930186796e-05, + "loss": 0.5178, + "step": 17689 + }, + { + "epoch": 0.485722130697419, + "grad_norm": 0.368085116147995, + "learning_rate": 1.7250720503613477e-05, + "loss": 0.544, + "step": 17690 + }, + { + "epoch": 0.4857495881383855, + "grad_norm": 0.3669165372848511, + "learning_rate": 1.725042306351525e-05, + "loss": 0.5612, + "step": 17691 + }, + { + "epoch": 0.48577704557935203, + "grad_norm": 0.4460585117340088, + "learning_rate": 1.7250125609892668e-05, + "loss": 0.4415, + "step": 17692 + }, + { + "epoch": 0.4858045030203185, + "grad_norm": 0.32796958088874817, + "learning_rate": 1.7249828142746287e-05, + "loss": 0.4523, + "step": 17693 + }, + { + "epoch": 0.485831960461285, + "grad_norm": 0.37613967061042786, + "learning_rate": 1.7249530662076663e-05, + "loss": 0.4694, + "step": 17694 + }, + { + "epoch": 0.4858594179022515, + "grad_norm": 0.3745920956134796, + "learning_rate": 1.724923316788435e-05, + "loss": 0.5097, + "step": 17695 + }, + { + "epoch": 0.485886875343218, + "grad_norm": 0.4276891350746155, + "learning_rate": 1.724893566016991e-05, + "loss": 0.5917, + "step": 17696 + }, + { + "epoch": 0.4859143327841845, + "grad_norm": 0.46024858951568604, + "learning_rate": 1.7248638138933885e-05, + "loss": 0.6246, + "step": 17697 + }, + { + "epoch": 0.485941790225151, + "grad_norm": 0.4026954770088196, + "learning_rate": 1.7248340604176836e-05, + "loss": 0.5479, + "step": 17698 + }, + { + "epoch": 0.48596924766611754, + "grad_norm": 0.3717985153198242, + "learning_rate": 1.724804305589932e-05, + "loss": 0.5054, + "step": 17699 + }, + { + "epoch": 0.48599670510708404, + "grad_norm": 0.39193832874298096, + "learning_rate": 1.724774549410189e-05, + "loss": 0.5072, + "step": 17700 + }, + { + "epoch": 0.48602416254805053, + "grad_norm": 0.39475783705711365, + "learning_rate": 1.7247447918785104e-05, + "loss": 0.5128, + "step": 17701 + }, + { + "epoch": 0.486051619989017, + "grad_norm": 0.6941933631896973, + "learning_rate": 1.724715032994951e-05, + "loss": 0.5091, + "step": 17702 + }, + { + "epoch": 0.4860790774299835, + "grad_norm": 0.6703605651855469, + "learning_rate": 1.7246852727595672e-05, + "loss": 0.5039, + "step": 17703 + }, + { + "epoch": 0.48610653487095, + "grad_norm": 0.37660545110702515, + "learning_rate": 1.7246555111724136e-05, + "loss": 0.4944, + "step": 17704 + }, + { + "epoch": 0.4861339923119165, + "grad_norm": 0.4162742495536804, + "learning_rate": 1.724625748233547e-05, + "loss": 0.5702, + "step": 17705 + }, + { + "epoch": 0.48616144975288306, + "grad_norm": 0.4582599997520447, + "learning_rate": 1.724595983943021e-05, + "loss": 0.5816, + "step": 17706 + }, + { + "epoch": 0.48618890719384955, + "grad_norm": 0.4030217230319977, + "learning_rate": 1.7245662183008928e-05, + "loss": 0.4908, + "step": 17707 + }, + { + "epoch": 0.48621636463481605, + "grad_norm": 0.4112563133239746, + "learning_rate": 1.7245364513072177e-05, + "loss": 0.433, + "step": 17708 + }, + { + "epoch": 0.48624382207578254, + "grad_norm": 0.3883416950702667, + "learning_rate": 1.7245066829620503e-05, + "loss": 0.541, + "step": 17709 + }, + { + "epoch": 0.48627127951674903, + "grad_norm": 0.3631395995616913, + "learning_rate": 1.724476913265447e-05, + "loss": 0.3886, + "step": 17710 + }, + { + "epoch": 0.4862987369577155, + "grad_norm": 0.3502767086029053, + "learning_rate": 1.724447142217463e-05, + "loss": 0.5055, + "step": 17711 + }, + { + "epoch": 0.486326194398682, + "grad_norm": 0.42143526673316956, + "learning_rate": 1.724417369818154e-05, + "loss": 0.5261, + "step": 17712 + }, + { + "epoch": 0.48635365183964857, + "grad_norm": 0.4726541042327881, + "learning_rate": 1.724387596067575e-05, + "loss": 0.5536, + "step": 17713 + }, + { + "epoch": 0.48638110928061506, + "grad_norm": 0.4135623574256897, + "learning_rate": 1.724357820965782e-05, + "loss": 0.5136, + "step": 17714 + }, + { + "epoch": 0.48640856672158156, + "grad_norm": 0.3986741006374359, + "learning_rate": 1.724328044512831e-05, + "loss": 0.5075, + "step": 17715 + }, + { + "epoch": 0.48643602416254805, + "grad_norm": 0.40754055976867676, + "learning_rate": 1.7242982667087765e-05, + "loss": 0.5188, + "step": 17716 + }, + { + "epoch": 0.48646348160351455, + "grad_norm": 0.35497453808784485, + "learning_rate": 1.7242684875536747e-05, + "loss": 0.4892, + "step": 17717 + }, + { + "epoch": 0.48649093904448104, + "grad_norm": 0.3958967328071594, + "learning_rate": 1.724238707047581e-05, + "loss": 0.5378, + "step": 17718 + }, + { + "epoch": 0.48651839648544754, + "grad_norm": 0.386508971452713, + "learning_rate": 1.724208925190551e-05, + "loss": 0.4982, + "step": 17719 + }, + { + "epoch": 0.4865458539264141, + "grad_norm": 0.3687201142311096, + "learning_rate": 1.7241791419826402e-05, + "loss": 0.5619, + "step": 17720 + }, + { + "epoch": 0.4865733113673806, + "grad_norm": 0.3781968653202057, + "learning_rate": 1.7241493574239043e-05, + "loss": 0.5327, + "step": 17721 + }, + { + "epoch": 0.4866007688083471, + "grad_norm": 0.4008883535861969, + "learning_rate": 1.7241195715143985e-05, + "loss": 0.5526, + "step": 17722 + }, + { + "epoch": 0.48662822624931357, + "grad_norm": 0.3475266098976135, + "learning_rate": 1.7240897842541787e-05, + "loss": 0.4823, + "step": 17723 + }, + { + "epoch": 0.48665568369028006, + "grad_norm": 0.43884342908859253, + "learning_rate": 1.7240599956433e-05, + "loss": 0.5722, + "step": 17724 + }, + { + "epoch": 0.48668314113124655, + "grad_norm": 0.36870115995407104, + "learning_rate": 1.7240302056818188e-05, + "loss": 0.554, + "step": 17725 + }, + { + "epoch": 0.48671059857221305, + "grad_norm": 0.3334406912326813, + "learning_rate": 1.72400041436979e-05, + "loss": 0.5224, + "step": 17726 + }, + { + "epoch": 0.4867380560131796, + "grad_norm": 0.38367241621017456, + "learning_rate": 1.7239706217072694e-05, + "loss": 0.564, + "step": 17727 + }, + { + "epoch": 0.4867655134541461, + "grad_norm": 0.43796539306640625, + "learning_rate": 1.7239408276943126e-05, + "loss": 0.6198, + "step": 17728 + }, + { + "epoch": 0.4867929708951126, + "grad_norm": 0.3972180485725403, + "learning_rate": 1.7239110323309748e-05, + "loss": 0.5096, + "step": 17729 + }, + { + "epoch": 0.4868204283360791, + "grad_norm": 0.38561907410621643, + "learning_rate": 1.7238812356173124e-05, + "loss": 0.5126, + "step": 17730 + }, + { + "epoch": 0.4868478857770456, + "grad_norm": 0.4878607988357544, + "learning_rate": 1.72385143755338e-05, + "loss": 0.5495, + "step": 17731 + }, + { + "epoch": 0.48687534321801207, + "grad_norm": 0.39124777913093567, + "learning_rate": 1.7238216381392337e-05, + "loss": 0.4609, + "step": 17732 + }, + { + "epoch": 0.48690280065897856, + "grad_norm": 0.4016071856021881, + "learning_rate": 1.7237918373749292e-05, + "loss": 0.5538, + "step": 17733 + }, + { + "epoch": 0.4869302580999451, + "grad_norm": 0.39538928866386414, + "learning_rate": 1.7237620352605217e-05, + "loss": 0.4511, + "step": 17734 + }, + { + "epoch": 0.4869577155409116, + "grad_norm": 0.3696497976779938, + "learning_rate": 1.7237322317960673e-05, + "loss": 0.5199, + "step": 17735 + }, + { + "epoch": 0.4869851729818781, + "grad_norm": 0.39079752564430237, + "learning_rate": 1.7237024269816213e-05, + "loss": 0.4762, + "step": 17736 + }, + { + "epoch": 0.4870126304228446, + "grad_norm": 0.3570830523967743, + "learning_rate": 1.723672620817239e-05, + "loss": 0.4736, + "step": 17737 + }, + { + "epoch": 0.4870400878638111, + "grad_norm": 0.3696151077747345, + "learning_rate": 1.7236428133029763e-05, + "loss": 0.5347, + "step": 17738 + }, + { + "epoch": 0.4870675453047776, + "grad_norm": 0.3928174376487732, + "learning_rate": 1.723613004438889e-05, + "loss": 0.534, + "step": 17739 + }, + { + "epoch": 0.4870950027457441, + "grad_norm": 0.364764541387558, + "learning_rate": 1.7235831942250323e-05, + "loss": 0.5242, + "step": 17740 + }, + { + "epoch": 0.4871224601867106, + "grad_norm": 0.3651586174964905, + "learning_rate": 1.723553382661462e-05, + "loss": 0.4969, + "step": 17741 + }, + { + "epoch": 0.4871499176276771, + "grad_norm": 0.34638652205467224, + "learning_rate": 1.723523569748234e-05, + "loss": 0.4894, + "step": 17742 + }, + { + "epoch": 0.4871773750686436, + "grad_norm": 0.42067965865135193, + "learning_rate": 1.7234937554854036e-05, + "loss": 0.5934, + "step": 17743 + }, + { + "epoch": 0.4872048325096101, + "grad_norm": 0.4183940589427948, + "learning_rate": 1.7234639398730263e-05, + "loss": 0.4686, + "step": 17744 + }, + { + "epoch": 0.4872322899505766, + "grad_norm": 0.3619309365749359, + "learning_rate": 1.7234341229111574e-05, + "loss": 0.5376, + "step": 17745 + }, + { + "epoch": 0.4872597473915431, + "grad_norm": 0.3527815043926239, + "learning_rate": 1.7234043045998536e-05, + "loss": 0.5539, + "step": 17746 + }, + { + "epoch": 0.4872872048325096, + "grad_norm": 0.41010531783103943, + "learning_rate": 1.7233744849391695e-05, + "loss": 0.4477, + "step": 17747 + }, + { + "epoch": 0.4873146622734761, + "grad_norm": 0.391961932182312, + "learning_rate": 1.7233446639291614e-05, + "loss": 0.5731, + "step": 17748 + }, + { + "epoch": 0.48734211971444263, + "grad_norm": 0.4832632839679718, + "learning_rate": 1.7233148415698845e-05, + "loss": 0.5361, + "step": 17749 + }, + { + "epoch": 0.48736957715540913, + "grad_norm": 0.3956652581691742, + "learning_rate": 1.7232850178613948e-05, + "loss": 0.5125, + "step": 17750 + }, + { + "epoch": 0.4873970345963756, + "grad_norm": 0.3980465531349182, + "learning_rate": 1.7232551928037474e-05, + "loss": 0.6334, + "step": 17751 + }, + { + "epoch": 0.4874244920373421, + "grad_norm": 0.35887694358825684, + "learning_rate": 1.7232253663969983e-05, + "loss": 0.5906, + "step": 17752 + }, + { + "epoch": 0.4874519494783086, + "grad_norm": 0.3828239142894745, + "learning_rate": 1.723195538641203e-05, + "loss": 0.5064, + "step": 17753 + }, + { + "epoch": 0.4874794069192751, + "grad_norm": 0.3923206627368927, + "learning_rate": 1.7231657095364172e-05, + "loss": 0.5904, + "step": 17754 + }, + { + "epoch": 0.4875068643602416, + "grad_norm": 0.39551278948783875, + "learning_rate": 1.723135879082697e-05, + "loss": 0.5041, + "step": 17755 + }, + { + "epoch": 0.48753432180120815, + "grad_norm": 0.379682332277298, + "learning_rate": 1.723106047280097e-05, + "loss": 0.5614, + "step": 17756 + }, + { + "epoch": 0.48756177924217464, + "grad_norm": 0.4454176127910614, + "learning_rate": 1.723076214128674e-05, + "loss": 0.534, + "step": 17757 + }, + { + "epoch": 0.48758923668314114, + "grad_norm": 0.3913172483444214, + "learning_rate": 1.7230463796284824e-05, + "loss": 0.5007, + "step": 17758 + }, + { + "epoch": 0.48761669412410763, + "grad_norm": 0.34683042764663696, + "learning_rate": 1.723016543779579e-05, + "loss": 0.5721, + "step": 17759 + }, + { + "epoch": 0.4876441515650741, + "grad_norm": 0.39055755734443665, + "learning_rate": 1.7229867065820188e-05, + "loss": 0.5999, + "step": 17760 + }, + { + "epoch": 0.4876716090060406, + "grad_norm": 0.40304601192474365, + "learning_rate": 1.722956868035858e-05, + "loss": 0.4563, + "step": 17761 + }, + { + "epoch": 0.4876990664470071, + "grad_norm": 0.3663020730018616, + "learning_rate": 1.7229270281411514e-05, + "loss": 0.4746, + "step": 17762 + }, + { + "epoch": 0.48772652388797366, + "grad_norm": 0.3913702964782715, + "learning_rate": 1.7228971868979552e-05, + "loss": 0.551, + "step": 17763 + }, + { + "epoch": 0.48775398132894016, + "grad_norm": 0.4230533242225647, + "learning_rate": 1.7228673443063252e-05, + "loss": 0.5509, + "step": 17764 + }, + { + "epoch": 0.48778143876990665, + "grad_norm": 0.37927526235580444, + "learning_rate": 1.7228375003663168e-05, + "loss": 0.5323, + "step": 17765 + }, + { + "epoch": 0.48780889621087314, + "grad_norm": 0.581646740436554, + "learning_rate": 1.7228076550779857e-05, + "loss": 0.4903, + "step": 17766 + }, + { + "epoch": 0.48783635365183964, + "grad_norm": 0.4110003411769867, + "learning_rate": 1.7227778084413878e-05, + "loss": 0.5128, + "step": 17767 + }, + { + "epoch": 0.48786381109280613, + "grad_norm": 0.38262462615966797, + "learning_rate": 1.7227479604565788e-05, + "loss": 0.5139, + "step": 17768 + }, + { + "epoch": 0.4878912685337726, + "grad_norm": 0.399765282869339, + "learning_rate": 1.722718111123614e-05, + "loss": 0.4737, + "step": 17769 + }, + { + "epoch": 0.4879187259747392, + "grad_norm": 0.3757775127887726, + "learning_rate": 1.7226882604425493e-05, + "loss": 0.5441, + "step": 17770 + }, + { + "epoch": 0.48794618341570567, + "grad_norm": 0.41653019189834595, + "learning_rate": 1.7226584084134402e-05, + "loss": 0.5535, + "step": 17771 + }, + { + "epoch": 0.48797364085667216, + "grad_norm": 0.3651380240917206, + "learning_rate": 1.7226285550363422e-05, + "loss": 0.4327, + "step": 17772 + }, + { + "epoch": 0.48800109829763866, + "grad_norm": 0.442548930644989, + "learning_rate": 1.722598700311312e-05, + "loss": 0.5168, + "step": 17773 + }, + { + "epoch": 0.48802855573860515, + "grad_norm": 0.41931644082069397, + "learning_rate": 1.722568844238404e-05, + "loss": 0.5024, + "step": 17774 + }, + { + "epoch": 0.48805601317957165, + "grad_norm": 0.4078995883464813, + "learning_rate": 1.722538986817675e-05, + "loss": 0.4495, + "step": 17775 + }, + { + "epoch": 0.48808347062053814, + "grad_norm": 0.44596463441848755, + "learning_rate": 1.72250912804918e-05, + "loss": 0.5204, + "step": 17776 + }, + { + "epoch": 0.4881109280615047, + "grad_norm": 0.37142249941825867, + "learning_rate": 1.7224792679329744e-05, + "loss": 0.4793, + "step": 17777 + }, + { + "epoch": 0.4881383855024712, + "grad_norm": 0.87308669090271, + "learning_rate": 1.722449406469115e-05, + "loss": 0.5331, + "step": 17778 + }, + { + "epoch": 0.4881658429434377, + "grad_norm": 0.4292074143886566, + "learning_rate": 1.7224195436576566e-05, + "loss": 0.4822, + "step": 17779 + }, + { + "epoch": 0.48819330038440417, + "grad_norm": 0.3964207172393799, + "learning_rate": 1.7223896794986552e-05, + "loss": 0.5059, + "step": 17780 + }, + { + "epoch": 0.48822075782537067, + "grad_norm": 0.3670021891593933, + "learning_rate": 1.7223598139921667e-05, + "loss": 0.5503, + "step": 17781 + }, + { + "epoch": 0.48824821526633716, + "grad_norm": 0.3594217002391815, + "learning_rate": 1.7223299471382464e-05, + "loss": 0.5641, + "step": 17782 + }, + { + "epoch": 0.48827567270730365, + "grad_norm": 0.33374544978141785, + "learning_rate": 1.7223000789369503e-05, + "loss": 0.46, + "step": 17783 + }, + { + "epoch": 0.4883031301482702, + "grad_norm": 0.4914587140083313, + "learning_rate": 1.722270209388334e-05, + "loss": 0.5049, + "step": 17784 + }, + { + "epoch": 0.4883305875892367, + "grad_norm": 0.4681309163570404, + "learning_rate": 1.7222403384924532e-05, + "loss": 0.5703, + "step": 17785 + }, + { + "epoch": 0.4883580450302032, + "grad_norm": 0.39005160331726074, + "learning_rate": 1.7222104662493633e-05, + "loss": 0.5462, + "step": 17786 + }, + { + "epoch": 0.4883855024711697, + "grad_norm": 0.34874671697616577, + "learning_rate": 1.722180592659121e-05, + "loss": 0.5274, + "step": 17787 + }, + { + "epoch": 0.4884129599121362, + "grad_norm": 0.3604627847671509, + "learning_rate": 1.722150717721781e-05, + "loss": 0.4946, + "step": 17788 + }, + { + "epoch": 0.4884404173531027, + "grad_norm": 0.36221233010292053, + "learning_rate": 1.7221208414373995e-05, + "loss": 0.4904, + "step": 17789 + }, + { + "epoch": 0.48846787479406917, + "grad_norm": 0.392921507358551, + "learning_rate": 1.722090963806032e-05, + "loss": 0.5234, + "step": 17790 + }, + { + "epoch": 0.4884953322350357, + "grad_norm": 0.3834966719150543, + "learning_rate": 1.7220610848277347e-05, + "loss": 0.5297, + "step": 17791 + }, + { + "epoch": 0.4885227896760022, + "grad_norm": 0.3801875114440918, + "learning_rate": 1.7220312045025633e-05, + "loss": 0.5291, + "step": 17792 + }, + { + "epoch": 0.4885502471169687, + "grad_norm": 0.36013099551200867, + "learning_rate": 1.7220013228305727e-05, + "loss": 0.4818, + "step": 17793 + }, + { + "epoch": 0.4885777045579352, + "grad_norm": 0.40967345237731934, + "learning_rate": 1.7219714398118196e-05, + "loss": 0.5286, + "step": 17794 + }, + { + "epoch": 0.4886051619989017, + "grad_norm": 0.3858114778995514, + "learning_rate": 1.721941555446359e-05, + "loss": 0.5689, + "step": 17795 + }, + { + "epoch": 0.4886326194398682, + "grad_norm": 0.3556840717792511, + "learning_rate": 1.721911669734247e-05, + "loss": 0.4983, + "step": 17796 + }, + { + "epoch": 0.4886600768808347, + "grad_norm": 0.40683645009994507, + "learning_rate": 1.7218817826755394e-05, + "loss": 0.5373, + "step": 17797 + }, + { + "epoch": 0.48868753432180123, + "grad_norm": 0.3931632339954376, + "learning_rate": 1.721851894270292e-05, + "loss": 0.5447, + "step": 17798 + }, + { + "epoch": 0.4887149917627677, + "grad_norm": 0.3568495512008667, + "learning_rate": 1.72182200451856e-05, + "loss": 0.4652, + "step": 17799 + }, + { + "epoch": 0.4887424492037342, + "grad_norm": 0.47889214754104614, + "learning_rate": 1.7217921134204003e-05, + "loss": 0.5162, + "step": 17800 + }, + { + "epoch": 0.4887699066447007, + "grad_norm": 0.37562096118927, + "learning_rate": 1.7217622209758676e-05, + "loss": 0.559, + "step": 17801 + }, + { + "epoch": 0.4887973640856672, + "grad_norm": 0.41337838768959045, + "learning_rate": 1.7217323271850178e-05, + "loss": 0.523, + "step": 17802 + }, + { + "epoch": 0.4888248215266337, + "grad_norm": 0.3839432895183563, + "learning_rate": 1.721702432047907e-05, + "loss": 0.52, + "step": 17803 + }, + { + "epoch": 0.4888522789676002, + "grad_norm": 0.41358956694602966, + "learning_rate": 1.7216725355645908e-05, + "loss": 0.4747, + "step": 17804 + }, + { + "epoch": 0.48887973640856675, + "grad_norm": 0.34398898482322693, + "learning_rate": 1.721642637735125e-05, + "loss": 0.4782, + "step": 17805 + }, + { + "epoch": 0.48890719384953324, + "grad_norm": 0.339236319065094, + "learning_rate": 1.7216127385595656e-05, + "loss": 0.4916, + "step": 17806 + }, + { + "epoch": 0.48893465129049973, + "grad_norm": 0.3330595791339874, + "learning_rate": 1.7215828380379676e-05, + "loss": 0.4974, + "step": 17807 + }, + { + "epoch": 0.4889621087314662, + "grad_norm": 0.35836082696914673, + "learning_rate": 1.721552936170388e-05, + "loss": 0.5627, + "step": 17808 + }, + { + "epoch": 0.4889895661724327, + "grad_norm": 0.39471641182899475, + "learning_rate": 1.7215230329568813e-05, + "loss": 0.5509, + "step": 17809 + }, + { + "epoch": 0.4890170236133992, + "grad_norm": 0.36818867921829224, + "learning_rate": 1.721493128397504e-05, + "loss": 0.4794, + "step": 17810 + }, + { + "epoch": 0.4890444810543657, + "grad_norm": 0.40194636583328247, + "learning_rate": 1.721463222492312e-05, + "loss": 0.505, + "step": 17811 + }, + { + "epoch": 0.48907193849533226, + "grad_norm": 0.3780379295349121, + "learning_rate": 1.7214333152413603e-05, + "loss": 0.531, + "step": 17812 + }, + { + "epoch": 0.48909939593629875, + "grad_norm": 0.35710835456848145, + "learning_rate": 1.721403406644706e-05, + "loss": 0.4576, + "step": 17813 + }, + { + "epoch": 0.48912685337726525, + "grad_norm": 0.4048587679862976, + "learning_rate": 1.7213734967024035e-05, + "loss": 0.5566, + "step": 17814 + }, + { + "epoch": 0.48915431081823174, + "grad_norm": 0.367737352848053, + "learning_rate": 1.7213435854145096e-05, + "loss": 0.4334, + "step": 17815 + }, + { + "epoch": 0.48918176825919824, + "grad_norm": 0.3767624795436859, + "learning_rate": 1.7213136727810795e-05, + "loss": 0.5709, + "step": 17816 + }, + { + "epoch": 0.48920922570016473, + "grad_norm": 0.38018324971199036, + "learning_rate": 1.721283758802169e-05, + "loss": 0.5281, + "step": 17817 + }, + { + "epoch": 0.4892366831411312, + "grad_norm": 0.3547159433364868, + "learning_rate": 1.7212538434778338e-05, + "loss": 0.5004, + "step": 17818 + }, + { + "epoch": 0.4892641405820978, + "grad_norm": 0.4002813994884491, + "learning_rate": 1.7212239268081304e-05, + "loss": 0.5828, + "step": 17819 + }, + { + "epoch": 0.48929159802306427, + "grad_norm": 0.3951385021209717, + "learning_rate": 1.7211940087931144e-05, + "loss": 0.5007, + "step": 17820 + }, + { + "epoch": 0.48931905546403076, + "grad_norm": 0.45461228489875793, + "learning_rate": 1.7211640894328413e-05, + "loss": 0.4812, + "step": 17821 + }, + { + "epoch": 0.48934651290499726, + "grad_norm": 0.7183622717857361, + "learning_rate": 1.7211341687273664e-05, + "loss": 0.6024, + "step": 17822 + }, + { + "epoch": 0.48937397034596375, + "grad_norm": 0.3656640946865082, + "learning_rate": 1.721104246676747e-05, + "loss": 0.4904, + "step": 17823 + }, + { + "epoch": 0.48940142778693024, + "grad_norm": 0.3793434202671051, + "learning_rate": 1.7210743232810375e-05, + "loss": 0.4961, + "step": 17824 + }, + { + "epoch": 0.48942888522789674, + "grad_norm": 0.434174120426178, + "learning_rate": 1.7210443985402943e-05, + "loss": 0.5813, + "step": 17825 + }, + { + "epoch": 0.4894563426688633, + "grad_norm": 0.4670186936855316, + "learning_rate": 1.7210144724545732e-05, + "loss": 0.525, + "step": 17826 + }, + { + "epoch": 0.4894838001098298, + "grad_norm": 0.37180399894714355, + "learning_rate": 1.72098454502393e-05, + "loss": 0.5165, + "step": 17827 + }, + { + "epoch": 0.4895112575507963, + "grad_norm": 0.48265594244003296, + "learning_rate": 1.7209546162484207e-05, + "loss": 0.5835, + "step": 17828 + }, + { + "epoch": 0.48953871499176277, + "grad_norm": 0.37286481261253357, + "learning_rate": 1.7209246861281007e-05, + "loss": 0.4576, + "step": 17829 + }, + { + "epoch": 0.48956617243272926, + "grad_norm": 0.37007829546928406, + "learning_rate": 1.7208947546630262e-05, + "loss": 0.4671, + "step": 17830 + }, + { + "epoch": 0.48959362987369576, + "grad_norm": 0.3685915172100067, + "learning_rate": 1.720864821853253e-05, + "loss": 0.5054, + "step": 17831 + }, + { + "epoch": 0.48962108731466225, + "grad_norm": 0.3932049870491028, + "learning_rate": 1.7208348876988363e-05, + "loss": 0.5398, + "step": 17832 + }, + { + "epoch": 0.4896485447556288, + "grad_norm": 0.38906607031822205, + "learning_rate": 1.720804952199833e-05, + "loss": 0.5248, + "step": 17833 + }, + { + "epoch": 0.4896760021965953, + "grad_norm": 0.6179790496826172, + "learning_rate": 1.7207750153562982e-05, + "loss": 0.4539, + "step": 17834 + }, + { + "epoch": 0.4897034596375618, + "grad_norm": 0.4705316424369812, + "learning_rate": 1.720745077168288e-05, + "loss": 0.573, + "step": 17835 + }, + { + "epoch": 0.4897309170785283, + "grad_norm": 0.3870733082294464, + "learning_rate": 1.720715137635858e-05, + "loss": 0.4338, + "step": 17836 + }, + { + "epoch": 0.4897583745194948, + "grad_norm": 0.36236003041267395, + "learning_rate": 1.7206851967590647e-05, + "loss": 0.5013, + "step": 17837 + }, + { + "epoch": 0.48978583196046127, + "grad_norm": 0.37478938698768616, + "learning_rate": 1.720655254537963e-05, + "loss": 0.5495, + "step": 17838 + }, + { + "epoch": 0.48981328940142776, + "grad_norm": 0.40118634700775146, + "learning_rate": 1.7206253109726098e-05, + "loss": 0.5129, + "step": 17839 + }, + { + "epoch": 0.4898407468423943, + "grad_norm": 0.39819326996803284, + "learning_rate": 1.72059536606306e-05, + "loss": 0.5018, + "step": 17840 + }, + { + "epoch": 0.4898682042833608, + "grad_norm": 0.3462105691432953, + "learning_rate": 1.72056541980937e-05, + "loss": 0.4351, + "step": 17841 + }, + { + "epoch": 0.4898956617243273, + "grad_norm": 0.4187363386154175, + "learning_rate": 1.720535472211595e-05, + "loss": 0.6056, + "step": 17842 + }, + { + "epoch": 0.4899231191652938, + "grad_norm": 0.3979317545890808, + "learning_rate": 1.720505523269792e-05, + "loss": 0.5157, + "step": 17843 + }, + { + "epoch": 0.4899505766062603, + "grad_norm": 0.3539043664932251, + "learning_rate": 1.720475572984016e-05, + "loss": 0.4649, + "step": 17844 + }, + { + "epoch": 0.4899780340472268, + "grad_norm": 0.354744553565979, + "learning_rate": 1.7204456213543234e-05, + "loss": 0.5292, + "step": 17845 + }, + { + "epoch": 0.4900054914881933, + "grad_norm": 0.390675812959671, + "learning_rate": 1.7204156683807696e-05, + "loss": 0.4385, + "step": 17846 + }, + { + "epoch": 0.49003294892915983, + "grad_norm": 0.37188923358917236, + "learning_rate": 1.7203857140634105e-05, + "loss": 0.5212, + "step": 17847 + }, + { + "epoch": 0.4900604063701263, + "grad_norm": 0.36838841438293457, + "learning_rate": 1.7203557584023022e-05, + "loss": 0.5346, + "step": 17848 + }, + { + "epoch": 0.4900878638110928, + "grad_norm": 0.38904353976249695, + "learning_rate": 1.7203258013975004e-05, + "loss": 0.5247, + "step": 17849 + }, + { + "epoch": 0.4901153212520593, + "grad_norm": 0.3701168894767761, + "learning_rate": 1.7202958430490614e-05, + "loss": 0.5009, + "step": 17850 + }, + { + "epoch": 0.4901427786930258, + "grad_norm": 0.36962389945983887, + "learning_rate": 1.7202658833570404e-05, + "loss": 0.4566, + "step": 17851 + }, + { + "epoch": 0.4901702361339923, + "grad_norm": 0.3574495315551758, + "learning_rate": 1.7202359223214938e-05, + "loss": 0.4716, + "step": 17852 + }, + { + "epoch": 0.4901976935749588, + "grad_norm": 0.38047119975090027, + "learning_rate": 1.7202059599424773e-05, + "loss": 0.4642, + "step": 17853 + }, + { + "epoch": 0.49022515101592534, + "grad_norm": 0.4187179207801819, + "learning_rate": 1.720175996220047e-05, + "loss": 0.4832, + "step": 17854 + }, + { + "epoch": 0.49025260845689184, + "grad_norm": 0.4672851860523224, + "learning_rate": 1.7201460311542582e-05, + "loss": 0.554, + "step": 17855 + }, + { + "epoch": 0.49028006589785833, + "grad_norm": 0.3704807758331299, + "learning_rate": 1.7201160647451674e-05, + "loss": 0.4841, + "step": 17856 + }, + { + "epoch": 0.4903075233388248, + "grad_norm": 0.39375290274620056, + "learning_rate": 1.7200860969928304e-05, + "loss": 0.5586, + "step": 17857 + }, + { + "epoch": 0.4903349807797913, + "grad_norm": 0.37536078691482544, + "learning_rate": 1.720056127897303e-05, + "loss": 0.514, + "step": 17858 + }, + { + "epoch": 0.4903624382207578, + "grad_norm": 0.4565924108028412, + "learning_rate": 1.720026157458641e-05, + "loss": 0.5844, + "step": 17859 + }, + { + "epoch": 0.4903898956617243, + "grad_norm": 0.3822723925113678, + "learning_rate": 1.7199961856769006e-05, + "loss": 0.5155, + "step": 17860 + }, + { + "epoch": 0.49041735310269086, + "grad_norm": 0.394534170627594, + "learning_rate": 1.7199662125521375e-05, + "loss": 0.5347, + "step": 17861 + }, + { + "epoch": 0.49044481054365735, + "grad_norm": 0.352857768535614, + "learning_rate": 1.7199362380844075e-05, + "loss": 0.54, + "step": 17862 + }, + { + "epoch": 0.49047226798462384, + "grad_norm": 0.36084800958633423, + "learning_rate": 1.7199062622737666e-05, + "loss": 0.5633, + "step": 17863 + }, + { + "epoch": 0.49049972542559034, + "grad_norm": 0.4460386335849762, + "learning_rate": 1.719876285120271e-05, + "loss": 0.5463, + "step": 17864 + }, + { + "epoch": 0.49052718286655683, + "grad_norm": 0.3726047873497009, + "learning_rate": 1.7198463066239762e-05, + "loss": 0.4529, + "step": 17865 + }, + { + "epoch": 0.4905546403075233, + "grad_norm": 0.3607197403907776, + "learning_rate": 1.7198163267849385e-05, + "loss": 0.4782, + "step": 17866 + }, + { + "epoch": 0.4905820977484898, + "grad_norm": 0.36153388023376465, + "learning_rate": 1.7197863456032133e-05, + "loss": 0.5919, + "step": 17867 + }, + { + "epoch": 0.49060955518945637, + "grad_norm": 0.34177306294441223, + "learning_rate": 1.7197563630788568e-05, + "loss": 0.4944, + "step": 17868 + }, + { + "epoch": 0.49063701263042286, + "grad_norm": 0.357936829328537, + "learning_rate": 1.7197263792119253e-05, + "loss": 0.5592, + "step": 17869 + }, + { + "epoch": 0.49066447007138936, + "grad_norm": 0.3403005599975586, + "learning_rate": 1.719696394002474e-05, + "loss": 0.4252, + "step": 17870 + }, + { + "epoch": 0.49069192751235585, + "grad_norm": 0.35397663712501526, + "learning_rate": 1.7196664074505596e-05, + "loss": 0.5081, + "step": 17871 + }, + { + "epoch": 0.49071938495332235, + "grad_norm": 0.3690434396266937, + "learning_rate": 1.7196364195562372e-05, + "loss": 0.5202, + "step": 17872 + }, + { + "epoch": 0.49074684239428884, + "grad_norm": 0.41324275732040405, + "learning_rate": 1.7196064303195638e-05, + "loss": 0.518, + "step": 17873 + }, + { + "epoch": 0.49077429983525533, + "grad_norm": 0.3777472972869873, + "learning_rate": 1.719576439740594e-05, + "loss": 0.6197, + "step": 17874 + }, + { + "epoch": 0.4908017572762219, + "grad_norm": 0.3294455111026764, + "learning_rate": 1.719546447819385e-05, + "loss": 0.4153, + "step": 17875 + }, + { + "epoch": 0.4908292147171884, + "grad_norm": 0.33800798654556274, + "learning_rate": 1.719516454555992e-05, + "loss": 0.4814, + "step": 17876 + }, + { + "epoch": 0.49085667215815487, + "grad_norm": 0.3439382314682007, + "learning_rate": 1.7194864599504714e-05, + "loss": 0.5159, + "step": 17877 + }, + { + "epoch": 0.49088412959912137, + "grad_norm": 0.6582164168357849, + "learning_rate": 1.719456464002879e-05, + "loss": 0.5819, + "step": 17878 + }, + { + "epoch": 0.49091158704008786, + "grad_norm": 0.3737907111644745, + "learning_rate": 1.7194264667132704e-05, + "loss": 0.5806, + "step": 17879 + }, + { + "epoch": 0.49093904448105435, + "grad_norm": 0.41890832781791687, + "learning_rate": 1.7193964680817022e-05, + "loss": 0.4302, + "step": 17880 + }, + { + "epoch": 0.49096650192202085, + "grad_norm": 0.48428452014923096, + "learning_rate": 1.7193664681082296e-05, + "loss": 0.5703, + "step": 17881 + }, + { + "epoch": 0.49099395936298734, + "grad_norm": 0.4254513680934906, + "learning_rate": 1.719336466792909e-05, + "loss": 0.5109, + "step": 17882 + }, + { + "epoch": 0.4910214168039539, + "grad_norm": 0.3602256178855896, + "learning_rate": 1.719306464135796e-05, + "loss": 0.5252, + "step": 17883 + }, + { + "epoch": 0.4910488742449204, + "grad_norm": 0.3605472147464752, + "learning_rate": 1.7192764601369473e-05, + "loss": 0.5492, + "step": 17884 + }, + { + "epoch": 0.4910763316858869, + "grad_norm": 0.37658411264419556, + "learning_rate": 1.7192464547964184e-05, + "loss": 0.51, + "step": 17885 + }, + { + "epoch": 0.4911037891268534, + "grad_norm": 0.42590972781181335, + "learning_rate": 1.7192164481142652e-05, + "loss": 0.534, + "step": 17886 + }, + { + "epoch": 0.49113124656781987, + "grad_norm": 0.42022639513015747, + "learning_rate": 1.719186440090544e-05, + "loss": 0.6338, + "step": 17887 + }, + { + "epoch": 0.49115870400878636, + "grad_norm": 0.3934406340122223, + "learning_rate": 1.7191564307253102e-05, + "loss": 0.5449, + "step": 17888 + }, + { + "epoch": 0.49118616144975286, + "grad_norm": 0.6173372268676758, + "learning_rate": 1.7191264200186202e-05, + "loss": 0.6196, + "step": 17889 + }, + { + "epoch": 0.4912136188907194, + "grad_norm": 0.40943050384521484, + "learning_rate": 1.7190964079705302e-05, + "loss": 0.5122, + "step": 17890 + }, + { + "epoch": 0.4912410763316859, + "grad_norm": 0.37756669521331787, + "learning_rate": 1.7190663945810958e-05, + "loss": 0.5149, + "step": 17891 + }, + { + "epoch": 0.4912685337726524, + "grad_norm": 0.4090964198112488, + "learning_rate": 1.7190363798503725e-05, + "loss": 0.5179, + "step": 17892 + }, + { + "epoch": 0.4912959912136189, + "grad_norm": 0.3988071084022522, + "learning_rate": 1.7190063637784176e-05, + "loss": 0.578, + "step": 17893 + }, + { + "epoch": 0.4913234486545854, + "grad_norm": 0.391082763671875, + "learning_rate": 1.718976346365286e-05, + "loss": 0.5947, + "step": 17894 + }, + { + "epoch": 0.4913509060955519, + "grad_norm": 0.3932049870491028, + "learning_rate": 1.7189463276110343e-05, + "loss": 0.562, + "step": 17895 + }, + { + "epoch": 0.49137836353651837, + "grad_norm": 0.38366004824638367, + "learning_rate": 1.718916307515718e-05, + "loss": 0.5369, + "step": 17896 + }, + { + "epoch": 0.4914058209774849, + "grad_norm": 0.36642587184906006, + "learning_rate": 1.7188862860793933e-05, + "loss": 0.5546, + "step": 17897 + }, + { + "epoch": 0.4914332784184514, + "grad_norm": 0.498079776763916, + "learning_rate": 1.7188562633021165e-05, + "loss": 0.5736, + "step": 17898 + }, + { + "epoch": 0.4914607358594179, + "grad_norm": 0.3934246003627777, + "learning_rate": 1.718826239183943e-05, + "loss": 0.584, + "step": 17899 + }, + { + "epoch": 0.4914881933003844, + "grad_norm": 0.5527629852294922, + "learning_rate": 1.7187962137249293e-05, + "loss": 0.5108, + "step": 17900 + }, + { + "epoch": 0.4915156507413509, + "grad_norm": 0.39081281423568726, + "learning_rate": 1.7187661869251314e-05, + "loss": 0.5853, + "step": 17901 + }, + { + "epoch": 0.4915431081823174, + "grad_norm": 0.363167405128479, + "learning_rate": 1.718736158784605e-05, + "loss": 0.4909, + "step": 17902 + }, + { + "epoch": 0.4915705656232839, + "grad_norm": 0.3828846216201782, + "learning_rate": 1.7187061293034064e-05, + "loss": 0.526, + "step": 17903 + }, + { + "epoch": 0.49159802306425043, + "grad_norm": 0.3723444938659668, + "learning_rate": 1.7186760984815914e-05, + "loss": 0.5524, + "step": 17904 + }, + { + "epoch": 0.4916254805052169, + "grad_norm": 0.386844664812088, + "learning_rate": 1.718646066319216e-05, + "loss": 0.5125, + "step": 17905 + }, + { + "epoch": 0.4916529379461834, + "grad_norm": 0.37585577368736267, + "learning_rate": 1.7186160328163365e-05, + "loss": 0.5492, + "step": 17906 + }, + { + "epoch": 0.4916803953871499, + "grad_norm": 0.4624541699886322, + "learning_rate": 1.7185859979730083e-05, + "loss": 0.5042, + "step": 17907 + }, + { + "epoch": 0.4917078528281164, + "grad_norm": 0.3886857032775879, + "learning_rate": 1.718555961789288e-05, + "loss": 0.5182, + "step": 17908 + }, + { + "epoch": 0.4917353102690829, + "grad_norm": 0.3442637324333191, + "learning_rate": 1.718525924265232e-05, + "loss": 0.4573, + "step": 17909 + }, + { + "epoch": 0.4917627677100494, + "grad_norm": 0.39258092641830444, + "learning_rate": 1.7184958854008956e-05, + "loss": 0.5201, + "step": 17910 + }, + { + "epoch": 0.49179022515101595, + "grad_norm": 0.3832319974899292, + "learning_rate": 1.718465845196335e-05, + "loss": 0.4977, + "step": 17911 + }, + { + "epoch": 0.49181768259198244, + "grad_norm": 0.4401923418045044, + "learning_rate": 1.718435803651606e-05, + "loss": 0.5025, + "step": 17912 + }, + { + "epoch": 0.49184514003294894, + "grad_norm": 0.3987842798233032, + "learning_rate": 1.7184057607667653e-05, + "loss": 0.5542, + "step": 17913 + }, + { + "epoch": 0.49187259747391543, + "grad_norm": 0.5196675062179565, + "learning_rate": 1.7183757165418686e-05, + "loss": 0.4658, + "step": 17914 + }, + { + "epoch": 0.4919000549148819, + "grad_norm": 0.3787429928779602, + "learning_rate": 1.7183456709769715e-05, + "loss": 0.5184, + "step": 17915 + }, + { + "epoch": 0.4919275123558484, + "grad_norm": 0.3541223406791687, + "learning_rate": 1.718315624072131e-05, + "loss": 0.4816, + "step": 17916 + }, + { + "epoch": 0.4919549697968149, + "grad_norm": 0.35931575298309326, + "learning_rate": 1.7182855758274023e-05, + "loss": 0.5089, + "step": 17917 + }, + { + "epoch": 0.49198242723778146, + "grad_norm": 0.4177033603191376, + "learning_rate": 1.7182555262428416e-05, + "loss": 0.5114, + "step": 17918 + }, + { + "epoch": 0.49200988467874796, + "grad_norm": 0.41591504216194153, + "learning_rate": 1.7182254753185055e-05, + "loss": 0.4676, + "step": 17919 + }, + { + "epoch": 0.49203734211971445, + "grad_norm": 0.32278892397880554, + "learning_rate": 1.7181954230544496e-05, + "loss": 0.3803, + "step": 17920 + }, + { + "epoch": 0.49206479956068094, + "grad_norm": 0.44925805926322937, + "learning_rate": 1.7181653694507297e-05, + "loss": 0.5494, + "step": 17921 + }, + { + "epoch": 0.49209225700164744, + "grad_norm": 0.3613086938858032, + "learning_rate": 1.7181353145074022e-05, + "loss": 0.4524, + "step": 17922 + }, + { + "epoch": 0.49211971444261393, + "grad_norm": 0.37967050075531006, + "learning_rate": 1.7181052582245233e-05, + "loss": 0.5439, + "step": 17923 + }, + { + "epoch": 0.4921471718835804, + "grad_norm": 0.38003477454185486, + "learning_rate": 1.718075200602149e-05, + "loss": 0.5241, + "step": 17924 + }, + { + "epoch": 0.492174629324547, + "grad_norm": 0.37857815623283386, + "learning_rate": 1.718045141640335e-05, + "loss": 0.4687, + "step": 17925 + }, + { + "epoch": 0.49220208676551347, + "grad_norm": 0.38082700967788696, + "learning_rate": 1.718015081339138e-05, + "loss": 0.5949, + "step": 17926 + }, + { + "epoch": 0.49222954420647996, + "grad_norm": 0.4298068583011627, + "learning_rate": 1.7179850196986132e-05, + "loss": 0.5038, + "step": 17927 + }, + { + "epoch": 0.49225700164744646, + "grad_norm": 0.36844637989997864, + "learning_rate": 1.7179549567188177e-05, + "loss": 0.4615, + "step": 17928 + }, + { + "epoch": 0.49228445908841295, + "grad_norm": 0.3754223883152008, + "learning_rate": 1.7179248923998065e-05, + "loss": 0.4936, + "step": 17929 + }, + { + "epoch": 0.49231191652937945, + "grad_norm": 0.36460793018341064, + "learning_rate": 1.717894826741637e-05, + "loss": 0.532, + "step": 17930 + }, + { + "epoch": 0.49233937397034594, + "grad_norm": 0.36904463171958923, + "learning_rate": 1.717864759744364e-05, + "loss": 0.5395, + "step": 17931 + }, + { + "epoch": 0.4923668314113125, + "grad_norm": 0.4602816700935364, + "learning_rate": 1.717834691408044e-05, + "loss": 0.5378, + "step": 17932 + }, + { + "epoch": 0.492394288852279, + "grad_norm": 0.3438127040863037, + "learning_rate": 1.7178046217327336e-05, + "loss": 0.4993, + "step": 17933 + }, + { + "epoch": 0.4924217462932455, + "grad_norm": 0.41845324635505676, + "learning_rate": 1.7177745507184883e-05, + "loss": 0.4536, + "step": 17934 + }, + { + "epoch": 0.49244920373421197, + "grad_norm": 0.3562749922275543, + "learning_rate": 1.7177444783653642e-05, + "loss": 0.4707, + "step": 17935 + }, + { + "epoch": 0.49247666117517847, + "grad_norm": 0.38490015268325806, + "learning_rate": 1.7177144046734178e-05, + "loss": 0.5615, + "step": 17936 + }, + { + "epoch": 0.49250411861614496, + "grad_norm": 0.3355584740638733, + "learning_rate": 1.717684329642705e-05, + "loss": 0.5449, + "step": 17937 + }, + { + "epoch": 0.49253157605711145, + "grad_norm": 0.3565799295902252, + "learning_rate": 1.7176542532732816e-05, + "loss": 0.5075, + "step": 17938 + }, + { + "epoch": 0.492559033498078, + "grad_norm": 0.4044046401977539, + "learning_rate": 1.717624175565204e-05, + "loss": 0.5568, + "step": 17939 + }, + { + "epoch": 0.4925864909390445, + "grad_norm": 0.41660937666893005, + "learning_rate": 1.7175940965185284e-05, + "loss": 0.586, + "step": 17940 + }, + { + "epoch": 0.492613948380011, + "grad_norm": 0.3532661199569702, + "learning_rate": 1.717564016133311e-05, + "loss": 0.4902, + "step": 17941 + }, + { + "epoch": 0.4926414058209775, + "grad_norm": 0.4710400104522705, + "learning_rate": 1.7175339344096074e-05, + "loss": 0.4162, + "step": 17942 + }, + { + "epoch": 0.492668863261944, + "grad_norm": 0.40276315808296204, + "learning_rate": 1.7175038513474742e-05, + "loss": 0.5906, + "step": 17943 + }, + { + "epoch": 0.4926963207029105, + "grad_norm": 0.43990251421928406, + "learning_rate": 1.717473766946967e-05, + "loss": 0.5918, + "step": 17944 + }, + { + "epoch": 0.49272377814387697, + "grad_norm": 0.3555658757686615, + "learning_rate": 1.717443681208142e-05, + "loss": 0.5193, + "step": 17945 + }, + { + "epoch": 0.4927512355848435, + "grad_norm": 0.7733058333396912, + "learning_rate": 1.7174135941310565e-05, + "loss": 0.562, + "step": 17946 + }, + { + "epoch": 0.49277869302581, + "grad_norm": 0.40725457668304443, + "learning_rate": 1.717383505715765e-05, + "loss": 0.5227, + "step": 17947 + }, + { + "epoch": 0.4928061504667765, + "grad_norm": 0.33708226680755615, + "learning_rate": 1.7173534159623248e-05, + "loss": 0.5235, + "step": 17948 + }, + { + "epoch": 0.492833607907743, + "grad_norm": 0.3559558689594269, + "learning_rate": 1.7173233248707908e-05, + "loss": 0.4507, + "step": 17949 + }, + { + "epoch": 0.4928610653487095, + "grad_norm": 0.3661240041255951, + "learning_rate": 1.7172932324412203e-05, + "loss": 0.5816, + "step": 17950 + }, + { + "epoch": 0.492888522789676, + "grad_norm": 0.34928640723228455, + "learning_rate": 1.717263138673669e-05, + "loss": 0.6052, + "step": 17951 + }, + { + "epoch": 0.4929159802306425, + "grad_norm": 0.4056544303894043, + "learning_rate": 1.717233043568193e-05, + "loss": 0.521, + "step": 17952 + }, + { + "epoch": 0.49294343767160903, + "grad_norm": 0.35231298208236694, + "learning_rate": 1.7172029471248485e-05, + "loss": 0.5052, + "step": 17953 + }, + { + "epoch": 0.4929708951125755, + "grad_norm": 0.36980488896369934, + "learning_rate": 1.7171728493436912e-05, + "loss": 0.4648, + "step": 17954 + }, + { + "epoch": 0.492998352553542, + "grad_norm": 0.35177719593048096, + "learning_rate": 1.717142750224778e-05, + "loss": 0.4914, + "step": 17955 + }, + { + "epoch": 0.4930258099945085, + "grad_norm": 0.391419380903244, + "learning_rate": 1.717112649768165e-05, + "loss": 0.5274, + "step": 17956 + }, + { + "epoch": 0.493053267435475, + "grad_norm": 0.4985537528991699, + "learning_rate": 1.717082547973907e-05, + "loss": 0.508, + "step": 17957 + }, + { + "epoch": 0.4930807248764415, + "grad_norm": 0.40397703647613525, + "learning_rate": 1.717052444842062e-05, + "loss": 0.471, + "step": 17958 + }, + { + "epoch": 0.493108182317408, + "grad_norm": 0.3993542790412903, + "learning_rate": 1.7170223403726853e-05, + "loss": 0.4813, + "step": 17959 + }, + { + "epoch": 0.49313563975837454, + "grad_norm": 0.3657904863357544, + "learning_rate": 1.716992234565833e-05, + "loss": 0.5423, + "step": 17960 + }, + { + "epoch": 0.49316309719934104, + "grad_norm": 0.3577600419521332, + "learning_rate": 1.7169621274215614e-05, + "loss": 0.4443, + "step": 17961 + }, + { + "epoch": 0.49319055464030753, + "grad_norm": 0.37999626994132996, + "learning_rate": 1.7169320189399267e-05, + "loss": 0.5793, + "step": 17962 + }, + { + "epoch": 0.493218012081274, + "grad_norm": 0.3497711420059204, + "learning_rate": 1.7169019091209846e-05, + "loss": 0.4359, + "step": 17963 + }, + { + "epoch": 0.4932454695222405, + "grad_norm": 0.3446301221847534, + "learning_rate": 1.716871797964792e-05, + "loss": 0.4769, + "step": 17964 + }, + { + "epoch": 0.493272926963207, + "grad_norm": 0.7012301683425903, + "learning_rate": 1.7168416854714042e-05, + "loss": 0.4696, + "step": 17965 + }, + { + "epoch": 0.4933003844041735, + "grad_norm": 0.4118887782096863, + "learning_rate": 1.716811571640878e-05, + "loss": 0.5541, + "step": 17966 + }, + { + "epoch": 0.49332784184514006, + "grad_norm": 0.40469327569007874, + "learning_rate": 1.71678145647327e-05, + "loss": 0.6024, + "step": 17967 + }, + { + "epoch": 0.49335529928610655, + "grad_norm": 0.40736010670661926, + "learning_rate": 1.7167513399686354e-05, + "loss": 0.5511, + "step": 17968 + }, + { + "epoch": 0.49338275672707305, + "grad_norm": 0.3747415542602539, + "learning_rate": 1.716721222127031e-05, + "loss": 0.6216, + "step": 17969 + }, + { + "epoch": 0.49341021416803954, + "grad_norm": 0.3698968291282654, + "learning_rate": 1.7166911029485122e-05, + "loss": 0.5185, + "step": 17970 + }, + { + "epoch": 0.49343767160900603, + "grad_norm": 0.38291239738464355, + "learning_rate": 1.716660982433136e-05, + "loss": 0.5424, + "step": 17971 + }, + { + "epoch": 0.49346512904997253, + "grad_norm": 0.3592331111431122, + "learning_rate": 1.7166308605809586e-05, + "loss": 0.5174, + "step": 17972 + }, + { + "epoch": 0.493492586490939, + "grad_norm": 0.5112324953079224, + "learning_rate": 1.716600737392036e-05, + "loss": 0.4069, + "step": 17973 + }, + { + "epoch": 0.49352004393190557, + "grad_norm": 0.5105748772621155, + "learning_rate": 1.7165706128664237e-05, + "loss": 0.5715, + "step": 17974 + }, + { + "epoch": 0.49354750137287207, + "grad_norm": 0.5488637089729309, + "learning_rate": 1.7165404870041792e-05, + "loss": 0.5079, + "step": 17975 + }, + { + "epoch": 0.49357495881383856, + "grad_norm": 0.3652249872684479, + "learning_rate": 1.7165103598053576e-05, + "loss": 0.5069, + "step": 17976 + }, + { + "epoch": 0.49360241625480505, + "grad_norm": 0.3680688440799713, + "learning_rate": 1.7164802312700157e-05, + "loss": 0.4701, + "step": 17977 + }, + { + "epoch": 0.49362987369577155, + "grad_norm": 0.35927268862724304, + "learning_rate": 1.7164501013982096e-05, + "loss": 0.4324, + "step": 17978 + }, + { + "epoch": 0.49365733113673804, + "grad_norm": 0.3858721852302551, + "learning_rate": 1.7164199701899952e-05, + "loss": 0.5049, + "step": 17979 + }, + { + "epoch": 0.49368478857770454, + "grad_norm": 0.32894790172576904, + "learning_rate": 1.7163898376454288e-05, + "loss": 0.4862, + "step": 17980 + }, + { + "epoch": 0.4937122460186711, + "grad_norm": 0.4254773259162903, + "learning_rate": 1.7163597037645666e-05, + "loss": 0.4804, + "step": 17981 + }, + { + "epoch": 0.4937397034596376, + "grad_norm": 0.4106791615486145, + "learning_rate": 1.7163295685474652e-05, + "loss": 0.4613, + "step": 17982 + }, + { + "epoch": 0.4937671609006041, + "grad_norm": 0.5800666809082031, + "learning_rate": 1.7162994319941805e-05, + "loss": 0.5556, + "step": 17983 + }, + { + "epoch": 0.49379461834157057, + "grad_norm": 0.4093939960002899, + "learning_rate": 1.7162692941047688e-05, + "loss": 0.5835, + "step": 17984 + }, + { + "epoch": 0.49382207578253706, + "grad_norm": 0.4323198199272156, + "learning_rate": 1.7162391548792862e-05, + "loss": 0.5259, + "step": 17985 + }, + { + "epoch": 0.49384953322350356, + "grad_norm": 0.34572163224220276, + "learning_rate": 1.716209014317789e-05, + "loss": 0.4967, + "step": 17986 + }, + { + "epoch": 0.49387699066447005, + "grad_norm": 0.39716726541519165, + "learning_rate": 1.7161788724203334e-05, + "loss": 0.5343, + "step": 17987 + }, + { + "epoch": 0.4939044481054366, + "grad_norm": 0.3746080696582794, + "learning_rate": 1.7161487291869755e-05, + "loss": 0.501, + "step": 17988 + }, + { + "epoch": 0.4939319055464031, + "grad_norm": 0.3894653022289276, + "learning_rate": 1.7161185846177715e-05, + "loss": 0.4788, + "step": 17989 + }, + { + "epoch": 0.4939593629873696, + "grad_norm": 0.33428505063056946, + "learning_rate": 1.7160884387127782e-05, + "loss": 0.4437, + "step": 17990 + }, + { + "epoch": 0.4939868204283361, + "grad_norm": 0.35403043031692505, + "learning_rate": 1.716058291472051e-05, + "loss": 0.4946, + "step": 17991 + }, + { + "epoch": 0.4940142778693026, + "grad_norm": 0.4354105591773987, + "learning_rate": 1.716028142895647e-05, + "loss": 0.4935, + "step": 17992 + }, + { + "epoch": 0.49404173531026907, + "grad_norm": 0.3614565134048462, + "learning_rate": 1.7159979929836216e-05, + "loss": 0.5861, + "step": 17993 + }, + { + "epoch": 0.49406919275123556, + "grad_norm": 0.45869335532188416, + "learning_rate": 1.7159678417360317e-05, + "loss": 0.4983, + "step": 17994 + }, + { + "epoch": 0.4940966501922021, + "grad_norm": 0.3388749957084656, + "learning_rate": 1.7159376891529328e-05, + "loss": 0.4993, + "step": 17995 + }, + { + "epoch": 0.4941241076331686, + "grad_norm": 0.32819220423698425, + "learning_rate": 1.7159075352343824e-05, + "loss": 0.4836, + "step": 17996 + }, + { + "epoch": 0.4941515650741351, + "grad_norm": 0.34097370505332947, + "learning_rate": 1.715877379980435e-05, + "loss": 0.4151, + "step": 17997 + }, + { + "epoch": 0.4941790225151016, + "grad_norm": 0.39025625586509705, + "learning_rate": 1.7158472233911484e-05, + "loss": 0.5564, + "step": 17998 + }, + { + "epoch": 0.4942064799560681, + "grad_norm": 0.42084693908691406, + "learning_rate": 1.7158170654665784e-05, + "loss": 0.4823, + "step": 17999 + }, + { + "epoch": 0.4942339373970346, + "grad_norm": 0.3919021189212799, + "learning_rate": 1.715786906206781e-05, + "loss": 0.4491, + "step": 18000 + }, + { + "epoch": 0.4942613948380011, + "grad_norm": 0.4029986560344696, + "learning_rate": 1.7157567456118124e-05, + "loss": 0.6284, + "step": 18001 + }, + { + "epoch": 0.4942888522789676, + "grad_norm": 0.48697495460510254, + "learning_rate": 1.715726583681729e-05, + "loss": 0.5612, + "step": 18002 + }, + { + "epoch": 0.4943163097199341, + "grad_norm": 0.44692298769950867, + "learning_rate": 1.715696420416587e-05, + "loss": 0.5549, + "step": 18003 + }, + { + "epoch": 0.4943437671609006, + "grad_norm": 0.42928484082221985, + "learning_rate": 1.715666255816443e-05, + "loss": 0.4872, + "step": 18004 + }, + { + "epoch": 0.4943712246018671, + "grad_norm": 0.3850124478340149, + "learning_rate": 1.715636089881353e-05, + "loss": 0.4985, + "step": 18005 + }, + { + "epoch": 0.4943986820428336, + "grad_norm": 0.33896538615226746, + "learning_rate": 1.7156059226113732e-05, + "loss": 0.5162, + "step": 18006 + }, + { + "epoch": 0.4944261394838001, + "grad_norm": 0.3595883250236511, + "learning_rate": 1.71557575400656e-05, + "loss": 0.4516, + "step": 18007 + }, + { + "epoch": 0.4944535969247666, + "grad_norm": 0.37072330713272095, + "learning_rate": 1.7155455840669696e-05, + "loss": 0.4496, + "step": 18008 + }, + { + "epoch": 0.49448105436573314, + "grad_norm": 0.8181400299072266, + "learning_rate": 1.7155154127926582e-05, + "loss": 0.4257, + "step": 18009 + }, + { + "epoch": 0.49450851180669964, + "grad_norm": 0.41068658232688904, + "learning_rate": 1.715485240183682e-05, + "loss": 0.4534, + "step": 18010 + }, + { + "epoch": 0.49453596924766613, + "grad_norm": 0.5140672922134399, + "learning_rate": 1.7154550662400978e-05, + "loss": 0.5455, + "step": 18011 + }, + { + "epoch": 0.4945634266886326, + "grad_norm": 0.3912648558616638, + "learning_rate": 1.7154248909619616e-05, + "loss": 0.568, + "step": 18012 + }, + { + "epoch": 0.4945908841295991, + "grad_norm": 0.3619721233844757, + "learning_rate": 1.7153947143493297e-05, + "loss": 0.5626, + "step": 18013 + }, + { + "epoch": 0.4946183415705656, + "grad_norm": 0.3864416182041168, + "learning_rate": 1.715364536402258e-05, + "loss": 0.4842, + "step": 18014 + }, + { + "epoch": 0.4946457990115321, + "grad_norm": 0.3768928647041321, + "learning_rate": 1.7153343571208037e-05, + "loss": 0.5151, + "step": 18015 + }, + { + "epoch": 0.4946732564524986, + "grad_norm": 0.3891160488128662, + "learning_rate": 1.715304176505022e-05, + "loss": 0.5483, + "step": 18016 + }, + { + "epoch": 0.49470071389346515, + "grad_norm": 0.3617023527622223, + "learning_rate": 1.71527399455497e-05, + "loss": 0.4773, + "step": 18017 + }, + { + "epoch": 0.49472817133443164, + "grad_norm": 0.3538110852241516, + "learning_rate": 1.7152438112707033e-05, + "loss": 0.4997, + "step": 18018 + }, + { + "epoch": 0.49475562877539814, + "grad_norm": 0.34884747862815857, + "learning_rate": 1.715213626652279e-05, + "loss": 0.4793, + "step": 18019 + }, + { + "epoch": 0.49478308621636463, + "grad_norm": 0.3991345763206482, + "learning_rate": 1.715183440699753e-05, + "loss": 0.4694, + "step": 18020 + }, + { + "epoch": 0.4948105436573311, + "grad_norm": 0.3554777204990387, + "learning_rate": 1.715153253413181e-05, + "loss": 0.5, + "step": 18021 + }, + { + "epoch": 0.4948380010982976, + "grad_norm": 0.37285226583480835, + "learning_rate": 1.715123064792621e-05, + "loss": 0.5002, + "step": 18022 + }, + { + "epoch": 0.4948654585392641, + "grad_norm": 0.3851296007633209, + "learning_rate": 1.7150928748381275e-05, + "loss": 0.5208, + "step": 18023 + }, + { + "epoch": 0.49489291598023066, + "grad_norm": 0.3638288676738739, + "learning_rate": 1.7150626835497577e-05, + "loss": 0.4052, + "step": 18024 + }, + { + "epoch": 0.49492037342119716, + "grad_norm": 0.33369502425193787, + "learning_rate": 1.715032490927568e-05, + "loss": 0.4928, + "step": 18025 + }, + { + "epoch": 0.49494783086216365, + "grad_norm": 0.3929636478424072, + "learning_rate": 1.7150022969716143e-05, + "loss": 0.4684, + "step": 18026 + }, + { + "epoch": 0.49497528830313015, + "grad_norm": 0.36121219396591187, + "learning_rate": 1.714972101681953e-05, + "loss": 0.5708, + "step": 18027 + }, + { + "epoch": 0.49500274574409664, + "grad_norm": 0.36061331629753113, + "learning_rate": 1.7149419050586408e-05, + "loss": 0.4898, + "step": 18028 + }, + { + "epoch": 0.49503020318506313, + "grad_norm": 0.3356732428073883, + "learning_rate": 1.7149117071017338e-05, + "loss": 0.3656, + "step": 18029 + }, + { + "epoch": 0.4950576606260296, + "grad_norm": 0.3659035861492157, + "learning_rate": 1.714881507811288e-05, + "loss": 0.5126, + "step": 18030 + }, + { + "epoch": 0.4950851180669962, + "grad_norm": 0.39169132709503174, + "learning_rate": 1.71485130718736e-05, + "loss": 0.5724, + "step": 18031 + }, + { + "epoch": 0.49511257550796267, + "grad_norm": 0.3460921347141266, + "learning_rate": 1.7148211052300063e-05, + "loss": 0.4246, + "step": 18032 + }, + { + "epoch": 0.49514003294892917, + "grad_norm": 0.35078978538513184, + "learning_rate": 1.714790901939283e-05, + "loss": 0.5396, + "step": 18033 + }, + { + "epoch": 0.49516749038989566, + "grad_norm": 0.34466609358787537, + "learning_rate": 1.7147606973152465e-05, + "loss": 0.4826, + "step": 18034 + }, + { + "epoch": 0.49519494783086215, + "grad_norm": 0.3673054575920105, + "learning_rate": 1.7147304913579534e-05, + "loss": 0.5652, + "step": 18035 + }, + { + "epoch": 0.49522240527182865, + "grad_norm": 0.3689931035041809, + "learning_rate": 1.71470028406746e-05, + "loss": 0.5129, + "step": 18036 + }, + { + "epoch": 0.49524986271279514, + "grad_norm": 0.35773035883903503, + "learning_rate": 1.714670075443822e-05, + "loss": 0.4482, + "step": 18037 + }, + { + "epoch": 0.4952773201537617, + "grad_norm": 0.35809704661369324, + "learning_rate": 1.7146398654870964e-05, + "loss": 0.4814, + "step": 18038 + }, + { + "epoch": 0.4953047775947282, + "grad_norm": 0.38772737979888916, + "learning_rate": 1.7146096541973395e-05, + "loss": 0.5132, + "step": 18039 + }, + { + "epoch": 0.4953322350356947, + "grad_norm": 0.3958050310611725, + "learning_rate": 1.7145794415746073e-05, + "loss": 0.5531, + "step": 18040 + }, + { + "epoch": 0.4953596924766612, + "grad_norm": 0.3745381236076355, + "learning_rate": 1.7145492276189565e-05, + "loss": 0.4381, + "step": 18041 + }, + { + "epoch": 0.49538714991762767, + "grad_norm": 0.3577905297279358, + "learning_rate": 1.7145190123304432e-05, + "loss": 0.4597, + "step": 18042 + }, + { + "epoch": 0.49541460735859416, + "grad_norm": 0.42759400606155396, + "learning_rate": 1.714488795709124e-05, + "loss": 0.579, + "step": 18043 + }, + { + "epoch": 0.49544206479956066, + "grad_norm": 0.3459891378879547, + "learning_rate": 1.714458577755055e-05, + "loss": 0.5199, + "step": 18044 + }, + { + "epoch": 0.4954695222405272, + "grad_norm": 0.36716020107269287, + "learning_rate": 1.7144283584682928e-05, + "loss": 0.4999, + "step": 18045 + }, + { + "epoch": 0.4954969796814937, + "grad_norm": 0.4022444188594818, + "learning_rate": 1.714398137848894e-05, + "loss": 0.4998, + "step": 18046 + }, + { + "epoch": 0.4955244371224602, + "grad_norm": 0.34617486596107483, + "learning_rate": 1.7143679158969144e-05, + "loss": 0.4736, + "step": 18047 + }, + { + "epoch": 0.4955518945634267, + "grad_norm": 0.3869198262691498, + "learning_rate": 1.7143376926124107e-05, + "loss": 0.499, + "step": 18048 + }, + { + "epoch": 0.4955793520043932, + "grad_norm": 0.37870630621910095, + "learning_rate": 1.714307467995439e-05, + "loss": 0.4988, + "step": 18049 + }, + { + "epoch": 0.4956068094453597, + "grad_norm": 0.3864934742450714, + "learning_rate": 1.714277242046056e-05, + "loss": 0.4675, + "step": 18050 + }, + { + "epoch": 0.49563426688632617, + "grad_norm": 0.35694417357444763, + "learning_rate": 1.714247014764318e-05, + "loss": 0.4959, + "step": 18051 + }, + { + "epoch": 0.4956617243272927, + "grad_norm": 0.3551012873649597, + "learning_rate": 1.7142167861502814e-05, + "loss": 0.5176, + "step": 18052 + }, + { + "epoch": 0.4956891817682592, + "grad_norm": 0.3825741112232208, + "learning_rate": 1.714186556204003e-05, + "loss": 0.4916, + "step": 18053 + }, + { + "epoch": 0.4957166392092257, + "grad_norm": 0.37723904848098755, + "learning_rate": 1.7141563249255377e-05, + "loss": 0.4727, + "step": 18054 + }, + { + "epoch": 0.4957440966501922, + "grad_norm": 0.3839629590511322, + "learning_rate": 1.7141260923149437e-05, + "loss": 0.4692, + "step": 18055 + }, + { + "epoch": 0.4957715540911587, + "grad_norm": 0.4407203495502472, + "learning_rate": 1.7140958583722763e-05, + "loss": 0.4978, + "step": 18056 + }, + { + "epoch": 0.4957990115321252, + "grad_norm": 0.6031894683837891, + "learning_rate": 1.7140656230975927e-05, + "loss": 0.4283, + "step": 18057 + }, + { + "epoch": 0.4958264689730917, + "grad_norm": 0.334059476852417, + "learning_rate": 1.714035386490948e-05, + "loss": 0.4806, + "step": 18058 + }, + { + "epoch": 0.49585392641405823, + "grad_norm": 0.38559871912002563, + "learning_rate": 1.7140051485524e-05, + "loss": 0.5556, + "step": 18059 + }, + { + "epoch": 0.4958813838550247, + "grad_norm": 0.44070833921432495, + "learning_rate": 1.713974909282004e-05, + "loss": 0.5922, + "step": 18060 + }, + { + "epoch": 0.4959088412959912, + "grad_norm": 0.39782440662384033, + "learning_rate": 1.7139446686798176e-05, + "loss": 0.4515, + "step": 18061 + }, + { + "epoch": 0.4959362987369577, + "grad_norm": 0.39064180850982666, + "learning_rate": 1.713914426745896e-05, + "loss": 0.5619, + "step": 18062 + }, + { + "epoch": 0.4959637561779242, + "grad_norm": 0.4381251931190491, + "learning_rate": 1.7138841834802965e-05, + "loss": 0.4434, + "step": 18063 + }, + { + "epoch": 0.4959912136188907, + "grad_norm": 0.321460485458374, + "learning_rate": 1.7138539388830748e-05, + "loss": 0.4483, + "step": 18064 + }, + { + "epoch": 0.4960186710598572, + "grad_norm": 0.36805370450019836, + "learning_rate": 1.713823692954288e-05, + "loss": 0.5595, + "step": 18065 + }, + { + "epoch": 0.49604612850082375, + "grad_norm": 0.358269602060318, + "learning_rate": 1.713793445693992e-05, + "loss": 0.4391, + "step": 18066 + }, + { + "epoch": 0.49607358594179024, + "grad_norm": 0.35984277725219727, + "learning_rate": 1.7137631971022435e-05, + "loss": 0.4822, + "step": 18067 + }, + { + "epoch": 0.49610104338275673, + "grad_norm": 0.350449800491333, + "learning_rate": 1.7137329471790987e-05, + "loss": 0.4933, + "step": 18068 + }, + { + "epoch": 0.49612850082372323, + "grad_norm": 0.40611281991004944, + "learning_rate": 1.713702695924614e-05, + "loss": 0.5037, + "step": 18069 + }, + { + "epoch": 0.4961559582646897, + "grad_norm": 0.3518425226211548, + "learning_rate": 1.7136724433388467e-05, + "loss": 0.4511, + "step": 18070 + }, + { + "epoch": 0.4961834157056562, + "grad_norm": 0.3978702127933502, + "learning_rate": 1.713642189421852e-05, + "loss": 0.4753, + "step": 18071 + }, + { + "epoch": 0.4962108731466227, + "grad_norm": 0.3420800566673279, + "learning_rate": 1.7136119341736868e-05, + "loss": 0.414, + "step": 18072 + }, + { + "epoch": 0.49623833058758926, + "grad_norm": 0.31877195835113525, + "learning_rate": 1.7135816775944074e-05, + "loss": 0.4297, + "step": 18073 + }, + { + "epoch": 0.49626578802855575, + "grad_norm": 1.425424575805664, + "learning_rate": 1.7135514196840706e-05, + "loss": 0.4955, + "step": 18074 + }, + { + "epoch": 0.49629324546952225, + "grad_norm": 0.3340473771095276, + "learning_rate": 1.713521160442733e-05, + "loss": 0.5199, + "step": 18075 + }, + { + "epoch": 0.49632070291048874, + "grad_norm": 0.3660655915737152, + "learning_rate": 1.7134908998704506e-05, + "loss": 0.5354, + "step": 18076 + }, + { + "epoch": 0.49634816035145524, + "grad_norm": 0.35943353176116943, + "learning_rate": 1.7134606379672795e-05, + "loss": 0.4962, + "step": 18077 + }, + { + "epoch": 0.49637561779242173, + "grad_norm": 0.3563924729824066, + "learning_rate": 1.713430374733277e-05, + "loss": 0.5419, + "step": 18078 + }, + { + "epoch": 0.4964030752333882, + "grad_norm": 0.3076569437980652, + "learning_rate": 1.713400110168499e-05, + "loss": 0.4523, + "step": 18079 + }, + { + "epoch": 0.4964305326743548, + "grad_norm": 0.4074445962905884, + "learning_rate": 1.7133698442730023e-05, + "loss": 0.5238, + "step": 18080 + }, + { + "epoch": 0.49645799011532127, + "grad_norm": 0.3895065486431122, + "learning_rate": 1.713339577046843e-05, + "loss": 0.5606, + "step": 18081 + }, + { + "epoch": 0.49648544755628776, + "grad_norm": 0.3602030575275421, + "learning_rate": 1.7133093084900777e-05, + "loss": 0.5084, + "step": 18082 + }, + { + "epoch": 0.49651290499725426, + "grad_norm": 0.40063247084617615, + "learning_rate": 1.7132790386027628e-05, + "loss": 0.4587, + "step": 18083 + }, + { + "epoch": 0.49654036243822075, + "grad_norm": 0.4001257121562958, + "learning_rate": 1.713248767384955e-05, + "loss": 0.4913, + "step": 18084 + }, + { + "epoch": 0.49656781987918724, + "grad_norm": 1.5800052881240845, + "learning_rate": 1.713218494836711e-05, + "loss": 0.5936, + "step": 18085 + }, + { + "epoch": 0.49659527732015374, + "grad_norm": 0.35585376620292664, + "learning_rate": 1.7131882209580863e-05, + "loss": 0.5106, + "step": 18086 + }, + { + "epoch": 0.4966227347611203, + "grad_norm": 0.3544674813747406, + "learning_rate": 1.7131579457491378e-05, + "loss": 0.559, + "step": 18087 + }, + { + "epoch": 0.4966501922020868, + "grad_norm": 0.3898366689682007, + "learning_rate": 1.7131276692099224e-05, + "loss": 0.4706, + "step": 18088 + }, + { + "epoch": 0.4966776496430533, + "grad_norm": 0.43339136242866516, + "learning_rate": 1.7130973913404964e-05, + "loss": 0.5251, + "step": 18089 + }, + { + "epoch": 0.49670510708401977, + "grad_norm": 0.37210848927497864, + "learning_rate": 1.713067112140916e-05, + "loss": 0.5752, + "step": 18090 + }, + { + "epoch": 0.49673256452498626, + "grad_norm": 0.3562559187412262, + "learning_rate": 1.713036831611238e-05, + "loss": 0.541, + "step": 18091 + }, + { + "epoch": 0.49676002196595276, + "grad_norm": 0.3666997253894806, + "learning_rate": 1.7130065497515188e-05, + "loss": 0.5319, + "step": 18092 + }, + { + "epoch": 0.49678747940691925, + "grad_norm": 0.3696627616882324, + "learning_rate": 1.7129762665618144e-05, + "loss": 0.5675, + "step": 18093 + }, + { + "epoch": 0.4968149368478858, + "grad_norm": 0.36546790599823, + "learning_rate": 1.7129459820421822e-05, + "loss": 0.4403, + "step": 18094 + }, + { + "epoch": 0.4968423942888523, + "grad_norm": 0.4002673029899597, + "learning_rate": 1.7129156961926778e-05, + "loss": 0.4591, + "step": 18095 + }, + { + "epoch": 0.4968698517298188, + "grad_norm": 0.38140302896499634, + "learning_rate": 1.7128854090133584e-05, + "loss": 0.6107, + "step": 18096 + }, + { + "epoch": 0.4968973091707853, + "grad_norm": 0.43380630016326904, + "learning_rate": 1.7128551205042797e-05, + "loss": 0.6419, + "step": 18097 + }, + { + "epoch": 0.4969247666117518, + "grad_norm": 0.3346729576587677, + "learning_rate": 1.7128248306654995e-05, + "loss": 0.5199, + "step": 18098 + }, + { + "epoch": 0.49695222405271827, + "grad_norm": 0.47727271914482117, + "learning_rate": 1.7127945394970727e-05, + "loss": 0.564, + "step": 18099 + }, + { + "epoch": 0.49697968149368477, + "grad_norm": 0.39138948917388916, + "learning_rate": 1.712764246999057e-05, + "loss": 0.6505, + "step": 18100 + }, + { + "epoch": 0.4970071389346513, + "grad_norm": 0.36027175188064575, + "learning_rate": 1.7127339531715085e-05, + "loss": 0.5194, + "step": 18101 + }, + { + "epoch": 0.4970345963756178, + "grad_norm": 0.3931269645690918, + "learning_rate": 1.7127036580144835e-05, + "loss": 0.5443, + "step": 18102 + }, + { + "epoch": 0.4970620538165843, + "grad_norm": 0.4084882438182831, + "learning_rate": 1.712673361528039e-05, + "loss": 0.405, + "step": 18103 + }, + { + "epoch": 0.4970895112575508, + "grad_norm": 0.45017755031585693, + "learning_rate": 1.7126430637122307e-05, + "loss": 0.467, + "step": 18104 + }, + { + "epoch": 0.4971169686985173, + "grad_norm": 0.4806169867515564, + "learning_rate": 1.712612764567116e-05, + "loss": 0.5294, + "step": 18105 + }, + { + "epoch": 0.4971444261394838, + "grad_norm": 0.9122720956802368, + "learning_rate": 1.712582464092751e-05, + "loss": 0.5502, + "step": 18106 + }, + { + "epoch": 0.4971718835804503, + "grad_norm": 0.35052505135536194, + "learning_rate": 1.7125521622891924e-05, + "loss": 0.4327, + "step": 18107 + }, + { + "epoch": 0.49719934102141683, + "grad_norm": 0.3698379695415497, + "learning_rate": 1.7125218591564964e-05, + "loss": 0.5529, + "step": 18108 + }, + { + "epoch": 0.4972267984623833, + "grad_norm": 0.4450494349002838, + "learning_rate": 1.71249155469472e-05, + "loss": 0.4653, + "step": 18109 + }, + { + "epoch": 0.4972542559033498, + "grad_norm": 0.3529316782951355, + "learning_rate": 1.7124612489039193e-05, + "loss": 0.4957, + "step": 18110 + }, + { + "epoch": 0.4972817133443163, + "grad_norm": 0.3796810805797577, + "learning_rate": 1.7124309417841508e-05, + "loss": 0.5098, + "step": 18111 + }, + { + "epoch": 0.4973091707852828, + "grad_norm": 0.3867679238319397, + "learning_rate": 1.7124006333354716e-05, + "loss": 0.4491, + "step": 18112 + }, + { + "epoch": 0.4973366282262493, + "grad_norm": 0.3879712224006653, + "learning_rate": 1.7123703235579373e-05, + "loss": 0.4744, + "step": 18113 + }, + { + "epoch": 0.4973640856672158, + "grad_norm": 0.4261866807937622, + "learning_rate": 1.7123400124516055e-05, + "loss": 0.565, + "step": 18114 + }, + { + "epoch": 0.49739154310818234, + "grad_norm": 0.3776656687259674, + "learning_rate": 1.7123097000165318e-05, + "loss": 0.4791, + "step": 18115 + }, + { + "epoch": 0.49741900054914884, + "grad_norm": 0.392581969499588, + "learning_rate": 1.7122793862527737e-05, + "loss": 0.408, + "step": 18116 + }, + { + "epoch": 0.49744645799011533, + "grad_norm": 0.35223355889320374, + "learning_rate": 1.7122490711603865e-05, + "loss": 0.5375, + "step": 18117 + }, + { + "epoch": 0.4974739154310818, + "grad_norm": 0.42664703726768494, + "learning_rate": 1.712218754739428e-05, + "loss": 0.5554, + "step": 18118 + }, + { + "epoch": 0.4975013728720483, + "grad_norm": 0.3635009527206421, + "learning_rate": 1.712188436989954e-05, + "loss": 0.437, + "step": 18119 + }, + { + "epoch": 0.4975288303130148, + "grad_norm": 0.36371028423309326, + "learning_rate": 1.712158117912021e-05, + "loss": 0.467, + "step": 18120 + }, + { + "epoch": 0.4975562877539813, + "grad_norm": 0.41507041454315186, + "learning_rate": 1.7121277975056863e-05, + "loss": 0.3786, + "step": 18121 + }, + { + "epoch": 0.49758374519494786, + "grad_norm": 0.3716070055961609, + "learning_rate": 1.7120974757710057e-05, + "loss": 0.5227, + "step": 18122 + }, + { + "epoch": 0.49761120263591435, + "grad_norm": 0.442789763212204, + "learning_rate": 1.7120671527080364e-05, + "loss": 0.4972, + "step": 18123 + }, + { + "epoch": 0.49763866007688085, + "grad_norm": 0.3918672204017639, + "learning_rate": 1.7120368283168344e-05, + "loss": 0.5105, + "step": 18124 + }, + { + "epoch": 0.49766611751784734, + "grad_norm": 0.3890630006790161, + "learning_rate": 1.7120065025974563e-05, + "loss": 0.4685, + "step": 18125 + }, + { + "epoch": 0.49769357495881383, + "grad_norm": 0.3442296087741852, + "learning_rate": 1.711976175549959e-05, + "loss": 0.402, + "step": 18126 + }, + { + "epoch": 0.4977210323997803, + "grad_norm": 0.4134598672389984, + "learning_rate": 1.7119458471743988e-05, + "loss": 0.4537, + "step": 18127 + }, + { + "epoch": 0.4977484898407468, + "grad_norm": 0.39220383763313293, + "learning_rate": 1.7119155174708325e-05, + "loss": 0.4202, + "step": 18128 + }, + { + "epoch": 0.49777594728171337, + "grad_norm": 0.4110613763332367, + "learning_rate": 1.711885186439316e-05, + "loss": 0.5621, + "step": 18129 + }, + { + "epoch": 0.49780340472267987, + "grad_norm": 0.383037805557251, + "learning_rate": 1.711854854079907e-05, + "loss": 0.5798, + "step": 18130 + }, + { + "epoch": 0.49783086216364636, + "grad_norm": 0.3736554980278015, + "learning_rate": 1.7118245203926616e-05, + "loss": 0.5163, + "step": 18131 + }, + { + "epoch": 0.49785831960461285, + "grad_norm": 0.36420005559921265, + "learning_rate": 1.711794185377636e-05, + "loss": 0.4981, + "step": 18132 + }, + { + "epoch": 0.49788577704557935, + "grad_norm": 0.38444778323173523, + "learning_rate": 1.7117638490348868e-05, + "loss": 0.533, + "step": 18133 + }, + { + "epoch": 0.49791323448654584, + "grad_norm": 0.33724531531333923, + "learning_rate": 1.7117335113644712e-05, + "loss": 0.508, + "step": 18134 + }, + { + "epoch": 0.49794069192751234, + "grad_norm": 0.4041788876056671, + "learning_rate": 1.7117031723664456e-05, + "loss": 0.6482, + "step": 18135 + }, + { + "epoch": 0.4979681493684789, + "grad_norm": 0.38526666164398193, + "learning_rate": 1.7116728320408662e-05, + "loss": 0.5896, + "step": 18136 + }, + { + "epoch": 0.4979956068094454, + "grad_norm": 0.35115107893943787, + "learning_rate": 1.7116424903877898e-05, + "loss": 0.5894, + "step": 18137 + }, + { + "epoch": 0.4980230642504119, + "grad_norm": 0.3842831552028656, + "learning_rate": 1.7116121474072727e-05, + "loss": 0.4847, + "step": 18138 + }, + { + "epoch": 0.49805052169137837, + "grad_norm": 0.4195261299610138, + "learning_rate": 1.7115818030993724e-05, + "loss": 0.4895, + "step": 18139 + }, + { + "epoch": 0.49807797913234486, + "grad_norm": 0.3534254729747772, + "learning_rate": 1.711551457464145e-05, + "loss": 0.4726, + "step": 18140 + }, + { + "epoch": 0.49810543657331136, + "grad_norm": 0.38802364468574524, + "learning_rate": 1.7115211105016465e-05, + "loss": 0.4639, + "step": 18141 + }, + { + "epoch": 0.49813289401427785, + "grad_norm": 0.34438204765319824, + "learning_rate": 1.7114907622119344e-05, + "loss": 0.4647, + "step": 18142 + }, + { + "epoch": 0.4981603514552444, + "grad_norm": 0.4019126296043396, + "learning_rate": 1.7114604125950646e-05, + "loss": 0.5288, + "step": 18143 + }, + { + "epoch": 0.4981878088962109, + "grad_norm": 0.35834503173828125, + "learning_rate": 1.7114300616510945e-05, + "loss": 0.561, + "step": 18144 + }, + { + "epoch": 0.4982152663371774, + "grad_norm": 0.3469700515270233, + "learning_rate": 1.71139970938008e-05, + "loss": 0.4863, + "step": 18145 + }, + { + "epoch": 0.4982427237781439, + "grad_norm": 0.38737839460372925, + "learning_rate": 1.711369355782078e-05, + "loss": 0.5523, + "step": 18146 + }, + { + "epoch": 0.4982701812191104, + "grad_norm": 0.36940619349479675, + "learning_rate": 1.7113390008571452e-05, + "loss": 0.4204, + "step": 18147 + }, + { + "epoch": 0.49829763866007687, + "grad_norm": 0.38066819310188293, + "learning_rate": 1.7113086446053383e-05, + "loss": 0.4734, + "step": 18148 + }, + { + "epoch": 0.49832509610104336, + "grad_norm": 0.3531523048877716, + "learning_rate": 1.7112782870267136e-05, + "loss": 0.5487, + "step": 18149 + }, + { + "epoch": 0.49835255354200986, + "grad_norm": 0.3583870530128479, + "learning_rate": 1.7112479281213277e-05, + "loss": 0.524, + "step": 18150 + }, + { + "epoch": 0.4983800109829764, + "grad_norm": 0.38869017362594604, + "learning_rate": 1.7112175678892374e-05, + "loss": 0.4794, + "step": 18151 + }, + { + "epoch": 0.4984074684239429, + "grad_norm": 0.3680994510650635, + "learning_rate": 1.7111872063304998e-05, + "loss": 0.5377, + "step": 18152 + }, + { + "epoch": 0.4984349258649094, + "grad_norm": 0.3784719407558441, + "learning_rate": 1.7111568434451706e-05, + "loss": 0.5343, + "step": 18153 + }, + { + "epoch": 0.4984623833058759, + "grad_norm": 0.4397129714488983, + "learning_rate": 1.7111264792333072e-05, + "loss": 0.48, + "step": 18154 + }, + { + "epoch": 0.4984898407468424, + "grad_norm": 0.45577865839004517, + "learning_rate": 1.7110961136949655e-05, + "loss": 0.6058, + "step": 18155 + }, + { + "epoch": 0.4985172981878089, + "grad_norm": 1.1771347522735596, + "learning_rate": 1.711065746830203e-05, + "loss": 0.4763, + "step": 18156 + }, + { + "epoch": 0.49854475562877537, + "grad_norm": 2.076005220413208, + "learning_rate": 1.7110353786390758e-05, + "loss": 0.566, + "step": 18157 + }, + { + "epoch": 0.4985722130697419, + "grad_norm": 0.3940882682800293, + "learning_rate": 1.711005009121641e-05, + "loss": 0.5302, + "step": 18158 + }, + { + "epoch": 0.4985996705107084, + "grad_norm": 0.4110874831676483, + "learning_rate": 1.7109746382779545e-05, + "loss": 0.6214, + "step": 18159 + }, + { + "epoch": 0.4986271279516749, + "grad_norm": 0.3741133511066437, + "learning_rate": 1.7109442661080735e-05, + "loss": 0.5446, + "step": 18160 + }, + { + "epoch": 0.4986545853926414, + "grad_norm": 0.3794558048248291, + "learning_rate": 1.7109138926120548e-05, + "loss": 0.5918, + "step": 18161 + }, + { + "epoch": 0.4986820428336079, + "grad_norm": 0.39069801568984985, + "learning_rate": 1.7108835177899545e-05, + "loss": 0.4758, + "step": 18162 + }, + { + "epoch": 0.4987095002745744, + "grad_norm": 0.3896941840648651, + "learning_rate": 1.7108531416418296e-05, + "loss": 0.5588, + "step": 18163 + }, + { + "epoch": 0.4987369577155409, + "grad_norm": 0.35241642594337463, + "learning_rate": 1.7108227641677365e-05, + "loss": 0.4834, + "step": 18164 + }, + { + "epoch": 0.49876441515650743, + "grad_norm": 0.41385412216186523, + "learning_rate": 1.7107923853677327e-05, + "loss": 0.4554, + "step": 18165 + }, + { + "epoch": 0.49879187259747393, + "grad_norm": 0.4130232334136963, + "learning_rate": 1.7107620052418736e-05, + "loss": 0.4958, + "step": 18166 + }, + { + "epoch": 0.4988193300384404, + "grad_norm": 0.3209391236305237, + "learning_rate": 1.7107316237902168e-05, + "loss": 0.3982, + "step": 18167 + }, + { + "epoch": 0.4988467874794069, + "grad_norm": 0.39276057481765747, + "learning_rate": 1.7107012410128186e-05, + "loss": 0.5627, + "step": 18168 + }, + { + "epoch": 0.4988742449203734, + "grad_norm": 0.3911188244819641, + "learning_rate": 1.710670856909736e-05, + "loss": 0.5331, + "step": 18169 + }, + { + "epoch": 0.4989017023613399, + "grad_norm": 0.3907928168773651, + "learning_rate": 1.710640471481025e-05, + "loss": 0.5347, + "step": 18170 + }, + { + "epoch": 0.4989291598023064, + "grad_norm": 0.35195717215538025, + "learning_rate": 1.7106100847267432e-05, + "loss": 0.4662, + "step": 18171 + }, + { + "epoch": 0.49895661724327295, + "grad_norm": 0.5661927461624146, + "learning_rate": 1.7105796966469465e-05, + "loss": 0.572, + "step": 18172 + }, + { + "epoch": 0.49898407468423944, + "grad_norm": 0.3568142056465149, + "learning_rate": 1.710549307241692e-05, + "loss": 0.549, + "step": 18173 + }, + { + "epoch": 0.49901153212520594, + "grad_norm": 0.40312129259109497, + "learning_rate": 1.710518916511036e-05, + "loss": 0.6038, + "step": 18174 + }, + { + "epoch": 0.49903898956617243, + "grad_norm": 0.4097234308719635, + "learning_rate": 1.7104885244550357e-05, + "loss": 0.4919, + "step": 18175 + }, + { + "epoch": 0.4990664470071389, + "grad_norm": 0.4440852105617523, + "learning_rate": 1.7104581310737476e-05, + "loss": 0.57, + "step": 18176 + }, + { + "epoch": 0.4990939044481054, + "grad_norm": 0.38012751936912537, + "learning_rate": 1.710427736367228e-05, + "loss": 0.6219, + "step": 18177 + }, + { + "epoch": 0.4991213618890719, + "grad_norm": 0.34073367714881897, + "learning_rate": 1.710397340335534e-05, + "loss": 0.4911, + "step": 18178 + }, + { + "epoch": 0.49914881933003846, + "grad_norm": 0.36202433705329895, + "learning_rate": 1.7103669429787227e-05, + "loss": 0.532, + "step": 18179 + }, + { + "epoch": 0.49917627677100496, + "grad_norm": 0.3895537853240967, + "learning_rate": 1.7103365442968498e-05, + "loss": 0.5897, + "step": 18180 + }, + { + "epoch": 0.49920373421197145, + "grad_norm": 0.3531620502471924, + "learning_rate": 1.7103061442899727e-05, + "loss": 0.5032, + "step": 18181 + }, + { + "epoch": 0.49923119165293794, + "grad_norm": 0.4048424959182739, + "learning_rate": 1.710275742958148e-05, + "loss": 0.5477, + "step": 18182 + }, + { + "epoch": 0.49925864909390444, + "grad_norm": 0.3806428015232086, + "learning_rate": 1.7102453403014325e-05, + "loss": 0.4504, + "step": 18183 + }, + { + "epoch": 0.49928610653487093, + "grad_norm": 0.3631454408168793, + "learning_rate": 1.7102149363198825e-05, + "loss": 0.4073, + "step": 18184 + }, + { + "epoch": 0.4993135639758374, + "grad_norm": 0.39270731806755066, + "learning_rate": 1.710184531013555e-05, + "loss": 0.5698, + "step": 18185 + }, + { + "epoch": 0.499341021416804, + "grad_norm": 0.36737629771232605, + "learning_rate": 1.710154124382507e-05, + "loss": 0.4557, + "step": 18186 + }, + { + "epoch": 0.49936847885777047, + "grad_norm": 0.40107691287994385, + "learning_rate": 1.710123716426794e-05, + "loss": 0.5576, + "step": 18187 + }, + { + "epoch": 0.49939593629873696, + "grad_norm": 0.3575264513492584, + "learning_rate": 1.7100933071464747e-05, + "loss": 0.5534, + "step": 18188 + }, + { + "epoch": 0.49942339373970346, + "grad_norm": 0.4202025234699249, + "learning_rate": 1.710062896541604e-05, + "loss": 0.5612, + "step": 18189 + }, + { + "epoch": 0.49945085118066995, + "grad_norm": 0.3865579068660736, + "learning_rate": 1.71003248461224e-05, + "loss": 0.5457, + "step": 18190 + }, + { + "epoch": 0.49947830862163645, + "grad_norm": 0.49026867747306824, + "learning_rate": 1.7100020713584384e-05, + "loss": 0.5435, + "step": 18191 + }, + { + "epoch": 0.49950576606260294, + "grad_norm": 0.3526456952095032, + "learning_rate": 1.7099716567802565e-05, + "loss": 0.3906, + "step": 18192 + }, + { + "epoch": 0.4995332235035695, + "grad_norm": 0.3963441252708435, + "learning_rate": 1.7099412408777504e-05, + "loss": 0.525, + "step": 18193 + }, + { + "epoch": 0.499560680944536, + "grad_norm": 0.34657251834869385, + "learning_rate": 1.709910823650978e-05, + "loss": 0.5679, + "step": 18194 + }, + { + "epoch": 0.4995881383855025, + "grad_norm": 0.4281478226184845, + "learning_rate": 1.709880405099995e-05, + "loss": 0.4361, + "step": 18195 + }, + { + "epoch": 0.499615595826469, + "grad_norm": 0.3741914629936218, + "learning_rate": 1.7098499852248586e-05, + "loss": 0.4941, + "step": 18196 + }, + { + "epoch": 0.49964305326743547, + "grad_norm": 0.40962764620780945, + "learning_rate": 1.709819564025625e-05, + "loss": 0.5098, + "step": 18197 + }, + { + "epoch": 0.49967051070840196, + "grad_norm": 0.37838900089263916, + "learning_rate": 1.7097891415023515e-05, + "loss": 0.4555, + "step": 18198 + }, + { + "epoch": 0.49969796814936845, + "grad_norm": 0.3677780032157898, + "learning_rate": 1.7097587176550948e-05, + "loss": 0.4762, + "step": 18199 + }, + { + "epoch": 0.499725425590335, + "grad_norm": 0.3437698483467102, + "learning_rate": 1.709728292483912e-05, + "loss": 0.5154, + "step": 18200 + }, + { + "epoch": 0.4997528830313015, + "grad_norm": 0.4435120224952698, + "learning_rate": 1.709697865988859e-05, + "loss": 0.4561, + "step": 18201 + }, + { + "epoch": 0.499780340472268, + "grad_norm": 0.3807353377342224, + "learning_rate": 1.7096674381699928e-05, + "loss": 0.5293, + "step": 18202 + }, + { + "epoch": 0.4998077979132345, + "grad_norm": 0.37894707918167114, + "learning_rate": 1.7096370090273708e-05, + "loss": 0.5655, + "step": 18203 + }, + { + "epoch": 0.499835255354201, + "grad_norm": 0.3475729823112488, + "learning_rate": 1.709606578561049e-05, + "loss": 0.4888, + "step": 18204 + }, + { + "epoch": 0.4998627127951675, + "grad_norm": 0.3682712912559509, + "learning_rate": 1.7095761467710844e-05, + "loss": 0.5781, + "step": 18205 + }, + { + "epoch": 0.49989017023613397, + "grad_norm": 0.36894920468330383, + "learning_rate": 1.709545713657534e-05, + "loss": 0.4365, + "step": 18206 + }, + { + "epoch": 0.4999176276771005, + "grad_norm": 0.5045278668403625, + "learning_rate": 1.7095152792204542e-05, + "loss": 0.4748, + "step": 18207 + }, + { + "epoch": 0.499945085118067, + "grad_norm": 0.357883483171463, + "learning_rate": 1.709484843459902e-05, + "loss": 0.5728, + "step": 18208 + }, + { + "epoch": 0.4999725425590335, + "grad_norm": 0.3584842085838318, + "learning_rate": 1.7094544063759342e-05, + "loss": 0.5232, + "step": 18209 + }, + { + "epoch": 0.5, + "grad_norm": 0.45562970638275146, + "learning_rate": 1.7094239679686074e-05, + "loss": 0.596, + "step": 18210 + }, + { + "epoch": 0.5000274574409665, + "grad_norm": 0.41492700576782227, + "learning_rate": 1.7093935282379787e-05, + "loss": 0.4968, + "step": 18211 + }, + { + "epoch": 0.500054914881933, + "grad_norm": 0.3725951015949249, + "learning_rate": 1.7093630871841042e-05, + "loss": 0.4976, + "step": 18212 + }, + { + "epoch": 0.5000823723228995, + "grad_norm": 0.3786039650440216, + "learning_rate": 1.7093326448070416e-05, + "loss": 0.4984, + "step": 18213 + }, + { + "epoch": 0.500109829763866, + "grad_norm": 0.43422070145606995, + "learning_rate": 1.709302201106847e-05, + "loss": 0.4695, + "step": 18214 + }, + { + "epoch": 0.5001372872048325, + "grad_norm": 0.3747687339782715, + "learning_rate": 1.7092717560835778e-05, + "loss": 0.4471, + "step": 18215 + }, + { + "epoch": 0.500164744645799, + "grad_norm": 0.3790091276168823, + "learning_rate": 1.7092413097372897e-05, + "loss": 0.4797, + "step": 18216 + }, + { + "epoch": 0.5001922020867655, + "grad_norm": 0.4182693362236023, + "learning_rate": 1.7092108620680408e-05, + "loss": 0.4999, + "step": 18217 + }, + { + "epoch": 0.5002196595277321, + "grad_norm": 0.42537251114845276, + "learning_rate": 1.709180413075887e-05, + "loss": 0.536, + "step": 18218 + }, + { + "epoch": 0.5002471169686985, + "grad_norm": 0.3446579575538635, + "learning_rate": 1.7091499627608853e-05, + "loss": 0.4541, + "step": 18219 + }, + { + "epoch": 0.500274574409665, + "grad_norm": 0.37186503410339355, + "learning_rate": 1.709119511123093e-05, + "loss": 0.5694, + "step": 18220 + }, + { + "epoch": 0.5003020318506315, + "grad_norm": 0.49211129546165466, + "learning_rate": 1.709089058162566e-05, + "loss": 0.5075, + "step": 18221 + }, + { + "epoch": 0.500329489291598, + "grad_norm": 0.3689965009689331, + "learning_rate": 1.7090586038793618e-05, + "loss": 0.5325, + "step": 18222 + }, + { + "epoch": 0.5003569467325645, + "grad_norm": 0.3700341284275055, + "learning_rate": 1.7090281482735372e-05, + "loss": 0.5386, + "step": 18223 + }, + { + "epoch": 0.500384404173531, + "grad_norm": 0.5286343097686768, + "learning_rate": 1.7089976913451486e-05, + "loss": 0.5047, + "step": 18224 + }, + { + "epoch": 0.5004118616144976, + "grad_norm": 0.43405434489250183, + "learning_rate": 1.708967233094253e-05, + "loss": 0.6526, + "step": 18225 + }, + { + "epoch": 0.500439319055464, + "grad_norm": 0.37092649936676025, + "learning_rate": 1.7089367735209073e-05, + "loss": 0.5747, + "step": 18226 + }, + { + "epoch": 0.5004667764964306, + "grad_norm": 0.6884369254112244, + "learning_rate": 1.708906312625168e-05, + "loss": 0.495, + "step": 18227 + }, + { + "epoch": 0.500494233937397, + "grad_norm": 0.34716156125068665, + "learning_rate": 1.7088758504070926e-05, + "loss": 0.4412, + "step": 18228 + }, + { + "epoch": 0.5005216913783636, + "grad_norm": 0.35005030035972595, + "learning_rate": 1.708845386866737e-05, + "loss": 0.501, + "step": 18229 + }, + { + "epoch": 0.50054914881933, + "grad_norm": 0.3413712680339813, + "learning_rate": 1.708814922004159e-05, + "loss": 0.502, + "step": 18230 + }, + { + "epoch": 0.5005766062602965, + "grad_norm": 0.3684580326080322, + "learning_rate": 1.708784455819415e-05, + "loss": 0.4705, + "step": 18231 + }, + { + "epoch": 0.5006040637012631, + "grad_norm": 0.38489511609077454, + "learning_rate": 1.7087539883125616e-05, + "loss": 0.5279, + "step": 18232 + }, + { + "epoch": 0.5006315211422295, + "grad_norm": 0.4198662340641022, + "learning_rate": 1.7087235194836552e-05, + "loss": 0.4914, + "step": 18233 + }, + { + "epoch": 0.5006589785831961, + "grad_norm": 0.3855777382850647, + "learning_rate": 1.708693049332754e-05, + "loss": 0.5283, + "step": 18234 + }, + { + "epoch": 0.5006864360241625, + "grad_norm": 0.40187713503837585, + "learning_rate": 1.7086625778599138e-05, + "loss": 0.627, + "step": 18235 + }, + { + "epoch": 0.5007138934651291, + "grad_norm": 0.34507349133491516, + "learning_rate": 1.7086321050651916e-05, + "loss": 0.5656, + "step": 18236 + }, + { + "epoch": 0.5007413509060955, + "grad_norm": 0.3510937988758087, + "learning_rate": 1.7086016309486448e-05, + "loss": 0.4213, + "step": 18237 + }, + { + "epoch": 0.500768808347062, + "grad_norm": 0.3694456219673157, + "learning_rate": 1.7085711555103294e-05, + "loss": 0.4606, + "step": 18238 + }, + { + "epoch": 0.5007962657880286, + "grad_norm": 0.37695667147636414, + "learning_rate": 1.708540678750303e-05, + "loss": 0.4652, + "step": 18239 + }, + { + "epoch": 0.500823723228995, + "grad_norm": 0.3377726972103119, + "learning_rate": 1.7085102006686213e-05, + "loss": 0.4584, + "step": 18240 + }, + { + "epoch": 0.5008511806699616, + "grad_norm": 0.3316347002983093, + "learning_rate": 1.7084797212653427e-05, + "loss": 0.411, + "step": 18241 + }, + { + "epoch": 0.500878638110928, + "grad_norm": 0.39750754833221436, + "learning_rate": 1.7084492405405232e-05, + "loss": 0.5506, + "step": 18242 + }, + { + "epoch": 0.5009060955518946, + "grad_norm": 0.40273407101631165, + "learning_rate": 1.7084187584942196e-05, + "loss": 0.4896, + "step": 18243 + }, + { + "epoch": 0.500933552992861, + "grad_norm": 0.36837077140808105, + "learning_rate": 1.708388275126489e-05, + "loss": 0.5987, + "step": 18244 + }, + { + "epoch": 0.5009610104338276, + "grad_norm": 0.41695207357406616, + "learning_rate": 1.708357790437388e-05, + "loss": 0.5609, + "step": 18245 + }, + { + "epoch": 0.5009884678747941, + "grad_norm": 0.36511173844337463, + "learning_rate": 1.7083273044269736e-05, + "loss": 0.5273, + "step": 18246 + }, + { + "epoch": 0.5010159253157606, + "grad_norm": 0.351592093706131, + "learning_rate": 1.708296817095303e-05, + "loss": 0.5429, + "step": 18247 + }, + { + "epoch": 0.5010433827567271, + "grad_norm": 0.4118198752403259, + "learning_rate": 1.7082663284424326e-05, + "loss": 0.519, + "step": 18248 + }, + { + "epoch": 0.5010708401976935, + "grad_norm": 0.42447248101234436, + "learning_rate": 1.7082358384684193e-05, + "loss": 0.5345, + "step": 18249 + }, + { + "epoch": 0.5010982976386601, + "grad_norm": 0.39835605025291443, + "learning_rate": 1.7082053471733206e-05, + "loss": 0.5517, + "step": 18250 + }, + { + "epoch": 0.5011257550796265, + "grad_norm": 0.3643490970134735, + "learning_rate": 1.7081748545571926e-05, + "loss": 0.5309, + "step": 18251 + }, + { + "epoch": 0.5011532125205931, + "grad_norm": 0.39660945534706116, + "learning_rate": 1.7081443606200924e-05, + "loss": 0.5927, + "step": 18252 + }, + { + "epoch": 0.5011806699615596, + "grad_norm": 0.42283761501312256, + "learning_rate": 1.7081138653620772e-05, + "loss": 0.6286, + "step": 18253 + }, + { + "epoch": 0.5012081274025261, + "grad_norm": 0.3496248722076416, + "learning_rate": 1.7080833687832035e-05, + "loss": 0.5675, + "step": 18254 + }, + { + "epoch": 0.5012355848434926, + "grad_norm": 0.4316891133785248, + "learning_rate": 1.708052870883528e-05, + "loss": 0.5737, + "step": 18255 + }, + { + "epoch": 0.5012630422844591, + "grad_norm": 0.3601992428302765, + "learning_rate": 1.7080223716631082e-05, + "loss": 0.5624, + "step": 18256 + }, + { + "epoch": 0.5012904997254256, + "grad_norm": 0.375296413898468, + "learning_rate": 1.7079918711220008e-05, + "loss": 0.6066, + "step": 18257 + }, + { + "epoch": 0.501317957166392, + "grad_norm": 0.3431360423564911, + "learning_rate": 1.7079613692602626e-05, + "loss": 0.5197, + "step": 18258 + }, + { + "epoch": 0.5013454146073586, + "grad_norm": 0.3618892431259155, + "learning_rate": 1.7079308660779502e-05, + "loss": 0.4989, + "step": 18259 + }, + { + "epoch": 0.5013728720483251, + "grad_norm": 0.3886778652667999, + "learning_rate": 1.707900361575121e-05, + "loss": 0.5228, + "step": 18260 + }, + { + "epoch": 0.5014003294892916, + "grad_norm": 0.34657061100006104, + "learning_rate": 1.707869855751832e-05, + "loss": 0.4668, + "step": 18261 + }, + { + "epoch": 0.5014277869302581, + "grad_norm": 0.3365839123725891, + "learning_rate": 1.7078393486081393e-05, + "loss": 0.5278, + "step": 18262 + }, + { + "epoch": 0.5014552443712246, + "grad_norm": 0.4252789616584778, + "learning_rate": 1.7078088401441005e-05, + "loss": 0.5295, + "step": 18263 + }, + { + "epoch": 0.5014827018121911, + "grad_norm": 0.379203200340271, + "learning_rate": 1.707778330359772e-05, + "loss": 0.5286, + "step": 18264 + }, + { + "epoch": 0.5015101592531576, + "grad_norm": 0.34421226382255554, + "learning_rate": 1.7077478192552114e-05, + "loss": 0.4326, + "step": 18265 + }, + { + "epoch": 0.5015376166941241, + "grad_norm": 0.3814091086387634, + "learning_rate": 1.707717306830475e-05, + "loss": 0.5532, + "step": 18266 + }, + { + "epoch": 0.5015650741350907, + "grad_norm": 0.3631158173084259, + "learning_rate": 1.7076867930856204e-05, + "loss": 0.5211, + "step": 18267 + }, + { + "epoch": 0.5015925315760571, + "grad_norm": 0.3957575559616089, + "learning_rate": 1.7076562780207036e-05, + "loss": 0.5986, + "step": 18268 + }, + { + "epoch": 0.5016199890170236, + "grad_norm": 0.37048327922821045, + "learning_rate": 1.7076257616357822e-05, + "loss": 0.4908, + "step": 18269 + }, + { + "epoch": 0.5016474464579901, + "grad_norm": 0.3629724085330963, + "learning_rate": 1.7075952439309128e-05, + "loss": 0.4554, + "step": 18270 + }, + { + "epoch": 0.5016749038989566, + "grad_norm": 0.35151106119155884, + "learning_rate": 1.7075647249061525e-05, + "loss": 0.5277, + "step": 18271 + }, + { + "epoch": 0.5017023613399231, + "grad_norm": 0.35424408316612244, + "learning_rate": 1.707534204561558e-05, + "loss": 0.4205, + "step": 18272 + }, + { + "epoch": 0.5017298187808896, + "grad_norm": 0.3324435353279114, + "learning_rate": 1.7075036828971864e-05, + "loss": 0.4818, + "step": 18273 + }, + { + "epoch": 0.5017572762218562, + "grad_norm": 0.40016046166419983, + "learning_rate": 1.7074731599130946e-05, + "loss": 0.5589, + "step": 18274 + }, + { + "epoch": 0.5017847336628226, + "grad_norm": 0.5063086748123169, + "learning_rate": 1.7074426356093398e-05, + "loss": 0.4383, + "step": 18275 + }, + { + "epoch": 0.5018121911037892, + "grad_norm": 0.41734758019447327, + "learning_rate": 1.7074121099859788e-05, + "loss": 0.5431, + "step": 18276 + }, + { + "epoch": 0.5018396485447556, + "grad_norm": 0.35300785303115845, + "learning_rate": 1.707381583043068e-05, + "loss": 0.4788, + "step": 18277 + }, + { + "epoch": 0.5018671059857222, + "grad_norm": 0.3644106388092041, + "learning_rate": 1.7073510547806652e-05, + "loss": 0.4759, + "step": 18278 + }, + { + "epoch": 0.5018945634266886, + "grad_norm": 0.40512365102767944, + "learning_rate": 1.7073205251988265e-05, + "loss": 0.5304, + "step": 18279 + }, + { + "epoch": 0.5019220208676551, + "grad_norm": 0.401725709438324, + "learning_rate": 1.7072899942976096e-05, + "loss": 0.5472, + "step": 18280 + }, + { + "epoch": 0.5019494783086217, + "grad_norm": 0.38179975748062134, + "learning_rate": 1.707259462077071e-05, + "loss": 0.5124, + "step": 18281 + }, + { + "epoch": 0.5019769357495881, + "grad_norm": 0.39495307207107544, + "learning_rate": 1.707228928537268e-05, + "loss": 0.513, + "step": 18282 + }, + { + "epoch": 0.5020043931905547, + "grad_norm": 0.3934388756752014, + "learning_rate": 1.7071983936782568e-05, + "loss": 0.5166, + "step": 18283 + }, + { + "epoch": 0.5020318506315211, + "grad_norm": 0.40163472294807434, + "learning_rate": 1.707167857500095e-05, + "loss": 0.4805, + "step": 18284 + }, + { + "epoch": 0.5020593080724877, + "grad_norm": 0.34462910890579224, + "learning_rate": 1.70713732000284e-05, + "loss": 0.475, + "step": 18285 + }, + { + "epoch": 0.5020867655134541, + "grad_norm": 0.5391959547996521, + "learning_rate": 1.7071067811865477e-05, + "loss": 0.5429, + "step": 18286 + }, + { + "epoch": 0.5021142229544207, + "grad_norm": 0.4408515393733978, + "learning_rate": 1.7070762410512758e-05, + "loss": 0.5543, + "step": 18287 + }, + { + "epoch": 0.5021416803953872, + "grad_norm": 0.47167643904685974, + "learning_rate": 1.7070456995970808e-05, + "loss": 0.5634, + "step": 18288 + }, + { + "epoch": 0.5021691378363536, + "grad_norm": 0.4184386134147644, + "learning_rate": 1.7070151568240202e-05, + "loss": 0.4756, + "step": 18289 + }, + { + "epoch": 0.5021965952773202, + "grad_norm": 0.406840980052948, + "learning_rate": 1.7069846127321503e-05, + "loss": 0.5366, + "step": 18290 + }, + { + "epoch": 0.5022240527182866, + "grad_norm": 0.39505401253700256, + "learning_rate": 1.7069540673215287e-05, + "loss": 0.5259, + "step": 18291 + }, + { + "epoch": 0.5022515101592532, + "grad_norm": 0.3747074007987976, + "learning_rate": 1.7069235205922124e-05, + "loss": 0.4102, + "step": 18292 + }, + { + "epoch": 0.5022789676002196, + "grad_norm": 0.45754820108413696, + "learning_rate": 1.7068929725442575e-05, + "loss": 0.5674, + "step": 18293 + }, + { + "epoch": 0.5023064250411862, + "grad_norm": 0.4053472578525543, + "learning_rate": 1.706862423177722e-05, + "loss": 0.5061, + "step": 18294 + }, + { + "epoch": 0.5023338824821527, + "grad_norm": 0.4034256637096405, + "learning_rate": 1.7068318724926626e-05, + "loss": 0.5094, + "step": 18295 + }, + { + "epoch": 0.5023613399231192, + "grad_norm": 0.4249768555164337, + "learning_rate": 1.706801320489136e-05, + "loss": 0.5928, + "step": 18296 + }, + { + "epoch": 0.5023887973640857, + "grad_norm": 0.42865055799484253, + "learning_rate": 1.7067707671671992e-05, + "loss": 0.4687, + "step": 18297 + }, + { + "epoch": 0.5024162548050521, + "grad_norm": 0.374934583902359, + "learning_rate": 1.7067402125269096e-05, + "loss": 0.5065, + "step": 18298 + }, + { + "epoch": 0.5024437122460187, + "grad_norm": 0.3550419509410858, + "learning_rate": 1.7067096565683236e-05, + "loss": 0.4872, + "step": 18299 + }, + { + "epoch": 0.5024711696869851, + "grad_norm": 0.4121793210506439, + "learning_rate": 1.706679099291499e-05, + "loss": 0.5057, + "step": 18300 + }, + { + "epoch": 0.5024986271279517, + "grad_norm": 0.4242143929004669, + "learning_rate": 1.7066485406964917e-05, + "loss": 0.5103, + "step": 18301 + }, + { + "epoch": 0.5025260845689182, + "grad_norm": 0.3878735601902008, + "learning_rate": 1.70661798078336e-05, + "loss": 0.3969, + "step": 18302 + }, + { + "epoch": 0.5025535420098847, + "grad_norm": 0.4393337666988373, + "learning_rate": 1.7065874195521595e-05, + "loss": 0.54, + "step": 18303 + }, + { + "epoch": 0.5025809994508512, + "grad_norm": 0.4017086625099182, + "learning_rate": 1.7065568570029488e-05, + "loss": 0.4868, + "step": 18304 + }, + { + "epoch": 0.5026084568918177, + "grad_norm": 0.42388954758644104, + "learning_rate": 1.7065262931357835e-05, + "loss": 0.5149, + "step": 18305 + }, + { + "epoch": 0.5026359143327842, + "grad_norm": 0.44178521633148193, + "learning_rate": 1.706495727950721e-05, + "loss": 0.519, + "step": 18306 + }, + { + "epoch": 0.5026633717737506, + "grad_norm": 0.3854296803474426, + "learning_rate": 1.706465161447819e-05, + "loss": 0.5029, + "step": 18307 + }, + { + "epoch": 0.5026908292147172, + "grad_norm": 0.4020719826221466, + "learning_rate": 1.7064345936271336e-05, + "loss": 0.5849, + "step": 18308 + }, + { + "epoch": 0.5027182866556837, + "grad_norm": 0.4111078381538391, + "learning_rate": 1.7064040244887224e-05, + "loss": 0.612, + "step": 18309 + }, + { + "epoch": 0.5027457440966502, + "grad_norm": 0.5885664224624634, + "learning_rate": 1.7063734540326424e-05, + "loss": 0.5875, + "step": 18310 + }, + { + "epoch": 0.5027732015376167, + "grad_norm": 0.4228686988353729, + "learning_rate": 1.7063428822589504e-05, + "loss": 0.5024, + "step": 18311 + }, + { + "epoch": 0.5028006589785832, + "grad_norm": 0.3949965834617615, + "learning_rate": 1.706312309167703e-05, + "loss": 0.4586, + "step": 18312 + }, + { + "epoch": 0.5028281164195497, + "grad_norm": 0.35988566279411316, + "learning_rate": 1.7062817347589584e-05, + "loss": 0.5248, + "step": 18313 + }, + { + "epoch": 0.5028555738605162, + "grad_norm": 0.4265875816345215, + "learning_rate": 1.7062511590327725e-05, + "loss": 0.6016, + "step": 18314 + }, + { + "epoch": 0.5028830313014827, + "grad_norm": 0.5422950387001038, + "learning_rate": 1.706220581989203e-05, + "loss": 0.5075, + "step": 18315 + }, + { + "epoch": 0.5029104887424493, + "grad_norm": 0.4016532301902771, + "learning_rate": 1.7061900036283066e-05, + "loss": 0.5456, + "step": 18316 + }, + { + "epoch": 0.5029379461834157, + "grad_norm": 0.35709452629089355, + "learning_rate": 1.7061594239501405e-05, + "loss": 0.5078, + "step": 18317 + }, + { + "epoch": 0.5029654036243822, + "grad_norm": 0.32176473736763, + "learning_rate": 1.706128842954762e-05, + "loss": 0.4078, + "step": 18318 + }, + { + "epoch": 0.5029928610653487, + "grad_norm": 0.380409300327301, + "learning_rate": 1.706098260642227e-05, + "loss": 0.5399, + "step": 18319 + }, + { + "epoch": 0.5030203185063152, + "grad_norm": 0.4212191104888916, + "learning_rate": 1.7060676770125942e-05, + "loss": 0.5708, + "step": 18320 + }, + { + "epoch": 0.5030477759472817, + "grad_norm": 0.36827605962753296, + "learning_rate": 1.7060370920659194e-05, + "loss": 0.4795, + "step": 18321 + }, + { + "epoch": 0.5030752333882482, + "grad_norm": 0.37226402759552, + "learning_rate": 1.70600650580226e-05, + "loss": 0.4898, + "step": 18322 + }, + { + "epoch": 0.5031026908292148, + "grad_norm": 0.38775354623794556, + "learning_rate": 1.7059759182216736e-05, + "loss": 0.5815, + "step": 18323 + }, + { + "epoch": 0.5031301482701812, + "grad_norm": 0.37222155928611755, + "learning_rate": 1.7059453293242166e-05, + "loss": 0.5038, + "step": 18324 + }, + { + "epoch": 0.5031576057111478, + "grad_norm": 0.3465757668018341, + "learning_rate": 1.705914739109946e-05, + "loss": 0.4827, + "step": 18325 + }, + { + "epoch": 0.5031850631521142, + "grad_norm": 0.4027126729488373, + "learning_rate": 1.7058841475789197e-05, + "loss": 0.5028, + "step": 18326 + }, + { + "epoch": 0.5032125205930807, + "grad_norm": 0.427123486995697, + "learning_rate": 1.7058535547311933e-05, + "loss": 0.5075, + "step": 18327 + }, + { + "epoch": 0.5032399780340472, + "grad_norm": 0.4352464973926544, + "learning_rate": 1.7058229605668254e-05, + "loss": 0.4555, + "step": 18328 + }, + { + "epoch": 0.5032674354750137, + "grad_norm": 0.33354371786117554, + "learning_rate": 1.7057923650858722e-05, + "loss": 0.4538, + "step": 18329 + }, + { + "epoch": 0.5032948929159803, + "grad_norm": 0.34731248021125793, + "learning_rate": 1.7057617682883912e-05, + "loss": 0.4766, + "step": 18330 + }, + { + "epoch": 0.5033223503569467, + "grad_norm": 0.35552549362182617, + "learning_rate": 1.7057311701744388e-05, + "loss": 0.5096, + "step": 18331 + }, + { + "epoch": 0.5033498077979133, + "grad_norm": 0.3730519413948059, + "learning_rate": 1.7057005707440728e-05, + "loss": 0.5076, + "step": 18332 + }, + { + "epoch": 0.5033772652388797, + "grad_norm": 0.3710536062717438, + "learning_rate": 1.70566996999735e-05, + "loss": 0.4792, + "step": 18333 + }, + { + "epoch": 0.5034047226798463, + "grad_norm": 0.37105944752693176, + "learning_rate": 1.7056393679343272e-05, + "loss": 0.5302, + "step": 18334 + }, + { + "epoch": 0.5034321801208127, + "grad_norm": 0.4189833402633667, + "learning_rate": 1.705608764555062e-05, + "loss": 0.5046, + "step": 18335 + }, + { + "epoch": 0.5034596375617792, + "grad_norm": 0.43591082096099854, + "learning_rate": 1.7055781598596115e-05, + "loss": 0.5204, + "step": 18336 + }, + { + "epoch": 0.5034870950027458, + "grad_norm": 0.4085991680622101, + "learning_rate": 1.7055475538480323e-05, + "loss": 0.4695, + "step": 18337 + }, + { + "epoch": 0.5035145524437122, + "grad_norm": 0.34525325894355774, + "learning_rate": 1.7055169465203818e-05, + "loss": 0.5076, + "step": 18338 + }, + { + "epoch": 0.5035420098846788, + "grad_norm": 0.3884754180908203, + "learning_rate": 1.705486337876717e-05, + "loss": 0.4782, + "step": 18339 + }, + { + "epoch": 0.5035694673256452, + "grad_norm": 0.34470629692077637, + "learning_rate": 1.705455727917095e-05, + "loss": 0.5067, + "step": 18340 + }, + { + "epoch": 0.5035969247666118, + "grad_norm": 0.33834901452064514, + "learning_rate": 1.7054251166415726e-05, + "loss": 0.4363, + "step": 18341 + }, + { + "epoch": 0.5036243822075782, + "grad_norm": 0.806690514087677, + "learning_rate": 1.7053945040502076e-05, + "loss": 0.4645, + "step": 18342 + }, + { + "epoch": 0.5036518396485448, + "grad_norm": 0.39665651321411133, + "learning_rate": 1.7053638901430565e-05, + "loss": 0.4778, + "step": 18343 + }, + { + "epoch": 0.5036792970895112, + "grad_norm": 0.369426965713501, + "learning_rate": 1.705333274920177e-05, + "loss": 0.6007, + "step": 18344 + }, + { + "epoch": 0.5037067545304778, + "grad_norm": 0.3898095488548279, + "learning_rate": 1.7053026583816255e-05, + "loss": 0.4429, + "step": 18345 + }, + { + "epoch": 0.5037342119714443, + "grad_norm": 0.45592570304870605, + "learning_rate": 1.7052720405274594e-05, + "loss": 0.5001, + "step": 18346 + }, + { + "epoch": 0.5037616694124107, + "grad_norm": 0.37848100066185, + "learning_rate": 1.705241421357736e-05, + "loss": 0.5036, + "step": 18347 + }, + { + "epoch": 0.5037891268533773, + "grad_norm": 0.41575393080711365, + "learning_rate": 1.705210800872512e-05, + "loss": 0.5532, + "step": 18348 + }, + { + "epoch": 0.5038165842943437, + "grad_norm": 0.4008714556694031, + "learning_rate": 1.705180179071845e-05, + "loss": 0.5605, + "step": 18349 + }, + { + "epoch": 0.5038440417353103, + "grad_norm": 0.37351468205451965, + "learning_rate": 1.7051495559557918e-05, + "loss": 0.4615, + "step": 18350 + }, + { + "epoch": 0.5038714991762767, + "grad_norm": 0.3717767298221588, + "learning_rate": 1.7051189315244098e-05, + "loss": 0.5199, + "step": 18351 + }, + { + "epoch": 0.5038989566172433, + "grad_norm": 1.138818621635437, + "learning_rate": 1.7050883057777557e-05, + "loss": 0.4915, + "step": 18352 + }, + { + "epoch": 0.5039264140582098, + "grad_norm": 0.41945356130599976, + "learning_rate": 1.705057678715887e-05, + "loss": 0.5253, + "step": 18353 + }, + { + "epoch": 0.5039538714991763, + "grad_norm": 0.445121705532074, + "learning_rate": 1.705027050338861e-05, + "loss": 0.5167, + "step": 18354 + }, + { + "epoch": 0.5039813289401428, + "grad_norm": 1.1888108253479004, + "learning_rate": 1.704996420646734e-05, + "loss": 0.5426, + "step": 18355 + }, + { + "epoch": 0.5040087863811092, + "grad_norm": 0.35418960452079773, + "learning_rate": 1.704965789639564e-05, + "loss": 0.4427, + "step": 18356 + }, + { + "epoch": 0.5040362438220758, + "grad_norm": 0.3944084942340851, + "learning_rate": 1.704935157317408e-05, + "loss": 0.5292, + "step": 18357 + }, + { + "epoch": 0.5040637012630422, + "grad_norm": 0.416432648897171, + "learning_rate": 1.7049045236803224e-05, + "loss": 0.5317, + "step": 18358 + }, + { + "epoch": 0.5040911587040088, + "grad_norm": 0.3697250485420227, + "learning_rate": 1.7048738887283654e-05, + "loss": 0.4639, + "step": 18359 + }, + { + "epoch": 0.5041186161449753, + "grad_norm": 0.355241596698761, + "learning_rate": 1.704843252461593e-05, + "loss": 0.471, + "step": 18360 + }, + { + "epoch": 0.5041460735859418, + "grad_norm": 0.37289518117904663, + "learning_rate": 1.7048126148800634e-05, + "loss": 0.485, + "step": 18361 + }, + { + "epoch": 0.5041735310269083, + "grad_norm": 0.3860722780227661, + "learning_rate": 1.7047819759838337e-05, + "loss": 0.5578, + "step": 18362 + }, + { + "epoch": 0.5042009884678748, + "grad_norm": 0.545289158821106, + "learning_rate": 1.7047513357729598e-05, + "loss": 0.5628, + "step": 18363 + }, + { + "epoch": 0.5042284459088413, + "grad_norm": 0.5187903046607971, + "learning_rate": 1.7047206942475e-05, + "loss": 0.5131, + "step": 18364 + }, + { + "epoch": 0.5042559033498077, + "grad_norm": 0.37178850173950195, + "learning_rate": 1.7046900514075115e-05, + "loss": 0.5683, + "step": 18365 + }, + { + "epoch": 0.5042833607907743, + "grad_norm": 0.40599679946899414, + "learning_rate": 1.704659407253051e-05, + "loss": 0.5557, + "step": 18366 + }, + { + "epoch": 0.5043108182317408, + "grad_norm": 0.39342787861824036, + "learning_rate": 1.704628761784176e-05, + "loss": 0.4974, + "step": 18367 + }, + { + "epoch": 0.5043382756727073, + "grad_norm": 0.3889973759651184, + "learning_rate": 1.704598115000943e-05, + "loss": 0.5442, + "step": 18368 + }, + { + "epoch": 0.5043657331136738, + "grad_norm": 0.38861945271492004, + "learning_rate": 1.7045674669034104e-05, + "loss": 0.4776, + "step": 18369 + }, + { + "epoch": 0.5043931905546403, + "grad_norm": 0.35963380336761475, + "learning_rate": 1.7045368174916336e-05, + "loss": 0.494, + "step": 18370 + }, + { + "epoch": 0.5044206479956068, + "grad_norm": 0.45133689045906067, + "learning_rate": 1.7045061667656716e-05, + "loss": 0.5189, + "step": 18371 + }, + { + "epoch": 0.5044481054365733, + "grad_norm": 0.5000846982002258, + "learning_rate": 1.7044755147255804e-05, + "loss": 0.5723, + "step": 18372 + }, + { + "epoch": 0.5044755628775398, + "grad_norm": 0.45064589381217957, + "learning_rate": 1.7044448613714175e-05, + "loss": 0.5694, + "step": 18373 + }, + { + "epoch": 0.5045030203185064, + "grad_norm": 0.5103515982627869, + "learning_rate": 1.7044142067032402e-05, + "loss": 0.6114, + "step": 18374 + }, + { + "epoch": 0.5045304777594728, + "grad_norm": 0.39816761016845703, + "learning_rate": 1.7043835507211054e-05, + "loss": 0.5825, + "step": 18375 + }, + { + "epoch": 0.5045579352004393, + "grad_norm": 0.42666882276535034, + "learning_rate": 1.7043528934250707e-05, + "loss": 0.5932, + "step": 18376 + }, + { + "epoch": 0.5045853926414058, + "grad_norm": 0.3621983230113983, + "learning_rate": 1.7043222348151927e-05, + "loss": 0.488, + "step": 18377 + }, + { + "epoch": 0.5046128500823723, + "grad_norm": 0.37762001156806946, + "learning_rate": 1.7042915748915294e-05, + "loss": 0.4816, + "step": 18378 + }, + { + "epoch": 0.5046403075233388, + "grad_norm": 0.35587337613105774, + "learning_rate": 1.7042609136541372e-05, + "loss": 0.5208, + "step": 18379 + }, + { + "epoch": 0.5046677649643053, + "grad_norm": 0.37579256296157837, + "learning_rate": 1.704230251103074e-05, + "loss": 0.5565, + "step": 18380 + }, + { + "epoch": 0.5046952224052719, + "grad_norm": 0.32276269793510437, + "learning_rate": 1.704199587238396e-05, + "loss": 0.4632, + "step": 18381 + }, + { + "epoch": 0.5047226798462383, + "grad_norm": 0.35776588320732117, + "learning_rate": 1.7041689220601613e-05, + "loss": 0.5006, + "step": 18382 + }, + { + "epoch": 0.5047501372872049, + "grad_norm": 0.3579261302947998, + "learning_rate": 1.704138255568427e-05, + "loss": 0.5576, + "step": 18383 + }, + { + "epoch": 0.5047775947281713, + "grad_norm": 0.43764299154281616, + "learning_rate": 1.70410758776325e-05, + "loss": 0.4691, + "step": 18384 + }, + { + "epoch": 0.5048050521691378, + "grad_norm": 0.39894604682922363, + "learning_rate": 1.7040769186446876e-05, + "loss": 0.5837, + "step": 18385 + }, + { + "epoch": 0.5048325096101043, + "grad_norm": 0.3664305806159973, + "learning_rate": 1.7040462482127972e-05, + "loss": 0.4945, + "step": 18386 + }, + { + "epoch": 0.5048599670510708, + "grad_norm": 0.37880370020866394, + "learning_rate": 1.7040155764676357e-05, + "loss": 0.4569, + "step": 18387 + }, + { + "epoch": 0.5048874244920374, + "grad_norm": 0.37551209330558777, + "learning_rate": 1.7039849034092605e-05, + "loss": 0.4934, + "step": 18388 + }, + { + "epoch": 0.5049148819330038, + "grad_norm": 0.3922435939311981, + "learning_rate": 1.7039542290377284e-05, + "loss": 0.5652, + "step": 18389 + }, + { + "epoch": 0.5049423393739704, + "grad_norm": 0.37158751487731934, + "learning_rate": 1.7039235533530976e-05, + "loss": 0.5084, + "step": 18390 + }, + { + "epoch": 0.5049697968149368, + "grad_norm": 0.38584575057029724, + "learning_rate": 1.7038928763554242e-05, + "loss": 0.4803, + "step": 18391 + }, + { + "epoch": 0.5049972542559034, + "grad_norm": 0.4343508780002594, + "learning_rate": 1.7038621980447664e-05, + "loss": 0.4334, + "step": 18392 + }, + { + "epoch": 0.5050247116968698, + "grad_norm": 0.35185033082962036, + "learning_rate": 1.7038315184211807e-05, + "loss": 0.4676, + "step": 18393 + }, + { + "epoch": 0.5050521691378363, + "grad_norm": 0.30139675736427307, + "learning_rate": 1.7038008374847245e-05, + "loss": 0.4636, + "step": 18394 + }, + { + "epoch": 0.5050796265788029, + "grad_norm": 0.5083524584770203, + "learning_rate": 1.7037701552354553e-05, + "loss": 0.4812, + "step": 18395 + }, + { + "epoch": 0.5051070840197693, + "grad_norm": 0.35708630084991455, + "learning_rate": 1.70373947167343e-05, + "loss": 0.4813, + "step": 18396 + }, + { + "epoch": 0.5051345414607359, + "grad_norm": 0.36972567439079285, + "learning_rate": 1.703708786798706e-05, + "loss": 0.5131, + "step": 18397 + }, + { + "epoch": 0.5051619989017023, + "grad_norm": 0.4062800705432892, + "learning_rate": 1.7036781006113407e-05, + "loss": 0.5598, + "step": 18398 + }, + { + "epoch": 0.5051894563426689, + "grad_norm": 0.41372150182724, + "learning_rate": 1.7036474131113908e-05, + "loss": 0.5145, + "step": 18399 + }, + { + "epoch": 0.5052169137836353, + "grad_norm": 0.5446830987930298, + "learning_rate": 1.7036167242989142e-05, + "loss": 0.4838, + "step": 18400 + }, + { + "epoch": 0.5052443712246019, + "grad_norm": 0.4011324346065521, + "learning_rate": 1.7035860341739676e-05, + "loss": 0.4942, + "step": 18401 + }, + { + "epoch": 0.5052718286655684, + "grad_norm": 0.37036147713661194, + "learning_rate": 1.7035553427366084e-05, + "loss": 0.5387, + "step": 18402 + }, + { + "epoch": 0.5052992861065349, + "grad_norm": 0.38475868105888367, + "learning_rate": 1.7035246499868943e-05, + "loss": 0.5602, + "step": 18403 + }, + { + "epoch": 0.5053267435475014, + "grad_norm": 0.3593212962150574, + "learning_rate": 1.703493955924882e-05, + "loss": 0.5289, + "step": 18404 + }, + { + "epoch": 0.5053542009884678, + "grad_norm": 0.3718862533569336, + "learning_rate": 1.703463260550629e-05, + "loss": 0.5301, + "step": 18405 + }, + { + "epoch": 0.5053816584294344, + "grad_norm": 0.38726404309272766, + "learning_rate": 1.7034325638641923e-05, + "loss": 0.5967, + "step": 18406 + }, + { + "epoch": 0.5054091158704008, + "grad_norm": 0.41226547956466675, + "learning_rate": 1.7034018658656295e-05, + "loss": 0.537, + "step": 18407 + }, + { + "epoch": 0.5054365733113674, + "grad_norm": 0.3216029405593872, + "learning_rate": 1.7033711665549978e-05, + "loss": 0.4885, + "step": 18408 + }, + { + "epoch": 0.5054640307523339, + "grad_norm": 0.37696218490600586, + "learning_rate": 1.7033404659323544e-05, + "loss": 0.5344, + "step": 18409 + }, + { + "epoch": 0.5054914881933004, + "grad_norm": 0.4029962420463562, + "learning_rate": 1.7033097639977566e-05, + "loss": 0.5695, + "step": 18410 + }, + { + "epoch": 0.5055189456342669, + "grad_norm": 0.4111739695072174, + "learning_rate": 1.7032790607512613e-05, + "loss": 0.4809, + "step": 18411 + }, + { + "epoch": 0.5055464030752334, + "grad_norm": 0.4192233383655548, + "learning_rate": 1.7032483561929264e-05, + "loss": 0.4916, + "step": 18412 + }, + { + "epoch": 0.5055738605161999, + "grad_norm": 0.3672683835029602, + "learning_rate": 1.703217650322809e-05, + "loss": 0.5369, + "step": 18413 + }, + { + "epoch": 0.5056013179571663, + "grad_norm": 0.3909851908683777, + "learning_rate": 1.703186943140966e-05, + "loss": 0.4609, + "step": 18414 + }, + { + "epoch": 0.5056287753981329, + "grad_norm": 0.41092073917388916, + "learning_rate": 1.703156234647455e-05, + "loss": 0.5245, + "step": 18415 + }, + { + "epoch": 0.5056562328390994, + "grad_norm": 0.4065009355545044, + "learning_rate": 1.703125524842333e-05, + "loss": 0.5343, + "step": 18416 + }, + { + "epoch": 0.5056836902800659, + "grad_norm": 0.3893324136734009, + "learning_rate": 1.7030948137256574e-05, + "loss": 0.5313, + "step": 18417 + }, + { + "epoch": 0.5057111477210324, + "grad_norm": 0.4140928387641907, + "learning_rate": 1.703064101297486e-05, + "loss": 0.6168, + "step": 18418 + }, + { + "epoch": 0.5057386051619989, + "grad_norm": 0.34070661664009094, + "learning_rate": 1.7030333875578753e-05, + "loss": 0.3746, + "step": 18419 + }, + { + "epoch": 0.5057660626029654, + "grad_norm": 0.4070437550544739, + "learning_rate": 1.7030026725068828e-05, + "loss": 0.5295, + "step": 18420 + }, + { + "epoch": 0.5057935200439319, + "grad_norm": 0.3943770229816437, + "learning_rate": 1.7029719561445665e-05, + "loss": 0.5746, + "step": 18421 + }, + { + "epoch": 0.5058209774848984, + "grad_norm": 0.4280271530151367, + "learning_rate": 1.7029412384709828e-05, + "loss": 0.5067, + "step": 18422 + }, + { + "epoch": 0.505848434925865, + "grad_norm": 0.34529566764831543, + "learning_rate": 1.7029105194861894e-05, + "loss": 0.5148, + "step": 18423 + }, + { + "epoch": 0.5058758923668314, + "grad_norm": 0.373695969581604, + "learning_rate": 1.7028797991902436e-05, + "loss": 0.4919, + "step": 18424 + }, + { + "epoch": 0.5059033498077979, + "grad_norm": 0.3645274043083191, + "learning_rate": 1.7028490775832026e-05, + "loss": 0.5248, + "step": 18425 + }, + { + "epoch": 0.5059308072487644, + "grad_norm": 0.3779239356517792, + "learning_rate": 1.7028183546651236e-05, + "loss": 0.4929, + "step": 18426 + }, + { + "epoch": 0.5059582646897309, + "grad_norm": 0.41468146443367004, + "learning_rate": 1.7027876304360643e-05, + "loss": 0.4947, + "step": 18427 + }, + { + "epoch": 0.5059857221306974, + "grad_norm": 0.3308921456336975, + "learning_rate": 1.7027569048960816e-05, + "loss": 0.4178, + "step": 18428 + }, + { + "epoch": 0.5060131795716639, + "grad_norm": 0.39613571763038635, + "learning_rate": 1.702726178045233e-05, + "loss": 0.514, + "step": 18429 + }, + { + "epoch": 0.5060406370126305, + "grad_norm": 0.4320034086704254, + "learning_rate": 1.702695449883576e-05, + "loss": 0.535, + "step": 18430 + }, + { + "epoch": 0.5060680944535969, + "grad_norm": 0.33359208703041077, + "learning_rate": 1.7026647204111677e-05, + "loss": 0.4323, + "step": 18431 + }, + { + "epoch": 0.5060955518945635, + "grad_norm": 0.37847700715065, + "learning_rate": 1.702633989628065e-05, + "loss": 0.5516, + "step": 18432 + }, + { + "epoch": 0.5061230093355299, + "grad_norm": 0.396894633769989, + "learning_rate": 1.702603257534326e-05, + "loss": 0.5429, + "step": 18433 + }, + { + "epoch": 0.5061504667764964, + "grad_norm": 0.404427170753479, + "learning_rate": 1.7025725241300077e-05, + "loss": 0.5313, + "step": 18434 + }, + { + "epoch": 0.5061779242174629, + "grad_norm": 0.42284753918647766, + "learning_rate": 1.7025417894151674e-05, + "loss": 0.4743, + "step": 18435 + }, + { + "epoch": 0.5062053816584294, + "grad_norm": 0.41090935468673706, + "learning_rate": 1.702511053389862e-05, + "loss": 0.5158, + "step": 18436 + }, + { + "epoch": 0.506232839099396, + "grad_norm": 0.36088189482688904, + "learning_rate": 1.7024803160541497e-05, + "loss": 0.4265, + "step": 18437 + }, + { + "epoch": 0.5062602965403624, + "grad_norm": 0.3547121286392212, + "learning_rate": 1.7024495774080875e-05, + "loss": 0.528, + "step": 18438 + }, + { + "epoch": 0.506287753981329, + "grad_norm": 0.38209617137908936, + "learning_rate": 1.7024188374517324e-05, + "loss": 0.5344, + "step": 18439 + }, + { + "epoch": 0.5063152114222954, + "grad_norm": 0.3724437355995178, + "learning_rate": 1.7023880961851418e-05, + "loss": 0.493, + "step": 18440 + }, + { + "epoch": 0.506342668863262, + "grad_norm": 0.35482069849967957, + "learning_rate": 1.7023573536083734e-05, + "loss": 0.5301, + "step": 18441 + }, + { + "epoch": 0.5063701263042284, + "grad_norm": 0.33121055364608765, + "learning_rate": 1.702326609721485e-05, + "loss": 0.4844, + "step": 18442 + }, + { + "epoch": 0.506397583745195, + "grad_norm": 0.33862969279289246, + "learning_rate": 1.7022958645245323e-05, + "loss": 0.4786, + "step": 18443 + }, + { + "epoch": 0.5064250411861615, + "grad_norm": 0.37278664112091064, + "learning_rate": 1.702265118017574e-05, + "loss": 0.59, + "step": 18444 + }, + { + "epoch": 0.5064524986271279, + "grad_norm": 0.363298237323761, + "learning_rate": 1.7022343702006675e-05, + "loss": 0.4775, + "step": 18445 + }, + { + "epoch": 0.5064799560680945, + "grad_norm": 0.34477025270462036, + "learning_rate": 1.7022036210738696e-05, + "loss": 0.4828, + "step": 18446 + }, + { + "epoch": 0.5065074135090609, + "grad_norm": 0.3330436050891876, + "learning_rate": 1.7021728706372376e-05, + "loss": 0.521, + "step": 18447 + }, + { + "epoch": 0.5065348709500275, + "grad_norm": 0.372567355632782, + "learning_rate": 1.702142118890829e-05, + "loss": 0.4893, + "step": 18448 + }, + { + "epoch": 0.5065623283909939, + "grad_norm": 0.3821532130241394, + "learning_rate": 1.7021113658347018e-05, + "loss": 0.4744, + "step": 18449 + }, + { + "epoch": 0.5065897858319605, + "grad_norm": 0.4222080111503601, + "learning_rate": 1.7020806114689126e-05, + "loss": 0.4908, + "step": 18450 + }, + { + "epoch": 0.506617243272927, + "grad_norm": 0.36788201332092285, + "learning_rate": 1.7020498557935187e-05, + "loss": 0.5006, + "step": 18451 + }, + { + "epoch": 0.5066447007138934, + "grad_norm": 0.44815585017204285, + "learning_rate": 1.702019098808578e-05, + "loss": 0.5283, + "step": 18452 + }, + { + "epoch": 0.50667215815486, + "grad_norm": 0.42253613471984863, + "learning_rate": 1.7019883405141476e-05, + "loss": 0.5097, + "step": 18453 + }, + { + "epoch": 0.5066996155958264, + "grad_norm": 0.348308265209198, + "learning_rate": 1.701957580910285e-05, + "loss": 0.4941, + "step": 18454 + }, + { + "epoch": 0.506727073036793, + "grad_norm": 0.37384557723999023, + "learning_rate": 1.701926819997047e-05, + "loss": 0.4002, + "step": 18455 + }, + { + "epoch": 0.5067545304777594, + "grad_norm": 0.40464481711387634, + "learning_rate": 1.701896057774492e-05, + "loss": 0.5161, + "step": 18456 + }, + { + "epoch": 0.506781987918726, + "grad_norm": 0.39037540555000305, + "learning_rate": 1.7018652942426766e-05, + "loss": 0.4508, + "step": 18457 + }, + { + "epoch": 0.5068094453596925, + "grad_norm": 0.3811308741569519, + "learning_rate": 1.7018345294016584e-05, + "loss": 0.5884, + "step": 18458 + }, + { + "epoch": 0.506836902800659, + "grad_norm": 0.38915011286735535, + "learning_rate": 1.7018037632514948e-05, + "loss": 0.544, + "step": 18459 + }, + { + "epoch": 0.5068643602416255, + "grad_norm": 0.36638984084129333, + "learning_rate": 1.7017729957922432e-05, + "loss": 0.4872, + "step": 18460 + }, + { + "epoch": 0.506891817682592, + "grad_norm": 0.35239800810813904, + "learning_rate": 1.7017422270239608e-05, + "loss": 0.4945, + "step": 18461 + }, + { + "epoch": 0.5069192751235585, + "grad_norm": 0.3758838474750519, + "learning_rate": 1.7017114569467056e-05, + "loss": 0.4405, + "step": 18462 + }, + { + "epoch": 0.5069467325645249, + "grad_norm": 0.40256282687187195, + "learning_rate": 1.7016806855605343e-05, + "loss": 0.6294, + "step": 18463 + }, + { + "epoch": 0.5069741900054915, + "grad_norm": 0.3415452241897583, + "learning_rate": 1.7016499128655048e-05, + "loss": 0.5271, + "step": 18464 + }, + { + "epoch": 0.507001647446458, + "grad_norm": 0.3873440623283386, + "learning_rate": 1.7016191388616738e-05, + "loss": 0.4829, + "step": 18465 + }, + { + "epoch": 0.5070291048874245, + "grad_norm": 0.39236801862716675, + "learning_rate": 1.7015883635490996e-05, + "loss": 0.5772, + "step": 18466 + }, + { + "epoch": 0.507056562328391, + "grad_norm": 0.39547935128211975, + "learning_rate": 1.7015575869278392e-05, + "loss": 0.5087, + "step": 18467 + }, + { + "epoch": 0.5070840197693575, + "grad_norm": 0.41498100757598877, + "learning_rate": 1.70152680899795e-05, + "loss": 0.4721, + "step": 18468 + }, + { + "epoch": 0.507111477210324, + "grad_norm": 0.3713070750236511, + "learning_rate": 1.7014960297594888e-05, + "loss": 0.5034, + "step": 18469 + }, + { + "epoch": 0.5071389346512905, + "grad_norm": 0.3827979266643524, + "learning_rate": 1.701465249212514e-05, + "loss": 0.4533, + "step": 18470 + }, + { + "epoch": 0.507166392092257, + "grad_norm": 0.37760066986083984, + "learning_rate": 1.701434467357083e-05, + "loss": 0.58, + "step": 18471 + }, + { + "epoch": 0.5071938495332236, + "grad_norm": 0.4089290201663971, + "learning_rate": 1.7014036841932523e-05, + "loss": 0.4889, + "step": 18472 + }, + { + "epoch": 0.50722130697419, + "grad_norm": 0.44893211126327515, + "learning_rate": 1.70137289972108e-05, + "loss": 0.4222, + "step": 18473 + }, + { + "epoch": 0.5072487644151565, + "grad_norm": 0.3953304886817932, + "learning_rate": 1.7013421139406237e-05, + "loss": 0.4881, + "step": 18474 + }, + { + "epoch": 0.507276221856123, + "grad_norm": 0.3444829285144806, + "learning_rate": 1.7013113268519402e-05, + "loss": 0.529, + "step": 18475 + }, + { + "epoch": 0.5073036792970895, + "grad_norm": 0.3682324290275574, + "learning_rate": 1.7012805384550876e-05, + "loss": 0.4937, + "step": 18476 + }, + { + "epoch": 0.507331136738056, + "grad_norm": 0.4004669487476349, + "learning_rate": 1.7012497487501226e-05, + "loss": 0.5631, + "step": 18477 + }, + { + "epoch": 0.5073585941790225, + "grad_norm": 0.40080103278160095, + "learning_rate": 1.701218957737103e-05, + "loss": 0.5464, + "step": 18478 + }, + { + "epoch": 0.5073860516199891, + "grad_norm": 0.36055049300193787, + "learning_rate": 1.7011881654160865e-05, + "loss": 0.4823, + "step": 18479 + }, + { + "epoch": 0.5074135090609555, + "grad_norm": 0.34053465723991394, + "learning_rate": 1.70115737178713e-05, + "loss": 0.4418, + "step": 18480 + }, + { + "epoch": 0.507440966501922, + "grad_norm": 0.4014171063899994, + "learning_rate": 1.7011265768502912e-05, + "loss": 0.6195, + "step": 18481 + }, + { + "epoch": 0.5074684239428885, + "grad_norm": 0.37327343225479126, + "learning_rate": 1.7010957806056278e-05, + "loss": 0.5075, + "step": 18482 + }, + { + "epoch": 0.507495881383855, + "grad_norm": 0.32317957282066345, + "learning_rate": 1.701064983053197e-05, + "loss": 0.4789, + "step": 18483 + }, + { + "epoch": 0.5075233388248215, + "grad_norm": 0.35024353861808777, + "learning_rate": 1.701034184193056e-05, + "loss": 0.5356, + "step": 18484 + }, + { + "epoch": 0.507550796265788, + "grad_norm": 0.36012688279151917, + "learning_rate": 1.701003384025263e-05, + "loss": 0.5482, + "step": 18485 + }, + { + "epoch": 0.5075782537067546, + "grad_norm": 0.38247373700141907, + "learning_rate": 1.7009725825498743e-05, + "loss": 0.5114, + "step": 18486 + }, + { + "epoch": 0.507605711147721, + "grad_norm": 0.37171828746795654, + "learning_rate": 1.7009417797669487e-05, + "loss": 0.5281, + "step": 18487 + }, + { + "epoch": 0.5076331685886876, + "grad_norm": 0.3859644830226898, + "learning_rate": 1.7009109756765426e-05, + "loss": 0.475, + "step": 18488 + }, + { + "epoch": 0.507660626029654, + "grad_norm": 0.4409497082233429, + "learning_rate": 1.7008801702787138e-05, + "loss": 0.5357, + "step": 18489 + }, + { + "epoch": 0.5076880834706206, + "grad_norm": 0.4524669349193573, + "learning_rate": 1.7008493635735197e-05, + "loss": 0.4869, + "step": 18490 + }, + { + "epoch": 0.507715540911587, + "grad_norm": 0.3758498430252075, + "learning_rate": 1.700818555561018e-05, + "loss": 0.6059, + "step": 18491 + }, + { + "epoch": 0.5077429983525535, + "grad_norm": 0.3690436780452728, + "learning_rate": 1.7007877462412658e-05, + "loss": 0.4565, + "step": 18492 + }, + { + "epoch": 0.5077704557935201, + "grad_norm": 0.3430847227573395, + "learning_rate": 1.700756935614321e-05, + "loss": 0.5392, + "step": 18493 + }, + { + "epoch": 0.5077979132344865, + "grad_norm": 0.48653444647789, + "learning_rate": 1.7007261236802408e-05, + "loss": 0.5555, + "step": 18494 + }, + { + "epoch": 0.5078253706754531, + "grad_norm": 0.3584933876991272, + "learning_rate": 1.7006953104390827e-05, + "loss": 0.449, + "step": 18495 + }, + { + "epoch": 0.5078528281164195, + "grad_norm": 0.36981719732284546, + "learning_rate": 1.7006644958909047e-05, + "loss": 0.4526, + "step": 18496 + }, + { + "epoch": 0.5078802855573861, + "grad_norm": 0.36832261085510254, + "learning_rate": 1.7006336800357632e-05, + "loss": 0.4799, + "step": 18497 + }, + { + "epoch": 0.5079077429983525, + "grad_norm": 0.3313303589820862, + "learning_rate": 1.7006028628737167e-05, + "loss": 0.4966, + "step": 18498 + }, + { + "epoch": 0.5079352004393191, + "grad_norm": 0.35143229365348816, + "learning_rate": 1.7005720444048217e-05, + "loss": 0.5179, + "step": 18499 + }, + { + "epoch": 0.5079626578802856, + "grad_norm": 0.33716607093811035, + "learning_rate": 1.7005412246291368e-05, + "loss": 0.5125, + "step": 18500 + }, + { + "epoch": 0.507990115321252, + "grad_norm": 0.4248391091823578, + "learning_rate": 1.7005104035467182e-05, + "loss": 0.5793, + "step": 18501 + }, + { + "epoch": 0.5080175727622186, + "grad_norm": 0.34109556674957275, + "learning_rate": 1.700479581157625e-05, + "loss": 0.4814, + "step": 18502 + }, + { + "epoch": 0.508045030203185, + "grad_norm": 0.3499755263328552, + "learning_rate": 1.700448757461913e-05, + "loss": 0.5376, + "step": 18503 + }, + { + "epoch": 0.5080724876441516, + "grad_norm": 0.3302028775215149, + "learning_rate": 1.700417932459641e-05, + "loss": 0.3928, + "step": 18504 + }, + { + "epoch": 0.508099945085118, + "grad_norm": 0.48318666219711304, + "learning_rate": 1.7003871061508662e-05, + "loss": 0.53, + "step": 18505 + }, + { + "epoch": 0.5081274025260846, + "grad_norm": 0.3688989579677582, + "learning_rate": 1.7003562785356457e-05, + "loss": 0.5281, + "step": 18506 + }, + { + "epoch": 0.5081548599670511, + "grad_norm": 0.35562682151794434, + "learning_rate": 1.7003254496140368e-05, + "loss": 0.4383, + "step": 18507 + }, + { + "epoch": 0.5081823174080176, + "grad_norm": 0.3539496958255768, + "learning_rate": 1.7002946193860978e-05, + "loss": 0.4099, + "step": 18508 + }, + { + "epoch": 0.5082097748489841, + "grad_norm": 0.3534518778324127, + "learning_rate": 1.7002637878518856e-05, + "loss": 0.4595, + "step": 18509 + }, + { + "epoch": 0.5082372322899505, + "grad_norm": 0.3501948118209839, + "learning_rate": 1.700232955011458e-05, + "loss": 0.503, + "step": 18510 + }, + { + "epoch": 0.5082646897309171, + "grad_norm": 0.41264107823371887, + "learning_rate": 1.700202120864873e-05, + "loss": 0.5417, + "step": 18511 + }, + { + "epoch": 0.5082921471718835, + "grad_norm": 0.3893459737300873, + "learning_rate": 1.700171285412187e-05, + "loss": 0.5581, + "step": 18512 + }, + { + "epoch": 0.5083196046128501, + "grad_norm": 0.37234675884246826, + "learning_rate": 1.7001404486534584e-05, + "loss": 0.5283, + "step": 18513 + }, + { + "epoch": 0.5083470620538166, + "grad_norm": 0.33495548367500305, + "learning_rate": 1.700109610588744e-05, + "loss": 0.4895, + "step": 18514 + }, + { + "epoch": 0.5083745194947831, + "grad_norm": 0.39584073424339294, + "learning_rate": 1.700078771218102e-05, + "loss": 0.4969, + "step": 18515 + }, + { + "epoch": 0.5084019769357496, + "grad_norm": 0.32126128673553467, + "learning_rate": 1.7000479305415896e-05, + "loss": 0.3748, + "step": 18516 + }, + { + "epoch": 0.5084294343767161, + "grad_norm": 0.3946602940559387, + "learning_rate": 1.7000170885592643e-05, + "loss": 0.5811, + "step": 18517 + }, + { + "epoch": 0.5084568918176826, + "grad_norm": 0.37806057929992676, + "learning_rate": 1.6999862452711838e-05, + "loss": 0.539, + "step": 18518 + }, + { + "epoch": 0.508484349258649, + "grad_norm": 0.3795132040977478, + "learning_rate": 1.6999554006774058e-05, + "loss": 0.5238, + "step": 18519 + }, + { + "epoch": 0.5085118066996156, + "grad_norm": 0.40288108587265015, + "learning_rate": 1.699924554777987e-05, + "loss": 0.5716, + "step": 18520 + }, + { + "epoch": 0.5085392641405821, + "grad_norm": 0.3266240060329437, + "learning_rate": 1.699893707572986e-05, + "loss": 0.4348, + "step": 18521 + }, + { + "epoch": 0.5085667215815486, + "grad_norm": 0.4246068000793457, + "learning_rate": 1.69986285906246e-05, + "loss": 0.5689, + "step": 18522 + }, + { + "epoch": 0.5085941790225151, + "grad_norm": 0.3663918077945709, + "learning_rate": 1.699832009246466e-05, + "loss": 0.4884, + "step": 18523 + }, + { + "epoch": 0.5086216364634816, + "grad_norm": 0.4137879014015198, + "learning_rate": 1.699801158125062e-05, + "loss": 0.5469, + "step": 18524 + }, + { + "epoch": 0.5086490939044481, + "grad_norm": 0.36332714557647705, + "learning_rate": 1.6997703056983054e-05, + "loss": 0.5094, + "step": 18525 + }, + { + "epoch": 0.5086765513454146, + "grad_norm": 0.40160202980041504, + "learning_rate": 1.699739451966254e-05, + "loss": 0.5555, + "step": 18526 + }, + { + "epoch": 0.5087040087863811, + "grad_norm": 0.4350310266017914, + "learning_rate": 1.699708596928965e-05, + "loss": 0.4567, + "step": 18527 + }, + { + "epoch": 0.5087314662273477, + "grad_norm": 0.46098700165748596, + "learning_rate": 1.699677740586496e-05, + "loss": 0.46, + "step": 18528 + }, + { + "epoch": 0.5087589236683141, + "grad_norm": 0.42546144127845764, + "learning_rate": 1.6996468829389053e-05, + "loss": 0.5681, + "step": 18529 + }, + { + "epoch": 0.5087863811092806, + "grad_norm": 0.3744295835494995, + "learning_rate": 1.6996160239862498e-05, + "loss": 0.4578, + "step": 18530 + }, + { + "epoch": 0.5088138385502471, + "grad_norm": 0.41190043091773987, + "learning_rate": 1.699585163728587e-05, + "loss": 0.5348, + "step": 18531 + }, + { + "epoch": 0.5088412959912136, + "grad_norm": 0.34983187913894653, + "learning_rate": 1.699554302165974e-05, + "loss": 0.4719, + "step": 18532 + }, + { + "epoch": 0.5088687534321801, + "grad_norm": 0.34879937767982483, + "learning_rate": 1.6995234392984692e-05, + "loss": 0.5471, + "step": 18533 + }, + { + "epoch": 0.5088962108731466, + "grad_norm": 0.4051634967327118, + "learning_rate": 1.6994925751261308e-05, + "loss": 0.5294, + "step": 18534 + }, + { + "epoch": 0.5089236683141132, + "grad_norm": 0.3325008749961853, + "learning_rate": 1.6994617096490146e-05, + "loss": 0.4345, + "step": 18535 + }, + { + "epoch": 0.5089511257550796, + "grad_norm": 0.3721499741077423, + "learning_rate": 1.6994308428671793e-05, + "loss": 0.5197, + "step": 18536 + }, + { + "epoch": 0.5089785831960462, + "grad_norm": 11.047492980957031, + "learning_rate": 1.6993999747806823e-05, + "loss": 0.4796, + "step": 18537 + }, + { + "epoch": 0.5090060406370126, + "grad_norm": 0.3265029788017273, + "learning_rate": 1.699369105389581e-05, + "loss": 0.466, + "step": 18538 + }, + { + "epoch": 0.5090334980779792, + "grad_norm": 0.4406310021877289, + "learning_rate": 1.699338234693933e-05, + "loss": 0.5537, + "step": 18539 + }, + { + "epoch": 0.5090609555189456, + "grad_norm": 0.8421952724456787, + "learning_rate": 1.6993073626937963e-05, + "loss": 0.5562, + "step": 18540 + }, + { + "epoch": 0.5090884129599121, + "grad_norm": 0.4519234895706177, + "learning_rate": 1.6992764893892283e-05, + "loss": 0.587, + "step": 18541 + }, + { + "epoch": 0.5091158704008787, + "grad_norm": 0.6256911158561707, + "learning_rate": 1.699245614780286e-05, + "loss": 0.4674, + "step": 18542 + }, + { + "epoch": 0.5091433278418451, + "grad_norm": 0.3460980951786041, + "learning_rate": 1.6992147388670278e-05, + "loss": 0.5032, + "step": 18543 + }, + { + "epoch": 0.5091707852828117, + "grad_norm": 0.34802815318107605, + "learning_rate": 1.6991838616495105e-05, + "loss": 0.4186, + "step": 18544 + }, + { + "epoch": 0.5091982427237781, + "grad_norm": 0.3901326060295105, + "learning_rate": 1.699152983127793e-05, + "loss": 0.5498, + "step": 18545 + }, + { + "epoch": 0.5092257001647447, + "grad_norm": 0.37601596117019653, + "learning_rate": 1.699122103301931e-05, + "loss": 0.4724, + "step": 18546 + }, + { + "epoch": 0.5092531576057111, + "grad_norm": 0.3541049361228943, + "learning_rate": 1.699091222171984e-05, + "loss": 0.4945, + "step": 18547 + }, + { + "epoch": 0.5092806150466777, + "grad_norm": 0.39047351479530334, + "learning_rate": 1.6990603397380084e-05, + "loss": 0.4833, + "step": 18548 + }, + { + "epoch": 0.5093080724876442, + "grad_norm": 0.39342573285102844, + "learning_rate": 1.699029456000062e-05, + "loss": 0.448, + "step": 18549 + }, + { + "epoch": 0.5093355299286106, + "grad_norm": 0.3610215485095978, + "learning_rate": 1.6989985709582025e-05, + "loss": 0.5514, + "step": 18550 + }, + { + "epoch": 0.5093629873695772, + "grad_norm": 0.36175137758255005, + "learning_rate": 1.698967684612488e-05, + "loss": 0.4391, + "step": 18551 + }, + { + "epoch": 0.5093904448105436, + "grad_norm": 0.41140732169151306, + "learning_rate": 1.6989367969629755e-05, + "loss": 0.5873, + "step": 18552 + }, + { + "epoch": 0.5094179022515102, + "grad_norm": 0.4560134708881378, + "learning_rate": 1.698905908009723e-05, + "loss": 0.4758, + "step": 18553 + }, + { + "epoch": 0.5094453596924766, + "grad_norm": 0.4010449945926666, + "learning_rate": 1.6988750177527876e-05, + "loss": 0.5922, + "step": 18554 + }, + { + "epoch": 0.5094728171334432, + "grad_norm": 0.39636436104774475, + "learning_rate": 1.6988441261922273e-05, + "loss": 0.6019, + "step": 18555 + }, + { + "epoch": 0.5095002745744097, + "grad_norm": 0.360472172498703, + "learning_rate": 1.6988132333280995e-05, + "loss": 0.4303, + "step": 18556 + }, + { + "epoch": 0.5095277320153762, + "grad_norm": 0.6750775575637817, + "learning_rate": 1.6987823391604622e-05, + "loss": 0.5403, + "step": 18557 + }, + { + "epoch": 0.5095551894563427, + "grad_norm": 0.5308595895767212, + "learning_rate": 1.6987514436893727e-05, + "loss": 0.5554, + "step": 18558 + }, + { + "epoch": 0.5095826468973091, + "grad_norm": 0.3575540781021118, + "learning_rate": 1.698720546914889e-05, + "loss": 0.5226, + "step": 18559 + }, + { + "epoch": 0.5096101043382757, + "grad_norm": 0.41215983033180237, + "learning_rate": 1.698689648837068e-05, + "loss": 0.5227, + "step": 18560 + }, + { + "epoch": 0.5096375617792421, + "grad_norm": 0.36694127321243286, + "learning_rate": 1.6986587494559684e-05, + "loss": 0.5915, + "step": 18561 + }, + { + "epoch": 0.5096650192202087, + "grad_norm": 0.3662472367286682, + "learning_rate": 1.698627848771647e-05, + "loss": 0.5197, + "step": 18562 + }, + { + "epoch": 0.5096924766611752, + "grad_norm": 0.3692583441734314, + "learning_rate": 1.6985969467841614e-05, + "loss": 0.5422, + "step": 18563 + }, + { + "epoch": 0.5097199341021417, + "grad_norm": 0.38701361417770386, + "learning_rate": 1.6985660434935697e-05, + "loss": 0.5076, + "step": 18564 + }, + { + "epoch": 0.5097473915431082, + "grad_norm": 0.4137991964817047, + "learning_rate": 1.6985351388999296e-05, + "loss": 0.5329, + "step": 18565 + }, + { + "epoch": 0.5097748489840747, + "grad_norm": 0.38359352946281433, + "learning_rate": 1.6985042330032983e-05, + "loss": 0.5363, + "step": 18566 + }, + { + "epoch": 0.5098023064250412, + "grad_norm": 0.43564051389694214, + "learning_rate": 1.698473325803734e-05, + "loss": 0.4878, + "step": 18567 + }, + { + "epoch": 0.5098297638660076, + "grad_norm": 0.3857683539390564, + "learning_rate": 1.6984424173012936e-05, + "loss": 0.5011, + "step": 18568 + }, + { + "epoch": 0.5098572213069742, + "grad_norm": 0.3547968566417694, + "learning_rate": 1.6984115074960352e-05, + "loss": 0.4722, + "step": 18569 + }, + { + "epoch": 0.5098846787479407, + "grad_norm": 0.3517572283744812, + "learning_rate": 1.6983805963880165e-05, + "loss": 0.399, + "step": 18570 + }, + { + "epoch": 0.5099121361889072, + "grad_norm": 0.40543392300605774, + "learning_rate": 1.6983496839772953e-05, + "loss": 0.5177, + "step": 18571 + }, + { + "epoch": 0.5099395936298737, + "grad_norm": 0.3801492154598236, + "learning_rate": 1.6983187702639286e-05, + "loss": 0.5783, + "step": 18572 + }, + { + "epoch": 0.5099670510708402, + "grad_norm": 0.38578012585639954, + "learning_rate": 1.698287855247975e-05, + "loss": 0.5654, + "step": 18573 + }, + { + "epoch": 0.5099945085118067, + "grad_norm": 0.3702903389930725, + "learning_rate": 1.6982569389294914e-05, + "loss": 0.5424, + "step": 18574 + }, + { + "epoch": 0.5100219659527732, + "grad_norm": 0.34418368339538574, + "learning_rate": 1.698226021308536e-05, + "loss": 0.5871, + "step": 18575 + }, + { + "epoch": 0.5100494233937397, + "grad_norm": 0.40446576476097107, + "learning_rate": 1.698195102385166e-05, + "loss": 0.4726, + "step": 18576 + }, + { + "epoch": 0.5100768808347063, + "grad_norm": 0.39791324734687805, + "learning_rate": 1.6981641821594392e-05, + "loss": 0.5136, + "step": 18577 + }, + { + "epoch": 0.5101043382756727, + "grad_norm": 0.3416958153247833, + "learning_rate": 1.6981332606314136e-05, + "loss": 0.4682, + "step": 18578 + }, + { + "epoch": 0.5101317957166392, + "grad_norm": 0.39136114716529846, + "learning_rate": 1.6981023378011467e-05, + "loss": 0.5478, + "step": 18579 + }, + { + "epoch": 0.5101592531576057, + "grad_norm": 0.38259679079055786, + "learning_rate": 1.698071413668696e-05, + "loss": 0.561, + "step": 18580 + }, + { + "epoch": 0.5101867105985722, + "grad_norm": 0.7645190954208374, + "learning_rate": 1.6980404882341192e-05, + "loss": 0.4556, + "step": 18581 + }, + { + "epoch": 0.5102141680395387, + "grad_norm": 0.3744647204875946, + "learning_rate": 1.6980095614974742e-05, + "loss": 0.5442, + "step": 18582 + }, + { + "epoch": 0.5102416254805052, + "grad_norm": 0.3610951602458954, + "learning_rate": 1.6979786334588187e-05, + "loss": 0.4813, + "step": 18583 + }, + { + "epoch": 0.5102690829214718, + "grad_norm": 0.3937985599040985, + "learning_rate": 1.69794770411821e-05, + "loss": 0.5403, + "step": 18584 + }, + { + "epoch": 0.5102965403624382, + "grad_norm": 0.40564388036727905, + "learning_rate": 1.6979167734757065e-05, + "loss": 0.5783, + "step": 18585 + }, + { + "epoch": 0.5103239978034048, + "grad_norm": 0.4010675549507141, + "learning_rate": 1.697885841531365e-05, + "loss": 0.496, + "step": 18586 + }, + { + "epoch": 0.5103514552443712, + "grad_norm": 0.34909769892692566, + "learning_rate": 1.697854908285244e-05, + "loss": 0.4958, + "step": 18587 + }, + { + "epoch": 0.5103789126853377, + "grad_norm": 0.37876346707344055, + "learning_rate": 1.6978239737374008e-05, + "loss": 0.4921, + "step": 18588 + }, + { + "epoch": 0.5104063701263042, + "grad_norm": 0.4419255256652832, + "learning_rate": 1.6977930378878933e-05, + "loss": 0.5654, + "step": 18589 + }, + { + "epoch": 0.5104338275672707, + "grad_norm": 0.39342525601387024, + "learning_rate": 1.6977621007367786e-05, + "loss": 0.5281, + "step": 18590 + }, + { + "epoch": 0.5104612850082373, + "grad_norm": 0.5626476407051086, + "learning_rate": 1.6977311622841152e-05, + "loss": 0.57, + "step": 18591 + }, + { + "epoch": 0.5104887424492037, + "grad_norm": 0.3687600791454315, + "learning_rate": 1.6977002225299606e-05, + "loss": 0.4953, + "step": 18592 + }, + { + "epoch": 0.5105161998901703, + "grad_norm": 0.42727363109588623, + "learning_rate": 1.6976692814743724e-05, + "loss": 0.4888, + "step": 18593 + }, + { + "epoch": 0.5105436573311367, + "grad_norm": 0.3891254961490631, + "learning_rate": 1.697638339117408e-05, + "loss": 0.5175, + "step": 18594 + }, + { + "epoch": 0.5105711147721033, + "grad_norm": 0.3617144525051117, + "learning_rate": 1.6976073954591255e-05, + "loss": 0.5267, + "step": 18595 + }, + { + "epoch": 0.5105985722130697, + "grad_norm": 0.6117919683456421, + "learning_rate": 1.697576450499583e-05, + "loss": 0.5822, + "step": 18596 + }, + { + "epoch": 0.5106260296540363, + "grad_norm": 1.0328830480575562, + "learning_rate": 1.6975455042388372e-05, + "loss": 0.6459, + "step": 18597 + }, + { + "epoch": 0.5106534870950028, + "grad_norm": 0.40537548065185547, + "learning_rate": 1.697514556676947e-05, + "loss": 0.4756, + "step": 18598 + }, + { + "epoch": 0.5106809445359692, + "grad_norm": 0.3591884970664978, + "learning_rate": 1.697483607813969e-05, + "loss": 0.5038, + "step": 18599 + }, + { + "epoch": 0.5107084019769358, + "grad_norm": 0.3887330889701843, + "learning_rate": 1.697452657649962e-05, + "loss": 0.404, + "step": 18600 + }, + { + "epoch": 0.5107358594179022, + "grad_norm": 0.3723224699497223, + "learning_rate": 1.6974217061849828e-05, + "loss": 0.515, + "step": 18601 + }, + { + "epoch": 0.5107633168588688, + "grad_norm": 0.32704028487205505, + "learning_rate": 1.6973907534190896e-05, + "loss": 0.5061, + "step": 18602 + }, + { + "epoch": 0.5107907742998352, + "grad_norm": 0.3994567394256592, + "learning_rate": 1.69735979935234e-05, + "loss": 0.4985, + "step": 18603 + }, + { + "epoch": 0.5108182317408018, + "grad_norm": 0.3492930233478546, + "learning_rate": 1.697328843984792e-05, + "loss": 0.5188, + "step": 18604 + }, + { + "epoch": 0.5108456891817683, + "grad_norm": 0.35215985774993896, + "learning_rate": 1.697297887316503e-05, + "loss": 0.5272, + "step": 18605 + }, + { + "epoch": 0.5108731466227348, + "grad_norm": 0.40904736518859863, + "learning_rate": 1.6972669293475306e-05, + "loss": 0.5188, + "step": 18606 + }, + { + "epoch": 0.5109006040637013, + "grad_norm": 0.3817182779312134, + "learning_rate": 1.6972359700779334e-05, + "loss": 0.4998, + "step": 18607 + }, + { + "epoch": 0.5109280615046677, + "grad_norm": 0.38020649552345276, + "learning_rate": 1.6972050095077683e-05, + "loss": 0.5159, + "step": 18608 + }, + { + "epoch": 0.5109555189456343, + "grad_norm": 0.3936541676521301, + "learning_rate": 1.697174047637093e-05, + "loss": 0.5203, + "step": 18609 + }, + { + "epoch": 0.5109829763866007, + "grad_norm": 0.3356493413448334, + "learning_rate": 1.6971430844659663e-05, + "loss": 0.4879, + "step": 18610 + }, + { + "epoch": 0.5110104338275673, + "grad_norm": 0.41974425315856934, + "learning_rate": 1.697112119994445e-05, + "loss": 0.4914, + "step": 18611 + }, + { + "epoch": 0.5110378912685337, + "grad_norm": 0.47035714983940125, + "learning_rate": 1.6970811542225868e-05, + "loss": 0.6621, + "step": 18612 + }, + { + "epoch": 0.5110653487095003, + "grad_norm": 0.5819621086120605, + "learning_rate": 1.69705018715045e-05, + "loss": 0.5546, + "step": 18613 + }, + { + "epoch": 0.5110928061504668, + "grad_norm": 0.388677716255188, + "learning_rate": 1.697019218778092e-05, + "loss": 0.6283, + "step": 18614 + }, + { + "epoch": 0.5111202635914333, + "grad_norm": 0.3554789125919342, + "learning_rate": 1.696988249105571e-05, + "loss": 0.5392, + "step": 18615 + }, + { + "epoch": 0.5111477210323998, + "grad_norm": 0.3696957528591156, + "learning_rate": 1.6969572781329443e-05, + "loss": 0.4747, + "step": 18616 + }, + { + "epoch": 0.5111751784733662, + "grad_norm": 0.4348759949207306, + "learning_rate": 1.6969263058602695e-05, + "loss": 0.5746, + "step": 18617 + }, + { + "epoch": 0.5112026359143328, + "grad_norm": 0.36525896191596985, + "learning_rate": 1.6968953322876052e-05, + "loss": 0.4612, + "step": 18618 + }, + { + "epoch": 0.5112300933552992, + "grad_norm": 0.38608282804489136, + "learning_rate": 1.6968643574150082e-05, + "loss": 0.5005, + "step": 18619 + }, + { + "epoch": 0.5112575507962658, + "grad_norm": 0.42632946372032166, + "learning_rate": 1.696833381242537e-05, + "loss": 0.3586, + "step": 18620 + }, + { + "epoch": 0.5112850082372323, + "grad_norm": 0.45274803042411804, + "learning_rate": 1.6968024037702493e-05, + "loss": 0.4786, + "step": 18621 + }, + { + "epoch": 0.5113124656781988, + "grad_norm": 0.3850913345813751, + "learning_rate": 1.6967714249982027e-05, + "loss": 0.4923, + "step": 18622 + }, + { + "epoch": 0.5113399231191653, + "grad_norm": 0.3569629192352295, + "learning_rate": 1.6967404449264548e-05, + "loss": 0.4786, + "step": 18623 + }, + { + "epoch": 0.5113673805601318, + "grad_norm": 0.40551385283470154, + "learning_rate": 1.6967094635550636e-05, + "loss": 0.5194, + "step": 18624 + }, + { + "epoch": 0.5113948380010983, + "grad_norm": 0.3647242486476898, + "learning_rate": 1.6966784808840873e-05, + "loss": 0.5596, + "step": 18625 + }, + { + "epoch": 0.5114222954420647, + "grad_norm": 0.3421878218650818, + "learning_rate": 1.6966474969135826e-05, + "loss": 0.4712, + "step": 18626 + }, + { + "epoch": 0.5114497528830313, + "grad_norm": 0.40208321809768677, + "learning_rate": 1.6966165116436088e-05, + "loss": 0.5151, + "step": 18627 + }, + { + "epoch": 0.5114772103239978, + "grad_norm": 0.3541579246520996, + "learning_rate": 1.6965855250742223e-05, + "loss": 0.4928, + "step": 18628 + }, + { + "epoch": 0.5115046677649643, + "grad_norm": 0.37649309635162354, + "learning_rate": 1.6965545372054816e-05, + "loss": 0.4658, + "step": 18629 + }, + { + "epoch": 0.5115321252059308, + "grad_norm": 0.39454054832458496, + "learning_rate": 1.696523548037444e-05, + "loss": 0.526, + "step": 18630 + }, + { + "epoch": 0.5115595826468973, + "grad_norm": 0.3725613057613373, + "learning_rate": 1.696492557570168e-05, + "loss": 0.5371, + "step": 18631 + }, + { + "epoch": 0.5115870400878638, + "grad_norm": 0.4415969252586365, + "learning_rate": 1.6964615658037113e-05, + "loss": 0.545, + "step": 18632 + }, + { + "epoch": 0.5116144975288303, + "grad_norm": 0.33665210008621216, + "learning_rate": 1.696430572738131e-05, + "loss": 0.4665, + "step": 18633 + }, + { + "epoch": 0.5116419549697968, + "grad_norm": 0.3869999051094055, + "learning_rate": 1.6963995783734858e-05, + "loss": 0.5197, + "step": 18634 + }, + { + "epoch": 0.5116694124107634, + "grad_norm": 0.46202272176742554, + "learning_rate": 1.6963685827098332e-05, + "loss": 0.5361, + "step": 18635 + }, + { + "epoch": 0.5116968698517298, + "grad_norm": 0.3704226613044739, + "learning_rate": 1.6963375857472306e-05, + "loss": 0.4737, + "step": 18636 + }, + { + "epoch": 0.5117243272926963, + "grad_norm": 0.39509662985801697, + "learning_rate": 1.6963065874857364e-05, + "loss": 0.5215, + "step": 18637 + }, + { + "epoch": 0.5117517847336628, + "grad_norm": 0.3687683343887329, + "learning_rate": 1.6962755879254084e-05, + "loss": 0.5164, + "step": 18638 + }, + { + "epoch": 0.5117792421746293, + "grad_norm": 0.45334988832473755, + "learning_rate": 1.6962445870663037e-05, + "loss": 0.5489, + "step": 18639 + }, + { + "epoch": 0.5118066996155958, + "grad_norm": 0.3462483584880829, + "learning_rate": 1.6962135849084807e-05, + "loss": 0.464, + "step": 18640 + }, + { + "epoch": 0.5118341570565623, + "grad_norm": 0.39044636487960815, + "learning_rate": 1.6961825814519976e-05, + "loss": 0.4817, + "step": 18641 + }, + { + "epoch": 0.5118616144975289, + "grad_norm": 0.43549829721450806, + "learning_rate": 1.696151576696911e-05, + "loss": 0.5207, + "step": 18642 + }, + { + "epoch": 0.5118890719384953, + "grad_norm": 0.4025256335735321, + "learning_rate": 1.6961205706432803e-05, + "loss": 0.4985, + "step": 18643 + }, + { + "epoch": 0.5119165293794619, + "grad_norm": 0.3609309196472168, + "learning_rate": 1.6960895632911626e-05, + "loss": 0.5128, + "step": 18644 + }, + { + "epoch": 0.5119439868204283, + "grad_norm": 0.35793304443359375, + "learning_rate": 1.6960585546406152e-05, + "loss": 0.5142, + "step": 18645 + }, + { + "epoch": 0.5119714442613948, + "grad_norm": 0.3976730704307556, + "learning_rate": 1.6960275446916968e-05, + "loss": 0.4744, + "step": 18646 + }, + { + "epoch": 0.5119989017023613, + "grad_norm": 0.3531462848186493, + "learning_rate": 1.6959965334444645e-05, + "loss": 0.5081, + "step": 18647 + }, + { + "epoch": 0.5120263591433278, + "grad_norm": 0.40081000328063965, + "learning_rate": 1.6959655208989767e-05, + "loss": 0.5454, + "step": 18648 + }, + { + "epoch": 0.5120538165842944, + "grad_norm": 0.4007335901260376, + "learning_rate": 1.695934507055291e-05, + "loss": 0.4822, + "step": 18649 + }, + { + "epoch": 0.5120812740252608, + "grad_norm": 0.4033169746398926, + "learning_rate": 1.6959034919134656e-05, + "loss": 0.4781, + "step": 18650 + }, + { + "epoch": 0.5121087314662274, + "grad_norm": 0.45100030303001404, + "learning_rate": 1.6958724754735582e-05, + "loss": 0.6013, + "step": 18651 + }, + { + "epoch": 0.5121361889071938, + "grad_norm": 0.31972330808639526, + "learning_rate": 1.695841457735626e-05, + "loss": 0.3841, + "step": 18652 + }, + { + "epoch": 0.5121636463481604, + "grad_norm": 0.4122544229030609, + "learning_rate": 1.6958104386997275e-05, + "loss": 0.5663, + "step": 18653 + }, + { + "epoch": 0.5121911037891268, + "grad_norm": 0.38360199332237244, + "learning_rate": 1.6957794183659205e-05, + "loss": 0.4299, + "step": 18654 + }, + { + "epoch": 0.5122185612300933, + "grad_norm": 0.3529968857765198, + "learning_rate": 1.695748396734263e-05, + "loss": 0.5183, + "step": 18655 + }, + { + "epoch": 0.5122460186710599, + "grad_norm": 0.33444714546203613, + "learning_rate": 1.695717373804813e-05, + "loss": 0.4281, + "step": 18656 + }, + { + "epoch": 0.5122734761120263, + "grad_norm": 0.740058183670044, + "learning_rate": 1.6956863495776274e-05, + "loss": 0.5563, + "step": 18657 + }, + { + "epoch": 0.5123009335529929, + "grad_norm": 0.3847612738609314, + "learning_rate": 1.6956553240527648e-05, + "loss": 0.5064, + "step": 18658 + }, + { + "epoch": 0.5123283909939593, + "grad_norm": 0.36713463068008423, + "learning_rate": 1.6956242972302832e-05, + "loss": 0.4453, + "step": 18659 + }, + { + "epoch": 0.5123558484349259, + "grad_norm": 0.4261859059333801, + "learning_rate": 1.69559326911024e-05, + "loss": 0.579, + "step": 18660 + }, + { + "epoch": 0.5123833058758923, + "grad_norm": 0.3419932723045349, + "learning_rate": 1.695562239692694e-05, + "loss": 0.5424, + "step": 18661 + }, + { + "epoch": 0.5124107633168589, + "grad_norm": 0.4075855016708374, + "learning_rate": 1.6955312089777018e-05, + "loss": 0.5153, + "step": 18662 + }, + { + "epoch": 0.5124382207578254, + "grad_norm": 0.3754998445510864, + "learning_rate": 1.695500176965322e-05, + "loss": 0.4877, + "step": 18663 + }, + { + "epoch": 0.5124656781987919, + "grad_norm": 0.39077243208885193, + "learning_rate": 1.6954691436556127e-05, + "loss": 0.5372, + "step": 18664 + }, + { + "epoch": 0.5124931356397584, + "grad_norm": 0.37289750576019287, + "learning_rate": 1.6954381090486313e-05, + "loss": 0.4619, + "step": 18665 + }, + { + "epoch": 0.5125205930807248, + "grad_norm": 0.3652800917625427, + "learning_rate": 1.695407073144436e-05, + "loss": 0.5017, + "step": 18666 + }, + { + "epoch": 0.5125480505216914, + "grad_norm": 0.3635663390159607, + "learning_rate": 1.6953760359430843e-05, + "loss": 0.5171, + "step": 18667 + }, + { + "epoch": 0.5125755079626578, + "grad_norm": 0.3609192669391632, + "learning_rate": 1.6953449974446347e-05, + "loss": 0.4721, + "step": 18668 + }, + { + "epoch": 0.5126029654036244, + "grad_norm": 0.3625755310058594, + "learning_rate": 1.6953139576491443e-05, + "loss": 0.5494, + "step": 18669 + }, + { + "epoch": 0.5126304228445909, + "grad_norm": 0.36530402302742004, + "learning_rate": 1.6952829165566715e-05, + "loss": 0.5441, + "step": 18670 + }, + { + "epoch": 0.5126578802855574, + "grad_norm": 0.3785923421382904, + "learning_rate": 1.6952518741672745e-05, + "loss": 0.604, + "step": 18671 + }, + { + "epoch": 0.5126853377265239, + "grad_norm": 0.4319320023059845, + "learning_rate": 1.6952208304810107e-05, + "loss": 0.5503, + "step": 18672 + }, + { + "epoch": 0.5127127951674904, + "grad_norm": 0.3328680396080017, + "learning_rate": 1.695189785497938e-05, + "loss": 0.503, + "step": 18673 + }, + { + "epoch": 0.5127402526084569, + "grad_norm": 0.3545232117176056, + "learning_rate": 1.6951587392181147e-05, + "loss": 0.5577, + "step": 18674 + }, + { + "epoch": 0.5127677100494233, + "grad_norm": 0.4864352345466614, + "learning_rate": 1.6951276916415983e-05, + "loss": 0.4282, + "step": 18675 + }, + { + "epoch": 0.5127951674903899, + "grad_norm": 0.3901040554046631, + "learning_rate": 1.695096642768447e-05, + "loss": 0.617, + "step": 18676 + }, + { + "epoch": 0.5128226249313564, + "grad_norm": 0.3679579794406891, + "learning_rate": 1.6950655925987186e-05, + "loss": 0.5204, + "step": 18677 + }, + { + "epoch": 0.5128500823723229, + "grad_norm": 0.36346349120140076, + "learning_rate": 1.695034541132471e-05, + "loss": 0.5315, + "step": 18678 + }, + { + "epoch": 0.5128775398132894, + "grad_norm": 0.39569810032844543, + "learning_rate": 1.6950034883697624e-05, + "loss": 0.5607, + "step": 18679 + }, + { + "epoch": 0.5129049972542559, + "grad_norm": 0.38267233967781067, + "learning_rate": 1.69497243431065e-05, + "loss": 0.4949, + "step": 18680 + }, + { + "epoch": 0.5129324546952224, + "grad_norm": 0.44071489572525024, + "learning_rate": 1.6949413789551926e-05, + "loss": 0.5923, + "step": 18681 + }, + { + "epoch": 0.5129599121361889, + "grad_norm": 0.42524248361587524, + "learning_rate": 1.6949103223034476e-05, + "loss": 0.4822, + "step": 18682 + }, + { + "epoch": 0.5129873695771554, + "grad_norm": 0.4341190755367279, + "learning_rate": 1.694879264355473e-05, + "loss": 0.5332, + "step": 18683 + }, + { + "epoch": 0.513014827018122, + "grad_norm": 0.4226025640964508, + "learning_rate": 1.6948482051113268e-05, + "loss": 0.5348, + "step": 18684 + }, + { + "epoch": 0.5130422844590884, + "grad_norm": 0.42955827713012695, + "learning_rate": 1.694817144571067e-05, + "loss": 0.5475, + "step": 18685 + }, + { + "epoch": 0.5130697419000549, + "grad_norm": 0.347808837890625, + "learning_rate": 1.6947860827347513e-05, + "loss": 0.4666, + "step": 18686 + }, + { + "epoch": 0.5130971993410214, + "grad_norm": 0.4013981223106384, + "learning_rate": 1.6947550196024383e-05, + "loss": 0.5247, + "step": 18687 + }, + { + "epoch": 0.5131246567819879, + "grad_norm": 0.36478009819984436, + "learning_rate": 1.694723955174185e-05, + "loss": 0.5127, + "step": 18688 + }, + { + "epoch": 0.5131521142229544, + "grad_norm": 0.4477667808532715, + "learning_rate": 1.6946928894500496e-05, + "loss": 0.4817, + "step": 18689 + }, + { + "epoch": 0.5131795716639209, + "grad_norm": 0.4636624753475189, + "learning_rate": 1.6946618224300908e-05, + "loss": 0.4902, + "step": 18690 + }, + { + "epoch": 0.5132070291048875, + "grad_norm": 0.3757869601249695, + "learning_rate": 1.6946307541143654e-05, + "loss": 0.5616, + "step": 18691 + }, + { + "epoch": 0.5132344865458539, + "grad_norm": 0.3931494355201721, + "learning_rate": 1.6945996845029324e-05, + "loss": 0.6243, + "step": 18692 + }, + { + "epoch": 0.5132619439868205, + "grad_norm": 0.40652647614479065, + "learning_rate": 1.6945686135958493e-05, + "loss": 0.5298, + "step": 18693 + }, + { + "epoch": 0.5132894014277869, + "grad_norm": 0.33743950724601746, + "learning_rate": 1.694537541393174e-05, + "loss": 0.5316, + "step": 18694 + }, + { + "epoch": 0.5133168588687534, + "grad_norm": 0.3642979860305786, + "learning_rate": 1.6945064678949643e-05, + "loss": 0.5043, + "step": 18695 + }, + { + "epoch": 0.5133443163097199, + "grad_norm": 0.33725517988204956, + "learning_rate": 1.6944753931012787e-05, + "loss": 0.4582, + "step": 18696 + }, + { + "epoch": 0.5133717737506864, + "grad_norm": 0.35771727561950684, + "learning_rate": 1.6944443170121744e-05, + "loss": 0.4816, + "step": 18697 + }, + { + "epoch": 0.513399231191653, + "grad_norm": 0.34944266080856323, + "learning_rate": 1.6944132396277097e-05, + "loss": 0.4948, + "step": 18698 + }, + { + "epoch": 0.5134266886326194, + "grad_norm": 0.41065287590026855, + "learning_rate": 1.6943821609479433e-05, + "loss": 0.6726, + "step": 18699 + }, + { + "epoch": 0.513454146073586, + "grad_norm": 0.3977707326412201, + "learning_rate": 1.6943510809729324e-05, + "loss": 0.4571, + "step": 18700 + }, + { + "epoch": 0.5134816035145524, + "grad_norm": 0.39695772528648376, + "learning_rate": 1.694319999702735e-05, + "loss": 0.4825, + "step": 18701 + }, + { + "epoch": 0.513509060955519, + "grad_norm": 0.37188735604286194, + "learning_rate": 1.694288917137409e-05, + "loss": 0.5005, + "step": 18702 + }, + { + "epoch": 0.5135365183964854, + "grad_norm": 0.3639078736305237, + "learning_rate": 1.6942578332770127e-05, + "loss": 0.5598, + "step": 18703 + }, + { + "epoch": 0.513563975837452, + "grad_norm": 0.37298837304115295, + "learning_rate": 1.694226748121604e-05, + "loss": 0.4974, + "step": 18704 + }, + { + "epoch": 0.5135914332784185, + "grad_norm": 0.37391629815101624, + "learning_rate": 1.694195661671241e-05, + "loss": 0.4718, + "step": 18705 + }, + { + "epoch": 0.5136188907193849, + "grad_norm": 0.3683781027793884, + "learning_rate": 1.6941645739259813e-05, + "loss": 0.5232, + "step": 18706 + }, + { + "epoch": 0.5136463481603515, + "grad_norm": 0.3985866606235504, + "learning_rate": 1.6941334848858835e-05, + "loss": 0.6029, + "step": 18707 + }, + { + "epoch": 0.5136738056013179, + "grad_norm": 0.33321788907051086, + "learning_rate": 1.694102394551005e-05, + "loss": 0.4268, + "step": 18708 + }, + { + "epoch": 0.5137012630422845, + "grad_norm": 0.3857799470424652, + "learning_rate": 1.694071302921404e-05, + "loss": 0.4973, + "step": 18709 + }, + { + "epoch": 0.5137287204832509, + "grad_norm": 0.41173428297042847, + "learning_rate": 1.6940402099971382e-05, + "loss": 0.4399, + "step": 18710 + }, + { + "epoch": 0.5137561779242175, + "grad_norm": 0.36251530051231384, + "learning_rate": 1.694009115778266e-05, + "loss": 0.4312, + "step": 18711 + }, + { + "epoch": 0.513783635365184, + "grad_norm": 0.46105244755744934, + "learning_rate": 1.6939780202648454e-05, + "loss": 0.5213, + "step": 18712 + }, + { + "epoch": 0.5138110928061504, + "grad_norm": 0.39996084570884705, + "learning_rate": 1.6939469234569347e-05, + "loss": 0.5372, + "step": 18713 + }, + { + "epoch": 0.513838550247117, + "grad_norm": 0.34587258100509644, + "learning_rate": 1.6939158253545906e-05, + "loss": 0.4491, + "step": 18714 + }, + { + "epoch": 0.5138660076880834, + "grad_norm": 0.4025329649448395, + "learning_rate": 1.693884725957873e-05, + "loss": 0.5304, + "step": 18715 + }, + { + "epoch": 0.51389346512905, + "grad_norm": 0.39447852969169617, + "learning_rate": 1.6938536252668382e-05, + "loss": 0.4364, + "step": 18716 + }, + { + "epoch": 0.5139209225700164, + "grad_norm": 0.3497292697429657, + "learning_rate": 1.6938225232815454e-05, + "loss": 0.5211, + "step": 18717 + }, + { + "epoch": 0.513948380010983, + "grad_norm": 0.38576507568359375, + "learning_rate": 1.693791420002052e-05, + "loss": 0.5468, + "step": 18718 + }, + { + "epoch": 0.5139758374519495, + "grad_norm": 0.5414435863494873, + "learning_rate": 1.6937603154284158e-05, + "loss": 0.4854, + "step": 18719 + }, + { + "epoch": 0.514003294892916, + "grad_norm": 0.36633604764938354, + "learning_rate": 1.6937292095606956e-05, + "loss": 0.5951, + "step": 18720 + }, + { + "epoch": 0.5140307523338825, + "grad_norm": 0.37443187832832336, + "learning_rate": 1.693698102398949e-05, + "loss": 0.4905, + "step": 18721 + }, + { + "epoch": 0.514058209774849, + "grad_norm": 0.3995816111564636, + "learning_rate": 1.693666993943234e-05, + "loss": 0.4953, + "step": 18722 + }, + { + "epoch": 0.5140856672158155, + "grad_norm": 0.370633989572525, + "learning_rate": 1.6936358841936087e-05, + "loss": 0.5643, + "step": 18723 + }, + { + "epoch": 0.5141131246567819, + "grad_norm": 0.3958241641521454, + "learning_rate": 1.693604773150131e-05, + "loss": 0.5241, + "step": 18724 + }, + { + "epoch": 0.5141405820977485, + "grad_norm": 0.39405784010887146, + "learning_rate": 1.693573660812859e-05, + "loss": 0.4746, + "step": 18725 + }, + { + "epoch": 0.514168039538715, + "grad_norm": 0.3756093382835388, + "learning_rate": 1.693542547181851e-05, + "loss": 0.5182, + "step": 18726 + }, + { + "epoch": 0.5141954969796815, + "grad_norm": 0.3983602523803711, + "learning_rate": 1.6935114322571646e-05, + "loss": 0.5274, + "step": 18727 + }, + { + "epoch": 0.514222954420648, + "grad_norm": 0.47980302572250366, + "learning_rate": 1.6934803160388576e-05, + "loss": 0.514, + "step": 18728 + }, + { + "epoch": 0.5142504118616145, + "grad_norm": 0.3720860779285431, + "learning_rate": 1.693449198526989e-05, + "loss": 0.5201, + "step": 18729 + }, + { + "epoch": 0.514277869302581, + "grad_norm": 0.3469696640968323, + "learning_rate": 1.6934180797216165e-05, + "loss": 0.4985, + "step": 18730 + }, + { + "epoch": 0.5143053267435475, + "grad_norm": 0.35542330145835876, + "learning_rate": 1.6933869596227978e-05, + "loss": 0.5535, + "step": 18731 + }, + { + "epoch": 0.514332784184514, + "grad_norm": 0.3780272603034973, + "learning_rate": 1.6933558382305907e-05, + "loss": 0.5508, + "step": 18732 + }, + { + "epoch": 0.5143602416254806, + "grad_norm": 0.377352237701416, + "learning_rate": 1.693324715545054e-05, + "loss": 0.472, + "step": 18733 + }, + { + "epoch": 0.514387699066447, + "grad_norm": 0.3795408010482788, + "learning_rate": 1.6932935915662458e-05, + "loss": 0.591, + "step": 18734 + }, + { + "epoch": 0.5144151565074135, + "grad_norm": 0.3489115238189697, + "learning_rate": 1.6932624662942232e-05, + "loss": 0.4992, + "step": 18735 + }, + { + "epoch": 0.51444261394838, + "grad_norm": 0.30747392773628235, + "learning_rate": 1.693231339729045e-05, + "loss": 0.404, + "step": 18736 + }, + { + "epoch": 0.5144700713893465, + "grad_norm": 0.4092087149620056, + "learning_rate": 1.6932002118707692e-05, + "loss": 0.4681, + "step": 18737 + }, + { + "epoch": 0.514497528830313, + "grad_norm": 0.36396750807762146, + "learning_rate": 1.6931690827194533e-05, + "loss": 0.5207, + "step": 18738 + }, + { + "epoch": 0.5145249862712795, + "grad_norm": 0.41024070978164673, + "learning_rate": 1.693137952275156e-05, + "loss": 0.5597, + "step": 18739 + }, + { + "epoch": 0.5145524437122461, + "grad_norm": 0.4256361126899719, + "learning_rate": 1.6931068205379357e-05, + "loss": 0.5328, + "step": 18740 + }, + { + "epoch": 0.5145799011532125, + "grad_norm": 0.4832627773284912, + "learning_rate": 1.6930756875078496e-05, + "loss": 0.6346, + "step": 18741 + }, + { + "epoch": 0.514607358594179, + "grad_norm": 0.3839092254638672, + "learning_rate": 1.693044553184956e-05, + "loss": 0.4724, + "step": 18742 + }, + { + "epoch": 0.5146348160351455, + "grad_norm": 0.39744871854782104, + "learning_rate": 1.6930134175693135e-05, + "loss": 0.4635, + "step": 18743 + }, + { + "epoch": 0.514662273476112, + "grad_norm": 0.4211215376853943, + "learning_rate": 1.69298228066098e-05, + "loss": 0.4833, + "step": 18744 + }, + { + "epoch": 0.5146897309170785, + "grad_norm": 1.0832326412200928, + "learning_rate": 1.6929511424600128e-05, + "loss": 0.4974, + "step": 18745 + }, + { + "epoch": 0.514717188358045, + "grad_norm": 0.3572729825973511, + "learning_rate": 1.6929200029664707e-05, + "loss": 0.4772, + "step": 18746 + }, + { + "epoch": 0.5147446457990116, + "grad_norm": 0.3841545879840851, + "learning_rate": 1.692888862180412e-05, + "loss": 0.5167, + "step": 18747 + }, + { + "epoch": 0.514772103239978, + "grad_norm": 0.347608357667923, + "learning_rate": 1.692857720101894e-05, + "loss": 0.4893, + "step": 18748 + }, + { + "epoch": 0.5147995606809446, + "grad_norm": 0.4141027331352234, + "learning_rate": 1.692826576730975e-05, + "loss": 0.4606, + "step": 18749 + }, + { + "epoch": 0.514827018121911, + "grad_norm": 0.37261366844177246, + "learning_rate": 1.692795432067714e-05, + "loss": 0.5286, + "step": 18750 + }, + { + "epoch": 0.5148544755628776, + "grad_norm": 0.38296252489089966, + "learning_rate": 1.692764286112168e-05, + "loss": 0.4677, + "step": 18751 + }, + { + "epoch": 0.514881933003844, + "grad_norm": 0.5819352865219116, + "learning_rate": 1.6927331388643956e-05, + "loss": 0.4296, + "step": 18752 + }, + { + "epoch": 0.5149093904448105, + "grad_norm": 0.3692910969257355, + "learning_rate": 1.6927019903244545e-05, + "loss": 0.5019, + "step": 18753 + }, + { + "epoch": 0.5149368478857771, + "grad_norm": 0.37307360768318176, + "learning_rate": 1.6926708404924036e-05, + "loss": 0.4965, + "step": 18754 + }, + { + "epoch": 0.5149643053267435, + "grad_norm": 0.3535226881504059, + "learning_rate": 1.6926396893683002e-05, + "loss": 0.4561, + "step": 18755 + }, + { + "epoch": 0.5149917627677101, + "grad_norm": 0.4212149381637573, + "learning_rate": 1.6926085369522026e-05, + "loss": 0.5733, + "step": 18756 + }, + { + "epoch": 0.5150192202086765, + "grad_norm": 0.3680340051651001, + "learning_rate": 1.6925773832441693e-05, + "loss": 0.5809, + "step": 18757 + }, + { + "epoch": 0.5150466776496431, + "grad_norm": 0.36845651268959045, + "learning_rate": 1.692546228244258e-05, + "loss": 0.5125, + "step": 18758 + }, + { + "epoch": 0.5150741350906095, + "grad_norm": 0.35343456268310547, + "learning_rate": 1.692515071952527e-05, + "loss": 0.4875, + "step": 18759 + }, + { + "epoch": 0.5151015925315761, + "grad_norm": 0.3867940604686737, + "learning_rate": 1.692483914369034e-05, + "loss": 0.5542, + "step": 18760 + }, + { + "epoch": 0.5151290499725426, + "grad_norm": 0.35180389881134033, + "learning_rate": 1.6924527554938383e-05, + "loss": 0.4377, + "step": 18761 + }, + { + "epoch": 0.515156507413509, + "grad_norm": 0.40311694145202637, + "learning_rate": 1.6924215953269968e-05, + "loss": 0.5199, + "step": 18762 + }, + { + "epoch": 0.5151839648544756, + "grad_norm": 0.4167497456073761, + "learning_rate": 1.6923904338685677e-05, + "loss": 0.4961, + "step": 18763 + }, + { + "epoch": 0.515211422295442, + "grad_norm": 0.3496154844760895, + "learning_rate": 1.6923592711186098e-05, + "loss": 0.5143, + "step": 18764 + }, + { + "epoch": 0.5152388797364086, + "grad_norm": 0.3706246018409729, + "learning_rate": 1.6923281070771808e-05, + "loss": 0.5876, + "step": 18765 + }, + { + "epoch": 0.515266337177375, + "grad_norm": 0.839541494846344, + "learning_rate": 1.692296941744339e-05, + "loss": 0.4229, + "step": 18766 + }, + { + "epoch": 0.5152937946183416, + "grad_norm": 0.4071706235408783, + "learning_rate": 1.6922657751201424e-05, + "loss": 0.5691, + "step": 18767 + }, + { + "epoch": 0.5153212520593081, + "grad_norm": 0.368060439825058, + "learning_rate": 1.692234607204649e-05, + "loss": 0.4829, + "step": 18768 + }, + { + "epoch": 0.5153487095002746, + "grad_norm": 0.44660070538520813, + "learning_rate": 1.6922034379979175e-05, + "loss": 0.5723, + "step": 18769 + }, + { + "epoch": 0.5153761669412411, + "grad_norm": 0.3612874448299408, + "learning_rate": 1.6921722675000052e-05, + "loss": 0.5339, + "step": 18770 + }, + { + "epoch": 0.5154036243822075, + "grad_norm": 0.3635653257369995, + "learning_rate": 1.692141095710971e-05, + "loss": 0.5775, + "step": 18771 + }, + { + "epoch": 0.5154310818231741, + "grad_norm": 0.397318571805954, + "learning_rate": 1.6921099226308725e-05, + "loss": 0.5178, + "step": 18772 + }, + { + "epoch": 0.5154585392641405, + "grad_norm": 0.5644494891166687, + "learning_rate": 1.6920787482597686e-05, + "loss": 0.4837, + "step": 18773 + }, + { + "epoch": 0.5154859967051071, + "grad_norm": 0.3683586120605469, + "learning_rate": 1.6920475725977167e-05, + "loss": 0.5553, + "step": 18774 + }, + { + "epoch": 0.5155134541460736, + "grad_norm": 0.30593615770339966, + "learning_rate": 1.692016395644775e-05, + "loss": 0.4244, + "step": 18775 + }, + { + "epoch": 0.5155409115870401, + "grad_norm": 0.3539428412914276, + "learning_rate": 1.691985217401002e-05, + "loss": 0.4183, + "step": 18776 + }, + { + "epoch": 0.5155683690280066, + "grad_norm": 0.4569878876209259, + "learning_rate": 1.6919540378664557e-05, + "loss": 0.6154, + "step": 18777 + }, + { + "epoch": 0.5155958264689731, + "grad_norm": 0.4301442503929138, + "learning_rate": 1.691922857041194e-05, + "loss": 0.4793, + "step": 18778 + }, + { + "epoch": 0.5156232839099396, + "grad_norm": 0.39175912737846375, + "learning_rate": 1.6918916749252757e-05, + "loss": 0.4643, + "step": 18779 + }, + { + "epoch": 0.515650741350906, + "grad_norm": 0.387448787689209, + "learning_rate": 1.6918604915187585e-05, + "loss": 0.4342, + "step": 18780 + }, + { + "epoch": 0.5156781987918726, + "grad_norm": 0.37361636757850647, + "learning_rate": 1.691829306821701e-05, + "loss": 0.4601, + "step": 18781 + }, + { + "epoch": 0.5157056562328391, + "grad_norm": 0.44291234016418457, + "learning_rate": 1.6917981208341605e-05, + "loss": 0.5441, + "step": 18782 + }, + { + "epoch": 0.5157331136738056, + "grad_norm": 0.3519397974014282, + "learning_rate": 1.691766933556196e-05, + "loss": 0.4934, + "step": 18783 + }, + { + "epoch": 0.5157605711147721, + "grad_norm": 0.3506133258342743, + "learning_rate": 1.6917357449878654e-05, + "loss": 0.4645, + "step": 18784 + }, + { + "epoch": 0.5157880285557386, + "grad_norm": 0.3506285846233368, + "learning_rate": 1.6917045551292265e-05, + "loss": 0.4973, + "step": 18785 + }, + { + "epoch": 0.5158154859967051, + "grad_norm": 0.37170928716659546, + "learning_rate": 1.6916733639803383e-05, + "loss": 0.529, + "step": 18786 + }, + { + "epoch": 0.5158429434376716, + "grad_norm": 0.4235914647579193, + "learning_rate": 1.6916421715412583e-05, + "loss": 0.5689, + "step": 18787 + }, + { + "epoch": 0.5158704008786381, + "grad_norm": 0.3889610767364502, + "learning_rate": 1.691610977812045e-05, + "loss": 0.508, + "step": 18788 + }, + { + "epoch": 0.5158978583196047, + "grad_norm": 0.3707149028778076, + "learning_rate": 1.6915797827927563e-05, + "loss": 0.4782, + "step": 18789 + }, + { + "epoch": 0.5159253157605711, + "grad_norm": 0.3864535689353943, + "learning_rate": 1.6915485864834507e-05, + "loss": 0.5469, + "step": 18790 + }, + { + "epoch": 0.5159527732015377, + "grad_norm": 0.4112805128097534, + "learning_rate": 1.6915173888841863e-05, + "loss": 0.5741, + "step": 18791 + }, + { + "epoch": 0.5159802306425041, + "grad_norm": 0.4148314595222473, + "learning_rate": 1.6914861899950212e-05, + "loss": 0.5119, + "step": 18792 + }, + { + "epoch": 0.5160076880834706, + "grad_norm": 0.371114581823349, + "learning_rate": 1.6914549898160137e-05, + "loss": 0.5253, + "step": 18793 + }, + { + "epoch": 0.5160351455244371, + "grad_norm": 0.36905744671821594, + "learning_rate": 1.691423788347222e-05, + "loss": 0.5451, + "step": 18794 + }, + { + "epoch": 0.5160626029654036, + "grad_norm": 0.41001689434051514, + "learning_rate": 1.6913925855887043e-05, + "loss": 0.5793, + "step": 18795 + }, + { + "epoch": 0.5160900604063702, + "grad_norm": 0.40007156133651733, + "learning_rate": 1.6913613815405187e-05, + "loss": 0.5028, + "step": 18796 + }, + { + "epoch": 0.5161175178473366, + "grad_norm": 0.3751251995563507, + "learning_rate": 1.6913301762027234e-05, + "loss": 0.5733, + "step": 18797 + }, + { + "epoch": 0.5161449752883032, + "grad_norm": 0.40554285049438477, + "learning_rate": 1.6912989695753766e-05, + "loss": 0.5038, + "step": 18798 + }, + { + "epoch": 0.5161724327292696, + "grad_norm": 0.3731381893157959, + "learning_rate": 1.6912677616585368e-05, + "loss": 0.5508, + "step": 18799 + }, + { + "epoch": 0.5161998901702362, + "grad_norm": 0.36792001128196716, + "learning_rate": 1.6912365524522617e-05, + "loss": 0.5041, + "step": 18800 + }, + { + "epoch": 0.5162273476112026, + "grad_norm": 0.3725389838218689, + "learning_rate": 1.69120534195661e-05, + "loss": 0.5288, + "step": 18801 + }, + { + "epoch": 0.5162548050521691, + "grad_norm": 0.4181652367115021, + "learning_rate": 1.6911741301716397e-05, + "loss": 0.4711, + "step": 18802 + }, + { + "epoch": 0.5162822624931357, + "grad_norm": 0.3564399480819702, + "learning_rate": 1.6911429170974093e-05, + "loss": 0.4633, + "step": 18803 + }, + { + "epoch": 0.5163097199341021, + "grad_norm": 0.3559333086013794, + "learning_rate": 1.691111702733976e-05, + "loss": 0.4973, + "step": 18804 + }, + { + "epoch": 0.5163371773750687, + "grad_norm": 0.4321770966053009, + "learning_rate": 1.6910804870814e-05, + "loss": 0.5367, + "step": 18805 + }, + { + "epoch": 0.5163646348160351, + "grad_norm": 0.4373331665992737, + "learning_rate": 1.6910492701397373e-05, + "loss": 0.5032, + "step": 18806 + }, + { + "epoch": 0.5163920922570017, + "grad_norm": 0.3789956867694855, + "learning_rate": 1.6910180519090475e-05, + "loss": 0.5715, + "step": 18807 + }, + { + "epoch": 0.5164195496979681, + "grad_norm": 0.48342832922935486, + "learning_rate": 1.6909868323893887e-05, + "loss": 0.4966, + "step": 18808 + }, + { + "epoch": 0.5164470071389347, + "grad_norm": 0.4300185441970825, + "learning_rate": 1.690955611580819e-05, + "loss": 0.4823, + "step": 18809 + }, + { + "epoch": 0.5164744645799012, + "grad_norm": 0.4283064901828766, + "learning_rate": 1.690924389483396e-05, + "loss": 0.5611, + "step": 18810 + }, + { + "epoch": 0.5165019220208676, + "grad_norm": 0.39149588346481323, + "learning_rate": 1.6908931660971787e-05, + "loss": 0.4987, + "step": 18811 + }, + { + "epoch": 0.5165293794618342, + "grad_norm": 0.3893202245235443, + "learning_rate": 1.6908619414222254e-05, + "loss": 0.5691, + "step": 18812 + }, + { + "epoch": 0.5165568369028006, + "grad_norm": 0.3993658423423767, + "learning_rate": 1.690830715458594e-05, + "loss": 0.4775, + "step": 18813 + }, + { + "epoch": 0.5165842943437672, + "grad_norm": 0.4057522714138031, + "learning_rate": 1.6907994882063426e-05, + "loss": 0.5112, + "step": 18814 + }, + { + "epoch": 0.5166117517847336, + "grad_norm": 0.3967551589012146, + "learning_rate": 1.69076825966553e-05, + "loss": 0.5309, + "step": 18815 + }, + { + "epoch": 0.5166392092257002, + "grad_norm": 0.30361127853393555, + "learning_rate": 1.690737029836214e-05, + "loss": 0.4173, + "step": 18816 + }, + { + "epoch": 0.5166666666666667, + "grad_norm": 0.5026010870933533, + "learning_rate": 1.6907057987184528e-05, + "loss": 0.5583, + "step": 18817 + }, + { + "epoch": 0.5166941241076332, + "grad_norm": 0.3627612888813019, + "learning_rate": 1.6906745663123048e-05, + "loss": 0.4892, + "step": 18818 + }, + { + "epoch": 0.5167215815485997, + "grad_norm": 0.35617539286613464, + "learning_rate": 1.6906433326178288e-05, + "loss": 0.4149, + "step": 18819 + }, + { + "epoch": 0.5167490389895661, + "grad_norm": 0.39057257771492004, + "learning_rate": 1.6906120976350818e-05, + "loss": 0.4964, + "step": 18820 + }, + { + "epoch": 0.5167764964305327, + "grad_norm": 0.4334745705127716, + "learning_rate": 1.6905808613641233e-05, + "loss": 0.4087, + "step": 18821 + }, + { + "epoch": 0.5168039538714991, + "grad_norm": 0.3689413368701935, + "learning_rate": 1.690549623805011e-05, + "loss": 0.484, + "step": 18822 + }, + { + "epoch": 0.5168314113124657, + "grad_norm": 0.4021020829677582, + "learning_rate": 1.6905183849578036e-05, + "loss": 0.5846, + "step": 18823 + }, + { + "epoch": 0.5168588687534322, + "grad_norm": 0.4564671218395233, + "learning_rate": 1.6904871448225586e-05, + "loss": 0.5047, + "step": 18824 + }, + { + "epoch": 0.5168863261943987, + "grad_norm": 0.41377317905426025, + "learning_rate": 1.6904559033993345e-05, + "loss": 0.4696, + "step": 18825 + }, + { + "epoch": 0.5169137836353652, + "grad_norm": 0.37164074182510376, + "learning_rate": 1.69042466068819e-05, + "loss": 0.4517, + "step": 18826 + }, + { + "epoch": 0.5169412410763317, + "grad_norm": 0.3550777733325958, + "learning_rate": 1.6903934166891836e-05, + "loss": 0.4554, + "step": 18827 + }, + { + "epoch": 0.5169686985172982, + "grad_norm": 0.499445378780365, + "learning_rate": 1.6903621714023725e-05, + "loss": 0.4969, + "step": 18828 + }, + { + "epoch": 0.5169961559582646, + "grad_norm": 0.3504363000392914, + "learning_rate": 1.690330924827816e-05, + "loss": 0.4984, + "step": 18829 + }, + { + "epoch": 0.5170236133992312, + "grad_norm": 0.39997759461402893, + "learning_rate": 1.6902996769655718e-05, + "loss": 0.5287, + "step": 18830 + }, + { + "epoch": 0.5170510708401977, + "grad_norm": 0.36369842290878296, + "learning_rate": 1.6902684278156983e-05, + "loss": 0.4719, + "step": 18831 + }, + { + "epoch": 0.5170785282811642, + "grad_norm": 0.3649572730064392, + "learning_rate": 1.690237177378254e-05, + "loss": 0.4636, + "step": 18832 + }, + { + "epoch": 0.5171059857221307, + "grad_norm": 0.31605830788612366, + "learning_rate": 1.690205925653297e-05, + "loss": 0.4612, + "step": 18833 + }, + { + "epoch": 0.5171334431630972, + "grad_norm": 0.425552636384964, + "learning_rate": 1.690174672640886e-05, + "loss": 0.4878, + "step": 18834 + }, + { + "epoch": 0.5171609006040637, + "grad_norm": 0.4156164824962616, + "learning_rate": 1.690143418341079e-05, + "loss": 0.5563, + "step": 18835 + }, + { + "epoch": 0.5171883580450302, + "grad_norm": 0.3762718439102173, + "learning_rate": 1.6901121627539335e-05, + "loss": 0.4427, + "step": 18836 + }, + { + "epoch": 0.5172158154859967, + "grad_norm": 0.38159283995628357, + "learning_rate": 1.6900809058795095e-05, + "loss": 0.4843, + "step": 18837 + }, + { + "epoch": 0.5172432729269633, + "grad_norm": 0.391265869140625, + "learning_rate": 1.6900496477178638e-05, + "loss": 0.4648, + "step": 18838 + }, + { + "epoch": 0.5172707303679297, + "grad_norm": 0.3617302477359772, + "learning_rate": 1.6900183882690552e-05, + "loss": 0.4665, + "step": 18839 + }, + { + "epoch": 0.5172981878088962, + "grad_norm": 0.38383200764656067, + "learning_rate": 1.6899871275331422e-05, + "loss": 0.4323, + "step": 18840 + }, + { + "epoch": 0.5173256452498627, + "grad_norm": 0.3692295253276825, + "learning_rate": 1.6899558655101832e-05, + "loss": 0.4835, + "step": 18841 + }, + { + "epoch": 0.5173531026908292, + "grad_norm": 0.38920947909355164, + "learning_rate": 1.689924602200236e-05, + "loss": 0.4768, + "step": 18842 + }, + { + "epoch": 0.5173805601317957, + "grad_norm": 0.3644101619720459, + "learning_rate": 1.6898933376033594e-05, + "loss": 0.4699, + "step": 18843 + }, + { + "epoch": 0.5174080175727622, + "grad_norm": 0.41527339816093445, + "learning_rate": 1.689862071719611e-05, + "loss": 0.521, + "step": 18844 + }, + { + "epoch": 0.5174354750137288, + "grad_norm": 0.375967413187027, + "learning_rate": 1.6898308045490503e-05, + "loss": 0.5251, + "step": 18845 + }, + { + "epoch": 0.5174629324546952, + "grad_norm": 0.3504123091697693, + "learning_rate": 1.689799536091735e-05, + "loss": 0.5549, + "step": 18846 + }, + { + "epoch": 0.5174903898956618, + "grad_norm": 0.4193456470966339, + "learning_rate": 1.689768266347723e-05, + "loss": 0.4644, + "step": 18847 + }, + { + "epoch": 0.5175178473366282, + "grad_norm": 0.3487400710582733, + "learning_rate": 1.6897369953170733e-05, + "loss": 0.5096, + "step": 18848 + }, + { + "epoch": 0.5175453047775948, + "grad_norm": 0.38032928109169006, + "learning_rate": 1.689705722999844e-05, + "loss": 0.4863, + "step": 18849 + }, + { + "epoch": 0.5175727622185612, + "grad_norm": 0.32705259323120117, + "learning_rate": 1.6896744493960935e-05, + "loss": 0.465, + "step": 18850 + }, + { + "epoch": 0.5176002196595277, + "grad_norm": 0.4102742075920105, + "learning_rate": 1.6896431745058798e-05, + "loss": 0.498, + "step": 18851 + }, + { + "epoch": 0.5176276771004943, + "grad_norm": 0.37644630670547485, + "learning_rate": 1.6896118983292615e-05, + "loss": 0.5379, + "step": 18852 + }, + { + "epoch": 0.5176551345414607, + "grad_norm": 0.41633620858192444, + "learning_rate": 1.6895806208662966e-05, + "loss": 0.5288, + "step": 18853 + }, + { + "epoch": 0.5176825919824273, + "grad_norm": 0.36218395829200745, + "learning_rate": 1.6895493421170445e-05, + "loss": 0.4467, + "step": 18854 + }, + { + "epoch": 0.5177100494233937, + "grad_norm": 0.857867956161499, + "learning_rate": 1.6895180620815623e-05, + "loss": 0.5101, + "step": 18855 + }, + { + "epoch": 0.5177375068643603, + "grad_norm": 0.40396907925605774, + "learning_rate": 1.689486780759909e-05, + "loss": 0.452, + "step": 18856 + }, + { + "epoch": 0.5177649643053267, + "grad_norm": 0.32300132513046265, + "learning_rate": 1.6894554981521423e-05, + "loss": 0.4841, + "step": 18857 + }, + { + "epoch": 0.5177924217462933, + "grad_norm": 0.4168510437011719, + "learning_rate": 1.6894242142583217e-05, + "loss": 0.4724, + "step": 18858 + }, + { + "epoch": 0.5178198791872598, + "grad_norm": 0.31230735778808594, + "learning_rate": 1.6893929290785046e-05, + "loss": 0.5072, + "step": 18859 + }, + { + "epoch": 0.5178473366282262, + "grad_norm": 0.37159502506256104, + "learning_rate": 1.6893616426127498e-05, + "loss": 0.5002, + "step": 18860 + }, + { + "epoch": 0.5178747940691928, + "grad_norm": 0.3580821454524994, + "learning_rate": 1.6893303548611152e-05, + "loss": 0.5423, + "step": 18861 + }, + { + "epoch": 0.5179022515101592, + "grad_norm": 0.3922225832939148, + "learning_rate": 1.68929906582366e-05, + "loss": 0.5602, + "step": 18862 + }, + { + "epoch": 0.5179297089511258, + "grad_norm": 0.37729525566101074, + "learning_rate": 1.689267775500442e-05, + "loss": 0.452, + "step": 18863 + }, + { + "epoch": 0.5179571663920922, + "grad_norm": 0.3419925570487976, + "learning_rate": 1.6892364838915193e-05, + "loss": 0.5423, + "step": 18864 + }, + { + "epoch": 0.5179846238330588, + "grad_norm": 0.4055200517177582, + "learning_rate": 1.6892051909969508e-05, + "loss": 0.5667, + "step": 18865 + }, + { + "epoch": 0.5180120812740253, + "grad_norm": 0.3803432285785675, + "learning_rate": 1.6891738968167942e-05, + "loss": 0.4915, + "step": 18866 + }, + { + "epoch": 0.5180395387149918, + "grad_norm": 0.4027034640312195, + "learning_rate": 1.6891426013511084e-05, + "loss": 0.5371, + "step": 18867 + }, + { + "epoch": 0.5180669961559583, + "grad_norm": 0.39849868416786194, + "learning_rate": 1.689111304599952e-05, + "loss": 0.5944, + "step": 18868 + }, + { + "epoch": 0.5180944535969247, + "grad_norm": 0.41467034816741943, + "learning_rate": 1.689080006563383e-05, + "loss": 0.6254, + "step": 18869 + }, + { + "epoch": 0.5181219110378913, + "grad_norm": 0.36746782064437866, + "learning_rate": 1.68904870724146e-05, + "loss": 0.4657, + "step": 18870 + }, + { + "epoch": 0.5181493684788577, + "grad_norm": 0.35142430663108826, + "learning_rate": 1.689017406634241e-05, + "loss": 0.447, + "step": 18871 + }, + { + "epoch": 0.5181768259198243, + "grad_norm": 0.41278815269470215, + "learning_rate": 1.6889861047417852e-05, + "loss": 0.6125, + "step": 18872 + }, + { + "epoch": 0.5182042833607908, + "grad_norm": 0.3636972904205322, + "learning_rate": 1.6889548015641496e-05, + "loss": 0.5045, + "step": 18873 + }, + { + "epoch": 0.5182317408017573, + "grad_norm": 0.37217506766319275, + "learning_rate": 1.688923497101394e-05, + "loss": 0.4823, + "step": 18874 + }, + { + "epoch": 0.5182591982427238, + "grad_norm": 0.41376161575317383, + "learning_rate": 1.6888921913535758e-05, + "loss": 0.4995, + "step": 18875 + }, + { + "epoch": 0.5182866556836903, + "grad_norm": 0.3779754638671875, + "learning_rate": 1.6888608843207538e-05, + "loss": 0.554, + "step": 18876 + }, + { + "epoch": 0.5183141131246568, + "grad_norm": 0.43648841977119446, + "learning_rate": 1.688829576002987e-05, + "loss": 0.517, + "step": 18877 + }, + { + "epoch": 0.5183415705656232, + "grad_norm": 0.4185589551925659, + "learning_rate": 1.6887982664003323e-05, + "loss": 0.4881, + "step": 18878 + }, + { + "epoch": 0.5183690280065898, + "grad_norm": 0.3509986102581024, + "learning_rate": 1.6887669555128496e-05, + "loss": 0.4606, + "step": 18879 + }, + { + "epoch": 0.5183964854475562, + "grad_norm": 0.37593621015548706, + "learning_rate": 1.6887356433405963e-05, + "loss": 0.4485, + "step": 18880 + }, + { + "epoch": 0.5184239428885228, + "grad_norm": 0.38500288128852844, + "learning_rate": 1.6887043298836318e-05, + "loss": 0.5719, + "step": 18881 + }, + { + "epoch": 0.5184514003294893, + "grad_norm": 0.35354694724082947, + "learning_rate": 1.6886730151420134e-05, + "loss": 0.4029, + "step": 18882 + }, + { + "epoch": 0.5184788577704558, + "grad_norm": 0.39938586950302124, + "learning_rate": 1.6886416991158e-05, + "loss": 0.4793, + "step": 18883 + }, + { + "epoch": 0.5185063152114223, + "grad_norm": 0.34833166003227234, + "learning_rate": 1.6886103818050504e-05, + "loss": 0.4443, + "step": 18884 + }, + { + "epoch": 0.5185337726523888, + "grad_norm": 0.5801127552986145, + "learning_rate": 1.6885790632098225e-05, + "loss": 0.519, + "step": 18885 + }, + { + "epoch": 0.5185612300933553, + "grad_norm": 0.34382110834121704, + "learning_rate": 1.6885477433301747e-05, + "loss": 0.5122, + "step": 18886 + }, + { + "epoch": 0.5185886875343217, + "grad_norm": 0.36740732192993164, + "learning_rate": 1.688516422166166e-05, + "loss": 0.4082, + "step": 18887 + }, + { + "epoch": 0.5186161449752883, + "grad_norm": 0.3785931468009949, + "learning_rate": 1.688485099717854e-05, + "loss": 0.4239, + "step": 18888 + }, + { + "epoch": 0.5186436024162548, + "grad_norm": 0.32962173223495483, + "learning_rate": 1.6884537759852978e-05, + "loss": 0.4656, + "step": 18889 + }, + { + "epoch": 0.5186710598572213, + "grad_norm": 0.3573894798755646, + "learning_rate": 1.6884224509685555e-05, + "loss": 0.4986, + "step": 18890 + }, + { + "epoch": 0.5186985172981878, + "grad_norm": 0.35526520013809204, + "learning_rate": 1.6883911246676858e-05, + "loss": 0.5408, + "step": 18891 + }, + { + "epoch": 0.5187259747391543, + "grad_norm": 0.4247969388961792, + "learning_rate": 1.6883597970827467e-05, + "loss": 0.5214, + "step": 18892 + }, + { + "epoch": 0.5187534321801208, + "grad_norm": 0.3954562544822693, + "learning_rate": 1.6883284682137968e-05, + "loss": 0.5536, + "step": 18893 + }, + { + "epoch": 0.5187808896210873, + "grad_norm": 0.37607818841934204, + "learning_rate": 1.6882971380608947e-05, + "loss": 0.4758, + "step": 18894 + }, + { + "epoch": 0.5188083470620538, + "grad_norm": 0.3653302490711212, + "learning_rate": 1.688265806624099e-05, + "loss": 0.4951, + "step": 18895 + }, + { + "epoch": 0.5188358045030204, + "grad_norm": 0.3687068521976471, + "learning_rate": 1.6882344739034677e-05, + "loss": 0.4298, + "step": 18896 + }, + { + "epoch": 0.5188632619439868, + "grad_norm": 0.35975098609924316, + "learning_rate": 1.6882031398990594e-05, + "loss": 0.5151, + "step": 18897 + }, + { + "epoch": 0.5188907193849533, + "grad_norm": 0.40958842635154724, + "learning_rate": 1.6881718046109327e-05, + "loss": 0.4686, + "step": 18898 + }, + { + "epoch": 0.5189181768259198, + "grad_norm": 0.3691691756248474, + "learning_rate": 1.688140468039146e-05, + "loss": 0.5172, + "step": 18899 + }, + { + "epoch": 0.5189456342668863, + "grad_norm": 0.37004923820495605, + "learning_rate": 1.6881091301837574e-05, + "loss": 0.4887, + "step": 18900 + }, + { + "epoch": 0.5189730917078528, + "grad_norm": 0.4172004461288452, + "learning_rate": 1.688077791044826e-05, + "loss": 0.5199, + "step": 18901 + }, + { + "epoch": 0.5190005491488193, + "grad_norm": 0.4217553734779358, + "learning_rate": 1.6880464506224098e-05, + "loss": 0.535, + "step": 18902 + }, + { + "epoch": 0.5190280065897859, + "grad_norm": 0.37289392948150635, + "learning_rate": 1.6880151089165672e-05, + "loss": 0.5035, + "step": 18903 + }, + { + "epoch": 0.5190554640307523, + "grad_norm": 0.37046805024147034, + "learning_rate": 1.687983765927357e-05, + "loss": 0.45, + "step": 18904 + }, + { + "epoch": 0.5190829214717189, + "grad_norm": 0.3984874188899994, + "learning_rate": 1.6879524216548376e-05, + "loss": 0.5783, + "step": 18905 + }, + { + "epoch": 0.5191103789126853, + "grad_norm": 0.3827870488166809, + "learning_rate": 1.6879210760990673e-05, + "loss": 0.4769, + "step": 18906 + }, + { + "epoch": 0.5191378363536518, + "grad_norm": 0.3532126843929291, + "learning_rate": 1.6878897292601047e-05, + "loss": 0.4995, + "step": 18907 + }, + { + "epoch": 0.5191652937946183, + "grad_norm": 0.3421732187271118, + "learning_rate": 1.687858381138008e-05, + "loss": 0.5196, + "step": 18908 + }, + { + "epoch": 0.5191927512355848, + "grad_norm": 0.40706828236579895, + "learning_rate": 1.687827031732836e-05, + "loss": 0.6051, + "step": 18909 + }, + { + "epoch": 0.5192202086765514, + "grad_norm": 0.4036581814289093, + "learning_rate": 1.6877956810446467e-05, + "loss": 0.5454, + "step": 18910 + }, + { + "epoch": 0.5192476661175178, + "grad_norm": 0.38375523686408997, + "learning_rate": 1.6877643290734993e-05, + "loss": 0.5424, + "step": 18911 + }, + { + "epoch": 0.5192751235584844, + "grad_norm": 0.7249485850334167, + "learning_rate": 1.687732975819452e-05, + "loss": 0.5052, + "step": 18912 + }, + { + "epoch": 0.5193025809994508, + "grad_norm": 0.41477179527282715, + "learning_rate": 1.687701621282563e-05, + "loss": 0.3901, + "step": 18913 + }, + { + "epoch": 0.5193300384404174, + "grad_norm": 0.3768211603164673, + "learning_rate": 1.687670265462891e-05, + "loss": 0.5523, + "step": 18914 + }, + { + "epoch": 0.5193574958813838, + "grad_norm": 0.3704836964607239, + "learning_rate": 1.6876389083604947e-05, + "loss": 0.4602, + "step": 18915 + }, + { + "epoch": 0.5193849533223504, + "grad_norm": 0.362648606300354, + "learning_rate": 1.6876075499754322e-05, + "loss": 0.5352, + "step": 18916 + }, + { + "epoch": 0.5194124107633169, + "grad_norm": 0.41108834743499756, + "learning_rate": 1.687576190307762e-05, + "loss": 0.4648, + "step": 18917 + }, + { + "epoch": 0.5194398682042833, + "grad_norm": 0.33999669551849365, + "learning_rate": 1.687544829357543e-05, + "loss": 0.4661, + "step": 18918 + }, + { + "epoch": 0.5194673256452499, + "grad_norm": 0.309731125831604, + "learning_rate": 1.6875134671248335e-05, + "loss": 0.492, + "step": 18919 + }, + { + "epoch": 0.5194947830862163, + "grad_norm": 0.40000537037849426, + "learning_rate": 1.687482103609692e-05, + "loss": 0.4741, + "step": 18920 + }, + { + "epoch": 0.5195222405271829, + "grad_norm": 0.3324780762195587, + "learning_rate": 1.6874507388121767e-05, + "loss": 0.4851, + "step": 18921 + }, + { + "epoch": 0.5195496979681493, + "grad_norm": 0.3614453673362732, + "learning_rate": 1.6874193727323464e-05, + "loss": 0.5433, + "step": 18922 + }, + { + "epoch": 0.5195771554091159, + "grad_norm": 0.36453866958618164, + "learning_rate": 1.6873880053702593e-05, + "loss": 0.4899, + "step": 18923 + }, + { + "epoch": 0.5196046128500824, + "grad_norm": 0.36976873874664307, + "learning_rate": 1.6873566367259745e-05, + "loss": 0.4902, + "step": 18924 + }, + { + "epoch": 0.5196320702910489, + "grad_norm": 0.3609448969364166, + "learning_rate": 1.6873252667995507e-05, + "loss": 0.4659, + "step": 18925 + }, + { + "epoch": 0.5196595277320154, + "grad_norm": 0.3573462963104248, + "learning_rate": 1.687293895591045e-05, + "loss": 0.5346, + "step": 18926 + }, + { + "epoch": 0.5196869851729818, + "grad_norm": 0.39950841665267944, + "learning_rate": 1.6872625231005173e-05, + "loss": 0.551, + "step": 18927 + }, + { + "epoch": 0.5197144426139484, + "grad_norm": 0.39500850439071655, + "learning_rate": 1.6872311493280256e-05, + "loss": 0.47, + "step": 18928 + }, + { + "epoch": 0.5197419000549148, + "grad_norm": 0.4183475971221924, + "learning_rate": 1.6871997742736285e-05, + "loss": 0.5135, + "step": 18929 + }, + { + "epoch": 0.5197693574958814, + "grad_norm": 0.3440958559513092, + "learning_rate": 1.6871683979373842e-05, + "loss": 0.4997, + "step": 18930 + }, + { + "epoch": 0.5197968149368479, + "grad_norm": 0.48556432127952576, + "learning_rate": 1.687137020319352e-05, + "loss": 0.5053, + "step": 18931 + }, + { + "epoch": 0.5198242723778144, + "grad_norm": 0.37415406107902527, + "learning_rate": 1.6871056414195897e-05, + "loss": 0.4915, + "step": 18932 + }, + { + "epoch": 0.5198517298187809, + "grad_norm": 0.38712629675865173, + "learning_rate": 1.687074261238156e-05, + "loss": 0.5993, + "step": 18933 + }, + { + "epoch": 0.5198791872597474, + "grad_norm": 0.3410363495349884, + "learning_rate": 1.6870428797751097e-05, + "loss": 0.4679, + "step": 18934 + }, + { + "epoch": 0.5199066447007139, + "grad_norm": 0.3978807330131531, + "learning_rate": 1.687011497030509e-05, + "loss": 0.4774, + "step": 18935 + }, + { + "epoch": 0.5199341021416803, + "grad_norm": 0.39604371786117554, + "learning_rate": 1.6869801130044127e-05, + "loss": 0.4859, + "step": 18936 + }, + { + "epoch": 0.5199615595826469, + "grad_norm": 0.38651683926582336, + "learning_rate": 1.686948727696879e-05, + "loss": 0.5124, + "step": 18937 + }, + { + "epoch": 0.5199890170236134, + "grad_norm": 0.3792102634906769, + "learning_rate": 1.6869173411079672e-05, + "loss": 0.4449, + "step": 18938 + }, + { + "epoch": 0.5200164744645799, + "grad_norm": 0.4417475759983063, + "learning_rate": 1.6868859532377346e-05, + "loss": 0.5169, + "step": 18939 + }, + { + "epoch": 0.5200439319055464, + "grad_norm": 0.3277568221092224, + "learning_rate": 1.686854564086241e-05, + "loss": 0.4254, + "step": 18940 + }, + { + "epoch": 0.5200713893465129, + "grad_norm": 0.3965734839439392, + "learning_rate": 1.686823173653544e-05, + "loss": 0.5064, + "step": 18941 + }, + { + "epoch": 0.5200988467874794, + "grad_norm": 0.5173834562301636, + "learning_rate": 1.6867917819397032e-05, + "loss": 0.5693, + "step": 18942 + }, + { + "epoch": 0.5201263042284459, + "grad_norm": 0.37954169511795044, + "learning_rate": 1.6867603889447762e-05, + "loss": 0.5235, + "step": 18943 + }, + { + "epoch": 0.5201537616694124, + "grad_norm": 0.36921656131744385, + "learning_rate": 1.6867289946688217e-05, + "loss": 0.4392, + "step": 18944 + }, + { + "epoch": 0.520181219110379, + "grad_norm": 0.6706227660179138, + "learning_rate": 1.6866975991118986e-05, + "loss": 0.5024, + "step": 18945 + }, + { + "epoch": 0.5202086765513454, + "grad_norm": 0.338155061006546, + "learning_rate": 1.686666202274065e-05, + "loss": 0.4868, + "step": 18946 + }, + { + "epoch": 0.5202361339923119, + "grad_norm": 0.3850488066673279, + "learning_rate": 1.6866348041553803e-05, + "loss": 0.4124, + "step": 18947 + }, + { + "epoch": 0.5202635914332784, + "grad_norm": 0.3647080957889557, + "learning_rate": 1.6866034047559023e-05, + "loss": 0.515, + "step": 18948 + }, + { + "epoch": 0.5202910488742449, + "grad_norm": 0.42123132944107056, + "learning_rate": 1.6865720040756896e-05, + "loss": 0.5809, + "step": 18949 + }, + { + "epoch": 0.5203185063152114, + "grad_norm": 0.37151408195495605, + "learning_rate": 1.6865406021148013e-05, + "loss": 0.4974, + "step": 18950 + }, + { + "epoch": 0.5203459637561779, + "grad_norm": 0.3892247676849365, + "learning_rate": 1.6865091988732958e-05, + "loss": 0.4593, + "step": 18951 + }, + { + "epoch": 0.5203734211971445, + "grad_norm": 0.3638782799243927, + "learning_rate": 1.6864777943512313e-05, + "loss": 0.557, + "step": 18952 + }, + { + "epoch": 0.5204008786381109, + "grad_norm": 0.33876314759254456, + "learning_rate": 1.6864463885486664e-05, + "loss": 0.5474, + "step": 18953 + }, + { + "epoch": 0.5204283360790775, + "grad_norm": 0.36781045794487, + "learning_rate": 1.68641498146566e-05, + "loss": 0.4119, + "step": 18954 + }, + { + "epoch": 0.5204557935200439, + "grad_norm": 0.4282263219356537, + "learning_rate": 1.686383573102271e-05, + "loss": 0.5223, + "step": 18955 + }, + { + "epoch": 0.5204832509610104, + "grad_norm": 0.3488922417163849, + "learning_rate": 1.6863521634585574e-05, + "loss": 0.4718, + "step": 18956 + }, + { + "epoch": 0.5205107084019769, + "grad_norm": 0.40156278014183044, + "learning_rate": 1.6863207525345776e-05, + "loss": 0.5981, + "step": 18957 + }, + { + "epoch": 0.5205381658429434, + "grad_norm": 0.4275887906551361, + "learning_rate": 1.6862893403303908e-05, + "loss": 0.5561, + "step": 18958 + }, + { + "epoch": 0.52056562328391, + "grad_norm": 0.42413780093193054, + "learning_rate": 1.686257926846055e-05, + "loss": 0.504, + "step": 18959 + }, + { + "epoch": 0.5205930807248764, + "grad_norm": 0.40675750374794006, + "learning_rate": 1.6862265120816298e-05, + "loss": 0.5008, + "step": 18960 + }, + { + "epoch": 0.520620538165843, + "grad_norm": 0.3755193054676056, + "learning_rate": 1.6861950960371728e-05, + "loss": 0.51, + "step": 18961 + }, + { + "epoch": 0.5206479956068094, + "grad_norm": 0.3663368821144104, + "learning_rate": 1.6861636787127426e-05, + "loss": 0.5025, + "step": 18962 + }, + { + "epoch": 0.520675453047776, + "grad_norm": 0.3582582175731659, + "learning_rate": 1.6861322601083986e-05, + "loss": 0.4836, + "step": 18963 + }, + { + "epoch": 0.5207029104887424, + "grad_norm": 0.39167824387550354, + "learning_rate": 1.686100840224199e-05, + "loss": 0.4351, + "step": 18964 + }, + { + "epoch": 0.520730367929709, + "grad_norm": 0.3699859082698822, + "learning_rate": 1.686069419060202e-05, + "loss": 0.5099, + "step": 18965 + }, + { + "epoch": 0.5207578253706755, + "grad_norm": 0.3388902246952057, + "learning_rate": 1.6860379966164667e-05, + "loss": 0.4623, + "step": 18966 + }, + { + "epoch": 0.5207852828116419, + "grad_norm": 0.4440940320491791, + "learning_rate": 1.6860065728930516e-05, + "loss": 0.4369, + "step": 18967 + }, + { + "epoch": 0.5208127402526085, + "grad_norm": 0.4070604145526886, + "learning_rate": 1.6859751478900153e-05, + "loss": 0.5745, + "step": 18968 + }, + { + "epoch": 0.5208401976935749, + "grad_norm": 0.375446081161499, + "learning_rate": 1.6859437216074166e-05, + "loss": 0.5097, + "step": 18969 + }, + { + "epoch": 0.5208676551345415, + "grad_norm": 0.4081982970237732, + "learning_rate": 1.6859122940453135e-05, + "loss": 0.5803, + "step": 18970 + }, + { + "epoch": 0.5208951125755079, + "grad_norm": 0.38314640522003174, + "learning_rate": 1.6858808652037656e-05, + "loss": 0.5522, + "step": 18971 + }, + { + "epoch": 0.5209225700164745, + "grad_norm": 0.4085412621498108, + "learning_rate": 1.6858494350828306e-05, + "loss": 0.4966, + "step": 18972 + }, + { + "epoch": 0.520950027457441, + "grad_norm": 0.38129785656929016, + "learning_rate": 1.6858180036825675e-05, + "loss": 0.5145, + "step": 18973 + }, + { + "epoch": 0.5209774848984075, + "grad_norm": 0.3605664372444153, + "learning_rate": 1.685786571003035e-05, + "loss": 0.5401, + "step": 18974 + }, + { + "epoch": 0.521004942339374, + "grad_norm": 0.38751456141471863, + "learning_rate": 1.6857551370442916e-05, + "loss": 0.5329, + "step": 18975 + }, + { + "epoch": 0.5210323997803404, + "grad_norm": 0.33605173230171204, + "learning_rate": 1.685723701806396e-05, + "loss": 0.4425, + "step": 18976 + }, + { + "epoch": 0.521059857221307, + "grad_norm": 0.4295981824398041, + "learning_rate": 1.685692265289407e-05, + "loss": 0.4915, + "step": 18977 + }, + { + "epoch": 0.5210873146622734, + "grad_norm": 0.40637025237083435, + "learning_rate": 1.6856608274933832e-05, + "loss": 0.5908, + "step": 18978 + }, + { + "epoch": 0.52111477210324, + "grad_norm": 0.41403141617774963, + "learning_rate": 1.685629388418383e-05, + "loss": 0.5373, + "step": 18979 + }, + { + "epoch": 0.5211422295442065, + "grad_norm": 0.4050733745098114, + "learning_rate": 1.685597948064465e-05, + "loss": 0.4454, + "step": 18980 + }, + { + "epoch": 0.521169686985173, + "grad_norm": 0.5830341577529907, + "learning_rate": 1.6855665064316878e-05, + "loss": 0.4889, + "step": 18981 + }, + { + "epoch": 0.5211971444261395, + "grad_norm": 0.37284013628959656, + "learning_rate": 1.6855350635201108e-05, + "loss": 0.5457, + "step": 18982 + }, + { + "epoch": 0.521224601867106, + "grad_norm": 0.4030427038669586, + "learning_rate": 1.6855036193297918e-05, + "loss": 0.467, + "step": 18983 + }, + { + "epoch": 0.5212520593080725, + "grad_norm": 0.3743803799152374, + "learning_rate": 1.68547217386079e-05, + "loss": 0.4617, + "step": 18984 + }, + { + "epoch": 0.5212795167490389, + "grad_norm": 0.3789041042327881, + "learning_rate": 1.6854407271131633e-05, + "loss": 0.5723, + "step": 18985 + }, + { + "epoch": 0.5213069741900055, + "grad_norm": 0.3483228087425232, + "learning_rate": 1.6854092790869713e-05, + "loss": 0.5315, + "step": 18986 + }, + { + "epoch": 0.521334431630972, + "grad_norm": 0.36210906505584717, + "learning_rate": 1.685377829782272e-05, + "loss": 0.4958, + "step": 18987 + }, + { + "epoch": 0.5213618890719385, + "grad_norm": 0.3325515687465668, + "learning_rate": 1.6853463791991248e-05, + "loss": 0.4478, + "step": 18988 + }, + { + "epoch": 0.521389346512905, + "grad_norm": 0.29631656408309937, + "learning_rate": 1.6853149273375876e-05, + "loss": 0.416, + "step": 18989 + }, + { + "epoch": 0.5214168039538715, + "grad_norm": 0.4588885009288788, + "learning_rate": 1.685283474197719e-05, + "loss": 0.6048, + "step": 18990 + }, + { + "epoch": 0.521444261394838, + "grad_norm": 0.35932573676109314, + "learning_rate": 1.6852520197795783e-05, + "loss": 0.4855, + "step": 18991 + }, + { + "epoch": 0.5214717188358045, + "grad_norm": 0.381866991519928, + "learning_rate": 1.685220564083224e-05, + "loss": 0.4896, + "step": 18992 + }, + { + "epoch": 0.521499176276771, + "grad_norm": 0.3347355127334595, + "learning_rate": 1.6851891071087144e-05, + "loss": 0.4967, + "step": 18993 + }, + { + "epoch": 0.5215266337177376, + "grad_norm": 0.38889291882514954, + "learning_rate": 1.6851576488561086e-05, + "loss": 0.5557, + "step": 18994 + }, + { + "epoch": 0.521554091158704, + "grad_norm": 0.40667206048965454, + "learning_rate": 1.685126189325465e-05, + "loss": 0.44, + "step": 18995 + }, + { + "epoch": 0.5215815485996705, + "grad_norm": 0.39112624526023865, + "learning_rate": 1.6850947285168424e-05, + "loss": 0.478, + "step": 18996 + }, + { + "epoch": 0.521609006040637, + "grad_norm": 0.3906094431877136, + "learning_rate": 1.6850632664302996e-05, + "loss": 0.5045, + "step": 18997 + }, + { + "epoch": 0.5216364634816035, + "grad_norm": 0.4233337342739105, + "learning_rate": 1.6850318030658947e-05, + "loss": 0.4588, + "step": 18998 + }, + { + "epoch": 0.52166392092257, + "grad_norm": 0.8310515880584717, + "learning_rate": 1.6850003384236874e-05, + "loss": 0.4558, + "step": 18999 + }, + { + "epoch": 0.5216913783635365, + "grad_norm": 0.39209645986557007, + "learning_rate": 1.6849688725037356e-05, + "loss": 0.4938, + "step": 19000 + }, + { + "epoch": 0.5217188358045031, + "grad_norm": 0.3315217196941376, + "learning_rate": 1.6849374053060984e-05, + "loss": 0.4768, + "step": 19001 + }, + { + "epoch": 0.5217462932454695, + "grad_norm": 0.34605923295021057, + "learning_rate": 1.6849059368308342e-05, + "loss": 0.4888, + "step": 19002 + }, + { + "epoch": 0.521773750686436, + "grad_norm": 0.34762102365493774, + "learning_rate": 1.6848744670780018e-05, + "loss": 0.4464, + "step": 19003 + }, + { + "epoch": 0.5218012081274025, + "grad_norm": 0.3611046075820923, + "learning_rate": 1.68484299604766e-05, + "loss": 0.5133, + "step": 19004 + }, + { + "epoch": 0.521828665568369, + "grad_norm": 0.3573717474937439, + "learning_rate": 1.6848115237398676e-05, + "loss": 0.5184, + "step": 19005 + }, + { + "epoch": 0.5218561230093355, + "grad_norm": 0.3892163932323456, + "learning_rate": 1.6847800501546827e-05, + "loss": 0.5398, + "step": 19006 + }, + { + "epoch": 0.521883580450302, + "grad_norm": 0.39087963104248047, + "learning_rate": 1.6847485752921647e-05, + "loss": 0.5102, + "step": 19007 + }, + { + "epoch": 0.5219110378912686, + "grad_norm": 0.33482688665390015, + "learning_rate": 1.6847170991523722e-05, + "loss": 0.4235, + "step": 19008 + }, + { + "epoch": 0.521938495332235, + "grad_norm": 0.4007732570171356, + "learning_rate": 1.6846856217353634e-05, + "loss": 0.5739, + "step": 19009 + }, + { + "epoch": 0.5219659527732016, + "grad_norm": 0.36928123235702515, + "learning_rate": 1.6846541430411977e-05, + "loss": 0.5038, + "step": 19010 + }, + { + "epoch": 0.521993410214168, + "grad_norm": 0.3360326290130615, + "learning_rate": 1.6846226630699332e-05, + "loss": 0.4954, + "step": 19011 + }, + { + "epoch": 0.5220208676551346, + "grad_norm": 0.3612794876098633, + "learning_rate": 1.6845911818216298e-05, + "loss": 0.4892, + "step": 19012 + }, + { + "epoch": 0.522048325096101, + "grad_norm": 0.3557289242744446, + "learning_rate": 1.6845596992963445e-05, + "loss": 0.419, + "step": 19013 + }, + { + "epoch": 0.5220757825370675, + "grad_norm": 0.42474544048309326, + "learning_rate": 1.6845282154941368e-05, + "loss": 0.5192, + "step": 19014 + }, + { + "epoch": 0.5221032399780341, + "grad_norm": 0.4863705635070801, + "learning_rate": 1.684496730415066e-05, + "loss": 0.638, + "step": 19015 + }, + { + "epoch": 0.5221306974190005, + "grad_norm": 0.3901522159576416, + "learning_rate": 1.6844652440591903e-05, + "loss": 0.5138, + "step": 19016 + }, + { + "epoch": 0.5221581548599671, + "grad_norm": 0.3456403911113739, + "learning_rate": 1.684433756426568e-05, + "loss": 0.4732, + "step": 19017 + }, + { + "epoch": 0.5221856123009335, + "grad_norm": 0.36034902930259705, + "learning_rate": 1.6844022675172588e-05, + "loss": 0.4414, + "step": 19018 + }, + { + "epoch": 0.5222130697419001, + "grad_norm": 0.6282547116279602, + "learning_rate": 1.684370777331321e-05, + "loss": 0.5038, + "step": 19019 + }, + { + "epoch": 0.5222405271828665, + "grad_norm": 0.39684560894966125, + "learning_rate": 1.6843392858688124e-05, + "loss": 0.4987, + "step": 19020 + }, + { + "epoch": 0.5222679846238331, + "grad_norm": 0.36534571647644043, + "learning_rate": 1.6843077931297935e-05, + "loss": 0.53, + "step": 19021 + }, + { + "epoch": 0.5222954420647996, + "grad_norm": 0.3717254400253296, + "learning_rate": 1.6842762991143216e-05, + "loss": 0.5134, + "step": 19022 + }, + { + "epoch": 0.522322899505766, + "grad_norm": 0.3810361921787262, + "learning_rate": 1.6842448038224565e-05, + "loss": 0.4903, + "step": 19023 + }, + { + "epoch": 0.5223503569467326, + "grad_norm": 0.3518698215484619, + "learning_rate": 1.684213307254256e-05, + "loss": 0.5593, + "step": 19024 + }, + { + "epoch": 0.522377814387699, + "grad_norm": 0.38406476378440857, + "learning_rate": 1.6841818094097796e-05, + "loss": 0.4837, + "step": 19025 + }, + { + "epoch": 0.5224052718286656, + "grad_norm": 0.4197063744068146, + "learning_rate": 1.6841503102890857e-05, + "loss": 0.5328, + "step": 19026 + }, + { + "epoch": 0.522432729269632, + "grad_norm": 0.4662550687789917, + "learning_rate": 1.6841188098922333e-05, + "loss": 0.6513, + "step": 19027 + }, + { + "epoch": 0.5224601867105986, + "grad_norm": 0.3890591263771057, + "learning_rate": 1.6840873082192807e-05, + "loss": 0.5274, + "step": 19028 + }, + { + "epoch": 0.5224876441515651, + "grad_norm": 0.37159547209739685, + "learning_rate": 1.684055805270287e-05, + "loss": 0.5208, + "step": 19029 + }, + { + "epoch": 0.5225151015925316, + "grad_norm": 0.406217098236084, + "learning_rate": 1.684024301045311e-05, + "loss": 0.5012, + "step": 19030 + }, + { + "epoch": 0.5225425590334981, + "grad_norm": 0.4032571315765381, + "learning_rate": 1.6839927955444113e-05, + "loss": 0.5541, + "step": 19031 + }, + { + "epoch": 0.5225700164744645, + "grad_norm": 0.37870749831199646, + "learning_rate": 1.6839612887676468e-05, + "loss": 0.5808, + "step": 19032 + }, + { + "epoch": 0.5225974739154311, + "grad_norm": 0.3680466115474701, + "learning_rate": 1.683929780715076e-05, + "loss": 0.5608, + "step": 19033 + }, + { + "epoch": 0.5226249313563975, + "grad_norm": 0.3540470600128174, + "learning_rate": 1.6838982713867577e-05, + "loss": 0.4708, + "step": 19034 + }, + { + "epoch": 0.5226523887973641, + "grad_norm": 0.392848402261734, + "learning_rate": 1.6838667607827514e-05, + "loss": 0.6004, + "step": 19035 + }, + { + "epoch": 0.5226798462383306, + "grad_norm": 0.34266504645347595, + "learning_rate": 1.683835248903115e-05, + "loss": 0.4722, + "step": 19036 + }, + { + "epoch": 0.5227073036792971, + "grad_norm": 0.36578235030174255, + "learning_rate": 1.6838037357479076e-05, + "loss": 0.5194, + "step": 19037 + }, + { + "epoch": 0.5227347611202636, + "grad_norm": 0.3790718913078308, + "learning_rate": 1.683772221317188e-05, + "loss": 0.5469, + "step": 19038 + }, + { + "epoch": 0.5227622185612301, + "grad_norm": 0.3754431903362274, + "learning_rate": 1.683740705611015e-05, + "loss": 0.4712, + "step": 19039 + }, + { + "epoch": 0.5227896760021966, + "grad_norm": 0.34920555353164673, + "learning_rate": 1.6837091886294477e-05, + "loss": 0.4987, + "step": 19040 + }, + { + "epoch": 0.522817133443163, + "grad_norm": 0.3569197654724121, + "learning_rate": 1.683677670372544e-05, + "loss": 0.5049, + "step": 19041 + }, + { + "epoch": 0.5228445908841296, + "grad_norm": 0.38602152466773987, + "learning_rate": 1.6836461508403633e-05, + "loss": 0.4615, + "step": 19042 + }, + { + "epoch": 0.5228720483250962, + "grad_norm": 0.37662768363952637, + "learning_rate": 1.683614630032965e-05, + "loss": 0.5781, + "step": 19043 + }, + { + "epoch": 0.5228995057660626, + "grad_norm": 0.35551366209983826, + "learning_rate": 1.6835831079504065e-05, + "loss": 0.3967, + "step": 19044 + }, + { + "epoch": 0.5229269632070291, + "grad_norm": 0.34908440709114075, + "learning_rate": 1.6835515845927477e-05, + "loss": 0.5556, + "step": 19045 + }, + { + "epoch": 0.5229544206479956, + "grad_norm": 0.5056825280189514, + "learning_rate": 1.683520059960047e-05, + "loss": 0.5672, + "step": 19046 + }, + { + "epoch": 0.5229818780889621, + "grad_norm": 0.4828684628009796, + "learning_rate": 1.683488534052363e-05, + "loss": 0.5068, + "step": 19047 + }, + { + "epoch": 0.5230093355299286, + "grad_norm": 0.40769538283348083, + "learning_rate": 1.6834570068697548e-05, + "loss": 0.5259, + "step": 19048 + }, + { + "epoch": 0.5230367929708951, + "grad_norm": 0.3575684428215027, + "learning_rate": 1.683425478412281e-05, + "loss": 0.4924, + "step": 19049 + }, + { + "epoch": 0.5230642504118617, + "grad_norm": 0.3488028645515442, + "learning_rate": 1.683393948680001e-05, + "loss": 0.4974, + "step": 19050 + }, + { + "epoch": 0.5230917078528281, + "grad_norm": 0.35067445039749146, + "learning_rate": 1.683362417672973e-05, + "loss": 0.5336, + "step": 19051 + }, + { + "epoch": 0.5231191652937947, + "grad_norm": 0.3355090618133545, + "learning_rate": 1.6833308853912558e-05, + "loss": 0.5602, + "step": 19052 + }, + { + "epoch": 0.5231466227347611, + "grad_norm": 0.34359169006347656, + "learning_rate": 1.6832993518349087e-05, + "loss": 0.5229, + "step": 19053 + }, + { + "epoch": 0.5231740801757276, + "grad_norm": 0.3683093786239624, + "learning_rate": 1.68326781700399e-05, + "loss": 0.5799, + "step": 19054 + }, + { + "epoch": 0.5232015376166941, + "grad_norm": 0.3590908944606781, + "learning_rate": 1.6832362808985587e-05, + "loss": 0.4762, + "step": 19055 + }, + { + "epoch": 0.5232289950576606, + "grad_norm": 0.34998616576194763, + "learning_rate": 1.683204743518674e-05, + "loss": 0.5516, + "step": 19056 + }, + { + "epoch": 0.5232564524986272, + "grad_norm": 0.4020542800426483, + "learning_rate": 1.683173204864394e-05, + "loss": 0.4969, + "step": 19057 + }, + { + "epoch": 0.5232839099395936, + "grad_norm": 0.36883658170700073, + "learning_rate": 1.683141664935778e-05, + "loss": 0.4507, + "step": 19058 + }, + { + "epoch": 0.5233113673805602, + "grad_norm": 0.4067384898662567, + "learning_rate": 1.6831101237328852e-05, + "loss": 0.5319, + "step": 19059 + }, + { + "epoch": 0.5233388248215266, + "grad_norm": 0.41168221831321716, + "learning_rate": 1.6830785812557734e-05, + "loss": 0.4813, + "step": 19060 + }, + { + "epoch": 0.5233662822624932, + "grad_norm": 0.4140578508377075, + "learning_rate": 1.6830470375045026e-05, + "loss": 0.4805, + "step": 19061 + }, + { + "epoch": 0.5233937397034596, + "grad_norm": 0.3432563245296478, + "learning_rate": 1.6830154924791308e-05, + "loss": 0.4876, + "step": 19062 + }, + { + "epoch": 0.5234211971444261, + "grad_norm": 0.3607037365436554, + "learning_rate": 1.682983946179717e-05, + "loss": 0.5461, + "step": 19063 + }, + { + "epoch": 0.5234486545853927, + "grad_norm": 0.3550116717815399, + "learning_rate": 1.6829523986063202e-05, + "loss": 0.4348, + "step": 19064 + }, + { + "epoch": 0.5234761120263591, + "grad_norm": 0.34829017519950867, + "learning_rate": 1.6829208497589993e-05, + "loss": 0.4782, + "step": 19065 + }, + { + "epoch": 0.5235035694673257, + "grad_norm": 0.379910945892334, + "learning_rate": 1.6828892996378133e-05, + "loss": 0.5545, + "step": 19066 + }, + { + "epoch": 0.5235310269082921, + "grad_norm": 0.3391853868961334, + "learning_rate": 1.6828577482428202e-05, + "loss": 0.5107, + "step": 19067 + }, + { + "epoch": 0.5235584843492587, + "grad_norm": 0.36302855610847473, + "learning_rate": 1.6828261955740796e-05, + "loss": 0.4878, + "step": 19068 + }, + { + "epoch": 0.5235859417902251, + "grad_norm": 0.38021528720855713, + "learning_rate": 1.6827946416316505e-05, + "loss": 0.5515, + "step": 19069 + }, + { + "epoch": 0.5236133992311917, + "grad_norm": 0.38448575139045715, + "learning_rate": 1.682763086415591e-05, + "loss": 0.518, + "step": 19070 + }, + { + "epoch": 0.5236408566721582, + "grad_norm": 0.4058588445186615, + "learning_rate": 1.6827315299259612e-05, + "loss": 0.5351, + "step": 19071 + }, + { + "epoch": 0.5236683141131246, + "grad_norm": 0.35425302386283875, + "learning_rate": 1.682699972162819e-05, + "loss": 0.4809, + "step": 19072 + }, + { + "epoch": 0.5236957715540912, + "grad_norm": 0.3681386113166809, + "learning_rate": 1.6826684131262228e-05, + "loss": 0.4666, + "step": 19073 + }, + { + "epoch": 0.5237232289950576, + "grad_norm": 0.3552451431751251, + "learning_rate": 1.682636852816233e-05, + "loss": 0.4897, + "step": 19074 + }, + { + "epoch": 0.5237506864360242, + "grad_norm": 0.3851637840270996, + "learning_rate": 1.682605291232907e-05, + "loss": 0.4846, + "step": 19075 + }, + { + "epoch": 0.5237781438769906, + "grad_norm": 0.4089570939540863, + "learning_rate": 1.6825737283763042e-05, + "loss": 0.5516, + "step": 19076 + }, + { + "epoch": 0.5238056013179572, + "grad_norm": 0.3768284320831299, + "learning_rate": 1.682542164246484e-05, + "loss": 0.4807, + "step": 19077 + }, + { + "epoch": 0.5238330587589237, + "grad_norm": 0.4083001911640167, + "learning_rate": 1.6825105988435045e-05, + "loss": 0.5598, + "step": 19078 + }, + { + "epoch": 0.5238605161998902, + "grad_norm": 0.3849581778049469, + "learning_rate": 1.682479032167425e-05, + "loss": 0.5283, + "step": 19079 + }, + { + "epoch": 0.5238879736408567, + "grad_norm": 0.3595966696739197, + "learning_rate": 1.6824474642183044e-05, + "loss": 0.4643, + "step": 19080 + }, + { + "epoch": 0.5239154310818231, + "grad_norm": 0.358089417219162, + "learning_rate": 1.682415894996201e-05, + "loss": 0.4943, + "step": 19081 + }, + { + "epoch": 0.5239428885227897, + "grad_norm": 0.40837711095809937, + "learning_rate": 1.6823843245011748e-05, + "loss": 0.5168, + "step": 19082 + }, + { + "epoch": 0.5239703459637561, + "grad_norm": 0.432856947183609, + "learning_rate": 1.6823527527332836e-05, + "loss": 0.5199, + "step": 19083 + }, + { + "epoch": 0.5239978034047227, + "grad_norm": 0.3765791356563568, + "learning_rate": 1.6823211796925868e-05, + "loss": 0.523, + "step": 19084 + }, + { + "epoch": 0.5240252608456892, + "grad_norm": 0.36400994658470154, + "learning_rate": 1.6822896053791434e-05, + "loss": 0.5365, + "step": 19085 + }, + { + "epoch": 0.5240527182866557, + "grad_norm": 0.3268885612487793, + "learning_rate": 1.682258029793012e-05, + "loss": 0.4864, + "step": 19086 + }, + { + "epoch": 0.5240801757276222, + "grad_norm": 0.362437903881073, + "learning_rate": 1.682226452934252e-05, + "loss": 0.5164, + "step": 19087 + }, + { + "epoch": 0.5241076331685887, + "grad_norm": 0.5012758374214172, + "learning_rate": 1.6821948748029213e-05, + "loss": 0.5113, + "step": 19088 + }, + { + "epoch": 0.5241350906095552, + "grad_norm": 0.38912877440452576, + "learning_rate": 1.6821632953990795e-05, + "loss": 0.4998, + "step": 19089 + }, + { + "epoch": 0.5241625480505216, + "grad_norm": 0.39192768931388855, + "learning_rate": 1.682131714722786e-05, + "loss": 0.4712, + "step": 19090 + }, + { + "epoch": 0.5241900054914882, + "grad_norm": 0.4605688452720642, + "learning_rate": 1.6821001327740988e-05, + "loss": 0.5488, + "step": 19091 + }, + { + "epoch": 0.5242174629324547, + "grad_norm": 0.38520562648773193, + "learning_rate": 1.682068549553077e-05, + "loss": 0.5742, + "step": 19092 + }, + { + "epoch": 0.5242449203734212, + "grad_norm": 0.3702854514122009, + "learning_rate": 1.68203696505978e-05, + "loss": 0.5099, + "step": 19093 + }, + { + "epoch": 0.5242723778143877, + "grad_norm": 0.36112210154533386, + "learning_rate": 1.682005379294266e-05, + "loss": 0.4653, + "step": 19094 + }, + { + "epoch": 0.5242998352553542, + "grad_norm": 0.4613388478755951, + "learning_rate": 1.6819737922565944e-05, + "loss": 0.4833, + "step": 19095 + }, + { + "epoch": 0.5243272926963207, + "grad_norm": 0.5363832712173462, + "learning_rate": 1.681942203946824e-05, + "loss": 0.4851, + "step": 19096 + }, + { + "epoch": 0.5243547501372872, + "grad_norm": 0.3826412558555603, + "learning_rate": 1.681910614365014e-05, + "loss": 0.5987, + "step": 19097 + }, + { + "epoch": 0.5243822075782537, + "grad_norm": 0.3670894503593445, + "learning_rate": 1.6818790235112228e-05, + "loss": 0.5889, + "step": 19098 + }, + { + "epoch": 0.5244096650192203, + "grad_norm": 0.3783116042613983, + "learning_rate": 1.6818474313855096e-05, + "loss": 0.5307, + "step": 19099 + }, + { + "epoch": 0.5244371224601867, + "grad_norm": 0.39593833684921265, + "learning_rate": 1.6818158379879338e-05, + "loss": 0.425, + "step": 19100 + }, + { + "epoch": 0.5244645799011532, + "grad_norm": 0.3575367331504822, + "learning_rate": 1.681784243318553e-05, + "loss": 0.5199, + "step": 19101 + }, + { + "epoch": 0.5244920373421197, + "grad_norm": 0.43858593702316284, + "learning_rate": 1.6817526473774276e-05, + "loss": 0.4494, + "step": 19102 + }, + { + "epoch": 0.5245194947830862, + "grad_norm": 0.3298320770263672, + "learning_rate": 1.681721050164616e-05, + "loss": 0.5206, + "step": 19103 + }, + { + "epoch": 0.5245469522240527, + "grad_norm": 0.37781253457069397, + "learning_rate": 1.6816894516801766e-05, + "loss": 0.5744, + "step": 19104 + }, + { + "epoch": 0.5245744096650192, + "grad_norm": 0.37422916293144226, + "learning_rate": 1.681657851924169e-05, + "loss": 0.5231, + "step": 19105 + }, + { + "epoch": 0.5246018671059858, + "grad_norm": 0.40899068117141724, + "learning_rate": 1.6816262508966522e-05, + "loss": 0.5018, + "step": 19106 + }, + { + "epoch": 0.5246293245469522, + "grad_norm": 0.3656761944293976, + "learning_rate": 1.6815946485976844e-05, + "loss": 0.4819, + "step": 19107 + }, + { + "epoch": 0.5246567819879188, + "grad_norm": 0.34786659479141235, + "learning_rate": 1.6815630450273253e-05, + "loss": 0.5409, + "step": 19108 + }, + { + "epoch": 0.5246842394288852, + "grad_norm": 0.4714853763580322, + "learning_rate": 1.6815314401856337e-05, + "loss": 0.4328, + "step": 19109 + }, + { + "epoch": 0.5247116968698518, + "grad_norm": 0.4433494806289673, + "learning_rate": 1.6814998340726683e-05, + "loss": 0.5435, + "step": 19110 + }, + { + "epoch": 0.5247391543108182, + "grad_norm": 0.3345203995704651, + "learning_rate": 1.6814682266884883e-05, + "loss": 0.4675, + "step": 19111 + }, + { + "epoch": 0.5247666117517847, + "grad_norm": 0.37820133566856384, + "learning_rate": 1.6814366180331525e-05, + "loss": 0.408, + "step": 19112 + }, + { + "epoch": 0.5247940691927513, + "grad_norm": 0.35987579822540283, + "learning_rate": 1.6814050081067198e-05, + "loss": 0.5361, + "step": 19113 + }, + { + "epoch": 0.5248215266337177, + "grad_norm": 0.4102112650871277, + "learning_rate": 1.6813733969092494e-05, + "loss": 0.488, + "step": 19114 + }, + { + "epoch": 0.5248489840746843, + "grad_norm": 0.36655667424201965, + "learning_rate": 1.6813417844408003e-05, + "loss": 0.5156, + "step": 19115 + }, + { + "epoch": 0.5248764415156507, + "grad_norm": 0.3796371519565582, + "learning_rate": 1.681310170701431e-05, + "loss": 0.5377, + "step": 19116 + }, + { + "epoch": 0.5249038989566173, + "grad_norm": 0.6902063488960266, + "learning_rate": 1.681278555691201e-05, + "loss": 0.6154, + "step": 19117 + }, + { + "epoch": 0.5249313563975837, + "grad_norm": 0.3748316168785095, + "learning_rate": 1.681246939410169e-05, + "loss": 0.5481, + "step": 19118 + }, + { + "epoch": 0.5249588138385503, + "grad_norm": 0.4381031394004822, + "learning_rate": 1.6812153218583936e-05, + "loss": 0.4791, + "step": 19119 + }, + { + "epoch": 0.5249862712795168, + "grad_norm": 0.417222261428833, + "learning_rate": 1.6811837030359347e-05, + "loss": 0.5294, + "step": 19120 + }, + { + "epoch": 0.5250137287204832, + "grad_norm": 0.36938661336898804, + "learning_rate": 1.681152082942851e-05, + "loss": 0.4965, + "step": 19121 + }, + { + "epoch": 0.5250411861614498, + "grad_norm": 0.3393406271934509, + "learning_rate": 1.681120461579201e-05, + "loss": 0.454, + "step": 19122 + }, + { + "epoch": 0.5250686436024162, + "grad_norm": 0.4087430238723755, + "learning_rate": 1.681088838945044e-05, + "loss": 0.5229, + "step": 19123 + }, + { + "epoch": 0.5250961010433828, + "grad_norm": 0.3442004919052124, + "learning_rate": 1.681057215040439e-05, + "loss": 0.4991, + "step": 19124 + }, + { + "epoch": 0.5251235584843492, + "grad_norm": 0.3997524082660675, + "learning_rate": 1.6810255898654446e-05, + "loss": 0.5104, + "step": 19125 + }, + { + "epoch": 0.5251510159253158, + "grad_norm": 0.4781751036643982, + "learning_rate": 1.68099396342012e-05, + "loss": 0.4815, + "step": 19126 + }, + { + "epoch": 0.5251784733662823, + "grad_norm": 0.4363936483860016, + "learning_rate": 1.6809623357045247e-05, + "loss": 0.5673, + "step": 19127 + }, + { + "epoch": 0.5252059308072488, + "grad_norm": 0.4246797561645508, + "learning_rate": 1.6809307067187176e-05, + "loss": 0.4991, + "step": 19128 + }, + { + "epoch": 0.5252333882482153, + "grad_norm": 0.3933110535144806, + "learning_rate": 1.680899076462757e-05, + "loss": 0.4935, + "step": 19129 + }, + { + "epoch": 0.5252608456891817, + "grad_norm": 0.37479984760284424, + "learning_rate": 1.680867444936702e-05, + "loss": 0.5772, + "step": 19130 + }, + { + "epoch": 0.5252883031301483, + "grad_norm": 0.3583928942680359, + "learning_rate": 1.6808358121406122e-05, + "loss": 0.5011, + "step": 19131 + }, + { + "epoch": 0.5253157605711147, + "grad_norm": 0.5592814087867737, + "learning_rate": 1.6808041780745465e-05, + "loss": 0.6281, + "step": 19132 + }, + { + "epoch": 0.5253432180120813, + "grad_norm": 0.37775158882141113, + "learning_rate": 1.6807725427385636e-05, + "loss": 0.6027, + "step": 19133 + }, + { + "epoch": 0.5253706754530478, + "grad_norm": 0.3713689148426056, + "learning_rate": 1.6807409061327227e-05, + "loss": 0.4605, + "step": 19134 + }, + { + "epoch": 0.5253981328940143, + "grad_norm": 0.4083419740200043, + "learning_rate": 1.6807092682570826e-05, + "loss": 0.4927, + "step": 19135 + }, + { + "epoch": 0.5254255903349808, + "grad_norm": 0.6965022087097168, + "learning_rate": 1.6806776291117025e-05, + "loss": 0.482, + "step": 19136 + }, + { + "epoch": 0.5254530477759473, + "grad_norm": 0.3704613149166107, + "learning_rate": 1.6806459886966414e-05, + "loss": 0.513, + "step": 19137 + }, + { + "epoch": 0.5254805052169138, + "grad_norm": 0.3603249788284302, + "learning_rate": 1.6806143470119582e-05, + "loss": 0.5186, + "step": 19138 + }, + { + "epoch": 0.5255079626578802, + "grad_norm": 0.5188543796539307, + "learning_rate": 1.680582704057712e-05, + "loss": 0.5246, + "step": 19139 + }, + { + "epoch": 0.5255354200988468, + "grad_norm": 0.3727228045463562, + "learning_rate": 1.680551059833962e-05, + "loss": 0.4973, + "step": 19140 + }, + { + "epoch": 0.5255628775398133, + "grad_norm": 0.3985389173030853, + "learning_rate": 1.6805194143407672e-05, + "loss": 0.4901, + "step": 19141 + }, + { + "epoch": 0.5255903349807798, + "grad_norm": 0.352114737033844, + "learning_rate": 1.680487767578186e-05, + "loss": 0.5139, + "step": 19142 + }, + { + "epoch": 0.5256177924217463, + "grad_norm": 0.36314526200294495, + "learning_rate": 1.680456119546278e-05, + "loss": 0.5115, + "step": 19143 + }, + { + "epoch": 0.5256452498627128, + "grad_norm": 0.38099151849746704, + "learning_rate": 1.6804244702451028e-05, + "loss": 0.5594, + "step": 19144 + }, + { + "epoch": 0.5256727073036793, + "grad_norm": 0.3631795048713684, + "learning_rate": 1.6803928196747183e-05, + "loss": 0.5411, + "step": 19145 + }, + { + "epoch": 0.5257001647446458, + "grad_norm": 0.38922378420829773, + "learning_rate": 1.680361167835184e-05, + "loss": 0.5838, + "step": 19146 + }, + { + "epoch": 0.5257276221856123, + "grad_norm": 0.40888550877571106, + "learning_rate": 1.6803295147265594e-05, + "loss": 0.5213, + "step": 19147 + }, + { + "epoch": 0.5257550796265787, + "grad_norm": 0.37229689955711365, + "learning_rate": 1.6802978603489028e-05, + "loss": 0.4656, + "step": 19148 + }, + { + "epoch": 0.5257825370675453, + "grad_norm": 0.3934831917285919, + "learning_rate": 1.6802662047022736e-05, + "loss": 0.611, + "step": 19149 + }, + { + "epoch": 0.5258099945085118, + "grad_norm": 0.40785467624664307, + "learning_rate": 1.6802345477867305e-05, + "loss": 0.5479, + "step": 19150 + }, + { + "epoch": 0.5258374519494783, + "grad_norm": 0.37173759937286377, + "learning_rate": 1.6802028896023333e-05, + "loss": 0.5218, + "step": 19151 + }, + { + "epoch": 0.5258649093904448, + "grad_norm": 0.39577949047088623, + "learning_rate": 1.6801712301491405e-05, + "loss": 0.6018, + "step": 19152 + }, + { + "epoch": 0.5258923668314113, + "grad_norm": 0.34941163659095764, + "learning_rate": 1.680139569427211e-05, + "loss": 0.4685, + "step": 19153 + }, + { + "epoch": 0.5259198242723778, + "grad_norm": 0.3749145567417145, + "learning_rate": 1.6801079074366044e-05, + "loss": 0.5333, + "step": 19154 + }, + { + "epoch": 0.5259472817133443, + "grad_norm": 0.38903799653053284, + "learning_rate": 1.6800762441773794e-05, + "loss": 0.51, + "step": 19155 + }, + { + "epoch": 0.5259747391543108, + "grad_norm": 0.38631096482276917, + "learning_rate": 1.6800445796495952e-05, + "loss": 0.4882, + "step": 19156 + }, + { + "epoch": 0.5260021965952774, + "grad_norm": 0.42841726541519165, + "learning_rate": 1.680012913853311e-05, + "loss": 0.5682, + "step": 19157 + }, + { + "epoch": 0.5260296540362438, + "grad_norm": 0.4430391192436218, + "learning_rate": 1.6799812467885856e-05, + "loss": 0.5116, + "step": 19158 + }, + { + "epoch": 0.5260571114772103, + "grad_norm": 0.3784981667995453, + "learning_rate": 1.6799495784554778e-05, + "loss": 0.4363, + "step": 19159 + }, + { + "epoch": 0.5260845689181768, + "grad_norm": 0.3874782621860504, + "learning_rate": 1.6799179088540475e-05, + "loss": 0.4998, + "step": 19160 + }, + { + "epoch": 0.5261120263591433, + "grad_norm": 0.3836885392665863, + "learning_rate": 1.679886237984353e-05, + "loss": 0.5111, + "step": 19161 + }, + { + "epoch": 0.5261394838001098, + "grad_norm": 0.3838655352592468, + "learning_rate": 1.6798545658464536e-05, + "loss": 0.4947, + "step": 19162 + }, + { + "epoch": 0.5261669412410763, + "grad_norm": 0.3336462378501892, + "learning_rate": 1.6798228924404085e-05, + "loss": 0.4848, + "step": 19163 + }, + { + "epoch": 0.5261943986820429, + "grad_norm": 0.38034048676490784, + "learning_rate": 1.679791217766277e-05, + "loss": 0.4948, + "step": 19164 + }, + { + "epoch": 0.5262218561230093, + "grad_norm": 0.40128204226493835, + "learning_rate": 1.6797595418241176e-05, + "loss": 0.5157, + "step": 19165 + }, + { + "epoch": 0.5262493135639759, + "grad_norm": 0.41343289613723755, + "learning_rate": 1.67972786461399e-05, + "loss": 0.5459, + "step": 19166 + }, + { + "epoch": 0.5262767710049423, + "grad_norm": 0.42055070400238037, + "learning_rate": 1.6796961861359526e-05, + "loss": 0.5641, + "step": 19167 + }, + { + "epoch": 0.5263042284459089, + "grad_norm": 0.3727434277534485, + "learning_rate": 1.679664506390065e-05, + "loss": 0.5265, + "step": 19168 + }, + { + "epoch": 0.5263316858868753, + "grad_norm": 0.36285459995269775, + "learning_rate": 1.679632825376386e-05, + "loss": 0.4876, + "step": 19169 + }, + { + "epoch": 0.5263591433278418, + "grad_norm": 0.3798579275608063, + "learning_rate": 1.6796011430949753e-05, + "loss": 0.5014, + "step": 19170 + }, + { + "epoch": 0.5263866007688084, + "grad_norm": 0.39925527572631836, + "learning_rate": 1.6795694595458916e-05, + "loss": 0.5523, + "step": 19171 + }, + { + "epoch": 0.5264140582097748, + "grad_norm": 0.38314545154571533, + "learning_rate": 1.6795377747291937e-05, + "loss": 0.5554, + "step": 19172 + }, + { + "epoch": 0.5264415156507414, + "grad_norm": 0.36303719878196716, + "learning_rate": 1.6795060886449414e-05, + "loss": 0.5354, + "step": 19173 + }, + { + "epoch": 0.5264689730917078, + "grad_norm": 0.33871346712112427, + "learning_rate": 1.6794744012931924e-05, + "loss": 0.4674, + "step": 19174 + }, + { + "epoch": 0.5264964305326744, + "grad_norm": 0.5686480402946472, + "learning_rate": 1.679442712674008e-05, + "loss": 0.4889, + "step": 19175 + }, + { + "epoch": 0.5265238879736408, + "grad_norm": 0.35752493143081665, + "learning_rate": 1.6794110227874448e-05, + "loss": 0.4996, + "step": 19176 + }, + { + "epoch": 0.5265513454146074, + "grad_norm": 0.3468191921710968, + "learning_rate": 1.679379331633564e-05, + "loss": 0.505, + "step": 19177 + }, + { + "epoch": 0.5265788028555739, + "grad_norm": 0.35147663950920105, + "learning_rate": 1.679347639212424e-05, + "loss": 0.3926, + "step": 19178 + }, + { + "epoch": 0.5266062602965403, + "grad_norm": 0.37651383876800537, + "learning_rate": 1.6793159455240834e-05, + "loss": 0.4702, + "step": 19179 + }, + { + "epoch": 0.5266337177375069, + "grad_norm": 0.45632681250572205, + "learning_rate": 1.6792842505686023e-05, + "loss": 0.5899, + "step": 19180 + }, + { + "epoch": 0.5266611751784733, + "grad_norm": 0.33764728903770447, + "learning_rate": 1.679252554346039e-05, + "loss": 0.4295, + "step": 19181 + }, + { + "epoch": 0.5266886326194399, + "grad_norm": 0.35461992025375366, + "learning_rate": 1.6792208568564527e-05, + "loss": 0.51, + "step": 19182 + }, + { + "epoch": 0.5267160900604063, + "grad_norm": 0.3508410155773163, + "learning_rate": 1.6791891580999028e-05, + "loss": 0.5155, + "step": 19183 + }, + { + "epoch": 0.5267435475013729, + "grad_norm": 0.4389016032218933, + "learning_rate": 1.6791574580764484e-05, + "loss": 0.5051, + "step": 19184 + }, + { + "epoch": 0.5267710049423394, + "grad_norm": 0.3596161901950836, + "learning_rate": 1.6791257567861485e-05, + "loss": 0.5367, + "step": 19185 + }, + { + "epoch": 0.5267984623833059, + "grad_norm": 0.3945158123970032, + "learning_rate": 1.6790940542290623e-05, + "loss": 0.5702, + "step": 19186 + }, + { + "epoch": 0.5268259198242724, + "grad_norm": 0.3644779622554779, + "learning_rate": 1.679062350405249e-05, + "loss": 0.4033, + "step": 19187 + }, + { + "epoch": 0.5268533772652388, + "grad_norm": 0.32592862844467163, + "learning_rate": 1.679030645314768e-05, + "loss": 0.4927, + "step": 19188 + }, + { + "epoch": 0.5268808347062054, + "grad_norm": 0.369797945022583, + "learning_rate": 1.6789989389576775e-05, + "loss": 0.5258, + "step": 19189 + }, + { + "epoch": 0.5269082921471718, + "grad_norm": 0.3862202763557434, + "learning_rate": 1.678967231334038e-05, + "loss": 0.5778, + "step": 19190 + }, + { + "epoch": 0.5269357495881384, + "grad_norm": 0.40515604615211487, + "learning_rate": 1.6789355224439075e-05, + "loss": 0.5871, + "step": 19191 + }, + { + "epoch": 0.5269632070291049, + "grad_norm": 0.33311527967453003, + "learning_rate": 1.6789038122873454e-05, + "loss": 0.4278, + "step": 19192 + }, + { + "epoch": 0.5269906644700714, + "grad_norm": 0.37520310282707214, + "learning_rate": 1.678872100864411e-05, + "loss": 0.5767, + "step": 19193 + }, + { + "epoch": 0.5270181219110379, + "grad_norm": 0.38766831159591675, + "learning_rate": 1.6788403881751637e-05, + "loss": 0.4963, + "step": 19194 + }, + { + "epoch": 0.5270455793520044, + "grad_norm": 0.34604915976524353, + "learning_rate": 1.6788086742196626e-05, + "loss": 0.5282, + "step": 19195 + }, + { + "epoch": 0.5270730367929709, + "grad_norm": 0.4100956320762634, + "learning_rate": 1.6787769589979665e-05, + "loss": 0.4834, + "step": 19196 + }, + { + "epoch": 0.5271004942339373, + "grad_norm": 0.3557393252849579, + "learning_rate": 1.6787452425101344e-05, + "loss": 0.4351, + "step": 19197 + }, + { + "epoch": 0.5271279516749039, + "grad_norm": 0.43311917781829834, + "learning_rate": 1.678713524756226e-05, + "loss": 0.5406, + "step": 19198 + }, + { + "epoch": 0.5271554091158704, + "grad_norm": 0.3942387104034424, + "learning_rate": 1.6786818057363005e-05, + "loss": 0.5457, + "step": 19199 + }, + { + "epoch": 0.5271828665568369, + "grad_norm": 0.3914620876312256, + "learning_rate": 1.6786500854504167e-05, + "loss": 0.4688, + "step": 19200 + }, + { + "epoch": 0.5272103239978034, + "grad_norm": 0.350506454706192, + "learning_rate": 1.6786183638986336e-05, + "loss": 0.5624, + "step": 19201 + }, + { + "epoch": 0.5272377814387699, + "grad_norm": 0.410220205783844, + "learning_rate": 1.678586641081011e-05, + "loss": 0.494, + "step": 19202 + }, + { + "epoch": 0.5272652388797364, + "grad_norm": 0.36749276518821716, + "learning_rate": 1.6785549169976078e-05, + "loss": 0.437, + "step": 19203 + }, + { + "epoch": 0.5272926963207029, + "grad_norm": 0.4050450026988983, + "learning_rate": 1.678523191648483e-05, + "loss": 0.5465, + "step": 19204 + }, + { + "epoch": 0.5273201537616694, + "grad_norm": 0.3726862072944641, + "learning_rate": 1.6784914650336953e-05, + "loss": 0.4705, + "step": 19205 + }, + { + "epoch": 0.527347611202636, + "grad_norm": 0.3481220602989197, + "learning_rate": 1.6784597371533052e-05, + "loss": 0.3943, + "step": 19206 + }, + { + "epoch": 0.5273750686436024, + "grad_norm": 0.34966224431991577, + "learning_rate": 1.678428008007371e-05, + "loss": 0.4859, + "step": 19207 + }, + { + "epoch": 0.527402526084569, + "grad_norm": 0.3816086947917938, + "learning_rate": 1.678396277595952e-05, + "loss": 0.5367, + "step": 19208 + }, + { + "epoch": 0.5274299835255354, + "grad_norm": 0.3312292993068695, + "learning_rate": 1.6783645459191075e-05, + "loss": 0.5053, + "step": 19209 + }, + { + "epoch": 0.5274574409665019, + "grad_norm": 0.3624593913555145, + "learning_rate": 1.6783328129768963e-05, + "loss": 0.5113, + "step": 19210 + }, + { + "epoch": 0.5274848984074684, + "grad_norm": 0.3641042113304138, + "learning_rate": 1.6783010787693782e-05, + "loss": 0.4552, + "step": 19211 + }, + { + "epoch": 0.5275123558484349, + "grad_norm": 0.37600281834602356, + "learning_rate": 1.678269343296612e-05, + "loss": 0.4865, + "step": 19212 + }, + { + "epoch": 0.5275398132894015, + "grad_norm": 0.3687096834182739, + "learning_rate": 1.678237606558657e-05, + "loss": 0.4042, + "step": 19213 + }, + { + "epoch": 0.5275672707303679, + "grad_norm": 0.41955721378326416, + "learning_rate": 1.6782058685555726e-05, + "loss": 0.4385, + "step": 19214 + }, + { + "epoch": 0.5275947281713345, + "grad_norm": 0.42253705859184265, + "learning_rate": 1.6781741292874175e-05, + "loss": 0.5351, + "step": 19215 + }, + { + "epoch": 0.5276221856123009, + "grad_norm": 0.41899731755256653, + "learning_rate": 1.6781423887542513e-05, + "loss": 0.5544, + "step": 19216 + }, + { + "epoch": 0.5276496430532674, + "grad_norm": 0.35970762372016907, + "learning_rate": 1.678110646956133e-05, + "loss": 0.417, + "step": 19217 + }, + { + "epoch": 0.5276771004942339, + "grad_norm": 0.4153427183628082, + "learning_rate": 1.678078903893122e-05, + "loss": 0.5379, + "step": 19218 + }, + { + "epoch": 0.5277045579352004, + "grad_norm": 0.33959513902664185, + "learning_rate": 1.6780471595652776e-05, + "loss": 0.4885, + "step": 19219 + }, + { + "epoch": 0.527732015376167, + "grad_norm": 0.38064736127853394, + "learning_rate": 1.678015413972659e-05, + "loss": 0.5122, + "step": 19220 + }, + { + "epoch": 0.5277594728171334, + "grad_norm": 0.366242378950119, + "learning_rate": 1.6779836671153247e-05, + "loss": 0.4187, + "step": 19221 + }, + { + "epoch": 0.5277869302581, + "grad_norm": 0.4022712707519531, + "learning_rate": 1.6779519189933345e-05, + "loss": 0.5217, + "step": 19222 + }, + { + "epoch": 0.5278143876990664, + "grad_norm": 0.3661412000656128, + "learning_rate": 1.677920169606748e-05, + "loss": 0.5254, + "step": 19223 + }, + { + "epoch": 0.527841845140033, + "grad_norm": 0.5110522508621216, + "learning_rate": 1.677888418955624e-05, + "loss": 0.5004, + "step": 19224 + }, + { + "epoch": 0.5278693025809994, + "grad_norm": 0.3969288468360901, + "learning_rate": 1.6778566670400214e-05, + "loss": 0.5056, + "step": 19225 + }, + { + "epoch": 0.527896760021966, + "grad_norm": 0.38329246640205383, + "learning_rate": 1.67782491386e-05, + "loss": 0.4941, + "step": 19226 + }, + { + "epoch": 0.5279242174629325, + "grad_norm": 0.3618462085723877, + "learning_rate": 1.677793159415619e-05, + "loss": 0.4618, + "step": 19227 + }, + { + "epoch": 0.5279516749038989, + "grad_norm": 0.4099544882774353, + "learning_rate": 1.677761403706937e-05, + "loss": 0.5499, + "step": 19228 + }, + { + "epoch": 0.5279791323448655, + "grad_norm": 0.4067249894142151, + "learning_rate": 1.6777296467340137e-05, + "loss": 0.4838, + "step": 19229 + }, + { + "epoch": 0.5280065897858319, + "grad_norm": 0.36066070199012756, + "learning_rate": 1.6776978884969085e-05, + "loss": 0.4862, + "step": 19230 + }, + { + "epoch": 0.5280340472267985, + "grad_norm": 0.4472638964653015, + "learning_rate": 1.6776661289956805e-05, + "loss": 0.4321, + "step": 19231 + }, + { + "epoch": 0.5280615046677649, + "grad_norm": 0.44418203830718994, + "learning_rate": 1.677634368230389e-05, + "loss": 0.523, + "step": 19232 + }, + { + "epoch": 0.5280889621087315, + "grad_norm": 0.35203954577445984, + "learning_rate": 1.6776026062010924e-05, + "loss": 0.4311, + "step": 19233 + }, + { + "epoch": 0.528116419549698, + "grad_norm": 0.8560782074928284, + "learning_rate": 1.6775708429078513e-05, + "loss": 0.5344, + "step": 19234 + }, + { + "epoch": 0.5281438769906645, + "grad_norm": 0.3702181577682495, + "learning_rate": 1.6775390783507243e-05, + "loss": 0.4686, + "step": 19235 + }, + { + "epoch": 0.528171334431631, + "grad_norm": 0.37020552158355713, + "learning_rate": 1.6775073125297705e-05, + "loss": 0.466, + "step": 19236 + }, + { + "epoch": 0.5281987918725974, + "grad_norm": 0.3982091248035431, + "learning_rate": 1.6774755454450494e-05, + "loss": 0.4897, + "step": 19237 + }, + { + "epoch": 0.528226249313564, + "grad_norm": 0.37093690037727356, + "learning_rate": 1.67744377709662e-05, + "loss": 0.4723, + "step": 19238 + }, + { + "epoch": 0.5282537067545304, + "grad_norm": 0.44469910860061646, + "learning_rate": 1.677412007484542e-05, + "loss": 0.6077, + "step": 19239 + }, + { + "epoch": 0.528281164195497, + "grad_norm": 1.986665964126587, + "learning_rate": 1.677380236608874e-05, + "loss": 0.6419, + "step": 19240 + }, + { + "epoch": 0.5283086216364635, + "grad_norm": 0.3858656883239746, + "learning_rate": 1.6773484644696764e-05, + "loss": 0.4817, + "step": 19241 + }, + { + "epoch": 0.52833607907743, + "grad_norm": 0.33428171277046204, + "learning_rate": 1.6773166910670073e-05, + "loss": 0.4901, + "step": 19242 + }, + { + "epoch": 0.5283635365183965, + "grad_norm": 0.36057883501052856, + "learning_rate": 1.6772849164009265e-05, + "loss": 0.4988, + "step": 19243 + }, + { + "epoch": 0.528390993959363, + "grad_norm": 0.37854069471359253, + "learning_rate": 1.677253140471493e-05, + "loss": 0.5069, + "step": 19244 + }, + { + "epoch": 0.5284184514003295, + "grad_norm": 0.3335108757019043, + "learning_rate": 1.6772213632787663e-05, + "loss": 0.4378, + "step": 19245 + }, + { + "epoch": 0.5284459088412959, + "grad_norm": 0.3537657558917999, + "learning_rate": 1.6771895848228057e-05, + "loss": 0.4227, + "step": 19246 + }, + { + "epoch": 0.5284733662822625, + "grad_norm": 0.37530162930488586, + "learning_rate": 1.6771578051036703e-05, + "loss": 0.5332, + "step": 19247 + }, + { + "epoch": 0.528500823723229, + "grad_norm": 0.4401248097419739, + "learning_rate": 1.6771260241214194e-05, + "loss": 0.5919, + "step": 19248 + }, + { + "epoch": 0.5285282811641955, + "grad_norm": 0.39247605204582214, + "learning_rate": 1.6770942418761126e-05, + "loss": 0.5619, + "step": 19249 + }, + { + "epoch": 0.528555738605162, + "grad_norm": 0.3852739930152893, + "learning_rate": 1.6770624583678085e-05, + "loss": 0.5261, + "step": 19250 + }, + { + "epoch": 0.5285831960461285, + "grad_norm": 0.3521665334701538, + "learning_rate": 1.6770306735965675e-05, + "loss": 0.5043, + "step": 19251 + }, + { + "epoch": 0.528610653487095, + "grad_norm": 0.3663382828235626, + "learning_rate": 1.6769988875624474e-05, + "loss": 0.4656, + "step": 19252 + }, + { + "epoch": 0.5286381109280615, + "grad_norm": 0.36031660437583923, + "learning_rate": 1.676967100265509e-05, + "loss": 0.4376, + "step": 19253 + }, + { + "epoch": 0.528665568369028, + "grad_norm": 0.4829384684562683, + "learning_rate": 1.6769353117058104e-05, + "loss": 0.4404, + "step": 19254 + }, + { + "epoch": 0.5286930258099946, + "grad_norm": 0.35596874356269836, + "learning_rate": 1.6769035218834117e-05, + "loss": 0.5053, + "step": 19255 + }, + { + "epoch": 0.528720483250961, + "grad_norm": 0.41918402910232544, + "learning_rate": 1.6768717307983718e-05, + "loss": 0.5429, + "step": 19256 + }, + { + "epoch": 0.5287479406919275, + "grad_norm": 0.3656597435474396, + "learning_rate": 1.67683993845075e-05, + "loss": 0.4672, + "step": 19257 + }, + { + "epoch": 0.528775398132894, + "grad_norm": 0.37589871883392334, + "learning_rate": 1.6768081448406057e-05, + "loss": 0.5168, + "step": 19258 + }, + { + "epoch": 0.5288028555738605, + "grad_norm": 0.41445696353912354, + "learning_rate": 1.6767763499679987e-05, + "loss": 0.6173, + "step": 19259 + }, + { + "epoch": 0.528830313014827, + "grad_norm": 0.3878379166126251, + "learning_rate": 1.676744553832987e-05, + "loss": 0.5528, + "step": 19260 + }, + { + "epoch": 0.5288577704557935, + "grad_norm": 0.38230928778648376, + "learning_rate": 1.6767127564356312e-05, + "loss": 0.5268, + "step": 19261 + }, + { + "epoch": 0.5288852278967601, + "grad_norm": 0.35823333263397217, + "learning_rate": 1.67668095777599e-05, + "loss": 0.524, + "step": 19262 + }, + { + "epoch": 0.5289126853377265, + "grad_norm": 0.4199070334434509, + "learning_rate": 1.6766491578541228e-05, + "loss": 0.5025, + "step": 19263 + }, + { + "epoch": 0.5289401427786931, + "grad_norm": 0.3432691991329193, + "learning_rate": 1.676617356670089e-05, + "loss": 0.5571, + "step": 19264 + }, + { + "epoch": 0.5289676002196595, + "grad_norm": 0.39311617612838745, + "learning_rate": 1.676585554223948e-05, + "loss": 0.4831, + "step": 19265 + }, + { + "epoch": 0.528995057660626, + "grad_norm": 0.4017024338245392, + "learning_rate": 1.6765537505157587e-05, + "loss": 0.4741, + "step": 19266 + }, + { + "epoch": 0.5290225151015925, + "grad_norm": 0.36922845244407654, + "learning_rate": 1.676521945545581e-05, + "loss": 0.4801, + "step": 19267 + }, + { + "epoch": 0.529049972542559, + "grad_norm": 0.38912126421928406, + "learning_rate": 1.676490139313474e-05, + "loss": 0.4906, + "step": 19268 + }, + { + "epoch": 0.5290774299835256, + "grad_norm": 0.3729131817817688, + "learning_rate": 1.6764583318194966e-05, + "loss": 0.545, + "step": 19269 + }, + { + "epoch": 0.529104887424492, + "grad_norm": 0.3657056391239166, + "learning_rate": 1.676426523063709e-05, + "loss": 0.535, + "step": 19270 + }, + { + "epoch": 0.5291323448654586, + "grad_norm": 0.4061165452003479, + "learning_rate": 1.6763947130461698e-05, + "loss": 0.5143, + "step": 19271 + }, + { + "epoch": 0.529159802306425, + "grad_norm": 0.37780505418777466, + "learning_rate": 1.6763629017669384e-05, + "loss": 0.5414, + "step": 19272 + }, + { + "epoch": 0.5291872597473916, + "grad_norm": 0.49078184366226196, + "learning_rate": 1.6763310892260746e-05, + "loss": 0.5486, + "step": 19273 + }, + { + "epoch": 0.529214717188358, + "grad_norm": 0.3650389015674591, + "learning_rate": 1.676299275423637e-05, + "loss": 0.5328, + "step": 19274 + }, + { + "epoch": 0.5292421746293245, + "grad_norm": 0.38523370027542114, + "learning_rate": 1.676267460359686e-05, + "loss": 0.45, + "step": 19275 + }, + { + "epoch": 0.5292696320702911, + "grad_norm": 0.3226696848869324, + "learning_rate": 1.6762356440342804e-05, + "loss": 0.4151, + "step": 19276 + }, + { + "epoch": 0.5292970895112575, + "grad_norm": 0.3528428077697754, + "learning_rate": 1.6762038264474793e-05, + "loss": 0.5442, + "step": 19277 + }, + { + "epoch": 0.5293245469522241, + "grad_norm": 0.39372432231903076, + "learning_rate": 1.6761720075993418e-05, + "loss": 0.4767, + "step": 19278 + }, + { + "epoch": 0.5293520043931905, + "grad_norm": 0.34649986028671265, + "learning_rate": 1.6761401874899282e-05, + "loss": 0.4394, + "step": 19279 + }, + { + "epoch": 0.5293794618341571, + "grad_norm": 0.40644749999046326, + "learning_rate": 1.676108366119297e-05, + "loss": 0.4769, + "step": 19280 + }, + { + "epoch": 0.5294069192751235, + "grad_norm": 0.3695274293422699, + "learning_rate": 1.6760765434875082e-05, + "loss": 0.5228, + "step": 19281 + }, + { + "epoch": 0.5294343767160901, + "grad_norm": 0.3814099133014679, + "learning_rate": 1.6760447195946207e-05, + "loss": 0.5211, + "step": 19282 + }, + { + "epoch": 0.5294618341570566, + "grad_norm": 0.3743430972099304, + "learning_rate": 1.6760128944406942e-05, + "loss": 0.555, + "step": 19283 + }, + { + "epoch": 0.529489291598023, + "grad_norm": 0.4357761740684509, + "learning_rate": 1.6759810680257878e-05, + "loss": 0.5919, + "step": 19284 + }, + { + "epoch": 0.5295167490389896, + "grad_norm": 0.4259355664253235, + "learning_rate": 1.675949240349961e-05, + "loss": 0.5835, + "step": 19285 + }, + { + "epoch": 0.529544206479956, + "grad_norm": 0.3843569755554199, + "learning_rate": 1.675917411413273e-05, + "loss": 0.4336, + "step": 19286 + }, + { + "epoch": 0.5295716639209226, + "grad_norm": 0.4091778099536896, + "learning_rate": 1.6758855812157833e-05, + "loss": 0.5874, + "step": 19287 + }, + { + "epoch": 0.529599121361889, + "grad_norm": 0.35755568742752075, + "learning_rate": 1.6758537497575515e-05, + "loss": 0.5012, + "step": 19288 + }, + { + "epoch": 0.5296265788028556, + "grad_norm": 0.35312119126319885, + "learning_rate": 1.6758219170386366e-05, + "loss": 0.4488, + "step": 19289 + }, + { + "epoch": 0.5296540362438221, + "grad_norm": 0.7040501236915588, + "learning_rate": 1.675790083059098e-05, + "loss": 0.4751, + "step": 19290 + }, + { + "epoch": 0.5296814936847886, + "grad_norm": 0.3672594726085663, + "learning_rate": 1.6757582478189954e-05, + "loss": 0.5421, + "step": 19291 + }, + { + "epoch": 0.5297089511257551, + "grad_norm": 0.47420787811279297, + "learning_rate": 1.675726411318388e-05, + "loss": 0.588, + "step": 19292 + }, + { + "epoch": 0.5297364085667216, + "grad_norm": 0.34615710377693176, + "learning_rate": 1.675694573557335e-05, + "loss": 0.4502, + "step": 19293 + }, + { + "epoch": 0.5297638660076881, + "grad_norm": 0.3715755343437195, + "learning_rate": 1.6756627345358963e-05, + "loss": 0.5576, + "step": 19294 + }, + { + "epoch": 0.5297913234486545, + "grad_norm": 0.37166696786880493, + "learning_rate": 1.6756308942541305e-05, + "loss": 0.6345, + "step": 19295 + }, + { + "epoch": 0.5298187808896211, + "grad_norm": 0.38494718074798584, + "learning_rate": 1.675599052712098e-05, + "loss": 0.5423, + "step": 19296 + }, + { + "epoch": 0.5298462383305876, + "grad_norm": 0.37315356731414795, + "learning_rate": 1.6755672099098572e-05, + "loss": 0.5271, + "step": 19297 + }, + { + "epoch": 0.5298736957715541, + "grad_norm": 0.3780669867992401, + "learning_rate": 1.675535365847468e-05, + "loss": 0.5476, + "step": 19298 + }, + { + "epoch": 0.5299011532125206, + "grad_norm": 0.3897204101085663, + "learning_rate": 1.67550352052499e-05, + "loss": 0.5864, + "step": 19299 + }, + { + "epoch": 0.5299286106534871, + "grad_norm": 0.427480548620224, + "learning_rate": 1.675471673942482e-05, + "loss": 0.5317, + "step": 19300 + }, + { + "epoch": 0.5299560680944536, + "grad_norm": 0.34892576932907104, + "learning_rate": 1.6754398261000038e-05, + "loss": 0.406, + "step": 19301 + }, + { + "epoch": 0.52998352553542, + "grad_norm": 0.3658982515335083, + "learning_rate": 1.675407976997615e-05, + "loss": 0.4957, + "step": 19302 + }, + { + "epoch": 0.5300109829763866, + "grad_norm": 0.38249024748802185, + "learning_rate": 1.6753761266353744e-05, + "loss": 0.5187, + "step": 19303 + }, + { + "epoch": 0.5300384404173532, + "grad_norm": 0.3669368326663971, + "learning_rate": 1.675344275013342e-05, + "loss": 0.463, + "step": 19304 + }, + { + "epoch": 0.5300658978583196, + "grad_norm": 0.3971758186817169, + "learning_rate": 1.675312422131577e-05, + "loss": 0.4702, + "step": 19305 + }, + { + "epoch": 0.5300933552992861, + "grad_norm": 0.37217795848846436, + "learning_rate": 1.6752805679901384e-05, + "loss": 0.505, + "step": 19306 + }, + { + "epoch": 0.5301208127402526, + "grad_norm": 0.34922534227371216, + "learning_rate": 1.6752487125890865e-05, + "loss": 0.4916, + "step": 19307 + }, + { + "epoch": 0.5301482701812191, + "grad_norm": 0.35785776376724243, + "learning_rate": 1.67521685592848e-05, + "loss": 0.498, + "step": 19308 + }, + { + "epoch": 0.5301757276221856, + "grad_norm": 0.4068968594074249, + "learning_rate": 1.6751849980083787e-05, + "loss": 0.5453, + "step": 19309 + }, + { + "epoch": 0.5302031850631521, + "grad_norm": 0.41245296597480774, + "learning_rate": 1.675153138828842e-05, + "loss": 0.5195, + "step": 19310 + }, + { + "epoch": 0.5302306425041187, + "grad_norm": 0.3719991147518158, + "learning_rate": 1.6751212783899288e-05, + "loss": 0.5692, + "step": 19311 + }, + { + "epoch": 0.5302580999450851, + "grad_norm": 0.40111422538757324, + "learning_rate": 1.6750894166916993e-05, + "loss": 0.5016, + "step": 19312 + }, + { + "epoch": 0.5302855573860517, + "grad_norm": 0.33629217743873596, + "learning_rate": 1.6750575537342123e-05, + "loss": 0.5545, + "step": 19313 + }, + { + "epoch": 0.5303130148270181, + "grad_norm": 0.35935845971107483, + "learning_rate": 1.675025689517528e-05, + "loss": 0.574, + "step": 19314 + }, + { + "epoch": 0.5303404722679846, + "grad_norm": 0.36656132340431213, + "learning_rate": 1.674993824041705e-05, + "loss": 0.4275, + "step": 19315 + }, + { + "epoch": 0.5303679297089511, + "grad_norm": 0.44042009115219116, + "learning_rate": 1.674961957306803e-05, + "loss": 0.4919, + "step": 19316 + }, + { + "epoch": 0.5303953871499176, + "grad_norm": 0.3526274561882019, + "learning_rate": 1.6749300893128815e-05, + "loss": 0.4881, + "step": 19317 + }, + { + "epoch": 0.5304228445908842, + "grad_norm": 0.3255390226840973, + "learning_rate": 1.67489822006e-05, + "loss": 0.5277, + "step": 19318 + }, + { + "epoch": 0.5304503020318506, + "grad_norm": 0.4094306230545044, + "learning_rate": 1.674866349548218e-05, + "loss": 0.494, + "step": 19319 + }, + { + "epoch": 0.5304777594728172, + "grad_norm": 0.41542762517929077, + "learning_rate": 1.6748344777775953e-05, + "loss": 0.5518, + "step": 19320 + }, + { + "epoch": 0.5305052169137836, + "grad_norm": 0.386592835187912, + "learning_rate": 1.67480260474819e-05, + "loss": 0.5107, + "step": 19321 + }, + { + "epoch": 0.5305326743547502, + "grad_norm": 0.3939725160598755, + "learning_rate": 1.6747707304600632e-05, + "loss": 0.5195, + "step": 19322 + }, + { + "epoch": 0.5305601317957166, + "grad_norm": 0.44558098912239075, + "learning_rate": 1.6747388549132732e-05, + "loss": 0.5514, + "step": 19323 + }, + { + "epoch": 0.5305875892366831, + "grad_norm": 0.35958537459373474, + "learning_rate": 1.67470697810788e-05, + "loss": 0.4606, + "step": 19324 + }, + { + "epoch": 0.5306150466776497, + "grad_norm": 0.3878563642501831, + "learning_rate": 1.674675100043943e-05, + "loss": 0.4868, + "step": 19325 + }, + { + "epoch": 0.5306425041186161, + "grad_norm": 0.38232284784317017, + "learning_rate": 1.6746432207215217e-05, + "loss": 0.5182, + "step": 19326 + }, + { + "epoch": 0.5306699615595827, + "grad_norm": 0.45993366837501526, + "learning_rate": 1.6746113401406753e-05, + "loss": 0.4371, + "step": 19327 + }, + { + "epoch": 0.5306974190005491, + "grad_norm": 0.3306426703929901, + "learning_rate": 1.6745794583014636e-05, + "loss": 0.4711, + "step": 19328 + }, + { + "epoch": 0.5307248764415157, + "grad_norm": 0.39526742696762085, + "learning_rate": 1.674547575203946e-05, + "loss": 0.4176, + "step": 19329 + }, + { + "epoch": 0.5307523338824821, + "grad_norm": 0.4159373939037323, + "learning_rate": 1.674515690848182e-05, + "loss": 0.579, + "step": 19330 + }, + { + "epoch": 0.5307797913234487, + "grad_norm": 0.351931095123291, + "learning_rate": 1.6744838052342306e-05, + "loss": 0.5115, + "step": 19331 + }, + { + "epoch": 0.5308072487644152, + "grad_norm": 0.3727240264415741, + "learning_rate": 1.6744519183621514e-05, + "loss": 0.5881, + "step": 19332 + }, + { + "epoch": 0.5308347062053816, + "grad_norm": 0.35540154576301575, + "learning_rate": 1.6744200302320046e-05, + "loss": 0.5236, + "step": 19333 + }, + { + "epoch": 0.5308621636463482, + "grad_norm": 0.3660944700241089, + "learning_rate": 1.674388140843849e-05, + "loss": 0.5086, + "step": 19334 + }, + { + "epoch": 0.5308896210873146, + "grad_norm": 0.5373280644416809, + "learning_rate": 1.6743562501977443e-05, + "loss": 0.5284, + "step": 19335 + }, + { + "epoch": 0.5309170785282812, + "grad_norm": 0.36367037892341614, + "learning_rate": 1.6743243582937497e-05, + "loss": 0.4809, + "step": 19336 + }, + { + "epoch": 0.5309445359692476, + "grad_norm": 0.36581721901893616, + "learning_rate": 1.6742924651319253e-05, + "loss": 0.5139, + "step": 19337 + }, + { + "epoch": 0.5309719934102142, + "grad_norm": 0.43284475803375244, + "learning_rate": 1.6742605707123303e-05, + "loss": 0.5425, + "step": 19338 + }, + { + "epoch": 0.5309994508511807, + "grad_norm": 0.3975452780723572, + "learning_rate": 1.6742286750350237e-05, + "loss": 0.5604, + "step": 19339 + }, + { + "epoch": 0.5310269082921472, + "grad_norm": 0.4012303054332733, + "learning_rate": 1.6741967781000657e-05, + "loss": 0.5165, + "step": 19340 + }, + { + "epoch": 0.5310543657331137, + "grad_norm": 0.46749037504196167, + "learning_rate": 1.674164879907516e-05, + "loss": 0.4705, + "step": 19341 + }, + { + "epoch": 0.5310818231740801, + "grad_norm": 0.42247387766838074, + "learning_rate": 1.6741329804574328e-05, + "loss": 0.5521, + "step": 19342 + }, + { + "epoch": 0.5311092806150467, + "grad_norm": 0.4112529158592224, + "learning_rate": 1.674101079749877e-05, + "loss": 0.5556, + "step": 19343 + }, + { + "epoch": 0.5311367380560131, + "grad_norm": 0.42487868666648865, + "learning_rate": 1.674069177784907e-05, + "loss": 0.4801, + "step": 19344 + }, + { + "epoch": 0.5311641954969797, + "grad_norm": 0.3428756594657898, + "learning_rate": 1.6740372745625837e-05, + "loss": 0.4262, + "step": 19345 + }, + { + "epoch": 0.5311916529379462, + "grad_norm": 0.3145662546157837, + "learning_rate": 1.674005370082965e-05, + "loss": 0.37, + "step": 19346 + }, + { + "epoch": 0.5312191103789127, + "grad_norm": 0.402011901140213, + "learning_rate": 1.6739734643461116e-05, + "loss": 0.5044, + "step": 19347 + }, + { + "epoch": 0.5312465678198792, + "grad_norm": 0.3490923345088959, + "learning_rate": 1.6739415573520823e-05, + "loss": 0.5299, + "step": 19348 + }, + { + "epoch": 0.5312740252608457, + "grad_norm": 0.38590601086616516, + "learning_rate": 1.673909649100937e-05, + "loss": 0.5258, + "step": 19349 + }, + { + "epoch": 0.5313014827018122, + "grad_norm": 0.42892301082611084, + "learning_rate": 1.6738777395927354e-05, + "loss": 0.5804, + "step": 19350 + }, + { + "epoch": 0.5313289401427786, + "grad_norm": 0.3846324682235718, + "learning_rate": 1.6738458288275362e-05, + "loss": 0.562, + "step": 19351 + }, + { + "epoch": 0.5313563975837452, + "grad_norm": 0.3779344856739044, + "learning_rate": 1.6738139168053998e-05, + "loss": 0.5586, + "step": 19352 + }, + { + "epoch": 0.5313838550247117, + "grad_norm": 0.4812759757041931, + "learning_rate": 1.673782003526385e-05, + "loss": 0.6317, + "step": 19353 + }, + { + "epoch": 0.5314113124656782, + "grad_norm": 0.5619891285896301, + "learning_rate": 1.6737500889905524e-05, + "loss": 0.4823, + "step": 19354 + }, + { + "epoch": 0.5314387699066447, + "grad_norm": 0.4811290204524994, + "learning_rate": 1.6737181731979602e-05, + "loss": 0.57, + "step": 19355 + }, + { + "epoch": 0.5314662273476112, + "grad_norm": 0.4494136571884155, + "learning_rate": 1.673686256148669e-05, + "loss": 0.4852, + "step": 19356 + }, + { + "epoch": 0.5314936847885777, + "grad_norm": 0.40900710225105286, + "learning_rate": 1.6736543378427377e-05, + "loss": 0.497, + "step": 19357 + }, + { + "epoch": 0.5315211422295442, + "grad_norm": 0.3726123869419098, + "learning_rate": 1.673622418280226e-05, + "loss": 0.503, + "step": 19358 + }, + { + "epoch": 0.5315485996705107, + "grad_norm": 0.36603936553001404, + "learning_rate": 1.673590497461194e-05, + "loss": 0.4778, + "step": 19359 + }, + { + "epoch": 0.5315760571114773, + "grad_norm": 0.37542518973350525, + "learning_rate": 1.6735585753857e-05, + "loss": 0.457, + "step": 19360 + }, + { + "epoch": 0.5316035145524437, + "grad_norm": 0.3842017948627472, + "learning_rate": 1.6735266520538046e-05, + "loss": 0.5336, + "step": 19361 + }, + { + "epoch": 0.5316309719934103, + "grad_norm": 0.30286693572998047, + "learning_rate": 1.6734947274655674e-05, + "loss": 0.4092, + "step": 19362 + }, + { + "epoch": 0.5316584294343767, + "grad_norm": 0.38227716088294983, + "learning_rate": 1.6734628016210473e-05, + "loss": 0.4975, + "step": 19363 + }, + { + "epoch": 0.5316858868753432, + "grad_norm": 0.34778478741645813, + "learning_rate": 1.673430874520304e-05, + "loss": 0.4239, + "step": 19364 + }, + { + "epoch": 0.5317133443163097, + "grad_norm": 0.358067125082016, + "learning_rate": 1.6733989461633972e-05, + "loss": 0.5301, + "step": 19365 + }, + { + "epoch": 0.5317408017572762, + "grad_norm": 0.36643579602241516, + "learning_rate": 1.6733670165503865e-05, + "loss": 0.4524, + "step": 19366 + }, + { + "epoch": 0.5317682591982428, + "grad_norm": 0.3559860587120056, + "learning_rate": 1.6733350856813313e-05, + "loss": 0.5266, + "step": 19367 + }, + { + "epoch": 0.5317957166392092, + "grad_norm": 0.41223931312561035, + "learning_rate": 1.6733031535562916e-05, + "loss": 0.5905, + "step": 19368 + }, + { + "epoch": 0.5318231740801758, + "grad_norm": 0.3269866406917572, + "learning_rate": 1.6732712201753263e-05, + "loss": 0.4526, + "step": 19369 + }, + { + "epoch": 0.5318506315211422, + "grad_norm": 0.37068304419517517, + "learning_rate": 1.6732392855384954e-05, + "loss": 0.5972, + "step": 19370 + }, + { + "epoch": 0.5318780889621088, + "grad_norm": 0.38224315643310547, + "learning_rate": 1.6732073496458582e-05, + "loss": 0.5475, + "step": 19371 + }, + { + "epoch": 0.5319055464030752, + "grad_norm": 0.37334686517715454, + "learning_rate": 1.6731754124974747e-05, + "loss": 0.5157, + "step": 19372 + }, + { + "epoch": 0.5319330038440417, + "grad_norm": 0.36014029383659363, + "learning_rate": 1.673143474093404e-05, + "loss": 0.4956, + "step": 19373 + }, + { + "epoch": 0.5319604612850083, + "grad_norm": 0.39152592420578003, + "learning_rate": 1.673111534433706e-05, + "loss": 0.5721, + "step": 19374 + }, + { + "epoch": 0.5319879187259747, + "grad_norm": 0.4061623811721802, + "learning_rate": 1.67307959351844e-05, + "loss": 0.5032, + "step": 19375 + }, + { + "epoch": 0.5320153761669413, + "grad_norm": 0.3938007056713104, + "learning_rate": 1.673047651347666e-05, + "loss": 0.5489, + "step": 19376 + }, + { + "epoch": 0.5320428336079077, + "grad_norm": 0.3850138187408447, + "learning_rate": 1.6730157079214433e-05, + "loss": 0.4767, + "step": 19377 + }, + { + "epoch": 0.5320702910488743, + "grad_norm": 0.4848293364048004, + "learning_rate": 1.6729837632398313e-05, + "loss": 0.5507, + "step": 19378 + }, + { + "epoch": 0.5320977484898407, + "grad_norm": 0.4155327379703522, + "learning_rate": 1.6729518173028898e-05, + "loss": 0.5454, + "step": 19379 + }, + { + "epoch": 0.5321252059308073, + "grad_norm": 0.3578982651233673, + "learning_rate": 1.6729198701106782e-05, + "loss": 0.4317, + "step": 19380 + }, + { + "epoch": 0.5321526633717738, + "grad_norm": 0.387445867061615, + "learning_rate": 1.6728879216632567e-05, + "loss": 0.535, + "step": 19381 + }, + { + "epoch": 0.5321801208127402, + "grad_norm": 0.3569889962673187, + "learning_rate": 1.6728559719606844e-05, + "loss": 0.4938, + "step": 19382 + }, + { + "epoch": 0.5322075782537068, + "grad_norm": 0.3981384038925171, + "learning_rate": 1.6728240210030208e-05, + "loss": 0.5419, + "step": 19383 + }, + { + "epoch": 0.5322350356946732, + "grad_norm": 0.4326855540275574, + "learning_rate": 1.6727920687903257e-05, + "loss": 0.5005, + "step": 19384 + }, + { + "epoch": 0.5322624931356398, + "grad_norm": 0.3628528416156769, + "learning_rate": 1.6727601153226585e-05, + "loss": 0.5246, + "step": 19385 + }, + { + "epoch": 0.5322899505766062, + "grad_norm": 0.3546999394893646, + "learning_rate": 1.6727281606000794e-05, + "loss": 0.4722, + "step": 19386 + }, + { + "epoch": 0.5323174080175728, + "grad_norm": 0.3851064145565033, + "learning_rate": 1.672696204622647e-05, + "loss": 0.5097, + "step": 19387 + }, + { + "epoch": 0.5323448654585393, + "grad_norm": 0.4190930128097534, + "learning_rate": 1.672664247390422e-05, + "loss": 0.4946, + "step": 19388 + }, + { + "epoch": 0.5323723228995058, + "grad_norm": 0.42522090673446655, + "learning_rate": 1.672632288903463e-05, + "loss": 0.5349, + "step": 19389 + }, + { + "epoch": 0.5323997803404723, + "grad_norm": 0.36830562353134155, + "learning_rate": 1.6726003291618306e-05, + "loss": 0.4823, + "step": 19390 + }, + { + "epoch": 0.5324272377814387, + "grad_norm": 0.40661484003067017, + "learning_rate": 1.6725683681655834e-05, + "loss": 0.5418, + "step": 19391 + }, + { + "epoch": 0.5324546952224053, + "grad_norm": 0.40643957257270813, + "learning_rate": 1.672536405914782e-05, + "loss": 0.6065, + "step": 19392 + }, + { + "epoch": 0.5324821526633717, + "grad_norm": 0.3483487069606781, + "learning_rate": 1.6725044424094852e-05, + "loss": 0.5358, + "step": 19393 + }, + { + "epoch": 0.5325096101043383, + "grad_norm": 0.35646894574165344, + "learning_rate": 1.672472477649753e-05, + "loss": 0.4959, + "step": 19394 + }, + { + "epoch": 0.5325370675453048, + "grad_norm": 0.3634638786315918, + "learning_rate": 1.6724405116356454e-05, + "loss": 0.4514, + "step": 19395 + }, + { + "epoch": 0.5325645249862713, + "grad_norm": 0.389377236366272, + "learning_rate": 1.6724085443672216e-05, + "loss": 0.5676, + "step": 19396 + }, + { + "epoch": 0.5325919824272378, + "grad_norm": 0.36246708035469055, + "learning_rate": 1.672376575844541e-05, + "loss": 0.4794, + "step": 19397 + }, + { + "epoch": 0.5326194398682043, + "grad_norm": 0.3850906789302826, + "learning_rate": 1.6723446060676635e-05, + "loss": 0.4898, + "step": 19398 + }, + { + "epoch": 0.5326468973091708, + "grad_norm": 0.3455125093460083, + "learning_rate": 1.6723126350366485e-05, + "loss": 0.4897, + "step": 19399 + }, + { + "epoch": 0.5326743547501372, + "grad_norm": 0.3611743450164795, + "learning_rate": 1.672280662751556e-05, + "loss": 0.4593, + "step": 19400 + }, + { + "epoch": 0.5327018121911038, + "grad_norm": 0.38606566190719604, + "learning_rate": 1.6722486892124458e-05, + "loss": 0.5685, + "step": 19401 + }, + { + "epoch": 0.5327292696320703, + "grad_norm": 0.39980944991111755, + "learning_rate": 1.6722167144193773e-05, + "loss": 0.5342, + "step": 19402 + }, + { + "epoch": 0.5327567270730368, + "grad_norm": 0.4139919579029083, + "learning_rate": 1.6721847383724097e-05, + "loss": 0.4493, + "step": 19403 + }, + { + "epoch": 0.5327841845140033, + "grad_norm": 0.5198220014572144, + "learning_rate": 1.672152761071603e-05, + "loss": 0.4518, + "step": 19404 + }, + { + "epoch": 0.5328116419549698, + "grad_norm": 0.3754253089427948, + "learning_rate": 1.672120782517017e-05, + "loss": 0.5559, + "step": 19405 + }, + { + "epoch": 0.5328390993959363, + "grad_norm": 0.42607948184013367, + "learning_rate": 1.6720888027087112e-05, + "loss": 0.5164, + "step": 19406 + }, + { + "epoch": 0.5328665568369028, + "grad_norm": 0.4902487099170685, + "learning_rate": 1.6720568216467455e-05, + "loss": 0.5572, + "step": 19407 + }, + { + "epoch": 0.5328940142778693, + "grad_norm": 0.36007651686668396, + "learning_rate": 1.6720248393311795e-05, + "loss": 0.481, + "step": 19408 + }, + { + "epoch": 0.5329214717188359, + "grad_norm": 0.4118850529193878, + "learning_rate": 1.671992855762072e-05, + "loss": 0.5233, + "step": 19409 + }, + { + "epoch": 0.5329489291598023, + "grad_norm": 0.5909167528152466, + "learning_rate": 1.6719608709394837e-05, + "loss": 0.5569, + "step": 19410 + }, + { + "epoch": 0.5329763866007688, + "grad_norm": 0.42809680104255676, + "learning_rate": 1.6719288848634736e-05, + "loss": 0.5395, + "step": 19411 + }, + { + "epoch": 0.5330038440417353, + "grad_norm": 0.36552363634109497, + "learning_rate": 1.671896897534102e-05, + "loss": 0.4904, + "step": 19412 + }, + { + "epoch": 0.5330313014827018, + "grad_norm": 0.4246695935726166, + "learning_rate": 1.6718649089514286e-05, + "loss": 0.4116, + "step": 19413 + }, + { + "epoch": 0.5330587589236683, + "grad_norm": 0.353443443775177, + "learning_rate": 1.671832919115512e-05, + "loss": 0.5022, + "step": 19414 + }, + { + "epoch": 0.5330862163646348, + "grad_norm": 0.4004979729652405, + "learning_rate": 1.671800928026413e-05, + "loss": 0.6082, + "step": 19415 + }, + { + "epoch": 0.5331136738056013, + "grad_norm": 0.38645175099372864, + "learning_rate": 1.671768935684191e-05, + "loss": 0.5039, + "step": 19416 + }, + { + "epoch": 0.5331411312465678, + "grad_norm": 0.3336629867553711, + "learning_rate": 1.671736942088905e-05, + "loss": 0.5085, + "step": 19417 + }, + { + "epoch": 0.5331685886875344, + "grad_norm": 0.3915872275829315, + "learning_rate": 1.6717049472406155e-05, + "loss": 0.4709, + "step": 19418 + }, + { + "epoch": 0.5331960461285008, + "grad_norm": 0.3691037893295288, + "learning_rate": 1.6716729511393822e-05, + "loss": 0.5088, + "step": 19419 + }, + { + "epoch": 0.5332235035694673, + "grad_norm": 0.35532331466674805, + "learning_rate": 1.671640953785264e-05, + "loss": 0.4534, + "step": 19420 + }, + { + "epoch": 0.5332509610104338, + "grad_norm": 0.3765157163143158, + "learning_rate": 1.6716089551783212e-05, + "loss": 0.6152, + "step": 19421 + }, + { + "epoch": 0.5332784184514003, + "grad_norm": 0.3504530191421509, + "learning_rate": 1.671576955318613e-05, + "loss": 0.4223, + "step": 19422 + }, + { + "epoch": 0.5333058758923668, + "grad_norm": 0.34128376841545105, + "learning_rate": 1.6715449542062004e-05, + "loss": 0.4331, + "step": 19423 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 0.3494567275047302, + "learning_rate": 1.6715129518411412e-05, + "loss": 0.5329, + "step": 19424 + }, + { + "epoch": 0.5333607907742999, + "grad_norm": 0.35481521487236023, + "learning_rate": 1.6714809482234965e-05, + "loss": 0.5924, + "step": 19425 + }, + { + "epoch": 0.5333882482152663, + "grad_norm": 0.3485960066318512, + "learning_rate": 1.6714489433533252e-05, + "loss": 0.5248, + "step": 19426 + }, + { + "epoch": 0.5334157056562329, + "grad_norm": 0.38447362184524536, + "learning_rate": 1.6714169372306876e-05, + "loss": 0.4684, + "step": 19427 + }, + { + "epoch": 0.5334431630971993, + "grad_norm": 0.3922148644924164, + "learning_rate": 1.671384929855643e-05, + "loss": 0.514, + "step": 19428 + }, + { + "epoch": 0.5334706205381659, + "grad_norm": 0.37937527894973755, + "learning_rate": 1.6713529212282514e-05, + "loss": 0.521, + "step": 19429 + }, + { + "epoch": 0.5334980779791323, + "grad_norm": 0.37518739700317383, + "learning_rate": 1.671320911348572e-05, + "loss": 0.534, + "step": 19430 + }, + { + "epoch": 0.5335255354200988, + "grad_norm": 0.40079787373542786, + "learning_rate": 1.671288900216665e-05, + "loss": 0.5135, + "step": 19431 + }, + { + "epoch": 0.5335529928610654, + "grad_norm": 1.6351467370986938, + "learning_rate": 1.6712568878325897e-05, + "loss": 0.5528, + "step": 19432 + }, + { + "epoch": 0.5335804503020318, + "grad_norm": 0.4146071970462799, + "learning_rate": 1.6712248741964067e-05, + "loss": 0.5319, + "step": 19433 + }, + { + "epoch": 0.5336079077429984, + "grad_norm": 0.36332717537879944, + "learning_rate": 1.6711928593081744e-05, + "loss": 0.4655, + "step": 19434 + }, + { + "epoch": 0.5336353651839648, + "grad_norm": 0.3975016474723816, + "learning_rate": 1.6711608431679536e-05, + "loss": 0.4954, + "step": 19435 + }, + { + "epoch": 0.5336628226249314, + "grad_norm": 0.37246260046958923, + "learning_rate": 1.671128825775804e-05, + "loss": 0.5373, + "step": 19436 + }, + { + "epoch": 0.5336902800658978, + "grad_norm": 0.5320195555686951, + "learning_rate": 1.6710968071317842e-05, + "loss": 0.5725, + "step": 19437 + }, + { + "epoch": 0.5337177375068644, + "grad_norm": 0.3479880690574646, + "learning_rate": 1.6710647872359548e-05, + "loss": 0.5159, + "step": 19438 + }, + { + "epoch": 0.5337451949478309, + "grad_norm": 0.37756481766700745, + "learning_rate": 1.6710327660883758e-05, + "loss": 0.5319, + "step": 19439 + }, + { + "epoch": 0.5337726523887973, + "grad_norm": 0.3727855682373047, + "learning_rate": 1.671000743689106e-05, + "loss": 0.4487, + "step": 19440 + }, + { + "epoch": 0.5338001098297639, + "grad_norm": 0.33395010232925415, + "learning_rate": 1.6709687200382057e-05, + "loss": 0.4525, + "step": 19441 + }, + { + "epoch": 0.5338275672707303, + "grad_norm": 0.37841561436653137, + "learning_rate": 1.670936695135735e-05, + "loss": 0.4651, + "step": 19442 + }, + { + "epoch": 0.5338550247116969, + "grad_norm": 0.4073631465435028, + "learning_rate": 1.6709046689817528e-05, + "loss": 0.4926, + "step": 19443 + }, + { + "epoch": 0.5338824821526633, + "grad_norm": 0.36022499203681946, + "learning_rate": 1.6708726415763197e-05, + "loss": 0.5275, + "step": 19444 + }, + { + "epoch": 0.5339099395936299, + "grad_norm": 0.4838637411594391, + "learning_rate": 1.6708406129194948e-05, + "loss": 0.5518, + "step": 19445 + }, + { + "epoch": 0.5339373970345964, + "grad_norm": 0.4033936858177185, + "learning_rate": 1.6708085830113382e-05, + "loss": 0.5515, + "step": 19446 + }, + { + "epoch": 0.5339648544755629, + "grad_norm": 0.44100967049598694, + "learning_rate": 1.6707765518519093e-05, + "loss": 0.5942, + "step": 19447 + }, + { + "epoch": 0.5339923119165294, + "grad_norm": 0.46947649121284485, + "learning_rate": 1.6707445194412678e-05, + "loss": 0.5428, + "step": 19448 + }, + { + "epoch": 0.5340197693574958, + "grad_norm": 0.35154369473457336, + "learning_rate": 1.6707124857794742e-05, + "loss": 0.45, + "step": 19449 + }, + { + "epoch": 0.5340472267984624, + "grad_norm": 0.38932543992996216, + "learning_rate": 1.6706804508665872e-05, + "loss": 0.5022, + "step": 19450 + }, + { + "epoch": 0.5340746842394288, + "grad_norm": 0.4461614191532135, + "learning_rate": 1.6706484147026678e-05, + "loss": 0.4794, + "step": 19451 + }, + { + "epoch": 0.5341021416803954, + "grad_norm": 0.3579641878604889, + "learning_rate": 1.6706163772877745e-05, + "loss": 0.4962, + "step": 19452 + }, + { + "epoch": 0.5341295991213619, + "grad_norm": 0.4525510370731354, + "learning_rate": 1.6705843386219678e-05, + "loss": 0.5302, + "step": 19453 + }, + { + "epoch": 0.5341570565623284, + "grad_norm": 0.3582257330417633, + "learning_rate": 1.6705522987053072e-05, + "loss": 0.4275, + "step": 19454 + }, + { + "epoch": 0.5341845140032949, + "grad_norm": 0.598111629486084, + "learning_rate": 1.6705202575378527e-05, + "loss": 0.6786, + "step": 19455 + }, + { + "epoch": 0.5342119714442614, + "grad_norm": 0.34954091906547546, + "learning_rate": 1.670488215119664e-05, + "loss": 0.5039, + "step": 19456 + }, + { + "epoch": 0.5342394288852279, + "grad_norm": 0.38412410020828247, + "learning_rate": 1.6704561714508005e-05, + "loss": 0.5333, + "step": 19457 + }, + { + "epoch": 0.5342668863261943, + "grad_norm": 0.35423600673675537, + "learning_rate": 1.670424126531322e-05, + "loss": 0.4699, + "step": 19458 + }, + { + "epoch": 0.5342943437671609, + "grad_norm": 0.3286384046077728, + "learning_rate": 1.6703920803612892e-05, + "loss": 0.4765, + "step": 19459 + }, + { + "epoch": 0.5343218012081274, + "grad_norm": 0.39541956782341003, + "learning_rate": 1.6703600329407607e-05, + "loss": 0.5133, + "step": 19460 + }, + { + "epoch": 0.5343492586490939, + "grad_norm": 0.41839295625686646, + "learning_rate": 1.6703279842697974e-05, + "loss": 0.5174, + "step": 19461 + }, + { + "epoch": 0.5343767160900604, + "grad_norm": 0.33179572224617004, + "learning_rate": 1.670295934348458e-05, + "loss": 0.3967, + "step": 19462 + }, + { + "epoch": 0.5344041735310269, + "grad_norm": 0.45312127470970154, + "learning_rate": 1.6702638831768027e-05, + "loss": 0.5373, + "step": 19463 + }, + { + "epoch": 0.5344316309719934, + "grad_norm": 0.32612162828445435, + "learning_rate": 1.6702318307548915e-05, + "loss": 0.4661, + "step": 19464 + }, + { + "epoch": 0.5344590884129599, + "grad_norm": 0.3838077187538147, + "learning_rate": 1.6701997770827838e-05, + "loss": 0.5123, + "step": 19465 + }, + { + "epoch": 0.5344865458539264, + "grad_norm": 0.41694149374961853, + "learning_rate": 1.6701677221605398e-05, + "loss": 0.5021, + "step": 19466 + }, + { + "epoch": 0.534514003294893, + "grad_norm": 0.3411122262477875, + "learning_rate": 1.6701356659882192e-05, + "loss": 0.4493, + "step": 19467 + }, + { + "epoch": 0.5345414607358594, + "grad_norm": 0.373210072517395, + "learning_rate": 1.6701036085658816e-05, + "loss": 0.4359, + "step": 19468 + }, + { + "epoch": 0.534568918176826, + "grad_norm": 0.3401247560977936, + "learning_rate": 1.670071549893587e-05, + "loss": 0.5228, + "step": 19469 + }, + { + "epoch": 0.5345963756177924, + "grad_norm": 0.36342838406562805, + "learning_rate": 1.670039489971395e-05, + "loss": 0.478, + "step": 19470 + }, + { + "epoch": 0.5346238330587589, + "grad_norm": 0.32962462306022644, + "learning_rate": 1.6700074287993654e-05, + "loss": 0.4443, + "step": 19471 + }, + { + "epoch": 0.5346512904997254, + "grad_norm": 0.4055817127227783, + "learning_rate": 1.669975366377558e-05, + "loss": 0.5739, + "step": 19472 + }, + { + "epoch": 0.5346787479406919, + "grad_norm": 0.3414079546928406, + "learning_rate": 1.669943302706033e-05, + "loss": 0.4437, + "step": 19473 + }, + { + "epoch": 0.5347062053816585, + "grad_norm": 0.397477388381958, + "learning_rate": 1.6699112377848496e-05, + "loss": 0.6548, + "step": 19474 + }, + { + "epoch": 0.5347336628226249, + "grad_norm": 0.3588618040084839, + "learning_rate": 1.6698791716140684e-05, + "loss": 0.4869, + "step": 19475 + }, + { + "epoch": 0.5347611202635915, + "grad_norm": 0.39268118143081665, + "learning_rate": 1.6698471041937482e-05, + "loss": 0.485, + "step": 19476 + }, + { + "epoch": 0.5347885777045579, + "grad_norm": 0.33474844694137573, + "learning_rate": 1.66981503552395e-05, + "loss": 0.462, + "step": 19477 + }, + { + "epoch": 0.5348160351455244, + "grad_norm": 0.3634570240974426, + "learning_rate": 1.6697829656047323e-05, + "loss": 0.5039, + "step": 19478 + }, + { + "epoch": 0.5348434925864909, + "grad_norm": 0.369048148393631, + "learning_rate": 1.669750894436156e-05, + "loss": 0.5159, + "step": 19479 + }, + { + "epoch": 0.5348709500274574, + "grad_norm": 0.3656540811061859, + "learning_rate": 1.6697188220182807e-05, + "loss": 0.5153, + "step": 19480 + }, + { + "epoch": 0.534898407468424, + "grad_norm": 0.35698211193084717, + "learning_rate": 1.6696867483511657e-05, + "loss": 0.5303, + "step": 19481 + }, + { + "epoch": 0.5349258649093904, + "grad_norm": 0.37191689014434814, + "learning_rate": 1.6696546734348713e-05, + "loss": 0.4961, + "step": 19482 + }, + { + "epoch": 0.534953322350357, + "grad_norm": 0.3809208273887634, + "learning_rate": 1.6696225972694574e-05, + "loss": 0.5134, + "step": 19483 + }, + { + "epoch": 0.5349807797913234, + "grad_norm": 0.36249545216560364, + "learning_rate": 1.669590519854983e-05, + "loss": 0.5125, + "step": 19484 + }, + { + "epoch": 0.53500823723229, + "grad_norm": 0.4077986776828766, + "learning_rate": 1.669558441191509e-05, + "loss": 0.493, + "step": 19485 + }, + { + "epoch": 0.5350356946732564, + "grad_norm": 0.34413641691207886, + "learning_rate": 1.669526361279095e-05, + "loss": 0.4461, + "step": 19486 + }, + { + "epoch": 0.535063152114223, + "grad_norm": 0.3963479995727539, + "learning_rate": 1.6694942801178005e-05, + "loss": 0.5726, + "step": 19487 + }, + { + "epoch": 0.5350906095551895, + "grad_norm": 0.41516783833503723, + "learning_rate": 1.6694621977076854e-05, + "loss": 0.6221, + "step": 19488 + }, + { + "epoch": 0.5351180669961559, + "grad_norm": 0.37626516819000244, + "learning_rate": 1.6694301140488095e-05, + "loss": 0.508, + "step": 19489 + }, + { + "epoch": 0.5351455244371225, + "grad_norm": 0.5348260402679443, + "learning_rate": 1.6693980291412333e-05, + "loss": 0.4481, + "step": 19490 + }, + { + "epoch": 0.5351729818780889, + "grad_norm": 0.4007846713066101, + "learning_rate": 1.6693659429850156e-05, + "loss": 0.5339, + "step": 19491 + }, + { + "epoch": 0.5352004393190555, + "grad_norm": 0.3535490930080414, + "learning_rate": 1.6693338555802168e-05, + "loss": 0.4733, + "step": 19492 + }, + { + "epoch": 0.5352278967600219, + "grad_norm": 0.35954803228378296, + "learning_rate": 1.6693017669268972e-05, + "loss": 0.4199, + "step": 19493 + }, + { + "epoch": 0.5352553542009885, + "grad_norm": 0.4301975965499878, + "learning_rate": 1.6692696770251155e-05, + "loss": 0.537, + "step": 19494 + }, + { + "epoch": 0.535282811641955, + "grad_norm": 0.3862122893333435, + "learning_rate": 1.669237585874933e-05, + "loss": 0.4806, + "step": 19495 + }, + { + "epoch": 0.5353102690829215, + "grad_norm": 0.3595523238182068, + "learning_rate": 1.6692054934764083e-05, + "loss": 0.5402, + "step": 19496 + }, + { + "epoch": 0.535337726523888, + "grad_norm": 0.36400356888771057, + "learning_rate": 1.6691733998296018e-05, + "loss": 0.4536, + "step": 19497 + }, + { + "epoch": 0.5353651839648544, + "grad_norm": 0.4129052758216858, + "learning_rate": 1.6691413049345734e-05, + "loss": 0.4403, + "step": 19498 + }, + { + "epoch": 0.535392641405821, + "grad_norm": 0.3922627568244934, + "learning_rate": 1.669109208791383e-05, + "loss": 0.5433, + "step": 19499 + }, + { + "epoch": 0.5354200988467874, + "grad_norm": 0.41947463154792786, + "learning_rate": 1.6690771114000904e-05, + "loss": 0.4916, + "step": 19500 + }, + { + "epoch": 0.535447556287754, + "grad_norm": 0.39780300855636597, + "learning_rate": 1.6690450127607555e-05, + "loss": 0.588, + "step": 19501 + }, + { + "epoch": 0.5354750137287205, + "grad_norm": 0.3834473490715027, + "learning_rate": 1.669012912873438e-05, + "loss": 0.5678, + "step": 19502 + }, + { + "epoch": 0.535502471169687, + "grad_norm": 0.36770862340927124, + "learning_rate": 1.6689808117381978e-05, + "loss": 0.4926, + "step": 19503 + }, + { + "epoch": 0.5355299286106535, + "grad_norm": 0.35598456859588623, + "learning_rate": 1.6689487093550948e-05, + "loss": 0.4897, + "step": 19504 + }, + { + "epoch": 0.53555738605162, + "grad_norm": 0.3710547089576721, + "learning_rate": 1.668916605724189e-05, + "loss": 0.5387, + "step": 19505 + }, + { + "epoch": 0.5355848434925865, + "grad_norm": 0.33104440569877625, + "learning_rate": 1.6688845008455406e-05, + "loss": 0.4598, + "step": 19506 + }, + { + "epoch": 0.5356123009335529, + "grad_norm": 0.3871075212955475, + "learning_rate": 1.6688523947192087e-05, + "loss": 0.5331, + "step": 19507 + }, + { + "epoch": 0.5356397583745195, + "grad_norm": 0.3626008927822113, + "learning_rate": 1.6688202873452538e-05, + "loss": 0.4945, + "step": 19508 + }, + { + "epoch": 0.535667215815486, + "grad_norm": 0.40361687541007996, + "learning_rate": 1.6687881787237352e-05, + "loss": 0.6209, + "step": 19509 + }, + { + "epoch": 0.5356946732564525, + "grad_norm": 0.3698068857192993, + "learning_rate": 1.6687560688547137e-05, + "loss": 0.4905, + "step": 19510 + }, + { + "epoch": 0.535722130697419, + "grad_norm": 0.39064499735832214, + "learning_rate": 1.6687239577382487e-05, + "loss": 0.5625, + "step": 19511 + }, + { + "epoch": 0.5357495881383855, + "grad_norm": 0.3648398220539093, + "learning_rate": 1.6686918453744e-05, + "loss": 0.4794, + "step": 19512 + }, + { + "epoch": 0.535777045579352, + "grad_norm": 0.3869546949863434, + "learning_rate": 1.6686597317632275e-05, + "loss": 0.4639, + "step": 19513 + }, + { + "epoch": 0.5358045030203185, + "grad_norm": 0.3686214089393616, + "learning_rate": 1.668627616904791e-05, + "loss": 0.4809, + "step": 19514 + }, + { + "epoch": 0.535831960461285, + "grad_norm": 0.47000452876091003, + "learning_rate": 1.6685955007991508e-05, + "loss": 0.6073, + "step": 19515 + }, + { + "epoch": 0.5358594179022516, + "grad_norm": 0.4056825637817383, + "learning_rate": 1.6685633834463665e-05, + "loss": 0.5532, + "step": 19516 + }, + { + "epoch": 0.535886875343218, + "grad_norm": 0.4026842713356018, + "learning_rate": 1.6685312648464982e-05, + "loss": 0.4995, + "step": 19517 + }, + { + "epoch": 0.5359143327841845, + "grad_norm": 0.3878988027572632, + "learning_rate": 1.668499144999606e-05, + "loss": 0.4169, + "step": 19518 + }, + { + "epoch": 0.535941790225151, + "grad_norm": 0.3538348972797394, + "learning_rate": 1.6684670239057493e-05, + "loss": 0.449, + "step": 19519 + }, + { + "epoch": 0.5359692476661175, + "grad_norm": 1.2887301445007324, + "learning_rate": 1.6684349015649882e-05, + "loss": 0.5159, + "step": 19520 + }, + { + "epoch": 0.535996705107084, + "grad_norm": 0.372111976146698, + "learning_rate": 1.6684027779773827e-05, + "loss": 0.4869, + "step": 19521 + }, + { + "epoch": 0.5360241625480505, + "grad_norm": 0.3894801139831543, + "learning_rate": 1.6683706531429925e-05, + "loss": 0.4879, + "step": 19522 + }, + { + "epoch": 0.5360516199890171, + "grad_norm": 0.38229602575302124, + "learning_rate": 1.668338527061878e-05, + "loss": 0.5506, + "step": 19523 + }, + { + "epoch": 0.5360790774299835, + "grad_norm": 0.37080323696136475, + "learning_rate": 1.668306399734099e-05, + "loss": 0.5202, + "step": 19524 + }, + { + "epoch": 0.5361065348709501, + "grad_norm": 0.4076196551322937, + "learning_rate": 1.6682742711597146e-05, + "loss": 0.4917, + "step": 19525 + }, + { + "epoch": 0.5361339923119165, + "grad_norm": 0.501453697681427, + "learning_rate": 1.6682421413387856e-05, + "loss": 0.4904, + "step": 19526 + }, + { + "epoch": 0.536161449752883, + "grad_norm": 0.39683592319488525, + "learning_rate": 1.668210010271372e-05, + "loss": 0.5252, + "step": 19527 + }, + { + "epoch": 0.5361889071938495, + "grad_norm": 0.36515218019485474, + "learning_rate": 1.6681778779575335e-05, + "loss": 0.5096, + "step": 19528 + }, + { + "epoch": 0.536216364634816, + "grad_norm": 0.36251187324523926, + "learning_rate": 1.6681457443973297e-05, + "loss": 0.5489, + "step": 19529 + }, + { + "epoch": 0.5362438220757826, + "grad_norm": 0.34332624077796936, + "learning_rate": 1.668113609590821e-05, + "loss": 0.4719, + "step": 19530 + }, + { + "epoch": 0.536271279516749, + "grad_norm": 0.35295575857162476, + "learning_rate": 1.6680814735380672e-05, + "loss": 0.4415, + "step": 19531 + }, + { + "epoch": 0.5362987369577156, + "grad_norm": 0.3575795888900757, + "learning_rate": 1.6680493362391284e-05, + "loss": 0.5676, + "step": 19532 + }, + { + "epoch": 0.536326194398682, + "grad_norm": 0.5017375946044922, + "learning_rate": 1.668017197694064e-05, + "loss": 0.4377, + "step": 19533 + }, + { + "epoch": 0.5363536518396486, + "grad_norm": 0.37745586037635803, + "learning_rate": 1.6679850579029347e-05, + "loss": 0.4742, + "step": 19534 + }, + { + "epoch": 0.536381109280615, + "grad_norm": 0.36235329508781433, + "learning_rate": 1.6679529168657996e-05, + "loss": 0.5173, + "step": 19535 + }, + { + "epoch": 0.5364085667215815, + "grad_norm": 0.5407199263572693, + "learning_rate": 1.6679207745827195e-05, + "loss": 0.5166, + "step": 19536 + }, + { + "epoch": 0.5364360241625481, + "grad_norm": 0.3663750886917114, + "learning_rate": 1.6678886310537537e-05, + "loss": 0.5077, + "step": 19537 + }, + { + "epoch": 0.5364634816035145, + "grad_norm": 0.3933117985725403, + "learning_rate": 1.6678564862789632e-05, + "loss": 0.5631, + "step": 19538 + }, + { + "epoch": 0.5364909390444811, + "grad_norm": 0.3892660439014435, + "learning_rate": 1.6678243402584063e-05, + "loss": 0.5211, + "step": 19539 + }, + { + "epoch": 0.5365183964854475, + "grad_norm": 0.3741360604763031, + "learning_rate": 1.6677921929921443e-05, + "loss": 0.566, + "step": 19540 + }, + { + "epoch": 0.5365458539264141, + "grad_norm": 0.358063668012619, + "learning_rate": 1.6677600444802365e-05, + "loss": 0.5631, + "step": 19541 + }, + { + "epoch": 0.5365733113673805, + "grad_norm": 0.3932938277721405, + "learning_rate": 1.6677278947227435e-05, + "loss": 0.5006, + "step": 19542 + }, + { + "epoch": 0.5366007688083471, + "grad_norm": 0.395263135433197, + "learning_rate": 1.6676957437197244e-05, + "loss": 0.5385, + "step": 19543 + }, + { + "epoch": 0.5366282262493136, + "grad_norm": 0.3611341416835785, + "learning_rate": 1.66766359147124e-05, + "loss": 0.5269, + "step": 19544 + }, + { + "epoch": 0.53665568369028, + "grad_norm": 0.46949702501296997, + "learning_rate": 1.6676314379773497e-05, + "loss": 0.5801, + "step": 19545 + }, + { + "epoch": 0.5366831411312466, + "grad_norm": 0.38301512598991394, + "learning_rate": 1.6675992832381133e-05, + "loss": 0.5956, + "step": 19546 + }, + { + "epoch": 0.536710598572213, + "grad_norm": 0.4478590488433838, + "learning_rate": 1.6675671272535918e-05, + "loss": 0.4952, + "step": 19547 + }, + { + "epoch": 0.5367380560131796, + "grad_norm": 0.3638934791088104, + "learning_rate": 1.6675349700238438e-05, + "loss": 0.4479, + "step": 19548 + }, + { + "epoch": 0.536765513454146, + "grad_norm": 0.38088804483413696, + "learning_rate": 1.667502811548931e-05, + "loss": 0.5368, + "step": 19549 + }, + { + "epoch": 0.5367929708951126, + "grad_norm": 0.3511832654476166, + "learning_rate": 1.6674706518289118e-05, + "loss": 0.4467, + "step": 19550 + }, + { + "epoch": 0.5368204283360791, + "grad_norm": 0.43620362877845764, + "learning_rate": 1.6674384908638467e-05, + "loss": 0.494, + "step": 19551 + }, + { + "epoch": 0.5368478857770456, + "grad_norm": 0.4272417426109314, + "learning_rate": 1.6674063286537964e-05, + "loss": 0.5377, + "step": 19552 + }, + { + "epoch": 0.5368753432180121, + "grad_norm": 0.4310001730918884, + "learning_rate": 1.6673741651988197e-05, + "loss": 0.553, + "step": 19553 + }, + { + "epoch": 0.5369028006589786, + "grad_norm": 0.43435004353523254, + "learning_rate": 1.6673420004989776e-05, + "loss": 0.5623, + "step": 19554 + }, + { + "epoch": 0.5369302580999451, + "grad_norm": 0.3597783148288727, + "learning_rate": 1.6673098345543295e-05, + "loss": 0.5213, + "step": 19555 + }, + { + "epoch": 0.5369577155409115, + "grad_norm": 0.4145207703113556, + "learning_rate": 1.6672776673649353e-05, + "loss": 0.501, + "step": 19556 + }, + { + "epoch": 0.5369851729818781, + "grad_norm": 0.40807145833969116, + "learning_rate": 1.6672454989308557e-05, + "loss": 0.5411, + "step": 19557 + }, + { + "epoch": 0.5370126304228446, + "grad_norm": 0.3861338496208191, + "learning_rate": 1.66721332925215e-05, + "loss": 0.4544, + "step": 19558 + }, + { + "epoch": 0.5370400878638111, + "grad_norm": 0.3859710991382599, + "learning_rate": 1.6671811583288787e-05, + "loss": 0.4696, + "step": 19559 + }, + { + "epoch": 0.5370675453047776, + "grad_norm": 0.3972319960594177, + "learning_rate": 1.6671489861611016e-05, + "loss": 0.5156, + "step": 19560 + }, + { + "epoch": 0.5370950027457441, + "grad_norm": 0.3845534324645996, + "learning_rate": 1.6671168127488785e-05, + "loss": 0.6021, + "step": 19561 + }, + { + "epoch": 0.5371224601867106, + "grad_norm": 0.33772993087768555, + "learning_rate": 1.66708463809227e-05, + "loss": 0.4551, + "step": 19562 + }, + { + "epoch": 0.537149917627677, + "grad_norm": 0.37826231122016907, + "learning_rate": 1.6670524621913357e-05, + "loss": 0.4793, + "step": 19563 + }, + { + "epoch": 0.5371773750686436, + "grad_norm": 0.6146863698959351, + "learning_rate": 1.667020285046135e-05, + "loss": 0.4942, + "step": 19564 + }, + { + "epoch": 0.5372048325096102, + "grad_norm": 0.37128037214279175, + "learning_rate": 1.6669881066567292e-05, + "loss": 0.4819, + "step": 19565 + }, + { + "epoch": 0.5372322899505766, + "grad_norm": 0.3733628988265991, + "learning_rate": 1.6669559270231776e-05, + "loss": 0.4395, + "step": 19566 + }, + { + "epoch": 0.5372597473915431, + "grad_norm": 0.5011045336723328, + "learning_rate": 1.6669237461455402e-05, + "loss": 0.5421, + "step": 19567 + }, + { + "epoch": 0.5372872048325096, + "grad_norm": 0.35989755392074585, + "learning_rate": 1.666891564023877e-05, + "loss": 0.4673, + "step": 19568 + }, + { + "epoch": 0.5373146622734761, + "grad_norm": 0.3490027189254761, + "learning_rate": 1.6668593806582485e-05, + "loss": 0.5623, + "step": 19569 + }, + { + "epoch": 0.5373421197144426, + "grad_norm": 0.3739790618419647, + "learning_rate": 1.6668271960487144e-05, + "loss": 0.4394, + "step": 19570 + }, + { + "epoch": 0.5373695771554091, + "grad_norm": 0.37620607018470764, + "learning_rate": 1.6667950101953345e-05, + "loss": 0.6586, + "step": 19571 + }, + { + "epoch": 0.5373970345963757, + "grad_norm": 0.3925766050815582, + "learning_rate": 1.6667628230981693e-05, + "loss": 0.5538, + "step": 19572 + }, + { + "epoch": 0.5374244920373421, + "grad_norm": 0.38071608543395996, + "learning_rate": 1.6667306347572786e-05, + "loss": 0.4397, + "step": 19573 + }, + { + "epoch": 0.5374519494783087, + "grad_norm": 0.32436901330947876, + "learning_rate": 1.6666984451727226e-05, + "loss": 0.4069, + "step": 19574 + }, + { + "epoch": 0.5374794069192751, + "grad_norm": 0.41464418172836304, + "learning_rate": 1.6666662543445612e-05, + "loss": 0.5224, + "step": 19575 + }, + { + "epoch": 0.5375068643602416, + "grad_norm": 0.3798460066318512, + "learning_rate": 1.6666340622728543e-05, + "loss": 0.5785, + "step": 19576 + }, + { + "epoch": 0.5375343218012081, + "grad_norm": 0.35782644152641296, + "learning_rate": 1.666601868957662e-05, + "loss": 0.5091, + "step": 19577 + }, + { + "epoch": 0.5375617792421746, + "grad_norm": 0.3931209444999695, + "learning_rate": 1.666569674399045e-05, + "loss": 0.5409, + "step": 19578 + }, + { + "epoch": 0.5375892366831412, + "grad_norm": 0.3354811668395996, + "learning_rate": 1.666537478597062e-05, + "loss": 0.4652, + "step": 19579 + }, + { + "epoch": 0.5376166941241076, + "grad_norm": 0.4022499918937683, + "learning_rate": 1.6665052815517744e-05, + "loss": 0.5398, + "step": 19580 + }, + { + "epoch": 0.5376441515650742, + "grad_norm": 0.3516586422920227, + "learning_rate": 1.6664730832632417e-05, + "loss": 0.4691, + "step": 19581 + }, + { + "epoch": 0.5376716090060406, + "grad_norm": 0.4316345155239105, + "learning_rate": 1.6664408837315238e-05, + "loss": 0.56, + "step": 19582 + }, + { + "epoch": 0.5376990664470072, + "grad_norm": 0.3638538420200348, + "learning_rate": 1.6664086829566813e-05, + "loss": 0.5327, + "step": 19583 + }, + { + "epoch": 0.5377265238879736, + "grad_norm": 0.38836950063705444, + "learning_rate": 1.6663764809387736e-05, + "loss": 0.5134, + "step": 19584 + }, + { + "epoch": 0.5377539813289401, + "grad_norm": 0.4173794686794281, + "learning_rate": 1.666344277677861e-05, + "loss": 0.5892, + "step": 19585 + }, + { + "epoch": 0.5377814387699067, + "grad_norm": 0.37348833680152893, + "learning_rate": 1.6663120731740038e-05, + "loss": 0.56, + "step": 19586 + }, + { + "epoch": 0.5378088962108731, + "grad_norm": 0.3790948987007141, + "learning_rate": 1.666279867427262e-05, + "loss": 0.4763, + "step": 19587 + }, + { + "epoch": 0.5378363536518397, + "grad_norm": 0.37248486280441284, + "learning_rate": 1.6662476604376957e-05, + "loss": 0.4963, + "step": 19588 + }, + { + "epoch": 0.5378638110928061, + "grad_norm": 0.34528523683547974, + "learning_rate": 1.666215452205364e-05, + "loss": 0.4889, + "step": 19589 + }, + { + "epoch": 0.5378912685337727, + "grad_norm": 0.3619472086429596, + "learning_rate": 1.666183242730329e-05, + "loss": 0.5034, + "step": 19590 + }, + { + "epoch": 0.5379187259747391, + "grad_norm": 0.45127251744270325, + "learning_rate": 1.6661510320126494e-05, + "loss": 0.5417, + "step": 19591 + }, + { + "epoch": 0.5379461834157057, + "grad_norm": 0.34888535737991333, + "learning_rate": 1.666118820052385e-05, + "loss": 0.4825, + "step": 19592 + }, + { + "epoch": 0.5379736408566722, + "grad_norm": 0.4056456685066223, + "learning_rate": 1.6660866068495965e-05, + "loss": 0.5644, + "step": 19593 + }, + { + "epoch": 0.5380010982976386, + "grad_norm": 0.4055737555027008, + "learning_rate": 1.6660543924043443e-05, + "loss": 0.495, + "step": 19594 + }, + { + "epoch": 0.5380285557386052, + "grad_norm": 0.3782793879508972, + "learning_rate": 1.666022176716688e-05, + "loss": 0.5025, + "step": 19595 + }, + { + "epoch": 0.5380560131795716, + "grad_norm": 0.4030816853046417, + "learning_rate": 1.6659899597866873e-05, + "loss": 0.5524, + "step": 19596 + }, + { + "epoch": 0.5380834706205382, + "grad_norm": 0.4220723807811737, + "learning_rate": 1.665957741614403e-05, + "loss": 0.5699, + "step": 19597 + }, + { + "epoch": 0.5381109280615046, + "grad_norm": 0.3879261314868927, + "learning_rate": 1.665925522199895e-05, + "loss": 0.5417, + "step": 19598 + }, + { + "epoch": 0.5381383855024712, + "grad_norm": 0.37236934900283813, + "learning_rate": 1.6658933015432237e-05, + "loss": 0.615, + "step": 19599 + }, + { + "epoch": 0.5381658429434377, + "grad_norm": 0.4347868263721466, + "learning_rate": 1.6658610796444485e-05, + "loss": 0.5158, + "step": 19600 + }, + { + "epoch": 0.5381933003844042, + "grad_norm": 0.4138695299625397, + "learning_rate": 1.66582885650363e-05, + "loss": 0.5549, + "step": 19601 + }, + { + "epoch": 0.5382207578253707, + "grad_norm": 0.3784598112106323, + "learning_rate": 1.6657966321208283e-05, + "loss": 0.4752, + "step": 19602 + }, + { + "epoch": 0.5382482152663371, + "grad_norm": 0.3576120138168335, + "learning_rate": 1.665764406496103e-05, + "loss": 0.485, + "step": 19603 + }, + { + "epoch": 0.5382756727073037, + "grad_norm": 0.3706781268119812, + "learning_rate": 1.6657321796295147e-05, + "loss": 0.5242, + "step": 19604 + }, + { + "epoch": 0.5383031301482701, + "grad_norm": 0.3756777048110962, + "learning_rate": 1.6656999515211233e-05, + "loss": 0.4762, + "step": 19605 + }, + { + "epoch": 0.5383305875892367, + "grad_norm": 0.3773339092731476, + "learning_rate": 1.6656677221709894e-05, + "loss": 0.5483, + "step": 19606 + }, + { + "epoch": 0.5383580450302032, + "grad_norm": 0.39330536127090454, + "learning_rate": 1.6656354915791727e-05, + "loss": 0.4778, + "step": 19607 + }, + { + "epoch": 0.5383855024711697, + "grad_norm": 0.39864784479141235, + "learning_rate": 1.665603259745733e-05, + "loss": 0.4644, + "step": 19608 + }, + { + "epoch": 0.5384129599121362, + "grad_norm": 0.3666151463985443, + "learning_rate": 1.6655710266707312e-05, + "loss": 0.554, + "step": 19609 + }, + { + "epoch": 0.5384404173531027, + "grad_norm": 0.37031984329223633, + "learning_rate": 1.6655387923542266e-05, + "loss": 0.5727, + "step": 19610 + }, + { + "epoch": 0.5384678747940692, + "grad_norm": 0.5167794823646545, + "learning_rate": 1.66550655679628e-05, + "loss": 0.5509, + "step": 19611 + }, + { + "epoch": 0.5384953322350357, + "grad_norm": 0.3514653146266937, + "learning_rate": 1.6654743199969513e-05, + "loss": 0.3962, + "step": 19612 + }, + { + "epoch": 0.5385227896760022, + "grad_norm": 0.4097999930381775, + "learning_rate": 1.6654420819563007e-05, + "loss": 0.5839, + "step": 19613 + }, + { + "epoch": 0.5385502471169687, + "grad_norm": 0.34315863251686096, + "learning_rate": 1.665409842674388e-05, + "loss": 0.49, + "step": 19614 + }, + { + "epoch": 0.5385777045579352, + "grad_norm": 0.3985462486743927, + "learning_rate": 1.6653776021512737e-05, + "loss": 0.3926, + "step": 19615 + }, + { + "epoch": 0.5386051619989017, + "grad_norm": 0.4783534109592438, + "learning_rate": 1.6653453603870175e-05, + "loss": 0.5492, + "step": 19616 + }, + { + "epoch": 0.5386326194398682, + "grad_norm": 0.4241456389427185, + "learning_rate": 1.66531311738168e-05, + "loss": 0.5457, + "step": 19617 + }, + { + "epoch": 0.5386600768808347, + "grad_norm": 0.39854535460472107, + "learning_rate": 1.6652808731353217e-05, + "loss": 0.5544, + "step": 19618 + }, + { + "epoch": 0.5386875343218012, + "grad_norm": 0.3660352826118469, + "learning_rate": 1.6652486276480016e-05, + "loss": 0.6189, + "step": 19619 + }, + { + "epoch": 0.5387149917627677, + "grad_norm": 0.34752213954925537, + "learning_rate": 1.6652163809197807e-05, + "loss": 0.4629, + "step": 19620 + }, + { + "epoch": 0.5387424492037343, + "grad_norm": 0.36711394786834717, + "learning_rate": 1.665184132950719e-05, + "loss": 0.5732, + "step": 19621 + }, + { + "epoch": 0.5387699066447007, + "grad_norm": 0.37179097533226013, + "learning_rate": 1.6651518837408763e-05, + "loss": 0.4761, + "step": 19622 + }, + { + "epoch": 0.5387973640856673, + "grad_norm": 0.36111170053482056, + "learning_rate": 1.6651196332903135e-05, + "loss": 0.489, + "step": 19623 + }, + { + "epoch": 0.5388248215266337, + "grad_norm": 0.3930927813053131, + "learning_rate": 1.66508738159909e-05, + "loss": 0.5, + "step": 19624 + }, + { + "epoch": 0.5388522789676002, + "grad_norm": 0.3669028580188751, + "learning_rate": 1.6650551286672665e-05, + "loss": 0.4623, + "step": 19625 + }, + { + "epoch": 0.5388797364085667, + "grad_norm": 0.36012929677963257, + "learning_rate": 1.6650228744949026e-05, + "loss": 0.4774, + "step": 19626 + }, + { + "epoch": 0.5389071938495332, + "grad_norm": 0.4288899004459381, + "learning_rate": 1.664990619082059e-05, + "loss": 0.5048, + "step": 19627 + }, + { + "epoch": 0.5389346512904998, + "grad_norm": 0.37341082096099854, + "learning_rate": 1.6649583624287955e-05, + "loss": 0.5127, + "step": 19628 + }, + { + "epoch": 0.5389621087314662, + "grad_norm": 0.3527744710445404, + "learning_rate": 1.6649261045351726e-05, + "loss": 0.4873, + "step": 19629 + }, + { + "epoch": 0.5389895661724328, + "grad_norm": 0.40748733282089233, + "learning_rate": 1.6648938454012502e-05, + "loss": 0.4656, + "step": 19630 + }, + { + "epoch": 0.5390170236133992, + "grad_norm": 0.3947871923446655, + "learning_rate": 1.6648615850270886e-05, + "loss": 0.5514, + "step": 19631 + }, + { + "epoch": 0.5390444810543658, + "grad_norm": 0.374775230884552, + "learning_rate": 1.6648293234127478e-05, + "loss": 0.5077, + "step": 19632 + }, + { + "epoch": 0.5390719384953322, + "grad_norm": 0.4691302180290222, + "learning_rate": 1.6647970605582884e-05, + "loss": 0.5262, + "step": 19633 + }, + { + "epoch": 0.5390993959362987, + "grad_norm": 0.3694054186344147, + "learning_rate": 1.66476479646377e-05, + "loss": 0.5038, + "step": 19634 + }, + { + "epoch": 0.5391268533772653, + "grad_norm": 0.4255426228046417, + "learning_rate": 1.6647325311292534e-05, + "loss": 0.5921, + "step": 19635 + }, + { + "epoch": 0.5391543108182317, + "grad_norm": 0.36368417739868164, + "learning_rate": 1.6647002645547984e-05, + "loss": 0.5089, + "step": 19636 + }, + { + "epoch": 0.5391817682591983, + "grad_norm": 0.3619469106197357, + "learning_rate": 1.664667996740465e-05, + "loss": 0.4919, + "step": 19637 + }, + { + "epoch": 0.5392092257001647, + "grad_norm": 0.40337297320365906, + "learning_rate": 1.664635727686314e-05, + "loss": 0.5254, + "step": 19638 + }, + { + "epoch": 0.5392366831411313, + "grad_norm": 0.3473639488220215, + "learning_rate": 1.664603457392405e-05, + "loss": 0.4542, + "step": 19639 + }, + { + "epoch": 0.5392641405820977, + "grad_norm": 0.4249575734138489, + "learning_rate": 1.6645711858587987e-05, + "loss": 0.4889, + "step": 19640 + }, + { + "epoch": 0.5392915980230643, + "grad_norm": 0.3889387845993042, + "learning_rate": 1.6645389130855547e-05, + "loss": 0.5033, + "step": 19641 + }, + { + "epoch": 0.5393190554640308, + "grad_norm": 0.528272271156311, + "learning_rate": 1.6645066390727338e-05, + "loss": 0.415, + "step": 19642 + }, + { + "epoch": 0.5393465129049972, + "grad_norm": 0.6863782405853271, + "learning_rate": 1.6644743638203958e-05, + "loss": 0.5139, + "step": 19643 + }, + { + "epoch": 0.5393739703459638, + "grad_norm": 0.40019291639328003, + "learning_rate": 1.664442087328601e-05, + "loss": 0.5405, + "step": 19644 + }, + { + "epoch": 0.5394014277869302, + "grad_norm": 0.423967182636261, + "learning_rate": 1.6644098095974098e-05, + "loss": 0.4415, + "step": 19645 + }, + { + "epoch": 0.5394288852278968, + "grad_norm": 0.41218364238739014, + "learning_rate": 1.6643775306268818e-05, + "loss": 0.5463, + "step": 19646 + }, + { + "epoch": 0.5394563426688632, + "grad_norm": 0.3414413630962372, + "learning_rate": 1.6643452504170784e-05, + "loss": 0.4776, + "step": 19647 + }, + { + "epoch": 0.5394838001098298, + "grad_norm": 0.38970914483070374, + "learning_rate": 1.6643129689680585e-05, + "loss": 0.5706, + "step": 19648 + }, + { + "epoch": 0.5395112575507963, + "grad_norm": 0.3647949993610382, + "learning_rate": 1.664280686279883e-05, + "loss": 0.5133, + "step": 19649 + }, + { + "epoch": 0.5395387149917628, + "grad_norm": 0.41058462858200073, + "learning_rate": 1.664248402352612e-05, + "loss": 0.5389, + "step": 19650 + }, + { + "epoch": 0.5395661724327293, + "grad_norm": 0.38013148307800293, + "learning_rate": 1.6642161171863057e-05, + "loss": 0.4251, + "step": 19651 + }, + { + "epoch": 0.5395936298736957, + "grad_norm": 0.3205028176307678, + "learning_rate": 1.6641838307810246e-05, + "loss": 0.4335, + "step": 19652 + }, + { + "epoch": 0.5396210873146623, + "grad_norm": 0.3801233768463135, + "learning_rate": 1.6641515431368284e-05, + "loss": 0.5292, + "step": 19653 + }, + { + "epoch": 0.5396485447556287, + "grad_norm": 0.38884609937667847, + "learning_rate": 1.6641192542537776e-05, + "loss": 0.5239, + "step": 19654 + }, + { + "epoch": 0.5396760021965953, + "grad_norm": 0.4128047823905945, + "learning_rate": 1.6640869641319328e-05, + "loss": 0.47, + "step": 19655 + }, + { + "epoch": 0.5397034596375618, + "grad_norm": 0.38024958968162537, + "learning_rate": 1.6640546727713537e-05, + "loss": 0.5224, + "step": 19656 + }, + { + "epoch": 0.5397309170785283, + "grad_norm": 0.3610941767692566, + "learning_rate": 1.6640223801721004e-05, + "loss": 0.4908, + "step": 19657 + }, + { + "epoch": 0.5397583745194948, + "grad_norm": 0.4074316918849945, + "learning_rate": 1.6639900863342336e-05, + "loss": 0.5398, + "step": 19658 + }, + { + "epoch": 0.5397858319604613, + "grad_norm": 0.3671301603317261, + "learning_rate": 1.663957791257813e-05, + "loss": 0.4529, + "step": 19659 + }, + { + "epoch": 0.5398132894014278, + "grad_norm": 0.4052479565143585, + "learning_rate": 1.6639254949429e-05, + "loss": 0.4611, + "step": 19660 + }, + { + "epoch": 0.5398407468423942, + "grad_norm": 0.369039922952652, + "learning_rate": 1.6638931973895537e-05, + "loss": 0.4418, + "step": 19661 + }, + { + "epoch": 0.5398682042833608, + "grad_norm": 0.40478718280792236, + "learning_rate": 1.6638608985978347e-05, + "loss": 0.5244, + "step": 19662 + }, + { + "epoch": 0.5398956617243273, + "grad_norm": 0.4216059744358063, + "learning_rate": 1.663828598567803e-05, + "loss": 0.4934, + "step": 19663 + }, + { + "epoch": 0.5399231191652938, + "grad_norm": 0.416543573141098, + "learning_rate": 1.6637962972995195e-05, + "loss": 0.6032, + "step": 19664 + }, + { + "epoch": 0.5399505766062603, + "grad_norm": 0.33994969725608826, + "learning_rate": 1.6637639947930436e-05, + "loss": 0.4535, + "step": 19665 + }, + { + "epoch": 0.5399780340472268, + "grad_norm": 0.38874557614326477, + "learning_rate": 1.6637316910484363e-05, + "loss": 0.4717, + "step": 19666 + }, + { + "epoch": 0.5400054914881933, + "grad_norm": 0.35424157977104187, + "learning_rate": 1.6636993860657575e-05, + "loss": 0.43, + "step": 19667 + }, + { + "epoch": 0.5400329489291598, + "grad_norm": 0.4333404302597046, + "learning_rate": 1.6636670798450675e-05, + "loss": 0.5507, + "step": 19668 + }, + { + "epoch": 0.5400604063701263, + "grad_norm": 0.3633587062358856, + "learning_rate": 1.6636347723864264e-05, + "loss": 0.4522, + "step": 19669 + }, + { + "epoch": 0.5400878638110929, + "grad_norm": 0.4357368052005768, + "learning_rate": 1.663602463689895e-05, + "loss": 0.4461, + "step": 19670 + }, + { + "epoch": 0.5401153212520593, + "grad_norm": 0.4932962656021118, + "learning_rate": 1.663570153755533e-05, + "loss": 0.4946, + "step": 19671 + }, + { + "epoch": 0.5401427786930258, + "grad_norm": 0.40217944979667664, + "learning_rate": 1.6635378425834006e-05, + "loss": 0.5324, + "step": 19672 + }, + { + "epoch": 0.5401702361339923, + "grad_norm": 0.4372687041759491, + "learning_rate": 1.663505530173559e-05, + "loss": 0.5732, + "step": 19673 + }, + { + "epoch": 0.5401976935749588, + "grad_norm": 0.331733763217926, + "learning_rate": 1.6634732165260678e-05, + "loss": 0.4766, + "step": 19674 + }, + { + "epoch": 0.5402251510159253, + "grad_norm": 0.41974011063575745, + "learning_rate": 1.6634409016409863e-05, + "loss": 0.604, + "step": 19675 + }, + { + "epoch": 0.5402526084568918, + "grad_norm": 0.40951383113861084, + "learning_rate": 1.663408585518377e-05, + "loss": 0.474, + "step": 19676 + }, + { + "epoch": 0.5402800658978584, + "grad_norm": 0.3722664415836334, + "learning_rate": 1.663376268158298e-05, + "loss": 0.4871, + "step": 19677 + }, + { + "epoch": 0.5403075233388248, + "grad_norm": 0.4712080955505371, + "learning_rate": 1.663343949560811e-05, + "loss": 0.5512, + "step": 19678 + }, + { + "epoch": 0.5403349807797914, + "grad_norm": 0.49468183517456055, + "learning_rate": 1.663311629725976e-05, + "loss": 0.4656, + "step": 19679 + }, + { + "epoch": 0.5403624382207578, + "grad_norm": 0.39845213294029236, + "learning_rate": 1.6632793086538526e-05, + "loss": 0.5805, + "step": 19680 + }, + { + "epoch": 0.5403898956617244, + "grad_norm": 0.8063944578170776, + "learning_rate": 1.663246986344502e-05, + "loss": 0.5527, + "step": 19681 + }, + { + "epoch": 0.5404173531026908, + "grad_norm": 0.3747590184211731, + "learning_rate": 1.6632146627979838e-05, + "loss": 0.4817, + "step": 19682 + }, + { + "epoch": 0.5404448105436573, + "grad_norm": 1.1819287538528442, + "learning_rate": 1.6631823380143588e-05, + "loss": 0.4655, + "step": 19683 + }, + { + "epoch": 0.5404722679846238, + "grad_norm": 0.3925994634628296, + "learning_rate": 1.663150011993687e-05, + "loss": 0.503, + "step": 19684 + }, + { + "epoch": 0.5404997254255903, + "grad_norm": 0.3898352384567261, + "learning_rate": 1.6631176847360287e-05, + "loss": 0.5026, + "step": 19685 + }, + { + "epoch": 0.5405271828665569, + "grad_norm": 0.3709852397441864, + "learning_rate": 1.6630853562414442e-05, + "loss": 0.5266, + "step": 19686 + }, + { + "epoch": 0.5405546403075233, + "grad_norm": 0.39966267347335815, + "learning_rate": 1.6630530265099945e-05, + "loss": 0.4963, + "step": 19687 + }, + { + "epoch": 0.5405820977484899, + "grad_norm": 0.3299880027770996, + "learning_rate": 1.6630206955417384e-05, + "loss": 0.4508, + "step": 19688 + }, + { + "epoch": 0.5406095551894563, + "grad_norm": 0.33317139744758606, + "learning_rate": 1.6629883633367377e-05, + "loss": 0.4805, + "step": 19689 + }, + { + "epoch": 0.5406370126304229, + "grad_norm": 0.32179224491119385, + "learning_rate": 1.662956029895052e-05, + "loss": 0.4063, + "step": 19690 + }, + { + "epoch": 0.5406644700713893, + "grad_norm": 0.4001958668231964, + "learning_rate": 1.6629236952167414e-05, + "loss": 0.5574, + "step": 19691 + }, + { + "epoch": 0.5406919275123558, + "grad_norm": 0.3608749806880951, + "learning_rate": 1.6628913593018668e-05, + "loss": 0.5095, + "step": 19692 + }, + { + "epoch": 0.5407193849533224, + "grad_norm": 0.4081554114818573, + "learning_rate": 1.662859022150488e-05, + "loss": 0.5511, + "step": 19693 + }, + { + "epoch": 0.5407468423942888, + "grad_norm": 0.3965798318386078, + "learning_rate": 1.662826683762666e-05, + "loss": 0.4919, + "step": 19694 + }, + { + "epoch": 0.5407742998352554, + "grad_norm": 0.3306994140148163, + "learning_rate": 1.6627943441384605e-05, + "loss": 0.4551, + "step": 19695 + }, + { + "epoch": 0.5408017572762218, + "grad_norm": 0.37543752789497375, + "learning_rate": 1.6627620032779316e-05, + "loss": 0.4376, + "step": 19696 + }, + { + "epoch": 0.5408292147171884, + "grad_norm": 0.37513861060142517, + "learning_rate": 1.6627296611811405e-05, + "loss": 0.5408, + "step": 19697 + }, + { + "epoch": 0.5408566721581548, + "grad_norm": 0.4088978171348572, + "learning_rate": 1.6626973178481468e-05, + "loss": 0.536, + "step": 19698 + }, + { + "epoch": 0.5408841295991214, + "grad_norm": 0.3924083113670349, + "learning_rate": 1.6626649732790115e-05, + "loss": 0.5389, + "step": 19699 + }, + { + "epoch": 0.5409115870400879, + "grad_norm": 0.3386351764202118, + "learning_rate": 1.6626326274737943e-05, + "loss": 0.47, + "step": 19700 + }, + { + "epoch": 0.5409390444810543, + "grad_norm": 0.3930121064186096, + "learning_rate": 1.6626002804325555e-05, + "loss": 0.4808, + "step": 19701 + }, + { + "epoch": 0.5409665019220209, + "grad_norm": 0.368355929851532, + "learning_rate": 1.662567932155356e-05, + "loss": 0.5509, + "step": 19702 + }, + { + "epoch": 0.5409939593629873, + "grad_norm": 0.368168443441391, + "learning_rate": 1.6625355826422557e-05, + "loss": 0.4559, + "step": 19703 + }, + { + "epoch": 0.5410214168039539, + "grad_norm": 0.41686174273490906, + "learning_rate": 1.662503231893315e-05, + "loss": 0.5343, + "step": 19704 + }, + { + "epoch": 0.5410488742449203, + "grad_norm": 0.49016883969306946, + "learning_rate": 1.6624708799085948e-05, + "loss": 0.4855, + "step": 19705 + }, + { + "epoch": 0.5410763316858869, + "grad_norm": 0.36993491649627686, + "learning_rate": 1.6624385266881544e-05, + "loss": 0.5289, + "step": 19706 + }, + { + "epoch": 0.5411037891268534, + "grad_norm": 0.347615122795105, + "learning_rate": 1.662406172232055e-05, + "loss": 0.4832, + "step": 19707 + }, + { + "epoch": 0.5411312465678199, + "grad_norm": 0.4150081276893616, + "learning_rate": 1.6623738165403568e-05, + "loss": 0.4779, + "step": 19708 + }, + { + "epoch": 0.5411587040087864, + "grad_norm": 0.3809777796268463, + "learning_rate": 1.6623414596131196e-05, + "loss": 0.5871, + "step": 19709 + }, + { + "epoch": 0.5411861614497528, + "grad_norm": 0.41042959690093994, + "learning_rate": 1.6623091014504046e-05, + "loss": 0.5607, + "step": 19710 + }, + { + "epoch": 0.5412136188907194, + "grad_norm": 0.3646673560142517, + "learning_rate": 1.6622767420522716e-05, + "loss": 0.439, + "step": 19711 + }, + { + "epoch": 0.5412410763316858, + "grad_norm": 0.3686259984970093, + "learning_rate": 1.662244381418781e-05, + "loss": 0.497, + "step": 19712 + }, + { + "epoch": 0.5412685337726524, + "grad_norm": 0.33834195137023926, + "learning_rate": 1.6622120195499937e-05, + "loss": 0.495, + "step": 19713 + }, + { + "epoch": 0.5412959912136189, + "grad_norm": 0.3719194829463959, + "learning_rate": 1.662179656445969e-05, + "loss": 0.499, + "step": 19714 + }, + { + "epoch": 0.5413234486545854, + "grad_norm": 0.33387500047683716, + "learning_rate": 1.6621472921067683e-05, + "loss": 0.513, + "step": 19715 + }, + { + "epoch": 0.5413509060955519, + "grad_norm": 0.403230220079422, + "learning_rate": 1.6621149265324512e-05, + "loss": 0.5752, + "step": 19716 + }, + { + "epoch": 0.5413783635365184, + "grad_norm": 0.41798949241638184, + "learning_rate": 1.6620825597230788e-05, + "loss": 0.5668, + "step": 19717 + }, + { + "epoch": 0.5414058209774849, + "grad_norm": 0.4527246356010437, + "learning_rate": 1.662050191678711e-05, + "loss": 0.645, + "step": 19718 + }, + { + "epoch": 0.5414332784184513, + "grad_norm": 0.3787775933742523, + "learning_rate": 1.6620178223994082e-05, + "loss": 0.5642, + "step": 19719 + }, + { + "epoch": 0.5414607358594179, + "grad_norm": 0.34913957118988037, + "learning_rate": 1.6619854518852313e-05, + "loss": 0.525, + "step": 19720 + }, + { + "epoch": 0.5414881933003844, + "grad_norm": 0.3984299898147583, + "learning_rate": 1.6619530801362396e-05, + "loss": 0.5623, + "step": 19721 + }, + { + "epoch": 0.5415156507413509, + "grad_norm": 0.3409484326839447, + "learning_rate": 1.6619207071524947e-05, + "loss": 0.4561, + "step": 19722 + }, + { + "epoch": 0.5415431081823174, + "grad_norm": 0.37484094500541687, + "learning_rate": 1.661888332934056e-05, + "loss": 0.4705, + "step": 19723 + }, + { + "epoch": 0.5415705656232839, + "grad_norm": 0.33636876940727234, + "learning_rate": 1.6618559574809845e-05, + "loss": 0.4928, + "step": 19724 + }, + { + "epoch": 0.5415980230642504, + "grad_norm": 0.39195311069488525, + "learning_rate": 1.6618235807933404e-05, + "loss": 0.456, + "step": 19725 + }, + { + "epoch": 0.5416254805052169, + "grad_norm": 0.3852764070034027, + "learning_rate": 1.661791202871184e-05, + "loss": 0.5234, + "step": 19726 + }, + { + "epoch": 0.5416529379461834, + "grad_norm": 0.3700140714645386, + "learning_rate": 1.6617588237145758e-05, + "loss": 0.444, + "step": 19727 + }, + { + "epoch": 0.54168039538715, + "grad_norm": 0.4701383113861084, + "learning_rate": 1.6617264433235766e-05, + "loss": 0.4782, + "step": 19728 + }, + { + "epoch": 0.5417078528281164, + "grad_norm": 0.3594917058944702, + "learning_rate": 1.6616940616982454e-05, + "loss": 0.488, + "step": 19729 + }, + { + "epoch": 0.541735310269083, + "grad_norm": 0.521018922328949, + "learning_rate": 1.6616616788386446e-05, + "loss": 0.5972, + "step": 19730 + }, + { + "epoch": 0.5417627677100494, + "grad_norm": 0.4083833396434784, + "learning_rate": 1.661629294744833e-05, + "loss": 0.4958, + "step": 19731 + }, + { + "epoch": 0.5417902251510159, + "grad_norm": 0.33342042565345764, + "learning_rate": 1.661596909416872e-05, + "loss": 0.4315, + "step": 19732 + }, + { + "epoch": 0.5418176825919824, + "grad_norm": 0.35604435205459595, + "learning_rate": 1.6615645228548212e-05, + "loss": 0.5215, + "step": 19733 + }, + { + "epoch": 0.5418451400329489, + "grad_norm": 0.5902740359306335, + "learning_rate": 1.6615321350587415e-05, + "loss": 0.5676, + "step": 19734 + }, + { + "epoch": 0.5418725974739155, + "grad_norm": 0.3753475248813629, + "learning_rate": 1.6614997460286937e-05, + "loss": 0.5914, + "step": 19735 + }, + { + "epoch": 0.5419000549148819, + "grad_norm": 0.40451812744140625, + "learning_rate": 1.6614673557647375e-05, + "loss": 0.5193, + "step": 19736 + }, + { + "epoch": 0.5419275123558485, + "grad_norm": 0.3802521526813507, + "learning_rate": 1.6614349642669334e-05, + "loss": 0.5441, + "step": 19737 + }, + { + "epoch": 0.5419549697968149, + "grad_norm": 0.38647064566612244, + "learning_rate": 1.661402571535342e-05, + "loss": 0.5473, + "step": 19738 + }, + { + "epoch": 0.5419824272377815, + "grad_norm": 0.34574073553085327, + "learning_rate": 1.6613701775700236e-05, + "loss": 0.5766, + "step": 19739 + }, + { + "epoch": 0.5420098846787479, + "grad_norm": 0.37236279249191284, + "learning_rate": 1.661337782371039e-05, + "loss": 0.422, + "step": 19740 + }, + { + "epoch": 0.5420373421197144, + "grad_norm": 0.46751081943511963, + "learning_rate": 1.6613053859384483e-05, + "loss": 0.5561, + "step": 19741 + }, + { + "epoch": 0.542064799560681, + "grad_norm": 0.36395853757858276, + "learning_rate": 1.661272988272312e-05, + "loss": 0.4268, + "step": 19742 + }, + { + "epoch": 0.5420922570016474, + "grad_norm": 0.33163565397262573, + "learning_rate": 1.6612405893726903e-05, + "loss": 0.5508, + "step": 19743 + }, + { + "epoch": 0.542119714442614, + "grad_norm": 0.3685644865036011, + "learning_rate": 1.661208189239644e-05, + "loss": 0.5331, + "step": 19744 + }, + { + "epoch": 0.5421471718835804, + "grad_norm": 0.3723931610584259, + "learning_rate": 1.6611757878732337e-05, + "loss": 0.4317, + "step": 19745 + }, + { + "epoch": 0.542174629324547, + "grad_norm": 0.39240095019340515, + "learning_rate": 1.6611433852735192e-05, + "loss": 0.5263, + "step": 19746 + }, + { + "epoch": 0.5422020867655134, + "grad_norm": 0.3801056444644928, + "learning_rate": 1.6611109814405613e-05, + "loss": 0.5278, + "step": 19747 + }, + { + "epoch": 0.54222954420648, + "grad_norm": 0.3725067675113678, + "learning_rate": 1.6610785763744204e-05, + "loss": 0.5321, + "step": 19748 + }, + { + "epoch": 0.5422570016474465, + "grad_norm": 0.325718492269516, + "learning_rate": 1.661046170075157e-05, + "loss": 0.5313, + "step": 19749 + }, + { + "epoch": 0.5422844590884129, + "grad_norm": 0.4323998689651489, + "learning_rate": 1.6610137625428315e-05, + "loss": 0.4627, + "step": 19750 + }, + { + "epoch": 0.5423119165293795, + "grad_norm": 0.32611972093582153, + "learning_rate": 1.6609813537775042e-05, + "loss": 0.5317, + "step": 19751 + }, + { + "epoch": 0.5423393739703459, + "grad_norm": 0.38168543577194214, + "learning_rate": 1.660948943779236e-05, + "loss": 0.4942, + "step": 19752 + }, + { + "epoch": 0.5423668314113125, + "grad_norm": 0.36407700181007385, + "learning_rate": 1.660916532548087e-05, + "loss": 0.5882, + "step": 19753 + }, + { + "epoch": 0.5423942888522789, + "grad_norm": 0.4563869535923004, + "learning_rate": 1.660884120084118e-05, + "loss": 0.4929, + "step": 19754 + }, + { + "epoch": 0.5424217462932455, + "grad_norm": 0.3369366526603699, + "learning_rate": 1.660851706387389e-05, + "loss": 0.4959, + "step": 19755 + }, + { + "epoch": 0.542449203734212, + "grad_norm": 0.3881751596927643, + "learning_rate": 1.6608192914579603e-05, + "loss": 0.528, + "step": 19756 + }, + { + "epoch": 0.5424766611751785, + "grad_norm": 0.3909081816673279, + "learning_rate": 1.6607868752958927e-05, + "loss": 0.5098, + "step": 19757 + }, + { + "epoch": 0.542504118616145, + "grad_norm": 0.37460073828697205, + "learning_rate": 1.660754457901247e-05, + "loss": 0.5275, + "step": 19758 + }, + { + "epoch": 0.5425315760571114, + "grad_norm": 0.3406619131565094, + "learning_rate": 1.6607220392740836e-05, + "loss": 0.3846, + "step": 19759 + }, + { + "epoch": 0.542559033498078, + "grad_norm": 0.5270332098007202, + "learning_rate": 1.660689619414462e-05, + "loss": 0.7174, + "step": 19760 + }, + { + "epoch": 0.5425864909390444, + "grad_norm": 0.35631123185157776, + "learning_rate": 1.660657198322444e-05, + "loss": 0.541, + "step": 19761 + }, + { + "epoch": 0.542613948380011, + "grad_norm": 0.4103354513645172, + "learning_rate": 1.6606247759980893e-05, + "loss": 0.5174, + "step": 19762 + }, + { + "epoch": 0.5426414058209775, + "grad_norm": 0.3479849100112915, + "learning_rate": 1.6605923524414584e-05, + "loss": 0.4681, + "step": 19763 + }, + { + "epoch": 0.542668863261944, + "grad_norm": 0.4861186146736145, + "learning_rate": 1.660559927652612e-05, + "loss": 0.4675, + "step": 19764 + }, + { + "epoch": 0.5426963207029105, + "grad_norm": 0.3949238359928131, + "learning_rate": 1.6605275016316104e-05, + "loss": 0.5294, + "step": 19765 + }, + { + "epoch": 0.542723778143877, + "grad_norm": 0.384653776884079, + "learning_rate": 1.6604950743785144e-05, + "loss": 0.4792, + "step": 19766 + }, + { + "epoch": 0.5427512355848435, + "grad_norm": 0.40103763341903687, + "learning_rate": 1.6604626458933843e-05, + "loss": 0.4875, + "step": 19767 + }, + { + "epoch": 0.5427786930258099, + "grad_norm": 0.6704285144805908, + "learning_rate": 1.6604302161762803e-05, + "loss": 0.4883, + "step": 19768 + }, + { + "epoch": 0.5428061504667765, + "grad_norm": 0.3627004325389862, + "learning_rate": 1.6603977852272635e-05, + "loss": 0.5116, + "step": 19769 + }, + { + "epoch": 0.542833607907743, + "grad_norm": 0.37011951208114624, + "learning_rate": 1.6603653530463937e-05, + "loss": 0.4848, + "step": 19770 + }, + { + "epoch": 0.5428610653487095, + "grad_norm": 0.3776332437992096, + "learning_rate": 1.660332919633732e-05, + "loss": 0.4752, + "step": 19771 + }, + { + "epoch": 0.542888522789676, + "grad_norm": 0.36433079838752747, + "learning_rate": 1.660300484989339e-05, + "loss": 0.4772, + "step": 19772 + }, + { + "epoch": 0.5429159802306425, + "grad_norm": 0.47110483050346375, + "learning_rate": 1.660268049113274e-05, + "loss": 0.5827, + "step": 19773 + }, + { + "epoch": 0.542943437671609, + "grad_norm": 0.3398415148258209, + "learning_rate": 1.660235612005599e-05, + "loss": 0.482, + "step": 19774 + }, + { + "epoch": 0.5429708951125755, + "grad_norm": 0.439911812543869, + "learning_rate": 1.6602031736663734e-05, + "loss": 0.5697, + "step": 19775 + }, + { + "epoch": 0.542998352553542, + "grad_norm": 0.37539270520210266, + "learning_rate": 1.6601707340956585e-05, + "loss": 0.4586, + "step": 19776 + }, + { + "epoch": 0.5430258099945086, + "grad_norm": 0.4149915277957916, + "learning_rate": 1.6601382932935147e-05, + "loss": 0.4996, + "step": 19777 + }, + { + "epoch": 0.543053267435475, + "grad_norm": 0.4294966161251068, + "learning_rate": 1.6601058512600017e-05, + "loss": 0.5679, + "step": 19778 + }, + { + "epoch": 0.5430807248764415, + "grad_norm": 0.6289628148078918, + "learning_rate": 1.660073407995181e-05, + "loss": 0.5011, + "step": 19779 + }, + { + "epoch": 0.543108182317408, + "grad_norm": 0.39760518074035645, + "learning_rate": 1.660040963499113e-05, + "loss": 0.5833, + "step": 19780 + }, + { + "epoch": 0.5431356397583745, + "grad_norm": 0.38614699244499207, + "learning_rate": 1.660008517771857e-05, + "loss": 0.4741, + "step": 19781 + }, + { + "epoch": 0.543163097199341, + "grad_norm": 0.345188170671463, + "learning_rate": 1.6599760708134754e-05, + "loss": 0.464, + "step": 19782 + }, + { + "epoch": 0.5431905546403075, + "grad_norm": 0.380639910697937, + "learning_rate": 1.659943622624027e-05, + "loss": 0.531, + "step": 19783 + }, + { + "epoch": 0.5432180120812741, + "grad_norm": 0.35951054096221924, + "learning_rate": 1.659911173203574e-05, + "loss": 0.5059, + "step": 19784 + }, + { + "epoch": 0.5432454695222405, + "grad_norm": 0.45215538144111633, + "learning_rate": 1.6598787225521755e-05, + "loss": 0.5197, + "step": 19785 + }, + { + "epoch": 0.5432729269632071, + "grad_norm": 0.35310760140419006, + "learning_rate": 1.6598462706698927e-05, + "loss": 0.5281, + "step": 19786 + }, + { + "epoch": 0.5433003844041735, + "grad_norm": 0.3925228416919708, + "learning_rate": 1.659813817556786e-05, + "loss": 0.5595, + "step": 19787 + }, + { + "epoch": 0.54332784184514, + "grad_norm": 0.3976326584815979, + "learning_rate": 1.6597813632129156e-05, + "loss": 0.5611, + "step": 19788 + }, + { + "epoch": 0.5433552992861065, + "grad_norm": 0.35029137134552, + "learning_rate": 1.6597489076383427e-05, + "loss": 0.4473, + "step": 19789 + }, + { + "epoch": 0.543382756727073, + "grad_norm": 0.39030909538269043, + "learning_rate": 1.6597164508331278e-05, + "loss": 0.5677, + "step": 19790 + }, + { + "epoch": 0.5434102141680396, + "grad_norm": 0.41844263672828674, + "learning_rate": 1.659683992797331e-05, + "loss": 0.5404, + "step": 19791 + }, + { + "epoch": 0.543437671609006, + "grad_norm": 0.38570094108581543, + "learning_rate": 1.6596515335310126e-05, + "loss": 0.5107, + "step": 19792 + }, + { + "epoch": 0.5434651290499726, + "grad_norm": 0.3631781041622162, + "learning_rate": 1.659619073034234e-05, + "loss": 0.5213, + "step": 19793 + }, + { + "epoch": 0.543492586490939, + "grad_norm": 0.375789076089859, + "learning_rate": 1.659586611307055e-05, + "loss": 0.5676, + "step": 19794 + }, + { + "epoch": 0.5435200439319056, + "grad_norm": 0.3611355125904083, + "learning_rate": 1.6595541483495364e-05, + "loss": 0.4821, + "step": 19795 + }, + { + "epoch": 0.543547501372872, + "grad_norm": 0.35457876324653625, + "learning_rate": 1.659521684161739e-05, + "loss": 0.4821, + "step": 19796 + }, + { + "epoch": 0.5435749588138385, + "grad_norm": 0.4038681387901306, + "learning_rate": 1.6594892187437235e-05, + "loss": 0.5316, + "step": 19797 + }, + { + "epoch": 0.5436024162548051, + "grad_norm": 0.36495110392570496, + "learning_rate": 1.6594567520955497e-05, + "loss": 0.5212, + "step": 19798 + }, + { + "epoch": 0.5436298736957715, + "grad_norm": 0.4144493639469147, + "learning_rate": 1.6594242842172787e-05, + "loss": 0.4697, + "step": 19799 + }, + { + "epoch": 0.5436573311367381, + "grad_norm": 0.3487352728843689, + "learning_rate": 1.659391815108971e-05, + "loss": 0.4188, + "step": 19800 + }, + { + "epoch": 0.5436847885777045, + "grad_norm": 0.32795536518096924, + "learning_rate": 1.6593593447706865e-05, + "loss": 0.493, + "step": 19801 + }, + { + "epoch": 0.5437122460186711, + "grad_norm": 0.41444724798202515, + "learning_rate": 1.6593268732024872e-05, + "loss": 0.5163, + "step": 19802 + }, + { + "epoch": 0.5437397034596375, + "grad_norm": 0.5474461317062378, + "learning_rate": 1.6592944004044323e-05, + "loss": 0.4267, + "step": 19803 + }, + { + "epoch": 0.5437671609006041, + "grad_norm": 0.31163036823272705, + "learning_rate": 1.6592619263765836e-05, + "loss": 0.5019, + "step": 19804 + }, + { + "epoch": 0.5437946183415706, + "grad_norm": 0.36536309123039246, + "learning_rate": 1.6592294511190005e-05, + "loss": 0.509, + "step": 19805 + }, + { + "epoch": 0.543822075782537, + "grad_norm": 0.3765247166156769, + "learning_rate": 1.659196974631744e-05, + "loss": 0.4728, + "step": 19806 + }, + { + "epoch": 0.5438495332235036, + "grad_norm": 0.41895371675491333, + "learning_rate": 1.6591644969148748e-05, + "loss": 0.5124, + "step": 19807 + }, + { + "epoch": 0.54387699066447, + "grad_norm": 0.4211632013320923, + "learning_rate": 1.6591320179684534e-05, + "loss": 0.5398, + "step": 19808 + }, + { + "epoch": 0.5439044481054366, + "grad_norm": 0.33502358198165894, + "learning_rate": 1.6590995377925407e-05, + "loss": 0.4524, + "step": 19809 + }, + { + "epoch": 0.543931905546403, + "grad_norm": 0.3772011697292328, + "learning_rate": 1.6590670563871966e-05, + "loss": 0.5484, + "step": 19810 + }, + { + "epoch": 0.5439593629873696, + "grad_norm": 0.3493700325489044, + "learning_rate": 1.6590345737524827e-05, + "loss": 0.4875, + "step": 19811 + }, + { + "epoch": 0.5439868204283361, + "grad_norm": 0.3970179557800293, + "learning_rate": 1.6590020898884584e-05, + "loss": 0.5285, + "step": 19812 + }, + { + "epoch": 0.5440142778693026, + "grad_norm": 0.4384833574295044, + "learning_rate": 1.658969604795185e-05, + "loss": 0.4729, + "step": 19813 + }, + { + "epoch": 0.5440417353102691, + "grad_norm": 0.4911278784275055, + "learning_rate": 1.658937118472723e-05, + "loss": 0.4531, + "step": 19814 + }, + { + "epoch": 0.5440691927512356, + "grad_norm": 0.3607952892780304, + "learning_rate": 1.658904630921133e-05, + "loss": 0.4949, + "step": 19815 + }, + { + "epoch": 0.5440966501922021, + "grad_norm": 0.3903752863407135, + "learning_rate": 1.658872142140476e-05, + "loss": 0.4285, + "step": 19816 + }, + { + "epoch": 0.5441241076331685, + "grad_norm": 0.38979509472846985, + "learning_rate": 1.6588396521308116e-05, + "loss": 0.5003, + "step": 19817 + }, + { + "epoch": 0.5441515650741351, + "grad_norm": 0.43649065494537354, + "learning_rate": 1.658807160892201e-05, + "loss": 0.6059, + "step": 19818 + }, + { + "epoch": 0.5441790225151016, + "grad_norm": 0.3632781207561493, + "learning_rate": 1.6587746684247048e-05, + "loss": 0.5108, + "step": 19819 + }, + { + "epoch": 0.5442064799560681, + "grad_norm": 0.33911895751953125, + "learning_rate": 1.6587421747283837e-05, + "loss": 0.5107, + "step": 19820 + }, + { + "epoch": 0.5442339373970346, + "grad_norm": 0.4169740378856659, + "learning_rate": 1.6587096798032984e-05, + "loss": 0.5783, + "step": 19821 + }, + { + "epoch": 0.5442613948380011, + "grad_norm": 0.4253537654876709, + "learning_rate": 1.658677183649509e-05, + "loss": 0.5287, + "step": 19822 + }, + { + "epoch": 0.5442888522789676, + "grad_norm": 0.4095418155193329, + "learning_rate": 1.6586446862670765e-05, + "loss": 0.5586, + "step": 19823 + }, + { + "epoch": 0.544316309719934, + "grad_norm": 0.42518872022628784, + "learning_rate": 1.658612187656062e-05, + "loss": 0.5913, + "step": 19824 + }, + { + "epoch": 0.5443437671609006, + "grad_norm": 0.38951805233955383, + "learning_rate": 1.6585796878165246e-05, + "loss": 0.4834, + "step": 19825 + }, + { + "epoch": 0.5443712246018672, + "grad_norm": 0.41901078820228577, + "learning_rate": 1.6585471867485262e-05, + "loss": 0.5682, + "step": 19826 + }, + { + "epoch": 0.5443986820428336, + "grad_norm": 0.3741600215435028, + "learning_rate": 1.6585146844521275e-05, + "loss": 0.4851, + "step": 19827 + }, + { + "epoch": 0.5444261394838001, + "grad_norm": 0.4327826499938965, + "learning_rate": 1.658482180927388e-05, + "loss": 0.5348, + "step": 19828 + }, + { + "epoch": 0.5444535969247666, + "grad_norm": 0.44957804679870605, + "learning_rate": 1.65844967617437e-05, + "loss": 0.5408, + "step": 19829 + }, + { + "epoch": 0.5444810543657331, + "grad_norm": 0.37372153997421265, + "learning_rate": 1.6584171701931328e-05, + "loss": 0.5312, + "step": 19830 + }, + { + "epoch": 0.5445085118066996, + "grad_norm": 0.4408765137195587, + "learning_rate": 1.6583846629837373e-05, + "loss": 0.5584, + "step": 19831 + }, + { + "epoch": 0.5445359692476661, + "grad_norm": 0.3691295385360718, + "learning_rate": 1.658352154546244e-05, + "loss": 0.5633, + "step": 19832 + }, + { + "epoch": 0.5445634266886327, + "grad_norm": 0.39765238761901855, + "learning_rate": 1.6583196448807142e-05, + "loss": 0.5416, + "step": 19833 + }, + { + "epoch": 0.5445908841295991, + "grad_norm": 0.389516681432724, + "learning_rate": 1.6582871339872083e-05, + "loss": 0.5358, + "step": 19834 + }, + { + "epoch": 0.5446183415705657, + "grad_norm": 0.35538914799690247, + "learning_rate": 1.6582546218657864e-05, + "loss": 0.4351, + "step": 19835 + }, + { + "epoch": 0.5446457990115321, + "grad_norm": 0.4093911349773407, + "learning_rate": 1.65822210851651e-05, + "loss": 0.5751, + "step": 19836 + }, + { + "epoch": 0.5446732564524986, + "grad_norm": 0.39062345027923584, + "learning_rate": 1.6581895939394386e-05, + "loss": 0.5054, + "step": 19837 + }, + { + "epoch": 0.5447007138934651, + "grad_norm": 0.38360562920570374, + "learning_rate": 1.6581570781346344e-05, + "loss": 0.4828, + "step": 19838 + }, + { + "epoch": 0.5447281713344316, + "grad_norm": 0.35508477687835693, + "learning_rate": 1.6581245611021568e-05, + "loss": 0.4784, + "step": 19839 + }, + { + "epoch": 0.5447556287753982, + "grad_norm": 0.37319493293762207, + "learning_rate": 1.6580920428420666e-05, + "loss": 0.4168, + "step": 19840 + }, + { + "epoch": 0.5447830862163646, + "grad_norm": 0.34628257155418396, + "learning_rate": 1.658059523354425e-05, + "loss": 0.5151, + "step": 19841 + }, + { + "epoch": 0.5448105436573312, + "grad_norm": 0.35994282364845276, + "learning_rate": 1.6580270026392922e-05, + "loss": 0.5298, + "step": 19842 + }, + { + "epoch": 0.5448380010982976, + "grad_norm": 0.37631887197494507, + "learning_rate": 1.6579944806967293e-05, + "loss": 0.4957, + "step": 19843 + }, + { + "epoch": 0.5448654585392642, + "grad_norm": 0.3660814166069031, + "learning_rate": 1.6579619575267964e-05, + "loss": 0.497, + "step": 19844 + }, + { + "epoch": 0.5448929159802306, + "grad_norm": 0.3706534802913666, + "learning_rate": 1.6579294331295544e-05, + "loss": 0.5189, + "step": 19845 + }, + { + "epoch": 0.5449203734211971, + "grad_norm": 0.6812811493873596, + "learning_rate": 1.6578969075050643e-05, + "loss": 0.4195, + "step": 19846 + }, + { + "epoch": 0.5449478308621637, + "grad_norm": 0.37600138783454895, + "learning_rate": 1.6578643806533864e-05, + "loss": 0.5122, + "step": 19847 + }, + { + "epoch": 0.5449752883031301, + "grad_norm": 0.4015718996524811, + "learning_rate": 1.657831852574581e-05, + "loss": 0.5476, + "step": 19848 + }, + { + "epoch": 0.5450027457440967, + "grad_norm": 0.3526046872138977, + "learning_rate": 1.65779932326871e-05, + "loss": 0.4018, + "step": 19849 + }, + { + "epoch": 0.5450302031850631, + "grad_norm": 0.3703595995903015, + "learning_rate": 1.6577667927358327e-05, + "loss": 0.4676, + "step": 19850 + }, + { + "epoch": 0.5450576606260297, + "grad_norm": 0.4097972810268402, + "learning_rate": 1.657734260976011e-05, + "loss": 0.5479, + "step": 19851 + }, + { + "epoch": 0.5450851180669961, + "grad_norm": 0.35021325945854187, + "learning_rate": 1.6577017279893042e-05, + "loss": 0.445, + "step": 19852 + }, + { + "epoch": 0.5451125755079627, + "grad_norm": 0.4068858325481415, + "learning_rate": 1.6576691937757744e-05, + "loss": 0.4878, + "step": 19853 + }, + { + "epoch": 0.5451400329489292, + "grad_norm": 0.3431015908718109, + "learning_rate": 1.6576366583354816e-05, + "loss": 0.4881, + "step": 19854 + }, + { + "epoch": 0.5451674903898956, + "grad_norm": 0.4202606678009033, + "learning_rate": 1.6576041216684862e-05, + "loss": 0.543, + "step": 19855 + }, + { + "epoch": 0.5451949478308622, + "grad_norm": 0.37662434577941895, + "learning_rate": 1.6575715837748493e-05, + "loss": 0.5043, + "step": 19856 + }, + { + "epoch": 0.5452224052718286, + "grad_norm": 0.34563884139060974, + "learning_rate": 1.657539044654632e-05, + "loss": 0.4677, + "step": 19857 + }, + { + "epoch": 0.5452498627127952, + "grad_norm": 0.3965144753456116, + "learning_rate": 1.6575065043078938e-05, + "loss": 0.4758, + "step": 19858 + }, + { + "epoch": 0.5452773201537616, + "grad_norm": 0.30080172419548035, + "learning_rate": 1.6574739627346966e-05, + "loss": 0.4499, + "step": 19859 + }, + { + "epoch": 0.5453047775947282, + "grad_norm": 0.38530978560447693, + "learning_rate": 1.6574414199351007e-05, + "loss": 0.5337, + "step": 19860 + }, + { + "epoch": 0.5453322350356947, + "grad_norm": 0.39120858907699585, + "learning_rate": 1.6574088759091664e-05, + "loss": 0.5913, + "step": 19861 + }, + { + "epoch": 0.5453596924766612, + "grad_norm": 0.36986586451530457, + "learning_rate": 1.657376330656955e-05, + "loss": 0.5628, + "step": 19862 + }, + { + "epoch": 0.5453871499176277, + "grad_norm": 0.3609204888343811, + "learning_rate": 1.6573437841785264e-05, + "loss": 0.4349, + "step": 19863 + }, + { + "epoch": 0.5454146073585942, + "grad_norm": 0.41389545798301697, + "learning_rate": 1.6573112364739423e-05, + "loss": 0.4703, + "step": 19864 + }, + { + "epoch": 0.5454420647995607, + "grad_norm": 0.42142918705940247, + "learning_rate": 1.657278687543263e-05, + "loss": 0.5401, + "step": 19865 + }, + { + "epoch": 0.5454695222405271, + "grad_norm": 0.38261914253234863, + "learning_rate": 1.657246137386549e-05, + "loss": 0.4814, + "step": 19866 + }, + { + "epoch": 0.5454969796814937, + "grad_norm": 0.33335936069488525, + "learning_rate": 1.6572135860038612e-05, + "loss": 0.5063, + "step": 19867 + }, + { + "epoch": 0.5455244371224602, + "grad_norm": 0.35635629296302795, + "learning_rate": 1.6571810333952606e-05, + "loss": 0.5045, + "step": 19868 + }, + { + "epoch": 0.5455518945634267, + "grad_norm": 0.38845014572143555, + "learning_rate": 1.657148479560807e-05, + "loss": 0.4981, + "step": 19869 + }, + { + "epoch": 0.5455793520043932, + "grad_norm": 0.4398126006126404, + "learning_rate": 1.6571159245005624e-05, + "loss": 0.4352, + "step": 19870 + }, + { + "epoch": 0.5456068094453597, + "grad_norm": 0.4743586778640747, + "learning_rate": 1.6570833682145862e-05, + "loss": 0.4885, + "step": 19871 + }, + { + "epoch": 0.5456342668863262, + "grad_norm": 0.42740827798843384, + "learning_rate": 1.6570508107029405e-05, + "loss": 0.5417, + "step": 19872 + }, + { + "epoch": 0.5456617243272927, + "grad_norm": 0.39774811267852783, + "learning_rate": 1.657018251965685e-05, + "loss": 0.5068, + "step": 19873 + }, + { + "epoch": 0.5456891817682592, + "grad_norm": 0.3877210021018982, + "learning_rate": 1.6569856920028805e-05, + "loss": 0.4582, + "step": 19874 + }, + { + "epoch": 0.5457166392092258, + "grad_norm": 0.39974072575569153, + "learning_rate": 1.6569531308145886e-05, + "loss": 0.5063, + "step": 19875 + }, + { + "epoch": 0.5457440966501922, + "grad_norm": 0.392890065908432, + "learning_rate": 1.6569205684008688e-05, + "loss": 0.4573, + "step": 19876 + }, + { + "epoch": 0.5457715540911587, + "grad_norm": 0.4114449620246887, + "learning_rate": 1.656888004761783e-05, + "loss": 0.5142, + "step": 19877 + }, + { + "epoch": 0.5457990115321252, + "grad_norm": 0.38773471117019653, + "learning_rate": 1.6568554398973914e-05, + "loss": 0.5387, + "step": 19878 + }, + { + "epoch": 0.5458264689730917, + "grad_norm": 0.39508503675460815, + "learning_rate": 1.6568228738077542e-05, + "loss": 0.5104, + "step": 19879 + }, + { + "epoch": 0.5458539264140582, + "grad_norm": 0.37887847423553467, + "learning_rate": 1.6567903064929334e-05, + "loss": 0.4798, + "step": 19880 + }, + { + "epoch": 0.5458813838550247, + "grad_norm": 0.3163958191871643, + "learning_rate": 1.6567577379529884e-05, + "loss": 0.4533, + "step": 19881 + }, + { + "epoch": 0.5459088412959913, + "grad_norm": 0.4045231342315674, + "learning_rate": 1.656725168187981e-05, + "loss": 0.4632, + "step": 19882 + }, + { + "epoch": 0.5459362987369577, + "grad_norm": 0.3875863254070282, + "learning_rate": 1.6566925971979716e-05, + "loss": 0.4604, + "step": 19883 + }, + { + "epoch": 0.5459637561779243, + "grad_norm": 0.4426783621311188, + "learning_rate": 1.6566600249830208e-05, + "loss": 0.6009, + "step": 19884 + }, + { + "epoch": 0.5459912136188907, + "grad_norm": 0.3932056725025177, + "learning_rate": 1.6566274515431895e-05, + "loss": 0.4196, + "step": 19885 + }, + { + "epoch": 0.5460186710598572, + "grad_norm": 0.3497641980648041, + "learning_rate": 1.6565948768785383e-05, + "loss": 0.4668, + "step": 19886 + }, + { + "epoch": 0.5460461285008237, + "grad_norm": 0.4184666574001312, + "learning_rate": 1.6565623009891284e-05, + "loss": 0.4272, + "step": 19887 + }, + { + "epoch": 0.5460735859417902, + "grad_norm": 0.3688105642795563, + "learning_rate": 1.65652972387502e-05, + "loss": 0.4213, + "step": 19888 + }, + { + "epoch": 0.5461010433827568, + "grad_norm": 0.3591783940792084, + "learning_rate": 1.6564971455362743e-05, + "loss": 0.4571, + "step": 19889 + }, + { + "epoch": 0.5461285008237232, + "grad_norm": 0.4305286407470703, + "learning_rate": 1.6564645659729514e-05, + "loss": 0.5211, + "step": 19890 + }, + { + "epoch": 0.5461559582646898, + "grad_norm": 0.36632540822029114, + "learning_rate": 1.656431985185113e-05, + "loss": 0.4425, + "step": 19891 + }, + { + "epoch": 0.5461834157056562, + "grad_norm": 0.37213972210884094, + "learning_rate": 1.6563994031728194e-05, + "loss": 0.5397, + "step": 19892 + }, + { + "epoch": 0.5462108731466228, + "grad_norm": 0.40355736017227173, + "learning_rate": 1.6563668199361314e-05, + "loss": 0.487, + "step": 19893 + }, + { + "epoch": 0.5462383305875892, + "grad_norm": 0.3889090120792389, + "learning_rate": 1.65633423547511e-05, + "loss": 0.4844, + "step": 19894 + }, + { + "epoch": 0.5462657880285557, + "grad_norm": 0.4028017222881317, + "learning_rate": 1.6563016497898154e-05, + "loss": 0.5599, + "step": 19895 + }, + { + "epoch": 0.5462932454695223, + "grad_norm": 0.3708517253398895, + "learning_rate": 1.656269062880309e-05, + "loss": 0.4762, + "step": 19896 + }, + { + "epoch": 0.5463207029104887, + "grad_norm": 0.4672447144985199, + "learning_rate": 1.6562364747466512e-05, + "loss": 0.5197, + "step": 19897 + }, + { + "epoch": 0.5463481603514553, + "grad_norm": 0.3863880932331085, + "learning_rate": 1.6562038853889027e-05, + "loss": 0.4877, + "step": 19898 + }, + { + "epoch": 0.5463756177924217, + "grad_norm": 0.3615390956401825, + "learning_rate": 1.6561712948071253e-05, + "loss": 0.5007, + "step": 19899 + }, + { + "epoch": 0.5464030752333883, + "grad_norm": 0.3584060072898865, + "learning_rate": 1.6561387030013784e-05, + "loss": 0.5078, + "step": 19900 + }, + { + "epoch": 0.5464305326743547, + "grad_norm": 0.3494945764541626, + "learning_rate": 1.6561061099717235e-05, + "loss": 0.5717, + "step": 19901 + }, + { + "epoch": 0.5464579901153213, + "grad_norm": 0.4984592795372009, + "learning_rate": 1.656073515718221e-05, + "loss": 0.5021, + "step": 19902 + }, + { + "epoch": 0.5464854475562878, + "grad_norm": 0.5854906439781189, + "learning_rate": 1.6560409202409324e-05, + "loss": 0.5717, + "step": 19903 + }, + { + "epoch": 0.5465129049972542, + "grad_norm": 0.4218493402004242, + "learning_rate": 1.656008323539918e-05, + "loss": 0.5161, + "step": 19904 + }, + { + "epoch": 0.5465403624382208, + "grad_norm": 0.3701759874820709, + "learning_rate": 1.6559757256152388e-05, + "loss": 0.4088, + "step": 19905 + }, + { + "epoch": 0.5465678198791872, + "grad_norm": 0.35595473647117615, + "learning_rate": 1.6559431264669553e-05, + "loss": 0.4858, + "step": 19906 + }, + { + "epoch": 0.5465952773201538, + "grad_norm": 0.4086250364780426, + "learning_rate": 1.655910526095129e-05, + "loss": 0.6159, + "step": 19907 + }, + { + "epoch": 0.5466227347611202, + "grad_norm": 0.43254274129867554, + "learning_rate": 1.65587792449982e-05, + "loss": 0.4831, + "step": 19908 + }, + { + "epoch": 0.5466501922020868, + "grad_norm": 0.3610321581363678, + "learning_rate": 1.655845321681089e-05, + "loss": 0.4254, + "step": 19909 + }, + { + "epoch": 0.5466776496430533, + "grad_norm": 0.3576514720916748, + "learning_rate": 1.6558127176389973e-05, + "loss": 0.5133, + "step": 19910 + }, + { + "epoch": 0.5467051070840198, + "grad_norm": 0.41317903995513916, + "learning_rate": 1.6557801123736056e-05, + "loss": 0.5717, + "step": 19911 + }, + { + "epoch": 0.5467325645249863, + "grad_norm": 0.4436034560203552, + "learning_rate": 1.655747505884975e-05, + "loss": 0.5666, + "step": 19912 + }, + { + "epoch": 0.5467600219659527, + "grad_norm": 0.35503655672073364, + "learning_rate": 1.6557148981731656e-05, + "loss": 0.4905, + "step": 19913 + }, + { + "epoch": 0.5467874794069193, + "grad_norm": 0.38608771562576294, + "learning_rate": 1.655682289238239e-05, + "loss": 0.5559, + "step": 19914 + }, + { + "epoch": 0.5468149368478857, + "grad_norm": 0.36867034435272217, + "learning_rate": 1.6556496790802552e-05, + "loss": 0.5028, + "step": 19915 + }, + { + "epoch": 0.5468423942888523, + "grad_norm": 0.40870118141174316, + "learning_rate": 1.6556170676992754e-05, + "loss": 0.4378, + "step": 19916 + }, + { + "epoch": 0.5468698517298188, + "grad_norm": 0.3659781813621521, + "learning_rate": 1.655584455095361e-05, + "loss": 0.5354, + "step": 19917 + }, + { + "epoch": 0.5468973091707853, + "grad_norm": 0.30427059531211853, + "learning_rate": 1.655551841268572e-05, + "loss": 0.4317, + "step": 19918 + }, + { + "epoch": 0.5469247666117518, + "grad_norm": 0.3560012876987457, + "learning_rate": 1.65551922621897e-05, + "loss": 0.5209, + "step": 19919 + }, + { + "epoch": 0.5469522240527183, + "grad_norm": 0.3907695412635803, + "learning_rate": 1.6554866099466154e-05, + "loss": 0.5049, + "step": 19920 + }, + { + "epoch": 0.5469796814936848, + "grad_norm": 0.41198354959487915, + "learning_rate": 1.6554539924515688e-05, + "loss": 0.5299, + "step": 19921 + }, + { + "epoch": 0.5470071389346512, + "grad_norm": 0.3357371687889099, + "learning_rate": 1.6554213737338915e-05, + "loss": 0.5208, + "step": 19922 + }, + { + "epoch": 0.5470345963756178, + "grad_norm": 0.3806905448436737, + "learning_rate": 1.655388753793644e-05, + "loss": 0.4939, + "step": 19923 + }, + { + "epoch": 0.5470620538165843, + "grad_norm": 0.3641279339790344, + "learning_rate": 1.6553561326308872e-05, + "loss": 0.4343, + "step": 19924 + }, + { + "epoch": 0.5470895112575508, + "grad_norm": 0.4182398021221161, + "learning_rate": 1.6553235102456822e-05, + "loss": 0.5325, + "step": 19925 + }, + { + "epoch": 0.5471169686985173, + "grad_norm": 0.35697558522224426, + "learning_rate": 1.6552908866380898e-05, + "loss": 0.4123, + "step": 19926 + }, + { + "epoch": 0.5471444261394838, + "grad_norm": 0.5132626891136169, + "learning_rate": 1.6552582618081706e-05, + "loss": 0.556, + "step": 19927 + }, + { + "epoch": 0.5471718835804503, + "grad_norm": 0.4633185565471649, + "learning_rate": 1.6552256357559855e-05, + "loss": 0.5683, + "step": 19928 + }, + { + "epoch": 0.5471993410214168, + "grad_norm": 0.3541932702064514, + "learning_rate": 1.6551930084815955e-05, + "loss": 0.5065, + "step": 19929 + }, + { + "epoch": 0.5472267984623833, + "grad_norm": 0.42525404691696167, + "learning_rate": 1.6551603799850618e-05, + "loss": 0.5638, + "step": 19930 + }, + { + "epoch": 0.5472542559033499, + "grad_norm": 0.3838592767715454, + "learning_rate": 1.6551277502664444e-05, + "loss": 0.5113, + "step": 19931 + }, + { + "epoch": 0.5472817133443163, + "grad_norm": 0.3613794147968292, + "learning_rate": 1.6550951193258052e-05, + "loss": 0.458, + "step": 19932 + }, + { + "epoch": 0.5473091707852829, + "grad_norm": 0.36043307185173035, + "learning_rate": 1.655062487163204e-05, + "loss": 0.7077, + "step": 19933 + }, + { + "epoch": 0.5473366282262493, + "grad_norm": 0.3506811559200287, + "learning_rate": 1.6550298537787023e-05, + "loss": 0.506, + "step": 19934 + }, + { + "epoch": 0.5473640856672158, + "grad_norm": 0.4453998804092407, + "learning_rate": 1.654997219172361e-05, + "loss": 0.5744, + "step": 19935 + }, + { + "epoch": 0.5473915431081823, + "grad_norm": 0.3625525236129761, + "learning_rate": 1.6549645833442407e-05, + "loss": 0.435, + "step": 19936 + }, + { + "epoch": 0.5474190005491488, + "grad_norm": 0.45043864846229553, + "learning_rate": 1.6549319462944027e-05, + "loss": 0.5754, + "step": 19937 + }, + { + "epoch": 0.5474464579901154, + "grad_norm": 0.41232365369796753, + "learning_rate": 1.654899308022907e-05, + "loss": 0.4608, + "step": 19938 + }, + { + "epoch": 0.5474739154310818, + "grad_norm": 0.3940925598144531, + "learning_rate": 1.654866668529816e-05, + "loss": 0.4864, + "step": 19939 + }, + { + "epoch": 0.5475013728720484, + "grad_norm": 0.3694845736026764, + "learning_rate": 1.654834027815189e-05, + "loss": 0.4968, + "step": 19940 + }, + { + "epoch": 0.5475288303130148, + "grad_norm": 0.32306110858917236, + "learning_rate": 1.6548013858790873e-05, + "loss": 0.547, + "step": 19941 + }, + { + "epoch": 0.5475562877539814, + "grad_norm": 0.3939873278141022, + "learning_rate": 1.6547687427215726e-05, + "loss": 0.5109, + "step": 19942 + }, + { + "epoch": 0.5475837451949478, + "grad_norm": 0.36899036169052124, + "learning_rate": 1.654736098342705e-05, + "loss": 0.5147, + "step": 19943 + }, + { + "epoch": 0.5476112026359143, + "grad_norm": 0.36131832003593445, + "learning_rate": 1.6547034527425453e-05, + "loss": 0.4444, + "step": 19944 + }, + { + "epoch": 0.5476386600768809, + "grad_norm": 0.3484152555465698, + "learning_rate": 1.654670805921155e-05, + "loss": 0.4486, + "step": 19945 + }, + { + "epoch": 0.5476661175178473, + "grad_norm": 0.37589648365974426, + "learning_rate": 1.6546381578785947e-05, + "loss": 0.4855, + "step": 19946 + }, + { + "epoch": 0.5476935749588139, + "grad_norm": 0.4037702679634094, + "learning_rate": 1.6546055086149255e-05, + "loss": 0.5966, + "step": 19947 + }, + { + "epoch": 0.5477210323997803, + "grad_norm": 0.4214918613433838, + "learning_rate": 1.654572858130208e-05, + "loss": 0.6011, + "step": 19948 + }, + { + "epoch": 0.5477484898407469, + "grad_norm": 0.42284688353538513, + "learning_rate": 1.6545402064245028e-05, + "loss": 0.541, + "step": 19949 + }, + { + "epoch": 0.5477759472817133, + "grad_norm": 0.3538191616535187, + "learning_rate": 1.654507553497872e-05, + "loss": 0.4497, + "step": 19950 + }, + { + "epoch": 0.5478034047226799, + "grad_norm": 0.33278578519821167, + "learning_rate": 1.654474899350375e-05, + "loss": 0.3818, + "step": 19951 + }, + { + "epoch": 0.5478308621636463, + "grad_norm": 0.4145607650279999, + "learning_rate": 1.6544422439820737e-05, + "loss": 0.4852, + "step": 19952 + }, + { + "epoch": 0.5478583196046128, + "grad_norm": 0.38194042444229126, + "learning_rate": 1.6544095873930288e-05, + "loss": 0.518, + "step": 19953 + }, + { + "epoch": 0.5478857770455794, + "grad_norm": 0.49839431047439575, + "learning_rate": 1.6543769295833007e-05, + "loss": 0.5454, + "step": 19954 + }, + { + "epoch": 0.5479132344865458, + "grad_norm": 0.36886119842529297, + "learning_rate": 1.6543442705529513e-05, + "loss": 0.4344, + "step": 19955 + }, + { + "epoch": 0.5479406919275124, + "grad_norm": 0.37153902649879456, + "learning_rate": 1.654311610302041e-05, + "loss": 0.4863, + "step": 19956 + }, + { + "epoch": 0.5479681493684788, + "grad_norm": 0.38288459181785583, + "learning_rate": 1.6542789488306302e-05, + "loss": 0.5403, + "step": 19957 + }, + { + "epoch": 0.5479956068094454, + "grad_norm": 0.34385743737220764, + "learning_rate": 1.654246286138781e-05, + "loss": 0.4741, + "step": 19958 + }, + { + "epoch": 0.5480230642504118, + "grad_norm": 0.3775956928730011, + "learning_rate": 1.6542136222265532e-05, + "loss": 0.4401, + "step": 19959 + }, + { + "epoch": 0.5480505216913784, + "grad_norm": 0.4213370084762573, + "learning_rate": 1.6541809570940084e-05, + "loss": 0.5059, + "step": 19960 + }, + { + "epoch": 0.5480779791323449, + "grad_norm": 0.37350255250930786, + "learning_rate": 1.6541482907412073e-05, + "loss": 0.4938, + "step": 19961 + }, + { + "epoch": 0.5481054365733113, + "grad_norm": 0.3922489881515503, + "learning_rate": 1.6541156231682108e-05, + "loss": 0.5791, + "step": 19962 + }, + { + "epoch": 0.5481328940142779, + "grad_norm": 0.3642899990081787, + "learning_rate": 1.6540829543750797e-05, + "loss": 0.5212, + "step": 19963 + }, + { + "epoch": 0.5481603514552443, + "grad_norm": 0.46141111850738525, + "learning_rate": 1.6540502843618755e-05, + "loss": 0.5354, + "step": 19964 + }, + { + "epoch": 0.5481878088962109, + "grad_norm": 0.4368208944797516, + "learning_rate": 1.6540176131286586e-05, + "loss": 0.5585, + "step": 19965 + }, + { + "epoch": 0.5482152663371773, + "grad_norm": 0.3540128767490387, + "learning_rate": 1.6539849406754902e-05, + "loss": 0.4757, + "step": 19966 + }, + { + "epoch": 0.5482427237781439, + "grad_norm": 0.3513127565383911, + "learning_rate": 1.653952267002431e-05, + "loss": 0.4945, + "step": 19967 + }, + { + "epoch": 0.5482701812191104, + "grad_norm": 0.5294954776763916, + "learning_rate": 1.6539195921095423e-05, + "loss": 0.5971, + "step": 19968 + }, + { + "epoch": 0.5482976386600769, + "grad_norm": 0.49086958169937134, + "learning_rate": 1.6538869159968847e-05, + "loss": 0.5841, + "step": 19969 + }, + { + "epoch": 0.5483250961010434, + "grad_norm": 0.3714213967323303, + "learning_rate": 1.6538542386645193e-05, + "loss": 0.4981, + "step": 19970 + }, + { + "epoch": 0.5483525535420098, + "grad_norm": 0.31804320216178894, + "learning_rate": 1.653821560112507e-05, + "loss": 0.4849, + "step": 19971 + }, + { + "epoch": 0.5483800109829764, + "grad_norm": 0.5123619437217712, + "learning_rate": 1.653788880340909e-05, + "loss": 0.6265, + "step": 19972 + }, + { + "epoch": 0.5484074684239428, + "grad_norm": 0.3769945800304413, + "learning_rate": 1.6537561993497862e-05, + "loss": 0.619, + "step": 19973 + }, + { + "epoch": 0.5484349258649094, + "grad_norm": 0.4603307545185089, + "learning_rate": 1.653723517139199e-05, + "loss": 0.5796, + "step": 19974 + }, + { + "epoch": 0.5484623833058759, + "grad_norm": 0.3400490880012512, + "learning_rate": 1.653690833709209e-05, + "loss": 0.4955, + "step": 19975 + }, + { + "epoch": 0.5484898407468424, + "grad_norm": 0.36047425866127014, + "learning_rate": 1.653658149059877e-05, + "loss": 0.5009, + "step": 19976 + }, + { + "epoch": 0.5485172981878089, + "grad_norm": 0.40989553928375244, + "learning_rate": 1.653625463191264e-05, + "loss": 0.4637, + "step": 19977 + }, + { + "epoch": 0.5485447556287754, + "grad_norm": 0.4031884968280792, + "learning_rate": 1.6535927761034308e-05, + "loss": 0.4905, + "step": 19978 + }, + { + "epoch": 0.5485722130697419, + "grad_norm": 0.38078421354293823, + "learning_rate": 1.6535600877964383e-05, + "loss": 0.4295, + "step": 19979 + }, + { + "epoch": 0.5485996705107083, + "grad_norm": 0.3663323223590851, + "learning_rate": 1.653527398270348e-05, + "loss": 0.5086, + "step": 19980 + }, + { + "epoch": 0.5486271279516749, + "grad_norm": 0.3929852843284607, + "learning_rate": 1.6534947075252205e-05, + "loss": 0.487, + "step": 19981 + }, + { + "epoch": 0.5486545853926414, + "grad_norm": 0.4373931288719177, + "learning_rate": 1.6534620155611164e-05, + "loss": 0.5237, + "step": 19982 + }, + { + "epoch": 0.5486820428336079, + "grad_norm": 0.4501955211162567, + "learning_rate": 1.6534293223780974e-05, + "loss": 0.4773, + "step": 19983 + }, + { + "epoch": 0.5487095002745744, + "grad_norm": 0.4095143675804138, + "learning_rate": 1.6533966279762245e-05, + "loss": 0.4761, + "step": 19984 + }, + { + "epoch": 0.5487369577155409, + "grad_norm": 0.38518980145454407, + "learning_rate": 1.6533639323555575e-05, + "loss": 0.4765, + "step": 19985 + }, + { + "epoch": 0.5487644151565074, + "grad_norm": 0.9824503660202026, + "learning_rate": 1.6533312355161588e-05, + "loss": 0.5444, + "step": 19986 + }, + { + "epoch": 0.5487918725974739, + "grad_norm": 0.3876451253890991, + "learning_rate": 1.6532985374580888e-05, + "loss": 0.5532, + "step": 19987 + }, + { + "epoch": 0.5488193300384404, + "grad_norm": 0.3673728406429291, + "learning_rate": 1.653265838181408e-05, + "loss": 0.5136, + "step": 19988 + }, + { + "epoch": 0.548846787479407, + "grad_norm": 0.3760165572166443, + "learning_rate": 1.6532331376861786e-05, + "loss": 0.4365, + "step": 19989 + }, + { + "epoch": 0.5488742449203734, + "grad_norm": 0.3851488530635834, + "learning_rate": 1.6532004359724608e-05, + "loss": 0.4773, + "step": 19990 + }, + { + "epoch": 0.54890170236134, + "grad_norm": 0.348590224981308, + "learning_rate": 1.6531677330403155e-05, + "loss": 0.4045, + "step": 19991 + }, + { + "epoch": 0.5489291598023064, + "grad_norm": 0.3545083701610565, + "learning_rate": 1.6531350288898038e-05, + "loss": 0.5637, + "step": 19992 + }, + { + "epoch": 0.5489566172432729, + "grad_norm": 0.6794771552085876, + "learning_rate": 1.653102323520987e-05, + "loss": 0.4612, + "step": 19993 + }, + { + "epoch": 0.5489840746842394, + "grad_norm": 0.38179340958595276, + "learning_rate": 1.653069616933926e-05, + "loss": 0.5202, + "step": 19994 + }, + { + "epoch": 0.5490115321252059, + "grad_norm": 0.4083690345287323, + "learning_rate": 1.6530369091286816e-05, + "loss": 0.6058, + "step": 19995 + }, + { + "epoch": 0.5490389895661725, + "grad_norm": 0.3734842836856842, + "learning_rate": 1.653004200105315e-05, + "loss": 0.5235, + "step": 19996 + }, + { + "epoch": 0.5490664470071389, + "grad_norm": 0.37014156579971313, + "learning_rate": 1.6529714898638874e-05, + "loss": 0.4794, + "step": 19997 + }, + { + "epoch": 0.5490939044481055, + "grad_norm": 0.361056387424469, + "learning_rate": 1.652938778404459e-05, + "loss": 0.5904, + "step": 19998 + }, + { + "epoch": 0.5491213618890719, + "grad_norm": 0.39034304022789, + "learning_rate": 1.6529060657270917e-05, + "loss": 0.4734, + "step": 19999 + }, + { + "epoch": 0.5491488193300385, + "grad_norm": 0.36120620369911194, + "learning_rate": 1.652873351831846e-05, + "loss": 0.4637, + "step": 20000 + }, + { + "epoch": 0.5491762767710049, + "grad_norm": 0.3380375802516937, + "learning_rate": 1.6528406367187836e-05, + "loss": 0.4947, + "step": 20001 + }, + { + "epoch": 0.5492037342119714, + "grad_norm": 0.8314143419265747, + "learning_rate": 1.652807920387965e-05, + "loss": 0.4797, + "step": 20002 + }, + { + "epoch": 0.549231191652938, + "grad_norm": 0.3772182762622833, + "learning_rate": 1.6527752028394506e-05, + "loss": 0.4366, + "step": 20003 + }, + { + "epoch": 0.5492586490939044, + "grad_norm": 0.38172775506973267, + "learning_rate": 1.6527424840733027e-05, + "loss": 0.4718, + "step": 20004 + }, + { + "epoch": 0.549286106534871, + "grad_norm": 0.3680098056793213, + "learning_rate": 1.6527097640895812e-05, + "loss": 0.5151, + "step": 20005 + }, + { + "epoch": 0.5493135639758374, + "grad_norm": 0.34811854362487793, + "learning_rate": 1.652677042888348e-05, + "loss": 0.5155, + "step": 20006 + }, + { + "epoch": 0.549341021416804, + "grad_norm": 0.3689127564430237, + "learning_rate": 1.6526443204696644e-05, + "loss": 0.5305, + "step": 20007 + }, + { + "epoch": 0.5493684788577704, + "grad_norm": 0.43038809299468994, + "learning_rate": 1.65261159683359e-05, + "loss": 0.5025, + "step": 20008 + }, + { + "epoch": 0.549395936298737, + "grad_norm": 0.4039490222930908, + "learning_rate": 1.652578871980187e-05, + "loss": 0.5353, + "step": 20009 + }, + { + "epoch": 0.5494233937397035, + "grad_norm": 0.37857624888420105, + "learning_rate": 1.652546145909516e-05, + "loss": 0.4927, + "step": 20010 + }, + { + "epoch": 0.5494508511806699, + "grad_norm": 0.38521790504455566, + "learning_rate": 1.6525134186216384e-05, + "loss": 0.4383, + "step": 20011 + }, + { + "epoch": 0.5494783086216365, + "grad_norm": 0.3654613196849823, + "learning_rate": 1.652480690116615e-05, + "loss": 0.4851, + "step": 20012 + }, + { + "epoch": 0.5495057660626029, + "grad_norm": 0.368190199136734, + "learning_rate": 1.6524479603945066e-05, + "loss": 0.5419, + "step": 20013 + }, + { + "epoch": 0.5495332235035695, + "grad_norm": 0.34037917852401733, + "learning_rate": 1.6524152294553748e-05, + "loss": 0.5214, + "step": 20014 + }, + { + "epoch": 0.5495606809445359, + "grad_norm": 0.3990129232406616, + "learning_rate": 1.6523824972992803e-05, + "loss": 0.54, + "step": 20015 + }, + { + "epoch": 0.5495881383855025, + "grad_norm": 0.3980567157268524, + "learning_rate": 1.6523497639262837e-05, + "loss": 0.5447, + "step": 20016 + }, + { + "epoch": 0.549615595826469, + "grad_norm": 0.3174089193344116, + "learning_rate": 1.6523170293364473e-05, + "loss": 0.4117, + "step": 20017 + }, + { + "epoch": 0.5496430532674355, + "grad_norm": 0.39309269189834595, + "learning_rate": 1.6522842935298312e-05, + "loss": 0.5208, + "step": 20018 + }, + { + "epoch": 0.549670510708402, + "grad_norm": 0.41756364703178406, + "learning_rate": 1.6522515565064967e-05, + "loss": 0.5971, + "step": 20019 + }, + { + "epoch": 0.5496979681493684, + "grad_norm": 0.3815871775150299, + "learning_rate": 1.652218818266505e-05, + "loss": 0.5244, + "step": 20020 + }, + { + "epoch": 0.549725425590335, + "grad_norm": 0.3868386149406433, + "learning_rate": 1.6521860788099165e-05, + "loss": 0.5916, + "step": 20021 + }, + { + "epoch": 0.5497528830313014, + "grad_norm": 0.3638834059238434, + "learning_rate": 1.6521533381367936e-05, + "loss": 0.481, + "step": 20022 + }, + { + "epoch": 0.549780340472268, + "grad_norm": 0.39774858951568604, + "learning_rate": 1.652120596247196e-05, + "loss": 0.5566, + "step": 20023 + }, + { + "epoch": 0.5498077979132345, + "grad_norm": 0.3977053761482239, + "learning_rate": 1.652087853141186e-05, + "loss": 0.4616, + "step": 20024 + }, + { + "epoch": 0.549835255354201, + "grad_norm": 0.380033940076828, + "learning_rate": 1.6520551088188234e-05, + "loss": 0.5125, + "step": 20025 + }, + { + "epoch": 0.5498627127951675, + "grad_norm": 0.3897496461868286, + "learning_rate": 1.6520223632801697e-05, + "loss": 0.5308, + "step": 20026 + }, + { + "epoch": 0.549890170236134, + "grad_norm": 0.500124454498291, + "learning_rate": 1.651989616525287e-05, + "loss": 0.5877, + "step": 20027 + }, + { + "epoch": 0.5499176276771005, + "grad_norm": 0.3661353588104248, + "learning_rate": 1.651956868554235e-05, + "loss": 0.4939, + "step": 20028 + }, + { + "epoch": 0.549945085118067, + "grad_norm": 0.39508748054504395, + "learning_rate": 1.6519241193670754e-05, + "loss": 0.6036, + "step": 20029 + }, + { + "epoch": 0.5499725425590335, + "grad_norm": 0.3221937417984009, + "learning_rate": 1.6518913689638692e-05, + "loss": 0.4767, + "step": 20030 + }, + { + "epoch": 0.55, + "grad_norm": 0.3603828549385071, + "learning_rate": 1.651858617344678e-05, + "loss": 0.5051, + "step": 20031 + }, + { + "epoch": 0.5500274574409665, + "grad_norm": 0.3769824802875519, + "learning_rate": 1.651825864509562e-05, + "loss": 0.5751, + "step": 20032 + }, + { + "epoch": 0.550054914881933, + "grad_norm": 0.3834088444709778, + "learning_rate": 1.651793110458583e-05, + "loss": 0.4872, + "step": 20033 + }, + { + "epoch": 0.5500823723228995, + "grad_norm": 0.40044623613357544, + "learning_rate": 1.6517603551918016e-05, + "loss": 0.4601, + "step": 20034 + }, + { + "epoch": 0.550109829763866, + "grad_norm": 0.5327764749526978, + "learning_rate": 1.6517275987092793e-05, + "loss": 0.5834, + "step": 20035 + }, + { + "epoch": 0.5501372872048325, + "grad_norm": 0.37087032198905945, + "learning_rate": 1.651694841011077e-05, + "loss": 0.528, + "step": 20036 + }, + { + "epoch": 0.550164744645799, + "grad_norm": 0.36727747321128845, + "learning_rate": 1.6516620820972554e-05, + "loss": 0.505, + "step": 20037 + }, + { + "epoch": 0.5501922020867656, + "grad_norm": 0.3190910816192627, + "learning_rate": 1.6516293219678768e-05, + "loss": 0.5048, + "step": 20038 + }, + { + "epoch": 0.550219659527732, + "grad_norm": 0.40944188833236694, + "learning_rate": 1.6515965606230008e-05, + "loss": 0.6021, + "step": 20039 + }, + { + "epoch": 0.5502471169686985, + "grad_norm": 0.49952229857444763, + "learning_rate": 1.65156379806269e-05, + "loss": 0.6147, + "step": 20040 + }, + { + "epoch": 0.550274574409665, + "grad_norm": 0.371192067861557, + "learning_rate": 1.6515310342870038e-05, + "loss": 0.6078, + "step": 20041 + }, + { + "epoch": 0.5503020318506315, + "grad_norm": 0.34411999583244324, + "learning_rate": 1.6514982692960047e-05, + "loss": 0.479, + "step": 20042 + }, + { + "epoch": 0.550329489291598, + "grad_norm": 0.33148688077926636, + "learning_rate": 1.651465503089754e-05, + "loss": 0.5092, + "step": 20043 + }, + { + "epoch": 0.5503569467325645, + "grad_norm": 0.3717426061630249, + "learning_rate": 1.6514327356683114e-05, + "loss": 0.4716, + "step": 20044 + }, + { + "epoch": 0.5503844041735311, + "grad_norm": 0.36683133244514465, + "learning_rate": 1.6513999670317394e-05, + "loss": 0.4692, + "step": 20045 + }, + { + "epoch": 0.5504118616144975, + "grad_norm": 0.3692915737628937, + "learning_rate": 1.6513671971800983e-05, + "loss": 0.5044, + "step": 20046 + }, + { + "epoch": 0.5504393190554641, + "grad_norm": 0.4241466224193573, + "learning_rate": 1.6513344261134495e-05, + "loss": 0.5323, + "step": 20047 + }, + { + "epoch": 0.5504667764964305, + "grad_norm": 0.36769038438796997, + "learning_rate": 1.651301653831854e-05, + "loss": 0.4786, + "step": 20048 + }, + { + "epoch": 0.550494233937397, + "grad_norm": 0.4476281702518463, + "learning_rate": 1.651268880335373e-05, + "loss": 0.5607, + "step": 20049 + }, + { + "epoch": 0.5505216913783635, + "grad_norm": 0.37187573313713074, + "learning_rate": 1.651236105624068e-05, + "loss": 0.5287, + "step": 20050 + }, + { + "epoch": 0.55054914881933, + "grad_norm": 0.3753018379211426, + "learning_rate": 1.6512033296979994e-05, + "loss": 0.4092, + "step": 20051 + }, + { + "epoch": 0.5505766062602966, + "grad_norm": 0.39985421299934387, + "learning_rate": 1.651170552557229e-05, + "loss": 0.5441, + "step": 20052 + }, + { + "epoch": 0.550604063701263, + "grad_norm": 0.36357244849205017, + "learning_rate": 1.6511377742018178e-05, + "loss": 0.5332, + "step": 20053 + }, + { + "epoch": 0.5506315211422296, + "grad_norm": 0.3607281446456909, + "learning_rate": 1.6511049946318266e-05, + "loss": 0.5544, + "step": 20054 + }, + { + "epoch": 0.550658978583196, + "grad_norm": 0.3391963243484497, + "learning_rate": 1.651072213847317e-05, + "loss": 0.4539, + "step": 20055 + }, + { + "epoch": 0.5506864360241626, + "grad_norm": 0.43458035588264465, + "learning_rate": 1.6510394318483498e-05, + "loss": 0.4617, + "step": 20056 + }, + { + "epoch": 0.550713893465129, + "grad_norm": 0.37135159969329834, + "learning_rate": 1.6510066486349862e-05, + "loss": 0.5056, + "step": 20057 + }, + { + "epoch": 0.5507413509060956, + "grad_norm": 0.35332560539245605, + "learning_rate": 1.6509738642072878e-05, + "loss": 0.4339, + "step": 20058 + }, + { + "epoch": 0.5507688083470621, + "grad_norm": 0.37952399253845215, + "learning_rate": 1.6509410785653147e-05, + "loss": 0.4894, + "step": 20059 + }, + { + "epoch": 0.5507962657880285, + "grad_norm": 0.3433246910572052, + "learning_rate": 1.650908291709129e-05, + "loss": 0.4539, + "step": 20060 + }, + { + "epoch": 0.5508237232289951, + "grad_norm": 0.3899567723274231, + "learning_rate": 1.650875503638792e-05, + "loss": 0.5184, + "step": 20061 + }, + { + "epoch": 0.5508511806699615, + "grad_norm": 0.4263851046562195, + "learning_rate": 1.650842714354364e-05, + "loss": 0.5597, + "step": 20062 + }, + { + "epoch": 0.5508786381109281, + "grad_norm": 0.3864534795284271, + "learning_rate": 1.6508099238559067e-05, + "loss": 0.4951, + "step": 20063 + }, + { + "epoch": 0.5509060955518945, + "grad_norm": 0.5819922685623169, + "learning_rate": 1.6507771321434813e-05, + "loss": 0.533, + "step": 20064 + }, + { + "epoch": 0.5509335529928611, + "grad_norm": 0.31596049666404724, + "learning_rate": 1.6507443392171486e-05, + "loss": 0.3735, + "step": 20065 + }, + { + "epoch": 0.5509610104338276, + "grad_norm": 0.377443790435791, + "learning_rate": 1.6507115450769705e-05, + "loss": 0.486, + "step": 20066 + }, + { + "epoch": 0.550988467874794, + "grad_norm": 0.46061384677886963, + "learning_rate": 1.6506787497230075e-05, + "loss": 0.5749, + "step": 20067 + }, + { + "epoch": 0.5510159253157606, + "grad_norm": 0.38082119822502136, + "learning_rate": 1.650645953155321e-05, + "loss": 0.5199, + "step": 20068 + }, + { + "epoch": 0.551043382756727, + "grad_norm": 0.40951216220855713, + "learning_rate": 1.6506131553739718e-05, + "loss": 0.5188, + "step": 20069 + }, + { + "epoch": 0.5510708401976936, + "grad_norm": 0.3669876158237457, + "learning_rate": 1.6505803563790214e-05, + "loss": 0.4213, + "step": 20070 + }, + { + "epoch": 0.55109829763866, + "grad_norm": 0.37700650095939636, + "learning_rate": 1.6505475561705313e-05, + "loss": 0.5001, + "step": 20071 + }, + { + "epoch": 0.5511257550796266, + "grad_norm": 0.3806541860103607, + "learning_rate": 1.650514754748562e-05, + "loss": 0.633, + "step": 20072 + }, + { + "epoch": 0.5511532125205931, + "grad_norm": 0.4037267863750458, + "learning_rate": 1.6504819521131756e-05, + "loss": 0.456, + "step": 20073 + }, + { + "epoch": 0.5511806699615596, + "grad_norm": 0.41256874799728394, + "learning_rate": 1.6504491482644326e-05, + "loss": 0.4879, + "step": 20074 + }, + { + "epoch": 0.5512081274025261, + "grad_norm": 0.3532904088497162, + "learning_rate": 1.650416343202394e-05, + "loss": 0.3871, + "step": 20075 + }, + { + "epoch": 0.5512355848434926, + "grad_norm": 0.36304399371147156, + "learning_rate": 1.6503835369271215e-05, + "loss": 0.4851, + "step": 20076 + }, + { + "epoch": 0.5512630422844591, + "grad_norm": 0.3868916928768158, + "learning_rate": 1.6503507294386763e-05, + "loss": 0.5403, + "step": 20077 + }, + { + "epoch": 0.5512904997254255, + "grad_norm": 0.3792324662208557, + "learning_rate": 1.6503179207371193e-05, + "loss": 0.5279, + "step": 20078 + }, + { + "epoch": 0.5513179571663921, + "grad_norm": 0.3800071179866791, + "learning_rate": 1.650285110822512e-05, + "loss": 0.5332, + "step": 20079 + }, + { + "epoch": 0.5513454146073586, + "grad_norm": 0.402853399515152, + "learning_rate": 1.650252299694915e-05, + "loss": 0.503, + "step": 20080 + }, + { + "epoch": 0.5513728720483251, + "grad_norm": 0.38800516724586487, + "learning_rate": 1.6502194873543902e-05, + "loss": 0.4937, + "step": 20081 + }, + { + "epoch": 0.5514003294892916, + "grad_norm": 0.3792102038860321, + "learning_rate": 1.6501866738009984e-05, + "loss": 0.4799, + "step": 20082 + }, + { + "epoch": 0.5514277869302581, + "grad_norm": 0.5356727242469788, + "learning_rate": 1.6501538590348014e-05, + "loss": 0.5811, + "step": 20083 + }, + { + "epoch": 0.5514552443712246, + "grad_norm": 0.3773467540740967, + "learning_rate": 1.650121043055859e-05, + "loss": 0.4712, + "step": 20084 + }, + { + "epoch": 0.5514827018121911, + "grad_norm": 0.3755953013896942, + "learning_rate": 1.650088225864234e-05, + "loss": 0.5593, + "step": 20085 + }, + { + "epoch": 0.5515101592531576, + "grad_norm": 0.38026559352874756, + "learning_rate": 1.650055407459987e-05, + "loss": 0.5286, + "step": 20086 + }, + { + "epoch": 0.5515376166941242, + "grad_norm": 0.393631249666214, + "learning_rate": 1.650022587843179e-05, + "loss": 0.496, + "step": 20087 + }, + { + "epoch": 0.5515650741350906, + "grad_norm": 0.3675176203250885, + "learning_rate": 1.6499897670138718e-05, + "loss": 0.4561, + "step": 20088 + }, + { + "epoch": 0.5515925315760571, + "grad_norm": 0.346792072057724, + "learning_rate": 1.6499569449721256e-05, + "loss": 0.4357, + "step": 20089 + }, + { + "epoch": 0.5516199890170236, + "grad_norm": 0.3499426245689392, + "learning_rate": 1.649924121718003e-05, + "loss": 0.5146, + "step": 20090 + }, + { + "epoch": 0.5516474464579901, + "grad_norm": 0.3559553027153015, + "learning_rate": 1.649891297251564e-05, + "loss": 0.4824, + "step": 20091 + }, + { + "epoch": 0.5516749038989566, + "grad_norm": 0.3966233730316162, + "learning_rate": 1.6498584715728703e-05, + "loss": 0.4536, + "step": 20092 + }, + { + "epoch": 0.5517023613399231, + "grad_norm": 0.3696562945842743, + "learning_rate": 1.649825644681983e-05, + "loss": 0.5782, + "step": 20093 + }, + { + "epoch": 0.5517298187808897, + "grad_norm": 0.39399251341819763, + "learning_rate": 1.6497928165789637e-05, + "loss": 0.491, + "step": 20094 + }, + { + "epoch": 0.5517572762218561, + "grad_norm": 0.3877287805080414, + "learning_rate": 1.6497599872638736e-05, + "loss": 0.5827, + "step": 20095 + }, + { + "epoch": 0.5517847336628227, + "grad_norm": 0.3807818293571472, + "learning_rate": 1.6497271567367733e-05, + "loss": 0.5399, + "step": 20096 + }, + { + "epoch": 0.5518121911037891, + "grad_norm": 0.3848816454410553, + "learning_rate": 1.649694324997725e-05, + "loss": 0.5023, + "step": 20097 + }, + { + "epoch": 0.5518396485447556, + "grad_norm": 0.3904176354408264, + "learning_rate": 1.6496614920467894e-05, + "loss": 0.4987, + "step": 20098 + }, + { + "epoch": 0.5518671059857221, + "grad_norm": 0.3425867259502411, + "learning_rate": 1.649628657884027e-05, + "loss": 0.4905, + "step": 20099 + }, + { + "epoch": 0.5518945634266886, + "grad_norm": 0.40030649304389954, + "learning_rate": 1.649595822509501e-05, + "loss": 0.4511, + "step": 20100 + }, + { + "epoch": 0.5519220208676552, + "grad_norm": 0.35986724495887756, + "learning_rate": 1.6495629859232706e-05, + "loss": 0.5015, + "step": 20101 + }, + { + "epoch": 0.5519494783086216, + "grad_norm": 0.38332024216651917, + "learning_rate": 1.649530148125398e-05, + "loss": 0.5208, + "step": 20102 + }, + { + "epoch": 0.5519769357495882, + "grad_norm": 0.45404988527297974, + "learning_rate": 1.6494973091159444e-05, + "loss": 0.4997, + "step": 20103 + }, + { + "epoch": 0.5520043931905546, + "grad_norm": 0.38846924901008606, + "learning_rate": 1.649464468894971e-05, + "loss": 0.524, + "step": 20104 + }, + { + "epoch": 0.5520318506315212, + "grad_norm": 0.35313335061073303, + "learning_rate": 1.6494316274625394e-05, + "loss": 0.464, + "step": 20105 + }, + { + "epoch": 0.5520593080724876, + "grad_norm": 0.6483606696128845, + "learning_rate": 1.6493987848187104e-05, + "loss": 0.6209, + "step": 20106 + }, + { + "epoch": 0.5520867655134541, + "grad_norm": 0.40844833850860596, + "learning_rate": 1.6493659409635456e-05, + "loss": 0.5482, + "step": 20107 + }, + { + "epoch": 0.5521142229544207, + "grad_norm": 0.371170312166214, + "learning_rate": 1.6493330958971054e-05, + "loss": 0.5611, + "step": 20108 + }, + { + "epoch": 0.5521416803953871, + "grad_norm": 0.41387030482292175, + "learning_rate": 1.649300249619452e-05, + "loss": 0.5135, + "step": 20109 + }, + { + "epoch": 0.5521691378363537, + "grad_norm": 0.37586623430252075, + "learning_rate": 1.6492674021306468e-05, + "loss": 0.5031, + "step": 20110 + }, + { + "epoch": 0.5521965952773201, + "grad_norm": 0.37025442719459534, + "learning_rate": 1.6492345534307503e-05, + "loss": 0.4481, + "step": 20111 + }, + { + "epoch": 0.5522240527182867, + "grad_norm": 0.3399997353553772, + "learning_rate": 1.6492017035198245e-05, + "loss": 0.4974, + "step": 20112 + }, + { + "epoch": 0.5522515101592531, + "grad_norm": 0.3896021544933319, + "learning_rate": 1.64916885239793e-05, + "loss": 0.5862, + "step": 20113 + }, + { + "epoch": 0.5522789676002197, + "grad_norm": 0.3528329133987427, + "learning_rate": 1.6491360000651284e-05, + "loss": 0.5606, + "step": 20114 + }, + { + "epoch": 0.5523064250411862, + "grad_norm": 0.39975109696388245, + "learning_rate": 1.649103146521481e-05, + "loss": 0.5135, + "step": 20115 + }, + { + "epoch": 0.5523338824821526, + "grad_norm": 0.3727113604545593, + "learning_rate": 1.649070291767049e-05, + "loss": 0.4772, + "step": 20116 + }, + { + "epoch": 0.5523613399231192, + "grad_norm": 0.3703918755054474, + "learning_rate": 1.6490374358018942e-05, + "loss": 0.5031, + "step": 20117 + }, + { + "epoch": 0.5523887973640856, + "grad_norm": 0.3716111481189728, + "learning_rate": 1.6490045786260772e-05, + "loss": 0.4968, + "step": 20118 + }, + { + "epoch": 0.5524162548050522, + "grad_norm": 0.3828426003456116, + "learning_rate": 1.648971720239659e-05, + "loss": 0.5466, + "step": 20119 + }, + { + "epoch": 0.5524437122460186, + "grad_norm": 0.38084739446640015, + "learning_rate": 1.648938860642702e-05, + "loss": 0.5146, + "step": 20120 + }, + { + "epoch": 0.5524711696869852, + "grad_norm": 0.38489416241645813, + "learning_rate": 1.6489059998352668e-05, + "loss": 0.4457, + "step": 20121 + }, + { + "epoch": 0.5524986271279517, + "grad_norm": 0.4236896336078644, + "learning_rate": 1.6488731378174148e-05, + "loss": 0.4164, + "step": 20122 + }, + { + "epoch": 0.5525260845689182, + "grad_norm": 0.38429537415504456, + "learning_rate": 1.6488402745892075e-05, + "loss": 0.5335, + "step": 20123 + }, + { + "epoch": 0.5525535420098847, + "grad_norm": 0.37772315740585327, + "learning_rate": 1.6488074101507054e-05, + "loss": 0.5199, + "step": 20124 + }, + { + "epoch": 0.5525809994508512, + "grad_norm": 0.32997050881385803, + "learning_rate": 1.6487745445019707e-05, + "loss": 0.4016, + "step": 20125 + }, + { + "epoch": 0.5526084568918177, + "grad_norm": 0.3660522997379303, + "learning_rate": 1.6487416776430648e-05, + "loss": 0.5462, + "step": 20126 + }, + { + "epoch": 0.5526359143327841, + "grad_norm": 0.39341914653778076, + "learning_rate": 1.648708809574048e-05, + "loss": 0.547, + "step": 20127 + }, + { + "epoch": 0.5526633717737507, + "grad_norm": 0.4059383273124695, + "learning_rate": 1.6486759402949827e-05, + "loss": 0.4632, + "step": 20128 + }, + { + "epoch": 0.5526908292147172, + "grad_norm": 0.399560809135437, + "learning_rate": 1.6486430698059294e-05, + "loss": 0.4935, + "step": 20129 + }, + { + "epoch": 0.5527182866556837, + "grad_norm": 0.3889644742012024, + "learning_rate": 1.64861019810695e-05, + "loss": 0.521, + "step": 20130 + }, + { + "epoch": 0.5527457440966502, + "grad_norm": 0.34877192974090576, + "learning_rate": 1.6485773251981053e-05, + "loss": 0.5288, + "step": 20131 + }, + { + "epoch": 0.5527732015376167, + "grad_norm": 0.3701987862586975, + "learning_rate": 1.6485444510794572e-05, + "loss": 0.5434, + "step": 20132 + }, + { + "epoch": 0.5528006589785832, + "grad_norm": 0.3864341080188751, + "learning_rate": 1.6485115757510665e-05, + "loss": 0.4969, + "step": 20133 + }, + { + "epoch": 0.5528281164195497, + "grad_norm": 0.39359405636787415, + "learning_rate": 1.648478699212995e-05, + "loss": 0.4887, + "step": 20134 + }, + { + "epoch": 0.5528555738605162, + "grad_norm": 0.37667250633239746, + "learning_rate": 1.6484458214653032e-05, + "loss": 0.4567, + "step": 20135 + }, + { + "epoch": 0.5528830313014828, + "grad_norm": 0.3994331955909729, + "learning_rate": 1.6484129425080537e-05, + "loss": 0.5266, + "step": 20136 + }, + { + "epoch": 0.5529104887424492, + "grad_norm": 0.3508080542087555, + "learning_rate": 1.6483800623413064e-05, + "loss": 0.496, + "step": 20137 + }, + { + "epoch": 0.5529379461834157, + "grad_norm": 0.3907933533191681, + "learning_rate": 1.6483471809651238e-05, + "loss": 0.4908, + "step": 20138 + }, + { + "epoch": 0.5529654036243822, + "grad_norm": 0.3922945559024811, + "learning_rate": 1.648314298379567e-05, + "loss": 0.4738, + "step": 20139 + }, + { + "epoch": 0.5529928610653487, + "grad_norm": 0.4048019349575043, + "learning_rate": 1.6482814145846967e-05, + "loss": 0.516, + "step": 20140 + }, + { + "epoch": 0.5530203185063152, + "grad_norm": 0.3539634346961975, + "learning_rate": 1.6482485295805748e-05, + "loss": 0.5106, + "step": 20141 + }, + { + "epoch": 0.5530477759472817, + "grad_norm": 0.3490460515022278, + "learning_rate": 1.648215643367262e-05, + "loss": 0.5639, + "step": 20142 + }, + { + "epoch": 0.5530752333882483, + "grad_norm": 0.8991808295249939, + "learning_rate": 1.648182755944821e-05, + "loss": 0.5663, + "step": 20143 + }, + { + "epoch": 0.5531026908292147, + "grad_norm": 0.37436914443969727, + "learning_rate": 1.6481498673133115e-05, + "loss": 0.4932, + "step": 20144 + }, + { + "epoch": 0.5531301482701813, + "grad_norm": 0.34102290868759155, + "learning_rate": 1.648116977472796e-05, + "loss": 0.4246, + "step": 20145 + }, + { + "epoch": 0.5531576057111477, + "grad_norm": 0.38094717264175415, + "learning_rate": 1.6480840864233357e-05, + "loss": 0.5437, + "step": 20146 + }, + { + "epoch": 0.5531850631521142, + "grad_norm": 0.36383137106895447, + "learning_rate": 1.6480511941649915e-05, + "loss": 0.4531, + "step": 20147 + }, + { + "epoch": 0.5532125205930807, + "grad_norm": 0.3569903075695038, + "learning_rate": 1.648018300697825e-05, + "loss": 0.5086, + "step": 20148 + }, + { + "epoch": 0.5532399780340472, + "grad_norm": 0.3627673387527466, + "learning_rate": 1.6479854060218976e-05, + "loss": 0.5018, + "step": 20149 + }, + { + "epoch": 0.5532674354750138, + "grad_norm": 1.430594563484192, + "learning_rate": 1.6479525101372705e-05, + "loss": 0.4988, + "step": 20150 + }, + { + "epoch": 0.5532948929159802, + "grad_norm": 0.34283581376075745, + "learning_rate": 1.647919613044005e-05, + "loss": 0.4482, + "step": 20151 + }, + { + "epoch": 0.5533223503569468, + "grad_norm": 0.37608855962753296, + "learning_rate": 1.647886714742163e-05, + "loss": 0.457, + "step": 20152 + }, + { + "epoch": 0.5533498077979132, + "grad_norm": 0.4209042489528656, + "learning_rate": 1.6478538152318054e-05, + "loss": 0.5435, + "step": 20153 + }, + { + "epoch": 0.5533772652388798, + "grad_norm": 0.3934909999370575, + "learning_rate": 1.6478209145129936e-05, + "loss": 0.5019, + "step": 20154 + }, + { + "epoch": 0.5534047226798462, + "grad_norm": 0.3750247061252594, + "learning_rate": 1.647788012585789e-05, + "loss": 0.5321, + "step": 20155 + }, + { + "epoch": 0.5534321801208127, + "grad_norm": 0.4152064323425293, + "learning_rate": 1.647755109450253e-05, + "loss": 0.5454, + "step": 20156 + }, + { + "epoch": 0.5534596375617793, + "grad_norm": 0.349907249212265, + "learning_rate": 1.647722205106447e-05, + "loss": 0.469, + "step": 20157 + }, + { + "epoch": 0.5534870950027457, + "grad_norm": 0.5042437314987183, + "learning_rate": 1.6476892995544324e-05, + "loss": 0.6294, + "step": 20158 + }, + { + "epoch": 0.5535145524437123, + "grad_norm": 0.4379086494445801, + "learning_rate": 1.6476563927942706e-05, + "loss": 0.442, + "step": 20159 + }, + { + "epoch": 0.5535420098846787, + "grad_norm": 0.39005881547927856, + "learning_rate": 1.647623484826023e-05, + "loss": 0.5591, + "step": 20160 + }, + { + "epoch": 0.5535694673256453, + "grad_norm": 0.45695826411247253, + "learning_rate": 1.6475905756497505e-05, + "loss": 0.5648, + "step": 20161 + }, + { + "epoch": 0.5535969247666117, + "grad_norm": 0.38863369822502136, + "learning_rate": 1.6475576652655152e-05, + "loss": 0.4341, + "step": 20162 + }, + { + "epoch": 0.5536243822075783, + "grad_norm": 0.4194428622722626, + "learning_rate": 1.647524753673378e-05, + "loss": 0.498, + "step": 20163 + }, + { + "epoch": 0.5536518396485448, + "grad_norm": 0.36987099051475525, + "learning_rate": 1.6474918408734007e-05, + "loss": 0.4219, + "step": 20164 + }, + { + "epoch": 0.5536792970895112, + "grad_norm": 0.4030095934867859, + "learning_rate": 1.647458926865644e-05, + "loss": 0.5006, + "step": 20165 + }, + { + "epoch": 0.5537067545304778, + "grad_norm": 0.41510286927223206, + "learning_rate": 1.6474260116501704e-05, + "loss": 0.5771, + "step": 20166 + }, + { + "epoch": 0.5537342119714442, + "grad_norm": 0.4058142900466919, + "learning_rate": 1.64739309522704e-05, + "loss": 0.5397, + "step": 20167 + }, + { + "epoch": 0.5537616694124108, + "grad_norm": 0.41471534967422485, + "learning_rate": 1.6473601775963156e-05, + "loss": 0.529, + "step": 20168 + }, + { + "epoch": 0.5537891268533772, + "grad_norm": 0.3378960192203522, + "learning_rate": 1.6473272587580576e-05, + "loss": 0.4891, + "step": 20169 + }, + { + "epoch": 0.5538165842943438, + "grad_norm": 0.33718016743659973, + "learning_rate": 1.6472943387123273e-05, + "loss": 0.4271, + "step": 20170 + }, + { + "epoch": 0.5538440417353103, + "grad_norm": 0.33630335330963135, + "learning_rate": 1.6472614174591868e-05, + "loss": 0.4647, + "step": 20171 + }, + { + "epoch": 0.5538714991762768, + "grad_norm": 0.32663705945014954, + "learning_rate": 1.647228494998697e-05, + "loss": 0.3977, + "step": 20172 + }, + { + "epoch": 0.5538989566172433, + "grad_norm": 0.385714054107666, + "learning_rate": 1.6471955713309197e-05, + "loss": 0.4813, + "step": 20173 + }, + { + "epoch": 0.5539264140582097, + "grad_norm": 0.3425789773464203, + "learning_rate": 1.6471626464559158e-05, + "loss": 0.3523, + "step": 20174 + }, + { + "epoch": 0.5539538714991763, + "grad_norm": 0.41745099425315857, + "learning_rate": 1.647129720373747e-05, + "loss": 0.4518, + "step": 20175 + }, + { + "epoch": 0.5539813289401427, + "grad_norm": 0.37504270672798157, + "learning_rate": 1.6470967930844752e-05, + "loss": 0.4461, + "step": 20176 + }, + { + "epoch": 0.5540087863811093, + "grad_norm": 0.3643954396247864, + "learning_rate": 1.6470638645881608e-05, + "loss": 0.4122, + "step": 20177 + }, + { + "epoch": 0.5540362438220758, + "grad_norm": 0.4053237736225128, + "learning_rate": 1.647030934884866e-05, + "loss": 0.5481, + "step": 20178 + }, + { + "epoch": 0.5540637012630423, + "grad_norm": 0.5372850298881531, + "learning_rate": 1.646998003974652e-05, + "loss": 0.4937, + "step": 20179 + }, + { + "epoch": 0.5540911587040088, + "grad_norm": 0.3336838185787201, + "learning_rate": 1.6469650718575802e-05, + "loss": 0.4375, + "step": 20180 + }, + { + "epoch": 0.5541186161449753, + "grad_norm": 0.3654647767543793, + "learning_rate": 1.6469321385337123e-05, + "loss": 0.4984, + "step": 20181 + }, + { + "epoch": 0.5541460735859418, + "grad_norm": 0.4210224449634552, + "learning_rate": 1.646899204003109e-05, + "loss": 0.444, + "step": 20182 + }, + { + "epoch": 0.5541735310269083, + "grad_norm": 0.3753102421760559, + "learning_rate": 1.646866268265833e-05, + "loss": 0.4679, + "step": 20183 + }, + { + "epoch": 0.5542009884678748, + "grad_norm": 0.35331031680107117, + "learning_rate": 1.646833331321944e-05, + "loss": 0.4725, + "step": 20184 + }, + { + "epoch": 0.5542284459088413, + "grad_norm": 0.3715324401855469, + "learning_rate": 1.646800393171505e-05, + "loss": 0.4497, + "step": 20185 + }, + { + "epoch": 0.5542559033498078, + "grad_norm": 0.36299705505371094, + "learning_rate": 1.6467674538145765e-05, + "loss": 0.5811, + "step": 20186 + }, + { + "epoch": 0.5542833607907743, + "grad_norm": 0.4170243442058563, + "learning_rate": 1.6467345132512206e-05, + "loss": 0.576, + "step": 20187 + }, + { + "epoch": 0.5543108182317408, + "grad_norm": 0.36679068207740784, + "learning_rate": 1.6467015714814985e-05, + "loss": 0.4459, + "step": 20188 + }, + { + "epoch": 0.5543382756727073, + "grad_norm": 0.38606953620910645, + "learning_rate": 1.6466686285054713e-05, + "loss": 0.5984, + "step": 20189 + }, + { + "epoch": 0.5543657331136738, + "grad_norm": 0.3983590006828308, + "learning_rate": 1.6466356843232007e-05, + "loss": 0.5433, + "step": 20190 + }, + { + "epoch": 0.5543931905546403, + "grad_norm": 0.3884018659591675, + "learning_rate": 1.646602738934748e-05, + "loss": 0.5426, + "step": 20191 + }, + { + "epoch": 0.5544206479956069, + "grad_norm": 0.38308918476104736, + "learning_rate": 1.6465697923401752e-05, + "loss": 0.5367, + "step": 20192 + }, + { + "epoch": 0.5544481054365733, + "grad_norm": 0.35512298345565796, + "learning_rate": 1.6465368445395432e-05, + "loss": 0.5146, + "step": 20193 + }, + { + "epoch": 0.5544755628775399, + "grad_norm": 0.38658955693244934, + "learning_rate": 1.6465038955329135e-05, + "loss": 0.56, + "step": 20194 + }, + { + "epoch": 0.5545030203185063, + "grad_norm": 0.4527896046638489, + "learning_rate": 1.646470945320348e-05, + "loss": 0.4865, + "step": 20195 + }, + { + "epoch": 0.5545304777594728, + "grad_norm": 0.3851822018623352, + "learning_rate": 1.6464379939019076e-05, + "loss": 0.5476, + "step": 20196 + }, + { + "epoch": 0.5545579352004393, + "grad_norm": 0.369793176651001, + "learning_rate": 1.646405041277654e-05, + "loss": 0.5347, + "step": 20197 + }, + { + "epoch": 0.5545853926414058, + "grad_norm": 0.3561685085296631, + "learning_rate": 1.646372087447649e-05, + "loss": 0.5033, + "step": 20198 + }, + { + "epoch": 0.5546128500823724, + "grad_norm": 0.4042803943157196, + "learning_rate": 1.6463391324119537e-05, + "loss": 0.5421, + "step": 20199 + }, + { + "epoch": 0.5546403075233388, + "grad_norm": 0.3857647180557251, + "learning_rate": 1.6463061761706292e-05, + "loss": 0.5306, + "step": 20200 + }, + { + "epoch": 0.5546677649643054, + "grad_norm": 0.5380737781524658, + "learning_rate": 1.6462732187237377e-05, + "loss": 0.4844, + "step": 20201 + }, + { + "epoch": 0.5546952224052718, + "grad_norm": 0.4317311942577362, + "learning_rate": 1.6462402600713404e-05, + "loss": 0.5477, + "step": 20202 + }, + { + "epoch": 0.5547226798462384, + "grad_norm": 0.40832772850990295, + "learning_rate": 1.646207300213499e-05, + "loss": 0.5288, + "step": 20203 + }, + { + "epoch": 0.5547501372872048, + "grad_norm": 0.36950185894966125, + "learning_rate": 1.646174339150274e-05, + "loss": 0.4961, + "step": 20204 + }, + { + "epoch": 0.5547775947281713, + "grad_norm": 0.465455561876297, + "learning_rate": 1.6461413768817285e-05, + "loss": 0.5052, + "step": 20205 + }, + { + "epoch": 0.5548050521691379, + "grad_norm": 0.39981111884117126, + "learning_rate": 1.6461084134079227e-05, + "loss": 0.5161, + "step": 20206 + }, + { + "epoch": 0.5548325096101043, + "grad_norm": 0.3676760792732239, + "learning_rate": 1.6460754487289182e-05, + "loss": 0.5319, + "step": 20207 + }, + { + "epoch": 0.5548599670510709, + "grad_norm": 0.35794395208358765, + "learning_rate": 1.6460424828447775e-05, + "loss": 0.5074, + "step": 20208 + }, + { + "epoch": 0.5548874244920373, + "grad_norm": 0.4003955125808716, + "learning_rate": 1.6460095157555607e-05, + "loss": 0.5108, + "step": 20209 + }, + { + "epoch": 0.5549148819330039, + "grad_norm": 0.37843626737594604, + "learning_rate": 1.64597654746133e-05, + "loss": 0.5142, + "step": 20210 + }, + { + "epoch": 0.5549423393739703, + "grad_norm": 0.3527695834636688, + "learning_rate": 1.6459435779621474e-05, + "loss": 0.4822, + "step": 20211 + }, + { + "epoch": 0.5549697968149369, + "grad_norm": 0.37994199991226196, + "learning_rate": 1.6459106072580735e-05, + "loss": 0.4243, + "step": 20212 + }, + { + "epoch": 0.5549972542559034, + "grad_norm": 0.45816242694854736, + "learning_rate": 1.6458776353491704e-05, + "loss": 0.5781, + "step": 20213 + }, + { + "epoch": 0.5550247116968698, + "grad_norm": 0.3571593463420868, + "learning_rate": 1.6458446622354992e-05, + "loss": 0.4706, + "step": 20214 + }, + { + "epoch": 0.5550521691378364, + "grad_norm": 0.34383267164230347, + "learning_rate": 1.645811687917122e-05, + "loss": 0.4708, + "step": 20215 + }, + { + "epoch": 0.5550796265788028, + "grad_norm": 0.3769657611846924, + "learning_rate": 1.6457787123940993e-05, + "loss": 0.549, + "step": 20216 + }, + { + "epoch": 0.5551070840197694, + "grad_norm": 0.333621621131897, + "learning_rate": 1.645745735666493e-05, + "loss": 0.5055, + "step": 20217 + }, + { + "epoch": 0.5551345414607358, + "grad_norm": 0.41225188970565796, + "learning_rate": 1.6457127577343653e-05, + "loss": 0.5758, + "step": 20218 + }, + { + "epoch": 0.5551619989017024, + "grad_norm": 0.3868938386440277, + "learning_rate": 1.6456797785977772e-05, + "loss": 0.5381, + "step": 20219 + }, + { + "epoch": 0.5551894563426688, + "grad_norm": 0.39834484457969666, + "learning_rate": 1.64564679825679e-05, + "loss": 0.5125, + "step": 20220 + }, + { + "epoch": 0.5552169137836354, + "grad_norm": 0.35582175850868225, + "learning_rate": 1.6456138167114658e-05, + "loss": 0.5039, + "step": 20221 + }, + { + "epoch": 0.5552443712246019, + "grad_norm": 0.401955783367157, + "learning_rate": 1.6455808339618654e-05, + "loss": 0.5202, + "step": 20222 + }, + { + "epoch": 0.5552718286655683, + "grad_norm": 0.40118566155433655, + "learning_rate": 1.645547850008051e-05, + "loss": 0.4946, + "step": 20223 + }, + { + "epoch": 0.5552992861065349, + "grad_norm": 0.3765190541744232, + "learning_rate": 1.6455148648500838e-05, + "loss": 0.4676, + "step": 20224 + }, + { + "epoch": 0.5553267435475013, + "grad_norm": 0.35392647981643677, + "learning_rate": 1.6454818784880248e-05, + "loss": 0.4823, + "step": 20225 + }, + { + "epoch": 0.5553542009884679, + "grad_norm": 0.39039677381515503, + "learning_rate": 1.645448890921937e-05, + "loss": 0.4638, + "step": 20226 + }, + { + "epoch": 0.5553816584294343, + "grad_norm": 0.40326356887817383, + "learning_rate": 1.64541590215188e-05, + "loss": 0.5153, + "step": 20227 + }, + { + "epoch": 0.5554091158704009, + "grad_norm": 0.3760007917881012, + "learning_rate": 1.645382912177917e-05, + "loss": 0.5392, + "step": 20228 + }, + { + "epoch": 0.5554365733113674, + "grad_norm": 0.4069371521472931, + "learning_rate": 1.6453499210001085e-05, + "loss": 0.5595, + "step": 20229 + }, + { + "epoch": 0.5554640307523339, + "grad_norm": 0.41930606961250305, + "learning_rate": 1.645316928618517e-05, + "loss": 0.4984, + "step": 20230 + }, + { + "epoch": 0.5554914881933004, + "grad_norm": 0.3605698347091675, + "learning_rate": 1.645283935033203e-05, + "loss": 0.4143, + "step": 20231 + }, + { + "epoch": 0.5555189456342668, + "grad_norm": 0.39063334465026855, + "learning_rate": 1.645250940244229e-05, + "loss": 0.5276, + "step": 20232 + }, + { + "epoch": 0.5555464030752334, + "grad_norm": 0.40996408462524414, + "learning_rate": 1.6452179442516555e-05, + "loss": 0.4733, + "step": 20233 + }, + { + "epoch": 0.5555738605161998, + "grad_norm": 0.43693968653678894, + "learning_rate": 1.6451849470555447e-05, + "loss": 0.555, + "step": 20234 + }, + { + "epoch": 0.5556013179571664, + "grad_norm": 0.36617717146873474, + "learning_rate": 1.6451519486559583e-05, + "loss": 0.5412, + "step": 20235 + }, + { + "epoch": 0.5556287753981329, + "grad_norm": 0.3859819769859314, + "learning_rate": 1.6451189490529576e-05, + "loss": 0.5583, + "step": 20236 + }, + { + "epoch": 0.5556562328390994, + "grad_norm": 0.34744104743003845, + "learning_rate": 1.645085948246604e-05, + "loss": 0.51, + "step": 20237 + }, + { + "epoch": 0.5556836902800659, + "grad_norm": 0.39332884550094604, + "learning_rate": 1.6450529462369594e-05, + "loss": 0.608, + "step": 20238 + }, + { + "epoch": 0.5557111477210324, + "grad_norm": 0.42024800181388855, + "learning_rate": 1.645019943024085e-05, + "loss": 0.5195, + "step": 20239 + }, + { + "epoch": 0.5557386051619989, + "grad_norm": 0.3658624589443207, + "learning_rate": 1.6449869386080424e-05, + "loss": 0.5168, + "step": 20240 + }, + { + "epoch": 0.5557660626029653, + "grad_norm": 0.38009509444236755, + "learning_rate": 1.6449539329888934e-05, + "loss": 0.4777, + "step": 20241 + }, + { + "epoch": 0.5557935200439319, + "grad_norm": 0.3355199992656708, + "learning_rate": 1.6449209261667e-05, + "loss": 0.3961, + "step": 20242 + }, + { + "epoch": 0.5558209774848984, + "grad_norm": 0.44806209206581116, + "learning_rate": 1.6448879181415227e-05, + "loss": 0.5083, + "step": 20243 + }, + { + "epoch": 0.5558484349258649, + "grad_norm": 0.390697717666626, + "learning_rate": 1.6448549089134237e-05, + "loss": 0.5189, + "step": 20244 + }, + { + "epoch": 0.5558758923668314, + "grad_norm": 0.5547188520431519, + "learning_rate": 1.6448218984824644e-05, + "loss": 0.5482, + "step": 20245 + }, + { + "epoch": 0.5559033498077979, + "grad_norm": 0.35165131092071533, + "learning_rate": 1.6447888868487065e-05, + "loss": 0.6403, + "step": 20246 + }, + { + "epoch": 0.5559308072487644, + "grad_norm": 0.38964730501174927, + "learning_rate": 1.6447558740122116e-05, + "loss": 0.4937, + "step": 20247 + }, + { + "epoch": 0.5559582646897309, + "grad_norm": 0.36910247802734375, + "learning_rate": 1.644722859973041e-05, + "loss": 0.5111, + "step": 20248 + }, + { + "epoch": 0.5559857221306974, + "grad_norm": 0.37214407324790955, + "learning_rate": 1.6446898447312568e-05, + "loss": 0.4828, + "step": 20249 + }, + { + "epoch": 0.556013179571664, + "grad_norm": 0.3713202476501465, + "learning_rate": 1.6446568282869202e-05, + "loss": 0.481, + "step": 20250 + }, + { + "epoch": 0.5560406370126304, + "grad_norm": 0.7811570167541504, + "learning_rate": 1.6446238106400927e-05, + "loss": 0.5893, + "step": 20251 + }, + { + "epoch": 0.556068094453597, + "grad_norm": 0.39297011494636536, + "learning_rate": 1.6445907917908365e-05, + "loss": 0.5298, + "step": 20252 + }, + { + "epoch": 0.5560955518945634, + "grad_norm": 0.4151470959186554, + "learning_rate": 1.644557771739212e-05, + "loss": 0.491, + "step": 20253 + }, + { + "epoch": 0.5561230093355299, + "grad_norm": 0.4163072407245636, + "learning_rate": 1.644524750485282e-05, + "loss": 0.5641, + "step": 20254 + }, + { + "epoch": 0.5561504667764964, + "grad_norm": 0.3912801444530487, + "learning_rate": 1.644491728029108e-05, + "loss": 0.505, + "step": 20255 + }, + { + "epoch": 0.5561779242174629, + "grad_norm": 0.34802675247192383, + "learning_rate": 1.6444587043707506e-05, + "loss": 0.5029, + "step": 20256 + }, + { + "epoch": 0.5562053816584295, + "grad_norm": 0.3699260652065277, + "learning_rate": 1.6444256795102723e-05, + "loss": 0.4956, + "step": 20257 + }, + { + "epoch": 0.5562328390993959, + "grad_norm": 0.3644157946109772, + "learning_rate": 1.644392653447734e-05, + "loss": 0.557, + "step": 20258 + }, + { + "epoch": 0.5562602965403625, + "grad_norm": 0.4045586585998535, + "learning_rate": 1.6443596261831984e-05, + "loss": 0.5679, + "step": 20259 + }, + { + "epoch": 0.5562877539813289, + "grad_norm": 0.34530389308929443, + "learning_rate": 1.644326597716726e-05, + "loss": 0.4793, + "step": 20260 + }, + { + "epoch": 0.5563152114222955, + "grad_norm": 0.38252878189086914, + "learning_rate": 1.644293568048379e-05, + "loss": 0.5495, + "step": 20261 + }, + { + "epoch": 0.5563426688632619, + "grad_norm": 0.33399298787117004, + "learning_rate": 1.644260537178219e-05, + "loss": 0.5079, + "step": 20262 + }, + { + "epoch": 0.5563701263042284, + "grad_norm": 0.3800945281982422, + "learning_rate": 1.6442275051063075e-05, + "loss": 0.5506, + "step": 20263 + }, + { + "epoch": 0.556397583745195, + "grad_norm": 0.41455358266830444, + "learning_rate": 1.644194471832706e-05, + "loss": 0.5274, + "step": 20264 + }, + { + "epoch": 0.5564250411861614, + "grad_norm": 0.38693058490753174, + "learning_rate": 1.644161437357476e-05, + "loss": 0.504, + "step": 20265 + }, + { + "epoch": 0.556452498627128, + "grad_norm": 0.3884378671646118, + "learning_rate": 1.6441284016806795e-05, + "loss": 0.4561, + "step": 20266 + }, + { + "epoch": 0.5564799560680944, + "grad_norm": 0.4349499046802521, + "learning_rate": 1.6440953648023778e-05, + "loss": 0.6189, + "step": 20267 + }, + { + "epoch": 0.556507413509061, + "grad_norm": 0.4187443256378174, + "learning_rate": 1.644062326722633e-05, + "loss": 0.4806, + "step": 20268 + }, + { + "epoch": 0.5565348709500274, + "grad_norm": 0.36575835943222046, + "learning_rate": 1.644029287441506e-05, + "loss": 0.4565, + "step": 20269 + }, + { + "epoch": 0.556562328390994, + "grad_norm": 0.3781886398792267, + "learning_rate": 1.643996246959059e-05, + "loss": 0.4968, + "step": 20270 + }, + { + "epoch": 0.5565897858319605, + "grad_norm": 0.3672904372215271, + "learning_rate": 1.6439632052753536e-05, + "loss": 0.4758, + "step": 20271 + }, + { + "epoch": 0.5566172432729269, + "grad_norm": 0.372806578874588, + "learning_rate": 1.6439301623904513e-05, + "loss": 0.4556, + "step": 20272 + }, + { + "epoch": 0.5566447007138935, + "grad_norm": 0.3715762794017792, + "learning_rate": 1.6438971183044135e-05, + "loss": 0.5142, + "step": 20273 + }, + { + "epoch": 0.5566721581548599, + "grad_norm": 0.33996066451072693, + "learning_rate": 1.6438640730173017e-05, + "loss": 0.4307, + "step": 20274 + }, + { + "epoch": 0.5566996155958265, + "grad_norm": 0.3362007141113281, + "learning_rate": 1.6438310265291785e-05, + "loss": 0.4394, + "step": 20275 + }, + { + "epoch": 0.5567270730367929, + "grad_norm": 0.33020609617233276, + "learning_rate": 1.6437979788401052e-05, + "loss": 0.4969, + "step": 20276 + }, + { + "epoch": 0.5567545304777595, + "grad_norm": 0.361072301864624, + "learning_rate": 1.6437649299501425e-05, + "loss": 0.487, + "step": 20277 + }, + { + "epoch": 0.556781987918726, + "grad_norm": 0.4077190160751343, + "learning_rate": 1.6437318798593534e-05, + "loss": 0.5812, + "step": 20278 + }, + { + "epoch": 0.5568094453596925, + "grad_norm": 0.4131031334400177, + "learning_rate": 1.643698828567798e-05, + "loss": 0.4492, + "step": 20279 + }, + { + "epoch": 0.556836902800659, + "grad_norm": 0.3791561722755432, + "learning_rate": 1.6436657760755398e-05, + "loss": 0.4551, + "step": 20280 + }, + { + "epoch": 0.5568643602416254, + "grad_norm": 0.49113011360168457, + "learning_rate": 1.643632722382639e-05, + "loss": 0.547, + "step": 20281 + }, + { + "epoch": 0.556891817682592, + "grad_norm": 0.3744960129261017, + "learning_rate": 1.643599667489158e-05, + "loss": 0.4444, + "step": 20282 + }, + { + "epoch": 0.5569192751235584, + "grad_norm": 0.41314318776130676, + "learning_rate": 1.643566611395158e-05, + "loss": 0.5765, + "step": 20283 + }, + { + "epoch": 0.556946732564525, + "grad_norm": 0.39940470457077026, + "learning_rate": 1.643533554100701e-05, + "loss": 0.5775, + "step": 20284 + }, + { + "epoch": 0.5569741900054915, + "grad_norm": 0.32721057534217834, + "learning_rate": 1.643500495605848e-05, + "loss": 0.4452, + "step": 20285 + }, + { + "epoch": 0.557001647446458, + "grad_norm": 0.4730414152145386, + "learning_rate": 1.6434674359106617e-05, + "loss": 0.6058, + "step": 20286 + }, + { + "epoch": 0.5570291048874245, + "grad_norm": 0.4019206166267395, + "learning_rate": 1.643434375015203e-05, + "loss": 0.5868, + "step": 20287 + }, + { + "epoch": 0.557056562328391, + "grad_norm": 0.36188942193984985, + "learning_rate": 1.6434013129195345e-05, + "loss": 0.509, + "step": 20288 + }, + { + "epoch": 0.5570840197693575, + "grad_norm": 0.4097791314125061, + "learning_rate": 1.6433682496237165e-05, + "loss": 0.594, + "step": 20289 + }, + { + "epoch": 0.557111477210324, + "grad_norm": 0.3902775049209595, + "learning_rate": 1.6433351851278114e-05, + "loss": 0.4938, + "step": 20290 + }, + { + "epoch": 0.5571389346512905, + "grad_norm": 0.3682023286819458, + "learning_rate": 1.6433021194318812e-05, + "loss": 0.5764, + "step": 20291 + }, + { + "epoch": 0.557166392092257, + "grad_norm": 0.3933098316192627, + "learning_rate": 1.643269052535987e-05, + "loss": 0.5054, + "step": 20292 + }, + { + "epoch": 0.5571938495332235, + "grad_norm": 0.3762798011302948, + "learning_rate": 1.6432359844401907e-05, + "loss": 0.4858, + "step": 20293 + }, + { + "epoch": 0.55722130697419, + "grad_norm": 0.374263733625412, + "learning_rate": 1.6432029151445544e-05, + "loss": 0.4535, + "step": 20294 + }, + { + "epoch": 0.5572487644151565, + "grad_norm": 0.42095598578453064, + "learning_rate": 1.643169844649139e-05, + "loss": 0.5233, + "step": 20295 + }, + { + "epoch": 0.557276221856123, + "grad_norm": 0.4902118742465973, + "learning_rate": 1.6431367729540065e-05, + "loss": 0.4329, + "step": 20296 + }, + { + "epoch": 0.5573036792970895, + "grad_norm": 0.3666732609272003, + "learning_rate": 1.6431037000592188e-05, + "loss": 0.4686, + "step": 20297 + }, + { + "epoch": 0.557331136738056, + "grad_norm": 0.44683846831321716, + "learning_rate": 1.6430706259648373e-05, + "loss": 0.5253, + "step": 20298 + }, + { + "epoch": 0.5573585941790226, + "grad_norm": 0.39229485392570496, + "learning_rate": 1.643037550670924e-05, + "loss": 0.4764, + "step": 20299 + }, + { + "epoch": 0.557386051619989, + "grad_norm": 0.631496012210846, + "learning_rate": 1.6430044741775403e-05, + "loss": 0.5023, + "step": 20300 + }, + { + "epoch": 0.5574135090609555, + "grad_norm": 0.40017467737197876, + "learning_rate": 1.6429713964847483e-05, + "loss": 0.4775, + "step": 20301 + }, + { + "epoch": 0.557440966501922, + "grad_norm": 0.3605625629425049, + "learning_rate": 1.642938317592609e-05, + "loss": 0.522, + "step": 20302 + }, + { + "epoch": 0.5574684239428885, + "grad_norm": 0.35034605860710144, + "learning_rate": 1.642905237501185e-05, + "loss": 0.4934, + "step": 20303 + }, + { + "epoch": 0.557495881383855, + "grad_norm": 0.34260040521621704, + "learning_rate": 1.6428721562105373e-05, + "loss": 0.3538, + "step": 20304 + }, + { + "epoch": 0.5575233388248215, + "grad_norm": 0.3356960117816925, + "learning_rate": 1.6428390737207278e-05, + "loss": 0.4125, + "step": 20305 + }, + { + "epoch": 0.5575507962657881, + "grad_norm": 0.5235896110534668, + "learning_rate": 1.6428059900318185e-05, + "loss": 0.5063, + "step": 20306 + }, + { + "epoch": 0.5575782537067545, + "grad_norm": 0.36490899324417114, + "learning_rate": 1.6427729051438706e-05, + "loss": 0.5124, + "step": 20307 + }, + { + "epoch": 0.5576057111477211, + "grad_norm": 0.4243975281715393, + "learning_rate": 1.6427398190569463e-05, + "loss": 0.5073, + "step": 20308 + }, + { + "epoch": 0.5576331685886875, + "grad_norm": 0.3476022481918335, + "learning_rate": 1.642706731771107e-05, + "loss": 0.5432, + "step": 20309 + }, + { + "epoch": 0.557660626029654, + "grad_norm": 0.35392364859580994, + "learning_rate": 1.6426736432864145e-05, + "loss": 0.4314, + "step": 20310 + }, + { + "epoch": 0.5576880834706205, + "grad_norm": 0.4082086980342865, + "learning_rate": 1.6426405536029303e-05, + "loss": 0.5363, + "step": 20311 + }, + { + "epoch": 0.557715540911587, + "grad_norm": 0.4229736030101776, + "learning_rate": 1.6426074627207166e-05, + "loss": 0.4929, + "step": 20312 + }, + { + "epoch": 0.5577429983525536, + "grad_norm": 0.41695255041122437, + "learning_rate": 1.6425743706398348e-05, + "loss": 0.4876, + "step": 20313 + }, + { + "epoch": 0.55777045579352, + "grad_norm": 0.36287322640419006, + "learning_rate": 1.6425412773603468e-05, + "loss": 0.4064, + "step": 20314 + }, + { + "epoch": 0.5577979132344866, + "grad_norm": 0.36662808060646057, + "learning_rate": 1.6425081828823143e-05, + "loss": 0.4678, + "step": 20315 + }, + { + "epoch": 0.557825370675453, + "grad_norm": 0.4197006821632385, + "learning_rate": 1.642475087205799e-05, + "loss": 0.4678, + "step": 20316 + }, + { + "epoch": 0.5578528281164196, + "grad_norm": 0.37479016184806824, + "learning_rate": 1.6424419903308627e-05, + "loss": 0.4857, + "step": 20317 + }, + { + "epoch": 0.557880285557386, + "grad_norm": 0.4174877405166626, + "learning_rate": 1.6424088922575667e-05, + "loss": 0.4791, + "step": 20318 + }, + { + "epoch": 0.5579077429983526, + "grad_norm": 0.3609885275363922, + "learning_rate": 1.642375792985973e-05, + "loss": 0.4123, + "step": 20319 + }, + { + "epoch": 0.5579352004393191, + "grad_norm": 0.37020015716552734, + "learning_rate": 1.6423426925161437e-05, + "loss": 0.4968, + "step": 20320 + }, + { + "epoch": 0.5579626578802855, + "grad_norm": 0.35799267888069153, + "learning_rate": 1.6423095908481405e-05, + "loss": 0.5312, + "step": 20321 + }, + { + "epoch": 0.5579901153212521, + "grad_norm": 0.3939209282398224, + "learning_rate": 1.6422764879820247e-05, + "loss": 0.4989, + "step": 20322 + }, + { + "epoch": 0.5580175727622185, + "grad_norm": 0.3827025890350342, + "learning_rate": 1.642243383917858e-05, + "loss": 0.5449, + "step": 20323 + }, + { + "epoch": 0.5580450302031851, + "grad_norm": 0.37037405371665955, + "learning_rate": 1.6422102786557026e-05, + "loss": 0.4664, + "step": 20324 + }, + { + "epoch": 0.5580724876441515, + "grad_norm": 0.3948405086994171, + "learning_rate": 1.64217717219562e-05, + "loss": 0.4899, + "step": 20325 + }, + { + "epoch": 0.5580999450851181, + "grad_norm": 0.3822045922279358, + "learning_rate": 1.642144064537672e-05, + "loss": 0.5845, + "step": 20326 + }, + { + "epoch": 0.5581274025260846, + "grad_norm": 0.46515029668807983, + "learning_rate": 1.6421109556819204e-05, + "loss": 0.4953, + "step": 20327 + }, + { + "epoch": 0.558154859967051, + "grad_norm": 0.3494088053703308, + "learning_rate": 1.6420778456284272e-05, + "loss": 0.4731, + "step": 20328 + }, + { + "epoch": 0.5581823174080176, + "grad_norm": 0.4271300137042999, + "learning_rate": 1.6420447343772538e-05, + "loss": 0.5994, + "step": 20329 + }, + { + "epoch": 0.558209774848984, + "grad_norm": 0.3628305196762085, + "learning_rate": 1.642011621928462e-05, + "loss": 0.4477, + "step": 20330 + }, + { + "epoch": 0.5582372322899506, + "grad_norm": 0.4301166534423828, + "learning_rate": 1.6419785082821134e-05, + "loss": 0.5865, + "step": 20331 + }, + { + "epoch": 0.558264689730917, + "grad_norm": 0.3920251727104187, + "learning_rate": 1.6419453934382704e-05, + "loss": 0.4655, + "step": 20332 + }, + { + "epoch": 0.5582921471718836, + "grad_norm": 0.38450363278388977, + "learning_rate": 1.641912277396994e-05, + "loss": 0.4461, + "step": 20333 + }, + { + "epoch": 0.5583196046128501, + "grad_norm": 0.4028998613357544, + "learning_rate": 1.641879160158346e-05, + "loss": 0.5963, + "step": 20334 + }, + { + "epoch": 0.5583470620538166, + "grad_norm": 0.3507046699523926, + "learning_rate": 1.6418460417223892e-05, + "loss": 0.4609, + "step": 20335 + }, + { + "epoch": 0.5583745194947831, + "grad_norm": 0.36255598068237305, + "learning_rate": 1.6418129220891844e-05, + "loss": 0.5174, + "step": 20336 + }, + { + "epoch": 0.5584019769357496, + "grad_norm": 0.31922098994255066, + "learning_rate": 1.6417798012587938e-05, + "loss": 0.3855, + "step": 20337 + }, + { + "epoch": 0.5584294343767161, + "grad_norm": 0.40900370478630066, + "learning_rate": 1.6417466792312786e-05, + "loss": 0.5105, + "step": 20338 + }, + { + "epoch": 0.5584568918176825, + "grad_norm": 0.4212241768836975, + "learning_rate": 1.6417135560067016e-05, + "loss": 0.5891, + "step": 20339 + }, + { + "epoch": 0.5584843492586491, + "grad_norm": 0.39285406470298767, + "learning_rate": 1.6416804315851237e-05, + "loss": 0.4827, + "step": 20340 + }, + { + "epoch": 0.5585118066996156, + "grad_norm": 0.42513421177864075, + "learning_rate": 1.641647305966607e-05, + "loss": 0.6229, + "step": 20341 + }, + { + "epoch": 0.5585392641405821, + "grad_norm": 0.387540727853775, + "learning_rate": 1.6416141791512132e-05, + "loss": 0.5591, + "step": 20342 + }, + { + "epoch": 0.5585667215815486, + "grad_norm": 0.40205785632133484, + "learning_rate": 1.6415810511390044e-05, + "loss": 0.5949, + "step": 20343 + }, + { + "epoch": 0.5585941790225151, + "grad_norm": 0.4196454584598541, + "learning_rate": 1.641547921930042e-05, + "loss": 0.5344, + "step": 20344 + }, + { + "epoch": 0.5586216364634816, + "grad_norm": 0.3594636023044586, + "learning_rate": 1.641514791524388e-05, + "loss": 0.5517, + "step": 20345 + }, + { + "epoch": 0.5586490939044481, + "grad_norm": 0.3402223587036133, + "learning_rate": 1.641481659922104e-05, + "loss": 0.4633, + "step": 20346 + }, + { + "epoch": 0.5586765513454146, + "grad_norm": 0.39276614785194397, + "learning_rate": 1.6414485271232522e-05, + "loss": 0.5008, + "step": 20347 + }, + { + "epoch": 0.5587040087863812, + "grad_norm": 0.36474600434303284, + "learning_rate": 1.6414153931278943e-05, + "loss": 0.5357, + "step": 20348 + }, + { + "epoch": 0.5587314662273476, + "grad_norm": 0.3599739968776703, + "learning_rate": 1.6413822579360916e-05, + "loss": 0.542, + "step": 20349 + }, + { + "epoch": 0.5587589236683141, + "grad_norm": 0.33788546919822693, + "learning_rate": 1.6413491215479066e-05, + "loss": 0.542, + "step": 20350 + }, + { + "epoch": 0.5587863811092806, + "grad_norm": 0.3851885199546814, + "learning_rate": 1.6413159839634005e-05, + "loss": 0.5092, + "step": 20351 + }, + { + "epoch": 0.5588138385502471, + "grad_norm": 0.35423871874809265, + "learning_rate": 1.6412828451826357e-05, + "loss": 0.4999, + "step": 20352 + }, + { + "epoch": 0.5588412959912136, + "grad_norm": 0.37766820192337036, + "learning_rate": 1.641249705205674e-05, + "loss": 0.5341, + "step": 20353 + }, + { + "epoch": 0.5588687534321801, + "grad_norm": 0.3562803566455841, + "learning_rate": 1.641216564032576e-05, + "loss": 0.4646, + "step": 20354 + }, + { + "epoch": 0.5588962108731467, + "grad_norm": 0.4079355001449585, + "learning_rate": 1.641183421663405e-05, + "loss": 0.4001, + "step": 20355 + }, + { + "epoch": 0.5589236683141131, + "grad_norm": 0.4961546063423157, + "learning_rate": 1.6411502780982225e-05, + "loss": 0.4566, + "step": 20356 + }, + { + "epoch": 0.5589511257550797, + "grad_norm": 0.41032278537750244, + "learning_rate": 1.6411171333370898e-05, + "loss": 0.5528, + "step": 20357 + }, + { + "epoch": 0.5589785831960461, + "grad_norm": 0.38336676359176636, + "learning_rate": 1.641083987380069e-05, + "loss": 0.517, + "step": 20358 + }, + { + "epoch": 0.5590060406370126, + "grad_norm": 0.4026595652103424, + "learning_rate": 1.641050840227222e-05, + "loss": 0.4975, + "step": 20359 + }, + { + "epoch": 0.5590334980779791, + "grad_norm": 0.38539934158325195, + "learning_rate": 1.6410176918786106e-05, + "loss": 0.5285, + "step": 20360 + }, + { + "epoch": 0.5590609555189456, + "grad_norm": 0.4016752541065216, + "learning_rate": 1.6409845423342967e-05, + "loss": 0.4864, + "step": 20361 + }, + { + "epoch": 0.5590884129599122, + "grad_norm": 0.38341930508613586, + "learning_rate": 1.640951391594342e-05, + "loss": 0.3714, + "step": 20362 + }, + { + "epoch": 0.5591158704008786, + "grad_norm": 0.39866358041763306, + "learning_rate": 1.6409182396588086e-05, + "loss": 0.4346, + "step": 20363 + }, + { + "epoch": 0.5591433278418452, + "grad_norm": 0.3741065263748169, + "learning_rate": 1.6408850865277577e-05, + "loss": 0.5289, + "step": 20364 + }, + { + "epoch": 0.5591707852828116, + "grad_norm": 0.36073973774909973, + "learning_rate": 1.640851932201252e-05, + "loss": 0.4967, + "step": 20365 + }, + { + "epoch": 0.5591982427237782, + "grad_norm": 0.38460129499435425, + "learning_rate": 1.6408187766793524e-05, + "loss": 0.4745, + "step": 20366 + }, + { + "epoch": 0.5592257001647446, + "grad_norm": 0.365278035402298, + "learning_rate": 1.6407856199621216e-05, + "loss": 0.4322, + "step": 20367 + }, + { + "epoch": 0.5592531576057111, + "grad_norm": 0.3886480927467346, + "learning_rate": 1.6407524620496212e-05, + "loss": 0.533, + "step": 20368 + }, + { + "epoch": 0.5592806150466777, + "grad_norm": 0.37326401472091675, + "learning_rate": 1.640719302941913e-05, + "loss": 0.4704, + "step": 20369 + }, + { + "epoch": 0.5593080724876441, + "grad_norm": 0.44183996319770813, + "learning_rate": 1.6406861426390582e-05, + "loss": 0.5959, + "step": 20370 + }, + { + "epoch": 0.5593355299286107, + "grad_norm": 0.3419206738471985, + "learning_rate": 1.64065298114112e-05, + "loss": 0.4639, + "step": 20371 + }, + { + "epoch": 0.5593629873695771, + "grad_norm": 0.3730214238166809, + "learning_rate": 1.6406198184481587e-05, + "loss": 0.5031, + "step": 20372 + }, + { + "epoch": 0.5593904448105437, + "grad_norm": 0.36851269006729126, + "learning_rate": 1.6405866545602377e-05, + "loss": 0.4117, + "step": 20373 + }, + { + "epoch": 0.5594179022515101, + "grad_norm": 0.38713768124580383, + "learning_rate": 1.640553489477418e-05, + "loss": 0.4534, + "step": 20374 + }, + { + "epoch": 0.5594453596924767, + "grad_norm": 0.34501412510871887, + "learning_rate": 1.6405203231997613e-05, + "loss": 0.5184, + "step": 20375 + }, + { + "epoch": 0.5594728171334432, + "grad_norm": 0.42019110918045044, + "learning_rate": 1.64048715572733e-05, + "loss": 0.5626, + "step": 20376 + }, + { + "epoch": 0.5595002745744097, + "grad_norm": 0.3870408535003662, + "learning_rate": 1.6404539870601858e-05, + "loss": 0.5051, + "step": 20377 + }, + { + "epoch": 0.5595277320153762, + "grad_norm": 0.4209171533584595, + "learning_rate": 1.6404208171983904e-05, + "loss": 0.5551, + "step": 20378 + }, + { + "epoch": 0.5595551894563426, + "grad_norm": 0.3529369831085205, + "learning_rate": 1.6403876461420058e-05, + "loss": 0.5361, + "step": 20379 + }, + { + "epoch": 0.5595826468973092, + "grad_norm": 0.37445148825645447, + "learning_rate": 1.6403544738910937e-05, + "loss": 0.4731, + "step": 20380 + }, + { + "epoch": 0.5596101043382756, + "grad_norm": 0.38545623421669006, + "learning_rate": 1.6403213004457163e-05, + "loss": 0.4258, + "step": 20381 + }, + { + "epoch": 0.5596375617792422, + "grad_norm": 0.40043121576309204, + "learning_rate": 1.640288125805935e-05, + "loss": 0.5229, + "step": 20382 + }, + { + "epoch": 0.5596650192202087, + "grad_norm": 0.406954824924469, + "learning_rate": 1.6402549499718124e-05, + "loss": 0.5272, + "step": 20383 + }, + { + "epoch": 0.5596924766611752, + "grad_norm": 0.38899990916252136, + "learning_rate": 1.64022177294341e-05, + "loss": 0.4918, + "step": 20384 + }, + { + "epoch": 0.5597199341021417, + "grad_norm": 0.3781847655773163, + "learning_rate": 1.6401885947207894e-05, + "loss": 0.6125, + "step": 20385 + }, + { + "epoch": 0.5597473915431082, + "grad_norm": 0.35411155223846436, + "learning_rate": 1.640155415304013e-05, + "loss": 0.5248, + "step": 20386 + }, + { + "epoch": 0.5597748489840747, + "grad_norm": 0.40730565786361694, + "learning_rate": 1.6401222346931422e-05, + "loss": 0.4913, + "step": 20387 + }, + { + "epoch": 0.5598023064250411, + "grad_norm": 0.4449562132358551, + "learning_rate": 1.6400890528882392e-05, + "loss": 0.436, + "step": 20388 + }, + { + "epoch": 0.5598297638660077, + "grad_norm": 0.34323009848594666, + "learning_rate": 1.640055869889366e-05, + "loss": 0.4688, + "step": 20389 + }, + { + "epoch": 0.5598572213069742, + "grad_norm": 0.41725197434425354, + "learning_rate": 1.640022685696584e-05, + "loss": 0.4817, + "step": 20390 + }, + { + "epoch": 0.5598846787479407, + "grad_norm": 0.36510708928108215, + "learning_rate": 1.6399895003099556e-05, + "loss": 0.5198, + "step": 20391 + }, + { + "epoch": 0.5599121361889072, + "grad_norm": 0.471561998128891, + "learning_rate": 1.6399563137295428e-05, + "loss": 0.5221, + "step": 20392 + }, + { + "epoch": 0.5599395936298737, + "grad_norm": 0.42003482580184937, + "learning_rate": 1.639923125955407e-05, + "loss": 0.423, + "step": 20393 + }, + { + "epoch": 0.5599670510708402, + "grad_norm": 0.425125390291214, + "learning_rate": 1.63988993698761e-05, + "loss": 0.531, + "step": 20394 + }, + { + "epoch": 0.5599945085118067, + "grad_norm": 0.4303893744945526, + "learning_rate": 1.639856746826214e-05, + "loss": 0.531, + "step": 20395 + }, + { + "epoch": 0.5600219659527732, + "grad_norm": 0.40897682309150696, + "learning_rate": 1.6398235554712815e-05, + "loss": 0.6042, + "step": 20396 + }, + { + "epoch": 0.5600494233937398, + "grad_norm": 0.3443061411380768, + "learning_rate": 1.6397903629228734e-05, + "loss": 0.5019, + "step": 20397 + }, + { + "epoch": 0.5600768808347062, + "grad_norm": 0.4209725558757782, + "learning_rate": 1.6397571691810522e-05, + "loss": 0.5717, + "step": 20398 + }, + { + "epoch": 0.5601043382756727, + "grad_norm": 0.42184874415397644, + "learning_rate": 1.6397239742458798e-05, + "loss": 0.5397, + "step": 20399 + }, + { + "epoch": 0.5601317957166392, + "grad_norm": 0.3342571258544922, + "learning_rate": 1.6396907781174182e-05, + "loss": 0.4405, + "step": 20400 + }, + { + "epoch": 0.5601592531576057, + "grad_norm": 0.41280466318130493, + "learning_rate": 1.6396575807957288e-05, + "loss": 0.5489, + "step": 20401 + }, + { + "epoch": 0.5601867105985722, + "grad_norm": 0.37300026416778564, + "learning_rate": 1.639624382280874e-05, + "loss": 0.5055, + "step": 20402 + }, + { + "epoch": 0.5602141680395387, + "grad_norm": 0.33588624000549316, + "learning_rate": 1.6395911825729154e-05, + "loss": 0.4558, + "step": 20403 + }, + { + "epoch": 0.5602416254805053, + "grad_norm": 0.37404316663742065, + "learning_rate": 1.6395579816719155e-05, + "loss": 0.51, + "step": 20404 + }, + { + "epoch": 0.5602690829214717, + "grad_norm": 0.36189815402030945, + "learning_rate": 1.6395247795779353e-05, + "loss": 0.541, + "step": 20405 + }, + { + "epoch": 0.5602965403624383, + "grad_norm": 0.3943541347980499, + "learning_rate": 1.6394915762910377e-05, + "loss": 0.5908, + "step": 20406 + }, + { + "epoch": 0.5603239978034047, + "grad_norm": 0.3556414246559143, + "learning_rate": 1.639458371811284e-05, + "loss": 0.5195, + "step": 20407 + }, + { + "epoch": 0.5603514552443712, + "grad_norm": 0.41693490743637085, + "learning_rate": 1.6394251661387363e-05, + "loss": 0.6013, + "step": 20408 + }, + { + "epoch": 0.5603789126853377, + "grad_norm": 0.3524847626686096, + "learning_rate": 1.639391959273457e-05, + "loss": 0.4729, + "step": 20409 + }, + { + "epoch": 0.5604063701263042, + "grad_norm": 0.35882365703582764, + "learning_rate": 1.6393587512155073e-05, + "loss": 0.5655, + "step": 20410 + }, + { + "epoch": 0.5604338275672708, + "grad_norm": 0.4013114869594574, + "learning_rate": 1.6393255419649495e-05, + "loss": 0.4661, + "step": 20411 + }, + { + "epoch": 0.5604612850082372, + "grad_norm": 0.3949970006942749, + "learning_rate": 1.6392923315218453e-05, + "loss": 0.5321, + "step": 20412 + }, + { + "epoch": 0.5604887424492038, + "grad_norm": 0.3918602764606476, + "learning_rate": 1.6392591198862572e-05, + "loss": 0.5157, + "step": 20413 + }, + { + "epoch": 0.5605161998901702, + "grad_norm": 0.40095436573028564, + "learning_rate": 1.6392259070582466e-05, + "loss": 0.5455, + "step": 20414 + }, + { + "epoch": 0.5605436573311368, + "grad_norm": 0.37093040347099304, + "learning_rate": 1.6391926930378762e-05, + "loss": 0.5755, + "step": 20415 + }, + { + "epoch": 0.5605711147721032, + "grad_norm": 0.33476921916007996, + "learning_rate": 1.6391594778252064e-05, + "loss": 0.4887, + "step": 20416 + }, + { + "epoch": 0.5605985722130697, + "grad_norm": 0.38206055760383606, + "learning_rate": 1.639126261420301e-05, + "loss": 0.5912, + "step": 20417 + }, + { + "epoch": 0.5606260296540363, + "grad_norm": 0.35230541229248047, + "learning_rate": 1.6390930438232212e-05, + "loss": 0.4501, + "step": 20418 + }, + { + "epoch": 0.5606534870950027, + "grad_norm": 0.3683376908302307, + "learning_rate": 1.6390598250340282e-05, + "loss": 0.4785, + "step": 20419 + }, + { + "epoch": 0.5606809445359693, + "grad_norm": 0.3759908974170685, + "learning_rate": 1.639026605052785e-05, + "loss": 0.547, + "step": 20420 + }, + { + "epoch": 0.5607084019769357, + "grad_norm": 0.36794212460517883, + "learning_rate": 1.6389933838795532e-05, + "loss": 0.5454, + "step": 20421 + }, + { + "epoch": 0.5607358594179023, + "grad_norm": 0.34605079889297485, + "learning_rate": 1.6389601615143952e-05, + "loss": 0.4995, + "step": 20422 + }, + { + "epoch": 0.5607633168588687, + "grad_norm": 0.39285945892333984, + "learning_rate": 1.6389269379573723e-05, + "loss": 0.5574, + "step": 20423 + }, + { + "epoch": 0.5607907742998353, + "grad_norm": 0.3999477028846741, + "learning_rate": 1.6388937132085463e-05, + "loss": 0.5773, + "step": 20424 + }, + { + "epoch": 0.5608182317408018, + "grad_norm": 0.3509948253631592, + "learning_rate": 1.6388604872679802e-05, + "loss": 0.4419, + "step": 20425 + }, + { + "epoch": 0.5608456891817682, + "grad_norm": 0.36289724707603455, + "learning_rate": 1.6388272601357353e-05, + "loss": 0.4979, + "step": 20426 + }, + { + "epoch": 0.5608731466227348, + "grad_norm": 0.32781678438186646, + "learning_rate": 1.6387940318118736e-05, + "loss": 0.4255, + "step": 20427 + }, + { + "epoch": 0.5609006040637012, + "grad_norm": 0.42143720388412476, + "learning_rate": 1.6387608022964568e-05, + "loss": 0.5306, + "step": 20428 + }, + { + "epoch": 0.5609280615046678, + "grad_norm": 0.4510056674480438, + "learning_rate": 1.6387275715895475e-05, + "loss": 0.503, + "step": 20429 + }, + { + "epoch": 0.5609555189456342, + "grad_norm": 0.38523828983306885, + "learning_rate": 1.638694339691208e-05, + "loss": 0.5108, + "step": 20430 + }, + { + "epoch": 0.5609829763866008, + "grad_norm": 0.37688902020454407, + "learning_rate": 1.6386611066014987e-05, + "loss": 0.5498, + "step": 20431 + }, + { + "epoch": 0.5610104338275673, + "grad_norm": 0.39533963799476624, + "learning_rate": 1.638627872320483e-05, + "loss": 0.5535, + "step": 20432 + }, + { + "epoch": 0.5610378912685338, + "grad_norm": 0.4607431888580322, + "learning_rate": 1.638594636848223e-05, + "loss": 0.6839, + "step": 20433 + }, + { + "epoch": 0.5610653487095003, + "grad_norm": 0.3446064293384552, + "learning_rate": 1.6385614001847795e-05, + "loss": 0.4959, + "step": 20434 + }, + { + "epoch": 0.5610928061504667, + "grad_norm": 0.38158342242240906, + "learning_rate": 1.6385281623302156e-05, + "loss": 0.4809, + "step": 20435 + }, + { + "epoch": 0.5611202635914333, + "grad_norm": 0.38331338763237, + "learning_rate": 1.6384949232845925e-05, + "loss": 0.4966, + "step": 20436 + }, + { + "epoch": 0.5611477210323997, + "grad_norm": 0.3598603904247284, + "learning_rate": 1.638461683047973e-05, + "loss": 0.5126, + "step": 20437 + }, + { + "epoch": 0.5611751784733663, + "grad_norm": 0.4226660132408142, + "learning_rate": 1.6384284416204185e-05, + "loss": 0.6028, + "step": 20438 + }, + { + "epoch": 0.5612026359143328, + "grad_norm": 0.5422347784042358, + "learning_rate": 1.6383951990019912e-05, + "loss": 0.5188, + "step": 20439 + }, + { + "epoch": 0.5612300933552993, + "grad_norm": 0.3989599347114563, + "learning_rate": 1.638361955192753e-05, + "loss": 0.5339, + "step": 20440 + }, + { + "epoch": 0.5612575507962658, + "grad_norm": 0.3992835283279419, + "learning_rate": 1.638328710192766e-05, + "loss": 0.4268, + "step": 20441 + }, + { + "epoch": 0.5612850082372323, + "grad_norm": 0.3764842450618744, + "learning_rate": 1.6382954640020924e-05, + "loss": 0.5273, + "step": 20442 + }, + { + "epoch": 0.5613124656781988, + "grad_norm": 0.5680180788040161, + "learning_rate": 1.638262216620794e-05, + "loss": 0.5577, + "step": 20443 + }, + { + "epoch": 0.5613399231191653, + "grad_norm": 0.38193342089653015, + "learning_rate": 1.6382289680489326e-05, + "loss": 0.5073, + "step": 20444 + }, + { + "epoch": 0.5613673805601318, + "grad_norm": 0.40487897396087646, + "learning_rate": 1.6381957182865708e-05, + "loss": 0.4969, + "step": 20445 + }, + { + "epoch": 0.5613948380010984, + "grad_norm": 0.4708491861820221, + "learning_rate": 1.6381624673337703e-05, + "loss": 0.5174, + "step": 20446 + }, + { + "epoch": 0.5614222954420648, + "grad_norm": 0.41322144865989685, + "learning_rate": 1.638129215190593e-05, + "loss": 0.5695, + "step": 20447 + }, + { + "epoch": 0.5614497528830313, + "grad_norm": 0.37356120347976685, + "learning_rate": 1.638095961857101e-05, + "loss": 0.5248, + "step": 20448 + }, + { + "epoch": 0.5614772103239978, + "grad_norm": 0.4235539436340332, + "learning_rate": 1.6380627073333566e-05, + "loss": 0.6227, + "step": 20449 + }, + { + "epoch": 0.5615046677649643, + "grad_norm": 0.41826963424682617, + "learning_rate": 1.6380294516194214e-05, + "loss": 0.5614, + "step": 20450 + }, + { + "epoch": 0.5615321252059308, + "grad_norm": 0.3944030702114105, + "learning_rate": 1.6379961947153576e-05, + "loss": 0.4832, + "step": 20451 + }, + { + "epoch": 0.5615595826468973, + "grad_norm": 0.3871837258338928, + "learning_rate": 1.6379629366212273e-05, + "loss": 0.4851, + "step": 20452 + }, + { + "epoch": 0.5615870400878639, + "grad_norm": 0.35583120584487915, + "learning_rate": 1.6379296773370927e-05, + "loss": 0.4568, + "step": 20453 + }, + { + "epoch": 0.5616144975288303, + "grad_norm": 0.38624107837677, + "learning_rate": 1.6378964168630153e-05, + "loss": 0.524, + "step": 20454 + }, + { + "epoch": 0.5616419549697969, + "grad_norm": 0.3432099223136902, + "learning_rate": 1.637863155199058e-05, + "loss": 0.5285, + "step": 20455 + }, + { + "epoch": 0.5616694124107633, + "grad_norm": 0.40188169479370117, + "learning_rate": 1.637829892345282e-05, + "loss": 0.486, + "step": 20456 + }, + { + "epoch": 0.5616968698517298, + "grad_norm": 0.3673308193683624, + "learning_rate": 1.6377966283017497e-05, + "loss": 0.4796, + "step": 20457 + }, + { + "epoch": 0.5617243272926963, + "grad_norm": 0.3970005512237549, + "learning_rate": 1.637763363068523e-05, + "loss": 0.5812, + "step": 20458 + }, + { + "epoch": 0.5617517847336628, + "grad_norm": 0.3507813513278961, + "learning_rate": 1.637730096645664e-05, + "loss": 0.5108, + "step": 20459 + }, + { + "epoch": 0.5617792421746294, + "grad_norm": 0.3389035761356354, + "learning_rate": 1.6376968290332352e-05, + "loss": 0.4885, + "step": 20460 + }, + { + "epoch": 0.5618066996155958, + "grad_norm": 0.4083121716976166, + "learning_rate": 1.6376635602312983e-05, + "loss": 0.5542, + "step": 20461 + }, + { + "epoch": 0.5618341570565624, + "grad_norm": 0.4159267246723175, + "learning_rate": 1.637630290239915e-05, + "loss": 0.5982, + "step": 20462 + }, + { + "epoch": 0.5618616144975288, + "grad_norm": 0.3484916090965271, + "learning_rate": 1.6375970190591478e-05, + "loss": 0.606, + "step": 20463 + }, + { + "epoch": 0.5618890719384954, + "grad_norm": 0.4427848160266876, + "learning_rate": 1.637563746689059e-05, + "loss": 0.4523, + "step": 20464 + }, + { + "epoch": 0.5619165293794618, + "grad_norm": 0.35420048236846924, + "learning_rate": 1.6375304731297096e-05, + "loss": 0.4767, + "step": 20465 + }, + { + "epoch": 0.5619439868204283, + "grad_norm": 0.46661150455474854, + "learning_rate": 1.637497198381163e-05, + "loss": 0.5099, + "step": 20466 + }, + { + "epoch": 0.5619714442613949, + "grad_norm": 0.33800482749938965, + "learning_rate": 1.6374639224434807e-05, + "loss": 0.5166, + "step": 20467 + }, + { + "epoch": 0.5619989017023613, + "grad_norm": 0.3861345052719116, + "learning_rate": 1.6374306453167245e-05, + "loss": 0.4851, + "step": 20468 + }, + { + "epoch": 0.5620263591433279, + "grad_norm": 0.3708644509315491, + "learning_rate": 1.637397367000957e-05, + "loss": 0.5617, + "step": 20469 + }, + { + "epoch": 0.5620538165842943, + "grad_norm": 0.5480740666389465, + "learning_rate": 1.6373640874962393e-05, + "loss": 0.5286, + "step": 20470 + }, + { + "epoch": 0.5620812740252609, + "grad_norm": 0.374271035194397, + "learning_rate": 1.6373308068026344e-05, + "loss": 0.5005, + "step": 20471 + }, + { + "epoch": 0.5621087314662273, + "grad_norm": 0.41216185688972473, + "learning_rate": 1.6372975249202045e-05, + "loss": 0.5153, + "step": 20472 + }, + { + "epoch": 0.5621361889071939, + "grad_norm": 0.3257453441619873, + "learning_rate": 1.6372642418490114e-05, + "loss": 0.4802, + "step": 20473 + }, + { + "epoch": 0.5621636463481604, + "grad_norm": 0.3707584738731384, + "learning_rate": 1.637230957589117e-05, + "loss": 0.4928, + "step": 20474 + }, + { + "epoch": 0.5621911037891268, + "grad_norm": 0.4390774667263031, + "learning_rate": 1.6371976721405828e-05, + "loss": 0.4771, + "step": 20475 + }, + { + "epoch": 0.5622185612300934, + "grad_norm": 0.3385012745857239, + "learning_rate": 1.6371643855034724e-05, + "loss": 0.4805, + "step": 20476 + }, + { + "epoch": 0.5622460186710598, + "grad_norm": 0.41389021277427673, + "learning_rate": 1.6371310976778463e-05, + "loss": 0.5064, + "step": 20477 + }, + { + "epoch": 0.5622734761120264, + "grad_norm": 0.3531809449195862, + "learning_rate": 1.637097808663768e-05, + "loss": 0.4797, + "step": 20478 + }, + { + "epoch": 0.5623009335529928, + "grad_norm": 0.43921464681625366, + "learning_rate": 1.6370645184612988e-05, + "loss": 0.4555, + "step": 20479 + }, + { + "epoch": 0.5623283909939594, + "grad_norm": 0.380204439163208, + "learning_rate": 1.637031227070501e-05, + "loss": 0.464, + "step": 20480 + }, + { + "epoch": 0.5623558484349259, + "grad_norm": 0.3961621820926666, + "learning_rate": 1.6369979344914364e-05, + "loss": 0.4871, + "step": 20481 + }, + { + "epoch": 0.5623833058758924, + "grad_norm": 0.3721272647380829, + "learning_rate": 1.6369646407241677e-05, + "loss": 0.4711, + "step": 20482 + }, + { + "epoch": 0.5624107633168589, + "grad_norm": 0.3873903155326843, + "learning_rate": 1.636931345768756e-05, + "loss": 0.4953, + "step": 20483 + }, + { + "epoch": 0.5624382207578253, + "grad_norm": 0.38575196266174316, + "learning_rate": 1.6368980496252647e-05, + "loss": 0.4748, + "step": 20484 + }, + { + "epoch": 0.5624656781987919, + "grad_norm": 0.351106196641922, + "learning_rate": 1.636864752293755e-05, + "loss": 0.4359, + "step": 20485 + }, + { + "epoch": 0.5624931356397583, + "grad_norm": 0.36982810497283936, + "learning_rate": 1.6368314537742894e-05, + "loss": 0.4639, + "step": 20486 + }, + { + "epoch": 0.5625205930807249, + "grad_norm": 0.391107976436615, + "learning_rate": 1.6367981540669297e-05, + "loss": 0.5383, + "step": 20487 + }, + { + "epoch": 0.5625480505216913, + "grad_norm": 0.35814088582992554, + "learning_rate": 1.636764853171738e-05, + "loss": 0.565, + "step": 20488 + }, + { + "epoch": 0.5625755079626579, + "grad_norm": 0.3530321419239044, + "learning_rate": 1.6367315510887772e-05, + "loss": 0.5116, + "step": 20489 + }, + { + "epoch": 0.5626029654036244, + "grad_norm": 0.3638050854206085, + "learning_rate": 1.6366982478181085e-05, + "loss": 0.5173, + "step": 20490 + }, + { + "epoch": 0.5626304228445909, + "grad_norm": 0.39184877276420593, + "learning_rate": 1.6366649433597945e-05, + "loss": 0.5285, + "step": 20491 + }, + { + "epoch": 0.5626578802855574, + "grad_norm": 0.33228856325149536, + "learning_rate": 1.636631637713897e-05, + "loss": 0.502, + "step": 20492 + }, + { + "epoch": 0.5626853377265238, + "grad_norm": 0.37851908802986145, + "learning_rate": 1.6365983308804786e-05, + "loss": 0.4957, + "step": 20493 + }, + { + "epoch": 0.5627127951674904, + "grad_norm": 1.2311793565750122, + "learning_rate": 1.6365650228596006e-05, + "loss": 0.5018, + "step": 20494 + }, + { + "epoch": 0.5627402526084568, + "grad_norm": 0.37898775935173035, + "learning_rate": 1.6365317136513265e-05, + "loss": 0.4589, + "step": 20495 + }, + { + "epoch": 0.5627677100494234, + "grad_norm": 0.39820873737335205, + "learning_rate": 1.6364984032557168e-05, + "loss": 0.4892, + "step": 20496 + }, + { + "epoch": 0.5627951674903899, + "grad_norm": 0.3844335973262787, + "learning_rate": 1.6364650916728345e-05, + "loss": 0.5247, + "step": 20497 + }, + { + "epoch": 0.5628226249313564, + "grad_norm": 0.4531098008155823, + "learning_rate": 1.636431778902742e-05, + "loss": 0.4119, + "step": 20498 + }, + { + "epoch": 0.5628500823723229, + "grad_norm": 0.37701529264450073, + "learning_rate": 1.6363984649455012e-05, + "loss": 0.4687, + "step": 20499 + }, + { + "epoch": 0.5628775398132894, + "grad_norm": 0.36235129833221436, + "learning_rate": 1.636365149801174e-05, + "loss": 0.5374, + "step": 20500 + }, + { + "epoch": 0.5629049972542559, + "grad_norm": 0.3634245693683624, + "learning_rate": 1.6363318334698224e-05, + "loss": 0.5438, + "step": 20501 + }, + { + "epoch": 0.5629324546952224, + "grad_norm": 0.37933874130249023, + "learning_rate": 1.6362985159515093e-05, + "loss": 0.5677, + "step": 20502 + }, + { + "epoch": 0.5629599121361889, + "grad_norm": 0.3296794295310974, + "learning_rate": 1.6362651972462963e-05, + "loss": 0.428, + "step": 20503 + }, + { + "epoch": 0.5629873695771554, + "grad_norm": 0.36243385076522827, + "learning_rate": 1.636231877354245e-05, + "loss": 0.4724, + "step": 20504 + }, + { + "epoch": 0.5630148270181219, + "grad_norm": 0.420883446931839, + "learning_rate": 1.636198556275419e-05, + "loss": 0.5682, + "step": 20505 + }, + { + "epoch": 0.5630422844590884, + "grad_norm": 0.409927636384964, + "learning_rate": 1.6361652340098794e-05, + "loss": 0.5514, + "step": 20506 + }, + { + "epoch": 0.5630697419000549, + "grad_norm": 0.3318100869655609, + "learning_rate": 1.6361319105576884e-05, + "loss": 0.4381, + "step": 20507 + }, + { + "epoch": 0.5630971993410214, + "grad_norm": 0.40708494186401367, + "learning_rate": 1.6360985859189083e-05, + "loss": 0.556, + "step": 20508 + }, + { + "epoch": 0.5631246567819879, + "grad_norm": 0.3521324098110199, + "learning_rate": 1.6360652600936015e-05, + "loss": 0.5071, + "step": 20509 + }, + { + "epoch": 0.5631521142229544, + "grad_norm": 0.39441928267478943, + "learning_rate": 1.63603193308183e-05, + "loss": 0.5, + "step": 20510 + }, + { + "epoch": 0.563179571663921, + "grad_norm": 0.38234609365463257, + "learning_rate": 1.635998604883656e-05, + "loss": 0.5001, + "step": 20511 + }, + { + "epoch": 0.5632070291048874, + "grad_norm": 0.38792383670806885, + "learning_rate": 1.6359652754991414e-05, + "loss": 0.5311, + "step": 20512 + }, + { + "epoch": 0.563234486545854, + "grad_norm": 0.38032466173171997, + "learning_rate": 1.635931944928349e-05, + "loss": 0.4997, + "step": 20513 + }, + { + "epoch": 0.5632619439868204, + "grad_norm": 0.9085074663162231, + "learning_rate": 1.6358986131713402e-05, + "loss": 0.5253, + "step": 20514 + }, + { + "epoch": 0.5632894014277869, + "grad_norm": 0.4365297257900238, + "learning_rate": 1.6358652802281776e-05, + "loss": 0.5287, + "step": 20515 + }, + { + "epoch": 0.5633168588687534, + "grad_norm": 0.37197962403297424, + "learning_rate": 1.6358319460989232e-05, + "loss": 0.4992, + "step": 20516 + }, + { + "epoch": 0.5633443163097199, + "grad_norm": 0.3430827856063843, + "learning_rate": 1.6357986107836395e-05, + "loss": 0.458, + "step": 20517 + }, + { + "epoch": 0.5633717737506865, + "grad_norm": 0.4030255675315857, + "learning_rate": 1.6357652742823883e-05, + "loss": 0.5169, + "step": 20518 + }, + { + "epoch": 0.5633992311916529, + "grad_norm": 0.36033737659454346, + "learning_rate": 1.635731936595232e-05, + "loss": 0.51, + "step": 20519 + }, + { + "epoch": 0.5634266886326195, + "grad_norm": 0.4218926727771759, + "learning_rate": 1.635698597722233e-05, + "loss": 0.5234, + "step": 20520 + }, + { + "epoch": 0.5634541460735859, + "grad_norm": 0.38989686965942383, + "learning_rate": 1.635665257663453e-05, + "loss": 0.5357, + "step": 20521 + }, + { + "epoch": 0.5634816035145525, + "grad_norm": 1.6895956993103027, + "learning_rate": 1.6356319164189544e-05, + "loss": 0.5343, + "step": 20522 + }, + { + "epoch": 0.5635090609555189, + "grad_norm": 0.3469582498073578, + "learning_rate": 1.6355985739887992e-05, + "loss": 0.4968, + "step": 20523 + }, + { + "epoch": 0.5635365183964854, + "grad_norm": 0.3591817021369934, + "learning_rate": 1.63556523037305e-05, + "loss": 0.5546, + "step": 20524 + }, + { + "epoch": 0.563563975837452, + "grad_norm": 0.35904863476753235, + "learning_rate": 1.6355318855717686e-05, + "loss": 0.4843, + "step": 20525 + }, + { + "epoch": 0.5635914332784184, + "grad_norm": 0.422787606716156, + "learning_rate": 1.635498539585018e-05, + "loss": 0.484, + "step": 20526 + }, + { + "epoch": 0.563618890719385, + "grad_norm": 0.33982449769973755, + "learning_rate": 1.6354651924128592e-05, + "loss": 0.5312, + "step": 20527 + }, + { + "epoch": 0.5636463481603514, + "grad_norm": 0.3451935052871704, + "learning_rate": 1.635431844055355e-05, + "loss": 0.4159, + "step": 20528 + }, + { + "epoch": 0.563673805601318, + "grad_norm": 0.3671390414237976, + "learning_rate": 1.635398494512568e-05, + "loss": 0.4797, + "step": 20529 + }, + { + "epoch": 0.5637012630422844, + "grad_norm": 0.43958479166030884, + "learning_rate": 1.6353651437845596e-05, + "loss": 0.5209, + "step": 20530 + }, + { + "epoch": 0.563728720483251, + "grad_norm": 0.42482295632362366, + "learning_rate": 1.6353317918713928e-05, + "loss": 0.6279, + "step": 20531 + }, + { + "epoch": 0.5637561779242175, + "grad_norm": 0.32556870579719543, + "learning_rate": 1.6352984387731294e-05, + "loss": 0.4519, + "step": 20532 + }, + { + "epoch": 0.5637836353651839, + "grad_norm": 0.41658976674079895, + "learning_rate": 1.6352650844898316e-05, + "loss": 0.5664, + "step": 20533 + }, + { + "epoch": 0.5638110928061505, + "grad_norm": 0.7431604862213135, + "learning_rate": 1.6352317290215615e-05, + "loss": 0.5009, + "step": 20534 + }, + { + "epoch": 0.5638385502471169, + "grad_norm": 0.40580061078071594, + "learning_rate": 1.6351983723683814e-05, + "loss": 0.5781, + "step": 20535 + }, + { + "epoch": 0.5638660076880835, + "grad_norm": 0.43296632170677185, + "learning_rate": 1.6351650145303536e-05, + "loss": 0.5489, + "step": 20536 + }, + { + "epoch": 0.5638934651290499, + "grad_norm": 0.4112752079963684, + "learning_rate": 1.6351316555075406e-05, + "loss": 0.5608, + "step": 20537 + }, + { + "epoch": 0.5639209225700165, + "grad_norm": 0.38435736298561096, + "learning_rate": 1.6350982953000042e-05, + "loss": 0.4707, + "step": 20538 + }, + { + "epoch": 0.563948380010983, + "grad_norm": 0.38297969102859497, + "learning_rate": 1.635064933907807e-05, + "loss": 0.4393, + "step": 20539 + }, + { + "epoch": 0.5639758374519495, + "grad_norm": 0.3612883687019348, + "learning_rate": 1.6350315713310107e-05, + "loss": 0.5467, + "step": 20540 + }, + { + "epoch": 0.564003294892916, + "grad_norm": 0.36423230171203613, + "learning_rate": 1.6349982075696778e-05, + "loss": 0.5446, + "step": 20541 + }, + { + "epoch": 0.5640307523338824, + "grad_norm": 0.4499000906944275, + "learning_rate": 1.634964842623871e-05, + "loss": 0.4986, + "step": 20542 + }, + { + "epoch": 0.564058209774849, + "grad_norm": 0.4161536395549774, + "learning_rate": 1.6349314764936516e-05, + "loss": 0.4904, + "step": 20543 + }, + { + "epoch": 0.5640856672158154, + "grad_norm": 0.41302576661109924, + "learning_rate": 1.634898109179083e-05, + "loss": 0.5273, + "step": 20544 + }, + { + "epoch": 0.564113124656782, + "grad_norm": 0.40110400319099426, + "learning_rate": 1.6348647406802264e-05, + "loss": 0.4771, + "step": 20545 + }, + { + "epoch": 0.5641405820977485, + "grad_norm": 0.39073535799980164, + "learning_rate": 1.6348313709971445e-05, + "loss": 0.5179, + "step": 20546 + }, + { + "epoch": 0.564168039538715, + "grad_norm": 0.34682658314704895, + "learning_rate": 1.6347980001298995e-05, + "loss": 0.4034, + "step": 20547 + }, + { + "epoch": 0.5641954969796815, + "grad_norm": 0.39446476101875305, + "learning_rate": 1.6347646280785532e-05, + "loss": 0.5412, + "step": 20548 + }, + { + "epoch": 0.564222954420648, + "grad_norm": 0.5643588900566101, + "learning_rate": 1.6347312548431686e-05, + "loss": 0.4564, + "step": 20549 + }, + { + "epoch": 0.5642504118616145, + "grad_norm": 0.37165629863739014, + "learning_rate": 1.6346978804238076e-05, + "loss": 0.424, + "step": 20550 + }, + { + "epoch": 0.564277869302581, + "grad_norm": 0.3623132109642029, + "learning_rate": 1.6346645048205326e-05, + "loss": 0.4298, + "step": 20551 + }, + { + "epoch": 0.5643053267435475, + "grad_norm": 0.4554952383041382, + "learning_rate": 1.6346311280334054e-05, + "loss": 0.5396, + "step": 20552 + }, + { + "epoch": 0.564332784184514, + "grad_norm": 0.36759448051452637, + "learning_rate": 1.634597750062489e-05, + "loss": 0.4815, + "step": 20553 + }, + { + "epoch": 0.5643602416254805, + "grad_norm": 0.3791283071041107, + "learning_rate": 1.6345643709078452e-05, + "loss": 0.4883, + "step": 20554 + }, + { + "epoch": 0.564387699066447, + "grad_norm": 0.3639301657676697, + "learning_rate": 1.634530990569536e-05, + "loss": 0.5113, + "step": 20555 + }, + { + "epoch": 0.5644151565074135, + "grad_norm": 0.34587305784225464, + "learning_rate": 1.6344976090476242e-05, + "loss": 0.4742, + "step": 20556 + }, + { + "epoch": 0.56444261394838, + "grad_norm": 0.3502821624279022, + "learning_rate": 1.634464226342172e-05, + "loss": 0.4956, + "step": 20557 + }, + { + "epoch": 0.5644700713893465, + "grad_norm": 0.37109315395355225, + "learning_rate": 1.6344308424532412e-05, + "loss": 0.4559, + "step": 20558 + }, + { + "epoch": 0.564497528830313, + "grad_norm": 0.3434556722640991, + "learning_rate": 1.634397457380895e-05, + "loss": 0.4525, + "step": 20559 + }, + { + "epoch": 0.5645249862712796, + "grad_norm": 0.42299553751945496, + "learning_rate": 1.6343640711251946e-05, + "loss": 0.5383, + "step": 20560 + }, + { + "epoch": 0.564552443712246, + "grad_norm": 0.39741137623786926, + "learning_rate": 1.6343306836862026e-05, + "loss": 0.5523, + "step": 20561 + }, + { + "epoch": 0.5645799011532125, + "grad_norm": 0.3998224139213562, + "learning_rate": 1.6342972950639818e-05, + "loss": 0.5939, + "step": 20562 + }, + { + "epoch": 0.564607358594179, + "grad_norm": 0.3534794747829437, + "learning_rate": 1.6342639052585938e-05, + "loss": 0.4206, + "step": 20563 + }, + { + "epoch": 0.5646348160351455, + "grad_norm": 0.3768831491470337, + "learning_rate": 1.634230514270101e-05, + "loss": 0.4989, + "step": 20564 + }, + { + "epoch": 0.564662273476112, + "grad_norm": 0.4396803677082062, + "learning_rate": 1.6341971220985662e-05, + "loss": 0.606, + "step": 20565 + }, + { + "epoch": 0.5646897309170785, + "grad_norm": 0.3371696472167969, + "learning_rate": 1.6341637287440516e-05, + "loss": 0.5437, + "step": 20566 + }, + { + "epoch": 0.5647171883580451, + "grad_norm": 0.34939059615135193, + "learning_rate": 1.634130334206619e-05, + "loss": 0.4362, + "step": 20567 + }, + { + "epoch": 0.5647446457990115, + "grad_norm": 0.3888326585292816, + "learning_rate": 1.6340969384863304e-05, + "loss": 0.5749, + "step": 20568 + }, + { + "epoch": 0.5647721032399781, + "grad_norm": 0.4547484219074249, + "learning_rate": 1.6340635415832492e-05, + "loss": 0.5706, + "step": 20569 + }, + { + "epoch": 0.5647995606809445, + "grad_norm": 0.5122742652893066, + "learning_rate": 1.6340301434974372e-05, + "loss": 0.52, + "step": 20570 + }, + { + "epoch": 0.564827018121911, + "grad_norm": 0.42701253294944763, + "learning_rate": 1.6339967442289566e-05, + "loss": 0.6288, + "step": 20571 + }, + { + "epoch": 0.5648544755628775, + "grad_norm": 0.37062329053878784, + "learning_rate": 1.6339633437778695e-05, + "loss": 0.4396, + "step": 20572 + }, + { + "epoch": 0.564881933003844, + "grad_norm": 0.3864344656467438, + "learning_rate": 1.6339299421442387e-05, + "loss": 0.536, + "step": 20573 + }, + { + "epoch": 0.5649093904448106, + "grad_norm": 0.40329092741012573, + "learning_rate": 1.6338965393281258e-05, + "loss": 0.4858, + "step": 20574 + }, + { + "epoch": 0.564936847885777, + "grad_norm": 0.4018467962741852, + "learning_rate": 1.633863135329594e-05, + "loss": 0.476, + "step": 20575 + }, + { + "epoch": 0.5649643053267436, + "grad_norm": 0.3691905438899994, + "learning_rate": 1.6338297301487047e-05, + "loss": 0.4344, + "step": 20576 + }, + { + "epoch": 0.56499176276771, + "grad_norm": 0.39165663719177246, + "learning_rate": 1.633796323785521e-05, + "loss": 0.575, + "step": 20577 + }, + { + "epoch": 0.5650192202086766, + "grad_norm": 0.35369277000427246, + "learning_rate": 1.6337629162401048e-05, + "loss": 0.5044, + "step": 20578 + }, + { + "epoch": 0.565046677649643, + "grad_norm": 0.40925437211990356, + "learning_rate": 1.6337295075125185e-05, + "loss": 0.4858, + "step": 20579 + }, + { + "epoch": 0.5650741350906096, + "grad_norm": 0.3648555278778076, + "learning_rate": 1.6336960976028242e-05, + "loss": 0.5997, + "step": 20580 + }, + { + "epoch": 0.5651015925315761, + "grad_norm": 0.33511003851890564, + "learning_rate": 1.6336626865110846e-05, + "loss": 0.4579, + "step": 20581 + }, + { + "epoch": 0.5651290499725425, + "grad_norm": 0.32862919569015503, + "learning_rate": 1.6336292742373617e-05, + "loss": 0.4863, + "step": 20582 + }, + { + "epoch": 0.5651565074135091, + "grad_norm": 0.3548717796802521, + "learning_rate": 1.633595860781718e-05, + "loss": 0.4828, + "step": 20583 + }, + { + "epoch": 0.5651839648544755, + "grad_norm": 0.38445577025413513, + "learning_rate": 1.6335624461442163e-05, + "loss": 0.5373, + "step": 20584 + }, + { + "epoch": 0.5652114222954421, + "grad_norm": 0.38352179527282715, + "learning_rate": 1.633529030324918e-05, + "loss": 0.4574, + "step": 20585 + }, + { + "epoch": 0.5652388797364085, + "grad_norm": 0.3737546503543854, + "learning_rate": 1.6334956133238857e-05, + "loss": 0.4499, + "step": 20586 + }, + { + "epoch": 0.5652663371773751, + "grad_norm": 0.3576468825340271, + "learning_rate": 1.6334621951411823e-05, + "loss": 0.5772, + "step": 20587 + }, + { + "epoch": 0.5652937946183416, + "grad_norm": 0.4032418429851532, + "learning_rate": 1.6334287757768693e-05, + "loss": 0.4726, + "step": 20588 + }, + { + "epoch": 0.565321252059308, + "grad_norm": 0.4289211630821228, + "learning_rate": 1.6333953552310098e-05, + "loss": 0.5093, + "step": 20589 + }, + { + "epoch": 0.5653487095002746, + "grad_norm": 0.4060177505016327, + "learning_rate": 1.6333619335036656e-05, + "loss": 0.4829, + "step": 20590 + }, + { + "epoch": 0.565376166941241, + "grad_norm": 0.38711047172546387, + "learning_rate": 1.6333285105948996e-05, + "loss": 0.5025, + "step": 20591 + }, + { + "epoch": 0.5654036243822076, + "grad_norm": 0.38262107968330383, + "learning_rate": 1.6332950865047733e-05, + "loss": 0.519, + "step": 20592 + }, + { + "epoch": 0.565431081823174, + "grad_norm": 0.3862617611885071, + "learning_rate": 1.6332616612333494e-05, + "loss": 0.4674, + "step": 20593 + }, + { + "epoch": 0.5654585392641406, + "grad_norm": 0.3858075737953186, + "learning_rate": 1.633228234780691e-05, + "loss": 0.525, + "step": 20594 + }, + { + "epoch": 0.5654859967051071, + "grad_norm": 0.34345167875289917, + "learning_rate": 1.6331948071468598e-05, + "loss": 0.4533, + "step": 20595 + }, + { + "epoch": 0.5655134541460736, + "grad_norm": 0.42987656593322754, + "learning_rate": 1.6331613783319177e-05, + "loss": 0.5383, + "step": 20596 + }, + { + "epoch": 0.5655409115870401, + "grad_norm": 0.4039032757282257, + "learning_rate": 1.633127948335928e-05, + "loss": 0.6023, + "step": 20597 + }, + { + "epoch": 0.5655683690280066, + "grad_norm": 0.37783902883529663, + "learning_rate": 1.6330945171589525e-05, + "loss": 0.4647, + "step": 20598 + }, + { + "epoch": 0.5655958264689731, + "grad_norm": 0.38612523674964905, + "learning_rate": 1.6330610848010535e-05, + "loss": 0.53, + "step": 20599 + }, + { + "epoch": 0.5656232839099395, + "grad_norm": 0.4021594226360321, + "learning_rate": 1.6330276512622936e-05, + "loss": 0.5054, + "step": 20600 + }, + { + "epoch": 0.5656507413509061, + "grad_norm": 0.38004571199417114, + "learning_rate": 1.632994216542735e-05, + "loss": 0.5199, + "step": 20601 + }, + { + "epoch": 0.5656781987918726, + "grad_norm": 0.38056614995002747, + "learning_rate": 1.6329607806424405e-05, + "loss": 0.4945, + "step": 20602 + }, + { + "epoch": 0.5657056562328391, + "grad_norm": 0.33992618322372437, + "learning_rate": 1.632927343561472e-05, + "loss": 0.5024, + "step": 20603 + }, + { + "epoch": 0.5657331136738056, + "grad_norm": 0.38081398606300354, + "learning_rate": 1.6328939052998918e-05, + "loss": 0.4859, + "step": 20604 + }, + { + "epoch": 0.5657605711147721, + "grad_norm": 0.3655245900154114, + "learning_rate": 1.6328604658577626e-05, + "loss": 0.5711, + "step": 20605 + }, + { + "epoch": 0.5657880285557386, + "grad_norm": 0.35513079166412354, + "learning_rate": 1.6328270252351466e-05, + "loss": 0.5539, + "step": 20606 + }, + { + "epoch": 0.5658154859967051, + "grad_norm": 0.44321513175964355, + "learning_rate": 1.6327935834321062e-05, + "loss": 0.5245, + "step": 20607 + }, + { + "epoch": 0.5658429434376716, + "grad_norm": 0.5778223276138306, + "learning_rate": 1.6327601404487037e-05, + "loss": 0.6337, + "step": 20608 + }, + { + "epoch": 0.5658704008786382, + "grad_norm": 0.37304192781448364, + "learning_rate": 1.6327266962850018e-05, + "loss": 0.528, + "step": 20609 + }, + { + "epoch": 0.5658978583196046, + "grad_norm": 0.38240474462509155, + "learning_rate": 1.6326932509410624e-05, + "loss": 0.5773, + "step": 20610 + }, + { + "epoch": 0.5659253157605711, + "grad_norm": 0.36785581707954407, + "learning_rate": 1.6326598044169487e-05, + "loss": 0.4724, + "step": 20611 + }, + { + "epoch": 0.5659527732015376, + "grad_norm": 0.37862733006477356, + "learning_rate": 1.632626356712722e-05, + "loss": 0.5163, + "step": 20612 + }, + { + "epoch": 0.5659802306425041, + "grad_norm": 0.3996167480945587, + "learning_rate": 1.6325929078284454e-05, + "loss": 0.5266, + "step": 20613 + }, + { + "epoch": 0.5660076880834706, + "grad_norm": 0.3466898798942566, + "learning_rate": 1.632559457764181e-05, + "loss": 0.5301, + "step": 20614 + }, + { + "epoch": 0.5660351455244371, + "grad_norm": 0.3221718668937683, + "learning_rate": 1.6325260065199916e-05, + "loss": 0.4801, + "step": 20615 + }, + { + "epoch": 0.5660626029654037, + "grad_norm": 0.35972461104393005, + "learning_rate": 1.6324925540959393e-05, + "loss": 0.5777, + "step": 20616 + }, + { + "epoch": 0.5660900604063701, + "grad_norm": 0.36867082118988037, + "learning_rate": 1.6324591004920863e-05, + "loss": 0.582, + "step": 20617 + }, + { + "epoch": 0.5661175178473367, + "grad_norm": 0.39707890152931213, + "learning_rate": 1.6324256457084954e-05, + "loss": 0.4785, + "step": 20618 + }, + { + "epoch": 0.5661449752883031, + "grad_norm": 0.37865668535232544, + "learning_rate": 1.6323921897452287e-05, + "loss": 0.504, + "step": 20619 + }, + { + "epoch": 0.5661724327292696, + "grad_norm": 0.3887976109981537, + "learning_rate": 1.6323587326023488e-05, + "loss": 0.4716, + "step": 20620 + }, + { + "epoch": 0.5661998901702361, + "grad_norm": 0.48873403668403625, + "learning_rate": 1.6323252742799182e-05, + "loss": 0.5692, + "step": 20621 + }, + { + "epoch": 0.5662273476112026, + "grad_norm": 0.47238636016845703, + "learning_rate": 1.632291814777999e-05, + "loss": 0.4266, + "step": 20622 + }, + { + "epoch": 0.5662548050521692, + "grad_norm": 0.40625491738319397, + "learning_rate": 1.6322583540966535e-05, + "loss": 0.505, + "step": 20623 + }, + { + "epoch": 0.5662822624931356, + "grad_norm": 0.39686572551727295, + "learning_rate": 1.6322248922359447e-05, + "loss": 0.4815, + "step": 20624 + }, + { + "epoch": 0.5663097199341022, + "grad_norm": 0.36998096108436584, + "learning_rate": 1.6321914291959347e-05, + "loss": 0.5339, + "step": 20625 + }, + { + "epoch": 0.5663371773750686, + "grad_norm": 0.3614301085472107, + "learning_rate": 1.6321579649766857e-05, + "loss": 0.46, + "step": 20626 + }, + { + "epoch": 0.5663646348160352, + "grad_norm": 0.45861056447029114, + "learning_rate": 1.6321244995782605e-05, + "loss": 0.5394, + "step": 20627 + }, + { + "epoch": 0.5663920922570016, + "grad_norm": 0.4457368850708008, + "learning_rate": 1.6320910330007213e-05, + "loss": 0.5521, + "step": 20628 + }, + { + "epoch": 0.5664195496979681, + "grad_norm": 0.3619052469730377, + "learning_rate": 1.6320575652441303e-05, + "loss": 0.4457, + "step": 20629 + }, + { + "epoch": 0.5664470071389347, + "grad_norm": 0.36880192160606384, + "learning_rate": 1.6320240963085507e-05, + "loss": 0.4812, + "step": 20630 + }, + { + "epoch": 0.5664744645799011, + "grad_norm": 0.35267242789268494, + "learning_rate": 1.6319906261940442e-05, + "loss": 0.5019, + "step": 20631 + }, + { + "epoch": 0.5665019220208677, + "grad_norm": 0.3671497106552124, + "learning_rate": 1.6319571549006735e-05, + "loss": 0.494, + "step": 20632 + }, + { + "epoch": 0.5665293794618341, + "grad_norm": 0.4104071259498596, + "learning_rate": 1.631923682428501e-05, + "loss": 0.5459, + "step": 20633 + }, + { + "epoch": 0.5665568369028007, + "grad_norm": 0.408366322517395, + "learning_rate": 1.6318902087775893e-05, + "loss": 0.5498, + "step": 20634 + }, + { + "epoch": 0.5665842943437671, + "grad_norm": 0.36114656925201416, + "learning_rate": 1.6318567339480004e-05, + "loss": 0.4774, + "step": 20635 + }, + { + "epoch": 0.5666117517847337, + "grad_norm": 0.3958599269390106, + "learning_rate": 1.6318232579397973e-05, + "loss": 0.4726, + "step": 20636 + }, + { + "epoch": 0.5666392092257002, + "grad_norm": 0.3740885853767395, + "learning_rate": 1.631789780753042e-05, + "loss": 0.4366, + "step": 20637 + }, + { + "epoch": 0.5666666666666667, + "grad_norm": 0.4239836037158966, + "learning_rate": 1.6317563023877974e-05, + "loss": 0.5034, + "step": 20638 + }, + { + "epoch": 0.5666941241076332, + "grad_norm": 0.3563210964202881, + "learning_rate": 1.6317228228441252e-05, + "loss": 0.5036, + "step": 20639 + }, + { + "epoch": 0.5667215815485996, + "grad_norm": 0.3894434869289398, + "learning_rate": 1.6316893421220886e-05, + "loss": 0.4667, + "step": 20640 + }, + { + "epoch": 0.5667490389895662, + "grad_norm": 0.35611388087272644, + "learning_rate": 1.63165586022175e-05, + "loss": 0.4945, + "step": 20641 + }, + { + "epoch": 0.5667764964305326, + "grad_norm": 0.3770473003387451, + "learning_rate": 1.631622377143171e-05, + "loss": 0.4952, + "step": 20642 + }, + { + "epoch": 0.5668039538714992, + "grad_norm": 0.3648149371147156, + "learning_rate": 1.631588892886415e-05, + "loss": 0.5729, + "step": 20643 + }, + { + "epoch": 0.5668314113124657, + "grad_norm": 0.6053686141967773, + "learning_rate": 1.6315554074515444e-05, + "loss": 0.5167, + "step": 20644 + }, + { + "epoch": 0.5668588687534322, + "grad_norm": 0.4376186430454254, + "learning_rate": 1.6315219208386207e-05, + "loss": 0.5963, + "step": 20645 + }, + { + "epoch": 0.5668863261943987, + "grad_norm": 0.3602019250392914, + "learning_rate": 1.6314884330477075e-05, + "loss": 0.5479, + "step": 20646 + }, + { + "epoch": 0.5669137836353652, + "grad_norm": 0.37741193175315857, + "learning_rate": 1.631454944078867e-05, + "loss": 0.4755, + "step": 20647 + }, + { + "epoch": 0.5669412410763317, + "grad_norm": 0.40227916836738586, + "learning_rate": 1.6314214539321614e-05, + "loss": 0.4988, + "step": 20648 + }, + { + "epoch": 0.5669686985172981, + "grad_norm": 0.4117346704006195, + "learning_rate": 1.631387962607653e-05, + "loss": 0.5559, + "step": 20649 + }, + { + "epoch": 0.5669961559582647, + "grad_norm": 0.40767139196395874, + "learning_rate": 1.631354470105405e-05, + "loss": 0.5064, + "step": 20650 + }, + { + "epoch": 0.5670236133992312, + "grad_norm": 0.3871402442455292, + "learning_rate": 1.631320976425479e-05, + "loss": 0.4596, + "step": 20651 + }, + { + "epoch": 0.5670510708401977, + "grad_norm": 0.3619860112667084, + "learning_rate": 1.631287481567938e-05, + "loss": 0.4459, + "step": 20652 + }, + { + "epoch": 0.5670785282811642, + "grad_norm": 0.44328808784484863, + "learning_rate": 1.6312539855328446e-05, + "loss": 0.4874, + "step": 20653 + }, + { + "epoch": 0.5671059857221307, + "grad_norm": 0.4399421811103821, + "learning_rate": 1.6312204883202606e-05, + "loss": 0.5613, + "step": 20654 + }, + { + "epoch": 0.5671334431630972, + "grad_norm": 0.3754312992095947, + "learning_rate": 1.631186989930249e-05, + "loss": 0.4939, + "step": 20655 + }, + { + "epoch": 0.5671609006040637, + "grad_norm": 0.37809258699417114, + "learning_rate": 1.6311534903628722e-05, + "loss": 0.486, + "step": 20656 + }, + { + "epoch": 0.5671883580450302, + "grad_norm": 0.3609181046485901, + "learning_rate": 1.631119989618193e-05, + "loss": 0.5496, + "step": 20657 + }, + { + "epoch": 0.5672158154859968, + "grad_norm": 0.3719877600669861, + "learning_rate": 1.6310864876962736e-05, + "loss": 0.4589, + "step": 20658 + }, + { + "epoch": 0.5672432729269632, + "grad_norm": 0.3936196267604828, + "learning_rate": 1.6310529845971762e-05, + "loss": 0.5409, + "step": 20659 + }, + { + "epoch": 0.5672707303679297, + "grad_norm": 0.35478487610816956, + "learning_rate": 1.6310194803209636e-05, + "loss": 0.4424, + "step": 20660 + }, + { + "epoch": 0.5672981878088962, + "grad_norm": 0.3888084590435028, + "learning_rate": 1.6309859748676985e-05, + "loss": 0.5723, + "step": 20661 + }, + { + "epoch": 0.5673256452498627, + "grad_norm": 0.37210872769355774, + "learning_rate": 1.630952468237443e-05, + "loss": 0.4834, + "step": 20662 + }, + { + "epoch": 0.5673531026908292, + "grad_norm": 0.3810447156429291, + "learning_rate": 1.6309189604302598e-05, + "loss": 0.6435, + "step": 20663 + }, + { + "epoch": 0.5673805601317957, + "grad_norm": 0.36711788177490234, + "learning_rate": 1.6308854514462113e-05, + "loss": 0.5457, + "step": 20664 + }, + { + "epoch": 0.5674080175727623, + "grad_norm": 0.3771422803401947, + "learning_rate": 1.6308519412853603e-05, + "loss": 0.4932, + "step": 20665 + }, + { + "epoch": 0.5674354750137287, + "grad_norm": 0.384708970785141, + "learning_rate": 1.6308184299477688e-05, + "loss": 0.4517, + "step": 20666 + }, + { + "epoch": 0.5674629324546953, + "grad_norm": 0.40480321645736694, + "learning_rate": 1.6307849174334997e-05, + "loss": 0.5561, + "step": 20667 + }, + { + "epoch": 0.5674903898956617, + "grad_norm": 0.3800649344921112, + "learning_rate": 1.6307514037426155e-05, + "loss": 0.5578, + "step": 20668 + }, + { + "epoch": 0.5675178473366282, + "grad_norm": 0.35724785923957825, + "learning_rate": 1.6307178888751785e-05, + "loss": 0.4581, + "step": 20669 + }, + { + "epoch": 0.5675453047775947, + "grad_norm": 0.4085018038749695, + "learning_rate": 1.6306843728312513e-05, + "loss": 0.4831, + "step": 20670 + }, + { + "epoch": 0.5675727622185612, + "grad_norm": 0.44947561621665955, + "learning_rate": 1.6306508556108967e-05, + "loss": 0.4686, + "step": 20671 + }, + { + "epoch": 0.5676002196595278, + "grad_norm": 0.3525388538837433, + "learning_rate": 1.6306173372141766e-05, + "loss": 0.5209, + "step": 20672 + }, + { + "epoch": 0.5676276771004942, + "grad_norm": 0.38501447439193726, + "learning_rate": 1.6305838176411545e-05, + "loss": 0.4661, + "step": 20673 + }, + { + "epoch": 0.5676551345414608, + "grad_norm": 0.47890838980674744, + "learning_rate": 1.6305502968918915e-05, + "loss": 0.5151, + "step": 20674 + }, + { + "epoch": 0.5676825919824272, + "grad_norm": 0.3509874641895294, + "learning_rate": 1.6305167749664515e-05, + "loss": 0.563, + "step": 20675 + }, + { + "epoch": 0.5677100494233938, + "grad_norm": 0.4421960115432739, + "learning_rate": 1.6304832518648964e-05, + "loss": 0.5555, + "step": 20676 + }, + { + "epoch": 0.5677375068643602, + "grad_norm": 0.3734467327594757, + "learning_rate": 1.630449727587289e-05, + "loss": 0.5124, + "step": 20677 + }, + { + "epoch": 0.5677649643053267, + "grad_norm": 0.37376290559768677, + "learning_rate": 1.6304162021336914e-05, + "loss": 0.4483, + "step": 20678 + }, + { + "epoch": 0.5677924217462933, + "grad_norm": 0.36558496952056885, + "learning_rate": 1.630382675504166e-05, + "loss": 0.4724, + "step": 20679 + }, + { + "epoch": 0.5678198791872597, + "grad_norm": 0.3931330144405365, + "learning_rate": 1.6303491476987762e-05, + "loss": 0.535, + "step": 20680 + }, + { + "epoch": 0.5678473366282263, + "grad_norm": 0.41127002239227295, + "learning_rate": 1.6303156187175843e-05, + "loss": 0.5436, + "step": 20681 + }, + { + "epoch": 0.5678747940691927, + "grad_norm": 0.3633127212524414, + "learning_rate": 1.630282088560652e-05, + "loss": 0.5502, + "step": 20682 + }, + { + "epoch": 0.5679022515101593, + "grad_norm": 0.3832589089870453, + "learning_rate": 1.6302485572280427e-05, + "loss": 0.5613, + "step": 20683 + }, + { + "epoch": 0.5679297089511257, + "grad_norm": 0.4189336895942688, + "learning_rate": 1.6302150247198188e-05, + "loss": 0.5514, + "step": 20684 + }, + { + "epoch": 0.5679571663920923, + "grad_norm": 0.3969650864601135, + "learning_rate": 1.6301814910360426e-05, + "loss": 0.5791, + "step": 20685 + }, + { + "epoch": 0.5679846238330588, + "grad_norm": 0.32324519753456116, + "learning_rate": 1.630147956176777e-05, + "loss": 0.4231, + "step": 20686 + }, + { + "epoch": 0.5680120812740252, + "grad_norm": 0.3660754859447479, + "learning_rate": 1.630114420142084e-05, + "loss": 0.4809, + "step": 20687 + }, + { + "epoch": 0.5680395387149918, + "grad_norm": 0.3757556974887848, + "learning_rate": 1.630080882932027e-05, + "loss": 0.4609, + "step": 20688 + }, + { + "epoch": 0.5680669961559582, + "grad_norm": 0.33011165261268616, + "learning_rate": 1.6300473445466676e-05, + "loss": 0.4868, + "step": 20689 + }, + { + "epoch": 0.5680944535969248, + "grad_norm": 0.40938350558280945, + "learning_rate": 1.6300138049860687e-05, + "loss": 0.5726, + "step": 20690 + }, + { + "epoch": 0.5681219110378912, + "grad_norm": 0.37226852774620056, + "learning_rate": 1.6299802642502932e-05, + "loss": 0.4965, + "step": 20691 + }, + { + "epoch": 0.5681493684788578, + "grad_norm": 0.40996435284614563, + "learning_rate": 1.6299467223394035e-05, + "loss": 0.4976, + "step": 20692 + }, + { + "epoch": 0.5681768259198243, + "grad_norm": 0.3443029522895813, + "learning_rate": 1.6299131792534623e-05, + "loss": 0.4405, + "step": 20693 + }, + { + "epoch": 0.5682042833607908, + "grad_norm": 0.4025702476501465, + "learning_rate": 1.6298796349925316e-05, + "loss": 0.421, + "step": 20694 + }, + { + "epoch": 0.5682317408017573, + "grad_norm": 0.467110812664032, + "learning_rate": 1.6298460895566743e-05, + "loss": 0.5649, + "step": 20695 + }, + { + "epoch": 0.5682591982427238, + "grad_norm": 0.3692920207977295, + "learning_rate": 1.6298125429459532e-05, + "loss": 0.5803, + "step": 20696 + }, + { + "epoch": 0.5682866556836903, + "grad_norm": 0.32728537917137146, + "learning_rate": 1.6297789951604306e-05, + "loss": 0.4064, + "step": 20697 + }, + { + "epoch": 0.5683141131246567, + "grad_norm": 0.36716797947883606, + "learning_rate": 1.6297454462001692e-05, + "loss": 0.4541, + "step": 20698 + }, + { + "epoch": 0.5683415705656233, + "grad_norm": 0.3563305735588074, + "learning_rate": 1.629711896065232e-05, + "loss": 0.5207, + "step": 20699 + }, + { + "epoch": 0.5683690280065898, + "grad_norm": 0.4711093604564667, + "learning_rate": 1.6296783447556803e-05, + "loss": 0.5506, + "step": 20700 + }, + { + "epoch": 0.5683964854475563, + "grad_norm": 0.364092081785202, + "learning_rate": 1.6296447922715782e-05, + "loss": 0.4402, + "step": 20701 + }, + { + "epoch": 0.5684239428885228, + "grad_norm": 0.38028061389923096, + "learning_rate": 1.6296112386129873e-05, + "loss": 0.4157, + "step": 20702 + }, + { + "epoch": 0.5684514003294893, + "grad_norm": 0.4465927183628082, + "learning_rate": 1.6295776837799704e-05, + "loss": 0.4349, + "step": 20703 + }, + { + "epoch": 0.5684788577704558, + "grad_norm": 0.3695653975009918, + "learning_rate": 1.6295441277725907e-05, + "loss": 0.499, + "step": 20704 + }, + { + "epoch": 0.5685063152114223, + "grad_norm": 0.3308243453502655, + "learning_rate": 1.6295105705909096e-05, + "loss": 0.4501, + "step": 20705 + }, + { + "epoch": 0.5685337726523888, + "grad_norm": 0.47346052527427673, + "learning_rate": 1.6294770122349908e-05, + "loss": 0.5306, + "step": 20706 + }, + { + "epoch": 0.5685612300933554, + "grad_norm": 0.4769842028617859, + "learning_rate": 1.6294434527048963e-05, + "loss": 0.597, + "step": 20707 + }, + { + "epoch": 0.5685886875343218, + "grad_norm": 0.3582133650779724, + "learning_rate": 1.629409892000689e-05, + "loss": 0.4829, + "step": 20708 + }, + { + "epoch": 0.5686161449752883, + "grad_norm": 0.37460020184516907, + "learning_rate": 1.629376330122431e-05, + "loss": 0.5115, + "step": 20709 + }, + { + "epoch": 0.5686436024162548, + "grad_norm": 0.3501301407814026, + "learning_rate": 1.6293427670701856e-05, + "loss": 0.503, + "step": 20710 + }, + { + "epoch": 0.5686710598572213, + "grad_norm": 0.395555704832077, + "learning_rate": 1.629309202844015e-05, + "loss": 0.5321, + "step": 20711 + }, + { + "epoch": 0.5686985172981878, + "grad_norm": 0.3623402714729309, + "learning_rate": 1.629275637443982e-05, + "loss": 0.5655, + "step": 20712 + }, + { + "epoch": 0.5687259747391543, + "grad_norm": 0.3653925061225891, + "learning_rate": 1.629242070870149e-05, + "loss": 0.4906, + "step": 20713 + }, + { + "epoch": 0.5687534321801209, + "grad_norm": 0.3690163493156433, + "learning_rate": 1.6292085031225787e-05, + "loss": 0.5121, + "step": 20714 + }, + { + "epoch": 0.5687808896210873, + "grad_norm": 0.45783352851867676, + "learning_rate": 1.629174934201334e-05, + "loss": 0.5668, + "step": 20715 + }, + { + "epoch": 0.5688083470620539, + "grad_norm": 0.3524676561355591, + "learning_rate": 1.6291413641064767e-05, + "loss": 0.4856, + "step": 20716 + }, + { + "epoch": 0.5688358045030203, + "grad_norm": 0.35023990273475647, + "learning_rate": 1.62910779283807e-05, + "loss": 0.442, + "step": 20717 + }, + { + "epoch": 0.5688632619439868, + "grad_norm": 0.4144151508808136, + "learning_rate": 1.629074220396177e-05, + "loss": 0.4952, + "step": 20718 + }, + { + "epoch": 0.5688907193849533, + "grad_norm": 0.3929734528064728, + "learning_rate": 1.6290406467808594e-05, + "loss": 0.5873, + "step": 20719 + }, + { + "epoch": 0.5689181768259198, + "grad_norm": 0.32519757747650146, + "learning_rate": 1.6290070719921806e-05, + "loss": 0.4721, + "step": 20720 + }, + { + "epoch": 0.5689456342668864, + "grad_norm": 0.36838969588279724, + "learning_rate": 1.6289734960302026e-05, + "loss": 0.5232, + "step": 20721 + }, + { + "epoch": 0.5689730917078528, + "grad_norm": 0.3608323037624359, + "learning_rate": 1.628939918894988e-05, + "loss": 0.4537, + "step": 20722 + }, + { + "epoch": 0.5690005491488194, + "grad_norm": 0.37272539734840393, + "learning_rate": 1.6289063405866003e-05, + "loss": 0.5036, + "step": 20723 + }, + { + "epoch": 0.5690280065897858, + "grad_norm": 0.41393107175827026, + "learning_rate": 1.628872761105101e-05, + "loss": 0.5339, + "step": 20724 + }, + { + "epoch": 0.5690554640307524, + "grad_norm": 0.4032852053642273, + "learning_rate": 1.6288391804505537e-05, + "loss": 0.5571, + "step": 20725 + }, + { + "epoch": 0.5690829214717188, + "grad_norm": 0.36378344893455505, + "learning_rate": 1.6288055986230206e-05, + "loss": 0.4508, + "step": 20726 + }, + { + "epoch": 0.5691103789126853, + "grad_norm": 0.44989144802093506, + "learning_rate": 1.628772015622564e-05, + "loss": 0.4847, + "step": 20727 + }, + { + "epoch": 0.5691378363536519, + "grad_norm": 0.37476539611816406, + "learning_rate": 1.628738431449247e-05, + "loss": 0.6091, + "step": 20728 + }, + { + "epoch": 0.5691652937946183, + "grad_norm": 0.44592922925949097, + "learning_rate": 1.6287048461031325e-05, + "loss": 0.5321, + "step": 20729 + }, + { + "epoch": 0.5691927512355849, + "grad_norm": 0.37697380781173706, + "learning_rate": 1.6286712595842824e-05, + "loss": 0.4531, + "step": 20730 + }, + { + "epoch": 0.5692202086765513, + "grad_norm": 0.35438278317451477, + "learning_rate": 1.62863767189276e-05, + "loss": 0.4983, + "step": 20731 + }, + { + "epoch": 0.5692476661175179, + "grad_norm": 0.3275255560874939, + "learning_rate": 1.6286040830286278e-05, + "loss": 0.4352, + "step": 20732 + }, + { + "epoch": 0.5692751235584843, + "grad_norm": 0.395128071308136, + "learning_rate": 1.628570492991948e-05, + "loss": 0.4879, + "step": 20733 + }, + { + "epoch": 0.5693025809994509, + "grad_norm": 0.3747974634170532, + "learning_rate": 1.6285369017827838e-05, + "loss": 0.5018, + "step": 20734 + }, + { + "epoch": 0.5693300384404174, + "grad_norm": 0.3588474690914154, + "learning_rate": 1.6285033094011978e-05, + "loss": 0.4793, + "step": 20735 + }, + { + "epoch": 0.5693574958813838, + "grad_norm": 0.38819620013237, + "learning_rate": 1.628469715847252e-05, + "loss": 0.4896, + "step": 20736 + }, + { + "epoch": 0.5693849533223504, + "grad_norm": 0.36193135380744934, + "learning_rate": 1.6284361211210104e-05, + "loss": 0.5608, + "step": 20737 + }, + { + "epoch": 0.5694124107633168, + "grad_norm": 0.37300702929496765, + "learning_rate": 1.6284025252225345e-05, + "loss": 0.5027, + "step": 20738 + }, + { + "epoch": 0.5694398682042834, + "grad_norm": 0.4326159358024597, + "learning_rate": 1.6283689281518872e-05, + "loss": 0.5242, + "step": 20739 + }, + { + "epoch": 0.5694673256452498, + "grad_norm": 0.36351820826530457, + "learning_rate": 1.6283353299091317e-05, + "loss": 0.4641, + "step": 20740 + }, + { + "epoch": 0.5694947830862164, + "grad_norm": 0.39974188804626465, + "learning_rate": 1.6283017304943296e-05, + "loss": 0.4745, + "step": 20741 + }, + { + "epoch": 0.5695222405271829, + "grad_norm": 0.3838856816291809, + "learning_rate": 1.6282681299075445e-05, + "loss": 0.458, + "step": 20742 + }, + { + "epoch": 0.5695496979681494, + "grad_norm": 0.3757867217063904, + "learning_rate": 1.6282345281488393e-05, + "loss": 0.484, + "step": 20743 + }, + { + "epoch": 0.5695771554091159, + "grad_norm": 0.4008026719093323, + "learning_rate": 1.6282009252182758e-05, + "loss": 0.5413, + "step": 20744 + }, + { + "epoch": 0.5696046128500823, + "grad_norm": 0.3786238133907318, + "learning_rate": 1.628167321115917e-05, + "loss": 0.5206, + "step": 20745 + }, + { + "epoch": 0.5696320702910489, + "grad_norm": 0.4045695662498474, + "learning_rate": 1.628133715841826e-05, + "loss": 0.5099, + "step": 20746 + }, + { + "epoch": 0.5696595277320153, + "grad_norm": 0.32261624932289124, + "learning_rate": 1.6281001093960648e-05, + "loss": 0.441, + "step": 20747 + }, + { + "epoch": 0.5696869851729819, + "grad_norm": 0.3629243075847626, + "learning_rate": 1.6280665017786966e-05, + "loss": 0.5246, + "step": 20748 + }, + { + "epoch": 0.5697144426139484, + "grad_norm": 0.4072001278400421, + "learning_rate": 1.628032892989784e-05, + "loss": 0.5547, + "step": 20749 + }, + { + "epoch": 0.5697419000549149, + "grad_norm": 0.3848189413547516, + "learning_rate": 1.6279992830293894e-05, + "loss": 0.5154, + "step": 20750 + }, + { + "epoch": 0.5697693574958814, + "grad_norm": 0.35627439618110657, + "learning_rate": 1.6279656718975757e-05, + "loss": 0.482, + "step": 20751 + }, + { + "epoch": 0.5697968149368479, + "grad_norm": 0.34826377034187317, + "learning_rate": 1.6279320595944053e-05, + "loss": 0.4264, + "step": 20752 + }, + { + "epoch": 0.5698242723778144, + "grad_norm": 0.3492867052555084, + "learning_rate": 1.6278984461199422e-05, + "loss": 0.5389, + "step": 20753 + }, + { + "epoch": 0.5698517298187808, + "grad_norm": 0.34018373489379883, + "learning_rate": 1.6278648314742473e-05, + "loss": 0.4602, + "step": 20754 + }, + { + "epoch": 0.5698791872597474, + "grad_norm": 0.4100825786590576, + "learning_rate": 1.627831215657384e-05, + "loss": 0.5717, + "step": 20755 + }, + { + "epoch": 0.5699066447007138, + "grad_norm": 0.40944650769233704, + "learning_rate": 1.6277975986694156e-05, + "loss": 0.5656, + "step": 20756 + }, + { + "epoch": 0.5699341021416804, + "grad_norm": 0.49932214617729187, + "learning_rate": 1.627763980510404e-05, + "loss": 0.4762, + "step": 20757 + }, + { + "epoch": 0.5699615595826469, + "grad_norm": 0.39093250036239624, + "learning_rate": 1.6277303611804124e-05, + "loss": 0.4871, + "step": 20758 + }, + { + "epoch": 0.5699890170236134, + "grad_norm": 0.3415631949901581, + "learning_rate": 1.627696740679503e-05, + "loss": 0.4606, + "step": 20759 + }, + { + "epoch": 0.5700164744645799, + "grad_norm": 0.3604094088077545, + "learning_rate": 1.627663119007739e-05, + "loss": 0.4841, + "step": 20760 + }, + { + "epoch": 0.5700439319055464, + "grad_norm": 0.39569517970085144, + "learning_rate": 1.6276294961651832e-05, + "loss": 0.5709, + "step": 20761 + }, + { + "epoch": 0.5700713893465129, + "grad_norm": 0.38825440406799316, + "learning_rate": 1.6275958721518978e-05, + "loss": 0.5436, + "step": 20762 + }, + { + "epoch": 0.5700988467874794, + "grad_norm": 0.45905476808547974, + "learning_rate": 1.6275622469679458e-05, + "loss": 0.4511, + "step": 20763 + }, + { + "epoch": 0.5701263042284459, + "grad_norm": 0.6257897615432739, + "learning_rate": 1.62752862061339e-05, + "loss": 0.5942, + "step": 20764 + }, + { + "epoch": 0.5701537616694125, + "grad_norm": 0.3969634175300598, + "learning_rate": 1.627494993088293e-05, + "loss": 0.5264, + "step": 20765 + }, + { + "epoch": 0.5701812191103789, + "grad_norm": 0.35166501998901367, + "learning_rate": 1.6274613643927176e-05, + "loss": 0.4563, + "step": 20766 + }, + { + "epoch": 0.5702086765513454, + "grad_norm": 0.47065263986587524, + "learning_rate": 1.6274277345267266e-05, + "loss": 0.5773, + "step": 20767 + }, + { + "epoch": 0.5702361339923119, + "grad_norm": 0.3358026444911957, + "learning_rate": 1.6273941034903825e-05, + "loss": 0.4474, + "step": 20768 + }, + { + "epoch": 0.5702635914332784, + "grad_norm": 0.3949466049671173, + "learning_rate": 1.6273604712837484e-05, + "loss": 0.4536, + "step": 20769 + }, + { + "epoch": 0.5702910488742449, + "grad_norm": 0.3889349102973938, + "learning_rate": 1.627326837906886e-05, + "loss": 0.6163, + "step": 20770 + }, + { + "epoch": 0.5703185063152114, + "grad_norm": 0.36021649837493896, + "learning_rate": 1.6272932033598597e-05, + "loss": 0.4744, + "step": 20771 + }, + { + "epoch": 0.570345963756178, + "grad_norm": 0.34802865982055664, + "learning_rate": 1.6272595676427312e-05, + "loss": 0.5055, + "step": 20772 + }, + { + "epoch": 0.5703734211971444, + "grad_norm": 0.6298422813415527, + "learning_rate": 1.627225930755563e-05, + "loss": 0.6408, + "step": 20773 + }, + { + "epoch": 0.570400878638111, + "grad_norm": 0.4019562602043152, + "learning_rate": 1.6271922926984187e-05, + "loss": 0.5395, + "step": 20774 + }, + { + "epoch": 0.5704283360790774, + "grad_norm": 0.3766404390335083, + "learning_rate": 1.6271586534713604e-05, + "loss": 0.6466, + "step": 20775 + }, + { + "epoch": 0.5704557935200439, + "grad_norm": 0.3403806686401367, + "learning_rate": 1.627125013074451e-05, + "loss": 0.4684, + "step": 20776 + }, + { + "epoch": 0.5704832509610104, + "grad_norm": 0.40492501854896545, + "learning_rate": 1.6270913715077536e-05, + "loss": 0.4909, + "step": 20777 + }, + { + "epoch": 0.5705107084019769, + "grad_norm": 0.3950720429420471, + "learning_rate": 1.6270577287713304e-05, + "loss": 0.5314, + "step": 20778 + }, + { + "epoch": 0.5705381658429435, + "grad_norm": 0.3490827679634094, + "learning_rate": 1.6270240848652442e-05, + "loss": 0.4769, + "step": 20779 + }, + { + "epoch": 0.5705656232839099, + "grad_norm": 0.41417697072029114, + "learning_rate": 1.6269904397895583e-05, + "loss": 0.4648, + "step": 20780 + }, + { + "epoch": 0.5705930807248765, + "grad_norm": 0.3802061975002289, + "learning_rate": 1.626956793544335e-05, + "loss": 0.5899, + "step": 20781 + }, + { + "epoch": 0.5706205381658429, + "grad_norm": 0.3225896656513214, + "learning_rate": 1.626923146129637e-05, + "loss": 0.4419, + "step": 20782 + }, + { + "epoch": 0.5706479956068095, + "grad_norm": 0.3852909505367279, + "learning_rate": 1.6268894975455275e-05, + "loss": 0.5831, + "step": 20783 + }, + { + "epoch": 0.5706754530477759, + "grad_norm": 0.4464075565338135, + "learning_rate": 1.626855847792069e-05, + "loss": 0.5556, + "step": 20784 + }, + { + "epoch": 0.5707029104887424, + "grad_norm": 0.45898228883743286, + "learning_rate": 1.626822196869324e-05, + "loss": 0.5365, + "step": 20785 + }, + { + "epoch": 0.570730367929709, + "grad_norm": 0.3733334243297577, + "learning_rate": 1.626788544777356e-05, + "loss": 0.5002, + "step": 20786 + }, + { + "epoch": 0.5707578253706754, + "grad_norm": 0.437216192483902, + "learning_rate": 1.6267548915162268e-05, + "loss": 0.4999, + "step": 20787 + }, + { + "epoch": 0.570785282811642, + "grad_norm": 0.4099981188774109, + "learning_rate": 1.6267212370859998e-05, + "loss": 0.4634, + "step": 20788 + }, + { + "epoch": 0.5708127402526084, + "grad_norm": 0.3484162390232086, + "learning_rate": 1.626687581486738e-05, + "loss": 0.5316, + "step": 20789 + }, + { + "epoch": 0.570840197693575, + "grad_norm": 0.37505197525024414, + "learning_rate": 1.6266539247185035e-05, + "loss": 0.493, + "step": 20790 + }, + { + "epoch": 0.5708676551345414, + "grad_norm": 0.39721667766571045, + "learning_rate": 1.62662026678136e-05, + "loss": 0.5306, + "step": 20791 + }, + { + "epoch": 0.570895112575508, + "grad_norm": 0.3930392563343048, + "learning_rate": 1.626586607675369e-05, + "loss": 0.621, + "step": 20792 + }, + { + "epoch": 0.5709225700164745, + "grad_norm": 0.39057016372680664, + "learning_rate": 1.6265529474005944e-05, + "loss": 0.5178, + "step": 20793 + }, + { + "epoch": 0.570950027457441, + "grad_norm": 0.41890037059783936, + "learning_rate": 1.6265192859570983e-05, + "loss": 0.4644, + "step": 20794 + }, + { + "epoch": 0.5709774848984075, + "grad_norm": 0.35801297426223755, + "learning_rate": 1.626485623344944e-05, + "loss": 0.4881, + "step": 20795 + }, + { + "epoch": 0.5710049423393739, + "grad_norm": 0.4181388020515442, + "learning_rate": 1.626451959564194e-05, + "loss": 0.4388, + "step": 20796 + }, + { + "epoch": 0.5710323997803405, + "grad_norm": 0.3628963828086853, + "learning_rate": 1.626418294614911e-05, + "loss": 0.419, + "step": 20797 + }, + { + "epoch": 0.5710598572213069, + "grad_norm": 0.363842248916626, + "learning_rate": 1.626384628497158e-05, + "loss": 0.5569, + "step": 20798 + }, + { + "epoch": 0.5710873146622735, + "grad_norm": 0.4370478391647339, + "learning_rate": 1.626350961210998e-05, + "loss": 0.5522, + "step": 20799 + }, + { + "epoch": 0.57111477210324, + "grad_norm": 0.4899080693721771, + "learning_rate": 1.626317292756493e-05, + "loss": 0.5712, + "step": 20800 + }, + { + "epoch": 0.5711422295442065, + "grad_norm": 0.38881683349609375, + "learning_rate": 1.6262836231337073e-05, + "loss": 0.5661, + "step": 20801 + }, + { + "epoch": 0.571169686985173, + "grad_norm": 0.34586387872695923, + "learning_rate": 1.626249952342702e-05, + "loss": 0.4947, + "step": 20802 + }, + { + "epoch": 0.5711971444261394, + "grad_norm": 0.37428396940231323, + "learning_rate": 1.626216280383541e-05, + "loss": 0.4752, + "step": 20803 + }, + { + "epoch": 0.571224601867106, + "grad_norm": 0.3929687738418579, + "learning_rate": 1.6261826072562865e-05, + "loss": 0.4334, + "step": 20804 + }, + { + "epoch": 0.5712520593080724, + "grad_norm": 0.39043331146240234, + "learning_rate": 1.6261489329610017e-05, + "loss": 0.4999, + "step": 20805 + }, + { + "epoch": 0.571279516749039, + "grad_norm": 0.4174078404903412, + "learning_rate": 1.6261152574977496e-05, + "loss": 0.5296, + "step": 20806 + }, + { + "epoch": 0.5713069741900055, + "grad_norm": 0.36779022216796875, + "learning_rate": 1.6260815808665922e-05, + "loss": 0.4548, + "step": 20807 + }, + { + "epoch": 0.571334431630972, + "grad_norm": 0.37761592864990234, + "learning_rate": 1.6260479030675932e-05, + "loss": 0.5019, + "step": 20808 + }, + { + "epoch": 0.5713618890719385, + "grad_norm": 0.3448602557182312, + "learning_rate": 1.6260142241008147e-05, + "loss": 0.5168, + "step": 20809 + }, + { + "epoch": 0.571389346512905, + "grad_norm": 0.3351631760597229, + "learning_rate": 1.6259805439663202e-05, + "loss": 0.4422, + "step": 20810 + }, + { + "epoch": 0.5714168039538715, + "grad_norm": 0.3722843825817108, + "learning_rate": 1.625946862664172e-05, + "loss": 0.5829, + "step": 20811 + }, + { + "epoch": 0.571444261394838, + "grad_norm": 0.3840782344341278, + "learning_rate": 1.625913180194433e-05, + "loss": 0.4595, + "step": 20812 + }, + { + "epoch": 0.5714717188358045, + "grad_norm": 0.3596605360507965, + "learning_rate": 1.6258794965571665e-05, + "loss": 0.4956, + "step": 20813 + }, + { + "epoch": 0.571499176276771, + "grad_norm": 0.39231932163238525, + "learning_rate": 1.6258458117524346e-05, + "loss": 0.5434, + "step": 20814 + }, + { + "epoch": 0.5715266337177375, + "grad_norm": 0.3562372922897339, + "learning_rate": 1.625812125780301e-05, + "loss": 0.5049, + "step": 20815 + }, + { + "epoch": 0.571554091158704, + "grad_norm": 0.4121272563934326, + "learning_rate": 1.6257784386408277e-05, + "loss": 0.4883, + "step": 20816 + }, + { + "epoch": 0.5715815485996705, + "grad_norm": 0.40300092101097107, + "learning_rate": 1.625744750334078e-05, + "loss": 0.4823, + "step": 20817 + }, + { + "epoch": 0.571609006040637, + "grad_norm": 0.35874757170677185, + "learning_rate": 1.6257110608601146e-05, + "loss": 0.4713, + "step": 20818 + }, + { + "epoch": 0.5716364634816035, + "grad_norm": 0.4096626043319702, + "learning_rate": 1.6256773702190004e-05, + "loss": 0.5252, + "step": 20819 + }, + { + "epoch": 0.57166392092257, + "grad_norm": 0.4090612828731537, + "learning_rate": 1.6256436784107982e-05, + "loss": 0.4493, + "step": 20820 + }, + { + "epoch": 0.5716913783635366, + "grad_norm": 0.39767199754714966, + "learning_rate": 1.625609985435571e-05, + "loss": 0.4155, + "step": 20821 + }, + { + "epoch": 0.571718835804503, + "grad_norm": 0.3441488444805145, + "learning_rate": 1.6255762912933812e-05, + "loss": 0.4463, + "step": 20822 + }, + { + "epoch": 0.5717462932454696, + "grad_norm": 0.36611121892929077, + "learning_rate": 1.625542595984292e-05, + "loss": 0.4148, + "step": 20823 + }, + { + "epoch": 0.571773750686436, + "grad_norm": 0.3731817305088043, + "learning_rate": 1.6255088995083664e-05, + "loss": 0.5233, + "step": 20824 + }, + { + "epoch": 0.5718012081274025, + "grad_norm": 1.1249477863311768, + "learning_rate": 1.6254752018656667e-05, + "loss": 0.4851, + "step": 20825 + }, + { + "epoch": 0.571828665568369, + "grad_norm": 0.44153451919555664, + "learning_rate": 1.6254415030562567e-05, + "loss": 0.6318, + "step": 20826 + }, + { + "epoch": 0.5718561230093355, + "grad_norm": 0.5559028387069702, + "learning_rate": 1.6254078030801984e-05, + "loss": 0.4594, + "step": 20827 + }, + { + "epoch": 0.5718835804503021, + "grad_norm": 0.397697776556015, + "learning_rate": 1.625374101937555e-05, + "loss": 0.5461, + "step": 20828 + }, + { + "epoch": 0.5719110378912685, + "grad_norm": 0.35584378242492676, + "learning_rate": 1.625340399628389e-05, + "loss": 0.5161, + "step": 20829 + }, + { + "epoch": 0.5719384953322351, + "grad_norm": 0.3685246706008911, + "learning_rate": 1.6253066961527636e-05, + "loss": 0.4804, + "step": 20830 + }, + { + "epoch": 0.5719659527732015, + "grad_norm": 0.33928605914115906, + "learning_rate": 1.625272991510742e-05, + "loss": 0.3917, + "step": 20831 + }, + { + "epoch": 0.571993410214168, + "grad_norm": 0.3365391194820404, + "learning_rate": 1.6252392857023865e-05, + "loss": 0.541, + "step": 20832 + }, + { + "epoch": 0.5720208676551345, + "grad_norm": 0.4132090210914612, + "learning_rate": 1.6252055787277602e-05, + "loss": 0.5169, + "step": 20833 + }, + { + "epoch": 0.572048325096101, + "grad_norm": 0.4078461229801178, + "learning_rate": 1.625171870586926e-05, + "loss": 0.5701, + "step": 20834 + }, + { + "epoch": 0.5720757825370676, + "grad_norm": 0.3717350363731384, + "learning_rate": 1.6251381612799467e-05, + "loss": 0.4895, + "step": 20835 + }, + { + "epoch": 0.572103239978034, + "grad_norm": 0.3987646698951721, + "learning_rate": 1.6251044508068852e-05, + "loss": 0.5737, + "step": 20836 + }, + { + "epoch": 0.5721306974190006, + "grad_norm": 0.41768792271614075, + "learning_rate": 1.6250707391678046e-05, + "loss": 0.554, + "step": 20837 + }, + { + "epoch": 0.572158154859967, + "grad_norm": 0.39522528648376465, + "learning_rate": 1.625037026362767e-05, + "loss": 0.5034, + "step": 20838 + }, + { + "epoch": 0.5721856123009336, + "grad_norm": 0.3506503999233246, + "learning_rate": 1.6250033123918364e-05, + "loss": 0.6323, + "step": 20839 + }, + { + "epoch": 0.5722130697419, + "grad_norm": 0.3963997960090637, + "learning_rate": 1.624969597255075e-05, + "loss": 0.4387, + "step": 20840 + }, + { + "epoch": 0.5722405271828666, + "grad_norm": 0.42788976430892944, + "learning_rate": 1.6249358809525457e-05, + "loss": 0.5299, + "step": 20841 + }, + { + "epoch": 0.5722679846238331, + "grad_norm": 0.3837425708770752, + "learning_rate": 1.6249021634843117e-05, + "loss": 0.4491, + "step": 20842 + }, + { + "epoch": 0.5722954420647995, + "grad_norm": 0.42310038208961487, + "learning_rate": 1.6248684448504354e-05, + "loss": 0.5876, + "step": 20843 + }, + { + "epoch": 0.5723228995057661, + "grad_norm": 3.306506395339966, + "learning_rate": 1.6248347250509805e-05, + "loss": 0.4376, + "step": 20844 + }, + { + "epoch": 0.5723503569467325, + "grad_norm": 0.3460780084133148, + "learning_rate": 1.6248010040860092e-05, + "loss": 0.471, + "step": 20845 + }, + { + "epoch": 0.5723778143876991, + "grad_norm": 0.3610418736934662, + "learning_rate": 1.6247672819555846e-05, + "loss": 0.4459, + "step": 20846 + }, + { + "epoch": 0.5724052718286655, + "grad_norm": 0.3698329031467438, + "learning_rate": 1.6247335586597694e-05, + "loss": 0.5315, + "step": 20847 + }, + { + "epoch": 0.5724327292696321, + "grad_norm": 0.3579369783401489, + "learning_rate": 1.624699834198627e-05, + "loss": 0.5123, + "step": 20848 + }, + { + "epoch": 0.5724601867105986, + "grad_norm": 0.35882753133773804, + "learning_rate": 1.6246661085722202e-05, + "loss": 0.4616, + "step": 20849 + }, + { + "epoch": 0.5724876441515651, + "grad_norm": 0.3345099687576294, + "learning_rate": 1.6246323817806113e-05, + "loss": 0.4502, + "step": 20850 + }, + { + "epoch": 0.5725151015925316, + "grad_norm": 0.5213518738746643, + "learning_rate": 1.6245986538238638e-05, + "loss": 0.5564, + "step": 20851 + }, + { + "epoch": 0.572542559033498, + "grad_norm": 0.3860699236392975, + "learning_rate": 1.6245649247020405e-05, + "loss": 0.496, + "step": 20852 + }, + { + "epoch": 0.5725700164744646, + "grad_norm": 0.40391871333122253, + "learning_rate": 1.6245311944152043e-05, + "loss": 0.5427, + "step": 20853 + }, + { + "epoch": 0.572597473915431, + "grad_norm": 0.3620806634426117, + "learning_rate": 1.624497462963418e-05, + "loss": 0.452, + "step": 20854 + }, + { + "epoch": 0.5726249313563976, + "grad_norm": 0.3836316764354706, + "learning_rate": 1.6244637303467444e-05, + "loss": 0.4435, + "step": 20855 + }, + { + "epoch": 0.5726523887973641, + "grad_norm": 0.4403822720050812, + "learning_rate": 1.624429996565247e-05, + "loss": 0.6015, + "step": 20856 + }, + { + "epoch": 0.5726798462383306, + "grad_norm": 0.4436423182487488, + "learning_rate": 1.6243962616189884e-05, + "loss": 0.4471, + "step": 20857 + }, + { + "epoch": 0.5727073036792971, + "grad_norm": 0.45510226488113403, + "learning_rate": 1.6243625255080316e-05, + "loss": 0.6406, + "step": 20858 + }, + { + "epoch": 0.5727347611202636, + "grad_norm": 0.37637847661972046, + "learning_rate": 1.624328788232439e-05, + "loss": 0.4531, + "step": 20859 + }, + { + "epoch": 0.5727622185612301, + "grad_norm": 0.3584069609642029, + "learning_rate": 1.624295049792274e-05, + "loss": 0.4796, + "step": 20860 + }, + { + "epoch": 0.5727896760021965, + "grad_norm": 0.42730018496513367, + "learning_rate": 1.6242613101875998e-05, + "loss": 0.5945, + "step": 20861 + }, + { + "epoch": 0.5728171334431631, + "grad_norm": 0.3859197497367859, + "learning_rate": 1.6242275694184792e-05, + "loss": 0.5374, + "step": 20862 + }, + { + "epoch": 0.5728445908841296, + "grad_norm": 0.36612799763679504, + "learning_rate": 1.6241938274849742e-05, + "loss": 0.5169, + "step": 20863 + }, + { + "epoch": 0.5728720483250961, + "grad_norm": 0.40852996706962585, + "learning_rate": 1.624160084387149e-05, + "loss": 0.4881, + "step": 20864 + }, + { + "epoch": 0.5728995057660626, + "grad_norm": 0.41497281193733215, + "learning_rate": 1.6241263401250656e-05, + "loss": 0.492, + "step": 20865 + }, + { + "epoch": 0.5729269632070291, + "grad_norm": 0.34270015358924866, + "learning_rate": 1.6240925946987877e-05, + "loss": 0.4324, + "step": 20866 + }, + { + "epoch": 0.5729544206479956, + "grad_norm": 0.3793346881866455, + "learning_rate": 1.6240588481083783e-05, + "loss": 0.5018, + "step": 20867 + }, + { + "epoch": 0.5729818780889621, + "grad_norm": 0.4204308092594147, + "learning_rate": 1.6240251003538995e-05, + "loss": 0.5883, + "step": 20868 + }, + { + "epoch": 0.5730093355299286, + "grad_norm": 0.36468371748924255, + "learning_rate": 1.623991351435415e-05, + "loss": 0.5228, + "step": 20869 + }, + { + "epoch": 0.5730367929708952, + "grad_norm": 0.366685152053833, + "learning_rate": 1.623957601352987e-05, + "loss": 0.4966, + "step": 20870 + }, + { + "epoch": 0.5730642504118616, + "grad_norm": 0.38735780119895935, + "learning_rate": 1.623923850106679e-05, + "loss": 0.5082, + "step": 20871 + }, + { + "epoch": 0.5730917078528281, + "grad_norm": 0.3310830891132355, + "learning_rate": 1.6238900976965543e-05, + "loss": 0.4612, + "step": 20872 + }, + { + "epoch": 0.5731191652937946, + "grad_norm": 0.3623993396759033, + "learning_rate": 1.6238563441226753e-05, + "loss": 0.5521, + "step": 20873 + }, + { + "epoch": 0.5731466227347611, + "grad_norm": 0.39407986402511597, + "learning_rate": 1.623822589385105e-05, + "loss": 0.4995, + "step": 20874 + }, + { + "epoch": 0.5731740801757276, + "grad_norm": 0.47070375084877014, + "learning_rate": 1.6237888334839066e-05, + "loss": 0.4838, + "step": 20875 + }, + { + "epoch": 0.5732015376166941, + "grad_norm": 0.37321561574935913, + "learning_rate": 1.6237550764191426e-05, + "loss": 0.4633, + "step": 20876 + }, + { + "epoch": 0.5732289950576607, + "grad_norm": 0.37014269828796387, + "learning_rate": 1.6237213181908765e-05, + "loss": 0.5358, + "step": 20877 + }, + { + "epoch": 0.5732564524986271, + "grad_norm": 0.37294018268585205, + "learning_rate": 1.6236875587991714e-05, + "loss": 0.428, + "step": 20878 + }, + { + "epoch": 0.5732839099395937, + "grad_norm": 0.3668651580810547, + "learning_rate": 1.6236537982440895e-05, + "loss": 0.4605, + "step": 20879 + }, + { + "epoch": 0.5733113673805601, + "grad_norm": 0.39758241176605225, + "learning_rate": 1.6236200365256943e-05, + "loss": 0.4552, + "step": 20880 + }, + { + "epoch": 0.5733388248215266, + "grad_norm": 0.40630170702934265, + "learning_rate": 1.6235862736440488e-05, + "loss": 0.5292, + "step": 20881 + }, + { + "epoch": 0.5733662822624931, + "grad_norm": 0.34289655089378357, + "learning_rate": 1.623552509599216e-05, + "loss": 0.4709, + "step": 20882 + }, + { + "epoch": 0.5733937397034596, + "grad_norm": 0.3872988522052765, + "learning_rate": 1.6235187443912584e-05, + "loss": 0.5593, + "step": 20883 + }, + { + "epoch": 0.5734211971444262, + "grad_norm": 0.40005964040756226, + "learning_rate": 1.62348497802024e-05, + "loss": 0.5476, + "step": 20884 + }, + { + "epoch": 0.5734486545853926, + "grad_norm": 0.4384595453739166, + "learning_rate": 1.6234512104862226e-05, + "loss": 0.5195, + "step": 20885 + }, + { + "epoch": 0.5734761120263592, + "grad_norm": 0.4018765985965729, + "learning_rate": 1.6234174417892697e-05, + "loss": 0.5077, + "step": 20886 + }, + { + "epoch": 0.5735035694673256, + "grad_norm": 0.3703628182411194, + "learning_rate": 1.6233836719294445e-05, + "loss": 0.5445, + "step": 20887 + }, + { + "epoch": 0.5735310269082922, + "grad_norm": 0.363452285528183, + "learning_rate": 1.6233499009068094e-05, + "loss": 0.4546, + "step": 20888 + }, + { + "epoch": 0.5735584843492586, + "grad_norm": 0.477959007024765, + "learning_rate": 1.6233161287214282e-05, + "loss": 0.517, + "step": 20889 + }, + { + "epoch": 0.5735859417902252, + "grad_norm": 0.396270215511322, + "learning_rate": 1.6232823553733635e-05, + "loss": 0.4023, + "step": 20890 + }, + { + "epoch": 0.5736133992311917, + "grad_norm": 0.3235037326812744, + "learning_rate": 1.623248580862678e-05, + "loss": 0.3914, + "step": 20891 + }, + { + "epoch": 0.5736408566721581, + "grad_norm": 0.42866232991218567, + "learning_rate": 1.6232148051894352e-05, + "loss": 0.5046, + "step": 20892 + }, + { + "epoch": 0.5736683141131247, + "grad_norm": 1.563644289970398, + "learning_rate": 1.6231810283536974e-05, + "loss": 0.4843, + "step": 20893 + }, + { + "epoch": 0.5736957715540911, + "grad_norm": 0.3844318985939026, + "learning_rate": 1.623147250355529e-05, + "loss": 0.5123, + "step": 20894 + }, + { + "epoch": 0.5737232289950577, + "grad_norm": 0.39729180932044983, + "learning_rate": 1.6231134711949912e-05, + "loss": 0.5683, + "step": 20895 + }, + { + "epoch": 0.5737506864360241, + "grad_norm": 0.4076063632965088, + "learning_rate": 1.6230796908721482e-05, + "loss": 0.5382, + "step": 20896 + }, + { + "epoch": 0.5737781438769907, + "grad_norm": 0.41465601325035095, + "learning_rate": 1.623045909387063e-05, + "loss": 0.4914, + "step": 20897 + }, + { + "epoch": 0.5738056013179572, + "grad_norm": 0.3559700846672058, + "learning_rate": 1.623012126739798e-05, + "loss": 0.5096, + "step": 20898 + }, + { + "epoch": 0.5738330587589237, + "grad_norm": 0.4624376893043518, + "learning_rate": 1.6229783429304167e-05, + "loss": 0.468, + "step": 20899 + }, + { + "epoch": 0.5738605161998902, + "grad_norm": 0.3423447608947754, + "learning_rate": 1.6229445579589816e-05, + "loss": 0.4798, + "step": 20900 + }, + { + "epoch": 0.5738879736408566, + "grad_norm": 0.38177987933158875, + "learning_rate": 1.6229107718255566e-05, + "loss": 0.5044, + "step": 20901 + }, + { + "epoch": 0.5739154310818232, + "grad_norm": 0.39088937640190125, + "learning_rate": 1.622876984530204e-05, + "loss": 0.4487, + "step": 20902 + }, + { + "epoch": 0.5739428885227896, + "grad_norm": 0.3567960858345032, + "learning_rate": 1.6228431960729868e-05, + "loss": 0.4918, + "step": 20903 + }, + { + "epoch": 0.5739703459637562, + "grad_norm": 0.38986262679100037, + "learning_rate": 1.6228094064539687e-05, + "loss": 0.5044, + "step": 20904 + }, + { + "epoch": 0.5739978034047227, + "grad_norm": 0.3946370780467987, + "learning_rate": 1.622775615673212e-05, + "loss": 0.5157, + "step": 20905 + }, + { + "epoch": 0.5740252608456892, + "grad_norm": 0.3690735399723053, + "learning_rate": 1.6227418237307802e-05, + "loss": 0.4896, + "step": 20906 + }, + { + "epoch": 0.5740527182866557, + "grad_norm": 0.38739410042762756, + "learning_rate": 1.622708030626736e-05, + "loss": 0.5535, + "step": 20907 + }, + { + "epoch": 0.5740801757276222, + "grad_norm": 0.3946773111820221, + "learning_rate": 1.6226742363611424e-05, + "loss": 0.5047, + "step": 20908 + }, + { + "epoch": 0.5741076331685887, + "grad_norm": 0.39362606406211853, + "learning_rate": 1.6226404409340627e-05, + "loss": 0.5301, + "step": 20909 + }, + { + "epoch": 0.5741350906095551, + "grad_norm": 0.35539260506629944, + "learning_rate": 1.62260664434556e-05, + "loss": 0.4427, + "step": 20910 + }, + { + "epoch": 0.5741625480505217, + "grad_norm": 0.39691585302352905, + "learning_rate": 1.622572846595697e-05, + "loss": 0.5408, + "step": 20911 + }, + { + "epoch": 0.5741900054914882, + "grad_norm": 0.40126833319664, + "learning_rate": 1.622539047684537e-05, + "loss": 0.5192, + "step": 20912 + }, + { + "epoch": 0.5742174629324547, + "grad_norm": 0.4018343687057495, + "learning_rate": 1.622505247612143e-05, + "loss": 0.5073, + "step": 20913 + }, + { + "epoch": 0.5742449203734212, + "grad_norm": 0.362333744764328, + "learning_rate": 1.622471446378578e-05, + "loss": 0.4694, + "step": 20914 + }, + { + "epoch": 0.5742723778143877, + "grad_norm": 0.36925196647644043, + "learning_rate": 1.6224376439839057e-05, + "loss": 0.5072, + "step": 20915 + }, + { + "epoch": 0.5742998352553542, + "grad_norm": 0.392006516456604, + "learning_rate": 1.622403840428188e-05, + "loss": 0.5588, + "step": 20916 + }, + { + "epoch": 0.5743272926963207, + "grad_norm": 0.34326276183128357, + "learning_rate": 1.6223700357114883e-05, + "loss": 0.3995, + "step": 20917 + }, + { + "epoch": 0.5743547501372872, + "grad_norm": 1.1203263998031616, + "learning_rate": 1.62233622983387e-05, + "loss": 0.5348, + "step": 20918 + }, + { + "epoch": 0.5743822075782538, + "grad_norm": 0.3369385898113251, + "learning_rate": 1.622302422795396e-05, + "loss": 0.4114, + "step": 20919 + }, + { + "epoch": 0.5744096650192202, + "grad_norm": 0.38501250743865967, + "learning_rate": 1.622268614596129e-05, + "loss": 0.4154, + "step": 20920 + }, + { + "epoch": 0.5744371224601867, + "grad_norm": 0.4021569490432739, + "learning_rate": 1.622234805236133e-05, + "loss": 0.5231, + "step": 20921 + }, + { + "epoch": 0.5744645799011532, + "grad_norm": 0.36398550868034363, + "learning_rate": 1.6222009947154704e-05, + "loss": 0.5018, + "step": 20922 + }, + { + "epoch": 0.5744920373421197, + "grad_norm": 0.34999579191207886, + "learning_rate": 1.6221671830342046e-05, + "loss": 0.5464, + "step": 20923 + }, + { + "epoch": 0.5745194947830862, + "grad_norm": 0.3825182616710663, + "learning_rate": 1.622133370192398e-05, + "loss": 0.5473, + "step": 20924 + }, + { + "epoch": 0.5745469522240527, + "grad_norm": 0.3554196059703827, + "learning_rate": 1.622099556190114e-05, + "loss": 0.4717, + "step": 20925 + }, + { + "epoch": 0.5745744096650193, + "grad_norm": 0.451681524515152, + "learning_rate": 1.622065741027416e-05, + "loss": 0.5604, + "step": 20926 + }, + { + "epoch": 0.5746018671059857, + "grad_norm": 0.34658780694007874, + "learning_rate": 1.622031924704367e-05, + "loss": 0.4499, + "step": 20927 + }, + { + "epoch": 0.5746293245469523, + "grad_norm": 0.38811370730400085, + "learning_rate": 1.62199810722103e-05, + "loss": 0.5206, + "step": 20928 + }, + { + "epoch": 0.5746567819879187, + "grad_norm": 0.3392069637775421, + "learning_rate": 1.6219642885774677e-05, + "loss": 0.5029, + "step": 20929 + }, + { + "epoch": 0.5746842394288852, + "grad_norm": 0.5031186938285828, + "learning_rate": 1.6219304687737434e-05, + "loss": 0.665, + "step": 20930 + }, + { + "epoch": 0.5747116968698517, + "grad_norm": 0.3625243306159973, + "learning_rate": 1.6218966478099204e-05, + "loss": 0.401, + "step": 20931 + }, + { + "epoch": 0.5747391543108182, + "grad_norm": 0.38520389795303345, + "learning_rate": 1.621862825686062e-05, + "loss": 0.4656, + "step": 20932 + }, + { + "epoch": 0.5747666117517848, + "grad_norm": 0.5292840600013733, + "learning_rate": 1.6218290024022305e-05, + "loss": 0.582, + "step": 20933 + }, + { + "epoch": 0.5747940691927512, + "grad_norm": 0.3592855632305145, + "learning_rate": 1.62179517795849e-05, + "loss": 0.439, + "step": 20934 + }, + { + "epoch": 0.5748215266337178, + "grad_norm": 0.41490501165390015, + "learning_rate": 1.6217613523549026e-05, + "loss": 0.5182, + "step": 20935 + }, + { + "epoch": 0.5748489840746842, + "grad_norm": 0.39287495613098145, + "learning_rate": 1.6217275255915318e-05, + "loss": 0.466, + "step": 20936 + }, + { + "epoch": 0.5748764415156508, + "grad_norm": 0.40467017889022827, + "learning_rate": 1.621693697668441e-05, + "loss": 0.4536, + "step": 20937 + }, + { + "epoch": 0.5749038989566172, + "grad_norm": 0.370430052280426, + "learning_rate": 1.6216598685856927e-05, + "loss": 0.5371, + "step": 20938 + }, + { + "epoch": 0.5749313563975837, + "grad_norm": 0.4402439296245575, + "learning_rate": 1.6216260383433506e-05, + "loss": 0.5273, + "step": 20939 + }, + { + "epoch": 0.5749588138385503, + "grad_norm": 0.41700252890586853, + "learning_rate": 1.6215922069414772e-05, + "loss": 0.4763, + "step": 20940 + }, + { + "epoch": 0.5749862712795167, + "grad_norm": 0.42033445835113525, + "learning_rate": 1.621558374380136e-05, + "loss": 0.5222, + "step": 20941 + }, + { + "epoch": 0.5750137287204833, + "grad_norm": 0.40189656615257263, + "learning_rate": 1.6215245406593907e-05, + "loss": 0.5572, + "step": 20942 + }, + { + "epoch": 0.5750411861614497, + "grad_norm": 0.3530421555042267, + "learning_rate": 1.6214907057793033e-05, + "loss": 0.473, + "step": 20943 + }, + { + "epoch": 0.5750686436024163, + "grad_norm": 0.4205865263938904, + "learning_rate": 1.6214568697399374e-05, + "loss": 0.4832, + "step": 20944 + }, + { + "epoch": 0.5750961010433827, + "grad_norm": 0.3558129072189331, + "learning_rate": 1.6214230325413558e-05, + "loss": 0.4942, + "step": 20945 + }, + { + "epoch": 0.5751235584843493, + "grad_norm": 0.3731186091899872, + "learning_rate": 1.6213891941836225e-05, + "loss": 0.4753, + "step": 20946 + }, + { + "epoch": 0.5751510159253158, + "grad_norm": 0.3688884675502777, + "learning_rate": 1.6213553546667995e-05, + "loss": 0.5166, + "step": 20947 + }, + { + "epoch": 0.5751784733662823, + "grad_norm": 0.44873446226119995, + "learning_rate": 1.6213215139909508e-05, + "loss": 0.5124, + "step": 20948 + }, + { + "epoch": 0.5752059308072488, + "grad_norm": 0.3517833352088928, + "learning_rate": 1.621287672156139e-05, + "loss": 0.4566, + "step": 20949 + }, + { + "epoch": 0.5752333882482152, + "grad_norm": 0.42841485142707825, + "learning_rate": 1.6212538291624274e-05, + "loss": 0.4801, + "step": 20950 + }, + { + "epoch": 0.5752608456891818, + "grad_norm": 0.32258570194244385, + "learning_rate": 1.621219985009879e-05, + "loss": 0.5041, + "step": 20951 + }, + { + "epoch": 0.5752883031301482, + "grad_norm": 0.3912141025066376, + "learning_rate": 1.6211861396985572e-05, + "loss": 0.525, + "step": 20952 + }, + { + "epoch": 0.5753157605711148, + "grad_norm": 0.37746307253837585, + "learning_rate": 1.621152293228525e-05, + "loss": 0.5511, + "step": 20953 + }, + { + "epoch": 0.5753432180120813, + "grad_norm": 0.35619834065437317, + "learning_rate": 1.6211184455998457e-05, + "loss": 0.477, + "step": 20954 + }, + { + "epoch": 0.5753706754530478, + "grad_norm": 0.35199007391929626, + "learning_rate": 1.621084596812582e-05, + "loss": 0.4749, + "step": 20955 + }, + { + "epoch": 0.5753981328940143, + "grad_norm": 0.4098707139492035, + "learning_rate": 1.6210507468667974e-05, + "loss": 0.5146, + "step": 20956 + }, + { + "epoch": 0.5754255903349808, + "grad_norm": 0.375472754240036, + "learning_rate": 1.621016895762555e-05, + "loss": 0.5561, + "step": 20957 + }, + { + "epoch": 0.5754530477759473, + "grad_norm": 0.636448085308075, + "learning_rate": 1.6209830434999178e-05, + "loss": 0.594, + "step": 20958 + }, + { + "epoch": 0.5754805052169137, + "grad_norm": 0.41762927174568176, + "learning_rate": 1.620949190078949e-05, + "loss": 0.5451, + "step": 20959 + }, + { + "epoch": 0.5755079626578803, + "grad_norm": 0.38104188442230225, + "learning_rate": 1.6209153354997118e-05, + "loss": 0.4831, + "step": 20960 + }, + { + "epoch": 0.5755354200988468, + "grad_norm": 0.4120206832885742, + "learning_rate": 1.6208814797622695e-05, + "loss": 0.511, + "step": 20961 + }, + { + "epoch": 0.5755628775398133, + "grad_norm": 0.3670699894428253, + "learning_rate": 1.620847622866685e-05, + "loss": 0.5287, + "step": 20962 + }, + { + "epoch": 0.5755903349807798, + "grad_norm": 0.3656221330165863, + "learning_rate": 1.620813764813021e-05, + "loss": 0.4572, + "step": 20963 + }, + { + "epoch": 0.5756177924217463, + "grad_norm": 0.4117569923400879, + "learning_rate": 1.620779905601342e-05, + "loss": 0.4497, + "step": 20964 + }, + { + "epoch": 0.5756452498627128, + "grad_norm": 0.34083014726638794, + "learning_rate": 1.6207460452317102e-05, + "loss": 0.5295, + "step": 20965 + }, + { + "epoch": 0.5756727073036793, + "grad_norm": 0.860584020614624, + "learning_rate": 1.6207121837041882e-05, + "loss": 0.3959, + "step": 20966 + }, + { + "epoch": 0.5757001647446458, + "grad_norm": 0.3538675606250763, + "learning_rate": 1.6206783210188406e-05, + "loss": 0.5081, + "step": 20967 + }, + { + "epoch": 0.5757276221856124, + "grad_norm": 0.3399595320224762, + "learning_rate": 1.6206444571757297e-05, + "loss": 0.4543, + "step": 20968 + }, + { + "epoch": 0.5757550796265788, + "grad_norm": 0.3452746570110321, + "learning_rate": 1.6206105921749186e-05, + "loss": 0.4283, + "step": 20969 + }, + { + "epoch": 0.5757825370675453, + "grad_norm": 0.34506797790527344, + "learning_rate": 1.620576726016471e-05, + "loss": 0.4694, + "step": 20970 + }, + { + "epoch": 0.5758099945085118, + "grad_norm": 0.42218878865242004, + "learning_rate": 1.6205428587004494e-05, + "loss": 0.6049, + "step": 20971 + }, + { + "epoch": 0.5758374519494783, + "grad_norm": 0.6600610613822937, + "learning_rate": 1.6205089902269174e-05, + "loss": 0.5867, + "step": 20972 + }, + { + "epoch": 0.5758649093904448, + "grad_norm": 0.4079875946044922, + "learning_rate": 1.6204751205959382e-05, + "loss": 0.5122, + "step": 20973 + }, + { + "epoch": 0.5758923668314113, + "grad_norm": 0.4054216146469116, + "learning_rate": 1.620441249807575e-05, + "loss": 0.4784, + "step": 20974 + }, + { + "epoch": 0.5759198242723779, + "grad_norm": 0.330305814743042, + "learning_rate": 1.62040737786189e-05, + "loss": 0.4496, + "step": 20975 + }, + { + "epoch": 0.5759472817133443, + "grad_norm": 0.38914692401885986, + "learning_rate": 1.620373504758948e-05, + "loss": 0.4724, + "step": 20976 + }, + { + "epoch": 0.5759747391543109, + "grad_norm": 0.36408349871635437, + "learning_rate": 1.6203396304988115e-05, + "loss": 0.5023, + "step": 20977 + }, + { + "epoch": 0.5760021965952773, + "grad_norm": 0.38993039727211, + "learning_rate": 1.6203057550815432e-05, + "loss": 0.5167, + "step": 20978 + }, + { + "epoch": 0.5760296540362438, + "grad_norm": 0.6605259776115417, + "learning_rate": 1.620271878507207e-05, + "loss": 0.5387, + "step": 20979 + }, + { + "epoch": 0.5760571114772103, + "grad_norm": 0.40047940611839294, + "learning_rate": 1.6202380007758657e-05, + "loss": 0.4737, + "step": 20980 + }, + { + "epoch": 0.5760845689181768, + "grad_norm": 0.40364524722099304, + "learning_rate": 1.6202041218875825e-05, + "loss": 0.4898, + "step": 20981 + }, + { + "epoch": 0.5761120263591434, + "grad_norm": 0.39270633459091187, + "learning_rate": 1.6201702418424206e-05, + "loss": 0.4992, + "step": 20982 + }, + { + "epoch": 0.5761394838001098, + "grad_norm": 0.38883131742477417, + "learning_rate": 1.6201363606404435e-05, + "loss": 0.5466, + "step": 20983 + }, + { + "epoch": 0.5761669412410764, + "grad_norm": 0.35050728917121887, + "learning_rate": 1.620102478281714e-05, + "loss": 0.486, + "step": 20984 + }, + { + "epoch": 0.5761943986820428, + "grad_norm": 0.3443639874458313, + "learning_rate": 1.6200685947662953e-05, + "loss": 0.4771, + "step": 20985 + }, + { + "epoch": 0.5762218561230094, + "grad_norm": 0.41249746084213257, + "learning_rate": 1.6200347100942512e-05, + "loss": 0.5198, + "step": 20986 + }, + { + "epoch": 0.5762493135639758, + "grad_norm": 0.38163426518440247, + "learning_rate": 1.620000824265644e-05, + "loss": 0.4926, + "step": 20987 + }, + { + "epoch": 0.5762767710049423, + "grad_norm": 0.498576819896698, + "learning_rate": 1.6199669372805375e-05, + "loss": 0.4822, + "step": 20988 + }, + { + "epoch": 0.5763042284459089, + "grad_norm": 0.39927858114242554, + "learning_rate": 1.619933049138995e-05, + "loss": 0.5195, + "step": 20989 + }, + { + "epoch": 0.5763316858868753, + "grad_norm": 0.3743223547935486, + "learning_rate": 1.6198991598410794e-05, + "loss": 0.502, + "step": 20990 + }, + { + "epoch": 0.5763591433278419, + "grad_norm": 0.34389349818229675, + "learning_rate": 1.619865269386854e-05, + "loss": 0.4954, + "step": 20991 + }, + { + "epoch": 0.5763866007688083, + "grad_norm": 0.3621140420436859, + "learning_rate": 1.619831377776382e-05, + "loss": 0.4838, + "step": 20992 + }, + { + "epoch": 0.5764140582097749, + "grad_norm": 0.38879209756851196, + "learning_rate": 1.6197974850097267e-05, + "loss": 0.5618, + "step": 20993 + }, + { + "epoch": 0.5764415156507413, + "grad_norm": 0.3893915116786957, + "learning_rate": 1.6197635910869512e-05, + "loss": 0.5639, + "step": 20994 + }, + { + "epoch": 0.5764689730917079, + "grad_norm": 0.38905787467956543, + "learning_rate": 1.6197296960081187e-05, + "loss": 0.4451, + "step": 20995 + }, + { + "epoch": 0.5764964305326744, + "grad_norm": 0.43307432532310486, + "learning_rate": 1.619695799773293e-05, + "loss": 0.4706, + "step": 20996 + }, + { + "epoch": 0.5765238879736408, + "grad_norm": 0.3920542895793915, + "learning_rate": 1.6196619023825365e-05, + "loss": 0.5303, + "step": 20997 + }, + { + "epoch": 0.5765513454146074, + "grad_norm": 0.33225229382514954, + "learning_rate": 1.6196280038359125e-05, + "loss": 0.4674, + "step": 20998 + }, + { + "epoch": 0.5765788028555738, + "grad_norm": 0.40328145027160645, + "learning_rate": 1.6195941041334853e-05, + "loss": 0.4806, + "step": 20999 + }, + { + "epoch": 0.5766062602965404, + "grad_norm": 0.36586275696754456, + "learning_rate": 1.6195602032753165e-05, + "loss": 0.5511, + "step": 21000 + }, + { + "epoch": 0.5766337177375068, + "grad_norm": 0.36848708987236023, + "learning_rate": 1.6195263012614705e-05, + "loss": 0.4804, + "step": 21001 + }, + { + "epoch": 0.5766611751784734, + "grad_norm": 0.3625204265117645, + "learning_rate": 1.6194923980920104e-05, + "loss": 0.475, + "step": 21002 + }, + { + "epoch": 0.5766886326194399, + "grad_norm": 0.43720880150794983, + "learning_rate": 1.619458493766999e-05, + "loss": 0.5495, + "step": 21003 + }, + { + "epoch": 0.5767160900604064, + "grad_norm": 0.3983374536037445, + "learning_rate": 1.6194245882865e-05, + "loss": 0.4801, + "step": 21004 + }, + { + "epoch": 0.5767435475013729, + "grad_norm": 0.34298333525657654, + "learning_rate": 1.6193906816505765e-05, + "loss": 0.5039, + "step": 21005 + }, + { + "epoch": 0.5767710049423393, + "grad_norm": 0.36116135120391846, + "learning_rate": 1.6193567738592916e-05, + "loss": 0.4947, + "step": 21006 + }, + { + "epoch": 0.5767984623833059, + "grad_norm": 0.40506628155708313, + "learning_rate": 1.6193228649127087e-05, + "loss": 0.5664, + "step": 21007 + }, + { + "epoch": 0.5768259198242723, + "grad_norm": 0.42807918787002563, + "learning_rate": 1.6192889548108907e-05, + "loss": 0.4915, + "step": 21008 + }, + { + "epoch": 0.5768533772652389, + "grad_norm": 0.38928791880607605, + "learning_rate": 1.6192550435539013e-05, + "loss": 0.4908, + "step": 21009 + }, + { + "epoch": 0.5768808347062054, + "grad_norm": 0.35756006836891174, + "learning_rate": 1.6192211311418037e-05, + "loss": 0.4533, + "step": 21010 + }, + { + "epoch": 0.5769082921471719, + "grad_norm": 0.6645637154579163, + "learning_rate": 1.619187217574661e-05, + "loss": 0.5124, + "step": 21011 + }, + { + "epoch": 0.5769357495881384, + "grad_norm": 0.3633092939853668, + "learning_rate": 1.619153302852537e-05, + "loss": 0.4427, + "step": 21012 + }, + { + "epoch": 0.5769632070291049, + "grad_norm": 0.4048064947128296, + "learning_rate": 1.6191193869754934e-05, + "loss": 0.5095, + "step": 21013 + }, + { + "epoch": 0.5769906644700714, + "grad_norm": 0.3969387710094452, + "learning_rate": 1.6190854699435954e-05, + "loss": 0.4429, + "step": 21014 + }, + { + "epoch": 0.5770181219110379, + "grad_norm": 0.4186748266220093, + "learning_rate": 1.6190515517569048e-05, + "loss": 0.4779, + "step": 21015 + }, + { + "epoch": 0.5770455793520044, + "grad_norm": 0.3775739073753357, + "learning_rate": 1.619017632415486e-05, + "loss": 0.4971, + "step": 21016 + }, + { + "epoch": 0.577073036792971, + "grad_norm": 0.3960813581943512, + "learning_rate": 1.618983711919402e-05, + "loss": 0.4947, + "step": 21017 + }, + { + "epoch": 0.5771004942339374, + "grad_norm": 0.43048200011253357, + "learning_rate": 1.618949790268715e-05, + "loss": 0.5819, + "step": 21018 + }, + { + "epoch": 0.5771279516749039, + "grad_norm": 0.3707483410835266, + "learning_rate": 1.6189158674634894e-05, + "loss": 0.4882, + "step": 21019 + }, + { + "epoch": 0.5771554091158704, + "grad_norm": 0.419536828994751, + "learning_rate": 1.6188819435037882e-05, + "loss": 0.527, + "step": 21020 + }, + { + "epoch": 0.5771828665568369, + "grad_norm": 0.37728551030158997, + "learning_rate": 1.618848018389675e-05, + "loss": 0.4705, + "step": 21021 + }, + { + "epoch": 0.5772103239978034, + "grad_norm": 0.38708680868148804, + "learning_rate": 1.6188140921212126e-05, + "loss": 0.574, + "step": 21022 + }, + { + "epoch": 0.5772377814387699, + "grad_norm": 0.36911657452583313, + "learning_rate": 1.618780164698464e-05, + "loss": 0.48, + "step": 21023 + }, + { + "epoch": 0.5772652388797364, + "grad_norm": 0.3945554792881012, + "learning_rate": 1.6187462361214934e-05, + "loss": 0.5436, + "step": 21024 + }, + { + "epoch": 0.5772926963207029, + "grad_norm": 0.42925116419792175, + "learning_rate": 1.618712306390363e-05, + "loss": 0.5696, + "step": 21025 + }, + { + "epoch": 0.5773201537616695, + "grad_norm": 0.3835682272911072, + "learning_rate": 1.6186783755051375e-05, + "loss": 0.5289, + "step": 21026 + }, + { + "epoch": 0.5773476112026359, + "grad_norm": 0.3795356750488281, + "learning_rate": 1.6186444434658787e-05, + "loss": 0.483, + "step": 21027 + }, + { + "epoch": 0.5773750686436024, + "grad_norm": 0.3714221119880676, + "learning_rate": 1.6186105102726507e-05, + "loss": 0.5668, + "step": 21028 + }, + { + "epoch": 0.5774025260845689, + "grad_norm": 0.46108782291412354, + "learning_rate": 1.6185765759255168e-05, + "loss": 0.4929, + "step": 21029 + }, + { + "epoch": 0.5774299835255354, + "grad_norm": 0.3710031509399414, + "learning_rate": 1.61854264042454e-05, + "loss": 0.4736, + "step": 21030 + }, + { + "epoch": 0.5774574409665019, + "grad_norm": 0.4024684727191925, + "learning_rate": 1.6185087037697838e-05, + "loss": 0.5044, + "step": 21031 + }, + { + "epoch": 0.5774848984074684, + "grad_norm": 0.40853792428970337, + "learning_rate": 1.6184747659613118e-05, + "loss": 0.563, + "step": 21032 + }, + { + "epoch": 0.577512355848435, + "grad_norm": 0.43153294920921326, + "learning_rate": 1.6184408269991866e-05, + "loss": 0.5902, + "step": 21033 + }, + { + "epoch": 0.5775398132894014, + "grad_norm": 0.46708911657333374, + "learning_rate": 1.618406886883472e-05, + "loss": 0.525, + "step": 21034 + }, + { + "epoch": 0.577567270730368, + "grad_norm": 0.37165382504463196, + "learning_rate": 1.618372945614231e-05, + "loss": 0.473, + "step": 21035 + }, + { + "epoch": 0.5775947281713344, + "grad_norm": 0.4049939513206482, + "learning_rate": 1.6183390031915274e-05, + "loss": 0.5777, + "step": 21036 + }, + { + "epoch": 0.5776221856123009, + "grad_norm": 0.38714590668678284, + "learning_rate": 1.618305059615424e-05, + "loss": 0.5438, + "step": 21037 + }, + { + "epoch": 0.5776496430532674, + "grad_norm": 0.369933545589447, + "learning_rate": 1.618271114885984e-05, + "loss": 0.566, + "step": 21038 + }, + { + "epoch": 0.5776771004942339, + "grad_norm": 0.48515790700912476, + "learning_rate": 1.6182371690032717e-05, + "loss": 0.495, + "step": 21039 + }, + { + "epoch": 0.5777045579352005, + "grad_norm": 0.5232720971107483, + "learning_rate": 1.6182032219673495e-05, + "loss": 0.4974, + "step": 21040 + }, + { + "epoch": 0.5777320153761669, + "grad_norm": 0.3647095859050751, + "learning_rate": 1.6181692737782806e-05, + "loss": 0.568, + "step": 21041 + }, + { + "epoch": 0.5777594728171335, + "grad_norm": 0.5132710933685303, + "learning_rate": 1.6181353244361293e-05, + "loss": 0.6286, + "step": 21042 + }, + { + "epoch": 0.5777869302580999, + "grad_norm": 0.4020809233188629, + "learning_rate": 1.618101373940958e-05, + "loss": 0.4174, + "step": 21043 + }, + { + "epoch": 0.5778143876990665, + "grad_norm": 0.39014431834220886, + "learning_rate": 1.6180674222928306e-05, + "loss": 0.5443, + "step": 21044 + }, + { + "epoch": 0.5778418451400329, + "grad_norm": 0.3430628776550293, + "learning_rate": 1.61803346949181e-05, + "loss": 0.502, + "step": 21045 + }, + { + "epoch": 0.5778693025809994, + "grad_norm": 0.39478838443756104, + "learning_rate": 1.61799951553796e-05, + "loss": 0.5033, + "step": 21046 + }, + { + "epoch": 0.577896760021966, + "grad_norm": 0.3374863266944885, + "learning_rate": 1.617965560431343e-05, + "loss": 0.4757, + "step": 21047 + }, + { + "epoch": 0.5779242174629324, + "grad_norm": 0.35532885789871216, + "learning_rate": 1.6179316041720235e-05, + "loss": 0.5351, + "step": 21048 + }, + { + "epoch": 0.577951674903899, + "grad_norm": 0.37188029289245605, + "learning_rate": 1.6178976467600645e-05, + "loss": 0.5509, + "step": 21049 + }, + { + "epoch": 0.5779791323448654, + "grad_norm": 0.3592908978462219, + "learning_rate": 1.617863688195529e-05, + "loss": 0.4856, + "step": 21050 + }, + { + "epoch": 0.578006589785832, + "grad_norm": 0.37347546219825745, + "learning_rate": 1.6178297284784802e-05, + "loss": 0.442, + "step": 21051 + }, + { + "epoch": 0.5780340472267984, + "grad_norm": 0.388260155916214, + "learning_rate": 1.6177957676089822e-05, + "loss": 0.48, + "step": 21052 + }, + { + "epoch": 0.578061504667765, + "grad_norm": 0.4198438823223114, + "learning_rate": 1.617761805587098e-05, + "loss": 0.4734, + "step": 21053 + }, + { + "epoch": 0.5780889621087315, + "grad_norm": 0.378131240606308, + "learning_rate": 1.6177278424128904e-05, + "loss": 0.563, + "step": 21054 + }, + { + "epoch": 0.578116419549698, + "grad_norm": 0.4165869951248169, + "learning_rate": 1.6176938780864233e-05, + "loss": 0.554, + "step": 21055 + }, + { + "epoch": 0.5781438769906645, + "grad_norm": 0.38825780153274536, + "learning_rate": 1.6176599126077602e-05, + "loss": 0.5302, + "step": 21056 + }, + { + "epoch": 0.5781713344316309, + "grad_norm": 0.4153463542461395, + "learning_rate": 1.6176259459769644e-05, + "loss": 0.547, + "step": 21057 + }, + { + "epoch": 0.5781987918725975, + "grad_norm": 0.33497610688209534, + "learning_rate": 1.6175919781940987e-05, + "loss": 0.4941, + "step": 21058 + }, + { + "epoch": 0.5782262493135639, + "grad_norm": 0.37138551473617554, + "learning_rate": 1.6175580092592268e-05, + "loss": 0.5127, + "step": 21059 + }, + { + "epoch": 0.5782537067545305, + "grad_norm": 0.5510372519493103, + "learning_rate": 1.6175240391724125e-05, + "loss": 0.4957, + "step": 21060 + }, + { + "epoch": 0.578281164195497, + "grad_norm": 0.417133092880249, + "learning_rate": 1.6174900679337185e-05, + "loss": 0.554, + "step": 21061 + }, + { + "epoch": 0.5783086216364635, + "grad_norm": 0.3699566721916199, + "learning_rate": 1.6174560955432083e-05, + "loss": 0.5348, + "step": 21062 + }, + { + "epoch": 0.57833607907743, + "grad_norm": 0.39545226097106934, + "learning_rate": 1.617422122000946e-05, + "loss": 0.5859, + "step": 21063 + }, + { + "epoch": 0.5783635365183964, + "grad_norm": 0.3440853953361511, + "learning_rate": 1.6173881473069936e-05, + "loss": 0.4487, + "step": 21064 + }, + { + "epoch": 0.578390993959363, + "grad_norm": 0.36826151609420776, + "learning_rate": 1.6173541714614158e-05, + "loss": 0.4578, + "step": 21065 + }, + { + "epoch": 0.5784184514003294, + "grad_norm": 0.4085372984409332, + "learning_rate": 1.6173201944642753e-05, + "loss": 0.5586, + "step": 21066 + }, + { + "epoch": 0.578445908841296, + "grad_norm": 0.407913476228714, + "learning_rate": 1.617286216315636e-05, + "loss": 0.4888, + "step": 21067 + }, + { + "epoch": 0.5784733662822625, + "grad_norm": 0.3701746165752411, + "learning_rate": 1.6172522370155602e-05, + "loss": 0.5033, + "step": 21068 + }, + { + "epoch": 0.578500823723229, + "grad_norm": 0.3585933744907379, + "learning_rate": 1.6172182565641123e-05, + "loss": 0.4864, + "step": 21069 + }, + { + "epoch": 0.5785282811641955, + "grad_norm": 0.36838412284851074, + "learning_rate": 1.617184274961355e-05, + "loss": 0.549, + "step": 21070 + }, + { + "epoch": 0.578555738605162, + "grad_norm": 0.3568468689918518, + "learning_rate": 1.617150292207353e-05, + "loss": 0.4534, + "step": 21071 + }, + { + "epoch": 0.5785831960461285, + "grad_norm": 0.4885975420475006, + "learning_rate": 1.6171163083021678e-05, + "loss": 0.5392, + "step": 21072 + }, + { + "epoch": 0.578610653487095, + "grad_norm": 0.3947300910949707, + "learning_rate": 1.617082323245864e-05, + "loss": 0.4258, + "step": 21073 + }, + { + "epoch": 0.5786381109280615, + "grad_norm": 0.3736065626144409, + "learning_rate": 1.617048337038505e-05, + "loss": 0.4635, + "step": 21074 + }, + { + "epoch": 0.578665568369028, + "grad_norm": 0.4010109305381775, + "learning_rate": 1.6170143496801534e-05, + "loss": 0.5469, + "step": 21075 + }, + { + "epoch": 0.5786930258099945, + "grad_norm": 0.38330918550491333, + "learning_rate": 1.6169803611708733e-05, + "loss": 0.5539, + "step": 21076 + }, + { + "epoch": 0.578720483250961, + "grad_norm": 0.3805399537086487, + "learning_rate": 1.616946371510728e-05, + "loss": 0.5573, + "step": 21077 + }, + { + "epoch": 0.5787479406919275, + "grad_norm": 0.35181549191474915, + "learning_rate": 1.616912380699781e-05, + "loss": 0.4343, + "step": 21078 + }, + { + "epoch": 0.578775398132894, + "grad_norm": 0.3567991554737091, + "learning_rate": 1.616878388738095e-05, + "loss": 0.4969, + "step": 21079 + }, + { + "epoch": 0.5788028555738605, + "grad_norm": 0.37538090348243713, + "learning_rate": 1.6168443956257345e-05, + "loss": 0.4416, + "step": 21080 + }, + { + "epoch": 0.578830313014827, + "grad_norm": 0.36170899868011475, + "learning_rate": 1.6168104013627618e-05, + "loss": 0.4539, + "step": 21081 + }, + { + "epoch": 0.5788577704557936, + "grad_norm": 0.39925888180732727, + "learning_rate": 1.6167764059492412e-05, + "loss": 0.4123, + "step": 21082 + }, + { + "epoch": 0.57888522789676, + "grad_norm": 0.3750722408294678, + "learning_rate": 1.6167424093852354e-05, + "loss": 0.5257, + "step": 21083 + }, + { + "epoch": 0.5789126853377266, + "grad_norm": 0.345400869846344, + "learning_rate": 1.616708411670809e-05, + "loss": 0.4338, + "step": 21084 + }, + { + "epoch": 0.578940142778693, + "grad_norm": 0.3888583779335022, + "learning_rate": 1.6166744128060237e-05, + "loss": 0.5128, + "step": 21085 + }, + { + "epoch": 0.5789676002196595, + "grad_norm": 0.4158656895160675, + "learning_rate": 1.616640412790944e-05, + "loss": 0.4485, + "step": 21086 + }, + { + "epoch": 0.578995057660626, + "grad_norm": 0.3502817749977112, + "learning_rate": 1.616606411625633e-05, + "loss": 0.4707, + "step": 21087 + }, + { + "epoch": 0.5790225151015925, + "grad_norm": 0.40070006251335144, + "learning_rate": 1.6165724093101548e-05, + "loss": 0.543, + "step": 21088 + }, + { + "epoch": 0.5790499725425591, + "grad_norm": 0.3595455586910248, + "learning_rate": 1.6165384058445716e-05, + "loss": 0.5093, + "step": 21089 + }, + { + "epoch": 0.5790774299835255, + "grad_norm": 0.3320959806442261, + "learning_rate": 1.6165044012289478e-05, + "loss": 0.4949, + "step": 21090 + }, + { + "epoch": 0.5791048874244921, + "grad_norm": 0.4028443396091461, + "learning_rate": 1.6164703954633466e-05, + "loss": 0.5567, + "step": 21091 + }, + { + "epoch": 0.5791323448654585, + "grad_norm": 0.3670755624771118, + "learning_rate": 1.6164363885478315e-05, + "loss": 0.5036, + "step": 21092 + }, + { + "epoch": 0.579159802306425, + "grad_norm": 0.3542087972164154, + "learning_rate": 1.6164023804824654e-05, + "loss": 0.4559, + "step": 21093 + }, + { + "epoch": 0.5791872597473915, + "grad_norm": 0.3829369843006134, + "learning_rate": 1.6163683712673125e-05, + "loss": 0.5623, + "step": 21094 + }, + { + "epoch": 0.579214717188358, + "grad_norm": 0.3676750361919403, + "learning_rate": 1.616334360902436e-05, + "loss": 0.5953, + "step": 21095 + }, + { + "epoch": 0.5792421746293246, + "grad_norm": 0.4147208034992218, + "learning_rate": 1.6163003493878985e-05, + "loss": 0.5608, + "step": 21096 + }, + { + "epoch": 0.579269632070291, + "grad_norm": 0.456452339887619, + "learning_rate": 1.6162663367237645e-05, + "loss": 0.5732, + "step": 21097 + }, + { + "epoch": 0.5792970895112576, + "grad_norm": 0.3554527163505554, + "learning_rate": 1.616232322910097e-05, + "loss": 0.4997, + "step": 21098 + }, + { + "epoch": 0.579324546952224, + "grad_norm": 0.3919999599456787, + "learning_rate": 1.61619830794696e-05, + "loss": 0.5016, + "step": 21099 + }, + { + "epoch": 0.5793520043931906, + "grad_norm": 0.37867915630340576, + "learning_rate": 1.6161642918344163e-05, + "loss": 0.5116, + "step": 21100 + }, + { + "epoch": 0.579379461834157, + "grad_norm": 0.3781815767288208, + "learning_rate": 1.6161302745725292e-05, + "loss": 0.5259, + "step": 21101 + }, + { + "epoch": 0.5794069192751236, + "grad_norm": 0.3836900293827057, + "learning_rate": 1.6160962561613628e-05, + "loss": 0.555, + "step": 21102 + }, + { + "epoch": 0.5794343767160901, + "grad_norm": 0.35570597648620605, + "learning_rate": 1.61606223660098e-05, + "loss": 0.4591, + "step": 21103 + }, + { + "epoch": 0.5794618341570565, + "grad_norm": 0.510765552520752, + "learning_rate": 1.616028215891445e-05, + "loss": 0.5339, + "step": 21104 + }, + { + "epoch": 0.5794892915980231, + "grad_norm": 0.3815319538116455, + "learning_rate": 1.6159941940328202e-05, + "loss": 0.5094, + "step": 21105 + }, + { + "epoch": 0.5795167490389895, + "grad_norm": 0.3469858467578888, + "learning_rate": 1.6159601710251698e-05, + "loss": 0.5943, + "step": 21106 + }, + { + "epoch": 0.5795442064799561, + "grad_norm": 0.3687015771865845, + "learning_rate": 1.6159261468685572e-05, + "loss": 0.5553, + "step": 21107 + }, + { + "epoch": 0.5795716639209225, + "grad_norm": 0.39546167850494385, + "learning_rate": 1.6158921215630458e-05, + "loss": 0.5625, + "step": 21108 + }, + { + "epoch": 0.5795991213618891, + "grad_norm": 0.36835646629333496, + "learning_rate": 1.6158580951086988e-05, + "loss": 0.449, + "step": 21109 + }, + { + "epoch": 0.5796265788028556, + "grad_norm": 0.3605920374393463, + "learning_rate": 1.61582406750558e-05, + "loss": 0.3816, + "step": 21110 + }, + { + "epoch": 0.5796540362438221, + "grad_norm": 0.3755429685115814, + "learning_rate": 1.6157900387537526e-05, + "loss": 0.5762, + "step": 21111 + }, + { + "epoch": 0.5796814936847886, + "grad_norm": 0.4290286898612976, + "learning_rate": 1.6157560088532805e-05, + "loss": 0.595, + "step": 21112 + }, + { + "epoch": 0.579708951125755, + "grad_norm": 0.37590986490249634, + "learning_rate": 1.6157219778042267e-05, + "loss": 0.5328, + "step": 21113 + }, + { + "epoch": 0.5797364085667216, + "grad_norm": 0.35470399260520935, + "learning_rate": 1.615687945606655e-05, + "loss": 0.4506, + "step": 21114 + }, + { + "epoch": 0.579763866007688, + "grad_norm": 0.42589202523231506, + "learning_rate": 1.6156539122606288e-05, + "loss": 0.5905, + "step": 21115 + }, + { + "epoch": 0.5797913234486546, + "grad_norm": 0.3568902313709259, + "learning_rate": 1.6156198777662115e-05, + "loss": 0.4373, + "step": 21116 + }, + { + "epoch": 0.5798187808896211, + "grad_norm": 0.3959977328777313, + "learning_rate": 1.615585842123467e-05, + "loss": 0.5629, + "step": 21117 + }, + { + "epoch": 0.5798462383305876, + "grad_norm": 0.3742942810058594, + "learning_rate": 1.615551805332458e-05, + "loss": 0.4652, + "step": 21118 + }, + { + "epoch": 0.5798736957715541, + "grad_norm": 0.3384424149990082, + "learning_rate": 1.6155177673932486e-05, + "loss": 0.3797, + "step": 21119 + }, + { + "epoch": 0.5799011532125206, + "grad_norm": 0.37512215971946716, + "learning_rate": 1.6154837283059022e-05, + "loss": 0.5516, + "step": 21120 + }, + { + "epoch": 0.5799286106534871, + "grad_norm": 0.41497108340263367, + "learning_rate": 1.615449688070482e-05, + "loss": 0.4927, + "step": 21121 + }, + { + "epoch": 0.5799560680944535, + "grad_norm": 0.421512633562088, + "learning_rate": 1.6154156466870515e-05, + "loss": 0.4546, + "step": 21122 + }, + { + "epoch": 0.5799835255354201, + "grad_norm": 0.352551132440567, + "learning_rate": 1.615381604155675e-05, + "loss": 0.438, + "step": 21123 + }, + { + "epoch": 0.5800109829763866, + "grad_norm": 0.3837610185146332, + "learning_rate": 1.6153475604764152e-05, + "loss": 0.4162, + "step": 21124 + }, + { + "epoch": 0.5800384404173531, + "grad_norm": 0.36279845237731934, + "learning_rate": 1.6153135156493354e-05, + "loss": 0.493, + "step": 21125 + }, + { + "epoch": 0.5800658978583196, + "grad_norm": 0.3779540956020355, + "learning_rate": 1.6152794696745e-05, + "loss": 0.5511, + "step": 21126 + }, + { + "epoch": 0.5800933552992861, + "grad_norm": 0.4185730218887329, + "learning_rate": 1.6152454225519716e-05, + "loss": 0.5768, + "step": 21127 + }, + { + "epoch": 0.5801208127402526, + "grad_norm": 0.34817633032798767, + "learning_rate": 1.6152113742818147e-05, + "loss": 0.4737, + "step": 21128 + }, + { + "epoch": 0.5801482701812191, + "grad_norm": 0.37130671739578247, + "learning_rate": 1.6151773248640914e-05, + "loss": 0.4604, + "step": 21129 + }, + { + "epoch": 0.5801757276221856, + "grad_norm": 0.40660083293914795, + "learning_rate": 1.6151432742988665e-05, + "loss": 0.6092, + "step": 21130 + }, + { + "epoch": 0.5802031850631522, + "grad_norm": 0.34974467754364014, + "learning_rate": 1.6151092225862033e-05, + "loss": 0.5246, + "step": 21131 + }, + { + "epoch": 0.5802306425041186, + "grad_norm": 0.5082796812057495, + "learning_rate": 1.615075169726165e-05, + "loss": 0.5575, + "step": 21132 + }, + { + "epoch": 0.5802580999450851, + "grad_norm": 0.40530067682266235, + "learning_rate": 1.615041115718815e-05, + "loss": 0.4551, + "step": 21133 + }, + { + "epoch": 0.5802855573860516, + "grad_norm": 0.3660169243812561, + "learning_rate": 1.615007060564217e-05, + "loss": 0.5445, + "step": 21134 + }, + { + "epoch": 0.5803130148270181, + "grad_norm": 0.4062086343765259, + "learning_rate": 1.6149730042624346e-05, + "loss": 0.543, + "step": 21135 + }, + { + "epoch": 0.5803404722679846, + "grad_norm": 0.47891783714294434, + "learning_rate": 1.614938946813531e-05, + "loss": 0.4878, + "step": 21136 + }, + { + "epoch": 0.5803679297089511, + "grad_norm": 0.45675143599510193, + "learning_rate": 1.6149048882175703e-05, + "loss": 0.4874, + "step": 21137 + }, + { + "epoch": 0.5803953871499177, + "grad_norm": 0.37275102734565735, + "learning_rate": 1.614870828474616e-05, + "loss": 0.457, + "step": 21138 + }, + { + "epoch": 0.5804228445908841, + "grad_norm": 0.320388525724411, + "learning_rate": 1.614836767584731e-05, + "loss": 0.4636, + "step": 21139 + }, + { + "epoch": 0.5804503020318507, + "grad_norm": 0.4236691892147064, + "learning_rate": 1.6148027055479793e-05, + "loss": 0.4718, + "step": 21140 + }, + { + "epoch": 0.5804777594728171, + "grad_norm": 0.3942870497703552, + "learning_rate": 1.6147686423644243e-05, + "loss": 0.5397, + "step": 21141 + }, + { + "epoch": 0.5805052169137837, + "grad_norm": 0.36060890555381775, + "learning_rate": 1.6147345780341293e-05, + "loss": 0.479, + "step": 21142 + }, + { + "epoch": 0.5805326743547501, + "grad_norm": 0.39557743072509766, + "learning_rate": 1.6147005125571588e-05, + "loss": 0.4087, + "step": 21143 + }, + { + "epoch": 0.5805601317957166, + "grad_norm": 0.3715820014476776, + "learning_rate": 1.614666445933575e-05, + "loss": 0.5211, + "step": 21144 + }, + { + "epoch": 0.5805875892366832, + "grad_norm": 0.34634697437286377, + "learning_rate": 1.6146323781634422e-05, + "loss": 0.5011, + "step": 21145 + }, + { + "epoch": 0.5806150466776496, + "grad_norm": 0.39563706517219543, + "learning_rate": 1.6145983092468243e-05, + "loss": 0.4716, + "step": 21146 + }, + { + "epoch": 0.5806425041186162, + "grad_norm": 0.36046019196510315, + "learning_rate": 1.614564239183784e-05, + "loss": 0.5155, + "step": 21147 + }, + { + "epoch": 0.5806699615595826, + "grad_norm": 0.938191831111908, + "learning_rate": 1.6145301679743854e-05, + "loss": 0.5709, + "step": 21148 + }, + { + "epoch": 0.5806974190005492, + "grad_norm": 0.41859158873558044, + "learning_rate": 1.6144960956186918e-05, + "loss": 0.5358, + "step": 21149 + }, + { + "epoch": 0.5807248764415156, + "grad_norm": 0.3915003836154938, + "learning_rate": 1.6144620221167668e-05, + "loss": 0.5813, + "step": 21150 + }, + { + "epoch": 0.5807523338824822, + "grad_norm": 0.38850530982017517, + "learning_rate": 1.6144279474686743e-05, + "loss": 0.4872, + "step": 21151 + }, + { + "epoch": 0.5807797913234487, + "grad_norm": 0.367949515581131, + "learning_rate": 1.6143938716744772e-05, + "loss": 0.3929, + "step": 21152 + }, + { + "epoch": 0.5808072487644151, + "grad_norm": 0.36562207341194153, + "learning_rate": 1.6143597947342398e-05, + "loss": 0.5352, + "step": 21153 + }, + { + "epoch": 0.5808347062053817, + "grad_norm": 0.36746636033058167, + "learning_rate": 1.614325716648025e-05, + "loss": 0.4785, + "step": 21154 + }, + { + "epoch": 0.5808621636463481, + "grad_norm": 0.3658316433429718, + "learning_rate": 1.6142916374158967e-05, + "loss": 0.4691, + "step": 21155 + }, + { + "epoch": 0.5808896210873147, + "grad_norm": 0.43570974469184875, + "learning_rate": 1.6142575570379185e-05, + "loss": 0.5311, + "step": 21156 + }, + { + "epoch": 0.5809170785282811, + "grad_norm": 0.35501614212989807, + "learning_rate": 1.614223475514154e-05, + "loss": 0.4531, + "step": 21157 + }, + { + "epoch": 0.5809445359692477, + "grad_norm": 0.3405054211616516, + "learning_rate": 1.6141893928446667e-05, + "loss": 0.509, + "step": 21158 + }, + { + "epoch": 0.5809719934102142, + "grad_norm": 0.3461112082004547, + "learning_rate": 1.6141553090295202e-05, + "loss": 0.4803, + "step": 21159 + }, + { + "epoch": 0.5809994508511807, + "grad_norm": 0.36415600776672363, + "learning_rate": 1.6141212240687776e-05, + "loss": 0.4583, + "step": 21160 + }, + { + "epoch": 0.5810269082921472, + "grad_norm": 0.4015338718891144, + "learning_rate": 1.6140871379625033e-05, + "loss": 0.5149, + "step": 21161 + }, + { + "epoch": 0.5810543657331136, + "grad_norm": 0.3863988518714905, + "learning_rate": 1.6140530507107605e-05, + "loss": 0.5744, + "step": 21162 + }, + { + "epoch": 0.5810818231740802, + "grad_norm": 0.35629045963287354, + "learning_rate": 1.6140189623136127e-05, + "loss": 0.4637, + "step": 21163 + }, + { + "epoch": 0.5811092806150466, + "grad_norm": 0.4541007876396179, + "learning_rate": 1.6139848727711235e-05, + "loss": 0.4848, + "step": 21164 + }, + { + "epoch": 0.5811367380560132, + "grad_norm": 0.37079057097435, + "learning_rate": 1.6139507820833564e-05, + "loss": 0.5271, + "step": 21165 + }, + { + "epoch": 0.5811641954969797, + "grad_norm": 0.44012296199798584, + "learning_rate": 1.6139166902503756e-05, + "loss": 0.5668, + "step": 21166 + }, + { + "epoch": 0.5811916529379462, + "grad_norm": 0.3689175546169281, + "learning_rate": 1.6138825972722442e-05, + "loss": 0.4456, + "step": 21167 + }, + { + "epoch": 0.5812191103789127, + "grad_norm": 0.40422213077545166, + "learning_rate": 1.6138485031490253e-05, + "loss": 0.5222, + "step": 21168 + }, + { + "epoch": 0.5812465678198792, + "grad_norm": 0.3705901801586151, + "learning_rate": 1.613814407880783e-05, + "loss": 0.5142, + "step": 21169 + }, + { + "epoch": 0.5812740252608457, + "grad_norm": 0.37258240580558777, + "learning_rate": 1.6137803114675815e-05, + "loss": 0.6344, + "step": 21170 + }, + { + "epoch": 0.5813014827018121, + "grad_norm": 0.42369797825813293, + "learning_rate": 1.6137462139094836e-05, + "loss": 0.635, + "step": 21171 + }, + { + "epoch": 0.5813289401427787, + "grad_norm": 0.3720398247241974, + "learning_rate": 1.613712115206553e-05, + "loss": 0.6029, + "step": 21172 + }, + { + "epoch": 0.5813563975837452, + "grad_norm": 0.38557180762290955, + "learning_rate": 1.6136780153588537e-05, + "loss": 0.556, + "step": 21173 + }, + { + "epoch": 0.5813838550247117, + "grad_norm": 0.39516186714172363, + "learning_rate": 1.613643914366449e-05, + "loss": 0.5882, + "step": 21174 + }, + { + "epoch": 0.5814113124656782, + "grad_norm": 0.3162324130535126, + "learning_rate": 1.613609812229402e-05, + "loss": 0.4666, + "step": 21175 + }, + { + "epoch": 0.5814387699066447, + "grad_norm": 0.3697158396244049, + "learning_rate": 1.6135757089477773e-05, + "loss": 0.5144, + "step": 21176 + }, + { + "epoch": 0.5814662273476112, + "grad_norm": 0.39619362354278564, + "learning_rate": 1.6135416045216382e-05, + "loss": 0.5531, + "step": 21177 + }, + { + "epoch": 0.5814936847885777, + "grad_norm": 0.41623106598854065, + "learning_rate": 1.613507498951048e-05, + "loss": 0.5554, + "step": 21178 + }, + { + "epoch": 0.5815211422295442, + "grad_norm": 0.3983595669269562, + "learning_rate": 1.6134733922360705e-05, + "loss": 0.5437, + "step": 21179 + }, + { + "epoch": 0.5815485996705108, + "grad_norm": 0.35298386216163635, + "learning_rate": 1.6134392843767694e-05, + "loss": 0.4788, + "step": 21180 + }, + { + "epoch": 0.5815760571114772, + "grad_norm": 0.3758779764175415, + "learning_rate": 1.6134051753732083e-05, + "loss": 0.4906, + "step": 21181 + }, + { + "epoch": 0.5816035145524437, + "grad_norm": 0.3997713327407837, + "learning_rate": 1.6133710652254507e-05, + "loss": 0.396, + "step": 21182 + }, + { + "epoch": 0.5816309719934102, + "grad_norm": 0.3680139482021332, + "learning_rate": 1.61333695393356e-05, + "loss": 0.5507, + "step": 21183 + }, + { + "epoch": 0.5816584294343767, + "grad_norm": 0.4361574649810791, + "learning_rate": 1.6133028414976006e-05, + "loss": 0.5785, + "step": 21184 + }, + { + "epoch": 0.5816858868753432, + "grad_norm": 0.38822340965270996, + "learning_rate": 1.6132687279176357e-05, + "loss": 0.4988, + "step": 21185 + }, + { + "epoch": 0.5817133443163097, + "grad_norm": 0.4322441816329956, + "learning_rate": 1.6132346131937285e-05, + "loss": 0.5653, + "step": 21186 + }, + { + "epoch": 0.5817408017572763, + "grad_norm": 0.3880590498447418, + "learning_rate": 1.6132004973259432e-05, + "loss": 0.4207, + "step": 21187 + }, + { + "epoch": 0.5817682591982427, + "grad_norm": 0.41412153840065, + "learning_rate": 1.6131663803143432e-05, + "loss": 0.4539, + "step": 21188 + }, + { + "epoch": 0.5817957166392093, + "grad_norm": 0.43592569231987, + "learning_rate": 1.6131322621589924e-05, + "loss": 0.5483, + "step": 21189 + }, + { + "epoch": 0.5818231740801757, + "grad_norm": 0.4527169167995453, + "learning_rate": 1.6130981428599542e-05, + "loss": 0.5507, + "step": 21190 + }, + { + "epoch": 0.5818506315211422, + "grad_norm": 0.4116746485233307, + "learning_rate": 1.6130640224172918e-05, + "loss": 0.4891, + "step": 21191 + }, + { + "epoch": 0.5818780889621087, + "grad_norm": 0.43417349457740784, + "learning_rate": 1.61302990083107e-05, + "loss": 0.6141, + "step": 21192 + }, + { + "epoch": 0.5819055464030752, + "grad_norm": 0.4031166732311249, + "learning_rate": 1.612995778101351e-05, + "loss": 0.5593, + "step": 21193 + }, + { + "epoch": 0.5819330038440418, + "grad_norm": 0.37139979004859924, + "learning_rate": 1.6129616542282e-05, + "loss": 0.4914, + "step": 21194 + }, + { + "epoch": 0.5819604612850082, + "grad_norm": 0.36679428815841675, + "learning_rate": 1.6129275292116794e-05, + "loss": 0.4747, + "step": 21195 + }, + { + "epoch": 0.5819879187259748, + "grad_norm": 0.38614049553871155, + "learning_rate": 1.6128934030518536e-05, + "loss": 0.5954, + "step": 21196 + }, + { + "epoch": 0.5820153761669412, + "grad_norm": 0.3812297582626343, + "learning_rate": 1.612859275748786e-05, + "loss": 0.5431, + "step": 21197 + }, + { + "epoch": 0.5820428336079078, + "grad_norm": 0.3961833119392395, + "learning_rate": 1.6128251473025402e-05, + "loss": 0.5407, + "step": 21198 + }, + { + "epoch": 0.5820702910488742, + "grad_norm": 0.4305078685283661, + "learning_rate": 1.6127910177131797e-05, + "loss": 0.5195, + "step": 21199 + }, + { + "epoch": 0.5820977484898407, + "grad_norm": 0.4063071012496948, + "learning_rate": 1.6127568869807685e-05, + "loss": 0.5937, + "step": 21200 + }, + { + "epoch": 0.5821252059308073, + "grad_norm": 0.3801192045211792, + "learning_rate": 1.6127227551053704e-05, + "loss": 0.5091, + "step": 21201 + }, + { + "epoch": 0.5821526633717737, + "grad_norm": 0.3969520032405853, + "learning_rate": 1.6126886220870487e-05, + "loss": 0.4681, + "step": 21202 + }, + { + "epoch": 0.5821801208127403, + "grad_norm": 0.391012579202652, + "learning_rate": 1.612654487925867e-05, + "loss": 0.5308, + "step": 21203 + }, + { + "epoch": 0.5822075782537067, + "grad_norm": 0.3582218885421753, + "learning_rate": 1.612620352621889e-05, + "loss": 0.5099, + "step": 21204 + }, + { + "epoch": 0.5822350356946733, + "grad_norm": 0.345019668340683, + "learning_rate": 1.6125862161751787e-05, + "loss": 0.4559, + "step": 21205 + }, + { + "epoch": 0.5822624931356397, + "grad_norm": 0.4182300269603729, + "learning_rate": 1.6125520785857996e-05, + "loss": 0.5883, + "step": 21206 + }, + { + "epoch": 0.5822899505766063, + "grad_norm": 0.4008396565914154, + "learning_rate": 1.612517939853815e-05, + "loss": 0.5124, + "step": 21207 + }, + { + "epoch": 0.5823174080175728, + "grad_norm": 0.4058840870857239, + "learning_rate": 1.6124837999792896e-05, + "loss": 0.5477, + "step": 21208 + }, + { + "epoch": 0.5823448654585393, + "grad_norm": 0.4117266535758972, + "learning_rate": 1.6124496589622864e-05, + "loss": 0.4727, + "step": 21209 + }, + { + "epoch": 0.5823723228995058, + "grad_norm": 0.41523560881614685, + "learning_rate": 1.612415516802869e-05, + "loss": 0.4449, + "step": 21210 + }, + { + "epoch": 0.5823997803404722, + "grad_norm": 0.3789553940296173, + "learning_rate": 1.612381373501101e-05, + "loss": 0.4591, + "step": 21211 + }, + { + "epoch": 0.5824272377814388, + "grad_norm": 0.9850039482116699, + "learning_rate": 1.612347229057046e-05, + "loss": 0.5296, + "step": 21212 + }, + { + "epoch": 0.5824546952224052, + "grad_norm": 0.5352584719657898, + "learning_rate": 1.6123130834707686e-05, + "loss": 0.4764, + "step": 21213 + }, + { + "epoch": 0.5824821526633718, + "grad_norm": 0.3898897171020508, + "learning_rate": 1.6122789367423317e-05, + "loss": 0.5689, + "step": 21214 + }, + { + "epoch": 0.5825096101043383, + "grad_norm": 0.32067176699638367, + "learning_rate": 1.6122447888717992e-05, + "loss": 0.3733, + "step": 21215 + }, + { + "epoch": 0.5825370675453048, + "grad_norm": 0.36533036828041077, + "learning_rate": 1.6122106398592345e-05, + "loss": 0.4849, + "step": 21216 + }, + { + "epoch": 0.5825645249862713, + "grad_norm": 0.4269741177558899, + "learning_rate": 1.6121764897047018e-05, + "loss": 0.57, + "step": 21217 + }, + { + "epoch": 0.5825919824272378, + "grad_norm": 0.41833600401878357, + "learning_rate": 1.6121423384082645e-05, + "loss": 0.5917, + "step": 21218 + }, + { + "epoch": 0.5826194398682043, + "grad_norm": 0.35699743032455444, + "learning_rate": 1.612108185969986e-05, + "loss": 0.5493, + "step": 21219 + }, + { + "epoch": 0.5826468973091707, + "grad_norm": 0.3763284683227539, + "learning_rate": 1.612074032389931e-05, + "loss": 0.5209, + "step": 21220 + }, + { + "epoch": 0.5826743547501373, + "grad_norm": 0.3895307183265686, + "learning_rate": 1.612039877668162e-05, + "loss": 0.4872, + "step": 21221 + }, + { + "epoch": 0.5827018121911038, + "grad_norm": 0.45322877168655396, + "learning_rate": 1.6120057218047437e-05, + "loss": 0.5435, + "step": 21222 + }, + { + "epoch": 0.5827292696320703, + "grad_norm": 0.35408976674079895, + "learning_rate": 1.611971564799739e-05, + "loss": 0.5325, + "step": 21223 + }, + { + "epoch": 0.5827567270730368, + "grad_norm": 0.3359763026237488, + "learning_rate": 1.6119374066532126e-05, + "loss": 0.4269, + "step": 21224 + }, + { + "epoch": 0.5827841845140033, + "grad_norm": 0.3831687867641449, + "learning_rate": 1.6119032473652273e-05, + "loss": 0.4267, + "step": 21225 + }, + { + "epoch": 0.5828116419549698, + "grad_norm": 0.3582947850227356, + "learning_rate": 1.611869086935847e-05, + "loss": 0.5311, + "step": 21226 + }, + { + "epoch": 0.5828390993959363, + "grad_norm": 0.34726935625076294, + "learning_rate": 1.6118349253651357e-05, + "loss": 0.4708, + "step": 21227 + }, + { + "epoch": 0.5828665568369028, + "grad_norm": 0.45481860637664795, + "learning_rate": 1.611800762653157e-05, + "loss": 0.613, + "step": 21228 + }, + { + "epoch": 0.5828940142778694, + "grad_norm": 0.3873506784439087, + "learning_rate": 1.611766598799975e-05, + "loss": 0.4866, + "step": 21229 + }, + { + "epoch": 0.5829214717188358, + "grad_norm": 0.4234866201877594, + "learning_rate": 1.6117324338056522e-05, + "loss": 0.5567, + "step": 21230 + }, + { + "epoch": 0.5829489291598023, + "grad_norm": 0.3780277669429779, + "learning_rate": 1.611698267670254e-05, + "loss": 0.5208, + "step": 21231 + }, + { + "epoch": 0.5829763866007688, + "grad_norm": 0.3667088449001312, + "learning_rate": 1.611664100393843e-05, + "loss": 0.4425, + "step": 21232 + }, + { + "epoch": 0.5830038440417353, + "grad_norm": 0.46584028005599976, + "learning_rate": 1.611629931976483e-05, + "loss": 0.4993, + "step": 21233 + }, + { + "epoch": 0.5830313014827018, + "grad_norm": 0.464614599943161, + "learning_rate": 1.6115957624182382e-05, + "loss": 0.4573, + "step": 21234 + }, + { + "epoch": 0.5830587589236683, + "grad_norm": 0.406038761138916, + "learning_rate": 1.611561591719172e-05, + "loss": 0.4468, + "step": 21235 + }, + { + "epoch": 0.5830862163646349, + "grad_norm": 0.3388383984565735, + "learning_rate": 1.6115274198793483e-05, + "loss": 0.4894, + "step": 21236 + }, + { + "epoch": 0.5831136738056013, + "grad_norm": 0.4083103537559509, + "learning_rate": 1.6114932468988307e-05, + "loss": 0.5307, + "step": 21237 + }, + { + "epoch": 0.5831411312465679, + "grad_norm": 0.3521682620048523, + "learning_rate": 1.611459072777683e-05, + "loss": 0.4796, + "step": 21238 + }, + { + "epoch": 0.5831685886875343, + "grad_norm": 0.8415577411651611, + "learning_rate": 1.611424897515969e-05, + "loss": 0.5235, + "step": 21239 + }, + { + "epoch": 0.5831960461285008, + "grad_norm": 0.4066687524318695, + "learning_rate": 1.6113907211137525e-05, + "loss": 0.5972, + "step": 21240 + }, + { + "epoch": 0.5832235035694673, + "grad_norm": 0.41477474570274353, + "learning_rate": 1.6113565435710975e-05, + "loss": 0.5149, + "step": 21241 + }, + { + "epoch": 0.5832509610104338, + "grad_norm": 0.3899105489253998, + "learning_rate": 1.6113223648880668e-05, + "loss": 0.512, + "step": 21242 + }, + { + "epoch": 0.5832784184514004, + "grad_norm": 0.3337099552154541, + "learning_rate": 1.611288185064725e-05, + "loss": 0.475, + "step": 21243 + }, + { + "epoch": 0.5833058758923668, + "grad_norm": 0.6104755997657776, + "learning_rate": 1.6112540041011358e-05, + "loss": 0.5512, + "step": 21244 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.36482909321784973, + "learning_rate": 1.6112198219973625e-05, + "loss": 0.4486, + "step": 21245 + }, + { + "epoch": 0.5833607907742998, + "grad_norm": 0.3636209964752197, + "learning_rate": 1.6111856387534697e-05, + "loss": 0.4837, + "step": 21246 + }, + { + "epoch": 0.5833882482152664, + "grad_norm": 0.35087481141090393, + "learning_rate": 1.6111514543695204e-05, + "loss": 0.4286, + "step": 21247 + }, + { + "epoch": 0.5834157056562328, + "grad_norm": 0.3428530693054199, + "learning_rate": 1.611117268845578e-05, + "loss": 0.418, + "step": 21248 + }, + { + "epoch": 0.5834431630971993, + "grad_norm": 0.37464645504951477, + "learning_rate": 1.6110830821817075e-05, + "loss": 0.4727, + "step": 21249 + }, + { + "epoch": 0.5834706205381659, + "grad_norm": 0.3216412663459778, + "learning_rate": 1.6110488943779717e-05, + "loss": 0.483, + "step": 21250 + }, + { + "epoch": 0.5834980779791323, + "grad_norm": 0.3745121359825134, + "learning_rate": 1.611014705434435e-05, + "loss": 0.5083, + "step": 21251 + }, + { + "epoch": 0.5835255354200989, + "grad_norm": 0.34712153673171997, + "learning_rate": 1.6109805153511606e-05, + "loss": 0.5264, + "step": 21252 + }, + { + "epoch": 0.5835529928610653, + "grad_norm": 0.3746022880077362, + "learning_rate": 1.6109463241282127e-05, + "loss": 0.4804, + "step": 21253 + }, + { + "epoch": 0.5835804503020319, + "grad_norm": 0.36023518443107605, + "learning_rate": 1.6109121317656548e-05, + "loss": 0.5094, + "step": 21254 + }, + { + "epoch": 0.5836079077429983, + "grad_norm": 0.46353960037231445, + "learning_rate": 1.6108779382635506e-05, + "loss": 0.4473, + "step": 21255 + }, + { + "epoch": 0.5836353651839649, + "grad_norm": 0.649553656578064, + "learning_rate": 1.610843743621964e-05, + "loss": 0.5928, + "step": 21256 + }, + { + "epoch": 0.5836628226249314, + "grad_norm": 0.43268081545829773, + "learning_rate": 1.6108095478409595e-05, + "loss": 0.4707, + "step": 21257 + }, + { + "epoch": 0.5836902800658978, + "grad_norm": 0.37631580233573914, + "learning_rate": 1.6107753509205996e-05, + "loss": 0.5136, + "step": 21258 + }, + { + "epoch": 0.5837177375068644, + "grad_norm": 0.3769768178462982, + "learning_rate": 1.6107411528609492e-05, + "loss": 0.5149, + "step": 21259 + }, + { + "epoch": 0.5837451949478308, + "grad_norm": 0.34461772441864014, + "learning_rate": 1.6107069536620714e-05, + "loss": 0.4917, + "step": 21260 + }, + { + "epoch": 0.5837726523887974, + "grad_norm": 0.36751461029052734, + "learning_rate": 1.61067275332403e-05, + "loss": 0.4942, + "step": 21261 + }, + { + "epoch": 0.5838001098297638, + "grad_norm": 0.398088663816452, + "learning_rate": 1.6106385518468895e-05, + "loss": 0.5292, + "step": 21262 + }, + { + "epoch": 0.5838275672707304, + "grad_norm": 0.37324175238609314, + "learning_rate": 1.610604349230713e-05, + "loss": 0.4398, + "step": 21263 + }, + { + "epoch": 0.5838550247116969, + "grad_norm": 0.3872585892677307, + "learning_rate": 1.6105701454755645e-05, + "loss": 0.567, + "step": 21264 + }, + { + "epoch": 0.5838824821526634, + "grad_norm": 0.37501952052116394, + "learning_rate": 1.610535940581508e-05, + "loss": 0.4863, + "step": 21265 + }, + { + "epoch": 0.5839099395936299, + "grad_norm": 0.5118427276611328, + "learning_rate": 1.6105017345486066e-05, + "loss": 0.4685, + "step": 21266 + }, + { + "epoch": 0.5839373970345964, + "grad_norm": 0.3743548095226288, + "learning_rate": 1.610467527376925e-05, + "loss": 0.4901, + "step": 21267 + }, + { + "epoch": 0.5839648544755629, + "grad_norm": 0.34674790501594543, + "learning_rate": 1.6104333190665264e-05, + "loss": 0.5025, + "step": 21268 + }, + { + "epoch": 0.5839923119165293, + "grad_norm": 0.43209540843963623, + "learning_rate": 1.6103991096174752e-05, + "loss": 0.5654, + "step": 21269 + }, + { + "epoch": 0.5840197693574959, + "grad_norm": 0.4114006459712982, + "learning_rate": 1.6103648990298342e-05, + "loss": 0.4969, + "step": 21270 + }, + { + "epoch": 0.5840472267984624, + "grad_norm": 0.3788813054561615, + "learning_rate": 1.6103306873036684e-05, + "loss": 0.4483, + "step": 21271 + }, + { + "epoch": 0.5840746842394289, + "grad_norm": 0.42829933762550354, + "learning_rate": 1.610296474439041e-05, + "loss": 0.4866, + "step": 21272 + }, + { + "epoch": 0.5841021416803954, + "grad_norm": 0.36395618319511414, + "learning_rate": 1.610262260436016e-05, + "loss": 0.478, + "step": 21273 + }, + { + "epoch": 0.5841295991213619, + "grad_norm": 0.41468989849090576, + "learning_rate": 1.6102280452946568e-05, + "loss": 0.465, + "step": 21274 + }, + { + "epoch": 0.5841570565623284, + "grad_norm": 0.37473928928375244, + "learning_rate": 1.6101938290150275e-05, + "loss": 0.4974, + "step": 21275 + }, + { + "epoch": 0.5841845140032949, + "grad_norm": 0.4835295081138611, + "learning_rate": 1.6101596115971923e-05, + "loss": 0.5773, + "step": 21276 + }, + { + "epoch": 0.5842119714442614, + "grad_norm": 0.40472400188446045, + "learning_rate": 1.6101253930412142e-05, + "loss": 0.5918, + "step": 21277 + }, + { + "epoch": 0.584239428885228, + "grad_norm": 0.3882187604904175, + "learning_rate": 1.6100911733471583e-05, + "loss": 0.5431, + "step": 21278 + }, + { + "epoch": 0.5842668863261944, + "grad_norm": 0.3852077126502991, + "learning_rate": 1.610056952515087e-05, + "loss": 0.5184, + "step": 21279 + }, + { + "epoch": 0.5842943437671609, + "grad_norm": 0.3876115083694458, + "learning_rate": 1.610022730545065e-05, + "loss": 0.56, + "step": 21280 + }, + { + "epoch": 0.5843218012081274, + "grad_norm": 0.538373589515686, + "learning_rate": 1.609988507437156e-05, + "loss": 0.48, + "step": 21281 + }, + { + "epoch": 0.5843492586490939, + "grad_norm": 0.37171968817710876, + "learning_rate": 1.6099542831914235e-05, + "loss": 0.4844, + "step": 21282 + }, + { + "epoch": 0.5843767160900604, + "grad_norm": 0.4479483962059021, + "learning_rate": 1.6099200578079315e-05, + "loss": 0.5048, + "step": 21283 + }, + { + "epoch": 0.5844041735310269, + "grad_norm": 0.3600446879863739, + "learning_rate": 1.6098858312867443e-05, + "loss": 0.57, + "step": 21284 + }, + { + "epoch": 0.5844316309719935, + "grad_norm": 0.361345499753952, + "learning_rate": 1.6098516036279253e-05, + "loss": 0.4852, + "step": 21285 + }, + { + "epoch": 0.5844590884129599, + "grad_norm": 0.4295426607131958, + "learning_rate": 1.609817374831538e-05, + "loss": 0.5091, + "step": 21286 + }, + { + "epoch": 0.5844865458539265, + "grad_norm": 0.4036419689655304, + "learning_rate": 1.609783144897647e-05, + "loss": 0.5548, + "step": 21287 + }, + { + "epoch": 0.5845140032948929, + "grad_norm": 0.4391203820705414, + "learning_rate": 1.609748913826316e-05, + "loss": 0.5319, + "step": 21288 + }, + { + "epoch": 0.5845414607358594, + "grad_norm": 0.3263184130191803, + "learning_rate": 1.6097146816176084e-05, + "loss": 0.4761, + "step": 21289 + }, + { + "epoch": 0.5845689181768259, + "grad_norm": 0.38509148359298706, + "learning_rate": 1.6096804482715883e-05, + "loss": 0.5261, + "step": 21290 + }, + { + "epoch": 0.5845963756177924, + "grad_norm": 0.40050920844078064, + "learning_rate": 1.6096462137883198e-05, + "loss": 0.5568, + "step": 21291 + }, + { + "epoch": 0.5846238330587589, + "grad_norm": 0.3438301086425781, + "learning_rate": 1.6096119781678662e-05, + "loss": 0.5008, + "step": 21292 + }, + { + "epoch": 0.5846512904997254, + "grad_norm": 0.4184826612472534, + "learning_rate": 1.6095777414102922e-05, + "loss": 0.5215, + "step": 21293 + }, + { + "epoch": 0.584678747940692, + "grad_norm": 0.35368236899375916, + "learning_rate": 1.6095435035156605e-05, + "loss": 0.5029, + "step": 21294 + }, + { + "epoch": 0.5847062053816584, + "grad_norm": 0.35445356369018555, + "learning_rate": 1.609509264484036e-05, + "loss": 0.4815, + "step": 21295 + }, + { + "epoch": 0.584733662822625, + "grad_norm": 0.325330525636673, + "learning_rate": 1.6094750243154825e-05, + "loss": 0.4734, + "step": 21296 + }, + { + "epoch": 0.5847611202635914, + "grad_norm": 0.41878825426101685, + "learning_rate": 1.609440783010063e-05, + "loss": 0.5775, + "step": 21297 + }, + { + "epoch": 0.5847885777045579, + "grad_norm": 0.4062322974205017, + "learning_rate": 1.609406540567842e-05, + "loss": 0.5291, + "step": 21298 + }, + { + "epoch": 0.5848160351455244, + "grad_norm": 0.4348547160625458, + "learning_rate": 1.6093722969888834e-05, + "loss": 0.5386, + "step": 21299 + }, + { + "epoch": 0.5848434925864909, + "grad_norm": 0.3933655619621277, + "learning_rate": 1.6093380522732515e-05, + "loss": 0.4556, + "step": 21300 + }, + { + "epoch": 0.5848709500274575, + "grad_norm": 0.3649437725543976, + "learning_rate": 1.609303806421009e-05, + "loss": 0.475, + "step": 21301 + }, + { + "epoch": 0.5848984074684239, + "grad_norm": 0.4191082715988159, + "learning_rate": 1.6092695594322205e-05, + "loss": 0.5191, + "step": 21302 + }, + { + "epoch": 0.5849258649093905, + "grad_norm": 0.31631165742874146, + "learning_rate": 1.60923531130695e-05, + "loss": 0.4017, + "step": 21303 + }, + { + "epoch": 0.5849533223503569, + "grad_norm": 0.3924362361431122, + "learning_rate": 1.6092010620452608e-05, + "loss": 0.4752, + "step": 21304 + }, + { + "epoch": 0.5849807797913235, + "grad_norm": 0.34938833117485046, + "learning_rate": 1.6091668116472176e-05, + "loss": 0.4758, + "step": 21305 + }, + { + "epoch": 0.5850082372322899, + "grad_norm": 0.4159508943557739, + "learning_rate": 1.609132560112884e-05, + "loss": 0.6009, + "step": 21306 + }, + { + "epoch": 0.5850356946732564, + "grad_norm": 0.38312390446662903, + "learning_rate": 1.6090983074423234e-05, + "loss": 0.5503, + "step": 21307 + }, + { + "epoch": 0.585063152114223, + "grad_norm": 0.39639943838119507, + "learning_rate": 1.6090640536356e-05, + "loss": 0.518, + "step": 21308 + }, + { + "epoch": 0.5850906095551894, + "grad_norm": 0.35618704557418823, + "learning_rate": 1.6090297986927783e-05, + "loss": 0.553, + "step": 21309 + }, + { + "epoch": 0.585118066996156, + "grad_norm": 0.39169633388519287, + "learning_rate": 1.6089955426139213e-05, + "loss": 0.4761, + "step": 21310 + }, + { + "epoch": 0.5851455244371224, + "grad_norm": 0.45335105061531067, + "learning_rate": 1.6089612853990932e-05, + "loss": 0.5036, + "step": 21311 + }, + { + "epoch": 0.585172981878089, + "grad_norm": 0.376676470041275, + "learning_rate": 1.608927027048358e-05, + "loss": 0.5324, + "step": 21312 + }, + { + "epoch": 0.5852004393190554, + "grad_norm": 0.370243102312088, + "learning_rate": 1.6088927675617797e-05, + "loss": 0.5073, + "step": 21313 + }, + { + "epoch": 0.585227896760022, + "grad_norm": 0.3771556317806244, + "learning_rate": 1.6088585069394217e-05, + "loss": 0.557, + "step": 21314 + }, + { + "epoch": 0.5852553542009885, + "grad_norm": 0.3795780539512634, + "learning_rate": 1.6088242451813484e-05, + "loss": 0.5261, + "step": 21315 + }, + { + "epoch": 0.585282811641955, + "grad_norm": 0.4022909700870514, + "learning_rate": 1.6087899822876235e-05, + "loss": 0.6022, + "step": 21316 + }, + { + "epoch": 0.5853102690829215, + "grad_norm": 0.40606197714805603, + "learning_rate": 1.6087557182583113e-05, + "loss": 0.545, + "step": 21317 + }, + { + "epoch": 0.5853377265238879, + "grad_norm": 0.3656183183193207, + "learning_rate": 1.6087214530934748e-05, + "loss": 0.4132, + "step": 21318 + }, + { + "epoch": 0.5853651839648545, + "grad_norm": 0.3661345839500427, + "learning_rate": 1.6086871867931792e-05, + "loss": 0.4112, + "step": 21319 + }, + { + "epoch": 0.5853926414058209, + "grad_norm": 0.3930721580982208, + "learning_rate": 1.6086529193574874e-05, + "loss": 0.5798, + "step": 21320 + }, + { + "epoch": 0.5854200988467875, + "grad_norm": 0.4215219020843506, + "learning_rate": 1.6086186507864635e-05, + "loss": 0.5581, + "step": 21321 + }, + { + "epoch": 0.585447556287754, + "grad_norm": 0.39606142044067383, + "learning_rate": 1.6085843810801718e-05, + "loss": 0.4586, + "step": 21322 + }, + { + "epoch": 0.5854750137287205, + "grad_norm": 0.3777654767036438, + "learning_rate": 1.608550110238676e-05, + "loss": 0.5102, + "step": 21323 + }, + { + "epoch": 0.585502471169687, + "grad_norm": 0.4549945592880249, + "learning_rate": 1.60851583826204e-05, + "loss": 0.4922, + "step": 21324 + }, + { + "epoch": 0.5855299286106534, + "grad_norm": 0.3838466703891754, + "learning_rate": 1.6084815651503275e-05, + "loss": 0.499, + "step": 21325 + }, + { + "epoch": 0.58555738605162, + "grad_norm": 0.44715091586112976, + "learning_rate": 1.6084472909036032e-05, + "loss": 0.5304, + "step": 21326 + }, + { + "epoch": 0.5855848434925864, + "grad_norm": 0.5546845197677612, + "learning_rate": 1.6084130155219302e-05, + "loss": 0.431, + "step": 21327 + }, + { + "epoch": 0.585612300933553, + "grad_norm": 0.38248196244239807, + "learning_rate": 1.6083787390053723e-05, + "loss": 0.4349, + "step": 21328 + }, + { + "epoch": 0.5856397583745195, + "grad_norm": 0.3472059965133667, + "learning_rate": 1.6083444613539946e-05, + "loss": 0.5129, + "step": 21329 + }, + { + "epoch": 0.585667215815486, + "grad_norm": 0.4445887506008148, + "learning_rate": 1.60831018256786e-05, + "loss": 0.4564, + "step": 21330 + }, + { + "epoch": 0.5856946732564525, + "grad_norm": 0.33410021662712097, + "learning_rate": 1.6082759026470328e-05, + "loss": 0.4825, + "step": 21331 + }, + { + "epoch": 0.585722130697419, + "grad_norm": 0.39910364151000977, + "learning_rate": 1.6082416215915767e-05, + "loss": 0.5159, + "step": 21332 + }, + { + "epoch": 0.5857495881383855, + "grad_norm": 0.36702319979667664, + "learning_rate": 1.6082073394015562e-05, + "loss": 0.4594, + "step": 21333 + }, + { + "epoch": 0.585777045579352, + "grad_norm": 0.39802059531211853, + "learning_rate": 1.6081730560770344e-05, + "loss": 0.5146, + "step": 21334 + }, + { + "epoch": 0.5858045030203185, + "grad_norm": 0.3603227138519287, + "learning_rate": 1.608138771618076e-05, + "loss": 0.4798, + "step": 21335 + }, + { + "epoch": 0.585831960461285, + "grad_norm": 0.38644322752952576, + "learning_rate": 1.608104486024745e-05, + "loss": 0.5631, + "step": 21336 + }, + { + "epoch": 0.5858594179022515, + "grad_norm": 0.40742355585098267, + "learning_rate": 1.608070199297105e-05, + "loss": 0.5315, + "step": 21337 + }, + { + "epoch": 0.585886875343218, + "grad_norm": 0.4441278278827667, + "learning_rate": 1.6080359114352196e-05, + "loss": 0.5776, + "step": 21338 + }, + { + "epoch": 0.5859143327841845, + "grad_norm": 0.3608386516571045, + "learning_rate": 1.6080016224391536e-05, + "loss": 0.4691, + "step": 21339 + }, + { + "epoch": 0.585941790225151, + "grad_norm": 0.36862191557884216, + "learning_rate": 1.60796733230897e-05, + "loss": 0.5527, + "step": 21340 + }, + { + "epoch": 0.5859692476661175, + "grad_norm": 0.4074125289916992, + "learning_rate": 1.6079330410447337e-05, + "loss": 0.4865, + "step": 21341 + }, + { + "epoch": 0.585996705107084, + "grad_norm": 0.3703363239765167, + "learning_rate": 1.607898748646508e-05, + "loss": 0.471, + "step": 21342 + }, + { + "epoch": 0.5860241625480506, + "grad_norm": 0.3643762171268463, + "learning_rate": 1.6078644551143572e-05, + "loss": 0.4486, + "step": 21343 + }, + { + "epoch": 0.586051619989017, + "grad_norm": 0.40503379702568054, + "learning_rate": 1.6078301604483454e-05, + "loss": 0.5005, + "step": 21344 + }, + { + "epoch": 0.5860790774299836, + "grad_norm": 0.40422913432121277, + "learning_rate": 1.6077958646485364e-05, + "loss": 0.4806, + "step": 21345 + }, + { + "epoch": 0.58610653487095, + "grad_norm": 0.4324236512184143, + "learning_rate": 1.6077615677149937e-05, + "loss": 0.4919, + "step": 21346 + }, + { + "epoch": 0.5861339923119165, + "grad_norm": 0.3854408860206604, + "learning_rate": 1.607727269647782e-05, + "loss": 0.5303, + "step": 21347 + }, + { + "epoch": 0.586161449752883, + "grad_norm": 0.40843522548675537, + "learning_rate": 1.607692970446965e-05, + "loss": 0.4756, + "step": 21348 + }, + { + "epoch": 0.5861889071938495, + "grad_norm": 0.39039647579193115, + "learning_rate": 1.6076586701126063e-05, + "loss": 0.5168, + "step": 21349 + }, + { + "epoch": 0.5862163646348161, + "grad_norm": 0.4569324254989624, + "learning_rate": 1.6076243686447702e-05, + "loss": 0.4485, + "step": 21350 + }, + { + "epoch": 0.5862438220757825, + "grad_norm": 0.3567771911621094, + "learning_rate": 1.607590066043521e-05, + "loss": 0.5208, + "step": 21351 + }, + { + "epoch": 0.5862712795167491, + "grad_norm": 0.4875302016735077, + "learning_rate": 1.6075557623089228e-05, + "loss": 0.514, + "step": 21352 + }, + { + "epoch": 0.5862987369577155, + "grad_norm": 0.47146013379096985, + "learning_rate": 1.607521457441039e-05, + "loss": 0.6466, + "step": 21353 + }, + { + "epoch": 0.586326194398682, + "grad_norm": 0.3471103310585022, + "learning_rate": 1.6074871514399333e-05, + "loss": 0.4798, + "step": 21354 + }, + { + "epoch": 0.5863536518396485, + "grad_norm": 0.5603978037834167, + "learning_rate": 1.6074528443056704e-05, + "loss": 0.5085, + "step": 21355 + }, + { + "epoch": 0.586381109280615, + "grad_norm": 0.39028629660606384, + "learning_rate": 1.6074185360383142e-05, + "loss": 0.5286, + "step": 21356 + }, + { + "epoch": 0.5864085667215816, + "grad_norm": 0.3981369733810425, + "learning_rate": 1.6073842266379285e-05, + "loss": 0.5004, + "step": 21357 + }, + { + "epoch": 0.586436024162548, + "grad_norm": 0.34253403544425964, + "learning_rate": 1.6073499161045773e-05, + "loss": 0.4067, + "step": 21358 + }, + { + "epoch": 0.5864634816035146, + "grad_norm": 0.40743759274482727, + "learning_rate": 1.6073156044383246e-05, + "loss": 0.5668, + "step": 21359 + }, + { + "epoch": 0.586490939044481, + "grad_norm": 0.4002404510974884, + "learning_rate": 1.6072812916392348e-05, + "loss": 0.5749, + "step": 21360 + }, + { + "epoch": 0.5865183964854476, + "grad_norm": 0.3362743556499481, + "learning_rate": 1.6072469777073712e-05, + "loss": 0.4684, + "step": 21361 + }, + { + "epoch": 0.586545853926414, + "grad_norm": 0.4204544723033905, + "learning_rate": 1.6072126626427983e-05, + "loss": 0.5275, + "step": 21362 + }, + { + "epoch": 0.5865733113673806, + "grad_norm": 0.4053398072719574, + "learning_rate": 1.60717834644558e-05, + "loss": 0.5934, + "step": 21363 + }, + { + "epoch": 0.5866007688083471, + "grad_norm": 0.38261502981185913, + "learning_rate": 1.6071440291157804e-05, + "loss": 0.5311, + "step": 21364 + }, + { + "epoch": 0.5866282262493135, + "grad_norm": 0.39222586154937744, + "learning_rate": 1.607109710653463e-05, + "loss": 0.4581, + "step": 21365 + }, + { + "epoch": 0.5866556836902801, + "grad_norm": 0.5829600691795349, + "learning_rate": 1.6070753910586927e-05, + "loss": 0.5955, + "step": 21366 + }, + { + "epoch": 0.5866831411312465, + "grad_norm": 0.3544497787952423, + "learning_rate": 1.6070410703315327e-05, + "loss": 0.4842, + "step": 21367 + }, + { + "epoch": 0.5867105985722131, + "grad_norm": 0.5294165015220642, + "learning_rate": 1.607006748472048e-05, + "loss": 0.4461, + "step": 21368 + }, + { + "epoch": 0.5867380560131795, + "grad_norm": 0.37721970677375793, + "learning_rate": 1.6069724254803013e-05, + "loss": 0.5206, + "step": 21369 + }, + { + "epoch": 0.5867655134541461, + "grad_norm": 0.36376938223838806, + "learning_rate": 1.6069381013563576e-05, + "loss": 0.4579, + "step": 21370 + }, + { + "epoch": 0.5867929708951126, + "grad_norm": 0.7062408328056335, + "learning_rate": 1.6069037761002805e-05, + "loss": 0.4722, + "step": 21371 + }, + { + "epoch": 0.5868204283360791, + "grad_norm": 0.5192480087280273, + "learning_rate": 1.606869449712134e-05, + "loss": 0.5899, + "step": 21372 + }, + { + "epoch": 0.5868478857770456, + "grad_norm": 0.37329164147377014, + "learning_rate": 1.6068351221919826e-05, + "loss": 0.4068, + "step": 21373 + }, + { + "epoch": 0.586875343218012, + "grad_norm": 0.34479978680610657, + "learning_rate": 1.6068007935398902e-05, + "loss": 0.545, + "step": 21374 + }, + { + "epoch": 0.5869028006589786, + "grad_norm": 1.1792371273040771, + "learning_rate": 1.6067664637559203e-05, + "loss": 0.5243, + "step": 21375 + }, + { + "epoch": 0.586930258099945, + "grad_norm": 0.37657466530799866, + "learning_rate": 1.6067321328401374e-05, + "loss": 0.4415, + "step": 21376 + }, + { + "epoch": 0.5869577155409116, + "grad_norm": 0.3800581991672516, + "learning_rate": 1.6066978007926054e-05, + "loss": 0.458, + "step": 21377 + }, + { + "epoch": 0.5869851729818781, + "grad_norm": 0.4102453887462616, + "learning_rate": 1.606663467613388e-05, + "loss": 0.5438, + "step": 21378 + }, + { + "epoch": 0.5870126304228446, + "grad_norm": 0.3837204873561859, + "learning_rate": 1.6066291333025502e-05, + "loss": 0.6015, + "step": 21379 + }, + { + "epoch": 0.5870400878638111, + "grad_norm": 0.4148036539554596, + "learning_rate": 1.6065947978601552e-05, + "loss": 0.5585, + "step": 21380 + }, + { + "epoch": 0.5870675453047776, + "grad_norm": 0.40851548314094543, + "learning_rate": 1.6065604612862674e-05, + "loss": 0.4727, + "step": 21381 + }, + { + "epoch": 0.5870950027457441, + "grad_norm": 0.33512207865715027, + "learning_rate": 1.6065261235809507e-05, + "loss": 0.5369, + "step": 21382 + }, + { + "epoch": 0.5871224601867105, + "grad_norm": 0.40586692094802856, + "learning_rate": 1.606491784744269e-05, + "loss": 0.4835, + "step": 21383 + }, + { + "epoch": 0.5871499176276771, + "grad_norm": 0.48856794834136963, + "learning_rate": 1.6064574447762867e-05, + "loss": 0.5117, + "step": 21384 + }, + { + "epoch": 0.5871773750686436, + "grad_norm": 0.3666112422943115, + "learning_rate": 1.6064231036770675e-05, + "loss": 0.467, + "step": 21385 + }, + { + "epoch": 0.5872048325096101, + "grad_norm": 0.46318596601486206, + "learning_rate": 1.606388761446676e-05, + "loss": 0.578, + "step": 21386 + }, + { + "epoch": 0.5872322899505766, + "grad_norm": 0.3672381639480591, + "learning_rate": 1.606354418085176e-05, + "loss": 0.515, + "step": 21387 + }, + { + "epoch": 0.5872597473915431, + "grad_norm": 0.4746719300746918, + "learning_rate": 1.6063200735926313e-05, + "loss": 0.5309, + "step": 21388 + }, + { + "epoch": 0.5872872048325096, + "grad_norm": 0.4605122208595276, + "learning_rate": 1.6062857279691063e-05, + "loss": 0.5403, + "step": 21389 + }, + { + "epoch": 0.5873146622734761, + "grad_norm": 0.3861536681652069, + "learning_rate": 1.606251381214664e-05, + "loss": 0.5323, + "step": 21390 + }, + { + "epoch": 0.5873421197144426, + "grad_norm": 0.40896034240722656, + "learning_rate": 1.6062170333293704e-05, + "loss": 0.5122, + "step": 21391 + }, + { + "epoch": 0.5873695771554092, + "grad_norm": 0.4081052839756012, + "learning_rate": 1.6061826843132883e-05, + "loss": 0.557, + "step": 21392 + }, + { + "epoch": 0.5873970345963756, + "grad_norm": 0.38191959261894226, + "learning_rate": 1.6061483341664823e-05, + "loss": 0.4803, + "step": 21393 + }, + { + "epoch": 0.5874244920373421, + "grad_norm": 0.38186895847320557, + "learning_rate": 1.6061139828890157e-05, + "loss": 0.4332, + "step": 21394 + }, + { + "epoch": 0.5874519494783086, + "grad_norm": 0.34676143527030945, + "learning_rate": 1.6060796304809532e-05, + "loss": 0.4918, + "step": 21395 + }, + { + "epoch": 0.5874794069192751, + "grad_norm": 0.39021217823028564, + "learning_rate": 1.606045276942359e-05, + "loss": 0.489, + "step": 21396 + }, + { + "epoch": 0.5875068643602416, + "grad_norm": 0.408552348613739, + "learning_rate": 1.6060109222732965e-05, + "loss": 0.5122, + "step": 21397 + }, + { + "epoch": 0.5875343218012081, + "grad_norm": 0.40925681591033936, + "learning_rate": 1.6059765664738307e-05, + "loss": 0.5363, + "step": 21398 + }, + { + "epoch": 0.5875617792421747, + "grad_norm": 0.39733314514160156, + "learning_rate": 1.605942209544025e-05, + "loss": 0.4539, + "step": 21399 + }, + { + "epoch": 0.5875892366831411, + "grad_norm": 0.4093766510486603, + "learning_rate": 1.6059078514839436e-05, + "loss": 0.6354, + "step": 21400 + }, + { + "epoch": 0.5876166941241077, + "grad_norm": 0.3743625581264496, + "learning_rate": 1.6058734922936507e-05, + "loss": 0.4935, + "step": 21401 + }, + { + "epoch": 0.5876441515650741, + "grad_norm": 0.39678749442100525, + "learning_rate": 1.6058391319732106e-05, + "loss": 0.5007, + "step": 21402 + }, + { + "epoch": 0.5876716090060407, + "grad_norm": 0.46316733956336975, + "learning_rate": 1.605804770522687e-05, + "loss": 0.5606, + "step": 21403 + }, + { + "epoch": 0.5876990664470071, + "grad_norm": 0.376273512840271, + "learning_rate": 1.605770407942144e-05, + "loss": 0.5397, + "step": 21404 + }, + { + "epoch": 0.5877265238879736, + "grad_norm": 0.35880690813064575, + "learning_rate": 1.605736044231646e-05, + "loss": 0.4543, + "step": 21405 + }, + { + "epoch": 0.5877539813289402, + "grad_norm": 0.39584028720855713, + "learning_rate": 1.6057016793912566e-05, + "loss": 0.5198, + "step": 21406 + }, + { + "epoch": 0.5877814387699066, + "grad_norm": 0.43890342116355896, + "learning_rate": 1.6056673134210406e-05, + "loss": 0.5413, + "step": 21407 + }, + { + "epoch": 0.5878088962108732, + "grad_norm": 0.37428393959999084, + "learning_rate": 1.6056329463210616e-05, + "loss": 0.5115, + "step": 21408 + }, + { + "epoch": 0.5878363536518396, + "grad_norm": 0.5834339261054993, + "learning_rate": 1.6055985780913842e-05, + "loss": 0.5072, + "step": 21409 + }, + { + "epoch": 0.5878638110928062, + "grad_norm": 0.3962455093860626, + "learning_rate": 1.605564208732072e-05, + "loss": 0.4622, + "step": 21410 + }, + { + "epoch": 0.5878912685337726, + "grad_norm": 0.4025016725063324, + "learning_rate": 1.605529838243189e-05, + "loss": 0.4574, + "step": 21411 + }, + { + "epoch": 0.5879187259747392, + "grad_norm": 0.4272722899913788, + "learning_rate": 1.6054954666247997e-05, + "loss": 0.5129, + "step": 21412 + }, + { + "epoch": 0.5879461834157057, + "grad_norm": 0.5183151960372925, + "learning_rate": 1.6054610938769683e-05, + "loss": 0.541, + "step": 21413 + }, + { + "epoch": 0.5879736408566721, + "grad_norm": 0.49211400747299194, + "learning_rate": 1.6054267199997587e-05, + "loss": 0.4547, + "step": 21414 + }, + { + "epoch": 0.5880010982976387, + "grad_norm": 0.3334355056285858, + "learning_rate": 1.6053923449932347e-05, + "loss": 0.474, + "step": 21415 + }, + { + "epoch": 0.5880285557386051, + "grad_norm": 0.3532576858997345, + "learning_rate": 1.6053579688574612e-05, + "loss": 0.5558, + "step": 21416 + }, + { + "epoch": 0.5880560131795717, + "grad_norm": 0.3742949962615967, + "learning_rate": 1.6053235915925017e-05, + "loss": 0.547, + "step": 21417 + }, + { + "epoch": 0.5880834706205381, + "grad_norm": 0.45399484038352966, + "learning_rate": 1.6052892131984204e-05, + "loss": 0.4703, + "step": 21418 + }, + { + "epoch": 0.5881109280615047, + "grad_norm": 0.35127493739128113, + "learning_rate": 1.605254833675282e-05, + "loss": 0.4683, + "step": 21419 + }, + { + "epoch": 0.5881383855024712, + "grad_norm": 0.4042772650718689, + "learning_rate": 1.6052204530231494e-05, + "loss": 0.5646, + "step": 21420 + }, + { + "epoch": 0.5881658429434377, + "grad_norm": 0.38157814741134644, + "learning_rate": 1.605186071242088e-05, + "loss": 0.4935, + "step": 21421 + }, + { + "epoch": 0.5881933003844042, + "grad_norm": 0.39841532707214355, + "learning_rate": 1.6051516883321615e-05, + "loss": 0.4443, + "step": 21422 + }, + { + "epoch": 0.5882207578253706, + "grad_norm": 0.4648243486881256, + "learning_rate": 1.6051173042934334e-05, + "loss": 0.3841, + "step": 21423 + }, + { + "epoch": 0.5882482152663372, + "grad_norm": 0.37332043051719666, + "learning_rate": 1.6050829191259685e-05, + "loss": 0.486, + "step": 21424 + }, + { + "epoch": 0.5882756727073036, + "grad_norm": 0.6334229111671448, + "learning_rate": 1.6050485328298316e-05, + "loss": 0.4628, + "step": 21425 + }, + { + "epoch": 0.5883031301482702, + "grad_norm": 0.35021087527275085, + "learning_rate": 1.6050141454050852e-05, + "loss": 0.4499, + "step": 21426 + }, + { + "epoch": 0.5883305875892367, + "grad_norm": 0.45326828956604004, + "learning_rate": 1.604979756851795e-05, + "loss": 0.6065, + "step": 21427 + }, + { + "epoch": 0.5883580450302032, + "grad_norm": 0.389594703912735, + "learning_rate": 1.604945367170024e-05, + "loss": 0.5957, + "step": 21428 + }, + { + "epoch": 0.5883855024711697, + "grad_norm": 0.45376911759376526, + "learning_rate": 1.604910976359837e-05, + "loss": 0.4788, + "step": 21429 + }, + { + "epoch": 0.5884129599121362, + "grad_norm": 0.37075939774513245, + "learning_rate": 1.604876584421298e-05, + "loss": 0.5301, + "step": 21430 + }, + { + "epoch": 0.5884404173531027, + "grad_norm": 0.3375810384750366, + "learning_rate": 1.604842191354471e-05, + "loss": 0.5108, + "step": 21431 + }, + { + "epoch": 0.5884678747940691, + "grad_norm": 0.4213574230670929, + "learning_rate": 1.6048077971594204e-05, + "loss": 0.5315, + "step": 21432 + }, + { + "epoch": 0.5884953322350357, + "grad_norm": 0.3640591502189636, + "learning_rate": 1.60477340183621e-05, + "loss": 0.468, + "step": 21433 + }, + { + "epoch": 0.5885227896760022, + "grad_norm": 0.3819221258163452, + "learning_rate": 1.6047390053849043e-05, + "loss": 0.512, + "step": 21434 + }, + { + "epoch": 0.5885502471169687, + "grad_norm": 0.39641863107681274, + "learning_rate": 1.6047046078055675e-05, + "loss": 0.5614, + "step": 21435 + }, + { + "epoch": 0.5885777045579352, + "grad_norm": 0.36107152700424194, + "learning_rate": 1.6046702090982633e-05, + "loss": 0.47, + "step": 21436 + }, + { + "epoch": 0.5886051619989017, + "grad_norm": 0.38981881737709045, + "learning_rate": 1.6046358092630565e-05, + "loss": 0.4093, + "step": 21437 + }, + { + "epoch": 0.5886326194398682, + "grad_norm": 0.33813029527664185, + "learning_rate": 1.604601408300011e-05, + "loss": 0.4396, + "step": 21438 + }, + { + "epoch": 0.5886600768808347, + "grad_norm": 0.387286901473999, + "learning_rate": 1.6045670062091905e-05, + "loss": 0.4788, + "step": 21439 + }, + { + "epoch": 0.5886875343218012, + "grad_norm": 0.36994507908821106, + "learning_rate": 1.6045326029906597e-05, + "loss": 0.4955, + "step": 21440 + }, + { + "epoch": 0.5887149917627678, + "grad_norm": 0.3722115159034729, + "learning_rate": 1.6044981986444827e-05, + "loss": 0.5386, + "step": 21441 + }, + { + "epoch": 0.5887424492037342, + "grad_norm": 0.38520726561546326, + "learning_rate": 1.604463793170724e-05, + "loss": 0.4647, + "step": 21442 + }, + { + "epoch": 0.5887699066447007, + "grad_norm": 0.40866199135780334, + "learning_rate": 1.6044293865694468e-05, + "loss": 0.4968, + "step": 21443 + }, + { + "epoch": 0.5887973640856672, + "grad_norm": 0.4030528962612152, + "learning_rate": 1.6043949788407162e-05, + "loss": 0.5098, + "step": 21444 + }, + { + "epoch": 0.5888248215266337, + "grad_norm": 0.4165848195552826, + "learning_rate": 1.604360569984596e-05, + "loss": 0.5729, + "step": 21445 + }, + { + "epoch": 0.5888522789676002, + "grad_norm": 0.4657222330570221, + "learning_rate": 1.60432616000115e-05, + "loss": 0.6059, + "step": 21446 + }, + { + "epoch": 0.5888797364085667, + "grad_norm": 0.3711622357368469, + "learning_rate": 1.6042917488904437e-05, + "loss": 0.4855, + "step": 21447 + }, + { + "epoch": 0.5889071938495333, + "grad_norm": 0.42531174421310425, + "learning_rate": 1.6042573366525397e-05, + "loss": 0.5504, + "step": 21448 + }, + { + "epoch": 0.5889346512904997, + "grad_norm": 0.3611631691455841, + "learning_rate": 1.6042229232875035e-05, + "loss": 0.4888, + "step": 21449 + }, + { + "epoch": 0.5889621087314663, + "grad_norm": 0.46158668398857117, + "learning_rate": 1.6041885087953986e-05, + "loss": 0.5834, + "step": 21450 + }, + { + "epoch": 0.5889895661724327, + "grad_norm": 0.4538050889968872, + "learning_rate": 1.6041540931762888e-05, + "loss": 0.5136, + "step": 21451 + }, + { + "epoch": 0.5890170236133992, + "grad_norm": 0.36424383521080017, + "learning_rate": 1.6041196764302393e-05, + "loss": 0.4497, + "step": 21452 + }, + { + "epoch": 0.5890444810543657, + "grad_norm": 0.3888211250305176, + "learning_rate": 1.6040852585573134e-05, + "loss": 0.6155, + "step": 21453 + }, + { + "epoch": 0.5890719384953322, + "grad_norm": 0.40302661061286926, + "learning_rate": 1.604050839557576e-05, + "loss": 0.501, + "step": 21454 + }, + { + "epoch": 0.5890993959362988, + "grad_norm": 0.47300952672958374, + "learning_rate": 1.604016419431091e-05, + "loss": 0.5313, + "step": 21455 + }, + { + "epoch": 0.5891268533772652, + "grad_norm": 0.3970646560192108, + "learning_rate": 1.6039819981779223e-05, + "loss": 0.488, + "step": 21456 + }, + { + "epoch": 0.5891543108182318, + "grad_norm": 0.4187946617603302, + "learning_rate": 1.6039475757981347e-05, + "loss": 0.5043, + "step": 21457 + }, + { + "epoch": 0.5891817682591982, + "grad_norm": 0.3966483175754547, + "learning_rate": 1.603913152291792e-05, + "loss": 0.4781, + "step": 21458 + }, + { + "epoch": 0.5892092257001648, + "grad_norm": 0.35040342807769775, + "learning_rate": 1.6038787276589584e-05, + "loss": 0.4795, + "step": 21459 + }, + { + "epoch": 0.5892366831411312, + "grad_norm": 0.4027577042579651, + "learning_rate": 1.6038443018996984e-05, + "loss": 0.5046, + "step": 21460 + }, + { + "epoch": 0.5892641405820978, + "grad_norm": 0.3712054491043091, + "learning_rate": 1.603809875014076e-05, + "loss": 0.5265, + "step": 21461 + }, + { + "epoch": 0.5892915980230643, + "grad_norm": 0.35834985971450806, + "learning_rate": 1.6037754470021556e-05, + "loss": 0.5276, + "step": 21462 + }, + { + "epoch": 0.5893190554640307, + "grad_norm": 0.3905424475669861, + "learning_rate": 1.6037410178640008e-05, + "loss": 0.4388, + "step": 21463 + }, + { + "epoch": 0.5893465129049973, + "grad_norm": 0.4908965229988098, + "learning_rate": 1.6037065875996768e-05, + "loss": 0.5376, + "step": 21464 + }, + { + "epoch": 0.5893739703459637, + "grad_norm": 0.364498496055603, + "learning_rate": 1.6036721562092476e-05, + "loss": 0.5324, + "step": 21465 + }, + { + "epoch": 0.5894014277869303, + "grad_norm": 0.3806611895561218, + "learning_rate": 1.6036377236927765e-05, + "loss": 0.4883, + "step": 21466 + }, + { + "epoch": 0.5894288852278967, + "grad_norm": 0.36859938502311707, + "learning_rate": 1.6036032900503286e-05, + "loss": 0.4271, + "step": 21467 + }, + { + "epoch": 0.5894563426688633, + "grad_norm": 0.4003593325614929, + "learning_rate": 1.6035688552819682e-05, + "loss": 0.5073, + "step": 21468 + }, + { + "epoch": 0.5894838001098298, + "grad_norm": 0.38554590940475464, + "learning_rate": 1.603534419387759e-05, + "loss": 0.5796, + "step": 21469 + }, + { + "epoch": 0.5895112575507963, + "grad_norm": 0.4106822609901428, + "learning_rate": 1.6034999823677652e-05, + "loss": 0.5042, + "step": 21470 + }, + { + "epoch": 0.5895387149917628, + "grad_norm": 0.36980104446411133, + "learning_rate": 1.603465544222052e-05, + "loss": 0.4941, + "step": 21471 + }, + { + "epoch": 0.5895661724327292, + "grad_norm": 0.4236767292022705, + "learning_rate": 1.6034311049506823e-05, + "loss": 0.5856, + "step": 21472 + }, + { + "epoch": 0.5895936298736958, + "grad_norm": 0.36444568634033203, + "learning_rate": 1.6033966645537212e-05, + "loss": 0.481, + "step": 21473 + }, + { + "epoch": 0.5896210873146622, + "grad_norm": 0.38661208748817444, + "learning_rate": 1.603362223031233e-05, + "loss": 0.5416, + "step": 21474 + }, + { + "epoch": 0.5896485447556288, + "grad_norm": 0.368897020816803, + "learning_rate": 1.6033277803832815e-05, + "loss": 0.5092, + "step": 21475 + }, + { + "epoch": 0.5896760021965953, + "grad_norm": 0.34310945868492126, + "learning_rate": 1.603293336609931e-05, + "loss": 0.4264, + "step": 21476 + }, + { + "epoch": 0.5897034596375618, + "grad_norm": 0.35825175046920776, + "learning_rate": 1.6032588917112462e-05, + "loss": 0.5813, + "step": 21477 + }, + { + "epoch": 0.5897309170785283, + "grad_norm": 0.41216251254081726, + "learning_rate": 1.6032244456872908e-05, + "loss": 0.4841, + "step": 21478 + }, + { + "epoch": 0.5897583745194948, + "grad_norm": 0.3895282447338104, + "learning_rate": 1.603189998538129e-05, + "loss": 0.543, + "step": 21479 + }, + { + "epoch": 0.5897858319604613, + "grad_norm": 0.32734933495521545, + "learning_rate": 1.603155550263826e-05, + "loss": 0.4438, + "step": 21480 + }, + { + "epoch": 0.5898132894014277, + "grad_norm": 0.4271828830242157, + "learning_rate": 1.603121100864445e-05, + "loss": 0.5088, + "step": 21481 + }, + { + "epoch": 0.5898407468423943, + "grad_norm": 0.47016677260398865, + "learning_rate": 1.6030866503400506e-05, + "loss": 0.5697, + "step": 21482 + }, + { + "epoch": 0.5898682042833608, + "grad_norm": 0.43538898229599, + "learning_rate": 1.603052198690707e-05, + "loss": 0.5391, + "step": 21483 + }, + { + "epoch": 0.5898956617243273, + "grad_norm": 0.3630640506744385, + "learning_rate": 1.6030177459164792e-05, + "loss": 0.4457, + "step": 21484 + }, + { + "epoch": 0.5899231191652938, + "grad_norm": 0.35015639662742615, + "learning_rate": 1.6029832920174304e-05, + "loss": 0.4808, + "step": 21485 + }, + { + "epoch": 0.5899505766062603, + "grad_norm": 0.43256109952926636, + "learning_rate": 1.6029488369936253e-05, + "loss": 0.5233, + "step": 21486 + }, + { + "epoch": 0.5899780340472268, + "grad_norm": 0.4134038984775543, + "learning_rate": 1.602914380845128e-05, + "loss": 0.4962, + "step": 21487 + }, + { + "epoch": 0.5900054914881933, + "grad_norm": 0.35060372948646545, + "learning_rate": 1.6028799235720035e-05, + "loss": 0.5578, + "step": 21488 + }, + { + "epoch": 0.5900329489291598, + "grad_norm": 0.3724535405635834, + "learning_rate": 1.602845465174315e-05, + "loss": 0.4805, + "step": 21489 + }, + { + "epoch": 0.5900604063701264, + "grad_norm": 0.4152035415172577, + "learning_rate": 1.6028110056521276e-05, + "loss": 0.6234, + "step": 21490 + }, + { + "epoch": 0.5900878638110928, + "grad_norm": 0.3733558654785156, + "learning_rate": 1.6027765450055053e-05, + "loss": 0.5526, + "step": 21491 + }, + { + "epoch": 0.5901153212520593, + "grad_norm": 0.3646096885204315, + "learning_rate": 1.602742083234512e-05, + "loss": 0.4685, + "step": 21492 + }, + { + "epoch": 0.5901427786930258, + "grad_norm": 0.35129112005233765, + "learning_rate": 1.602707620339213e-05, + "loss": 0.4139, + "step": 21493 + }, + { + "epoch": 0.5901702361339923, + "grad_norm": 0.4203624427318573, + "learning_rate": 1.6026731563196715e-05, + "loss": 0.4642, + "step": 21494 + }, + { + "epoch": 0.5901976935749588, + "grad_norm": 0.40360310673713684, + "learning_rate": 1.602638691175952e-05, + "loss": 0.4943, + "step": 21495 + }, + { + "epoch": 0.5902251510159253, + "grad_norm": 0.44878822565078735, + "learning_rate": 1.6026042249081192e-05, + "loss": 0.5329, + "step": 21496 + }, + { + "epoch": 0.5902526084568919, + "grad_norm": 0.6131880283355713, + "learning_rate": 1.6025697575162374e-05, + "loss": 0.4516, + "step": 21497 + }, + { + "epoch": 0.5902800658978583, + "grad_norm": 0.3740101158618927, + "learning_rate": 1.6025352890003704e-05, + "loss": 0.4986, + "step": 21498 + }, + { + "epoch": 0.5903075233388249, + "grad_norm": 0.3936583697795868, + "learning_rate": 1.6025008193605828e-05, + "loss": 0.5968, + "step": 21499 + }, + { + "epoch": 0.5903349807797913, + "grad_norm": 0.3740116059780121, + "learning_rate": 1.6024663485969393e-05, + "loss": 0.4489, + "step": 21500 + }, + { + "epoch": 0.5903624382207578, + "grad_norm": 0.3718230724334717, + "learning_rate": 1.602431876709503e-05, + "loss": 0.4685, + "step": 21501 + }, + { + "epoch": 0.5903898956617243, + "grad_norm": 0.35741108655929565, + "learning_rate": 1.6023974036983396e-05, + "loss": 0.482, + "step": 21502 + }, + { + "epoch": 0.5904173531026908, + "grad_norm": 0.36427491903305054, + "learning_rate": 1.6023629295635125e-05, + "loss": 0.5079, + "step": 21503 + }, + { + "epoch": 0.5904448105436574, + "grad_norm": 0.38975247740745544, + "learning_rate": 1.6023284543050863e-05, + "loss": 0.5496, + "step": 21504 + }, + { + "epoch": 0.5904722679846238, + "grad_norm": 0.36991143226623535, + "learning_rate": 1.6022939779231255e-05, + "loss": 0.478, + "step": 21505 + }, + { + "epoch": 0.5904997254255904, + "grad_norm": 0.3664740324020386, + "learning_rate": 1.602259500417694e-05, + "loss": 0.5261, + "step": 21506 + }, + { + "epoch": 0.5905271828665568, + "grad_norm": 0.35924646258354187, + "learning_rate": 1.6022250217888564e-05, + "loss": 0.4601, + "step": 21507 + }, + { + "epoch": 0.5905546403075234, + "grad_norm": 0.39521804451942444, + "learning_rate": 1.6021905420366768e-05, + "loss": 0.5764, + "step": 21508 + }, + { + "epoch": 0.5905820977484898, + "grad_norm": 0.33987957239151, + "learning_rate": 1.6021560611612198e-05, + "loss": 0.4233, + "step": 21509 + }, + { + "epoch": 0.5906095551894563, + "grad_norm": 0.36575350165367126, + "learning_rate": 1.6021215791625496e-05, + "loss": 0.5012, + "step": 21510 + }, + { + "epoch": 0.5906370126304229, + "grad_norm": 0.41507822275161743, + "learning_rate": 1.6020870960407303e-05, + "loss": 0.5692, + "step": 21511 + }, + { + "epoch": 0.5906644700713893, + "grad_norm": 0.3615175485610962, + "learning_rate": 1.6020526117958267e-05, + "loss": 0.467, + "step": 21512 + }, + { + "epoch": 0.5906919275123559, + "grad_norm": 0.4112790822982788, + "learning_rate": 1.6020181264279024e-05, + "loss": 0.5699, + "step": 21513 + }, + { + "epoch": 0.5907193849533223, + "grad_norm": 0.361366868019104, + "learning_rate": 1.6019836399370224e-05, + "loss": 0.5469, + "step": 21514 + }, + { + "epoch": 0.5907468423942889, + "grad_norm": 0.3912656009197235, + "learning_rate": 1.601949152323251e-05, + "loss": 0.6503, + "step": 21515 + }, + { + "epoch": 0.5907742998352553, + "grad_norm": 0.3618023693561554, + "learning_rate": 1.601914663586652e-05, + "loss": 0.5005, + "step": 21516 + }, + { + "epoch": 0.5908017572762219, + "grad_norm": 0.3914335370063782, + "learning_rate": 1.60188017372729e-05, + "loss": 0.5403, + "step": 21517 + }, + { + "epoch": 0.5908292147171884, + "grad_norm": 0.3110455870628357, + "learning_rate": 1.6018456827452292e-05, + "loss": 0.4398, + "step": 21518 + }, + { + "epoch": 0.5908566721581548, + "grad_norm": 0.4432625472545624, + "learning_rate": 1.6018111906405344e-05, + "loss": 0.449, + "step": 21519 + }, + { + "epoch": 0.5908841295991214, + "grad_norm": 0.3686738610267639, + "learning_rate": 1.60177669741327e-05, + "loss": 0.4834, + "step": 21520 + }, + { + "epoch": 0.5909115870400878, + "grad_norm": 0.4385509192943573, + "learning_rate": 1.6017422030634993e-05, + "loss": 0.4474, + "step": 21521 + }, + { + "epoch": 0.5909390444810544, + "grad_norm": 0.42595386505126953, + "learning_rate": 1.6017077075912878e-05, + "loss": 0.5339, + "step": 21522 + }, + { + "epoch": 0.5909665019220208, + "grad_norm": 0.37508144974708557, + "learning_rate": 1.6016732109966994e-05, + "loss": 0.5021, + "step": 21523 + }, + { + "epoch": 0.5909939593629874, + "grad_norm": 0.38849055767059326, + "learning_rate": 1.601638713279798e-05, + "loss": 0.5387, + "step": 21524 + }, + { + "epoch": 0.5910214168039539, + "grad_norm": 0.38650548458099365, + "learning_rate": 1.601604214440649e-05, + "loss": 0.5552, + "step": 21525 + }, + { + "epoch": 0.5910488742449204, + "grad_norm": 0.3708053529262543, + "learning_rate": 1.601569714479316e-05, + "loss": 0.5473, + "step": 21526 + }, + { + "epoch": 0.5910763316858869, + "grad_norm": 0.36492595076560974, + "learning_rate": 1.601535213395863e-05, + "loss": 0.4627, + "step": 21527 + }, + { + "epoch": 0.5911037891268534, + "grad_norm": 0.5813947916030884, + "learning_rate": 1.601500711190355e-05, + "loss": 0.4946, + "step": 21528 + }, + { + "epoch": 0.5911312465678199, + "grad_norm": 0.3584710657596588, + "learning_rate": 1.601466207862856e-05, + "loss": 0.4826, + "step": 21529 + }, + { + "epoch": 0.5911587040087863, + "grad_norm": 0.38181042671203613, + "learning_rate": 1.6014317034134313e-05, + "loss": 0.483, + "step": 21530 + }, + { + "epoch": 0.5911861614497529, + "grad_norm": 0.3726422190666199, + "learning_rate": 1.6013971978421442e-05, + "loss": 0.5159, + "step": 21531 + }, + { + "epoch": 0.5912136188907194, + "grad_norm": 0.5162054300308228, + "learning_rate": 1.601362691149059e-05, + "loss": 0.4941, + "step": 21532 + }, + { + "epoch": 0.5912410763316859, + "grad_norm": 0.3670933246612549, + "learning_rate": 1.6013281833342407e-05, + "loss": 0.4864, + "step": 21533 + }, + { + "epoch": 0.5912685337726524, + "grad_norm": 5.97435998916626, + "learning_rate": 1.6012936743977534e-05, + "loss": 0.5026, + "step": 21534 + }, + { + "epoch": 0.5912959912136189, + "grad_norm": 0.5258584022521973, + "learning_rate": 1.6012591643396616e-05, + "loss": 0.5275, + "step": 21535 + }, + { + "epoch": 0.5913234486545854, + "grad_norm": 0.45063483715057373, + "learning_rate": 1.6012246531600295e-05, + "loss": 0.5448, + "step": 21536 + }, + { + "epoch": 0.5913509060955519, + "grad_norm": 0.3921394646167755, + "learning_rate": 1.6011901408589213e-05, + "loss": 0.4255, + "step": 21537 + }, + { + "epoch": 0.5913783635365184, + "grad_norm": 0.4393773674964905, + "learning_rate": 1.601155627436402e-05, + "loss": 0.491, + "step": 21538 + }, + { + "epoch": 0.591405820977485, + "grad_norm": 0.3587026000022888, + "learning_rate": 1.6011211128925354e-05, + "loss": 0.5282, + "step": 21539 + }, + { + "epoch": 0.5914332784184514, + "grad_norm": 0.3885626792907715, + "learning_rate": 1.6010865972273864e-05, + "loss": 0.5379, + "step": 21540 + }, + { + "epoch": 0.5914607358594179, + "grad_norm": 0.39426350593566895, + "learning_rate": 1.6010520804410185e-05, + "loss": 0.5796, + "step": 21541 + }, + { + "epoch": 0.5914881933003844, + "grad_norm": 0.3860085904598236, + "learning_rate": 1.6010175625334968e-05, + "loss": 0.44, + "step": 21542 + }, + { + "epoch": 0.5915156507413509, + "grad_norm": 0.39274683594703674, + "learning_rate": 1.600983043504886e-05, + "loss": 0.5223, + "step": 21543 + }, + { + "epoch": 0.5915431081823174, + "grad_norm": 0.3777177929878235, + "learning_rate": 1.600948523355249e-05, + "loss": 0.4979, + "step": 21544 + }, + { + "epoch": 0.5915705656232839, + "grad_norm": 0.38170477747917175, + "learning_rate": 1.600914002084652e-05, + "loss": 0.5361, + "step": 21545 + }, + { + "epoch": 0.5915980230642505, + "grad_norm": 0.35656559467315674, + "learning_rate": 1.6008794796931587e-05, + "loss": 0.4649, + "step": 21546 + }, + { + "epoch": 0.5916254805052169, + "grad_norm": 0.335309237241745, + "learning_rate": 1.600844956180833e-05, + "loss": 0.4204, + "step": 21547 + }, + { + "epoch": 0.5916529379461835, + "grad_norm": 0.35161134600639343, + "learning_rate": 1.60081043154774e-05, + "loss": 0.5201, + "step": 21548 + }, + { + "epoch": 0.5916803953871499, + "grad_norm": 0.37710678577423096, + "learning_rate": 1.6007759057939433e-05, + "loss": 0.5502, + "step": 21549 + }, + { + "epoch": 0.5917078528281164, + "grad_norm": 0.46812087297439575, + "learning_rate": 1.6007413789195082e-05, + "loss": 0.5133, + "step": 21550 + }, + { + "epoch": 0.5917353102690829, + "grad_norm": 0.3610036075115204, + "learning_rate": 1.6007068509244984e-05, + "loss": 0.4856, + "step": 21551 + }, + { + "epoch": 0.5917627677100494, + "grad_norm": 0.3454486131668091, + "learning_rate": 1.6006723218089788e-05, + "loss": 0.4375, + "step": 21552 + }, + { + "epoch": 0.591790225151016, + "grad_norm": 0.3636390268802643, + "learning_rate": 1.6006377915730135e-05, + "loss": 0.4883, + "step": 21553 + }, + { + "epoch": 0.5918176825919824, + "grad_norm": 0.4506637156009674, + "learning_rate": 1.6006032602166672e-05, + "loss": 0.5136, + "step": 21554 + }, + { + "epoch": 0.591845140032949, + "grad_norm": 0.3723263740539551, + "learning_rate": 1.600568727740004e-05, + "loss": 0.478, + "step": 21555 + }, + { + "epoch": 0.5918725974739154, + "grad_norm": 0.3520340621471405, + "learning_rate": 1.6005341941430886e-05, + "loss": 0.4243, + "step": 21556 + }, + { + "epoch": 0.591900054914882, + "grad_norm": 0.4556431174278259, + "learning_rate": 1.6004996594259853e-05, + "loss": 0.4822, + "step": 21557 + }, + { + "epoch": 0.5919275123558484, + "grad_norm": 0.39400535821914673, + "learning_rate": 1.600465123588758e-05, + "loss": 0.4746, + "step": 21558 + }, + { + "epoch": 0.591954969796815, + "grad_norm": 0.378370463848114, + "learning_rate": 1.600430586631472e-05, + "loss": 0.4808, + "step": 21559 + }, + { + "epoch": 0.5919824272377814, + "grad_norm": 0.39441776275634766, + "learning_rate": 1.6003960485541913e-05, + "loss": 0.453, + "step": 21560 + }, + { + "epoch": 0.5920098846787479, + "grad_norm": 0.35849425196647644, + "learning_rate": 1.6003615093569803e-05, + "loss": 0.5019, + "step": 21561 + }, + { + "epoch": 0.5920373421197145, + "grad_norm": 0.4814780652523041, + "learning_rate": 1.6003269690399034e-05, + "loss": 0.6412, + "step": 21562 + }, + { + "epoch": 0.5920647995606809, + "grad_norm": 0.36142298579216003, + "learning_rate": 1.600292427603025e-05, + "loss": 0.4978, + "step": 21563 + }, + { + "epoch": 0.5920922570016475, + "grad_norm": 0.404452383518219, + "learning_rate": 1.6002578850464096e-05, + "loss": 0.4992, + "step": 21564 + }, + { + "epoch": 0.5921197144426139, + "grad_norm": 0.3768053650856018, + "learning_rate": 1.600223341370122e-05, + "loss": 0.481, + "step": 21565 + }, + { + "epoch": 0.5921471718835805, + "grad_norm": 0.3810424506664276, + "learning_rate": 1.600188796574226e-05, + "loss": 0.4838, + "step": 21566 + }, + { + "epoch": 0.5921746293245469, + "grad_norm": 0.36002296209335327, + "learning_rate": 1.6001542506587865e-05, + "loss": 0.46, + "step": 21567 + }, + { + "epoch": 0.5922020867655134, + "grad_norm": 0.3910340964794159, + "learning_rate": 1.600119703623868e-05, + "loss": 0.5606, + "step": 21568 + }, + { + "epoch": 0.59222954420648, + "grad_norm": 0.38204288482666016, + "learning_rate": 1.6000851554695342e-05, + "loss": 0.4976, + "step": 21569 + }, + { + "epoch": 0.5922570016474464, + "grad_norm": 0.39516332745552063, + "learning_rate": 1.6000506061958502e-05, + "loss": 0.4923, + "step": 21570 + }, + { + "epoch": 0.592284459088413, + "grad_norm": 0.41485074162483215, + "learning_rate": 1.600016055802881e-05, + "loss": 0.6442, + "step": 21571 + }, + { + "epoch": 0.5923119165293794, + "grad_norm": 0.35284140706062317, + "learning_rate": 1.5999815042906894e-05, + "loss": 0.4311, + "step": 21572 + }, + { + "epoch": 0.592339373970346, + "grad_norm": 0.4001729190349579, + "learning_rate": 1.599946951659341e-05, + "loss": 0.4919, + "step": 21573 + }, + { + "epoch": 0.5923668314113124, + "grad_norm": 0.3734440505504608, + "learning_rate": 1.5999123979089003e-05, + "loss": 0.5007, + "step": 21574 + }, + { + "epoch": 0.592394288852279, + "grad_norm": 0.4502127766609192, + "learning_rate": 1.5998778430394317e-05, + "loss": 0.5889, + "step": 21575 + }, + { + "epoch": 0.5924217462932455, + "grad_norm": 0.4269816279411316, + "learning_rate": 1.599843287050999e-05, + "loss": 0.5137, + "step": 21576 + }, + { + "epoch": 0.592449203734212, + "grad_norm": 0.36465173959732056, + "learning_rate": 1.5998087299436673e-05, + "loss": 0.4906, + "step": 21577 + }, + { + "epoch": 0.5924766611751785, + "grad_norm": 0.35471850633621216, + "learning_rate": 1.5997741717175007e-05, + "loss": 0.4527, + "step": 21578 + }, + { + "epoch": 0.5925041186161449, + "grad_norm": 0.39683225750923157, + "learning_rate": 1.5997396123725645e-05, + "loss": 0.4797, + "step": 21579 + }, + { + "epoch": 0.5925315760571115, + "grad_norm": 0.38048118352890015, + "learning_rate": 1.599705051908922e-05, + "loss": 0.5005, + "step": 21580 + }, + { + "epoch": 0.5925590334980779, + "grad_norm": 0.3964901864528656, + "learning_rate": 1.5996704903266384e-05, + "loss": 0.5324, + "step": 21581 + }, + { + "epoch": 0.5925864909390445, + "grad_norm": 0.3867605924606323, + "learning_rate": 1.5996359276257776e-05, + "loss": 0.5252, + "step": 21582 + }, + { + "epoch": 0.592613948380011, + "grad_norm": 0.3575596511363983, + "learning_rate": 1.5996013638064044e-05, + "loss": 0.5789, + "step": 21583 + }, + { + "epoch": 0.5926414058209775, + "grad_norm": 0.35581186413764954, + "learning_rate": 1.5995667988685838e-05, + "loss": 0.4943, + "step": 21584 + }, + { + "epoch": 0.592668863261944, + "grad_norm": 0.3783428966999054, + "learning_rate": 1.5995322328123792e-05, + "loss": 0.5036, + "step": 21585 + }, + { + "epoch": 0.5926963207029105, + "grad_norm": 0.36729124188423157, + "learning_rate": 1.599497665637856e-05, + "loss": 0.502, + "step": 21586 + }, + { + "epoch": 0.592723778143877, + "grad_norm": 0.43494847416877747, + "learning_rate": 1.599463097345078e-05, + "loss": 0.5367, + "step": 21587 + }, + { + "epoch": 0.5927512355848434, + "grad_norm": 0.37854263186454773, + "learning_rate": 1.5994285279341104e-05, + "loss": 0.5144, + "step": 21588 + }, + { + "epoch": 0.59277869302581, + "grad_norm": 0.42274266481399536, + "learning_rate": 1.599393957405017e-05, + "loss": 0.5796, + "step": 21589 + }, + { + "epoch": 0.5928061504667765, + "grad_norm": 0.37950077652931213, + "learning_rate": 1.599359385757863e-05, + "loss": 0.4837, + "step": 21590 + }, + { + "epoch": 0.592833607907743, + "grad_norm": 0.4015316367149353, + "learning_rate": 1.5993248129927118e-05, + "loss": 0.5578, + "step": 21591 + }, + { + "epoch": 0.5928610653487095, + "grad_norm": 0.39663660526275635, + "learning_rate": 1.5992902391096287e-05, + "loss": 0.5335, + "step": 21592 + }, + { + "epoch": 0.592888522789676, + "grad_norm": 0.35792291164398193, + "learning_rate": 1.5992556641086782e-05, + "loss": 0.4609, + "step": 21593 + }, + { + "epoch": 0.5929159802306425, + "grad_norm": 0.3692243993282318, + "learning_rate": 1.5992210879899245e-05, + "loss": 0.4448, + "step": 21594 + }, + { + "epoch": 0.592943437671609, + "grad_norm": 0.35977593064308167, + "learning_rate": 1.5991865107534325e-05, + "loss": 0.4901, + "step": 21595 + }, + { + "epoch": 0.5929708951125755, + "grad_norm": 0.40563634037971497, + "learning_rate": 1.599151932399266e-05, + "loss": 0.4694, + "step": 21596 + }, + { + "epoch": 0.592998352553542, + "grad_norm": 0.3726326823234558, + "learning_rate": 1.5991173529274903e-05, + "loss": 0.5585, + "step": 21597 + }, + { + "epoch": 0.5930258099945085, + "grad_norm": 0.38107535243034363, + "learning_rate": 1.5990827723381695e-05, + "loss": 0.5229, + "step": 21598 + }, + { + "epoch": 0.593053267435475, + "grad_norm": 0.5190727114677429, + "learning_rate": 1.599048190631368e-05, + "loss": 0.5115, + "step": 21599 + }, + { + "epoch": 0.5930807248764415, + "grad_norm": 0.3553035259246826, + "learning_rate": 1.5990136078071504e-05, + "loss": 0.5475, + "step": 21600 + }, + { + "epoch": 0.593108182317408, + "grad_norm": 0.3884257376194, + "learning_rate": 1.5989790238655812e-05, + "loss": 0.4872, + "step": 21601 + }, + { + "epoch": 0.5931356397583745, + "grad_norm": 0.3829786479473114, + "learning_rate": 1.598944438806725e-05, + "loss": 0.4614, + "step": 21602 + }, + { + "epoch": 0.593163097199341, + "grad_norm": 0.7189985513687134, + "learning_rate": 1.5989098526306463e-05, + "loss": 0.6368, + "step": 21603 + }, + { + "epoch": 0.5931905546403076, + "grad_norm": 0.3992803394794464, + "learning_rate": 1.5988752653374094e-05, + "loss": 0.5109, + "step": 21604 + }, + { + "epoch": 0.593218012081274, + "grad_norm": 0.3386070728302002, + "learning_rate": 1.5988406769270793e-05, + "loss": 0.4587, + "step": 21605 + }, + { + "epoch": 0.5932454695222406, + "grad_norm": 0.391655296087265, + "learning_rate": 1.5988060873997202e-05, + "loss": 0.455, + "step": 21606 + }, + { + "epoch": 0.593272926963207, + "grad_norm": 0.3804088830947876, + "learning_rate": 1.5987714967553963e-05, + "loss": 0.484, + "step": 21607 + }, + { + "epoch": 0.5933003844041735, + "grad_norm": 0.38664335012435913, + "learning_rate": 1.5987369049941727e-05, + "loss": 0.5045, + "step": 21608 + }, + { + "epoch": 0.59332784184514, + "grad_norm": 0.32564425468444824, + "learning_rate": 1.5987023121161135e-05, + "loss": 0.5075, + "step": 21609 + }, + { + "epoch": 0.5933552992861065, + "grad_norm": 0.4623730778694153, + "learning_rate": 1.5986677181212837e-05, + "loss": 0.5123, + "step": 21610 + }, + { + "epoch": 0.5933827567270731, + "grad_norm": 0.408325731754303, + "learning_rate": 1.5986331230097474e-05, + "loss": 0.4994, + "step": 21611 + }, + { + "epoch": 0.5934102141680395, + "grad_norm": 0.37334638833999634, + "learning_rate": 1.5985985267815692e-05, + "loss": 0.4807, + "step": 21612 + }, + { + "epoch": 0.5934376716090061, + "grad_norm": 0.3533439040184021, + "learning_rate": 1.5985639294368135e-05, + "loss": 0.4868, + "step": 21613 + }, + { + "epoch": 0.5934651290499725, + "grad_norm": 0.3895469307899475, + "learning_rate": 1.5985293309755455e-05, + "loss": 0.432, + "step": 21614 + }, + { + "epoch": 0.5934925864909391, + "grad_norm": 0.41556233167648315, + "learning_rate": 1.5984947313978287e-05, + "loss": 0.5425, + "step": 21615 + }, + { + "epoch": 0.5935200439319055, + "grad_norm": 0.35045745968818665, + "learning_rate": 1.5984601307037286e-05, + "loss": 0.482, + "step": 21616 + }, + { + "epoch": 0.593547501372872, + "grad_norm": 0.3281785249710083, + "learning_rate": 1.598425528893309e-05, + "loss": 0.4725, + "step": 21617 + }, + { + "epoch": 0.5935749588138386, + "grad_norm": 0.34671032428741455, + "learning_rate": 1.5983909259666352e-05, + "loss": 0.4695, + "step": 21618 + }, + { + "epoch": 0.593602416254805, + "grad_norm": 0.392383873462677, + "learning_rate": 1.5983563219237713e-05, + "loss": 0.3954, + "step": 21619 + }, + { + "epoch": 0.5936298736957716, + "grad_norm": 0.3815106749534607, + "learning_rate": 1.5983217167647817e-05, + "loss": 0.4266, + "step": 21620 + }, + { + "epoch": 0.593657331136738, + "grad_norm": 0.41963282227516174, + "learning_rate": 1.5982871104897315e-05, + "loss": 0.5144, + "step": 21621 + }, + { + "epoch": 0.5936847885777046, + "grad_norm": 0.34298989176750183, + "learning_rate": 1.5982525030986847e-05, + "loss": 0.5187, + "step": 21622 + }, + { + "epoch": 0.593712246018671, + "grad_norm": 0.3803490698337555, + "learning_rate": 1.5982178945917058e-05, + "loss": 0.5095, + "step": 21623 + }, + { + "epoch": 0.5937397034596376, + "grad_norm": 0.4120052754878998, + "learning_rate": 1.59818328496886e-05, + "loss": 0.5646, + "step": 21624 + }, + { + "epoch": 0.5937671609006041, + "grad_norm": 0.3682212829589844, + "learning_rate": 1.5981486742302112e-05, + "loss": 0.4365, + "step": 21625 + }, + { + "epoch": 0.5937946183415705, + "grad_norm": 0.39813438057899475, + "learning_rate": 1.5981140623758242e-05, + "loss": 0.5035, + "step": 21626 + }, + { + "epoch": 0.5938220757825371, + "grad_norm": 0.3904571235179901, + "learning_rate": 1.5980794494057633e-05, + "loss": 0.5018, + "step": 21627 + }, + { + "epoch": 0.5938495332235035, + "grad_norm": 0.4379083812236786, + "learning_rate": 1.5980448353200936e-05, + "loss": 0.4658, + "step": 21628 + }, + { + "epoch": 0.5938769906644701, + "grad_norm": 0.49432632327079773, + "learning_rate": 1.5980102201188796e-05, + "loss": 0.5696, + "step": 21629 + }, + { + "epoch": 0.5939044481054365, + "grad_norm": 0.35707736015319824, + "learning_rate": 1.5979756038021854e-05, + "loss": 0.4563, + "step": 21630 + }, + { + "epoch": 0.5939319055464031, + "grad_norm": 0.35836562514305115, + "learning_rate": 1.597940986370076e-05, + "loss": 0.4013, + "step": 21631 + }, + { + "epoch": 0.5939593629873696, + "grad_norm": 0.3517371416091919, + "learning_rate": 1.5979063678226155e-05, + "loss": 0.4871, + "step": 21632 + }, + { + "epoch": 0.5939868204283361, + "grad_norm": 0.36278876662254333, + "learning_rate": 1.5978717481598695e-05, + "loss": 0.5251, + "step": 21633 + }, + { + "epoch": 0.5940142778693026, + "grad_norm": 0.38015633821487427, + "learning_rate": 1.5978371273819013e-05, + "loss": 0.5492, + "step": 21634 + }, + { + "epoch": 0.594041735310269, + "grad_norm": 0.41710972785949707, + "learning_rate": 1.5978025054887762e-05, + "loss": 0.4936, + "step": 21635 + }, + { + "epoch": 0.5940691927512356, + "grad_norm": 0.3829708397388458, + "learning_rate": 1.5977678824805587e-05, + "loss": 0.5297, + "step": 21636 + }, + { + "epoch": 0.594096650192202, + "grad_norm": 0.3414422571659088, + "learning_rate": 1.5977332583573132e-05, + "loss": 0.4723, + "step": 21637 + }, + { + "epoch": 0.5941241076331686, + "grad_norm": 0.4014362096786499, + "learning_rate": 1.5976986331191046e-05, + "loss": 0.4834, + "step": 21638 + }, + { + "epoch": 0.5941515650741351, + "grad_norm": 0.4152536988258362, + "learning_rate": 1.5976640067659974e-05, + "loss": 0.5275, + "step": 21639 + }, + { + "epoch": 0.5941790225151016, + "grad_norm": 0.4433457851409912, + "learning_rate": 1.5976293792980555e-05, + "loss": 0.533, + "step": 21640 + }, + { + "epoch": 0.5942064799560681, + "grad_norm": 0.3586195707321167, + "learning_rate": 1.597594750715344e-05, + "loss": 0.5076, + "step": 21641 + }, + { + "epoch": 0.5942339373970346, + "grad_norm": 0.35300853848457336, + "learning_rate": 1.5975601210179282e-05, + "loss": 0.4884, + "step": 21642 + }, + { + "epoch": 0.5942613948380011, + "grad_norm": 0.3989848494529724, + "learning_rate": 1.5975254902058718e-05, + "loss": 0.4313, + "step": 21643 + }, + { + "epoch": 0.5942888522789675, + "grad_norm": 0.32974159717559814, + "learning_rate": 1.59749085827924e-05, + "loss": 0.422, + "step": 21644 + }, + { + "epoch": 0.5943163097199341, + "grad_norm": 0.3740629553794861, + "learning_rate": 1.5974562252380965e-05, + "loss": 0.4768, + "step": 21645 + }, + { + "epoch": 0.5943437671609006, + "grad_norm": 0.4019325375556946, + "learning_rate": 1.5974215910825067e-05, + "loss": 0.5706, + "step": 21646 + }, + { + "epoch": 0.5943712246018671, + "grad_norm": 0.37259814143180847, + "learning_rate": 1.597386955812535e-05, + "loss": 0.4832, + "step": 21647 + }, + { + "epoch": 0.5943986820428336, + "grad_norm": 0.4027015268802643, + "learning_rate": 1.5973523194282458e-05, + "loss": 0.5674, + "step": 21648 + }, + { + "epoch": 0.5944261394838001, + "grad_norm": 0.37741154432296753, + "learning_rate": 1.597317681929704e-05, + "loss": 0.5419, + "step": 21649 + }, + { + "epoch": 0.5944535969247666, + "grad_norm": 0.38168174028396606, + "learning_rate": 1.5972830433169738e-05, + "loss": 0.4251, + "step": 21650 + }, + { + "epoch": 0.5944810543657331, + "grad_norm": 0.4446272552013397, + "learning_rate": 1.5972484035901203e-05, + "loss": 0.6141, + "step": 21651 + }, + { + "epoch": 0.5945085118066996, + "grad_norm": 0.39434128999710083, + "learning_rate": 1.597213762749208e-05, + "loss": 0.6002, + "step": 21652 + }, + { + "epoch": 0.5945359692476662, + "grad_norm": 0.359584778547287, + "learning_rate": 1.597179120794301e-05, + "loss": 0.4625, + "step": 21653 + }, + { + "epoch": 0.5945634266886326, + "grad_norm": 0.40658998489379883, + "learning_rate": 1.5971444777254652e-05, + "loss": 0.512, + "step": 21654 + }, + { + "epoch": 0.5945908841295992, + "grad_norm": 0.3999905586242676, + "learning_rate": 1.597109833542764e-05, + "loss": 0.5061, + "step": 21655 + }, + { + "epoch": 0.5946183415705656, + "grad_norm": 0.4103797972202301, + "learning_rate": 1.597075188246262e-05, + "loss": 0.4992, + "step": 21656 + }, + { + "epoch": 0.5946457990115321, + "grad_norm": 0.33789241313934326, + "learning_rate": 1.5970405418360244e-05, + "loss": 0.4857, + "step": 21657 + }, + { + "epoch": 0.5946732564524986, + "grad_norm": 0.36388295888900757, + "learning_rate": 1.5970058943121158e-05, + "loss": 0.504, + "step": 21658 + }, + { + "epoch": 0.5947007138934651, + "grad_norm": 0.3837275207042694, + "learning_rate": 1.5969712456746007e-05, + "loss": 0.5266, + "step": 21659 + }, + { + "epoch": 0.5947281713344317, + "grad_norm": 0.3193417489528656, + "learning_rate": 1.5969365959235435e-05, + "loss": 0.4084, + "step": 21660 + }, + { + "epoch": 0.5947556287753981, + "grad_norm": 0.3747400641441345, + "learning_rate": 1.5969019450590087e-05, + "loss": 0.5207, + "step": 21661 + }, + { + "epoch": 0.5947830862163647, + "grad_norm": 0.3875921964645386, + "learning_rate": 1.5968672930810616e-05, + "loss": 0.5604, + "step": 21662 + }, + { + "epoch": 0.5948105436573311, + "grad_norm": 0.3370065689086914, + "learning_rate": 1.5968326399897667e-05, + "loss": 0.4235, + "step": 21663 + }, + { + "epoch": 0.5948380010982977, + "grad_norm": 0.39944177865982056, + "learning_rate": 1.5967979857851882e-05, + "loss": 0.4552, + "step": 21664 + }, + { + "epoch": 0.5948654585392641, + "grad_norm": 0.3652159869670868, + "learning_rate": 1.5967633304673912e-05, + "loss": 0.5363, + "step": 21665 + }, + { + "epoch": 0.5948929159802306, + "grad_norm": 0.3915611207485199, + "learning_rate": 1.59672867403644e-05, + "loss": 0.5432, + "step": 21666 + }, + { + "epoch": 0.5949203734211972, + "grad_norm": 0.42678290605545044, + "learning_rate": 1.5966940164923993e-05, + "loss": 0.4926, + "step": 21667 + }, + { + "epoch": 0.5949478308621636, + "grad_norm": 0.47274208068847656, + "learning_rate": 1.5966593578353342e-05, + "loss": 0.5443, + "step": 21668 + }, + { + "epoch": 0.5949752883031302, + "grad_norm": 0.3597835302352905, + "learning_rate": 1.5966246980653085e-05, + "loss": 0.4601, + "step": 21669 + }, + { + "epoch": 0.5950027457440966, + "grad_norm": 0.37822869420051575, + "learning_rate": 1.5965900371823875e-05, + "loss": 0.4281, + "step": 21670 + }, + { + "epoch": 0.5950302031850632, + "grad_norm": 0.38378146290779114, + "learning_rate": 1.5965553751866356e-05, + "loss": 0.6406, + "step": 21671 + }, + { + "epoch": 0.5950576606260296, + "grad_norm": 0.39794933795928955, + "learning_rate": 1.5965207120781176e-05, + "loss": 0.4885, + "step": 21672 + }, + { + "epoch": 0.5950851180669962, + "grad_norm": 0.3924503028392792, + "learning_rate": 1.596486047856898e-05, + "loss": 0.4998, + "step": 21673 + }, + { + "epoch": 0.5951125755079627, + "grad_norm": 0.3808087706565857, + "learning_rate": 1.5964513825230416e-05, + "loss": 0.5058, + "step": 21674 + }, + { + "epoch": 0.5951400329489291, + "grad_norm": 0.35151273012161255, + "learning_rate": 1.596416716076613e-05, + "loss": 0.4507, + "step": 21675 + }, + { + "epoch": 0.5951674903898957, + "grad_norm": 0.36850792169570923, + "learning_rate": 1.5963820485176765e-05, + "loss": 0.3999, + "step": 21676 + }, + { + "epoch": 0.5951949478308621, + "grad_norm": 0.4224865734577179, + "learning_rate": 1.5963473798462977e-05, + "loss": 0.4562, + "step": 21677 + }, + { + "epoch": 0.5952224052718287, + "grad_norm": 0.40690645575523376, + "learning_rate": 1.5963127100625407e-05, + "loss": 0.4955, + "step": 21678 + }, + { + "epoch": 0.5952498627127951, + "grad_norm": 0.33127495646476746, + "learning_rate": 1.59627803916647e-05, + "loss": 0.4488, + "step": 21679 + }, + { + "epoch": 0.5952773201537617, + "grad_norm": 0.8051819205284119, + "learning_rate": 1.5962433671581502e-05, + "loss": 0.5949, + "step": 21680 + }, + { + "epoch": 0.5953047775947282, + "grad_norm": 0.42313748598098755, + "learning_rate": 1.5962086940376465e-05, + "loss": 0.4928, + "step": 21681 + }, + { + "epoch": 0.5953322350356947, + "grad_norm": 0.37392693758010864, + "learning_rate": 1.5961740198050234e-05, + "loss": 0.4741, + "step": 21682 + }, + { + "epoch": 0.5953596924766612, + "grad_norm": 0.3998919427394867, + "learning_rate": 1.5961393444603454e-05, + "loss": 0.5279, + "step": 21683 + }, + { + "epoch": 0.5953871499176276, + "grad_norm": 0.364777535200119, + "learning_rate": 1.596104668003677e-05, + "loss": 0.5009, + "step": 21684 + }, + { + "epoch": 0.5954146073585942, + "grad_norm": 0.4161120653152466, + "learning_rate": 1.5960699904350835e-05, + "loss": 0.4943, + "step": 21685 + }, + { + "epoch": 0.5954420647995606, + "grad_norm": 0.3980713188648224, + "learning_rate": 1.596035311754629e-05, + "loss": 0.4872, + "step": 21686 + }, + { + "epoch": 0.5954695222405272, + "grad_norm": 0.3896341621875763, + "learning_rate": 1.5960006319623782e-05, + "loss": 0.5193, + "step": 21687 + }, + { + "epoch": 0.5954969796814937, + "grad_norm": 0.3638424873352051, + "learning_rate": 1.5959659510583963e-05, + "loss": 0.521, + "step": 21688 + }, + { + "epoch": 0.5955244371224602, + "grad_norm": 0.4099225699901581, + "learning_rate": 1.5959312690427476e-05, + "loss": 0.5691, + "step": 21689 + }, + { + "epoch": 0.5955518945634267, + "grad_norm": 0.34615424275398254, + "learning_rate": 1.595896585915497e-05, + "loss": 0.4801, + "step": 21690 + }, + { + "epoch": 0.5955793520043932, + "grad_norm": 0.3555276691913605, + "learning_rate": 1.595861901676709e-05, + "loss": 0.4594, + "step": 21691 + }, + { + "epoch": 0.5956068094453597, + "grad_norm": 0.3950687050819397, + "learning_rate": 1.5958272163264482e-05, + "loss": 0.4772, + "step": 21692 + }, + { + "epoch": 0.5956342668863261, + "grad_norm": 0.38654398918151855, + "learning_rate": 1.59579252986478e-05, + "loss": 0.5077, + "step": 21693 + }, + { + "epoch": 0.5956617243272927, + "grad_norm": 0.3603242337703705, + "learning_rate": 1.595757842291768e-05, + "loss": 0.4285, + "step": 21694 + }, + { + "epoch": 0.5956891817682592, + "grad_norm": 0.3700500726699829, + "learning_rate": 1.5957231536074777e-05, + "loss": 0.4664, + "step": 21695 + }, + { + "epoch": 0.5957166392092257, + "grad_norm": 0.500645637512207, + "learning_rate": 1.5956884638119737e-05, + "loss": 0.5798, + "step": 21696 + }, + { + "epoch": 0.5957440966501922, + "grad_norm": 0.3725320100784302, + "learning_rate": 1.5956537729053204e-05, + "loss": 0.4939, + "step": 21697 + }, + { + "epoch": 0.5957715540911587, + "grad_norm": 0.3667197525501251, + "learning_rate": 1.5956190808875828e-05, + "loss": 0.5138, + "step": 21698 + }, + { + "epoch": 0.5957990115321252, + "grad_norm": 0.3695150911808014, + "learning_rate": 1.5955843877588256e-05, + "loss": 0.4778, + "step": 21699 + }, + { + "epoch": 0.5958264689730917, + "grad_norm": 0.3810870945453644, + "learning_rate": 1.5955496935191136e-05, + "loss": 0.4904, + "step": 21700 + }, + { + "epoch": 0.5958539264140582, + "grad_norm": 0.3729828894138336, + "learning_rate": 1.5955149981685107e-05, + "loss": 0.5512, + "step": 21701 + }, + { + "epoch": 0.5958813838550248, + "grad_norm": 0.3493822515010834, + "learning_rate": 1.5954803017070828e-05, + "loss": 0.4359, + "step": 21702 + }, + { + "epoch": 0.5959088412959912, + "grad_norm": 0.3809067904949188, + "learning_rate": 1.595445604134894e-05, + "loss": 0.5269, + "step": 21703 + }, + { + "epoch": 0.5959362987369577, + "grad_norm": 0.35443630814552307, + "learning_rate": 1.595410905452009e-05, + "loss": 0.5224, + "step": 21704 + }, + { + "epoch": 0.5959637561779242, + "grad_norm": 0.34098318219184875, + "learning_rate": 1.5953762056584924e-05, + "loss": 0.4184, + "step": 21705 + }, + { + "epoch": 0.5959912136188907, + "grad_norm": 0.4497235119342804, + "learning_rate": 1.5953415047544098e-05, + "loss": 0.5893, + "step": 21706 + }, + { + "epoch": 0.5960186710598572, + "grad_norm": 0.3522782623767853, + "learning_rate": 1.5953068027398247e-05, + "loss": 0.4647, + "step": 21707 + }, + { + "epoch": 0.5960461285008237, + "grad_norm": 0.3995654582977295, + "learning_rate": 1.5952720996148028e-05, + "loss": 0.4985, + "step": 21708 + }, + { + "epoch": 0.5960735859417903, + "grad_norm": 0.34515780210494995, + "learning_rate": 1.595237395379408e-05, + "loss": 0.4814, + "step": 21709 + }, + { + "epoch": 0.5961010433827567, + "grad_norm": 0.3649619519710541, + "learning_rate": 1.595202690033706e-05, + "loss": 0.4549, + "step": 21710 + }, + { + "epoch": 0.5961285008237233, + "grad_norm": 0.3574879467487335, + "learning_rate": 1.595167983577761e-05, + "loss": 0.506, + "step": 21711 + }, + { + "epoch": 0.5961559582646897, + "grad_norm": 0.393477201461792, + "learning_rate": 1.595133276011637e-05, + "loss": 0.5206, + "step": 21712 + }, + { + "epoch": 0.5961834157056562, + "grad_norm": 0.3936120867729187, + "learning_rate": 1.5950985673354004e-05, + "loss": 0.5096, + "step": 21713 + }, + { + "epoch": 0.5962108731466227, + "grad_norm": 0.3972810208797455, + "learning_rate": 1.5950638575491145e-05, + "loss": 0.5515, + "step": 21714 + }, + { + "epoch": 0.5962383305875892, + "grad_norm": 0.3933134973049164, + "learning_rate": 1.5950291466528445e-05, + "loss": 0.5145, + "step": 21715 + }, + { + "epoch": 0.5962657880285558, + "grad_norm": 0.3486424684524536, + "learning_rate": 1.5949944346466553e-05, + "loss": 0.5814, + "step": 21716 + }, + { + "epoch": 0.5962932454695222, + "grad_norm": 0.3897119164466858, + "learning_rate": 1.594959721530612e-05, + "loss": 0.5085, + "step": 21717 + }, + { + "epoch": 0.5963207029104888, + "grad_norm": 0.40288040041923523, + "learning_rate": 1.5949250073047786e-05, + "loss": 0.459, + "step": 21718 + }, + { + "epoch": 0.5963481603514552, + "grad_norm": 0.6181684732437134, + "learning_rate": 1.5948902919692204e-05, + "loss": 0.5297, + "step": 21719 + }, + { + "epoch": 0.5963756177924218, + "grad_norm": 0.39317306876182556, + "learning_rate": 1.5948555755240016e-05, + "loss": 0.4954, + "step": 21720 + }, + { + "epoch": 0.5964030752333882, + "grad_norm": 0.4130921959877014, + "learning_rate": 1.5948208579691876e-05, + "loss": 0.5914, + "step": 21721 + }, + { + "epoch": 0.5964305326743548, + "grad_norm": 0.40566009283065796, + "learning_rate": 1.5947861393048428e-05, + "loss": 0.5304, + "step": 21722 + }, + { + "epoch": 0.5964579901153213, + "grad_norm": 0.33482298254966736, + "learning_rate": 1.5947514195310318e-05, + "loss": 0.4826, + "step": 21723 + }, + { + "epoch": 0.5964854475562877, + "grad_norm": 0.3592824339866638, + "learning_rate": 1.5947166986478198e-05, + "loss": 0.4355, + "step": 21724 + }, + { + "epoch": 0.5965129049972543, + "grad_norm": 0.3892030417919159, + "learning_rate": 1.594681976655271e-05, + "loss": 0.4587, + "step": 21725 + }, + { + "epoch": 0.5965403624382207, + "grad_norm": 0.40509817004203796, + "learning_rate": 1.5946472535534508e-05, + "loss": 0.5353, + "step": 21726 + }, + { + "epoch": 0.5965678198791873, + "grad_norm": 1.1417204141616821, + "learning_rate": 1.594612529342424e-05, + "loss": 0.501, + "step": 21727 + }, + { + "epoch": 0.5965952773201537, + "grad_norm": 0.5374469757080078, + "learning_rate": 1.5945778040222548e-05, + "loss": 0.5004, + "step": 21728 + }, + { + "epoch": 0.5966227347611203, + "grad_norm": 0.39794686436653137, + "learning_rate": 1.594543077593008e-05, + "loss": 0.5501, + "step": 21729 + }, + { + "epoch": 0.5966501922020868, + "grad_norm": 0.38900241255760193, + "learning_rate": 1.5945083500547487e-05, + "loss": 0.4835, + "step": 21730 + }, + { + "epoch": 0.5966776496430533, + "grad_norm": 0.35718151926994324, + "learning_rate": 1.594473621407542e-05, + "loss": 0.5287, + "step": 21731 + }, + { + "epoch": 0.5967051070840198, + "grad_norm": 0.3481411337852478, + "learning_rate": 1.5944388916514518e-05, + "loss": 0.4817, + "step": 21732 + }, + { + "epoch": 0.5967325645249862, + "grad_norm": 0.3608609735965729, + "learning_rate": 1.5944041607865433e-05, + "loss": 0.5087, + "step": 21733 + }, + { + "epoch": 0.5967600219659528, + "grad_norm": 0.40064477920532227, + "learning_rate": 1.5943694288128816e-05, + "loss": 0.5335, + "step": 21734 + }, + { + "epoch": 0.5967874794069192, + "grad_norm": 0.4062650799751282, + "learning_rate": 1.594334695730531e-05, + "loss": 0.4835, + "step": 21735 + }, + { + "epoch": 0.5968149368478858, + "grad_norm": 0.3626977503299713, + "learning_rate": 1.5942999615395566e-05, + "loss": 0.4518, + "step": 21736 + }, + { + "epoch": 0.5968423942888523, + "grad_norm": 0.37430527806282043, + "learning_rate": 1.594265226240023e-05, + "loss": 0.5147, + "step": 21737 + }, + { + "epoch": 0.5968698517298188, + "grad_norm": 0.43416711688041687, + "learning_rate": 1.594230489831995e-05, + "loss": 0.465, + "step": 21738 + }, + { + "epoch": 0.5968973091707853, + "grad_norm": 0.43734481930732727, + "learning_rate": 1.594195752315538e-05, + "loss": 0.4268, + "step": 21739 + }, + { + "epoch": 0.5969247666117518, + "grad_norm": 0.37840673327445984, + "learning_rate": 1.594161013690716e-05, + "loss": 0.4629, + "step": 21740 + }, + { + "epoch": 0.5969522240527183, + "grad_norm": 0.3960587680339813, + "learning_rate": 1.5941262739575937e-05, + "loss": 0.5095, + "step": 21741 + }, + { + "epoch": 0.5969796814936847, + "grad_norm": 0.35826346278190613, + "learning_rate": 1.5940915331162367e-05, + "loss": 0.5009, + "step": 21742 + }, + { + "epoch": 0.5970071389346513, + "grad_norm": 0.3992864787578583, + "learning_rate": 1.594056791166709e-05, + "loss": 0.4955, + "step": 21743 + }, + { + "epoch": 0.5970345963756178, + "grad_norm": 0.4591447412967682, + "learning_rate": 1.594022048109076e-05, + "loss": 0.5714, + "step": 21744 + }, + { + "epoch": 0.5970620538165843, + "grad_norm": 0.37396788597106934, + "learning_rate": 1.5939873039434028e-05, + "loss": 0.4547, + "step": 21745 + }, + { + "epoch": 0.5970895112575508, + "grad_norm": 0.45405369997024536, + "learning_rate": 1.593952558669753e-05, + "loss": 0.6542, + "step": 21746 + }, + { + "epoch": 0.5971169686985173, + "grad_norm": 0.37208497524261475, + "learning_rate": 1.5939178122881924e-05, + "loss": 0.4772, + "step": 21747 + }, + { + "epoch": 0.5971444261394838, + "grad_norm": 0.3759411871433258, + "learning_rate": 1.5938830647987854e-05, + "loss": 0.5341, + "step": 21748 + }, + { + "epoch": 0.5971718835804503, + "grad_norm": 0.3888380229473114, + "learning_rate": 1.593848316201597e-05, + "loss": 0.5228, + "step": 21749 + }, + { + "epoch": 0.5971993410214168, + "grad_norm": 0.34081265330314636, + "learning_rate": 1.5938135664966923e-05, + "loss": 0.5037, + "step": 21750 + }, + { + "epoch": 0.5972267984623834, + "grad_norm": 0.39694130420684814, + "learning_rate": 1.5937788156841353e-05, + "loss": 0.5178, + "step": 21751 + }, + { + "epoch": 0.5972542559033498, + "grad_norm": 0.3604068160057068, + "learning_rate": 1.5937440637639915e-05, + "loss": 0.5385, + "step": 21752 + }, + { + "epoch": 0.5972817133443163, + "grad_norm": 0.41468408703804016, + "learning_rate": 1.5937093107363253e-05, + "loss": 0.5523, + "step": 21753 + }, + { + "epoch": 0.5973091707852828, + "grad_norm": 0.3732326030731201, + "learning_rate": 1.5936745566012016e-05, + "loss": 0.492, + "step": 21754 + }, + { + "epoch": 0.5973366282262493, + "grad_norm": 0.3487294316291809, + "learning_rate": 1.593639801358686e-05, + "loss": 0.484, + "step": 21755 + }, + { + "epoch": 0.5973640856672158, + "grad_norm": 0.40183690190315247, + "learning_rate": 1.5936050450088424e-05, + "loss": 0.5245, + "step": 21756 + }, + { + "epoch": 0.5973915431081823, + "grad_norm": 0.3273717164993286, + "learning_rate": 1.593570287551736e-05, + "loss": 0.5097, + "step": 21757 + }, + { + "epoch": 0.5974190005491489, + "grad_norm": 0.4305534064769745, + "learning_rate": 1.5935355289874316e-05, + "loss": 0.5523, + "step": 21758 + }, + { + "epoch": 0.5974464579901153, + "grad_norm": 0.36713096499443054, + "learning_rate": 1.5935007693159937e-05, + "loss": 0.5107, + "step": 21759 + }, + { + "epoch": 0.5974739154310819, + "grad_norm": 0.40823742747306824, + "learning_rate": 1.5934660085374876e-05, + "loss": 0.5858, + "step": 21760 + }, + { + "epoch": 0.5975013728720483, + "grad_norm": 0.40607067942619324, + "learning_rate": 1.593431246651978e-05, + "loss": 0.4413, + "step": 21761 + }, + { + "epoch": 0.5975288303130148, + "grad_norm": 0.37142330408096313, + "learning_rate": 1.5933964836595297e-05, + "loss": 0.4861, + "step": 21762 + }, + { + "epoch": 0.5975562877539813, + "grad_norm": 0.3611002266407013, + "learning_rate": 1.593361719560208e-05, + "loss": 0.5004, + "step": 21763 + }, + { + "epoch": 0.5975837451949478, + "grad_norm": 0.4214264750480652, + "learning_rate": 1.5933269543540765e-05, + "loss": 0.5323, + "step": 21764 + }, + { + "epoch": 0.5976112026359144, + "grad_norm": 0.44664904475212097, + "learning_rate": 1.5932921880412014e-05, + "loss": 0.5353, + "step": 21765 + }, + { + "epoch": 0.5976386600768808, + "grad_norm": 0.3887653052806854, + "learning_rate": 1.593257420621647e-05, + "loss": 0.5129, + "step": 21766 + }, + { + "epoch": 0.5976661175178474, + "grad_norm": 0.36975815892219543, + "learning_rate": 1.5932226520954777e-05, + "loss": 0.4943, + "step": 21767 + }, + { + "epoch": 0.5976935749588138, + "grad_norm": 0.34472185373306274, + "learning_rate": 1.5931878824627593e-05, + "loss": 0.4995, + "step": 21768 + }, + { + "epoch": 0.5977210323997804, + "grad_norm": 0.444450706243515, + "learning_rate": 1.593153111723556e-05, + "loss": 0.5225, + "step": 21769 + }, + { + "epoch": 0.5977484898407468, + "grad_norm": 0.38393229246139526, + "learning_rate": 1.593118339877933e-05, + "loss": 0.4847, + "step": 21770 + }, + { + "epoch": 0.5977759472817133, + "grad_norm": 0.4024169147014618, + "learning_rate": 1.593083566925955e-05, + "loss": 0.4877, + "step": 21771 + }, + { + "epoch": 0.5978034047226799, + "grad_norm": 0.4046167731285095, + "learning_rate": 1.5930487928676864e-05, + "loss": 0.6111, + "step": 21772 + }, + { + "epoch": 0.5978308621636463, + "grad_norm": 0.3816922903060913, + "learning_rate": 1.593014017703193e-05, + "loss": 0.5304, + "step": 21773 + }, + { + "epoch": 0.5978583196046129, + "grad_norm": 0.36318260431289673, + "learning_rate": 1.5929792414325393e-05, + "loss": 0.464, + "step": 21774 + }, + { + "epoch": 0.5978857770455793, + "grad_norm": 0.34587565064430237, + "learning_rate": 1.5929444640557896e-05, + "loss": 0.4503, + "step": 21775 + }, + { + "epoch": 0.5979132344865459, + "grad_norm": 0.4767628312110901, + "learning_rate": 1.5929096855730094e-05, + "loss": 0.5811, + "step": 21776 + }, + { + "epoch": 0.5979406919275123, + "grad_norm": 0.4097137749195099, + "learning_rate": 1.5928749059842633e-05, + "loss": 0.4486, + "step": 21777 + }, + { + "epoch": 0.5979681493684789, + "grad_norm": 0.3322499394416809, + "learning_rate": 1.5928401252896165e-05, + "loss": 0.4978, + "step": 21778 + }, + { + "epoch": 0.5979956068094454, + "grad_norm": 0.3778994381427765, + "learning_rate": 1.592805343489134e-05, + "loss": 0.5344, + "step": 21779 + }, + { + "epoch": 0.5980230642504119, + "grad_norm": 0.40297508239746094, + "learning_rate": 1.5927705605828796e-05, + "loss": 0.4976, + "step": 21780 + }, + { + "epoch": 0.5980505216913784, + "grad_norm": 0.3926171362400055, + "learning_rate": 1.592735776570919e-05, + "loss": 0.4995, + "step": 21781 + }, + { + "epoch": 0.5980779791323448, + "grad_norm": 0.8965089917182922, + "learning_rate": 1.5927009914533172e-05, + "loss": 0.5036, + "step": 21782 + }, + { + "epoch": 0.5981054365733114, + "grad_norm": 0.45400169491767883, + "learning_rate": 1.592666205230139e-05, + "loss": 0.5436, + "step": 21783 + }, + { + "epoch": 0.5981328940142778, + "grad_norm": 0.38812416791915894, + "learning_rate": 1.592631417901449e-05, + "loss": 0.4786, + "step": 21784 + }, + { + "epoch": 0.5981603514552444, + "grad_norm": 0.40294456481933594, + "learning_rate": 1.5925966294673126e-05, + "loss": 0.6172, + "step": 21785 + }, + { + "epoch": 0.5981878088962109, + "grad_norm": 0.4106404185295105, + "learning_rate": 1.592561839927794e-05, + "loss": 0.4811, + "step": 21786 + }, + { + "epoch": 0.5982152663371774, + "grad_norm": 0.7015767693519592, + "learning_rate": 1.5925270492829582e-05, + "loss": 0.5864, + "step": 21787 + }, + { + "epoch": 0.5982427237781439, + "grad_norm": 0.404568612575531, + "learning_rate": 1.592492257532871e-05, + "loss": 0.5595, + "step": 21788 + }, + { + "epoch": 0.5982701812191104, + "grad_norm": 0.3280406892299652, + "learning_rate": 1.592457464677596e-05, + "loss": 0.4517, + "step": 21789 + }, + { + "epoch": 0.5982976386600769, + "grad_norm": 0.4367057979106903, + "learning_rate": 1.5924226707171992e-05, + "loss": 0.4909, + "step": 21790 + }, + { + "epoch": 0.5983250961010433, + "grad_norm": 0.40454185009002686, + "learning_rate": 1.592387875651745e-05, + "loss": 0.5718, + "step": 21791 + }, + { + "epoch": 0.5983525535420099, + "grad_norm": 0.36648768186569214, + "learning_rate": 1.5923530794812983e-05, + "loss": 0.4681, + "step": 21792 + }, + { + "epoch": 0.5983800109829764, + "grad_norm": 0.39048629999160767, + "learning_rate": 1.592318282205924e-05, + "loss": 0.4791, + "step": 21793 + }, + { + "epoch": 0.5984074684239429, + "grad_norm": 0.3705051839351654, + "learning_rate": 1.592283483825687e-05, + "loss": 0.4728, + "step": 21794 + }, + { + "epoch": 0.5984349258649094, + "grad_norm": 0.40011516213417053, + "learning_rate": 1.592248684340652e-05, + "loss": 0.5713, + "step": 21795 + }, + { + "epoch": 0.5984623833058759, + "grad_norm": 0.39546987414360046, + "learning_rate": 1.5922138837508846e-05, + "loss": 0.4657, + "step": 21796 + }, + { + "epoch": 0.5984898407468424, + "grad_norm": 0.3491068482398987, + "learning_rate": 1.5921790820564492e-05, + "loss": 0.5536, + "step": 21797 + }, + { + "epoch": 0.5985172981878089, + "grad_norm": 0.3886427879333496, + "learning_rate": 1.5921442792574107e-05, + "loss": 0.4903, + "step": 21798 + }, + { + "epoch": 0.5985447556287754, + "grad_norm": 0.3634822964668274, + "learning_rate": 1.592109475353834e-05, + "loss": 0.5187, + "step": 21799 + }, + { + "epoch": 0.598572213069742, + "grad_norm": 0.4906805157661438, + "learning_rate": 1.5920746703457845e-05, + "loss": 0.595, + "step": 21800 + }, + { + "epoch": 0.5985996705107084, + "grad_norm": 0.41450387239456177, + "learning_rate": 1.5920398642333265e-05, + "loss": 0.5043, + "step": 21801 + }, + { + "epoch": 0.5986271279516749, + "grad_norm": 0.47179439663887024, + "learning_rate": 1.5920050570165256e-05, + "loss": 0.4622, + "step": 21802 + }, + { + "epoch": 0.5986545853926414, + "grad_norm": 0.4134626090526581, + "learning_rate": 1.591970248695446e-05, + "loss": 0.5214, + "step": 21803 + }, + { + "epoch": 0.5986820428336079, + "grad_norm": 0.33374717831611633, + "learning_rate": 1.591935439270153e-05, + "loss": 0.5051, + "step": 21804 + }, + { + "epoch": 0.5987095002745744, + "grad_norm": 0.37324991822242737, + "learning_rate": 1.5919006287407113e-05, + "loss": 0.5404, + "step": 21805 + }, + { + "epoch": 0.5987369577155409, + "grad_norm": 0.4127584397792816, + "learning_rate": 1.5918658171071862e-05, + "loss": 0.4669, + "step": 21806 + }, + { + "epoch": 0.5987644151565075, + "grad_norm": 0.33512166142463684, + "learning_rate": 1.5918310043696424e-05, + "loss": 0.5131, + "step": 21807 + }, + { + "epoch": 0.5987918725974739, + "grad_norm": 0.37965288758277893, + "learning_rate": 1.591796190528145e-05, + "loss": 0.5609, + "step": 21808 + }, + { + "epoch": 0.5988193300384405, + "grad_norm": 0.36195001006126404, + "learning_rate": 1.5917613755827588e-05, + "loss": 0.6113, + "step": 21809 + }, + { + "epoch": 0.5988467874794069, + "grad_norm": 0.3564576804637909, + "learning_rate": 1.5917265595335486e-05, + "loss": 0.5117, + "step": 21810 + }, + { + "epoch": 0.5988742449203734, + "grad_norm": 0.44437670707702637, + "learning_rate": 1.5916917423805796e-05, + "loss": 0.5209, + "step": 21811 + }, + { + "epoch": 0.5989017023613399, + "grad_norm": 0.41089722514152527, + "learning_rate": 1.591656924123917e-05, + "loss": 0.4502, + "step": 21812 + }, + { + "epoch": 0.5989291598023064, + "grad_norm": 0.3979044556617737, + "learning_rate": 1.5916221047636248e-05, + "loss": 0.5347, + "step": 21813 + }, + { + "epoch": 0.598956617243273, + "grad_norm": 0.35370779037475586, + "learning_rate": 1.5915872842997687e-05, + "loss": 0.4785, + "step": 21814 + }, + { + "epoch": 0.5989840746842394, + "grad_norm": 0.3850441873073578, + "learning_rate": 1.591552462732414e-05, + "loss": 0.5005, + "step": 21815 + }, + { + "epoch": 0.599011532125206, + "grad_norm": 0.5010243654251099, + "learning_rate": 1.5915176400616247e-05, + "loss": 0.561, + "step": 21816 + }, + { + "epoch": 0.5990389895661724, + "grad_norm": 0.39204102754592896, + "learning_rate": 1.5914828162874663e-05, + "loss": 0.5109, + "step": 21817 + }, + { + "epoch": 0.599066447007139, + "grad_norm": 0.4545150399208069, + "learning_rate": 1.5914479914100038e-05, + "loss": 0.5218, + "step": 21818 + }, + { + "epoch": 0.5990939044481054, + "grad_norm": 0.35920727252960205, + "learning_rate": 1.5914131654293018e-05, + "loss": 0.4268, + "step": 21819 + }, + { + "epoch": 0.599121361889072, + "grad_norm": 0.386434942483902, + "learning_rate": 1.5913783383454255e-05, + "loss": 0.5757, + "step": 21820 + }, + { + "epoch": 0.5991488193300385, + "grad_norm": 0.4401495158672333, + "learning_rate": 1.59134351015844e-05, + "loss": 0.508, + "step": 21821 + }, + { + "epoch": 0.5991762767710049, + "grad_norm": 0.43861958384513855, + "learning_rate": 1.59130868086841e-05, + "loss": 0.5361, + "step": 21822 + }, + { + "epoch": 0.5992037342119715, + "grad_norm": 0.40603938698768616, + "learning_rate": 1.5912738504754006e-05, + "loss": 0.5099, + "step": 21823 + }, + { + "epoch": 0.5992311916529379, + "grad_norm": 0.4112069308757782, + "learning_rate": 1.5912390189794767e-05, + "loss": 0.5656, + "step": 21824 + }, + { + "epoch": 0.5992586490939045, + "grad_norm": 0.4254595637321472, + "learning_rate": 1.5912041863807037e-05, + "loss": 0.6064, + "step": 21825 + }, + { + "epoch": 0.5992861065348709, + "grad_norm": 0.4631904363632202, + "learning_rate": 1.591169352679146e-05, + "loss": 0.4681, + "step": 21826 + }, + { + "epoch": 0.5993135639758375, + "grad_norm": 0.35579225420951843, + "learning_rate": 1.5911345178748686e-05, + "loss": 0.5262, + "step": 21827 + }, + { + "epoch": 0.5993410214168039, + "grad_norm": 0.45294302701950073, + "learning_rate": 1.5910996819679366e-05, + "loss": 0.5758, + "step": 21828 + }, + { + "epoch": 0.5993684788577704, + "grad_norm": 0.3843606412410736, + "learning_rate": 1.591064844958415e-05, + "loss": 0.4943, + "step": 21829 + }, + { + "epoch": 0.599395936298737, + "grad_norm": 0.34844258427619934, + "learning_rate": 1.5910300068463694e-05, + "loss": 0.4555, + "step": 21830 + }, + { + "epoch": 0.5994233937397034, + "grad_norm": 0.4186326861381531, + "learning_rate": 1.5909951676318635e-05, + "loss": 0.5782, + "step": 21831 + }, + { + "epoch": 0.59945085118067, + "grad_norm": 0.40605369210243225, + "learning_rate": 1.5909603273149636e-05, + "loss": 0.4983, + "step": 21832 + }, + { + "epoch": 0.5994783086216364, + "grad_norm": 0.34883925318717957, + "learning_rate": 1.5909254858957336e-05, + "loss": 0.5802, + "step": 21833 + }, + { + "epoch": 0.599505766062603, + "grad_norm": 0.4811926484107971, + "learning_rate": 1.5908906433742393e-05, + "loss": 0.5331, + "step": 21834 + }, + { + "epoch": 0.5995332235035694, + "grad_norm": 0.3521099090576172, + "learning_rate": 1.590855799750545e-05, + "loss": 0.4773, + "step": 21835 + }, + { + "epoch": 0.599560680944536, + "grad_norm": 0.3845130503177643, + "learning_rate": 1.5908209550247164e-05, + "loss": 0.5496, + "step": 21836 + }, + { + "epoch": 0.5995881383855025, + "grad_norm": 0.3481486737728119, + "learning_rate": 1.590786109196818e-05, + "loss": 0.4771, + "step": 21837 + }, + { + "epoch": 0.599615595826469, + "grad_norm": 0.35041698813438416, + "learning_rate": 1.590751262266915e-05, + "loss": 0.4893, + "step": 21838 + }, + { + "epoch": 0.5996430532674355, + "grad_norm": 0.3828336000442505, + "learning_rate": 1.590716414235072e-05, + "loss": 0.5589, + "step": 21839 + }, + { + "epoch": 0.5996705107084019, + "grad_norm": 0.36478039622306824, + "learning_rate": 1.590681565101355e-05, + "loss": 0.5024, + "step": 21840 + }, + { + "epoch": 0.5996979681493685, + "grad_norm": 0.4144516885280609, + "learning_rate": 1.590646714865828e-05, + "loss": 0.5664, + "step": 21841 + }, + { + "epoch": 0.5997254255903349, + "grad_norm": 0.3801555931568146, + "learning_rate": 1.5906118635285562e-05, + "loss": 0.569, + "step": 21842 + }, + { + "epoch": 0.5997528830313015, + "grad_norm": 0.3749721348285675, + "learning_rate": 1.590577011089605e-05, + "loss": 0.4906, + "step": 21843 + }, + { + "epoch": 0.599780340472268, + "grad_norm": 0.3475201427936554, + "learning_rate": 1.5905421575490393e-05, + "loss": 0.5123, + "step": 21844 + }, + { + "epoch": 0.5998077979132345, + "grad_norm": 0.3830910921096802, + "learning_rate": 1.5905073029069236e-05, + "loss": 0.5459, + "step": 21845 + }, + { + "epoch": 0.599835255354201, + "grad_norm": 0.36321207880973816, + "learning_rate": 1.590472447163324e-05, + "loss": 0.48, + "step": 21846 + }, + { + "epoch": 0.5998627127951675, + "grad_norm": 0.37598690390586853, + "learning_rate": 1.5904375903183044e-05, + "loss": 0.4344, + "step": 21847 + }, + { + "epoch": 0.599890170236134, + "grad_norm": 0.37261340022087097, + "learning_rate": 1.59040273237193e-05, + "loss": 0.5232, + "step": 21848 + }, + { + "epoch": 0.5999176276771004, + "grad_norm": 0.42023324966430664, + "learning_rate": 1.5903678733242664e-05, + "loss": 0.6301, + "step": 21849 + }, + { + "epoch": 0.599945085118067, + "grad_norm": 0.3627382516860962, + "learning_rate": 1.590333013175378e-05, + "loss": 0.4401, + "step": 21850 + }, + { + "epoch": 0.5999725425590335, + "grad_norm": 0.38771477341651917, + "learning_rate": 1.5902981519253306e-05, + "loss": 0.4393, + "step": 21851 + }, + { + "epoch": 0.6, + "grad_norm": 0.39763084053993225, + "learning_rate": 1.5902632895741882e-05, + "loss": 0.4808, + "step": 21852 + }, + { + "epoch": 0.6000274574409665, + "grad_norm": 0.37308141589164734, + "learning_rate": 1.5902284261220168e-05, + "loss": 0.5192, + "step": 21853 + }, + { + "epoch": 0.600054914881933, + "grad_norm": 0.46075206995010376, + "learning_rate": 1.5901935615688812e-05, + "loss": 0.5129, + "step": 21854 + }, + { + "epoch": 0.6000823723228995, + "grad_norm": 0.4008239507675171, + "learning_rate": 1.5901586959148456e-05, + "loss": 0.5049, + "step": 21855 + }, + { + "epoch": 0.600109829763866, + "grad_norm": 0.41046759486198425, + "learning_rate": 1.590123829159976e-05, + "loss": 0.4272, + "step": 21856 + }, + { + "epoch": 0.6001372872048325, + "grad_norm": 0.4524318277835846, + "learning_rate": 1.5900889613043367e-05, + "loss": 0.5504, + "step": 21857 + }, + { + "epoch": 0.600164744645799, + "grad_norm": 0.4111354351043701, + "learning_rate": 1.5900540923479938e-05, + "loss": 0.5295, + "step": 21858 + }, + { + "epoch": 0.6001922020867655, + "grad_norm": 0.4033288359642029, + "learning_rate": 1.5900192222910112e-05, + "loss": 0.5267, + "step": 21859 + }, + { + "epoch": 0.600219659527732, + "grad_norm": 0.4021080434322357, + "learning_rate": 1.5899843511334546e-05, + "loss": 0.5081, + "step": 21860 + }, + { + "epoch": 0.6002471169686985, + "grad_norm": 0.41602179408073425, + "learning_rate": 1.589949478875389e-05, + "loss": 0.4854, + "step": 21861 + }, + { + "epoch": 0.600274574409665, + "grad_norm": 0.3441467583179474, + "learning_rate": 1.589914605516879e-05, + "loss": 0.3694, + "step": 21862 + }, + { + "epoch": 0.6003020318506315, + "grad_norm": 0.4055629372596741, + "learning_rate": 1.5898797310579907e-05, + "loss": 0.5038, + "step": 21863 + }, + { + "epoch": 0.600329489291598, + "grad_norm": 0.3693135380744934, + "learning_rate": 1.589844855498788e-05, + "loss": 0.4934, + "step": 21864 + }, + { + "epoch": 0.6003569467325646, + "grad_norm": 0.4076460003852844, + "learning_rate": 1.589809978839336e-05, + "loss": 0.4853, + "step": 21865 + }, + { + "epoch": 0.600384404173531, + "grad_norm": 0.3547992706298828, + "learning_rate": 1.589775101079701e-05, + "loss": 0.513, + "step": 21866 + }, + { + "epoch": 0.6004118616144976, + "grad_norm": 0.34956321120262146, + "learning_rate": 1.5897402222199466e-05, + "loss": 0.5112, + "step": 21867 + }, + { + "epoch": 0.600439319055464, + "grad_norm": 0.3800528049468994, + "learning_rate": 1.5897053422601385e-05, + "loss": 0.5213, + "step": 21868 + }, + { + "epoch": 0.6004667764964305, + "grad_norm": 0.35359591245651245, + "learning_rate": 1.589670461200342e-05, + "loss": 0.5282, + "step": 21869 + }, + { + "epoch": 0.600494233937397, + "grad_norm": 0.36760810017585754, + "learning_rate": 1.5896355790406216e-05, + "loss": 0.4289, + "step": 21870 + }, + { + "epoch": 0.6005216913783635, + "grad_norm": 0.4137773811817169, + "learning_rate": 1.5896006957810428e-05, + "loss": 0.5272, + "step": 21871 + }, + { + "epoch": 0.6005491488193301, + "grad_norm": 0.3829239308834076, + "learning_rate": 1.5895658114216707e-05, + "loss": 0.5172, + "step": 21872 + }, + { + "epoch": 0.6005766062602965, + "grad_norm": 0.42025095224380493, + "learning_rate": 1.5895309259625696e-05, + "loss": 0.5043, + "step": 21873 + }, + { + "epoch": 0.6006040637012631, + "grad_norm": 0.39107775688171387, + "learning_rate": 1.5894960394038057e-05, + "loss": 0.5276, + "step": 21874 + }, + { + "epoch": 0.6006315211422295, + "grad_norm": 0.419564813375473, + "learning_rate": 1.5894611517454432e-05, + "loss": 0.5057, + "step": 21875 + }, + { + "epoch": 0.6006589785831961, + "grad_norm": 0.3757323920726776, + "learning_rate": 1.589426262987548e-05, + "loss": 0.5049, + "step": 21876 + }, + { + "epoch": 0.6006864360241625, + "grad_norm": 0.4411619007587433, + "learning_rate": 1.5893913731301842e-05, + "loss": 0.6151, + "step": 21877 + }, + { + "epoch": 0.600713893465129, + "grad_norm": 0.40758439898490906, + "learning_rate": 1.5893564821734175e-05, + "loss": 0.5321, + "step": 21878 + }, + { + "epoch": 0.6007413509060956, + "grad_norm": 0.36475086212158203, + "learning_rate": 1.5893215901173128e-05, + "loss": 0.5462, + "step": 21879 + }, + { + "epoch": 0.600768808347062, + "grad_norm": 0.3325038254261017, + "learning_rate": 1.5892866969619355e-05, + "loss": 0.4787, + "step": 21880 + }, + { + "epoch": 0.6007962657880286, + "grad_norm": 0.35329484939575195, + "learning_rate": 1.5892518027073504e-05, + "loss": 0.5698, + "step": 21881 + }, + { + "epoch": 0.600823723228995, + "grad_norm": 0.40276145935058594, + "learning_rate": 1.5892169073536224e-05, + "loss": 0.4872, + "step": 21882 + }, + { + "epoch": 0.6008511806699616, + "grad_norm": 0.3787010908126831, + "learning_rate": 1.589182010900817e-05, + "loss": 0.5633, + "step": 21883 + }, + { + "epoch": 0.600878638110928, + "grad_norm": 0.34360837936401367, + "learning_rate": 1.589147113348999e-05, + "loss": 0.5437, + "step": 21884 + }, + { + "epoch": 0.6009060955518946, + "grad_norm": 0.3615754544734955, + "learning_rate": 1.589112214698234e-05, + "loss": 0.4644, + "step": 21885 + }, + { + "epoch": 0.6009335529928611, + "grad_norm": 0.3500153124332428, + "learning_rate": 1.5890773149485862e-05, + "loss": 0.4627, + "step": 21886 + }, + { + "epoch": 0.6009610104338275, + "grad_norm": 0.38393157720565796, + "learning_rate": 1.5890424141001213e-05, + "loss": 0.54, + "step": 21887 + }, + { + "epoch": 0.6009884678747941, + "grad_norm": 0.4227464497089386, + "learning_rate": 1.5890075121529042e-05, + "loss": 0.4485, + "step": 21888 + }, + { + "epoch": 0.6010159253157605, + "grad_norm": 0.3786725103855133, + "learning_rate": 1.588972609107e-05, + "loss": 0.4263, + "step": 21889 + }, + { + "epoch": 0.6010433827567271, + "grad_norm": 0.3662354648113251, + "learning_rate": 1.5889377049624744e-05, + "loss": 0.5864, + "step": 21890 + }, + { + "epoch": 0.6010708401976935, + "grad_norm": 0.3965330421924591, + "learning_rate": 1.588902799719392e-05, + "loss": 0.5326, + "step": 21891 + }, + { + "epoch": 0.6010982976386601, + "grad_norm": 0.3496745824813843, + "learning_rate": 1.5888678933778173e-05, + "loss": 0.4418, + "step": 21892 + }, + { + "epoch": 0.6011257550796266, + "grad_norm": 0.3649943768978119, + "learning_rate": 1.5888329859378165e-05, + "loss": 0.529, + "step": 21893 + }, + { + "epoch": 0.6011532125205931, + "grad_norm": 0.4727184772491455, + "learning_rate": 1.5887980773994543e-05, + "loss": 0.5208, + "step": 21894 + }, + { + "epoch": 0.6011806699615596, + "grad_norm": 0.4397592544555664, + "learning_rate": 1.5887631677627955e-05, + "loss": 0.615, + "step": 21895 + }, + { + "epoch": 0.601208127402526, + "grad_norm": 0.42077216506004333, + "learning_rate": 1.5887282570279054e-05, + "loss": 0.4543, + "step": 21896 + }, + { + "epoch": 0.6012355848434926, + "grad_norm": 0.38015732169151306, + "learning_rate": 1.5886933451948495e-05, + "loss": 0.5989, + "step": 21897 + }, + { + "epoch": 0.601263042284459, + "grad_norm": 0.35448360443115234, + "learning_rate": 1.5886584322636927e-05, + "loss": 0.4769, + "step": 21898 + }, + { + "epoch": 0.6012904997254256, + "grad_norm": 0.36791524291038513, + "learning_rate": 1.5886235182344997e-05, + "loss": 0.5012, + "step": 21899 + }, + { + "epoch": 0.6013179571663921, + "grad_norm": 0.3673514425754547, + "learning_rate": 1.5885886031073365e-05, + "loss": 0.5225, + "step": 21900 + }, + { + "epoch": 0.6013454146073586, + "grad_norm": 0.41706734895706177, + "learning_rate": 1.588553686882267e-05, + "loss": 0.5095, + "step": 21901 + }, + { + "epoch": 0.6013728720483251, + "grad_norm": 0.3634053170681, + "learning_rate": 1.5885187695593573e-05, + "loss": 0.4696, + "step": 21902 + }, + { + "epoch": 0.6014003294892916, + "grad_norm": 0.37967726588249207, + "learning_rate": 1.5884838511386728e-05, + "loss": 0.421, + "step": 21903 + }, + { + "epoch": 0.6014277869302581, + "grad_norm": 0.3656013011932373, + "learning_rate": 1.5884489316202773e-05, + "loss": 0.4705, + "step": 21904 + }, + { + "epoch": 0.6014552443712246, + "grad_norm": 0.3753873407840729, + "learning_rate": 1.588414011004237e-05, + "loss": 0.4496, + "step": 21905 + }, + { + "epoch": 0.6014827018121911, + "grad_norm": 0.4276305139064789, + "learning_rate": 1.588379089290617e-05, + "loss": 0.5405, + "step": 21906 + }, + { + "epoch": 0.6015101592531577, + "grad_norm": 0.3858581781387329, + "learning_rate": 1.588344166479482e-05, + "loss": 0.5091, + "step": 21907 + }, + { + "epoch": 0.6015376166941241, + "grad_norm": 0.45314082503318787, + "learning_rate": 1.5883092425708976e-05, + "loss": 0.4715, + "step": 21908 + }, + { + "epoch": 0.6015650741350906, + "grad_norm": 0.3316369950771332, + "learning_rate": 1.5882743175649284e-05, + "loss": 0.4375, + "step": 21909 + }, + { + "epoch": 0.6015925315760571, + "grad_norm": 0.42109692096710205, + "learning_rate": 1.5882393914616398e-05, + "loss": 0.4829, + "step": 21910 + }, + { + "epoch": 0.6016199890170236, + "grad_norm": 0.3924688994884491, + "learning_rate": 1.588204464261097e-05, + "loss": 0.5056, + "step": 21911 + }, + { + "epoch": 0.6016474464579901, + "grad_norm": 0.3727668523788452, + "learning_rate": 1.5881695359633652e-05, + "loss": 0.4507, + "step": 21912 + }, + { + "epoch": 0.6016749038989566, + "grad_norm": 0.3394390642642975, + "learning_rate": 1.5881346065685097e-05, + "loss": 0.4932, + "step": 21913 + }, + { + "epoch": 0.6017023613399232, + "grad_norm": 0.3741621673107147, + "learning_rate": 1.5880996760765953e-05, + "loss": 0.517, + "step": 21914 + }, + { + "epoch": 0.6017298187808896, + "grad_norm": 0.3582232594490051, + "learning_rate": 1.5880647444876873e-05, + "loss": 0.424, + "step": 21915 + }, + { + "epoch": 0.6017572762218562, + "grad_norm": 0.3942861258983612, + "learning_rate": 1.588029811801851e-05, + "loss": 0.4861, + "step": 21916 + }, + { + "epoch": 0.6017847336628226, + "grad_norm": 0.3412391245365143, + "learning_rate": 1.587994878019151e-05, + "loss": 0.4192, + "step": 21917 + }, + { + "epoch": 0.6018121911037891, + "grad_norm": 0.391165554523468, + "learning_rate": 1.5879599431396533e-05, + "loss": 0.5178, + "step": 21918 + }, + { + "epoch": 0.6018396485447556, + "grad_norm": 0.39322856068611145, + "learning_rate": 1.5879250071634226e-05, + "loss": 0.5361, + "step": 21919 + }, + { + "epoch": 0.6018671059857221, + "grad_norm": 0.3669569790363312, + "learning_rate": 1.587890070090524e-05, + "loss": 0.4422, + "step": 21920 + }, + { + "epoch": 0.6018945634266887, + "grad_norm": 0.46596336364746094, + "learning_rate": 1.5878551319210228e-05, + "loss": 0.4795, + "step": 21921 + }, + { + "epoch": 0.6019220208676551, + "grad_norm": 0.3610822558403015, + "learning_rate": 1.587820192654984e-05, + "loss": 0.4204, + "step": 21922 + }, + { + "epoch": 0.6019494783086217, + "grad_norm": 0.3694925904273987, + "learning_rate": 1.5877852522924733e-05, + "loss": 0.5033, + "step": 21923 + }, + { + "epoch": 0.6019769357495881, + "grad_norm": 0.3494751751422882, + "learning_rate": 1.587750310833555e-05, + "loss": 0.3847, + "step": 21924 + }, + { + "epoch": 0.6020043931905547, + "grad_norm": 0.40547749400138855, + "learning_rate": 1.5877153682782955e-05, + "loss": 0.4165, + "step": 21925 + }, + { + "epoch": 0.6020318506315211, + "grad_norm": 0.409934937953949, + "learning_rate": 1.587680424626759e-05, + "loss": 0.4988, + "step": 21926 + }, + { + "epoch": 0.6020593080724876, + "grad_norm": 0.3213319182395935, + "learning_rate": 1.5876454798790105e-05, + "loss": 0.4126, + "step": 21927 + }, + { + "epoch": 0.6020867655134542, + "grad_norm": 0.38318076729774475, + "learning_rate": 1.587610534035116e-05, + "loss": 0.4851, + "step": 21928 + }, + { + "epoch": 0.6021142229544206, + "grad_norm": 0.4118233621120453, + "learning_rate": 1.5875755870951404e-05, + "loss": 0.4717, + "step": 21929 + }, + { + "epoch": 0.6021416803953872, + "grad_norm": 0.5177521705627441, + "learning_rate": 1.5875406390591487e-05, + "loss": 0.4646, + "step": 21930 + }, + { + "epoch": 0.6021691378363536, + "grad_norm": 0.4124404788017273, + "learning_rate": 1.587505689927206e-05, + "loss": 0.4728, + "step": 21931 + }, + { + "epoch": 0.6021965952773202, + "grad_norm": 0.5261924266815186, + "learning_rate": 1.5874707396993776e-05, + "loss": 0.4873, + "step": 21932 + }, + { + "epoch": 0.6022240527182866, + "grad_norm": 0.379585862159729, + "learning_rate": 1.587435788375729e-05, + "loss": 0.5316, + "step": 21933 + }, + { + "epoch": 0.6022515101592532, + "grad_norm": 0.3852885365486145, + "learning_rate": 1.5874008359563253e-05, + "loss": 0.5674, + "step": 21934 + }, + { + "epoch": 0.6022789676002197, + "grad_norm": 0.3539833426475525, + "learning_rate": 1.5873658824412314e-05, + "loss": 0.4717, + "step": 21935 + }, + { + "epoch": 0.6023064250411861, + "grad_norm": 0.3832300901412964, + "learning_rate": 1.5873309278305127e-05, + "loss": 0.5301, + "step": 21936 + }, + { + "epoch": 0.6023338824821527, + "grad_norm": 0.39762234687805176, + "learning_rate": 1.5872959721242344e-05, + "loss": 0.4672, + "step": 21937 + }, + { + "epoch": 0.6023613399231191, + "grad_norm": 0.37968435883522034, + "learning_rate": 1.5872610153224613e-05, + "loss": 0.4985, + "step": 21938 + }, + { + "epoch": 0.6023887973640857, + "grad_norm": 0.36594775319099426, + "learning_rate": 1.5872260574252595e-05, + "loss": 0.4439, + "step": 21939 + }, + { + "epoch": 0.6024162548050521, + "grad_norm": 0.4393734931945801, + "learning_rate": 1.5871910984326936e-05, + "loss": 0.4482, + "step": 21940 + }, + { + "epoch": 0.6024437122460187, + "grad_norm": 0.3637950122356415, + "learning_rate": 1.5871561383448287e-05, + "loss": 0.489, + "step": 21941 + }, + { + "epoch": 0.6024711696869852, + "grad_norm": 0.42783698439598083, + "learning_rate": 1.58712117716173e-05, + "loss": 0.5169, + "step": 21942 + }, + { + "epoch": 0.6024986271279517, + "grad_norm": 0.3258659839630127, + "learning_rate": 1.5870862148834633e-05, + "loss": 0.3533, + "step": 21943 + }, + { + "epoch": 0.6025260845689182, + "grad_norm": 0.388263076543808, + "learning_rate": 1.5870512515100935e-05, + "loss": 0.5363, + "step": 21944 + }, + { + "epoch": 0.6025535420098846, + "grad_norm": 0.33460450172424316, + "learning_rate": 1.5870162870416856e-05, + "loss": 0.4975, + "step": 21945 + }, + { + "epoch": 0.6025809994508512, + "grad_norm": 0.38805922865867615, + "learning_rate": 1.5869813214783046e-05, + "loss": 0.5059, + "step": 21946 + }, + { + "epoch": 0.6026084568918176, + "grad_norm": 0.44581669569015503, + "learning_rate": 1.5869463548200167e-05, + "loss": 0.3835, + "step": 21947 + }, + { + "epoch": 0.6026359143327842, + "grad_norm": 0.3905944228172302, + "learning_rate": 1.586911387066886e-05, + "loss": 0.5329, + "step": 21948 + }, + { + "epoch": 0.6026633717737507, + "grad_norm": 0.3579159080982208, + "learning_rate": 1.5868764182189785e-05, + "loss": 0.5179, + "step": 21949 + }, + { + "epoch": 0.6026908292147172, + "grad_norm": 0.37748244404792786, + "learning_rate": 1.5868414482763593e-05, + "loss": 0.4779, + "step": 21950 + }, + { + "epoch": 0.6027182866556837, + "grad_norm": 0.3483544886112213, + "learning_rate": 1.5868064772390933e-05, + "loss": 0.5824, + "step": 21951 + }, + { + "epoch": 0.6027457440966502, + "grad_norm": 0.5912672877311707, + "learning_rate": 1.586771505107246e-05, + "loss": 0.6305, + "step": 21952 + }, + { + "epoch": 0.6027732015376167, + "grad_norm": 0.37713518738746643, + "learning_rate": 1.5867365318808826e-05, + "loss": 0.5985, + "step": 21953 + }, + { + "epoch": 0.6028006589785831, + "grad_norm": 0.3961130380630493, + "learning_rate": 1.586701557560068e-05, + "loss": 0.5475, + "step": 21954 + }, + { + "epoch": 0.6028281164195497, + "grad_norm": 0.3894003629684448, + "learning_rate": 1.5866665821448682e-05, + "loss": 0.5181, + "step": 21955 + }, + { + "epoch": 0.6028555738605162, + "grad_norm": 0.3629873991012573, + "learning_rate": 1.586631605635348e-05, + "loss": 0.5163, + "step": 21956 + }, + { + "epoch": 0.6028830313014827, + "grad_norm": 0.3464488983154297, + "learning_rate": 1.5865966280315722e-05, + "loss": 0.4484, + "step": 21957 + }, + { + "epoch": 0.6029104887424492, + "grad_norm": 0.3926655352115631, + "learning_rate": 1.5865616493336068e-05, + "loss": 0.4958, + "step": 21958 + }, + { + "epoch": 0.6029379461834157, + "grad_norm": 0.3730889856815338, + "learning_rate": 1.5865266695415167e-05, + "loss": 0.4261, + "step": 21959 + }, + { + "epoch": 0.6029654036243822, + "grad_norm": 0.4760822355747223, + "learning_rate": 1.5864916886553666e-05, + "loss": 0.5575, + "step": 21960 + }, + { + "epoch": 0.6029928610653487, + "grad_norm": 0.40154916048049927, + "learning_rate": 1.586456706675223e-05, + "loss": 0.514, + "step": 21961 + }, + { + "epoch": 0.6030203185063152, + "grad_norm": 0.39313316345214844, + "learning_rate": 1.5864217236011503e-05, + "loss": 0.5347, + "step": 21962 + }, + { + "epoch": 0.6030477759472818, + "grad_norm": 0.33279064297676086, + "learning_rate": 1.586386739433214e-05, + "loss": 0.4536, + "step": 21963 + }, + { + "epoch": 0.6030752333882482, + "grad_norm": 0.4006602168083191, + "learning_rate": 1.586351754171479e-05, + "loss": 0.533, + "step": 21964 + }, + { + "epoch": 0.6031026908292147, + "grad_norm": 0.38476303219795227, + "learning_rate": 1.586316767816011e-05, + "loss": 0.4814, + "step": 21965 + }, + { + "epoch": 0.6031301482701812, + "grad_norm": 0.5103604793548584, + "learning_rate": 1.586281780366875e-05, + "loss": 0.6082, + "step": 21966 + }, + { + "epoch": 0.6031576057111477, + "grad_norm": 0.37775400280952454, + "learning_rate": 1.5862467918241366e-05, + "loss": 0.594, + "step": 21967 + }, + { + "epoch": 0.6031850631521142, + "grad_norm": 0.4842468500137329, + "learning_rate": 1.5862118021878605e-05, + "loss": 0.5558, + "step": 21968 + }, + { + "epoch": 0.6032125205930807, + "grad_norm": 0.3938913643360138, + "learning_rate": 1.5861768114581125e-05, + "loss": 0.5114, + "step": 21969 + }, + { + "epoch": 0.6032399780340473, + "grad_norm": 0.39458370208740234, + "learning_rate": 1.586141819634958e-05, + "loss": 0.4749, + "step": 21970 + }, + { + "epoch": 0.6032674354750137, + "grad_norm": 0.37340155243873596, + "learning_rate": 1.5861068267184612e-05, + "loss": 0.4646, + "step": 21971 + }, + { + "epoch": 0.6032948929159803, + "grad_norm": 0.30501580238342285, + "learning_rate": 1.5860718327086883e-05, + "loss": 0.3994, + "step": 21972 + }, + { + "epoch": 0.6033223503569467, + "grad_norm": 0.3760983943939209, + "learning_rate": 1.5860368376057045e-05, + "loss": 0.5045, + "step": 21973 + }, + { + "epoch": 0.6033498077979133, + "grad_norm": 0.3696975111961365, + "learning_rate": 1.586001841409575e-05, + "loss": 0.4375, + "step": 21974 + }, + { + "epoch": 0.6033772652388797, + "grad_norm": 0.35255134105682373, + "learning_rate": 1.5859668441203652e-05, + "loss": 0.567, + "step": 21975 + }, + { + "epoch": 0.6034047226798462, + "grad_norm": 0.3789617419242859, + "learning_rate": 1.58593184573814e-05, + "loss": 0.5169, + "step": 21976 + }, + { + "epoch": 0.6034321801208128, + "grad_norm": 0.36332494020462036, + "learning_rate": 1.585896846262965e-05, + "loss": 0.5084, + "step": 21977 + }, + { + "epoch": 0.6034596375617792, + "grad_norm": 0.41622650623321533, + "learning_rate": 1.5858618456949054e-05, + "loss": 0.4811, + "step": 21978 + }, + { + "epoch": 0.6034870950027458, + "grad_norm": 0.39039328694343567, + "learning_rate": 1.5858268440340262e-05, + "loss": 0.4848, + "step": 21979 + }, + { + "epoch": 0.6035145524437122, + "grad_norm": 0.38691970705986023, + "learning_rate": 1.585791841280393e-05, + "loss": 0.5486, + "step": 21980 + }, + { + "epoch": 0.6035420098846788, + "grad_norm": 0.42754843831062317, + "learning_rate": 1.5857568374340713e-05, + "loss": 0.4894, + "step": 21981 + }, + { + "epoch": 0.6035694673256452, + "grad_norm": 0.3838261067867279, + "learning_rate": 1.585721832495126e-05, + "loss": 0.6242, + "step": 21982 + }, + { + "epoch": 0.6035969247666118, + "grad_norm": 0.37268924713134766, + "learning_rate": 1.585686826463623e-05, + "loss": 0.4947, + "step": 21983 + }, + { + "epoch": 0.6036243822075783, + "grad_norm": 0.4403771162033081, + "learning_rate": 1.5856518193396266e-05, + "loss": 0.6128, + "step": 21984 + }, + { + "epoch": 0.6036518396485447, + "grad_norm": 0.36013492941856384, + "learning_rate": 1.5856168111232026e-05, + "loss": 0.5198, + "step": 21985 + }, + { + "epoch": 0.6036792970895113, + "grad_norm": 0.4674946963787079, + "learning_rate": 1.5855818018144165e-05, + "loss": 0.5743, + "step": 21986 + }, + { + "epoch": 0.6037067545304777, + "grad_norm": 0.4737405478954315, + "learning_rate": 1.5855467914133336e-05, + "loss": 0.4853, + "step": 21987 + }, + { + "epoch": 0.6037342119714443, + "grad_norm": 0.368693083524704, + "learning_rate": 1.5855117799200187e-05, + "loss": 0.5771, + "step": 21988 + }, + { + "epoch": 0.6037616694124107, + "grad_norm": 0.3832642138004303, + "learning_rate": 1.5854767673345378e-05, + "loss": 0.4598, + "step": 21989 + }, + { + "epoch": 0.6037891268533773, + "grad_norm": 0.4485381841659546, + "learning_rate": 1.5854417536569556e-05, + "loss": 0.5032, + "step": 21990 + }, + { + "epoch": 0.6038165842943438, + "grad_norm": 0.44682446122169495, + "learning_rate": 1.585406738887338e-05, + "loss": 0.5534, + "step": 21991 + }, + { + "epoch": 0.6038440417353103, + "grad_norm": 0.40228596329689026, + "learning_rate": 1.5853717230257498e-05, + "loss": 0.5806, + "step": 21992 + }, + { + "epoch": 0.6038714991762768, + "grad_norm": 0.3701048791408539, + "learning_rate": 1.5853367060722565e-05, + "loss": 0.5917, + "step": 21993 + }, + { + "epoch": 0.6038989566172432, + "grad_norm": 0.3823002576828003, + "learning_rate": 1.5853016880269235e-05, + "loss": 0.5331, + "step": 21994 + }, + { + "epoch": 0.6039264140582098, + "grad_norm": 0.41685935854911804, + "learning_rate": 1.585266668889816e-05, + "loss": 0.4843, + "step": 21995 + }, + { + "epoch": 0.6039538714991762, + "grad_norm": 0.38519036769866943, + "learning_rate": 1.585231648660999e-05, + "loss": 0.5083, + "step": 21996 + }, + { + "epoch": 0.6039813289401428, + "grad_norm": 0.39891815185546875, + "learning_rate": 1.5851966273405387e-05, + "loss": 0.4825, + "step": 21997 + }, + { + "epoch": 0.6040087863811093, + "grad_norm": 0.3900177776813507, + "learning_rate": 1.5851616049284998e-05, + "loss": 0.4841, + "step": 21998 + }, + { + "epoch": 0.6040362438220758, + "grad_norm": 0.38722583651542664, + "learning_rate": 1.585126581424948e-05, + "loss": 0.4266, + "step": 21999 + }, + { + "epoch": 0.6040637012630423, + "grad_norm": 0.43576669692993164, + "learning_rate": 1.585091556829948e-05, + "loss": 0.5808, + "step": 22000 + }, + { + "epoch": 0.6040911587040088, + "grad_norm": 0.4100823700428009, + "learning_rate": 1.5850565311435652e-05, + "loss": 0.4534, + "step": 22001 + }, + { + "epoch": 0.6041186161449753, + "grad_norm": 0.6824311017990112, + "learning_rate": 1.5850215043658657e-05, + "loss": 0.5233, + "step": 22002 + }, + { + "epoch": 0.6041460735859417, + "grad_norm": 0.39708805084228516, + "learning_rate": 1.5849864764969143e-05, + "loss": 0.5111, + "step": 22003 + }, + { + "epoch": 0.6041735310269083, + "grad_norm": 0.3641435503959656, + "learning_rate": 1.5849514475367764e-05, + "loss": 0.5045, + "step": 22004 + }, + { + "epoch": 0.6042009884678748, + "grad_norm": 0.34734851121902466, + "learning_rate": 1.5849164174855175e-05, + "loss": 0.5308, + "step": 22005 + }, + { + "epoch": 0.6042284459088413, + "grad_norm": 0.3554893732070923, + "learning_rate": 1.5848813863432026e-05, + "loss": 0.4376, + "step": 22006 + }, + { + "epoch": 0.6042559033498078, + "grad_norm": 0.39100903272628784, + "learning_rate": 1.5848463541098973e-05, + "loss": 0.5231, + "step": 22007 + }, + { + "epoch": 0.6042833607907743, + "grad_norm": 0.6587493419647217, + "learning_rate": 1.5848113207856668e-05, + "loss": 0.5219, + "step": 22008 + }, + { + "epoch": 0.6043108182317408, + "grad_norm": 0.3491673767566681, + "learning_rate": 1.5847762863705768e-05, + "loss": 0.5176, + "step": 22009 + }, + { + "epoch": 0.6043382756727073, + "grad_norm": 0.4434831142425537, + "learning_rate": 1.584741250864692e-05, + "loss": 0.5389, + "step": 22010 + }, + { + "epoch": 0.6043657331136738, + "grad_norm": 0.3851216733455658, + "learning_rate": 1.5847062142680784e-05, + "loss": 0.5481, + "step": 22011 + }, + { + "epoch": 0.6043931905546404, + "grad_norm": 0.5208058953285217, + "learning_rate": 1.584671176580801e-05, + "loss": 0.527, + "step": 22012 + }, + { + "epoch": 0.6044206479956068, + "grad_norm": 0.4112820625305176, + "learning_rate": 1.5846361378029254e-05, + "loss": 0.525, + "step": 22013 + }, + { + "epoch": 0.6044481054365733, + "grad_norm": 0.5114617347717285, + "learning_rate": 1.5846010979345166e-05, + "loss": 0.4873, + "step": 22014 + }, + { + "epoch": 0.6044755628775398, + "grad_norm": 0.4386422634124756, + "learning_rate": 1.5845660569756404e-05, + "loss": 0.4898, + "step": 22015 + }, + { + "epoch": 0.6045030203185063, + "grad_norm": 0.46579617261886597, + "learning_rate": 1.5845310149263617e-05, + "loss": 0.5227, + "step": 22016 + }, + { + "epoch": 0.6045304777594728, + "grad_norm": 0.4261919856071472, + "learning_rate": 1.5844959717867463e-05, + "loss": 0.5586, + "step": 22017 + }, + { + "epoch": 0.6045579352004393, + "grad_norm": 0.36521458625793457, + "learning_rate": 1.5844609275568592e-05, + "loss": 0.5602, + "step": 22018 + }, + { + "epoch": 0.6045853926414059, + "grad_norm": 0.40752655267715454, + "learning_rate": 1.5844258822367658e-05, + "loss": 0.4777, + "step": 22019 + }, + { + "epoch": 0.6046128500823723, + "grad_norm": 0.38092315196990967, + "learning_rate": 1.5843908358265322e-05, + "loss": 0.5912, + "step": 22020 + }, + { + "epoch": 0.6046403075233389, + "grad_norm": 0.350268691778183, + "learning_rate": 1.5843557883262224e-05, + "loss": 0.4893, + "step": 22021 + }, + { + "epoch": 0.6046677649643053, + "grad_norm": 0.42278265953063965, + "learning_rate": 1.5843207397359033e-05, + "loss": 0.5272, + "step": 22022 + }, + { + "epoch": 0.6046952224052718, + "grad_norm": 0.4499445855617523, + "learning_rate": 1.584285690055639e-05, + "loss": 0.4741, + "step": 22023 + }, + { + "epoch": 0.6047226798462383, + "grad_norm": 0.382244735956192, + "learning_rate": 1.5842506392854958e-05, + "loss": 0.5081, + "step": 22024 + }, + { + "epoch": 0.6047501372872048, + "grad_norm": 0.4162083566188812, + "learning_rate": 1.584215587425538e-05, + "loss": 0.4692, + "step": 22025 + }, + { + "epoch": 0.6047775947281714, + "grad_norm": 0.3337762653827667, + "learning_rate": 1.5841805344758325e-05, + "loss": 0.4367, + "step": 22026 + }, + { + "epoch": 0.6048050521691378, + "grad_norm": 0.39718976616859436, + "learning_rate": 1.5841454804364437e-05, + "loss": 0.558, + "step": 22027 + }, + { + "epoch": 0.6048325096101044, + "grad_norm": 0.38476407527923584, + "learning_rate": 1.5841104253074363e-05, + "loss": 0.479, + "step": 22028 + }, + { + "epoch": 0.6048599670510708, + "grad_norm": 0.4053555428981781, + "learning_rate": 1.5840753690888773e-05, + "loss": 0.5724, + "step": 22029 + }, + { + "epoch": 0.6048874244920374, + "grad_norm": 0.38143840432167053, + "learning_rate": 1.584040311780831e-05, + "loss": 0.502, + "step": 22030 + }, + { + "epoch": 0.6049148819330038, + "grad_norm": 0.44752082228660583, + "learning_rate": 1.5840052533833638e-05, + "loss": 0.6008, + "step": 22031 + }, + { + "epoch": 0.6049423393739704, + "grad_norm": 0.3775634467601776, + "learning_rate": 1.5839701938965397e-05, + "loss": 0.5143, + "step": 22032 + }, + { + "epoch": 0.6049697968149369, + "grad_norm": 0.34329158067703247, + "learning_rate": 1.583935133320425e-05, + "loss": 0.4434, + "step": 22033 + }, + { + "epoch": 0.6049972542559033, + "grad_norm": 0.38565322756767273, + "learning_rate": 1.583900071655085e-05, + "loss": 0.5364, + "step": 22034 + }, + { + "epoch": 0.6050247116968699, + "grad_norm": 0.385924369096756, + "learning_rate": 1.5838650089005847e-05, + "loss": 0.4859, + "step": 22035 + }, + { + "epoch": 0.6050521691378363, + "grad_norm": 0.465825617313385, + "learning_rate": 1.58382994505699e-05, + "loss": 0.5728, + "step": 22036 + }, + { + "epoch": 0.6050796265788029, + "grad_norm": 0.39024296402931213, + "learning_rate": 1.583794880124366e-05, + "loss": 0.5511, + "step": 22037 + }, + { + "epoch": 0.6051070840197693, + "grad_norm": 0.38440871238708496, + "learning_rate": 1.5837598141027784e-05, + "loss": 0.4558, + "step": 22038 + }, + { + "epoch": 0.6051345414607359, + "grad_norm": 0.386993944644928, + "learning_rate": 1.583724746992293e-05, + "loss": 0.5569, + "step": 22039 + }, + { + "epoch": 0.6051619989017024, + "grad_norm": 0.35573261976242065, + "learning_rate": 1.5836896787929736e-05, + "loss": 0.5213, + "step": 22040 + }, + { + "epoch": 0.6051894563426689, + "grad_norm": 0.3912200629711151, + "learning_rate": 1.5836546095048873e-05, + "loss": 0.5338, + "step": 22041 + }, + { + "epoch": 0.6052169137836354, + "grad_norm": 0.38286322355270386, + "learning_rate": 1.5836195391280984e-05, + "loss": 0.5278, + "step": 22042 + }, + { + "epoch": 0.6052443712246018, + "grad_norm": 0.3643689453601837, + "learning_rate": 1.5835844676626732e-05, + "loss": 0.509, + "step": 22043 + }, + { + "epoch": 0.6052718286655684, + "grad_norm": 0.39928707480430603, + "learning_rate": 1.5835493951086766e-05, + "loss": 0.461, + "step": 22044 + }, + { + "epoch": 0.6052992861065348, + "grad_norm": 0.34975969791412354, + "learning_rate": 1.583514321466174e-05, + "loss": 0.459, + "step": 22045 + }, + { + "epoch": 0.6053267435475014, + "grad_norm": 0.3743583559989929, + "learning_rate": 1.583479246735231e-05, + "loss": 0.4258, + "step": 22046 + }, + { + "epoch": 0.6053542009884679, + "grad_norm": 0.36621275544166565, + "learning_rate": 1.5834441709159132e-05, + "loss": 0.5417, + "step": 22047 + }, + { + "epoch": 0.6053816584294344, + "grad_norm": 0.39711233973503113, + "learning_rate": 1.5834090940082855e-05, + "loss": 0.5181, + "step": 22048 + }, + { + "epoch": 0.6054091158704009, + "grad_norm": 0.3767964243888855, + "learning_rate": 1.5833740160124138e-05, + "loss": 0.5255, + "step": 22049 + }, + { + "epoch": 0.6054365733113674, + "grad_norm": 0.38204848766326904, + "learning_rate": 1.5833389369283634e-05, + "loss": 0.4886, + "step": 22050 + }, + { + "epoch": 0.6054640307523339, + "grad_norm": 0.7063992619514465, + "learning_rate": 1.5833038567561995e-05, + "loss": 0.5643, + "step": 22051 + }, + { + "epoch": 0.6054914881933003, + "grad_norm": 0.3610214591026306, + "learning_rate": 1.583268775495988e-05, + "loss": 0.4851, + "step": 22052 + }, + { + "epoch": 0.6055189456342669, + "grad_norm": 0.38465631008148193, + "learning_rate": 1.5832336931477937e-05, + "loss": 0.4708, + "step": 22053 + }, + { + "epoch": 0.6055464030752334, + "grad_norm": 0.4002041518688202, + "learning_rate": 1.583198609711683e-05, + "loss": 0.4663, + "step": 22054 + }, + { + "epoch": 0.6055738605161999, + "grad_norm": 0.36461061239242554, + "learning_rate": 1.5831635251877205e-05, + "loss": 0.5334, + "step": 22055 + }, + { + "epoch": 0.6056013179571664, + "grad_norm": 0.3981207311153412, + "learning_rate": 1.5831284395759717e-05, + "loss": 0.4591, + "step": 22056 + }, + { + "epoch": 0.6056287753981329, + "grad_norm": 0.412582129240036, + "learning_rate": 1.583093352876502e-05, + "loss": 0.5127, + "step": 22057 + }, + { + "epoch": 0.6056562328390994, + "grad_norm": 0.38834890723228455, + "learning_rate": 1.583058265089378e-05, + "loss": 0.5061, + "step": 22058 + }, + { + "epoch": 0.6056836902800659, + "grad_norm": 0.45924457907676697, + "learning_rate": 1.5830231762146634e-05, + "loss": 0.5281, + "step": 22059 + }, + { + "epoch": 0.6057111477210324, + "grad_norm": 0.37035611271858215, + "learning_rate": 1.5829880862524252e-05, + "loss": 0.5188, + "step": 22060 + }, + { + "epoch": 0.605738605161999, + "grad_norm": 0.42507433891296387, + "learning_rate": 1.5829529952027276e-05, + "loss": 0.514, + "step": 22061 + }, + { + "epoch": 0.6057660626029654, + "grad_norm": 0.3672136962413788, + "learning_rate": 1.582917903065637e-05, + "loss": 0.5013, + "step": 22062 + }, + { + "epoch": 0.6057935200439319, + "grad_norm": 0.41776242852211, + "learning_rate": 1.582882809841218e-05, + "loss": 0.5143, + "step": 22063 + }, + { + "epoch": 0.6058209774848984, + "grad_norm": 0.36281895637512207, + "learning_rate": 1.5828477155295366e-05, + "loss": 0.5113, + "step": 22064 + }, + { + "epoch": 0.6058484349258649, + "grad_norm": 0.4372508227825165, + "learning_rate": 1.5828126201306585e-05, + "loss": 0.544, + "step": 22065 + }, + { + "epoch": 0.6058758923668314, + "grad_norm": 0.3605383038520813, + "learning_rate": 1.582777523644649e-05, + "loss": 0.4445, + "step": 22066 + }, + { + "epoch": 0.6059033498077979, + "grad_norm": 0.33509519696235657, + "learning_rate": 1.582742426071573e-05, + "loss": 0.5025, + "step": 22067 + }, + { + "epoch": 0.6059308072487645, + "grad_norm": 0.42036348581314087, + "learning_rate": 1.5827073274114965e-05, + "loss": 0.5581, + "step": 22068 + }, + { + "epoch": 0.6059582646897309, + "grad_norm": 0.4040825366973877, + "learning_rate": 1.582672227664485e-05, + "loss": 0.6065, + "step": 22069 + }, + { + "epoch": 0.6059857221306975, + "grad_norm": 0.48206230998039246, + "learning_rate": 1.5826371268306037e-05, + "loss": 0.5691, + "step": 22070 + }, + { + "epoch": 0.6060131795716639, + "grad_norm": 0.4056321680545807, + "learning_rate": 1.5826020249099178e-05, + "loss": 0.5332, + "step": 22071 + }, + { + "epoch": 0.6060406370126304, + "grad_norm": 0.3296893835067749, + "learning_rate": 1.5825669219024934e-05, + "loss": 0.4048, + "step": 22072 + }, + { + "epoch": 0.6060680944535969, + "grad_norm": 0.35816094279289246, + "learning_rate": 1.5825318178083963e-05, + "loss": 0.5036, + "step": 22073 + }, + { + "epoch": 0.6060955518945634, + "grad_norm": 0.3891911208629608, + "learning_rate": 1.582496712627691e-05, + "loss": 0.506, + "step": 22074 + }, + { + "epoch": 0.60612300933553, + "grad_norm": 0.4552501440048218, + "learning_rate": 1.5824616063604434e-05, + "loss": 0.5564, + "step": 22075 + }, + { + "epoch": 0.6061504667764964, + "grad_norm": 0.330242395401001, + "learning_rate": 1.582426499006719e-05, + "loss": 0.4454, + "step": 22076 + }, + { + "epoch": 0.606177924217463, + "grad_norm": 0.4871516525745392, + "learning_rate": 1.5823913905665837e-05, + "loss": 0.5412, + "step": 22077 + }, + { + "epoch": 0.6062053816584294, + "grad_norm": 0.5009018778800964, + "learning_rate": 1.582356281040102e-05, + "loss": 0.4972, + "step": 22078 + }, + { + "epoch": 0.606232839099396, + "grad_norm": 0.34702619910240173, + "learning_rate": 1.5823211704273402e-05, + "loss": 0.5153, + "step": 22079 + }, + { + "epoch": 0.6062602965403624, + "grad_norm": 0.3811483085155487, + "learning_rate": 1.5822860587283637e-05, + "loss": 0.4957, + "step": 22080 + }, + { + "epoch": 0.606287753981329, + "grad_norm": 0.4125136137008667, + "learning_rate": 1.582250945943238e-05, + "loss": 0.5517, + "step": 22081 + }, + { + "epoch": 0.6063152114222955, + "grad_norm": 0.3311814069747925, + "learning_rate": 1.582215832072028e-05, + "loss": 0.473, + "step": 22082 + }, + { + "epoch": 0.6063426688632619, + "grad_norm": 0.3878011405467987, + "learning_rate": 1.5821807171148e-05, + "loss": 0.4384, + "step": 22083 + }, + { + "epoch": 0.6063701263042285, + "grad_norm": 0.38572394847869873, + "learning_rate": 1.5821456010716187e-05, + "loss": 0.5004, + "step": 22084 + }, + { + "epoch": 0.6063975837451949, + "grad_norm": 0.3880552053451538, + "learning_rate": 1.5821104839425505e-05, + "loss": 0.5454, + "step": 22085 + }, + { + "epoch": 0.6064250411861615, + "grad_norm": 0.4241284430027008, + "learning_rate": 1.5820753657276606e-05, + "loss": 0.4663, + "step": 22086 + }, + { + "epoch": 0.6064524986271279, + "grad_norm": 0.37312987446784973, + "learning_rate": 1.5820402464270136e-05, + "loss": 0.6423, + "step": 22087 + }, + { + "epoch": 0.6064799560680945, + "grad_norm": 0.3681066036224365, + "learning_rate": 1.5820051260406765e-05, + "loss": 0.5087, + "step": 22088 + }, + { + "epoch": 0.606507413509061, + "grad_norm": 0.3662545084953308, + "learning_rate": 1.5819700045687135e-05, + "loss": 0.5006, + "step": 22089 + }, + { + "epoch": 0.6065348709500274, + "grad_norm": 0.4031451940536499, + "learning_rate": 1.5819348820111913e-05, + "loss": 0.5658, + "step": 22090 + }, + { + "epoch": 0.606562328390994, + "grad_norm": 0.34718087315559387, + "learning_rate": 1.5818997583681745e-05, + "loss": 0.4529, + "step": 22091 + }, + { + "epoch": 0.6065897858319604, + "grad_norm": 0.35310637950897217, + "learning_rate": 1.581864633639729e-05, + "loss": 0.528, + "step": 22092 + }, + { + "epoch": 0.606617243272927, + "grad_norm": 0.43954989314079285, + "learning_rate": 1.5818295078259204e-05, + "loss": 0.5546, + "step": 22093 + }, + { + "epoch": 0.6066447007138934, + "grad_norm": 0.514403760433197, + "learning_rate": 1.5817943809268137e-05, + "loss": 0.4346, + "step": 22094 + }, + { + "epoch": 0.60667215815486, + "grad_norm": 0.39188629388809204, + "learning_rate": 1.581759252942475e-05, + "loss": 0.5332, + "step": 22095 + }, + { + "epoch": 0.6066996155958264, + "grad_norm": 0.40558576583862305, + "learning_rate": 1.5817241238729698e-05, + "loss": 0.5214, + "step": 22096 + }, + { + "epoch": 0.606727073036793, + "grad_norm": 0.3398810625076294, + "learning_rate": 1.581688993718363e-05, + "loss": 0.4107, + "step": 22097 + }, + { + "epoch": 0.6067545304777595, + "grad_norm": 0.4979395270347595, + "learning_rate": 1.581653862478721e-05, + "loss": 0.5787, + "step": 22098 + }, + { + "epoch": 0.606781987918726, + "grad_norm": 0.3995693325996399, + "learning_rate": 1.5816187301541087e-05, + "loss": 0.4773, + "step": 22099 + }, + { + "epoch": 0.6068094453596925, + "grad_norm": 0.36908799409866333, + "learning_rate": 1.581583596744592e-05, + "loss": 0.5241, + "step": 22100 + }, + { + "epoch": 0.6068369028006589, + "grad_norm": 0.3589445650577545, + "learning_rate": 1.581548462250236e-05, + "loss": 0.4986, + "step": 22101 + }, + { + "epoch": 0.6068643602416255, + "grad_norm": 0.3230968415737152, + "learning_rate": 1.5815133266711065e-05, + "loss": 0.4185, + "step": 22102 + }, + { + "epoch": 0.6068918176825919, + "grad_norm": 0.41053497791290283, + "learning_rate": 1.5814781900072695e-05, + "loss": 0.5187, + "step": 22103 + }, + { + "epoch": 0.6069192751235585, + "grad_norm": 0.39029964804649353, + "learning_rate": 1.5814430522587894e-05, + "loss": 0.4453, + "step": 22104 + }, + { + "epoch": 0.606946732564525, + "grad_norm": 0.35968488454818726, + "learning_rate": 1.581407913425733e-05, + "loss": 0.5161, + "step": 22105 + }, + { + "epoch": 0.6069741900054915, + "grad_norm": 0.7116995453834534, + "learning_rate": 1.581372773508165e-05, + "loss": 0.5388, + "step": 22106 + }, + { + "epoch": 0.607001647446458, + "grad_norm": 0.39681312441825867, + "learning_rate": 1.5813376325061515e-05, + "loss": 0.5519, + "step": 22107 + }, + { + "epoch": 0.6070291048874245, + "grad_norm": 0.3241901099681854, + "learning_rate": 1.5813024904197573e-05, + "loss": 0.4224, + "step": 22108 + }, + { + "epoch": 0.607056562328391, + "grad_norm": 0.4172731339931488, + "learning_rate": 1.581267347249049e-05, + "loss": 0.5128, + "step": 22109 + }, + { + "epoch": 0.6070840197693574, + "grad_norm": 0.38281717896461487, + "learning_rate": 1.5812322029940915e-05, + "loss": 0.4974, + "step": 22110 + }, + { + "epoch": 0.607111477210324, + "grad_norm": 0.38229283690452576, + "learning_rate": 1.58119705765495e-05, + "loss": 0.4022, + "step": 22111 + }, + { + "epoch": 0.6071389346512905, + "grad_norm": 0.37011414766311646, + "learning_rate": 1.581161911231691e-05, + "loss": 0.561, + "step": 22112 + }, + { + "epoch": 0.607166392092257, + "grad_norm": 0.4037761092185974, + "learning_rate": 1.581126763724379e-05, + "loss": 0.4374, + "step": 22113 + }, + { + "epoch": 0.6071938495332235, + "grad_norm": 0.35760006308555603, + "learning_rate": 1.5810916151330803e-05, + "loss": 0.5416, + "step": 22114 + }, + { + "epoch": 0.60722130697419, + "grad_norm": 0.3770168721675873, + "learning_rate": 1.5810564654578607e-05, + "loss": 0.55, + "step": 22115 + }, + { + "epoch": 0.6072487644151565, + "grad_norm": 0.38601550459861755, + "learning_rate": 1.581021314698785e-05, + "loss": 0.488, + "step": 22116 + }, + { + "epoch": 0.607276221856123, + "grad_norm": 0.37698793411254883, + "learning_rate": 1.580986162855919e-05, + "loss": 0.5317, + "step": 22117 + }, + { + "epoch": 0.6073036792970895, + "grad_norm": 0.39780402183532715, + "learning_rate": 1.5809510099293284e-05, + "loss": 0.5391, + "step": 22118 + }, + { + "epoch": 0.607331136738056, + "grad_norm": 0.3784915506839752, + "learning_rate": 1.580915855919079e-05, + "loss": 0.463, + "step": 22119 + }, + { + "epoch": 0.6073585941790225, + "grad_norm": 0.3905419707298279, + "learning_rate": 1.5808807008252357e-05, + "loss": 0.3821, + "step": 22120 + }, + { + "epoch": 0.607386051619989, + "grad_norm": 0.3899904191493988, + "learning_rate": 1.580845544647865e-05, + "loss": 0.4019, + "step": 22121 + }, + { + "epoch": 0.6074135090609555, + "grad_norm": 0.4141240119934082, + "learning_rate": 1.5808103873870316e-05, + "loss": 0.4964, + "step": 22122 + }, + { + "epoch": 0.607440966501922, + "grad_norm": 0.3833721876144409, + "learning_rate": 1.5807752290428014e-05, + "loss": 0.491, + "step": 22123 + }, + { + "epoch": 0.6074684239428885, + "grad_norm": 0.42812344431877136, + "learning_rate": 1.5807400696152405e-05, + "loss": 0.5241, + "step": 22124 + }, + { + "epoch": 0.607495881383855, + "grad_norm": 0.3568246066570282, + "learning_rate": 1.5807049091044136e-05, + "loss": 0.5473, + "step": 22125 + }, + { + "epoch": 0.6075233388248216, + "grad_norm": 0.36504271626472473, + "learning_rate": 1.580669747510387e-05, + "loss": 0.4721, + "step": 22126 + }, + { + "epoch": 0.607550796265788, + "grad_norm": 0.689912736415863, + "learning_rate": 1.580634584833226e-05, + "loss": 0.6213, + "step": 22127 + }, + { + "epoch": 0.6075782537067546, + "grad_norm": 0.42413294315338135, + "learning_rate": 1.5805994210729957e-05, + "loss": 0.4986, + "step": 22128 + }, + { + "epoch": 0.607605711147721, + "grad_norm": 0.38781794905662537, + "learning_rate": 1.5805642562297626e-05, + "loss": 0.488, + "step": 22129 + }, + { + "epoch": 0.6076331685886875, + "grad_norm": 0.38299888372421265, + "learning_rate": 1.5805290903035918e-05, + "loss": 0.5014, + "step": 22130 + }, + { + "epoch": 0.607660626029654, + "grad_norm": 0.41099846363067627, + "learning_rate": 1.5804939232945487e-05, + "loss": 0.5192, + "step": 22131 + }, + { + "epoch": 0.6076880834706205, + "grad_norm": 0.3956263065338135, + "learning_rate": 1.5804587552026997e-05, + "loss": 0.5487, + "step": 22132 + }, + { + "epoch": 0.6077155409115871, + "grad_norm": 0.3573462665081024, + "learning_rate": 1.580423586028109e-05, + "loss": 0.4534, + "step": 22133 + }, + { + "epoch": 0.6077429983525535, + "grad_norm": 0.3751536011695862, + "learning_rate": 1.5803884157708437e-05, + "loss": 0.5375, + "step": 22134 + }, + { + "epoch": 0.6077704557935201, + "grad_norm": 0.35012564063072205, + "learning_rate": 1.5803532444309692e-05, + "loss": 0.4569, + "step": 22135 + }, + { + "epoch": 0.6077979132344865, + "grad_norm": 0.39495429396629333, + "learning_rate": 1.5803180720085497e-05, + "loss": 0.4655, + "step": 22136 + }, + { + "epoch": 0.6078253706754531, + "grad_norm": 0.4516630470752716, + "learning_rate": 1.5802828985036526e-05, + "loss": 0.4964, + "step": 22137 + }, + { + "epoch": 0.6078528281164195, + "grad_norm": 0.4064875543117523, + "learning_rate": 1.5802477239163422e-05, + "loss": 0.4325, + "step": 22138 + }, + { + "epoch": 0.607880285557386, + "grad_norm": 0.43938004970550537, + "learning_rate": 1.5802125482466847e-05, + "loss": 0.4949, + "step": 22139 + }, + { + "epoch": 0.6079077429983526, + "grad_norm": 0.3735351860523224, + "learning_rate": 1.5801773714947457e-05, + "loss": 0.4857, + "step": 22140 + }, + { + "epoch": 0.607935200439319, + "grad_norm": 0.37340858578681946, + "learning_rate": 1.5801421936605904e-05, + "loss": 0.5456, + "step": 22141 + }, + { + "epoch": 0.6079626578802856, + "grad_norm": 0.3619963228702545, + "learning_rate": 1.580107014744285e-05, + "loss": 0.4814, + "step": 22142 + }, + { + "epoch": 0.607990115321252, + "grad_norm": 0.4126189649105072, + "learning_rate": 1.580071834745895e-05, + "loss": 0.5151, + "step": 22143 + }, + { + "epoch": 0.6080175727622186, + "grad_norm": 0.329281747341156, + "learning_rate": 1.5800366536654857e-05, + "loss": 0.4687, + "step": 22144 + }, + { + "epoch": 0.608045030203185, + "grad_norm": 0.385412335395813, + "learning_rate": 1.580001471503123e-05, + "loss": 0.4565, + "step": 22145 + }, + { + "epoch": 0.6080724876441516, + "grad_norm": 0.3456641137599945, + "learning_rate": 1.5799662882588724e-05, + "loss": 0.5119, + "step": 22146 + }, + { + "epoch": 0.6080999450851181, + "grad_norm": 0.4667154848575592, + "learning_rate": 1.5799311039327997e-05, + "loss": 0.5187, + "step": 22147 + }, + { + "epoch": 0.6081274025260845, + "grad_norm": 0.45305895805358887, + "learning_rate": 1.5798959185249704e-05, + "loss": 0.5699, + "step": 22148 + }, + { + "epoch": 0.6081548599670511, + "grad_norm": 0.3830673098564148, + "learning_rate": 1.57986073203545e-05, + "loss": 0.5125, + "step": 22149 + }, + { + "epoch": 0.6081823174080175, + "grad_norm": 0.4098523259162903, + "learning_rate": 1.5798255444643042e-05, + "loss": 0.5614, + "step": 22150 + }, + { + "epoch": 0.6082097748489841, + "grad_norm": 0.4794161915779114, + "learning_rate": 1.579790355811599e-05, + "loss": 0.5462, + "step": 22151 + }, + { + "epoch": 0.6082372322899505, + "grad_norm": 0.36046695709228516, + "learning_rate": 1.5797551660773992e-05, + "loss": 0.453, + "step": 22152 + }, + { + "epoch": 0.6082646897309171, + "grad_norm": 0.4022983908653259, + "learning_rate": 1.5797199752617714e-05, + "loss": 0.5717, + "step": 22153 + }, + { + "epoch": 0.6082921471718836, + "grad_norm": 0.3415639400482178, + "learning_rate": 1.5796847833647807e-05, + "loss": 0.4571, + "step": 22154 + }, + { + "epoch": 0.6083196046128501, + "grad_norm": 0.39974719285964966, + "learning_rate": 1.579649590386493e-05, + "loss": 0.5214, + "step": 22155 + }, + { + "epoch": 0.6083470620538166, + "grad_norm": 0.4378117322921753, + "learning_rate": 1.5796143963269737e-05, + "loss": 0.5754, + "step": 22156 + }, + { + "epoch": 0.608374519494783, + "grad_norm": 0.383973628282547, + "learning_rate": 1.5795792011862886e-05, + "loss": 0.4821, + "step": 22157 + }, + { + "epoch": 0.6084019769357496, + "grad_norm": 0.34439054131507874, + "learning_rate": 1.5795440049645032e-05, + "loss": 0.5771, + "step": 22158 + }, + { + "epoch": 0.608429434376716, + "grad_norm": 0.38958635926246643, + "learning_rate": 1.579508807661683e-05, + "loss": 0.5647, + "step": 22159 + }, + { + "epoch": 0.6084568918176826, + "grad_norm": 0.35139337182044983, + "learning_rate": 1.5794736092778946e-05, + "loss": 0.5245, + "step": 22160 + }, + { + "epoch": 0.6084843492586491, + "grad_norm": 0.3289163112640381, + "learning_rate": 1.579438409813203e-05, + "loss": 0.4481, + "step": 22161 + }, + { + "epoch": 0.6085118066996156, + "grad_norm": 0.4133893847465515, + "learning_rate": 1.579403209267673e-05, + "loss": 0.4786, + "step": 22162 + }, + { + "epoch": 0.6085392641405821, + "grad_norm": 0.3905380964279175, + "learning_rate": 1.5793680076413718e-05, + "loss": 0.5899, + "step": 22163 + }, + { + "epoch": 0.6085667215815486, + "grad_norm": 0.35780879855155945, + "learning_rate": 1.5793328049343637e-05, + "loss": 0.4488, + "step": 22164 + }, + { + "epoch": 0.6085941790225151, + "grad_norm": 0.3628959059715271, + "learning_rate": 1.5792976011467156e-05, + "loss": 0.5132, + "step": 22165 + }, + { + "epoch": 0.6086216364634816, + "grad_norm": 0.35345855355262756, + "learning_rate": 1.5792623962784924e-05, + "loss": 0.4446, + "step": 22166 + }, + { + "epoch": 0.6086490939044481, + "grad_norm": 0.35759517550468445, + "learning_rate": 1.57922719032976e-05, + "loss": 0.467, + "step": 22167 + }, + { + "epoch": 0.6086765513454147, + "grad_norm": 0.41339898109436035, + "learning_rate": 1.579191983300584e-05, + "loss": 0.577, + "step": 22168 + }, + { + "epoch": 0.6087040087863811, + "grad_norm": 0.4117659628391266, + "learning_rate": 1.57915677519103e-05, + "loss": 0.5659, + "step": 22169 + }, + { + "epoch": 0.6087314662273476, + "grad_norm": 0.45280247926712036, + "learning_rate": 1.579121566001164e-05, + "loss": 0.5908, + "step": 22170 + }, + { + "epoch": 0.6087589236683141, + "grad_norm": 0.36044663190841675, + "learning_rate": 1.5790863557310512e-05, + "loss": 0.5338, + "step": 22171 + }, + { + "epoch": 0.6087863811092806, + "grad_norm": 0.34795480966567993, + "learning_rate": 1.5790511443807576e-05, + "loss": 0.5583, + "step": 22172 + }, + { + "epoch": 0.6088138385502471, + "grad_norm": 0.4134006202220917, + "learning_rate": 1.5790159319503485e-05, + "loss": 0.504, + "step": 22173 + }, + { + "epoch": 0.6088412959912136, + "grad_norm": 0.3408929705619812, + "learning_rate": 1.5789807184398905e-05, + "loss": 0.5709, + "step": 22174 + }, + { + "epoch": 0.6088687534321802, + "grad_norm": 0.36412474513053894, + "learning_rate": 1.578945503849448e-05, + "loss": 0.4595, + "step": 22175 + }, + { + "epoch": 0.6088962108731466, + "grad_norm": 0.3979083299636841, + "learning_rate": 1.578910288179088e-05, + "loss": 0.4969, + "step": 22176 + }, + { + "epoch": 0.6089236683141132, + "grad_norm": 0.3977890610694885, + "learning_rate": 1.5788750714288753e-05, + "loss": 0.4747, + "step": 22177 + }, + { + "epoch": 0.6089511257550796, + "grad_norm": 0.39019882678985596, + "learning_rate": 1.5788398535988758e-05, + "loss": 0.5, + "step": 22178 + }, + { + "epoch": 0.6089785831960461, + "grad_norm": 0.3748627007007599, + "learning_rate": 1.5788046346891553e-05, + "loss": 0.4674, + "step": 22179 + }, + { + "epoch": 0.6090060406370126, + "grad_norm": 0.3849509358406067, + "learning_rate": 1.5787694146997796e-05, + "loss": 0.4832, + "step": 22180 + }, + { + "epoch": 0.6090334980779791, + "grad_norm": 0.341423362493515, + "learning_rate": 1.5787341936308135e-05, + "loss": 0.525, + "step": 22181 + }, + { + "epoch": 0.6090609555189457, + "grad_norm": 0.38075128197669983, + "learning_rate": 1.5786989714823244e-05, + "loss": 0.45, + "step": 22182 + }, + { + "epoch": 0.6090884129599121, + "grad_norm": 0.3354419469833374, + "learning_rate": 1.578663748254376e-05, + "loss": 0.4371, + "step": 22183 + }, + { + "epoch": 0.6091158704008787, + "grad_norm": 0.37784838676452637, + "learning_rate": 1.5786285239470357e-05, + "loss": 0.4821, + "step": 22184 + }, + { + "epoch": 0.6091433278418451, + "grad_norm": 0.3513641953468323, + "learning_rate": 1.5785932985603683e-05, + "loss": 0.5098, + "step": 22185 + }, + { + "epoch": 0.6091707852828117, + "grad_norm": 0.40708065032958984, + "learning_rate": 1.57855807209444e-05, + "loss": 0.4362, + "step": 22186 + }, + { + "epoch": 0.6091982427237781, + "grad_norm": 0.404041588306427, + "learning_rate": 1.578522844549316e-05, + "loss": 0.4709, + "step": 22187 + }, + { + "epoch": 0.6092257001647446, + "grad_norm": 0.3751591444015503, + "learning_rate": 1.578487615925062e-05, + "loss": 0.5196, + "step": 22188 + }, + { + "epoch": 0.6092531576057112, + "grad_norm": 0.35765376687049866, + "learning_rate": 1.5784523862217443e-05, + "loss": 0.496, + "step": 22189 + }, + { + "epoch": 0.6092806150466776, + "grad_norm": 0.5689919590950012, + "learning_rate": 1.5784171554394283e-05, + "loss": 0.5224, + "step": 22190 + }, + { + "epoch": 0.6093080724876442, + "grad_norm": 0.34813812375068665, + "learning_rate": 1.5783819235781797e-05, + "loss": 0.4978, + "step": 22191 + }, + { + "epoch": 0.6093355299286106, + "grad_norm": 0.37208694219589233, + "learning_rate": 1.578346690638064e-05, + "loss": 0.4551, + "step": 22192 + }, + { + "epoch": 0.6093629873695772, + "grad_norm": 0.4006715714931488, + "learning_rate": 1.5783114566191472e-05, + "loss": 0.508, + "step": 22193 + }, + { + "epoch": 0.6093904448105436, + "grad_norm": 0.43344560265541077, + "learning_rate": 1.578276221521495e-05, + "loss": 0.616, + "step": 22194 + }, + { + "epoch": 0.6094179022515102, + "grad_norm": 0.4291875660419464, + "learning_rate": 1.5782409853451734e-05, + "loss": 0.5072, + "step": 22195 + }, + { + "epoch": 0.6094453596924767, + "grad_norm": 0.34838780760765076, + "learning_rate": 1.5782057480902473e-05, + "loss": 0.5197, + "step": 22196 + }, + { + "epoch": 0.6094728171334431, + "grad_norm": 0.38382917642593384, + "learning_rate": 1.5781705097567833e-05, + "loss": 0.4904, + "step": 22197 + }, + { + "epoch": 0.6095002745744097, + "grad_norm": 0.3871453106403351, + "learning_rate": 1.578135270344847e-05, + "loss": 0.5058, + "step": 22198 + }, + { + "epoch": 0.6095277320153761, + "grad_norm": 0.47653159499168396, + "learning_rate": 1.578100029854503e-05, + "loss": 0.577, + "step": 22199 + }, + { + "epoch": 0.6095551894563427, + "grad_norm": 0.36534708738327026, + "learning_rate": 1.578064788285819e-05, + "loss": 0.5429, + "step": 22200 + }, + { + "epoch": 0.6095826468973091, + "grad_norm": 0.3328348994255066, + "learning_rate": 1.5780295456388587e-05, + "loss": 0.471, + "step": 22201 + }, + { + "epoch": 0.6096101043382757, + "grad_norm": 0.396613746881485, + "learning_rate": 1.5779943019136896e-05, + "loss": 0.4894, + "step": 22202 + }, + { + "epoch": 0.6096375617792422, + "grad_norm": 0.3427937924861908, + "learning_rate": 1.577959057110376e-05, + "loss": 0.528, + "step": 22203 + }, + { + "epoch": 0.6096650192202087, + "grad_norm": 0.39479321241378784, + "learning_rate": 1.5779238112289847e-05, + "loss": 0.5505, + "step": 22204 + }, + { + "epoch": 0.6096924766611752, + "grad_norm": 0.41960492730140686, + "learning_rate": 1.577888564269581e-05, + "loss": 0.4446, + "step": 22205 + }, + { + "epoch": 0.6097199341021416, + "grad_norm": 0.34106117486953735, + "learning_rate": 1.5778533162322306e-05, + "loss": 0.5086, + "step": 22206 + }, + { + "epoch": 0.6097473915431082, + "grad_norm": 0.38357850909233093, + "learning_rate": 1.5778180671169994e-05, + "loss": 0.6167, + "step": 22207 + }, + { + "epoch": 0.6097748489840746, + "grad_norm": 0.3277190625667572, + "learning_rate": 1.5777828169239527e-05, + "loss": 0.4569, + "step": 22208 + }, + { + "epoch": 0.6098023064250412, + "grad_norm": 0.44710683822631836, + "learning_rate": 1.5777475656531572e-05, + "loss": 0.5501, + "step": 22209 + }, + { + "epoch": 0.6098297638660077, + "grad_norm": 0.46421998739242554, + "learning_rate": 1.577712313304678e-05, + "loss": 0.5072, + "step": 22210 + }, + { + "epoch": 0.6098572213069742, + "grad_norm": 0.3625626266002655, + "learning_rate": 1.5776770598785807e-05, + "loss": 0.5418, + "step": 22211 + }, + { + "epoch": 0.6098846787479407, + "grad_norm": 0.4489111006259918, + "learning_rate": 1.5776418053749315e-05, + "loss": 0.4951, + "step": 22212 + }, + { + "epoch": 0.6099121361889072, + "grad_norm": 0.34615063667297363, + "learning_rate": 1.577606549793796e-05, + "loss": 0.4332, + "step": 22213 + }, + { + "epoch": 0.6099395936298737, + "grad_norm": 0.4649277627468109, + "learning_rate": 1.57757129313524e-05, + "loss": 0.4734, + "step": 22214 + }, + { + "epoch": 0.6099670510708401, + "grad_norm": 0.35214611887931824, + "learning_rate": 1.577536035399329e-05, + "loss": 0.5238, + "step": 22215 + }, + { + "epoch": 0.6099945085118067, + "grad_norm": 0.395408570766449, + "learning_rate": 1.577500776586129e-05, + "loss": 0.5115, + "step": 22216 + }, + { + "epoch": 0.6100219659527732, + "grad_norm": 0.3464343547821045, + "learning_rate": 1.5774655166957055e-05, + "loss": 0.4399, + "step": 22217 + }, + { + "epoch": 0.6100494233937397, + "grad_norm": 0.4346863627433777, + "learning_rate": 1.5774302557281253e-05, + "loss": 0.5518, + "step": 22218 + }, + { + "epoch": 0.6100768808347062, + "grad_norm": 0.38884350657463074, + "learning_rate": 1.5773949936834525e-05, + "loss": 0.4731, + "step": 22219 + }, + { + "epoch": 0.6101043382756727, + "grad_norm": 0.39773014187812805, + "learning_rate": 1.577359730561754e-05, + "loss": 0.4282, + "step": 22220 + }, + { + "epoch": 0.6101317957166392, + "grad_norm": 0.4026013910770416, + "learning_rate": 1.5773244663630955e-05, + "loss": 0.5039, + "step": 22221 + }, + { + "epoch": 0.6101592531576057, + "grad_norm": 0.40135249495506287, + "learning_rate": 1.5772892010875426e-05, + "loss": 0.4933, + "step": 22222 + }, + { + "epoch": 0.6101867105985722, + "grad_norm": 0.37555697560310364, + "learning_rate": 1.577253934735161e-05, + "loss": 0.4589, + "step": 22223 + }, + { + "epoch": 0.6102141680395388, + "grad_norm": 0.40948793292045593, + "learning_rate": 1.5772186673060165e-05, + "loss": 0.5894, + "step": 22224 + }, + { + "epoch": 0.6102416254805052, + "grad_norm": 0.36204996705055237, + "learning_rate": 1.577183398800175e-05, + "loss": 0.5111, + "step": 22225 + }, + { + "epoch": 0.6102690829214718, + "grad_norm": 0.4689998924732208, + "learning_rate": 1.5771481292177023e-05, + "loss": 0.4118, + "step": 22226 + }, + { + "epoch": 0.6102965403624382, + "grad_norm": 0.36782726645469666, + "learning_rate": 1.5771128585586645e-05, + "loss": 0.5473, + "step": 22227 + }, + { + "epoch": 0.6103239978034047, + "grad_norm": 0.36295372247695923, + "learning_rate": 1.577077586823126e-05, + "loss": 0.5027, + "step": 22228 + }, + { + "epoch": 0.6103514552443712, + "grad_norm": 0.4023386538028717, + "learning_rate": 1.5770423140111545e-05, + "loss": 0.5306, + "step": 22229 + }, + { + "epoch": 0.6103789126853377, + "grad_norm": 0.3907145857810974, + "learning_rate": 1.577007040122815e-05, + "loss": 0.5521, + "step": 22230 + }, + { + "epoch": 0.6104063701263043, + "grad_norm": 0.38940373063087463, + "learning_rate": 1.576971765158173e-05, + "loss": 0.5148, + "step": 22231 + }, + { + "epoch": 0.6104338275672707, + "grad_norm": 0.34481024742126465, + "learning_rate": 1.5769364891172942e-05, + "loss": 0.473, + "step": 22232 + }, + { + "epoch": 0.6104612850082373, + "grad_norm": 0.4060300886631012, + "learning_rate": 1.5769012120002448e-05, + "loss": 0.5675, + "step": 22233 + }, + { + "epoch": 0.6104887424492037, + "grad_norm": 0.37747102975845337, + "learning_rate": 1.5768659338070905e-05, + "loss": 0.4409, + "step": 22234 + }, + { + "epoch": 0.6105161998901703, + "grad_norm": 0.361987829208374, + "learning_rate": 1.5768306545378974e-05, + "loss": 0.5299, + "step": 22235 + }, + { + "epoch": 0.6105436573311367, + "grad_norm": 0.3565824627876282, + "learning_rate": 1.5767953741927312e-05, + "loss": 0.4851, + "step": 22236 + }, + { + "epoch": 0.6105711147721032, + "grad_norm": 0.36538293957710266, + "learning_rate": 1.5767600927716567e-05, + "loss": 0.5194, + "step": 22237 + }, + { + "epoch": 0.6105985722130698, + "grad_norm": 0.3875940442085266, + "learning_rate": 1.5767248102747416e-05, + "loss": 0.5036, + "step": 22238 + }, + { + "epoch": 0.6106260296540362, + "grad_norm": 0.3986596167087555, + "learning_rate": 1.57668952670205e-05, + "loss": 0.5963, + "step": 22239 + }, + { + "epoch": 0.6106534870950028, + "grad_norm": 0.39053529500961304, + "learning_rate": 1.5766542420536483e-05, + "loss": 0.5292, + "step": 22240 + }, + { + "epoch": 0.6106809445359692, + "grad_norm": 0.36083224415779114, + "learning_rate": 1.5766189563296027e-05, + "loss": 0.5147, + "step": 22241 + }, + { + "epoch": 0.6107084019769358, + "grad_norm": 0.47257012128829956, + "learning_rate": 1.5765836695299784e-05, + "loss": 0.4962, + "step": 22242 + }, + { + "epoch": 0.6107358594179022, + "grad_norm": 0.36781129240989685, + "learning_rate": 1.5765483816548418e-05, + "loss": 0.5305, + "step": 22243 + }, + { + "epoch": 0.6107633168588688, + "grad_norm": 0.40821731090545654, + "learning_rate": 1.5765130927042586e-05, + "loss": 0.5214, + "step": 22244 + }, + { + "epoch": 0.6107907742998353, + "grad_norm": 0.39149895310401917, + "learning_rate": 1.5764778026782943e-05, + "loss": 0.5778, + "step": 22245 + }, + { + "epoch": 0.6108182317408017, + "grad_norm": 0.38333266973495483, + "learning_rate": 1.576442511577015e-05, + "loss": 0.4452, + "step": 22246 + }, + { + "epoch": 0.6108456891817683, + "grad_norm": 0.3765413761138916, + "learning_rate": 1.576407219400486e-05, + "loss": 0.5188, + "step": 22247 + }, + { + "epoch": 0.6108731466227347, + "grad_norm": 0.3660796880722046, + "learning_rate": 1.576371926148774e-05, + "loss": 0.4522, + "step": 22248 + }, + { + "epoch": 0.6109006040637013, + "grad_norm": 0.3793901801109314, + "learning_rate": 1.5763366318219446e-05, + "loss": 0.5143, + "step": 22249 + }, + { + "epoch": 0.6109280615046677, + "grad_norm": 0.35793235898017883, + "learning_rate": 1.5763013364200627e-05, + "loss": 0.4964, + "step": 22250 + }, + { + "epoch": 0.6109555189456343, + "grad_norm": 0.3555724322795868, + "learning_rate": 1.5762660399431954e-05, + "loss": 0.4559, + "step": 22251 + }, + { + "epoch": 0.6109829763866008, + "grad_norm": 0.4238298535346985, + "learning_rate": 1.5762307423914077e-05, + "loss": 0.5361, + "step": 22252 + }, + { + "epoch": 0.6110104338275673, + "grad_norm": 0.3470783829689026, + "learning_rate": 1.5761954437647663e-05, + "loss": 0.4617, + "step": 22253 + }, + { + "epoch": 0.6110378912685338, + "grad_norm": 0.3909376561641693, + "learning_rate": 1.5761601440633364e-05, + "loss": 0.5821, + "step": 22254 + }, + { + "epoch": 0.6110653487095002, + "grad_norm": 0.4402157962322235, + "learning_rate": 1.5761248432871833e-05, + "loss": 0.4733, + "step": 22255 + }, + { + "epoch": 0.6110928061504668, + "grad_norm": 0.37749382853507996, + "learning_rate": 1.576089541436374e-05, + "loss": 0.4704, + "step": 22256 + }, + { + "epoch": 0.6111202635914332, + "grad_norm": 0.36298874020576477, + "learning_rate": 1.5760542385109737e-05, + "loss": 0.4699, + "step": 22257 + }, + { + "epoch": 0.6111477210323998, + "grad_norm": 0.3920440673828125, + "learning_rate": 1.5760189345110485e-05, + "loss": 0.5625, + "step": 22258 + }, + { + "epoch": 0.6111751784733663, + "grad_norm": 0.37410855293273926, + "learning_rate": 1.5759836294366642e-05, + "loss": 0.4254, + "step": 22259 + }, + { + "epoch": 0.6112026359143328, + "grad_norm": 0.40716034173965454, + "learning_rate": 1.5759483232878862e-05, + "loss": 0.5665, + "step": 22260 + }, + { + "epoch": 0.6112300933552993, + "grad_norm": 0.42992883920669556, + "learning_rate": 1.575913016064781e-05, + "loss": 0.5012, + "step": 22261 + }, + { + "epoch": 0.6112575507962658, + "grad_norm": 0.33926594257354736, + "learning_rate": 1.5758777077674144e-05, + "loss": 0.4646, + "step": 22262 + }, + { + "epoch": 0.6112850082372323, + "grad_norm": 0.40045300126075745, + "learning_rate": 1.5758423983958518e-05, + "loss": 0.5881, + "step": 22263 + }, + { + "epoch": 0.6113124656781987, + "grad_norm": 0.41343575716018677, + "learning_rate": 1.5758070879501596e-05, + "loss": 0.5471, + "step": 22264 + }, + { + "epoch": 0.6113399231191653, + "grad_norm": 0.3798188269138336, + "learning_rate": 1.5757717764304032e-05, + "loss": 0.4616, + "step": 22265 + }, + { + "epoch": 0.6113673805601318, + "grad_norm": 0.329255074262619, + "learning_rate": 1.5757364638366487e-05, + "loss": 0.4039, + "step": 22266 + }, + { + "epoch": 0.6113948380010983, + "grad_norm": 0.42243093252182007, + "learning_rate": 1.5757011501689617e-05, + "loss": 0.503, + "step": 22267 + }, + { + "epoch": 0.6114222954420648, + "grad_norm": 0.36034345626831055, + "learning_rate": 1.5756658354274084e-05, + "loss": 0.4579, + "step": 22268 + }, + { + "epoch": 0.6114497528830313, + "grad_norm": 0.3666810095310211, + "learning_rate": 1.5756305196120552e-05, + "loss": 0.4894, + "step": 22269 + }, + { + "epoch": 0.6114772103239978, + "grad_norm": 0.4027080833911896, + "learning_rate": 1.5755952027229668e-05, + "loss": 0.5178, + "step": 22270 + }, + { + "epoch": 0.6115046677649643, + "grad_norm": 0.4269193112850189, + "learning_rate": 1.57555988476021e-05, + "loss": 0.5645, + "step": 22271 + }, + { + "epoch": 0.6115321252059308, + "grad_norm": 0.41972389817237854, + "learning_rate": 1.57552456572385e-05, + "loss": 0.5585, + "step": 22272 + }, + { + "epoch": 0.6115595826468974, + "grad_norm": 0.35306113958358765, + "learning_rate": 1.575489245613953e-05, + "loss": 0.5372, + "step": 22273 + }, + { + "epoch": 0.6115870400878638, + "grad_norm": 0.36466968059539795, + "learning_rate": 1.575453924430585e-05, + "loss": 0.4901, + "step": 22274 + }, + { + "epoch": 0.6116144975288303, + "grad_norm": 0.37586653232574463, + "learning_rate": 1.575418602173812e-05, + "loss": 0.4668, + "step": 22275 + }, + { + "epoch": 0.6116419549697968, + "grad_norm": 0.39249420166015625, + "learning_rate": 1.575383278843699e-05, + "loss": 0.5396, + "step": 22276 + }, + { + "epoch": 0.6116694124107633, + "grad_norm": 0.4205707609653473, + "learning_rate": 1.575347954440313e-05, + "loss": 0.5576, + "step": 22277 + }, + { + "epoch": 0.6116968698517298, + "grad_norm": 0.5073122978210449, + "learning_rate": 1.575312628963719e-05, + "loss": 0.5255, + "step": 22278 + }, + { + "epoch": 0.6117243272926963, + "grad_norm": 0.36328601837158203, + "learning_rate": 1.575277302413984e-05, + "loss": 0.5162, + "step": 22279 + }, + { + "epoch": 0.6117517847336629, + "grad_norm": 0.35701146721839905, + "learning_rate": 1.575241974791173e-05, + "loss": 0.4431, + "step": 22280 + }, + { + "epoch": 0.6117792421746293, + "grad_norm": 0.41955891251564026, + "learning_rate": 1.5752066460953522e-05, + "loss": 0.5381, + "step": 22281 + }, + { + "epoch": 0.6118066996155959, + "grad_norm": 0.332657128572464, + "learning_rate": 1.575171316326587e-05, + "loss": 0.5222, + "step": 22282 + }, + { + "epoch": 0.6118341570565623, + "grad_norm": 0.37133780121803284, + "learning_rate": 1.575135985484944e-05, + "loss": 0.4955, + "step": 22283 + }, + { + "epoch": 0.6118616144975288, + "grad_norm": 0.3627047836780548, + "learning_rate": 1.5751006535704888e-05, + "loss": 0.4814, + "step": 22284 + }, + { + "epoch": 0.6118890719384953, + "grad_norm": 0.35918405652046204, + "learning_rate": 1.5750653205832875e-05, + "loss": 0.5084, + "step": 22285 + }, + { + "epoch": 0.6119165293794618, + "grad_norm": 0.3813706636428833, + "learning_rate": 1.5750299865234057e-05, + "loss": 0.5229, + "step": 22286 + }, + { + "epoch": 0.6119439868204284, + "grad_norm": 0.3598233461380005, + "learning_rate": 1.574994651390909e-05, + "loss": 0.4296, + "step": 22287 + }, + { + "epoch": 0.6119714442613948, + "grad_norm": 0.3709660470485687, + "learning_rate": 1.5749593151858645e-05, + "loss": 0.4895, + "step": 22288 + }, + { + "epoch": 0.6119989017023614, + "grad_norm": 0.3715469539165497, + "learning_rate": 1.574923977908337e-05, + "loss": 0.5333, + "step": 22289 + }, + { + "epoch": 0.6120263591433278, + "grad_norm": 0.3820526599884033, + "learning_rate": 1.5748886395583925e-05, + "loss": 0.5229, + "step": 22290 + }, + { + "epoch": 0.6120538165842944, + "grad_norm": 0.39448776841163635, + "learning_rate": 1.574853300136098e-05, + "loss": 0.5233, + "step": 22291 + }, + { + "epoch": 0.6120812740252608, + "grad_norm": 0.42508170008659363, + "learning_rate": 1.574817959641518e-05, + "loss": 0.5382, + "step": 22292 + }, + { + "epoch": 0.6121087314662274, + "grad_norm": 0.4328725337982178, + "learning_rate": 1.574782618074719e-05, + "loss": 0.5214, + "step": 22293 + }, + { + "epoch": 0.6121361889071939, + "grad_norm": 0.37644755840301514, + "learning_rate": 1.5747472754357675e-05, + "loss": 0.479, + "step": 22294 + }, + { + "epoch": 0.6121636463481603, + "grad_norm": 0.44581642746925354, + "learning_rate": 1.5747119317247282e-05, + "loss": 0.5448, + "step": 22295 + }, + { + "epoch": 0.6121911037891269, + "grad_norm": 0.3883346617221832, + "learning_rate": 1.5746765869416685e-05, + "loss": 0.482, + "step": 22296 + }, + { + "epoch": 0.6122185612300933, + "grad_norm": 0.3205782473087311, + "learning_rate": 1.574641241086653e-05, + "loss": 0.4533, + "step": 22297 + }, + { + "epoch": 0.6122460186710599, + "grad_norm": 0.47209155559539795, + "learning_rate": 1.574605894159748e-05, + "loss": 0.4861, + "step": 22298 + }, + { + "epoch": 0.6122734761120263, + "grad_norm": 0.3832240402698517, + "learning_rate": 1.5745705461610203e-05, + "loss": 0.5489, + "step": 22299 + }, + { + "epoch": 0.6123009335529929, + "grad_norm": 0.40516626834869385, + "learning_rate": 1.574535197090535e-05, + "loss": 0.5339, + "step": 22300 + }, + { + "epoch": 0.6123283909939594, + "grad_norm": 0.35815978050231934, + "learning_rate": 1.5744998469483576e-05, + "loss": 0.4997, + "step": 22301 + }, + { + "epoch": 0.6123558484349259, + "grad_norm": 0.335467666387558, + "learning_rate": 1.5744644957345553e-05, + "loss": 0.4236, + "step": 22302 + }, + { + "epoch": 0.6123833058758924, + "grad_norm": 0.3941707909107208, + "learning_rate": 1.574429143449193e-05, + "loss": 0.5176, + "step": 22303 + }, + { + "epoch": 0.6124107633168588, + "grad_norm": 0.31672507524490356, + "learning_rate": 1.5743937900923367e-05, + "loss": 0.4962, + "step": 22304 + }, + { + "epoch": 0.6124382207578254, + "grad_norm": 0.3727671504020691, + "learning_rate": 1.574358435664053e-05, + "loss": 0.4285, + "step": 22305 + }, + { + "epoch": 0.6124656781987918, + "grad_norm": 0.36487576365470886, + "learning_rate": 1.5743230801644078e-05, + "loss": 0.5583, + "step": 22306 + }, + { + "epoch": 0.6124931356397584, + "grad_norm": 0.40188220143318176, + "learning_rate": 1.5742877235934666e-05, + "loss": 0.5515, + "step": 22307 + }, + { + "epoch": 0.6125205930807249, + "grad_norm": 0.35644564032554626, + "learning_rate": 1.574252365951295e-05, + "loss": 0.4323, + "step": 22308 + }, + { + "epoch": 0.6125480505216914, + "grad_norm": 0.3337382376194, + "learning_rate": 1.57421700723796e-05, + "loss": 0.482, + "step": 22309 + }, + { + "epoch": 0.6125755079626579, + "grad_norm": 0.35368168354034424, + "learning_rate": 1.574181647453527e-05, + "loss": 0.4247, + "step": 22310 + }, + { + "epoch": 0.6126029654036244, + "grad_norm": 0.39114248752593994, + "learning_rate": 1.5741462865980618e-05, + "loss": 0.5189, + "step": 22311 + }, + { + "epoch": 0.6126304228445909, + "grad_norm": 0.3684851825237274, + "learning_rate": 1.5741109246716305e-05, + "loss": 0.4586, + "step": 22312 + }, + { + "epoch": 0.6126578802855573, + "grad_norm": 0.350041002035141, + "learning_rate": 1.5740755616742992e-05, + "loss": 0.5223, + "step": 22313 + }, + { + "epoch": 0.6126853377265239, + "grad_norm": 0.40297773480415344, + "learning_rate": 1.5740401976061336e-05, + "loss": 0.5817, + "step": 22314 + }, + { + "epoch": 0.6127127951674904, + "grad_norm": 0.3800107538700104, + "learning_rate": 1.5740048324672e-05, + "loss": 0.4833, + "step": 22315 + }, + { + "epoch": 0.6127402526084569, + "grad_norm": 0.3722301721572876, + "learning_rate": 1.573969466257564e-05, + "loss": 0.4138, + "step": 22316 + }, + { + "epoch": 0.6127677100494234, + "grad_norm": 0.3753984272480011, + "learning_rate": 1.573934098977292e-05, + "loss": 0.5311, + "step": 22317 + }, + { + "epoch": 0.6127951674903899, + "grad_norm": 0.3875497579574585, + "learning_rate": 1.57389873062645e-05, + "loss": 0.4596, + "step": 22318 + }, + { + "epoch": 0.6128226249313564, + "grad_norm": 0.3613750636577606, + "learning_rate": 1.5738633612051028e-05, + "loss": 0.4427, + "step": 22319 + }, + { + "epoch": 0.6128500823723229, + "grad_norm": 0.35349443554878235, + "learning_rate": 1.5738279907133178e-05, + "loss": 0.5474, + "step": 22320 + }, + { + "epoch": 0.6128775398132894, + "grad_norm": 0.3991449177265167, + "learning_rate": 1.5737926191511607e-05, + "loss": 0.5047, + "step": 22321 + }, + { + "epoch": 0.612904997254256, + "grad_norm": 0.32441431283950806, + "learning_rate": 1.5737572465186968e-05, + "loss": 0.4505, + "step": 22322 + }, + { + "epoch": 0.6129324546952224, + "grad_norm": 0.44071316719055176, + "learning_rate": 1.5737218728159925e-05, + "loss": 0.5769, + "step": 22323 + }, + { + "epoch": 0.612959912136189, + "grad_norm": 0.412457674741745, + "learning_rate": 1.5736864980431143e-05, + "loss": 0.4911, + "step": 22324 + }, + { + "epoch": 0.6129873695771554, + "grad_norm": 0.3591390550136566, + "learning_rate": 1.573651122200127e-05, + "loss": 0.496, + "step": 22325 + }, + { + "epoch": 0.6130148270181219, + "grad_norm": 0.3497498631477356, + "learning_rate": 1.5736157452870976e-05, + "loss": 0.482, + "step": 22326 + }, + { + "epoch": 0.6130422844590884, + "grad_norm": 0.38614487648010254, + "learning_rate": 1.573580367304092e-05, + "loss": 0.4969, + "step": 22327 + }, + { + "epoch": 0.6130697419000549, + "grad_norm": 0.36253419518470764, + "learning_rate": 1.5735449882511758e-05, + "loss": 0.5635, + "step": 22328 + }, + { + "epoch": 0.6130971993410215, + "grad_norm": 0.3599514961242676, + "learning_rate": 1.5735096081284148e-05, + "loss": 0.4987, + "step": 22329 + }, + { + "epoch": 0.6131246567819879, + "grad_norm": 0.34198230504989624, + "learning_rate": 1.5734742269358756e-05, + "loss": 0.5161, + "step": 22330 + }, + { + "epoch": 0.6131521142229545, + "grad_norm": 0.3852802515029907, + "learning_rate": 1.5734388446736242e-05, + "loss": 0.5353, + "step": 22331 + }, + { + "epoch": 0.6131795716639209, + "grad_norm": 0.36459124088287354, + "learning_rate": 1.573403461341726e-05, + "loss": 0.5221, + "step": 22332 + }, + { + "epoch": 0.6132070291048874, + "grad_norm": 0.4692228138446808, + "learning_rate": 1.5733680769402476e-05, + "loss": 0.5487, + "step": 22333 + }, + { + "epoch": 0.6132344865458539, + "grad_norm": 0.37739425897598267, + "learning_rate": 1.5733326914692545e-05, + "loss": 0.558, + "step": 22334 + }, + { + "epoch": 0.6132619439868204, + "grad_norm": 0.35902783274650574, + "learning_rate": 1.573297304928813e-05, + "loss": 0.5286, + "step": 22335 + }, + { + "epoch": 0.613289401427787, + "grad_norm": 0.37758535146713257, + "learning_rate": 1.573261917318989e-05, + "loss": 0.4398, + "step": 22336 + }, + { + "epoch": 0.6133168588687534, + "grad_norm": 0.4123183786869049, + "learning_rate": 1.5732265286398485e-05, + "loss": 0.4743, + "step": 22337 + }, + { + "epoch": 0.61334431630972, + "grad_norm": 0.39421606063842773, + "learning_rate": 1.5731911388914576e-05, + "loss": 0.5659, + "step": 22338 + }, + { + "epoch": 0.6133717737506864, + "grad_norm": 0.43935301899909973, + "learning_rate": 1.5731557480738824e-05, + "loss": 0.5511, + "step": 22339 + }, + { + "epoch": 0.613399231191653, + "grad_norm": 0.38147106766700745, + "learning_rate": 1.5731203561871887e-05, + "loss": 0.456, + "step": 22340 + }, + { + "epoch": 0.6134266886326194, + "grad_norm": 0.3597048819065094, + "learning_rate": 1.5730849632314428e-05, + "loss": 0.4967, + "step": 22341 + }, + { + "epoch": 0.613454146073586, + "grad_norm": 0.3484095633029938, + "learning_rate": 1.5730495692067105e-05, + "loss": 0.5002, + "step": 22342 + }, + { + "epoch": 0.6134816035145525, + "grad_norm": 0.3630552887916565, + "learning_rate": 1.5730141741130575e-05, + "loss": 0.4332, + "step": 22343 + }, + { + "epoch": 0.6135090609555189, + "grad_norm": 0.3765169084072113, + "learning_rate": 1.5729787779505506e-05, + "loss": 0.5472, + "step": 22344 + }, + { + "epoch": 0.6135365183964855, + "grad_norm": 0.3400070071220398, + "learning_rate": 1.5729433807192552e-05, + "loss": 0.4717, + "step": 22345 + }, + { + "epoch": 0.6135639758374519, + "grad_norm": 0.42126747965812683, + "learning_rate": 1.5729079824192376e-05, + "loss": 0.5233, + "step": 22346 + }, + { + "epoch": 0.6135914332784185, + "grad_norm": 0.35679954290390015, + "learning_rate": 1.572872583050564e-05, + "loss": 0.5003, + "step": 22347 + }, + { + "epoch": 0.6136188907193849, + "grad_norm": 0.3954769968986511, + "learning_rate": 1.5728371826132996e-05, + "loss": 0.5737, + "step": 22348 + }, + { + "epoch": 0.6136463481603515, + "grad_norm": 0.3443745970726013, + "learning_rate": 1.5728017811075114e-05, + "loss": 0.4906, + "step": 22349 + }, + { + "epoch": 0.613673805601318, + "grad_norm": 0.3985420763492584, + "learning_rate": 1.572766378533265e-05, + "loss": 0.5053, + "step": 22350 + }, + { + "epoch": 0.6137012630422845, + "grad_norm": 0.37189149856567383, + "learning_rate": 1.5727309748906266e-05, + "loss": 0.5204, + "step": 22351 + }, + { + "epoch": 0.613728720483251, + "grad_norm": 0.3315165638923645, + "learning_rate": 1.5726955701796623e-05, + "loss": 0.4516, + "step": 22352 + }, + { + "epoch": 0.6137561779242174, + "grad_norm": 0.357103168964386, + "learning_rate": 1.5726601644004373e-05, + "loss": 0.4501, + "step": 22353 + }, + { + "epoch": 0.613783635365184, + "grad_norm": 0.37696170806884766, + "learning_rate": 1.572624757553019e-05, + "loss": 0.5775, + "step": 22354 + }, + { + "epoch": 0.6138110928061504, + "grad_norm": 0.33919617533683777, + "learning_rate": 1.5725893496374726e-05, + "loss": 0.488, + "step": 22355 + }, + { + "epoch": 0.613838550247117, + "grad_norm": 0.5796383023262024, + "learning_rate": 1.572553940653864e-05, + "loss": 0.5575, + "step": 22356 + }, + { + "epoch": 0.6138660076880835, + "grad_norm": 0.4033207893371582, + "learning_rate": 1.57251853060226e-05, + "loss": 0.4949, + "step": 22357 + }, + { + "epoch": 0.61389346512905, + "grad_norm": 0.3770495355129242, + "learning_rate": 1.5724831194827258e-05, + "loss": 0.4207, + "step": 22358 + }, + { + "epoch": 0.6139209225700165, + "grad_norm": 0.3960598111152649, + "learning_rate": 1.5724477072953283e-05, + "loss": 0.5292, + "step": 22359 + }, + { + "epoch": 0.613948380010983, + "grad_norm": 0.3861221671104431, + "learning_rate": 1.572412294040133e-05, + "loss": 0.5043, + "step": 22360 + }, + { + "epoch": 0.6139758374519495, + "grad_norm": 0.3493136465549469, + "learning_rate": 1.5723768797172057e-05, + "loss": 0.4537, + "step": 22361 + }, + { + "epoch": 0.6140032948929159, + "grad_norm": 0.37213313579559326, + "learning_rate": 1.5723414643266135e-05, + "loss": 0.4073, + "step": 22362 + }, + { + "epoch": 0.6140307523338825, + "grad_norm": 0.3684461712837219, + "learning_rate": 1.572306047868421e-05, + "loss": 0.5287, + "step": 22363 + }, + { + "epoch": 0.6140582097748489, + "grad_norm": 0.3765597641468048, + "learning_rate": 1.5722706303426955e-05, + "loss": 0.4715, + "step": 22364 + }, + { + "epoch": 0.6140856672158155, + "grad_norm": 0.36326783895492554, + "learning_rate": 1.572235211749503e-05, + "loss": 0.4601, + "step": 22365 + }, + { + "epoch": 0.614113124656782, + "grad_norm": 0.4628138542175293, + "learning_rate": 1.572199792088909e-05, + "loss": 0.4352, + "step": 22366 + }, + { + "epoch": 0.6141405820977485, + "grad_norm": 0.4248127043247223, + "learning_rate": 1.5721643713609794e-05, + "loss": 0.4223, + "step": 22367 + }, + { + "epoch": 0.614168039538715, + "grad_norm": 0.3917737901210785, + "learning_rate": 1.5721289495657807e-05, + "loss": 0.4955, + "step": 22368 + }, + { + "epoch": 0.6141954969796815, + "grad_norm": 0.389168381690979, + "learning_rate": 1.572093526703379e-05, + "loss": 0.51, + "step": 22369 + }, + { + "epoch": 0.614222954420648, + "grad_norm": 0.45336952805519104, + "learning_rate": 1.5720581027738402e-05, + "loss": 0.4563, + "step": 22370 + }, + { + "epoch": 0.6142504118616144, + "grad_norm": 0.36962857842445374, + "learning_rate": 1.5720226777772303e-05, + "loss": 0.4596, + "step": 22371 + }, + { + "epoch": 0.614277869302581, + "grad_norm": 0.4024910628795624, + "learning_rate": 1.5719872517136157e-05, + "loss": 0.4769, + "step": 22372 + }, + { + "epoch": 0.6143053267435475, + "grad_norm": 0.3402158319950104, + "learning_rate": 1.571951824583063e-05, + "loss": 0.4262, + "step": 22373 + }, + { + "epoch": 0.614332784184514, + "grad_norm": 0.34194216132164, + "learning_rate": 1.5719163963856367e-05, + "loss": 0.445, + "step": 22374 + }, + { + "epoch": 0.6143602416254805, + "grad_norm": 0.36392393708229065, + "learning_rate": 1.571880967121404e-05, + "loss": 0.5716, + "step": 22375 + }, + { + "epoch": 0.614387699066447, + "grad_norm": 0.3907800316810608, + "learning_rate": 1.571845536790431e-05, + "loss": 0.4888, + "step": 22376 + }, + { + "epoch": 0.6144151565074135, + "grad_norm": 0.4176240861415863, + "learning_rate": 1.5718101053927834e-05, + "loss": 0.4936, + "step": 22377 + }, + { + "epoch": 0.61444261394838, + "grad_norm": 0.3670918643474579, + "learning_rate": 1.5717746729285274e-05, + "loss": 0.5752, + "step": 22378 + }, + { + "epoch": 0.6144700713893465, + "grad_norm": 0.37597203254699707, + "learning_rate": 1.571739239397729e-05, + "loss": 0.4859, + "step": 22379 + }, + { + "epoch": 0.6144975288303131, + "grad_norm": 0.441389799118042, + "learning_rate": 1.5717038048004548e-05, + "loss": 0.6218, + "step": 22380 + }, + { + "epoch": 0.6145249862712795, + "grad_norm": 0.5084608197212219, + "learning_rate": 1.5716683691367704e-05, + "loss": 0.5657, + "step": 22381 + }, + { + "epoch": 0.614552443712246, + "grad_norm": 0.4000180661678314, + "learning_rate": 1.5716329324067423e-05, + "loss": 0.5494, + "step": 22382 + }, + { + "epoch": 0.6145799011532125, + "grad_norm": 0.3384699821472168, + "learning_rate": 1.571597494610436e-05, + "loss": 0.4668, + "step": 22383 + }, + { + "epoch": 0.614607358594179, + "grad_norm": 0.3452565670013428, + "learning_rate": 1.571562055747918e-05, + "loss": 0.495, + "step": 22384 + }, + { + "epoch": 0.6146348160351455, + "grad_norm": 0.34684741497039795, + "learning_rate": 1.5715266158192543e-05, + "loss": 0.5045, + "step": 22385 + }, + { + "epoch": 0.614662273476112, + "grad_norm": 0.3506554663181305, + "learning_rate": 1.5714911748245115e-05, + "loss": 0.4788, + "step": 22386 + }, + { + "epoch": 0.6146897309170786, + "grad_norm": 0.4257354438304901, + "learning_rate": 1.5714557327637544e-05, + "loss": 0.5523, + "step": 22387 + }, + { + "epoch": 0.614717188358045, + "grad_norm": 0.33242201805114746, + "learning_rate": 1.5714202896370507e-05, + "loss": 0.3884, + "step": 22388 + }, + { + "epoch": 0.6147446457990116, + "grad_norm": 0.3994636833667755, + "learning_rate": 1.5713848454444655e-05, + "loss": 0.5383, + "step": 22389 + }, + { + "epoch": 0.614772103239978, + "grad_norm": 0.8916911482810974, + "learning_rate": 1.571349400186065e-05, + "loss": 0.4923, + "step": 22390 + }, + { + "epoch": 0.6147995606809445, + "grad_norm": 0.3596300482749939, + "learning_rate": 1.571313953861916e-05, + "loss": 0.3837, + "step": 22391 + }, + { + "epoch": 0.614827018121911, + "grad_norm": 0.393643856048584, + "learning_rate": 1.5712785064720837e-05, + "loss": 0.4881, + "step": 22392 + }, + { + "epoch": 0.6148544755628775, + "grad_norm": 0.41327959299087524, + "learning_rate": 1.5712430580166348e-05, + "loss": 0.6245, + "step": 22393 + }, + { + "epoch": 0.6148819330038441, + "grad_norm": 0.4574626088142395, + "learning_rate": 1.5712076084956354e-05, + "loss": 0.4208, + "step": 22394 + }, + { + "epoch": 0.6149093904448105, + "grad_norm": 0.3615926504135132, + "learning_rate": 1.5711721579091515e-05, + "loss": 0.4724, + "step": 22395 + }, + { + "epoch": 0.6149368478857771, + "grad_norm": 0.3980497717857361, + "learning_rate": 1.5711367062572492e-05, + "loss": 0.4991, + "step": 22396 + }, + { + "epoch": 0.6149643053267435, + "grad_norm": 0.37417009472846985, + "learning_rate": 1.5711012535399946e-05, + "loss": 0.533, + "step": 22397 + }, + { + "epoch": 0.6149917627677101, + "grad_norm": 0.3309810757637024, + "learning_rate": 1.5710657997574535e-05, + "loss": 0.4067, + "step": 22398 + }, + { + "epoch": 0.6150192202086765, + "grad_norm": 0.4476729929447174, + "learning_rate": 1.571030344909693e-05, + "loss": 0.5024, + "step": 22399 + }, + { + "epoch": 0.615046677649643, + "grad_norm": 0.36522457003593445, + "learning_rate": 1.570994888996778e-05, + "loss": 0.4931, + "step": 22400 + }, + { + "epoch": 0.6150741350906096, + "grad_norm": 0.4306666851043701, + "learning_rate": 1.570959432018776e-05, + "loss": 0.5797, + "step": 22401 + }, + { + "epoch": 0.615101592531576, + "grad_norm": 0.4648493230342865, + "learning_rate": 1.5709239739757524e-05, + "loss": 0.606, + "step": 22402 + }, + { + "epoch": 0.6151290499725426, + "grad_norm": 0.4590343236923218, + "learning_rate": 1.5708885148677732e-05, + "loss": 0.5066, + "step": 22403 + }, + { + "epoch": 0.615156507413509, + "grad_norm": 0.4329231381416321, + "learning_rate": 1.5708530546949048e-05, + "loss": 0.4367, + "step": 22404 + }, + { + "epoch": 0.6151839648544756, + "grad_norm": 0.3508068323135376, + "learning_rate": 1.570817593457213e-05, + "loss": 0.5327, + "step": 22405 + }, + { + "epoch": 0.615211422295442, + "grad_norm": 0.41138675808906555, + "learning_rate": 1.5707821311547645e-05, + "loss": 0.5149, + "step": 22406 + }, + { + "epoch": 0.6152388797364086, + "grad_norm": 0.4616115391254425, + "learning_rate": 1.5707466677876245e-05, + "loss": 0.4539, + "step": 22407 + }, + { + "epoch": 0.6152663371773751, + "grad_norm": 0.3997574746608734, + "learning_rate": 1.5707112033558605e-05, + "loss": 0.4675, + "step": 22408 + }, + { + "epoch": 0.6152937946183415, + "grad_norm": 0.3512255549430847, + "learning_rate": 1.5706757378595378e-05, + "loss": 0.4616, + "step": 22409 + }, + { + "epoch": 0.6153212520593081, + "grad_norm": 0.3764665126800537, + "learning_rate": 1.5706402712987227e-05, + "loss": 0.5224, + "step": 22410 + }, + { + "epoch": 0.6153487095002745, + "grad_norm": 0.3815498352050781, + "learning_rate": 1.5706048036734812e-05, + "loss": 0.5515, + "step": 22411 + }, + { + "epoch": 0.6153761669412411, + "grad_norm": 0.47489452362060547, + "learning_rate": 1.57056933498388e-05, + "loss": 0.5134, + "step": 22412 + }, + { + "epoch": 0.6154036243822075, + "grad_norm": 0.4045666456222534, + "learning_rate": 1.5705338652299843e-05, + "loss": 0.5851, + "step": 22413 + }, + { + "epoch": 0.6154310818231741, + "grad_norm": 0.34440135955810547, + "learning_rate": 1.5704983944118613e-05, + "loss": 0.5187, + "step": 22414 + }, + { + "epoch": 0.6154585392641406, + "grad_norm": 0.37784966826438904, + "learning_rate": 1.5704629225295763e-05, + "loss": 0.3937, + "step": 22415 + }, + { + "epoch": 0.6154859967051071, + "grad_norm": 0.357105016708374, + "learning_rate": 1.5704274495831966e-05, + "loss": 0.5652, + "step": 22416 + }, + { + "epoch": 0.6155134541460736, + "grad_norm": 0.38424667716026306, + "learning_rate": 1.570391975572787e-05, + "loss": 0.5099, + "step": 22417 + }, + { + "epoch": 0.61554091158704, + "grad_norm": 0.36078813672065735, + "learning_rate": 1.5703565004984146e-05, + "loss": 0.4964, + "step": 22418 + }, + { + "epoch": 0.6155683690280066, + "grad_norm": 0.37554067373275757, + "learning_rate": 1.570321024360145e-05, + "loss": 0.5091, + "step": 22419 + }, + { + "epoch": 0.615595826468973, + "grad_norm": 0.36112555861473083, + "learning_rate": 1.570285547158045e-05, + "loss": 0.5605, + "step": 22420 + }, + { + "epoch": 0.6156232839099396, + "grad_norm": 0.45782092213630676, + "learning_rate": 1.5702500688921804e-05, + "loss": 0.4812, + "step": 22421 + }, + { + "epoch": 0.6156507413509061, + "grad_norm": 0.41082102060317993, + "learning_rate": 1.5702145895626177e-05, + "loss": 0.5948, + "step": 22422 + }, + { + "epoch": 0.6156781987918726, + "grad_norm": 0.37824687361717224, + "learning_rate": 1.570179109169422e-05, + "loss": 0.527, + "step": 22423 + }, + { + "epoch": 0.6157056562328391, + "grad_norm": 0.4140215218067169, + "learning_rate": 1.570143627712661e-05, + "loss": 0.5308, + "step": 22424 + }, + { + "epoch": 0.6157331136738056, + "grad_norm": 0.4178859293460846, + "learning_rate": 1.5701081451923996e-05, + "loss": 0.4879, + "step": 22425 + }, + { + "epoch": 0.6157605711147721, + "grad_norm": 0.36644741892814636, + "learning_rate": 1.570072661608705e-05, + "loss": 0.4808, + "step": 22426 + }, + { + "epoch": 0.6157880285557386, + "grad_norm": 0.40483301877975464, + "learning_rate": 1.570037176961643e-05, + "loss": 0.5445, + "step": 22427 + }, + { + "epoch": 0.6158154859967051, + "grad_norm": 0.35162755846977234, + "learning_rate": 1.570001691251279e-05, + "loss": 0.4605, + "step": 22428 + }, + { + "epoch": 0.6158429434376717, + "grad_norm": 0.3669903874397278, + "learning_rate": 1.5699662044776805e-05, + "loss": 0.5129, + "step": 22429 + }, + { + "epoch": 0.6158704008786381, + "grad_norm": 0.3989444077014923, + "learning_rate": 1.5699307166409133e-05, + "loss": 0.5128, + "step": 22430 + }, + { + "epoch": 0.6158978583196046, + "grad_norm": 0.3426071107387543, + "learning_rate": 1.569895227741043e-05, + "loss": 0.5074, + "step": 22431 + }, + { + "epoch": 0.6159253157605711, + "grad_norm": 0.4168485701084137, + "learning_rate": 1.5698597377781368e-05, + "loss": 0.5836, + "step": 22432 + }, + { + "epoch": 0.6159527732015376, + "grad_norm": 0.3694388270378113, + "learning_rate": 1.5698242467522596e-05, + "loss": 0.525, + "step": 22433 + }, + { + "epoch": 0.6159802306425041, + "grad_norm": 0.3906443119049072, + "learning_rate": 1.5697887546634785e-05, + "loss": 0.5378, + "step": 22434 + }, + { + "epoch": 0.6160076880834706, + "grad_norm": 0.34419670701026917, + "learning_rate": 1.5697532615118597e-05, + "loss": 0.4807, + "step": 22435 + }, + { + "epoch": 0.6160351455244372, + "grad_norm": 0.3505313992500305, + "learning_rate": 1.569717767297469e-05, + "loss": 0.5424, + "step": 22436 + }, + { + "epoch": 0.6160626029654036, + "grad_norm": 0.3377642035484314, + "learning_rate": 1.569682272020373e-05, + "loss": 0.4973, + "step": 22437 + }, + { + "epoch": 0.6160900604063702, + "grad_norm": 0.39497652649879456, + "learning_rate": 1.569646775680638e-05, + "loss": 0.5662, + "step": 22438 + }, + { + "epoch": 0.6161175178473366, + "grad_norm": 0.4313346743583679, + "learning_rate": 1.5696112782783296e-05, + "loss": 0.4723, + "step": 22439 + }, + { + "epoch": 0.6161449752883031, + "grad_norm": 0.41157907247543335, + "learning_rate": 1.5695757798135143e-05, + "loss": 0.5044, + "step": 22440 + }, + { + "epoch": 0.6161724327292696, + "grad_norm": 0.35974326729774475, + "learning_rate": 1.5695402802862586e-05, + "loss": 0.5058, + "step": 22441 + }, + { + "epoch": 0.6161998901702361, + "grad_norm": 0.38614991307258606, + "learning_rate": 1.5695047796966287e-05, + "loss": 0.5685, + "step": 22442 + }, + { + "epoch": 0.6162273476112027, + "grad_norm": 0.4355829954147339, + "learning_rate": 1.56946927804469e-05, + "loss": 0.4541, + "step": 22443 + }, + { + "epoch": 0.6162548050521691, + "grad_norm": 0.3852423429489136, + "learning_rate": 1.5694337753305097e-05, + "loss": 0.5405, + "step": 22444 + }, + { + "epoch": 0.6162822624931357, + "grad_norm": 0.39085647463798523, + "learning_rate": 1.5693982715541535e-05, + "loss": 0.5302, + "step": 22445 + }, + { + "epoch": 0.6163097199341021, + "grad_norm": 0.39774662256240845, + "learning_rate": 1.569362766715688e-05, + "loss": 0.522, + "step": 22446 + }, + { + "epoch": 0.6163371773750687, + "grad_norm": 0.43441006541252136, + "learning_rate": 1.569327260815179e-05, + "loss": 0.6035, + "step": 22447 + }, + { + "epoch": 0.6163646348160351, + "grad_norm": 0.39261582493782043, + "learning_rate": 1.5692917538526936e-05, + "loss": 0.4311, + "step": 22448 + }, + { + "epoch": 0.6163920922570016, + "grad_norm": 0.44348111748695374, + "learning_rate": 1.5692562458282967e-05, + "loss": 0.5131, + "step": 22449 + }, + { + "epoch": 0.6164195496979682, + "grad_norm": 0.405502587556839, + "learning_rate": 1.5692207367420556e-05, + "loss": 0.5683, + "step": 22450 + }, + { + "epoch": 0.6164470071389346, + "grad_norm": 0.35782089829444885, + "learning_rate": 1.5691852265940356e-05, + "loss": 0.582, + "step": 22451 + }, + { + "epoch": 0.6164744645799012, + "grad_norm": 0.38939744234085083, + "learning_rate": 1.569149715384304e-05, + "loss": 0.4735, + "step": 22452 + }, + { + "epoch": 0.6165019220208676, + "grad_norm": 0.3565541207790375, + "learning_rate": 1.569114203112926e-05, + "loss": 0.477, + "step": 22453 + }, + { + "epoch": 0.6165293794618342, + "grad_norm": 0.3621019124984741, + "learning_rate": 1.569078689779969e-05, + "loss": 0.584, + "step": 22454 + }, + { + "epoch": 0.6165568369028006, + "grad_norm": 0.4409540295600891, + "learning_rate": 1.5690431753854986e-05, + "loss": 0.5491, + "step": 22455 + }, + { + "epoch": 0.6165842943437672, + "grad_norm": 0.34002095460891724, + "learning_rate": 1.5690076599295805e-05, + "loss": 0.4808, + "step": 22456 + }, + { + "epoch": 0.6166117517847337, + "grad_norm": 0.35386621952056885, + "learning_rate": 1.568972143412282e-05, + "loss": 0.4496, + "step": 22457 + }, + { + "epoch": 0.6166392092257001, + "grad_norm": 0.37079381942749023, + "learning_rate": 1.5689366258336687e-05, + "loss": 0.4988, + "step": 22458 + }, + { + "epoch": 0.6166666666666667, + "grad_norm": 0.45040878653526306, + "learning_rate": 1.568901107193807e-05, + "loss": 0.5233, + "step": 22459 + }, + { + "epoch": 0.6166941241076331, + "grad_norm": 0.38504672050476074, + "learning_rate": 1.568865587492763e-05, + "loss": 0.522, + "step": 22460 + }, + { + "epoch": 0.6167215815485997, + "grad_norm": 0.37678220868110657, + "learning_rate": 1.5688300667306034e-05, + "loss": 0.4456, + "step": 22461 + }, + { + "epoch": 0.6167490389895661, + "grad_norm": 0.3849036395549774, + "learning_rate": 1.568794544907394e-05, + "loss": 0.5553, + "step": 22462 + }, + { + "epoch": 0.6167764964305327, + "grad_norm": 0.3511054217815399, + "learning_rate": 1.5687590220232013e-05, + "loss": 0.4902, + "step": 22463 + }, + { + "epoch": 0.6168039538714992, + "grad_norm": 0.34521570801734924, + "learning_rate": 1.5687234980780918e-05, + "loss": 0.4487, + "step": 22464 + }, + { + "epoch": 0.6168314113124657, + "grad_norm": 0.36269545555114746, + "learning_rate": 1.5686879730721307e-05, + "loss": 0.5086, + "step": 22465 + }, + { + "epoch": 0.6168588687534322, + "grad_norm": 0.38413697481155396, + "learning_rate": 1.5686524470053858e-05, + "loss": 0.4302, + "step": 22466 + }, + { + "epoch": 0.6168863261943986, + "grad_norm": 0.3978446125984192, + "learning_rate": 1.5686169198779223e-05, + "loss": 0.5909, + "step": 22467 + }, + { + "epoch": 0.6169137836353652, + "grad_norm": 0.36741313338279724, + "learning_rate": 1.5685813916898065e-05, + "loss": 0.4652, + "step": 22468 + }, + { + "epoch": 0.6169412410763316, + "grad_norm": 0.3810917139053345, + "learning_rate": 1.568545862441105e-05, + "loss": 0.504, + "step": 22469 + }, + { + "epoch": 0.6169686985172982, + "grad_norm": 0.386318564414978, + "learning_rate": 1.5685103321318843e-05, + "loss": 0.4802, + "step": 22470 + }, + { + "epoch": 0.6169961559582647, + "grad_norm": 0.42613542079925537, + "learning_rate": 1.5684748007622104e-05, + "loss": 0.5744, + "step": 22471 + }, + { + "epoch": 0.6170236133992312, + "grad_norm": 0.3909852206707001, + "learning_rate": 1.5684392683321496e-05, + "loss": 0.5039, + "step": 22472 + }, + { + "epoch": 0.6170510708401977, + "grad_norm": 0.3132966160774231, + "learning_rate": 1.5684037348417678e-05, + "loss": 0.4712, + "step": 22473 + }, + { + "epoch": 0.6170785282811642, + "grad_norm": 0.34158822894096375, + "learning_rate": 1.5683682002911318e-05, + "loss": 0.5119, + "step": 22474 + }, + { + "epoch": 0.6171059857221307, + "grad_norm": 0.36224526166915894, + "learning_rate": 1.5683326646803077e-05, + "loss": 0.5083, + "step": 22475 + }, + { + "epoch": 0.6171334431630972, + "grad_norm": 0.36369964480400085, + "learning_rate": 1.568297128009362e-05, + "loss": 0.456, + "step": 22476 + }, + { + "epoch": 0.6171609006040637, + "grad_norm": 0.4338120222091675, + "learning_rate": 1.56826159027836e-05, + "loss": 0.5359, + "step": 22477 + }, + { + "epoch": 0.6171883580450302, + "grad_norm": 0.39159539341926575, + "learning_rate": 1.5682260514873698e-05, + "loss": 0.477, + "step": 22478 + }, + { + "epoch": 0.6172158154859967, + "grad_norm": 0.397657185792923, + "learning_rate": 1.568190511636456e-05, + "loss": 0.5043, + "step": 22479 + }, + { + "epoch": 0.6172432729269632, + "grad_norm": 0.3937775790691376, + "learning_rate": 1.5681549707256854e-05, + "loss": 0.4635, + "step": 22480 + }, + { + "epoch": 0.6172707303679297, + "grad_norm": 0.39815840125083923, + "learning_rate": 1.568119428755125e-05, + "loss": 0.4971, + "step": 22481 + }, + { + "epoch": 0.6172981878088962, + "grad_norm": 0.40140581130981445, + "learning_rate": 1.5680838857248403e-05, + "loss": 0.5177, + "step": 22482 + }, + { + "epoch": 0.6173256452498627, + "grad_norm": 0.3692626655101776, + "learning_rate": 1.568048341634898e-05, + "loss": 0.463, + "step": 22483 + }, + { + "epoch": 0.6173531026908292, + "grad_norm": 0.3488836884498596, + "learning_rate": 1.568012796485364e-05, + "loss": 0.4654, + "step": 22484 + }, + { + "epoch": 0.6173805601317958, + "grad_norm": 0.3457472622394562, + "learning_rate": 1.567977250276305e-05, + "loss": 0.5006, + "step": 22485 + }, + { + "epoch": 0.6174080175727622, + "grad_norm": 0.364148885011673, + "learning_rate": 1.567941703007787e-05, + "loss": 0.5189, + "step": 22486 + }, + { + "epoch": 0.6174354750137288, + "grad_norm": 0.4103710353374481, + "learning_rate": 1.5679061546798765e-05, + "loss": 0.466, + "step": 22487 + }, + { + "epoch": 0.6174629324546952, + "grad_norm": 0.3951040208339691, + "learning_rate": 1.56787060529264e-05, + "loss": 0.562, + "step": 22488 + }, + { + "epoch": 0.6174903898956617, + "grad_norm": 0.3802453875541687, + "learning_rate": 1.5678350548461435e-05, + "loss": 0.4775, + "step": 22489 + }, + { + "epoch": 0.6175178473366282, + "grad_norm": 0.3959280252456665, + "learning_rate": 1.567799503340453e-05, + "loss": 0.4867, + "step": 22490 + }, + { + "epoch": 0.6175453047775947, + "grad_norm": 0.3710545003414154, + "learning_rate": 1.5677639507756354e-05, + "loss": 0.4591, + "step": 22491 + }, + { + "epoch": 0.6175727622185613, + "grad_norm": 0.4095623195171356, + "learning_rate": 1.567728397151757e-05, + "loss": 0.5751, + "step": 22492 + }, + { + "epoch": 0.6176002196595277, + "grad_norm": 0.4128479063510895, + "learning_rate": 1.567692842468884e-05, + "loss": 0.4414, + "step": 22493 + }, + { + "epoch": 0.6176276771004943, + "grad_norm": 0.3670276999473572, + "learning_rate": 1.5676572867270826e-05, + "loss": 0.4403, + "step": 22494 + }, + { + "epoch": 0.6176551345414607, + "grad_norm": 0.43889325857162476, + "learning_rate": 1.5676217299264188e-05, + "loss": 0.5016, + "step": 22495 + }, + { + "epoch": 0.6176825919824273, + "grad_norm": 0.37587711215019226, + "learning_rate": 1.5675861720669598e-05, + "loss": 0.5456, + "step": 22496 + }, + { + "epoch": 0.6177100494233937, + "grad_norm": 0.3920922577381134, + "learning_rate": 1.5675506131487712e-05, + "loss": 0.5459, + "step": 22497 + }, + { + "epoch": 0.6177375068643602, + "grad_norm": 0.47176799178123474, + "learning_rate": 1.5675150531719195e-05, + "loss": 0.5513, + "step": 22498 + }, + { + "epoch": 0.6177649643053268, + "grad_norm": 0.4128495156764984, + "learning_rate": 1.5674794921364715e-05, + "loss": 0.5645, + "step": 22499 + }, + { + "epoch": 0.6177924217462932, + "grad_norm": 0.3483002185821533, + "learning_rate": 1.5674439300424928e-05, + "loss": 0.4971, + "step": 22500 + }, + { + "epoch": 0.6178198791872598, + "grad_norm": 0.4404960870742798, + "learning_rate": 1.56740836689005e-05, + "loss": 0.5405, + "step": 22501 + }, + { + "epoch": 0.6178473366282262, + "grad_norm": 0.39649009704589844, + "learning_rate": 1.5673728026792098e-05, + "loss": 0.4954, + "step": 22502 + }, + { + "epoch": 0.6178747940691928, + "grad_norm": 0.4728178083896637, + "learning_rate": 1.5673372374100376e-05, + "loss": 0.5752, + "step": 22503 + }, + { + "epoch": 0.6179022515101592, + "grad_norm": 0.3820502758026123, + "learning_rate": 1.567301671082601e-05, + "loss": 0.4969, + "step": 22504 + }, + { + "epoch": 0.6179297089511258, + "grad_norm": 0.3793199062347412, + "learning_rate": 1.5672661036969654e-05, + "loss": 0.4578, + "step": 22505 + }, + { + "epoch": 0.6179571663920923, + "grad_norm": 0.5064769983291626, + "learning_rate": 1.5672305352531978e-05, + "loss": 0.4991, + "step": 22506 + }, + { + "epoch": 0.6179846238330587, + "grad_norm": 0.3548373281955719, + "learning_rate": 1.567194965751364e-05, + "loss": 0.5038, + "step": 22507 + }, + { + "epoch": 0.6180120812740253, + "grad_norm": 0.36565566062927246, + "learning_rate": 1.5671593951915306e-05, + "loss": 0.4591, + "step": 22508 + }, + { + "epoch": 0.6180395387149917, + "grad_norm": 0.41689202189445496, + "learning_rate": 1.5671238235737642e-05, + "loss": 0.5052, + "step": 22509 + }, + { + "epoch": 0.6180669961559583, + "grad_norm": 0.4053367078304291, + "learning_rate": 1.5670882508981305e-05, + "loss": 0.4925, + "step": 22510 + }, + { + "epoch": 0.6180944535969247, + "grad_norm": 0.7188882231712341, + "learning_rate": 1.567052677164696e-05, + "loss": 0.5269, + "step": 22511 + }, + { + "epoch": 0.6181219110378913, + "grad_norm": 0.38092395663261414, + "learning_rate": 1.5670171023735278e-05, + "loss": 0.4686, + "step": 22512 + }, + { + "epoch": 0.6181493684788578, + "grad_norm": 0.37020590901374817, + "learning_rate": 1.5669815265246912e-05, + "loss": 0.5541, + "step": 22513 + }, + { + "epoch": 0.6181768259198243, + "grad_norm": 0.40984034538269043, + "learning_rate": 1.5669459496182537e-05, + "loss": 0.4438, + "step": 22514 + }, + { + "epoch": 0.6182042833607908, + "grad_norm": 0.4050333499908447, + "learning_rate": 1.5669103716542806e-05, + "loss": 0.5373, + "step": 22515 + }, + { + "epoch": 0.6182317408017572, + "grad_norm": 0.3880467712879181, + "learning_rate": 1.566874792632839e-05, + "loss": 0.5009, + "step": 22516 + }, + { + "epoch": 0.6182591982427238, + "grad_norm": 0.36311259865760803, + "learning_rate": 1.5668392125539948e-05, + "loss": 0.4725, + "step": 22517 + }, + { + "epoch": 0.6182866556836902, + "grad_norm": 0.3772839605808258, + "learning_rate": 1.5668036314178142e-05, + "loss": 0.4247, + "step": 22518 + }, + { + "epoch": 0.6183141131246568, + "grad_norm": 0.38857123255729675, + "learning_rate": 1.5667680492243647e-05, + "loss": 0.5217, + "step": 22519 + }, + { + "epoch": 0.6183415705656233, + "grad_norm": 0.3524090647697449, + "learning_rate": 1.5667324659737114e-05, + "loss": 0.4996, + "step": 22520 + }, + { + "epoch": 0.6183690280065898, + "grad_norm": 0.35601529479026794, + "learning_rate": 1.5666968816659213e-05, + "loss": 0.4758, + "step": 22521 + }, + { + "epoch": 0.6183964854475563, + "grad_norm": 0.48193857073783875, + "learning_rate": 1.5666612963010605e-05, + "loss": 0.6365, + "step": 22522 + }, + { + "epoch": 0.6184239428885228, + "grad_norm": 0.37535110116004944, + "learning_rate": 1.5666257098791958e-05, + "loss": 0.4974, + "step": 22523 + }, + { + "epoch": 0.6184514003294893, + "grad_norm": 0.48583611845970154, + "learning_rate": 1.5665901224003932e-05, + "loss": 0.4878, + "step": 22524 + }, + { + "epoch": 0.6184788577704557, + "grad_norm": 0.40410780906677246, + "learning_rate": 1.5665545338647192e-05, + "loss": 0.4991, + "step": 22525 + }, + { + "epoch": 0.6185063152114223, + "grad_norm": 0.3774104118347168, + "learning_rate": 1.56651894427224e-05, + "loss": 0.4895, + "step": 22526 + }, + { + "epoch": 0.6185337726523888, + "grad_norm": 0.37482893466949463, + "learning_rate": 1.5664833536230223e-05, + "loss": 0.5018, + "step": 22527 + }, + { + "epoch": 0.6185612300933553, + "grad_norm": 0.4690578281879425, + "learning_rate": 1.5664477619171323e-05, + "loss": 0.4754, + "step": 22528 + }, + { + "epoch": 0.6185886875343218, + "grad_norm": 0.4314991533756256, + "learning_rate": 1.5664121691546367e-05, + "loss": 0.5237, + "step": 22529 + }, + { + "epoch": 0.6186161449752883, + "grad_norm": 0.3820810616016388, + "learning_rate": 1.5663765753356014e-05, + "loss": 0.5792, + "step": 22530 + }, + { + "epoch": 0.6186436024162548, + "grad_norm": 0.37724629044532776, + "learning_rate": 1.5663409804600928e-05, + "loss": 0.5033, + "step": 22531 + }, + { + "epoch": 0.6186710598572213, + "grad_norm": 0.40327388048171997, + "learning_rate": 1.566305384528178e-05, + "loss": 0.4932, + "step": 22532 + }, + { + "epoch": 0.6186985172981878, + "grad_norm": 0.35402265191078186, + "learning_rate": 1.5662697875399224e-05, + "loss": 0.4803, + "step": 22533 + }, + { + "epoch": 0.6187259747391544, + "grad_norm": 0.383098840713501, + "learning_rate": 1.566234189495393e-05, + "loss": 0.5312, + "step": 22534 + }, + { + "epoch": 0.6187534321801208, + "grad_norm": 0.34646594524383545, + "learning_rate": 1.5661985903946564e-05, + "loss": 0.5014, + "step": 22535 + }, + { + "epoch": 0.6187808896210873, + "grad_norm": 0.4238612949848175, + "learning_rate": 1.5661629902377784e-05, + "loss": 0.5047, + "step": 22536 + }, + { + "epoch": 0.6188083470620538, + "grad_norm": 0.45120003819465637, + "learning_rate": 1.566127389024826e-05, + "loss": 0.6061, + "step": 22537 + }, + { + "epoch": 0.6188358045030203, + "grad_norm": 0.49754321575164795, + "learning_rate": 1.5660917867558657e-05, + "loss": 0.573, + "step": 22538 + }, + { + "epoch": 0.6188632619439868, + "grad_norm": 0.3791137933731079, + "learning_rate": 1.5660561834309625e-05, + "loss": 0.4359, + "step": 22539 + }, + { + "epoch": 0.6188907193849533, + "grad_norm": 0.3578185439109802, + "learning_rate": 1.5660205790501846e-05, + "loss": 0.481, + "step": 22540 + }, + { + "epoch": 0.6189181768259199, + "grad_norm": 0.4144853353500366, + "learning_rate": 1.5659849736135978e-05, + "loss": 0.5825, + "step": 22541 + }, + { + "epoch": 0.6189456342668863, + "grad_norm": 0.344546914100647, + "learning_rate": 1.565949367121268e-05, + "loss": 0.511, + "step": 22542 + }, + { + "epoch": 0.6189730917078529, + "grad_norm": 0.4364735186100006, + "learning_rate": 1.5659137595732622e-05, + "loss": 0.4951, + "step": 22543 + }, + { + "epoch": 0.6190005491488193, + "grad_norm": 0.3719455301761627, + "learning_rate": 1.5658781509696463e-05, + "loss": 0.43, + "step": 22544 + }, + { + "epoch": 0.6190280065897859, + "grad_norm": 0.44046077132225037, + "learning_rate": 1.565842541310487e-05, + "loss": 0.5231, + "step": 22545 + }, + { + "epoch": 0.6190554640307523, + "grad_norm": 0.392814040184021, + "learning_rate": 1.5658069305958513e-05, + "loss": 0.5, + "step": 22546 + }, + { + "epoch": 0.6190829214717188, + "grad_norm": 0.3251745104789734, + "learning_rate": 1.565771318825805e-05, + "loss": 0.455, + "step": 22547 + }, + { + "epoch": 0.6191103789126854, + "grad_norm": 0.47120508551597595, + "learning_rate": 1.5657357060004145e-05, + "loss": 0.4408, + "step": 22548 + }, + { + "epoch": 0.6191378363536518, + "grad_norm": 0.38633379340171814, + "learning_rate": 1.5657000921197457e-05, + "loss": 0.4884, + "step": 22549 + }, + { + "epoch": 0.6191652937946184, + "grad_norm": 0.3656744658946991, + "learning_rate": 1.5656644771838667e-05, + "loss": 0.4734, + "step": 22550 + }, + { + "epoch": 0.6191927512355848, + "grad_norm": 0.41446247696876526, + "learning_rate": 1.5656288611928424e-05, + "loss": 0.4433, + "step": 22551 + }, + { + "epoch": 0.6192202086765514, + "grad_norm": 0.3832482099533081, + "learning_rate": 1.5655932441467397e-05, + "loss": 0.5289, + "step": 22552 + }, + { + "epoch": 0.6192476661175178, + "grad_norm": 0.3296089768409729, + "learning_rate": 1.5655576260456252e-05, + "loss": 0.5573, + "step": 22553 + }, + { + "epoch": 0.6192751235584844, + "grad_norm": 0.4522261917591095, + "learning_rate": 1.565522006889565e-05, + "loss": 0.5375, + "step": 22554 + }, + { + "epoch": 0.6193025809994509, + "grad_norm": 0.3587777316570282, + "learning_rate": 1.565486386678626e-05, + "loss": 0.5655, + "step": 22555 + }, + { + "epoch": 0.6193300384404173, + "grad_norm": 0.38999614119529724, + "learning_rate": 1.5654507654128745e-05, + "loss": 0.589, + "step": 22556 + }, + { + "epoch": 0.6193574958813839, + "grad_norm": 0.3547980487346649, + "learning_rate": 1.5654151430923766e-05, + "loss": 0.4976, + "step": 22557 + }, + { + "epoch": 0.6193849533223503, + "grad_norm": 0.39765843749046326, + "learning_rate": 1.5653795197171993e-05, + "loss": 0.4987, + "step": 22558 + }, + { + "epoch": 0.6194124107633169, + "grad_norm": 0.36940908432006836, + "learning_rate": 1.5653438952874087e-05, + "loss": 0.5602, + "step": 22559 + }, + { + "epoch": 0.6194398682042833, + "grad_norm": 0.41817039251327515, + "learning_rate": 1.565308269803071e-05, + "loss": 0.5042, + "step": 22560 + }, + { + "epoch": 0.6194673256452499, + "grad_norm": 0.5144942998886108, + "learning_rate": 1.5652726432642533e-05, + "loss": 0.5298, + "step": 22561 + }, + { + "epoch": 0.6194947830862164, + "grad_norm": 0.4038824439048767, + "learning_rate": 1.5652370156710213e-05, + "loss": 0.5381, + "step": 22562 + }, + { + "epoch": 0.6195222405271829, + "grad_norm": 0.43323618173599243, + "learning_rate": 1.5652013870234424e-05, + "loss": 0.5913, + "step": 22563 + }, + { + "epoch": 0.6195496979681494, + "grad_norm": 0.3764198422431946, + "learning_rate": 1.5651657573215822e-05, + "loss": 0.5325, + "step": 22564 + }, + { + "epoch": 0.6195771554091158, + "grad_norm": 0.3528798818588257, + "learning_rate": 1.5651301265655075e-05, + "loss": 0.4174, + "step": 22565 + }, + { + "epoch": 0.6196046128500824, + "grad_norm": 0.9821829199790955, + "learning_rate": 1.5650944947552847e-05, + "loss": 0.5163, + "step": 22566 + }, + { + "epoch": 0.6196320702910488, + "grad_norm": 0.35506102442741394, + "learning_rate": 1.56505886189098e-05, + "loss": 0.5539, + "step": 22567 + }, + { + "epoch": 0.6196595277320154, + "grad_norm": 0.37647560238838196, + "learning_rate": 1.565023227972661e-05, + "loss": 0.4637, + "step": 22568 + }, + { + "epoch": 0.6196869851729819, + "grad_norm": 0.3382308781147003, + "learning_rate": 1.564987593000393e-05, + "loss": 0.4214, + "step": 22569 + }, + { + "epoch": 0.6197144426139484, + "grad_norm": 0.4244212806224823, + "learning_rate": 1.5649519569742423e-05, + "loss": 0.484, + "step": 22570 + }, + { + "epoch": 0.6197419000549149, + "grad_norm": 0.44075238704681396, + "learning_rate": 1.5649163198942762e-05, + "loss": 0.5294, + "step": 22571 + }, + { + "epoch": 0.6197693574958814, + "grad_norm": 0.37580519914627075, + "learning_rate": 1.564880681760561e-05, + "loss": 0.4475, + "step": 22572 + }, + { + "epoch": 0.6197968149368479, + "grad_norm": 0.4242590665817261, + "learning_rate": 1.564845042573163e-05, + "loss": 0.5493, + "step": 22573 + }, + { + "epoch": 0.6198242723778143, + "grad_norm": 0.3536640405654907, + "learning_rate": 1.564809402332149e-05, + "loss": 0.4751, + "step": 22574 + }, + { + "epoch": 0.6198517298187809, + "grad_norm": 0.3672976791858673, + "learning_rate": 1.5647737610375845e-05, + "loss": 0.5642, + "step": 22575 + }, + { + "epoch": 0.6198791872597474, + "grad_norm": 0.35885342955589294, + "learning_rate": 1.564738118689537e-05, + "loss": 0.5382, + "step": 22576 + }, + { + "epoch": 0.6199066447007139, + "grad_norm": 0.3349631130695343, + "learning_rate": 1.564702475288073e-05, + "loss": 0.435, + "step": 22577 + }, + { + "epoch": 0.6199341021416804, + "grad_norm": 0.37958282232284546, + "learning_rate": 1.5646668308332583e-05, + "loss": 0.5096, + "step": 22578 + }, + { + "epoch": 0.6199615595826469, + "grad_norm": 0.4214232861995697, + "learning_rate": 1.5646311853251602e-05, + "loss": 0.4949, + "step": 22579 + }, + { + "epoch": 0.6199890170236134, + "grad_norm": 0.35895994305610657, + "learning_rate": 1.564595538763844e-05, + "loss": 0.497, + "step": 22580 + }, + { + "epoch": 0.6200164744645799, + "grad_norm": 0.40956422686576843, + "learning_rate": 1.5645598911493777e-05, + "loss": 0.5213, + "step": 22581 + }, + { + "epoch": 0.6200439319055464, + "grad_norm": 0.38975080847740173, + "learning_rate": 1.5645242424818264e-05, + "loss": 0.5406, + "step": 22582 + }, + { + "epoch": 0.620071389346513, + "grad_norm": 0.48833921551704407, + "learning_rate": 1.5644885927612573e-05, + "loss": 0.4871, + "step": 22583 + }, + { + "epoch": 0.6200988467874794, + "grad_norm": 0.35668689012527466, + "learning_rate": 1.564452941987737e-05, + "loss": 0.519, + "step": 22584 + }, + { + "epoch": 0.620126304228446, + "grad_norm": 0.34398776292800903, + "learning_rate": 1.5644172901613316e-05, + "loss": 0.4791, + "step": 22585 + }, + { + "epoch": 0.6201537616694124, + "grad_norm": 0.3954851031303406, + "learning_rate": 1.5643816372821082e-05, + "loss": 0.4846, + "step": 22586 + }, + { + "epoch": 0.6201812191103789, + "grad_norm": 0.3854080140590668, + "learning_rate": 1.5643459833501326e-05, + "loss": 0.5038, + "step": 22587 + }, + { + "epoch": 0.6202086765513454, + "grad_norm": 0.40405189990997314, + "learning_rate": 1.5643103283654716e-05, + "loss": 0.4982, + "step": 22588 + }, + { + "epoch": 0.6202361339923119, + "grad_norm": 0.3797658681869507, + "learning_rate": 1.564274672328192e-05, + "loss": 0.535, + "step": 22589 + }, + { + "epoch": 0.6202635914332785, + "grad_norm": 0.37836140394210815, + "learning_rate": 1.5642390152383596e-05, + "loss": 0.5108, + "step": 22590 + }, + { + "epoch": 0.6202910488742449, + "grad_norm": 0.3773358166217804, + "learning_rate": 1.5642033570960418e-05, + "loss": 0.4456, + "step": 22591 + }, + { + "epoch": 0.6203185063152115, + "grad_norm": 0.3703380227088928, + "learning_rate": 1.5641676979013044e-05, + "loss": 0.5331, + "step": 22592 + }, + { + "epoch": 0.6203459637561779, + "grad_norm": 0.42922472953796387, + "learning_rate": 1.5641320376542144e-05, + "loss": 0.6243, + "step": 22593 + }, + { + "epoch": 0.6203734211971444, + "grad_norm": 0.5236635804176331, + "learning_rate": 1.5640963763548377e-05, + "loss": 0.5139, + "step": 22594 + }, + { + "epoch": 0.6204008786381109, + "grad_norm": 0.49097001552581787, + "learning_rate": 1.564060714003242e-05, + "loss": 0.5626, + "step": 22595 + }, + { + "epoch": 0.6204283360790774, + "grad_norm": 0.3785485029220581, + "learning_rate": 1.5640250505994924e-05, + "loss": 0.4871, + "step": 22596 + }, + { + "epoch": 0.620455793520044, + "grad_norm": 0.34385889768600464, + "learning_rate": 1.5639893861436563e-05, + "loss": 0.4676, + "step": 22597 + }, + { + "epoch": 0.6204832509610104, + "grad_norm": 0.36455509066581726, + "learning_rate": 1.5639537206357995e-05, + "loss": 0.4661, + "step": 22598 + }, + { + "epoch": 0.620510708401977, + "grad_norm": 0.41368114948272705, + "learning_rate": 1.5639180540759894e-05, + "loss": 0.4591, + "step": 22599 + }, + { + "epoch": 0.6205381658429434, + "grad_norm": 0.37270450592041016, + "learning_rate": 1.563882386464292e-05, + "loss": 0.4966, + "step": 22600 + }, + { + "epoch": 0.62056562328391, + "grad_norm": 0.46291279792785645, + "learning_rate": 1.5638467178007744e-05, + "loss": 0.5738, + "step": 22601 + }, + { + "epoch": 0.6205930807248764, + "grad_norm": 0.3718380033969879, + "learning_rate": 1.5638110480855023e-05, + "loss": 0.5004, + "step": 22602 + }, + { + "epoch": 0.620620538165843, + "grad_norm": 0.4439149796962738, + "learning_rate": 1.563775377318543e-05, + "loss": 0.5323, + "step": 22603 + }, + { + "epoch": 0.6206479956068095, + "grad_norm": 0.33601468801498413, + "learning_rate": 1.5637397054999627e-05, + "loss": 0.4517, + "step": 22604 + }, + { + "epoch": 0.6206754530477759, + "grad_norm": 0.4747694730758667, + "learning_rate": 1.5637040326298273e-05, + "loss": 0.5014, + "step": 22605 + }, + { + "epoch": 0.6207029104887425, + "grad_norm": 0.39658188819885254, + "learning_rate": 1.5636683587082045e-05, + "loss": 0.5622, + "step": 22606 + }, + { + "epoch": 0.6207303679297089, + "grad_norm": 0.3651837706565857, + "learning_rate": 1.5636326837351604e-05, + "loss": 0.5836, + "step": 22607 + }, + { + "epoch": 0.6207578253706755, + "grad_norm": 0.36169058084487915, + "learning_rate": 1.563597007710761e-05, + "loss": 0.5424, + "step": 22608 + }, + { + "epoch": 0.6207852828116419, + "grad_norm": 0.3912159502506256, + "learning_rate": 1.5635613306350735e-05, + "loss": 0.5342, + "step": 22609 + }, + { + "epoch": 0.6208127402526085, + "grad_norm": 0.3356937766075134, + "learning_rate": 1.5635256525081642e-05, + "loss": 0.4676, + "step": 22610 + }, + { + "epoch": 0.620840197693575, + "grad_norm": 0.41820642352104187, + "learning_rate": 1.5634899733301e-05, + "loss": 0.5162, + "step": 22611 + }, + { + "epoch": 0.6208676551345415, + "grad_norm": 0.4147910475730896, + "learning_rate": 1.563454293100947e-05, + "loss": 0.5248, + "step": 22612 + }, + { + "epoch": 0.620895112575508, + "grad_norm": 0.3181074261665344, + "learning_rate": 1.5634186118207715e-05, + "loss": 0.3554, + "step": 22613 + }, + { + "epoch": 0.6209225700164744, + "grad_norm": 0.40373265743255615, + "learning_rate": 1.5633829294896406e-05, + "loss": 0.4901, + "step": 22614 + }, + { + "epoch": 0.620950027457441, + "grad_norm": 0.3634554445743561, + "learning_rate": 1.5633472461076214e-05, + "loss": 0.5209, + "step": 22615 + }, + { + "epoch": 0.6209774848984074, + "grad_norm": 0.3847080171108246, + "learning_rate": 1.563311561674779e-05, + "loss": 0.5679, + "step": 22616 + }, + { + "epoch": 0.621004942339374, + "grad_norm": 0.32066574692726135, + "learning_rate": 1.5632758761911813e-05, + "loss": 0.4273, + "step": 22617 + }, + { + "epoch": 0.6210323997803405, + "grad_norm": 0.38394179940223694, + "learning_rate": 1.5632401896568944e-05, + "loss": 0.5163, + "step": 22618 + }, + { + "epoch": 0.621059857221307, + "grad_norm": 0.3617333173751831, + "learning_rate": 1.563204502071984e-05, + "loss": 0.458, + "step": 22619 + }, + { + "epoch": 0.6210873146622735, + "grad_norm": 0.5677087903022766, + "learning_rate": 1.563168813436518e-05, + "loss": 0.4831, + "step": 22620 + }, + { + "epoch": 0.62111477210324, + "grad_norm": 0.45325592160224915, + "learning_rate": 1.5631331237505625e-05, + "loss": 0.5384, + "step": 22621 + }, + { + "epoch": 0.6211422295442065, + "grad_norm": 0.42556658387184143, + "learning_rate": 1.5630974330141837e-05, + "loss": 0.5262, + "step": 22622 + }, + { + "epoch": 0.6211696869851729, + "grad_norm": 0.3974032402038574, + "learning_rate": 1.563061741227449e-05, + "loss": 0.5294, + "step": 22623 + }, + { + "epoch": 0.6211971444261395, + "grad_norm": 0.39191335439682007, + "learning_rate": 1.5630260483904236e-05, + "loss": 0.4514, + "step": 22624 + }, + { + "epoch": 0.621224601867106, + "grad_norm": 0.3502022922039032, + "learning_rate": 1.5629903545031755e-05, + "loss": 0.5468, + "step": 22625 + }, + { + "epoch": 0.6212520593080725, + "grad_norm": 0.4027695059776306, + "learning_rate": 1.5629546595657705e-05, + "loss": 0.5074, + "step": 22626 + }, + { + "epoch": 0.621279516749039, + "grad_norm": 0.3561052083969116, + "learning_rate": 1.5629189635782753e-05, + "loss": 0.505, + "step": 22627 + }, + { + "epoch": 0.6213069741900055, + "grad_norm": 0.38470059633255005, + "learning_rate": 1.5628832665407568e-05, + "loss": 0.5201, + "step": 22628 + }, + { + "epoch": 0.621334431630972, + "grad_norm": 0.36712646484375, + "learning_rate": 1.5628475684532813e-05, + "loss": 0.5117, + "step": 22629 + }, + { + "epoch": 0.6213618890719385, + "grad_norm": 0.9152352213859558, + "learning_rate": 1.5628118693159153e-05, + "loss": 0.5441, + "step": 22630 + }, + { + "epoch": 0.621389346512905, + "grad_norm": 0.3418535590171814, + "learning_rate": 1.5627761691287257e-05, + "loss": 0.5302, + "step": 22631 + }, + { + "epoch": 0.6214168039538714, + "grad_norm": 0.4033520519733429, + "learning_rate": 1.562740467891779e-05, + "loss": 0.4855, + "step": 22632 + }, + { + "epoch": 0.621444261394838, + "grad_norm": 0.3610757887363434, + "learning_rate": 1.562704765605142e-05, + "loss": 0.5, + "step": 22633 + }, + { + "epoch": 0.6214717188358045, + "grad_norm": 0.4056897759437561, + "learning_rate": 1.5626690622688804e-05, + "loss": 0.4693, + "step": 22634 + }, + { + "epoch": 0.621499176276771, + "grad_norm": 0.332472026348114, + "learning_rate": 1.5626333578830613e-05, + "loss": 0.4633, + "step": 22635 + }, + { + "epoch": 0.6215266337177375, + "grad_norm": 0.4023151993751526, + "learning_rate": 1.562597652447752e-05, + "loss": 0.5453, + "step": 22636 + }, + { + "epoch": 0.621554091158704, + "grad_norm": 0.3857662081718445, + "learning_rate": 1.562561945963018e-05, + "loss": 0.544, + "step": 22637 + }, + { + "epoch": 0.6215815485996705, + "grad_norm": 0.4395194947719574, + "learning_rate": 1.562526238428927e-05, + "loss": 0.5481, + "step": 22638 + }, + { + "epoch": 0.621609006040637, + "grad_norm": 0.3387903869152069, + "learning_rate": 1.5624905298455448e-05, + "loss": 0.4646, + "step": 22639 + }, + { + "epoch": 0.6216364634816035, + "grad_norm": 0.3833998143672943, + "learning_rate": 1.562454820212938e-05, + "loss": 0.4652, + "step": 22640 + }, + { + "epoch": 0.6216639209225701, + "grad_norm": 0.3517824411392212, + "learning_rate": 1.5624191095311736e-05, + "loss": 0.5051, + "step": 22641 + }, + { + "epoch": 0.6216913783635365, + "grad_norm": 0.4741119146347046, + "learning_rate": 1.562383397800318e-05, + "loss": 0.4815, + "step": 22642 + }, + { + "epoch": 0.621718835804503, + "grad_norm": 0.4316595792770386, + "learning_rate": 1.562347685020438e-05, + "loss": 0.4719, + "step": 22643 + }, + { + "epoch": 0.6217462932454695, + "grad_norm": 0.4460110068321228, + "learning_rate": 1.5623119711915998e-05, + "loss": 0.4274, + "step": 22644 + }, + { + "epoch": 0.621773750686436, + "grad_norm": 0.40927886962890625, + "learning_rate": 1.5622762563138706e-05, + "loss": 0.4898, + "step": 22645 + }, + { + "epoch": 0.6218012081274025, + "grad_norm": 0.3435615599155426, + "learning_rate": 1.5622405403873168e-05, + "loss": 0.5115, + "step": 22646 + }, + { + "epoch": 0.621828665568369, + "grad_norm": 0.3902473747730255, + "learning_rate": 1.5622048234120047e-05, + "loss": 0.475, + "step": 22647 + }, + { + "epoch": 0.6218561230093356, + "grad_norm": 0.4384199380874634, + "learning_rate": 1.5621691053880015e-05, + "loss": 0.5083, + "step": 22648 + }, + { + "epoch": 0.621883580450302, + "grad_norm": 0.3897465765476227, + "learning_rate": 1.5621333863153732e-05, + "loss": 0.5252, + "step": 22649 + }, + { + "epoch": 0.6219110378912686, + "grad_norm": 0.3587454855442047, + "learning_rate": 1.562097666194187e-05, + "loss": 0.4685, + "step": 22650 + }, + { + "epoch": 0.621938495332235, + "grad_norm": 0.43092775344848633, + "learning_rate": 1.5620619450245086e-05, + "loss": 0.5357, + "step": 22651 + }, + { + "epoch": 0.6219659527732015, + "grad_norm": 0.4256090223789215, + "learning_rate": 1.5620262228064058e-05, + "loss": 0.5146, + "step": 22652 + }, + { + "epoch": 0.621993410214168, + "grad_norm": 0.41692033410072327, + "learning_rate": 1.561990499539945e-05, + "loss": 0.4634, + "step": 22653 + }, + { + "epoch": 0.6220208676551345, + "grad_norm": 0.3598794639110565, + "learning_rate": 1.561954775225192e-05, + "loss": 0.4653, + "step": 22654 + }, + { + "epoch": 0.6220483250961011, + "grad_norm": 0.3634341061115265, + "learning_rate": 1.561919049862214e-05, + "loss": 0.5068, + "step": 22655 + }, + { + "epoch": 0.6220757825370675, + "grad_norm": 0.3861195743083954, + "learning_rate": 1.561883323451078e-05, + "loss": 0.5829, + "step": 22656 + }, + { + "epoch": 0.6221032399780341, + "grad_norm": 0.3582019507884979, + "learning_rate": 1.56184759599185e-05, + "loss": 0.5283, + "step": 22657 + }, + { + "epoch": 0.6221306974190005, + "grad_norm": 0.5511125922203064, + "learning_rate": 1.5618118674845967e-05, + "loss": 0.6275, + "step": 22658 + }, + { + "epoch": 0.6221581548599671, + "grad_norm": 0.35205045342445374, + "learning_rate": 1.5617761379293854e-05, + "loss": 0.5127, + "step": 22659 + }, + { + "epoch": 0.6221856123009335, + "grad_norm": 0.33741921186447144, + "learning_rate": 1.5617404073262822e-05, + "loss": 0.4287, + "step": 22660 + }, + { + "epoch": 0.6222130697419, + "grad_norm": 0.43713945150375366, + "learning_rate": 1.561704675675354e-05, + "loss": 0.5366, + "step": 22661 + }, + { + "epoch": 0.6222405271828666, + "grad_norm": 0.34969034790992737, + "learning_rate": 1.5616689429766668e-05, + "loss": 0.4907, + "step": 22662 + }, + { + "epoch": 0.622267984623833, + "grad_norm": 0.3576207756996155, + "learning_rate": 1.561633209230288e-05, + "loss": 0.4884, + "step": 22663 + }, + { + "epoch": 0.6222954420647996, + "grad_norm": 0.3525940179824829, + "learning_rate": 1.561597474436284e-05, + "loss": 0.5335, + "step": 22664 + }, + { + "epoch": 0.622322899505766, + "grad_norm": 0.4156389534473419, + "learning_rate": 1.5615617385947218e-05, + "loss": 0.5456, + "step": 22665 + }, + { + "epoch": 0.6223503569467326, + "grad_norm": 0.43618497252464294, + "learning_rate": 1.5615260017056675e-05, + "loss": 0.5799, + "step": 22666 + }, + { + "epoch": 0.622377814387699, + "grad_norm": 0.36887216567993164, + "learning_rate": 1.561490263769188e-05, + "loss": 0.5182, + "step": 22667 + }, + { + "epoch": 0.6224052718286656, + "grad_norm": 0.3900572955608368, + "learning_rate": 1.56145452478535e-05, + "loss": 0.5064, + "step": 22668 + }, + { + "epoch": 0.6224327292696321, + "grad_norm": 0.47804415225982666, + "learning_rate": 1.56141878475422e-05, + "loss": 0.5715, + "step": 22669 + }, + { + "epoch": 0.6224601867105986, + "grad_norm": 0.4438173472881317, + "learning_rate": 1.5613830436758646e-05, + "loss": 0.5277, + "step": 22670 + }, + { + "epoch": 0.6224876441515651, + "grad_norm": 0.3804474472999573, + "learning_rate": 1.561347301550351e-05, + "loss": 0.5093, + "step": 22671 + }, + { + "epoch": 0.6225151015925315, + "grad_norm": 0.3673146665096283, + "learning_rate": 1.5613115583777454e-05, + "loss": 0.5292, + "step": 22672 + }, + { + "epoch": 0.6225425590334981, + "grad_norm": 0.48643922805786133, + "learning_rate": 1.5612758141581143e-05, + "loss": 0.5609, + "step": 22673 + }, + { + "epoch": 0.6225700164744645, + "grad_norm": 0.3528304398059845, + "learning_rate": 1.5612400688915254e-05, + "loss": 0.4291, + "step": 22674 + }, + { + "epoch": 0.6225974739154311, + "grad_norm": 0.3978547751903534, + "learning_rate": 1.561204322578044e-05, + "loss": 0.5745, + "step": 22675 + }, + { + "epoch": 0.6226249313563976, + "grad_norm": 0.3570099472999573, + "learning_rate": 1.5611685752177374e-05, + "loss": 0.5707, + "step": 22676 + }, + { + "epoch": 0.6226523887973641, + "grad_norm": 0.39851608872413635, + "learning_rate": 1.5611328268106728e-05, + "loss": 0.5501, + "step": 22677 + }, + { + "epoch": 0.6226798462383306, + "grad_norm": 0.33250051736831665, + "learning_rate": 1.5610970773569158e-05, + "loss": 0.4324, + "step": 22678 + }, + { + "epoch": 0.622707303679297, + "grad_norm": 0.4050813317298889, + "learning_rate": 1.561061326856534e-05, + "loss": 0.5022, + "step": 22679 + }, + { + "epoch": 0.6227347611202636, + "grad_norm": 0.47232159972190857, + "learning_rate": 1.561025575309594e-05, + "loss": 0.4989, + "step": 22680 + }, + { + "epoch": 0.62276221856123, + "grad_norm": 0.3878662884235382, + "learning_rate": 1.5609898227161618e-05, + "loss": 0.4574, + "step": 22681 + }, + { + "epoch": 0.6227896760021966, + "grad_norm": 0.46175870299339294, + "learning_rate": 1.5609540690763046e-05, + "loss": 0.554, + "step": 22682 + }, + { + "epoch": 0.6228171334431631, + "grad_norm": 0.4465007185935974, + "learning_rate": 1.5609183143900892e-05, + "loss": 0.6144, + "step": 22683 + }, + { + "epoch": 0.6228445908841296, + "grad_norm": 0.4138753116130829, + "learning_rate": 1.560882558657582e-05, + "loss": 0.4966, + "step": 22684 + }, + { + "epoch": 0.6228720483250961, + "grad_norm": 0.3988894820213318, + "learning_rate": 1.56084680187885e-05, + "loss": 0.5141, + "step": 22685 + }, + { + "epoch": 0.6228995057660626, + "grad_norm": 0.4476565420627594, + "learning_rate": 1.5608110440539594e-05, + "loss": 0.5225, + "step": 22686 + }, + { + "epoch": 0.6229269632070291, + "grad_norm": 0.3888532817363739, + "learning_rate": 1.5607752851829775e-05, + "loss": 0.5389, + "step": 22687 + }, + { + "epoch": 0.6229544206479956, + "grad_norm": 0.45980122685432434, + "learning_rate": 1.5607395252659708e-05, + "loss": 0.4463, + "step": 22688 + }, + { + "epoch": 0.6229818780889621, + "grad_norm": 0.3242710828781128, + "learning_rate": 1.5607037643030055e-05, + "loss": 0.4269, + "step": 22689 + }, + { + "epoch": 0.6230093355299287, + "grad_norm": 0.39005526900291443, + "learning_rate": 1.560668002294149e-05, + "loss": 0.5987, + "step": 22690 + }, + { + "epoch": 0.6230367929708951, + "grad_norm": 0.40786340832710266, + "learning_rate": 1.5606322392394674e-05, + "loss": 0.5291, + "step": 22691 + }, + { + "epoch": 0.6230642504118616, + "grad_norm": 0.36352455615997314, + "learning_rate": 1.560596475139028e-05, + "loss": 0.5293, + "step": 22692 + }, + { + "epoch": 0.6230917078528281, + "grad_norm": 0.3889124393463135, + "learning_rate": 1.5605607099928977e-05, + "loss": 0.4513, + "step": 22693 + }, + { + "epoch": 0.6231191652937946, + "grad_norm": 0.4477842152118683, + "learning_rate": 1.560524943801142e-05, + "loss": 0.5376, + "step": 22694 + }, + { + "epoch": 0.6231466227347611, + "grad_norm": 0.47595280408859253, + "learning_rate": 1.5604891765638288e-05, + "loss": 0.4798, + "step": 22695 + }, + { + "epoch": 0.6231740801757276, + "grad_norm": 0.39103659987449646, + "learning_rate": 1.5604534082810246e-05, + "loss": 0.4693, + "step": 22696 + }, + { + "epoch": 0.6232015376166942, + "grad_norm": 0.39449864625930786, + "learning_rate": 1.5604176389527955e-05, + "loss": 0.4763, + "step": 22697 + }, + { + "epoch": 0.6232289950576606, + "grad_norm": 0.3627716600894928, + "learning_rate": 1.5603818685792088e-05, + "loss": 0.4547, + "step": 22698 + }, + { + "epoch": 0.6232564524986272, + "grad_norm": 0.35830676555633545, + "learning_rate": 1.560346097160331e-05, + "loss": 0.4742, + "step": 22699 + }, + { + "epoch": 0.6232839099395936, + "grad_norm": 0.39674195647239685, + "learning_rate": 1.560310324696229e-05, + "loss": 0.4726, + "step": 22700 + }, + { + "epoch": 0.6233113673805601, + "grad_norm": 0.31904152035713196, + "learning_rate": 1.5602745511869693e-05, + "loss": 0.4338, + "step": 22701 + }, + { + "epoch": 0.6233388248215266, + "grad_norm": 0.405972421169281, + "learning_rate": 1.5602387766326187e-05, + "loss": 0.4822, + "step": 22702 + }, + { + "epoch": 0.6233662822624931, + "grad_norm": 0.37426167726516724, + "learning_rate": 1.560203001033244e-05, + "loss": 0.5103, + "step": 22703 + }, + { + "epoch": 0.6233937397034597, + "grad_norm": 0.3816124200820923, + "learning_rate": 1.5601672243889117e-05, + "loss": 0.529, + "step": 22704 + }, + { + "epoch": 0.6234211971444261, + "grad_norm": 0.36131882667541504, + "learning_rate": 1.560131446699689e-05, + "loss": 0.554, + "step": 22705 + }, + { + "epoch": 0.6234486545853927, + "grad_norm": 0.42496657371520996, + "learning_rate": 1.5600956679656426e-05, + "loss": 0.535, + "step": 22706 + }, + { + "epoch": 0.6234761120263591, + "grad_norm": 0.45510175824165344, + "learning_rate": 1.560059888186839e-05, + "loss": 0.5216, + "step": 22707 + }, + { + "epoch": 0.6235035694673257, + "grad_norm": 0.38218075037002563, + "learning_rate": 1.5600241073633444e-05, + "loss": 0.49, + "step": 22708 + }, + { + "epoch": 0.6235310269082921, + "grad_norm": 0.44072282314300537, + "learning_rate": 1.5599883254952265e-05, + "loss": 0.5628, + "step": 22709 + }, + { + "epoch": 0.6235584843492586, + "grad_norm": 0.40684399008750916, + "learning_rate": 1.5599525425825516e-05, + "loss": 0.4922, + "step": 22710 + }, + { + "epoch": 0.6235859417902252, + "grad_norm": 0.42713338136672974, + "learning_rate": 1.5599167586253866e-05, + "loss": 0.443, + "step": 22711 + }, + { + "epoch": 0.6236133992311916, + "grad_norm": 0.35335904359817505, + "learning_rate": 1.559880973623798e-05, + "loss": 0.497, + "step": 22712 + }, + { + "epoch": 0.6236408566721582, + "grad_norm": 0.3614974021911621, + "learning_rate": 1.5598451875778527e-05, + "loss": 0.5819, + "step": 22713 + }, + { + "epoch": 0.6236683141131246, + "grad_norm": 0.3396138548851013, + "learning_rate": 1.5598094004876175e-05, + "loss": 0.4732, + "step": 22714 + }, + { + "epoch": 0.6236957715540912, + "grad_norm": 0.34503644704818726, + "learning_rate": 1.559773612353159e-05, + "loss": 0.4349, + "step": 22715 + }, + { + "epoch": 0.6237232289950576, + "grad_norm": 0.38056257367134094, + "learning_rate": 1.5597378231745443e-05, + "loss": 0.4903, + "step": 22716 + }, + { + "epoch": 0.6237506864360242, + "grad_norm": 0.40777167677879333, + "learning_rate": 1.5597020329518396e-05, + "loss": 0.4778, + "step": 22717 + }, + { + "epoch": 0.6237781438769907, + "grad_norm": 0.3716745376586914, + "learning_rate": 1.559666241685112e-05, + "loss": 0.5017, + "step": 22718 + }, + { + "epoch": 0.6238056013179571, + "grad_norm": 0.35604244470596313, + "learning_rate": 1.5596304493744286e-05, + "loss": 0.4767, + "step": 22719 + }, + { + "epoch": 0.6238330587589237, + "grad_norm": 0.37230029702186584, + "learning_rate": 1.5595946560198555e-05, + "loss": 0.5212, + "step": 22720 + }, + { + "epoch": 0.6238605161998901, + "grad_norm": 0.36314722895622253, + "learning_rate": 1.5595588616214596e-05, + "loss": 0.4847, + "step": 22721 + }, + { + "epoch": 0.6238879736408567, + "grad_norm": 0.44539469480514526, + "learning_rate": 1.559523066179308e-05, + "loss": 0.494, + "step": 22722 + }, + { + "epoch": 0.6239154310818231, + "grad_norm": 0.414894700050354, + "learning_rate": 1.5594872696934676e-05, + "loss": 0.4902, + "step": 22723 + }, + { + "epoch": 0.6239428885227897, + "grad_norm": 0.4315321147441864, + "learning_rate": 1.5594514721640046e-05, + "loss": 0.5386, + "step": 22724 + }, + { + "epoch": 0.6239703459637562, + "grad_norm": 0.39150580763816833, + "learning_rate": 1.5594156735909858e-05, + "loss": 0.5448, + "step": 22725 + }, + { + "epoch": 0.6239978034047227, + "grad_norm": 0.3787269592285156, + "learning_rate": 1.5593798739744787e-05, + "loss": 0.495, + "step": 22726 + }, + { + "epoch": 0.6240252608456892, + "grad_norm": 0.34430986642837524, + "learning_rate": 1.5593440733145493e-05, + "loss": 0.4511, + "step": 22727 + }, + { + "epoch": 0.6240527182866556, + "grad_norm": 0.6568994522094727, + "learning_rate": 1.5593082716112647e-05, + "loss": 0.4838, + "step": 22728 + }, + { + "epoch": 0.6240801757276222, + "grad_norm": 0.38530707359313965, + "learning_rate": 1.559272468864692e-05, + "loss": 0.4793, + "step": 22729 + }, + { + "epoch": 0.6241076331685886, + "grad_norm": 0.40359801054000854, + "learning_rate": 1.5592366650748973e-05, + "loss": 0.5192, + "step": 22730 + }, + { + "epoch": 0.6241350906095552, + "grad_norm": 0.37862539291381836, + "learning_rate": 1.5592008602419476e-05, + "loss": 0.4663, + "step": 22731 + }, + { + "epoch": 0.6241625480505217, + "grad_norm": 0.45218411087989807, + "learning_rate": 1.55916505436591e-05, + "loss": 0.5075, + "step": 22732 + }, + { + "epoch": 0.6241900054914882, + "grad_norm": 0.39124545454978943, + "learning_rate": 1.559129247446851e-05, + "loss": 0.5156, + "step": 22733 + }, + { + "epoch": 0.6242174629324547, + "grad_norm": 0.33560776710510254, + "learning_rate": 1.559093439484838e-05, + "loss": 0.4552, + "step": 22734 + }, + { + "epoch": 0.6242449203734212, + "grad_norm": 0.3826710879802704, + "learning_rate": 1.559057630479937e-05, + "loss": 0.5314, + "step": 22735 + }, + { + "epoch": 0.6242723778143877, + "grad_norm": 0.3951885998249054, + "learning_rate": 1.5590218204322146e-05, + "loss": 0.51, + "step": 22736 + }, + { + "epoch": 0.6242998352553542, + "grad_norm": 0.38460275530815125, + "learning_rate": 1.5589860093417385e-05, + "loss": 0.485, + "step": 22737 + }, + { + "epoch": 0.6243272926963207, + "grad_norm": 0.3685416877269745, + "learning_rate": 1.558950197208575e-05, + "loss": 0.5065, + "step": 22738 + }, + { + "epoch": 0.6243547501372873, + "grad_norm": 0.3551209270954132, + "learning_rate": 1.5589143840327913e-05, + "loss": 0.439, + "step": 22739 + }, + { + "epoch": 0.6243822075782537, + "grad_norm": 0.3589816987514496, + "learning_rate": 1.5588785698144536e-05, + "loss": 0.4577, + "step": 22740 + }, + { + "epoch": 0.6244096650192202, + "grad_norm": 0.483177125453949, + "learning_rate": 1.558842754553629e-05, + "loss": 0.5091, + "step": 22741 + }, + { + "epoch": 0.6244371224601867, + "grad_norm": 0.38260507583618164, + "learning_rate": 1.5588069382503846e-05, + "loss": 0.5084, + "step": 22742 + }, + { + "epoch": 0.6244645799011532, + "grad_norm": 0.36432504653930664, + "learning_rate": 1.5587711209047863e-05, + "loss": 0.4884, + "step": 22743 + }, + { + "epoch": 0.6244920373421197, + "grad_norm": 0.4380388557910919, + "learning_rate": 1.5587353025169022e-05, + "loss": 0.5313, + "step": 22744 + }, + { + "epoch": 0.6245194947830862, + "grad_norm": 0.4005570709705353, + "learning_rate": 1.558699483086798e-05, + "loss": 0.4904, + "step": 22745 + }, + { + "epoch": 0.6245469522240528, + "grad_norm": 0.46796050667762756, + "learning_rate": 1.5586636626145412e-05, + "loss": 0.5251, + "step": 22746 + }, + { + "epoch": 0.6245744096650192, + "grad_norm": 0.38354021310806274, + "learning_rate": 1.5586278411001982e-05, + "loss": 0.5363, + "step": 22747 + }, + { + "epoch": 0.6246018671059858, + "grad_norm": 0.35663899779319763, + "learning_rate": 1.558592018543836e-05, + "loss": 0.4533, + "step": 22748 + }, + { + "epoch": 0.6246293245469522, + "grad_norm": 0.4117232859134674, + "learning_rate": 1.5585561949455214e-05, + "loss": 0.4495, + "step": 22749 + }, + { + "epoch": 0.6246567819879187, + "grad_norm": 0.41512274742126465, + "learning_rate": 1.5585203703053212e-05, + "loss": 0.5262, + "step": 22750 + }, + { + "epoch": 0.6246842394288852, + "grad_norm": 0.4299023747444153, + "learning_rate": 1.558484544623302e-05, + "loss": 0.5089, + "step": 22751 + }, + { + "epoch": 0.6247116968698517, + "grad_norm": 0.3952581584453583, + "learning_rate": 1.5584487178995315e-05, + "loss": 0.5521, + "step": 22752 + }, + { + "epoch": 0.6247391543108183, + "grad_norm": 0.33879366517066956, + "learning_rate": 1.5584128901340755e-05, + "loss": 0.4641, + "step": 22753 + }, + { + "epoch": 0.6247666117517847, + "grad_norm": 0.40377378463745117, + "learning_rate": 1.5583770613270014e-05, + "loss": 0.4736, + "step": 22754 + }, + { + "epoch": 0.6247940691927513, + "grad_norm": 0.3976646959781647, + "learning_rate": 1.5583412314783756e-05, + "loss": 0.5197, + "step": 22755 + }, + { + "epoch": 0.6248215266337177, + "grad_norm": 0.3554416000843048, + "learning_rate": 1.5583054005882655e-05, + "loss": 0.4831, + "step": 22756 + }, + { + "epoch": 0.6248489840746843, + "grad_norm": 0.37269291281700134, + "learning_rate": 1.5582695686567375e-05, + "loss": 0.5301, + "step": 22757 + }, + { + "epoch": 0.6248764415156507, + "grad_norm": 0.41960182785987854, + "learning_rate": 1.5582337356838585e-05, + "loss": 0.4962, + "step": 22758 + }, + { + "epoch": 0.6249038989566172, + "grad_norm": 0.4099493622779846, + "learning_rate": 1.5581979016696958e-05, + "loss": 0.5084, + "step": 22759 + }, + { + "epoch": 0.6249313563975838, + "grad_norm": 0.4261516332626343, + "learning_rate": 1.5581620666143154e-05, + "loss": 0.4589, + "step": 22760 + }, + { + "epoch": 0.6249588138385502, + "grad_norm": 0.3573504388332367, + "learning_rate": 1.5581262305177848e-05, + "loss": 0.5036, + "step": 22761 + }, + { + "epoch": 0.6249862712795168, + "grad_norm": 0.3524474799633026, + "learning_rate": 1.5580903933801705e-05, + "loss": 0.4447, + "step": 22762 + }, + { + "epoch": 0.6250137287204832, + "grad_norm": 0.37237852811813354, + "learning_rate": 1.5580545552015398e-05, + "loss": 0.4896, + "step": 22763 + }, + { + "epoch": 0.6250411861614498, + "grad_norm": 0.4299606382846832, + "learning_rate": 1.5580187159819587e-05, + "loss": 0.5009, + "step": 22764 + }, + { + "epoch": 0.6250686436024162, + "grad_norm": 0.36828887462615967, + "learning_rate": 1.5579828757214952e-05, + "loss": 0.4155, + "step": 22765 + }, + { + "epoch": 0.6250961010433828, + "grad_norm": 0.38769084215164185, + "learning_rate": 1.557947034420215e-05, + "loss": 0.4752, + "step": 22766 + }, + { + "epoch": 0.6251235584843493, + "grad_norm": 0.37206271290779114, + "learning_rate": 1.557911192078186e-05, + "loss": 0.4872, + "step": 22767 + }, + { + "epoch": 0.6251510159253157, + "grad_norm": 0.38320013880729675, + "learning_rate": 1.5578753486954744e-05, + "loss": 0.4651, + "step": 22768 + }, + { + "epoch": 0.6251784733662823, + "grad_norm": 0.44733524322509766, + "learning_rate": 1.557839504272147e-05, + "loss": 0.5082, + "step": 22769 + }, + { + "epoch": 0.6252059308072487, + "grad_norm": 0.3549010157585144, + "learning_rate": 1.5578036588082714e-05, + "loss": 0.5009, + "step": 22770 + }, + { + "epoch": 0.6252333882482153, + "grad_norm": 0.49526211619377136, + "learning_rate": 1.5577678123039137e-05, + "loss": 0.492, + "step": 22771 + }, + { + "epoch": 0.6252608456891817, + "grad_norm": 0.3599933981895447, + "learning_rate": 1.5577319647591406e-05, + "loss": 0.4295, + "step": 22772 + }, + { + "epoch": 0.6252883031301483, + "grad_norm": 0.9174363613128662, + "learning_rate": 1.55769611617402e-05, + "loss": 0.537, + "step": 22773 + }, + { + "epoch": 0.6253157605711148, + "grad_norm": 0.3954835832118988, + "learning_rate": 1.5576602665486178e-05, + "loss": 0.4932, + "step": 22774 + }, + { + "epoch": 0.6253432180120813, + "grad_norm": 0.3601706922054291, + "learning_rate": 1.5576244158830013e-05, + "loss": 0.4549, + "step": 22775 + }, + { + "epoch": 0.6253706754530478, + "grad_norm": 0.37067365646362305, + "learning_rate": 1.557588564177237e-05, + "loss": 0.5007, + "step": 22776 + }, + { + "epoch": 0.6253981328940142, + "grad_norm": 0.4348433017730713, + "learning_rate": 1.5575527114313924e-05, + "loss": 0.5612, + "step": 22777 + }, + { + "epoch": 0.6254255903349808, + "grad_norm": 0.6428379416465759, + "learning_rate": 1.557516857645534e-05, + "loss": 0.4535, + "step": 22778 + }, + { + "epoch": 0.6254530477759472, + "grad_norm": 0.3750077784061432, + "learning_rate": 1.5574810028197286e-05, + "loss": 0.4938, + "step": 22779 + }, + { + "epoch": 0.6254805052169138, + "grad_norm": 0.38986048102378845, + "learning_rate": 1.5574451469540436e-05, + "loss": 0.5633, + "step": 22780 + }, + { + "epoch": 0.6255079626578803, + "grad_norm": 0.48992031812667847, + "learning_rate": 1.557409290048545e-05, + "loss": 0.4444, + "step": 22781 + }, + { + "epoch": 0.6255354200988468, + "grad_norm": 0.4276306629180908, + "learning_rate": 1.5573734321033005e-05, + "loss": 0.5149, + "step": 22782 + }, + { + "epoch": 0.6255628775398133, + "grad_norm": 0.4124740958213806, + "learning_rate": 1.5573375731183764e-05, + "loss": 0.5508, + "step": 22783 + }, + { + "epoch": 0.6255903349807798, + "grad_norm": 0.4212484359741211, + "learning_rate": 1.55730171309384e-05, + "loss": 0.5488, + "step": 22784 + }, + { + "epoch": 0.6256177924217463, + "grad_norm": 0.458383172750473, + "learning_rate": 1.557265852029758e-05, + "loss": 0.5121, + "step": 22785 + }, + { + "epoch": 0.6256452498627127, + "grad_norm": 0.36886003613471985, + "learning_rate": 1.5572299899261972e-05, + "loss": 0.4295, + "step": 22786 + }, + { + "epoch": 0.6256727073036793, + "grad_norm": 0.4020388722419739, + "learning_rate": 1.5571941267832246e-05, + "loss": 0.4822, + "step": 22787 + }, + { + "epoch": 0.6257001647446458, + "grad_norm": 0.5739309787750244, + "learning_rate": 1.5571582626009074e-05, + "loss": 0.4956, + "step": 22788 + }, + { + "epoch": 0.6257276221856123, + "grad_norm": 0.5499763488769531, + "learning_rate": 1.557122397379312e-05, + "loss": 0.5177, + "step": 22789 + }, + { + "epoch": 0.6257550796265788, + "grad_norm": 0.3462575674057007, + "learning_rate": 1.5570865311185056e-05, + "loss": 0.4311, + "step": 22790 + }, + { + "epoch": 0.6257825370675453, + "grad_norm": 0.44280657172203064, + "learning_rate": 1.5570506638185552e-05, + "loss": 0.4957, + "step": 22791 + }, + { + "epoch": 0.6258099945085118, + "grad_norm": 0.38041606545448303, + "learning_rate": 1.557014795479527e-05, + "loss": 0.5174, + "step": 22792 + }, + { + "epoch": 0.6258374519494783, + "grad_norm": 0.37107986211776733, + "learning_rate": 1.5569789261014888e-05, + "loss": 0.5224, + "step": 22793 + }, + { + "epoch": 0.6258649093904448, + "grad_norm": 0.3959111273288727, + "learning_rate": 1.5569430556845073e-05, + "loss": 0.4643, + "step": 22794 + }, + { + "epoch": 0.6258923668314114, + "grad_norm": 0.3983900249004364, + "learning_rate": 1.556907184228649e-05, + "loss": 0.5265, + "step": 22795 + }, + { + "epoch": 0.6259198242723778, + "grad_norm": 0.37801679968833923, + "learning_rate": 1.556871311733981e-05, + "loss": 0.5135, + "step": 22796 + }, + { + "epoch": 0.6259472817133444, + "grad_norm": 0.39358097314834595, + "learning_rate": 1.5568354382005702e-05, + "loss": 0.5241, + "step": 22797 + }, + { + "epoch": 0.6259747391543108, + "grad_norm": 0.42735204100608826, + "learning_rate": 1.5567995636284838e-05, + "loss": 0.5028, + "step": 22798 + }, + { + "epoch": 0.6260021965952773, + "grad_norm": 0.364994078874588, + "learning_rate": 1.5567636880177887e-05, + "loss": 0.43, + "step": 22799 + }, + { + "epoch": 0.6260296540362438, + "grad_norm": 0.3359917998313904, + "learning_rate": 1.5567278113685512e-05, + "loss": 0.4711, + "step": 22800 + }, + { + "epoch": 0.6260571114772103, + "grad_norm": 0.3706456422805786, + "learning_rate": 1.5566919336808388e-05, + "loss": 0.5135, + "step": 22801 + }, + { + "epoch": 0.6260845689181769, + "grad_norm": 0.35560882091522217, + "learning_rate": 1.5566560549547185e-05, + "loss": 0.4687, + "step": 22802 + }, + { + "epoch": 0.6261120263591433, + "grad_norm": 0.4092799723148346, + "learning_rate": 1.5566201751902568e-05, + "loss": 0.5098, + "step": 22803 + }, + { + "epoch": 0.6261394838001099, + "grad_norm": 0.3720338046550751, + "learning_rate": 1.5565842943875208e-05, + "loss": 0.5246, + "step": 22804 + }, + { + "epoch": 0.6261669412410763, + "grad_norm": 0.39075514674186707, + "learning_rate": 1.5565484125465774e-05, + "loss": 0.4956, + "step": 22805 + }, + { + "epoch": 0.6261943986820429, + "grad_norm": 0.36669614911079407, + "learning_rate": 1.5565125296674936e-05, + "loss": 0.5522, + "step": 22806 + }, + { + "epoch": 0.6262218561230093, + "grad_norm": 0.3880726099014282, + "learning_rate": 1.5564766457503363e-05, + "loss": 0.5263, + "step": 22807 + }, + { + "epoch": 0.6262493135639758, + "grad_norm": 0.4020625948905945, + "learning_rate": 1.5564407607951727e-05, + "loss": 0.5234, + "step": 22808 + }, + { + "epoch": 0.6262767710049424, + "grad_norm": 0.3920706808567047, + "learning_rate": 1.556404874802069e-05, + "loss": 0.5092, + "step": 22809 + }, + { + "epoch": 0.6263042284459088, + "grad_norm": 0.37713736295700073, + "learning_rate": 1.556368987771093e-05, + "loss": 0.5631, + "step": 22810 + }, + { + "epoch": 0.6263316858868754, + "grad_norm": 0.3829099237918854, + "learning_rate": 1.5563330997023115e-05, + "loss": 0.49, + "step": 22811 + }, + { + "epoch": 0.6263591433278418, + "grad_norm": 0.38515612483024597, + "learning_rate": 1.5562972105957906e-05, + "loss": 0.6449, + "step": 22812 + }, + { + "epoch": 0.6263866007688084, + "grad_norm": 0.36675822734832764, + "learning_rate": 1.556261320451598e-05, + "loss": 0.4394, + "step": 22813 + }, + { + "epoch": 0.6264140582097748, + "grad_norm": 0.4350670278072357, + "learning_rate": 1.5562254292698008e-05, + "loss": 0.5397, + "step": 22814 + }, + { + "epoch": 0.6264415156507414, + "grad_norm": 0.4069001376628876, + "learning_rate": 1.5561895370504653e-05, + "loss": 0.5161, + "step": 22815 + }, + { + "epoch": 0.6264689730917079, + "grad_norm": 0.36855360865592957, + "learning_rate": 1.556153643793659e-05, + "loss": 0.4666, + "step": 22816 + }, + { + "epoch": 0.6264964305326743, + "grad_norm": 0.38894593715667725, + "learning_rate": 1.5561177494994487e-05, + "loss": 0.5153, + "step": 22817 + }, + { + "epoch": 0.6265238879736409, + "grad_norm": 0.34574103355407715, + "learning_rate": 1.5560818541679014e-05, + "loss": 0.4004, + "step": 22818 + }, + { + "epoch": 0.6265513454146073, + "grad_norm": 0.3992389142513275, + "learning_rate": 1.5560459577990837e-05, + "loss": 0.5846, + "step": 22819 + }, + { + "epoch": 0.6265788028555739, + "grad_norm": 0.43168431520462036, + "learning_rate": 1.556010060393063e-05, + "loss": 0.5918, + "step": 22820 + }, + { + "epoch": 0.6266062602965403, + "grad_norm": 0.4261564612388611, + "learning_rate": 1.555974161949906e-05, + "loss": 0.5989, + "step": 22821 + }, + { + "epoch": 0.6266337177375069, + "grad_norm": 1.2410686016082764, + "learning_rate": 1.5559382624696797e-05, + "loss": 0.4839, + "step": 22822 + }, + { + "epoch": 0.6266611751784734, + "grad_norm": 0.41785067319869995, + "learning_rate": 1.555902361952451e-05, + "loss": 0.5445, + "step": 22823 + }, + { + "epoch": 0.6266886326194399, + "grad_norm": 0.42996448278427124, + "learning_rate": 1.5558664603982873e-05, + "loss": 0.4732, + "step": 22824 + }, + { + "epoch": 0.6267160900604064, + "grad_norm": 0.40454572439193726, + "learning_rate": 1.555830557807255e-05, + "loss": 0.4784, + "step": 22825 + }, + { + "epoch": 0.6267435475013728, + "grad_norm": 0.5430436134338379, + "learning_rate": 1.5557946541794214e-05, + "loss": 0.4844, + "step": 22826 + }, + { + "epoch": 0.6267710049423394, + "grad_norm": 0.40204474329948425, + "learning_rate": 1.5557587495148535e-05, + "loss": 0.5854, + "step": 22827 + }, + { + "epoch": 0.6267984623833058, + "grad_norm": 0.3946147859096527, + "learning_rate": 1.5557228438136175e-05, + "loss": 0.5938, + "step": 22828 + }, + { + "epoch": 0.6268259198242724, + "grad_norm": 0.39173489809036255, + "learning_rate": 1.5556869370757816e-05, + "loss": 0.5717, + "step": 22829 + }, + { + "epoch": 0.6268533772652389, + "grad_norm": 0.38907572627067566, + "learning_rate": 1.555651029301412e-05, + "loss": 0.4707, + "step": 22830 + }, + { + "epoch": 0.6268808347062054, + "grad_norm": 0.3478902578353882, + "learning_rate": 1.5556151204905762e-05, + "loss": 0.4891, + "step": 22831 + }, + { + "epoch": 0.6269082921471719, + "grad_norm": 0.37382790446281433, + "learning_rate": 1.555579210643341e-05, + "loss": 0.4575, + "step": 22832 + }, + { + "epoch": 0.6269357495881384, + "grad_norm": 0.33274486660957336, + "learning_rate": 1.5555432997597727e-05, + "loss": 0.406, + "step": 22833 + }, + { + "epoch": 0.6269632070291049, + "grad_norm": 0.3746849000453949, + "learning_rate": 1.5555073878399393e-05, + "loss": 0.5383, + "step": 22834 + }, + { + "epoch": 0.6269906644700713, + "grad_norm": 0.3733014464378357, + "learning_rate": 1.5554714748839067e-05, + "loss": 0.5393, + "step": 22835 + }, + { + "epoch": 0.6270181219110379, + "grad_norm": 0.38321173191070557, + "learning_rate": 1.5554355608917432e-05, + "loss": 0.4955, + "step": 22836 + }, + { + "epoch": 0.6270455793520044, + "grad_norm": 0.41200900077819824, + "learning_rate": 1.5553996458635147e-05, + "loss": 0.5136, + "step": 22837 + }, + { + "epoch": 0.6270730367929709, + "grad_norm": 0.44663986563682556, + "learning_rate": 1.555363729799289e-05, + "loss": 0.5137, + "step": 22838 + }, + { + "epoch": 0.6271004942339374, + "grad_norm": 0.35124775767326355, + "learning_rate": 1.5553278126991322e-05, + "loss": 0.4507, + "step": 22839 + }, + { + "epoch": 0.6271279516749039, + "grad_norm": 0.3486226201057434, + "learning_rate": 1.555291894563112e-05, + "loss": 0.4178, + "step": 22840 + }, + { + "epoch": 0.6271554091158704, + "grad_norm": 0.3821168541908264, + "learning_rate": 1.5552559753912952e-05, + "loss": 0.5253, + "step": 22841 + }, + { + "epoch": 0.6271828665568369, + "grad_norm": 0.4208936393260956, + "learning_rate": 1.5552200551837486e-05, + "loss": 0.5636, + "step": 22842 + }, + { + "epoch": 0.6272103239978034, + "grad_norm": 0.421752005815506, + "learning_rate": 1.55518413394054e-05, + "loss": 0.4758, + "step": 22843 + }, + { + "epoch": 0.62723778143877, + "grad_norm": 0.38488152623176575, + "learning_rate": 1.555148211661735e-05, + "loss": 0.5373, + "step": 22844 + }, + { + "epoch": 0.6272652388797364, + "grad_norm": 0.3750523030757904, + "learning_rate": 1.555112288347402e-05, + "loss": 0.4945, + "step": 22845 + }, + { + "epoch": 0.627292696320703, + "grad_norm": 0.38427919149398804, + "learning_rate": 1.555076363997607e-05, + "loss": 0.5055, + "step": 22846 + }, + { + "epoch": 0.6273201537616694, + "grad_norm": 0.41494688391685486, + "learning_rate": 1.5550404386124177e-05, + "loss": 0.5223, + "step": 22847 + }, + { + "epoch": 0.6273476112026359, + "grad_norm": 0.34224554896354675, + "learning_rate": 1.5550045121919007e-05, + "loss": 0.457, + "step": 22848 + }, + { + "epoch": 0.6273750686436024, + "grad_norm": 0.40508005023002625, + "learning_rate": 1.554968584736123e-05, + "loss": 0.5519, + "step": 22849 + }, + { + "epoch": 0.6274025260845689, + "grad_norm": 0.38020893931388855, + "learning_rate": 1.5549326562451522e-05, + "loss": 0.4958, + "step": 22850 + }, + { + "epoch": 0.6274299835255355, + "grad_norm": 0.36947232484817505, + "learning_rate": 1.5548967267190546e-05, + "loss": 0.5076, + "step": 22851 + }, + { + "epoch": 0.6274574409665019, + "grad_norm": 0.6492322087287903, + "learning_rate": 1.5548607961578976e-05, + "loss": 0.5007, + "step": 22852 + }, + { + "epoch": 0.6274848984074685, + "grad_norm": 0.3472132980823517, + "learning_rate": 1.554824864561748e-05, + "loss": 0.4317, + "step": 22853 + }, + { + "epoch": 0.6275123558484349, + "grad_norm": 0.42903244495391846, + "learning_rate": 1.554788931930673e-05, + "loss": 0.5834, + "step": 22854 + }, + { + "epoch": 0.6275398132894014, + "grad_norm": 0.3504354953765869, + "learning_rate": 1.5547529982647397e-05, + "loss": 0.4944, + "step": 22855 + }, + { + "epoch": 0.6275672707303679, + "grad_norm": 0.3607040047645569, + "learning_rate": 1.5547170635640148e-05, + "loss": 0.4252, + "step": 22856 + }, + { + "epoch": 0.6275947281713344, + "grad_norm": 0.3889426589012146, + "learning_rate": 1.5546811278285656e-05, + "loss": 0.5082, + "step": 22857 + }, + { + "epoch": 0.627622185612301, + "grad_norm": 0.41101598739624023, + "learning_rate": 1.554645191058459e-05, + "loss": 0.4646, + "step": 22858 + }, + { + "epoch": 0.6276496430532674, + "grad_norm": 0.4031015634536743, + "learning_rate": 1.554609253253762e-05, + "loss": 0.5549, + "step": 22859 + }, + { + "epoch": 0.627677100494234, + "grad_norm": 0.6448476314544678, + "learning_rate": 1.554573314414542e-05, + "loss": 0.4427, + "step": 22860 + }, + { + "epoch": 0.6277045579352004, + "grad_norm": 0.37643370032310486, + "learning_rate": 1.554537374540866e-05, + "loss": 0.469, + "step": 22861 + }, + { + "epoch": 0.627732015376167, + "grad_norm": 0.35449615120887756, + "learning_rate": 1.5545014336328e-05, + "loss": 0.4818, + "step": 22862 + }, + { + "epoch": 0.6277594728171334, + "grad_norm": 0.3600231111049652, + "learning_rate": 1.5544654916904124e-05, + "loss": 0.4883, + "step": 22863 + }, + { + "epoch": 0.6277869302581, + "grad_norm": 0.3974609076976776, + "learning_rate": 1.5544295487137697e-05, + "loss": 0.5301, + "step": 22864 + }, + { + "epoch": 0.6278143876990665, + "grad_norm": 0.4185226857662201, + "learning_rate": 1.554393604702939e-05, + "loss": 0.5473, + "step": 22865 + }, + { + "epoch": 0.6278418451400329, + "grad_norm": 0.3722182810306549, + "learning_rate": 1.554357659657987e-05, + "loss": 0.5716, + "step": 22866 + }, + { + "epoch": 0.6278693025809995, + "grad_norm": 0.3273524343967438, + "learning_rate": 1.554321713578981e-05, + "loss": 0.4711, + "step": 22867 + }, + { + "epoch": 0.6278967600219659, + "grad_norm": 0.4521144926548004, + "learning_rate": 1.5542857664659888e-05, + "loss": 0.5389, + "step": 22868 + }, + { + "epoch": 0.6279242174629325, + "grad_norm": 0.3632655739784241, + "learning_rate": 1.5542498183190763e-05, + "loss": 0.5022, + "step": 22869 + }, + { + "epoch": 0.6279516749038989, + "grad_norm": 0.3864021301269531, + "learning_rate": 1.5542138691383107e-05, + "loss": 0.5172, + "step": 22870 + }, + { + "epoch": 0.6279791323448655, + "grad_norm": 0.35259300470352173, + "learning_rate": 1.5541779189237596e-05, + "loss": 0.5079, + "step": 22871 + }, + { + "epoch": 0.628006589785832, + "grad_norm": 0.4184204041957855, + "learning_rate": 1.5541419676754895e-05, + "loss": 0.5212, + "step": 22872 + }, + { + "epoch": 0.6280340472267985, + "grad_norm": 0.3980865478515625, + "learning_rate": 1.554106015393568e-05, + "loss": 0.5447, + "step": 22873 + }, + { + "epoch": 0.628061504667765, + "grad_norm": 0.42075014114379883, + "learning_rate": 1.5540700620780623e-05, + "loss": 0.5182, + "step": 22874 + }, + { + "epoch": 0.6280889621087314, + "grad_norm": 0.33270177245140076, + "learning_rate": 1.5540341077290384e-05, + "loss": 0.4186, + "step": 22875 + }, + { + "epoch": 0.628116419549698, + "grad_norm": 0.3945555090904236, + "learning_rate": 1.5539981523465645e-05, + "loss": 0.4882, + "step": 22876 + }, + { + "epoch": 0.6281438769906644, + "grad_norm": 0.3895684778690338, + "learning_rate": 1.5539621959307073e-05, + "loss": 0.4899, + "step": 22877 + }, + { + "epoch": 0.628171334431631, + "grad_norm": 0.36045852303504944, + "learning_rate": 1.5539262384815333e-05, + "loss": 0.4723, + "step": 22878 + }, + { + "epoch": 0.6281987918725975, + "grad_norm": 0.3603440225124359, + "learning_rate": 1.5538902799991106e-05, + "loss": 0.4668, + "step": 22879 + }, + { + "epoch": 0.628226249313564, + "grad_norm": 0.5865445137023926, + "learning_rate": 1.5538543204835055e-05, + "loss": 0.6415, + "step": 22880 + }, + { + "epoch": 0.6282537067545305, + "grad_norm": 0.3868865966796875, + "learning_rate": 1.5538183599347852e-05, + "loss": 0.5384, + "step": 22881 + }, + { + "epoch": 0.628281164195497, + "grad_norm": 0.38580116629600525, + "learning_rate": 1.5537823983530174e-05, + "loss": 0.4093, + "step": 22882 + }, + { + "epoch": 0.6283086216364635, + "grad_norm": 0.3335117697715759, + "learning_rate": 1.5537464357382682e-05, + "loss": 0.4028, + "step": 22883 + }, + { + "epoch": 0.6283360790774299, + "grad_norm": 0.39272889494895935, + "learning_rate": 1.5537104720906057e-05, + "loss": 0.5333, + "step": 22884 + }, + { + "epoch": 0.6283635365183965, + "grad_norm": 0.41438502073287964, + "learning_rate": 1.553674507410096e-05, + "loss": 0.5463, + "step": 22885 + }, + { + "epoch": 0.628390993959363, + "grad_norm": 0.40538445115089417, + "learning_rate": 1.5536385416968063e-05, + "loss": 0.5306, + "step": 22886 + }, + { + "epoch": 0.6284184514003295, + "grad_norm": 0.36028042435646057, + "learning_rate": 1.5536025749508047e-05, + "loss": 0.515, + "step": 22887 + }, + { + "epoch": 0.628445908841296, + "grad_norm": 0.36792925000190735, + "learning_rate": 1.553566607172157e-05, + "loss": 0.486, + "step": 22888 + }, + { + "epoch": 0.6284733662822625, + "grad_norm": 0.38605278730392456, + "learning_rate": 1.5535306383609316e-05, + "loss": 0.5534, + "step": 22889 + }, + { + "epoch": 0.628500823723229, + "grad_norm": 0.3563316762447357, + "learning_rate": 1.5534946685171943e-05, + "loss": 0.4876, + "step": 22890 + }, + { + "epoch": 0.6285282811641955, + "grad_norm": 0.4463382661342621, + "learning_rate": 1.5534586976410132e-05, + "loss": 0.5101, + "step": 22891 + }, + { + "epoch": 0.628555738605162, + "grad_norm": 0.3798021674156189, + "learning_rate": 1.553422725732455e-05, + "loss": 0.6178, + "step": 22892 + }, + { + "epoch": 0.6285831960461286, + "grad_norm": 0.39021819829940796, + "learning_rate": 1.5533867527915863e-05, + "loss": 0.4958, + "step": 22893 + }, + { + "epoch": 0.628610653487095, + "grad_norm": 0.38101503252983093, + "learning_rate": 1.553350778818475e-05, + "loss": 0.4599, + "step": 22894 + }, + { + "epoch": 0.6286381109280615, + "grad_norm": 0.364689439535141, + "learning_rate": 1.553314803813188e-05, + "loss": 0.5207, + "step": 22895 + }, + { + "epoch": 0.628665568369028, + "grad_norm": 0.38474416732788086, + "learning_rate": 1.553278827775792e-05, + "loss": 0.4767, + "step": 22896 + }, + { + "epoch": 0.6286930258099945, + "grad_norm": 0.38186755776405334, + "learning_rate": 1.5532428507063545e-05, + "loss": 0.3628, + "step": 22897 + }, + { + "epoch": 0.628720483250961, + "grad_norm": 0.4659254252910614, + "learning_rate": 1.5532068726049425e-05, + "loss": 0.4506, + "step": 22898 + }, + { + "epoch": 0.6287479406919275, + "grad_norm": 0.33205774426460266, + "learning_rate": 1.5531708934716234e-05, + "loss": 0.4864, + "step": 22899 + }, + { + "epoch": 0.628775398132894, + "grad_norm": 0.40065476298332214, + "learning_rate": 1.5531349133064636e-05, + "loss": 0.3985, + "step": 22900 + }, + { + "epoch": 0.6288028555738605, + "grad_norm": 0.4364413619041443, + "learning_rate": 1.553098932109531e-05, + "loss": 0.5059, + "step": 22901 + }, + { + "epoch": 0.6288303130148271, + "grad_norm": 0.40858250856399536, + "learning_rate": 1.5530629498808923e-05, + "loss": 0.486, + "step": 22902 + }, + { + "epoch": 0.6288577704557935, + "grad_norm": 0.38818472623825073, + "learning_rate": 1.5530269666206142e-05, + "loss": 0.4953, + "step": 22903 + }, + { + "epoch": 0.62888522789676, + "grad_norm": 0.46644943952560425, + "learning_rate": 1.552990982328765e-05, + "loss": 0.4777, + "step": 22904 + }, + { + "epoch": 0.6289126853377265, + "grad_norm": 0.34150806069374084, + "learning_rate": 1.552954997005411e-05, + "loss": 0.4126, + "step": 22905 + }, + { + "epoch": 0.628940142778693, + "grad_norm": 0.3914794325828552, + "learning_rate": 1.552919010650619e-05, + "loss": 0.5492, + "step": 22906 + }, + { + "epoch": 0.6289676002196595, + "grad_norm": 0.3092358410358429, + "learning_rate": 1.552883023264457e-05, + "loss": 0.4025, + "step": 22907 + }, + { + "epoch": 0.628995057660626, + "grad_norm": 0.3769543170928955, + "learning_rate": 1.5528470348469914e-05, + "loss": 0.526, + "step": 22908 + }, + { + "epoch": 0.6290225151015926, + "grad_norm": 0.369853138923645, + "learning_rate": 1.55281104539829e-05, + "loss": 0.47, + "step": 22909 + }, + { + "epoch": 0.629049972542559, + "grad_norm": 0.6670646667480469, + "learning_rate": 1.5527750549184193e-05, + "loss": 0.3542, + "step": 22910 + }, + { + "epoch": 0.6290774299835256, + "grad_norm": 0.3979659676551819, + "learning_rate": 1.5527390634074464e-05, + "loss": 0.4738, + "step": 22911 + }, + { + "epoch": 0.629104887424492, + "grad_norm": 0.41558852791786194, + "learning_rate": 1.5527030708654392e-05, + "loss": 0.612, + "step": 22912 + }, + { + "epoch": 0.6291323448654585, + "grad_norm": 0.36219242215156555, + "learning_rate": 1.5526670772924642e-05, + "loss": 0.5168, + "step": 22913 + }, + { + "epoch": 0.629159802306425, + "grad_norm": 0.5033413767814636, + "learning_rate": 1.5526310826885887e-05, + "loss": 0.4556, + "step": 22914 + }, + { + "epoch": 0.6291872597473915, + "grad_norm": 0.3732967674732208, + "learning_rate": 1.55259508705388e-05, + "loss": 0.5071, + "step": 22915 + }, + { + "epoch": 0.6292147171883581, + "grad_norm": 0.3665003180503845, + "learning_rate": 1.552559090388405e-05, + "loss": 0.5335, + "step": 22916 + }, + { + "epoch": 0.6292421746293245, + "grad_norm": 0.4048526883125305, + "learning_rate": 1.552523092692231e-05, + "loss": 0.5046, + "step": 22917 + }, + { + "epoch": 0.6292696320702911, + "grad_norm": 0.43777260184288025, + "learning_rate": 1.552487093965425e-05, + "loss": 0.5957, + "step": 22918 + }, + { + "epoch": 0.6292970895112575, + "grad_norm": 0.4162471294403076, + "learning_rate": 1.5524510942080545e-05, + "loss": 0.4852, + "step": 22919 + }, + { + "epoch": 0.6293245469522241, + "grad_norm": 0.4480338990688324, + "learning_rate": 1.552415093420186e-05, + "loss": 0.5534, + "step": 22920 + }, + { + "epoch": 0.6293520043931905, + "grad_norm": 0.43530499935150146, + "learning_rate": 1.552379091601887e-05, + "loss": 0.4981, + "step": 22921 + }, + { + "epoch": 0.629379461834157, + "grad_norm": 0.41387566924095154, + "learning_rate": 1.552343088753225e-05, + "loss": 0.552, + "step": 22922 + }, + { + "epoch": 0.6294069192751236, + "grad_norm": 0.37844300270080566, + "learning_rate": 1.5523070848742668e-05, + "loss": 0.4801, + "step": 22923 + }, + { + "epoch": 0.62943437671609, + "grad_norm": 0.35349395871162415, + "learning_rate": 1.5522710799650795e-05, + "loss": 0.4507, + "step": 22924 + }, + { + "epoch": 0.6294618341570566, + "grad_norm": 0.3599468469619751, + "learning_rate": 1.5522350740257304e-05, + "loss": 0.5003, + "step": 22925 + }, + { + "epoch": 0.629489291598023, + "grad_norm": 0.36881163716316223, + "learning_rate": 1.5521990670562866e-05, + "loss": 0.4976, + "step": 22926 + }, + { + "epoch": 0.6295167490389896, + "grad_norm": 0.37747684121131897, + "learning_rate": 1.552163059056815e-05, + "loss": 0.5033, + "step": 22927 + }, + { + "epoch": 0.629544206479956, + "grad_norm": 0.37997132539749146, + "learning_rate": 1.5521270500273838e-05, + "loss": 0.5049, + "step": 22928 + }, + { + "epoch": 0.6295716639209226, + "grad_norm": 0.3771025538444519, + "learning_rate": 1.552091039968059e-05, + "loss": 0.5421, + "step": 22929 + }, + { + "epoch": 0.6295991213618891, + "grad_norm": 0.39909541606903076, + "learning_rate": 1.5520550288789085e-05, + "loss": 0.5348, + "step": 22930 + }, + { + "epoch": 0.6296265788028556, + "grad_norm": 0.3892349600791931, + "learning_rate": 1.552019016759999e-05, + "loss": 0.5113, + "step": 22931 + }, + { + "epoch": 0.6296540362438221, + "grad_norm": 0.3954881727695465, + "learning_rate": 1.5519830036113974e-05, + "loss": 0.4903, + "step": 22932 + }, + { + "epoch": 0.6296814936847885, + "grad_norm": 0.37848150730133057, + "learning_rate": 1.551946989433172e-05, + "loss": 0.467, + "step": 22933 + }, + { + "epoch": 0.6297089511257551, + "grad_norm": 0.33760467171669006, + "learning_rate": 1.5519109742253887e-05, + "loss": 0.4737, + "step": 22934 + }, + { + "epoch": 0.6297364085667215, + "grad_norm": 0.4375936686992645, + "learning_rate": 1.551874957988116e-05, + "loss": 0.4385, + "step": 22935 + }, + { + "epoch": 0.6297638660076881, + "grad_norm": 0.4070012867450714, + "learning_rate": 1.55183894072142e-05, + "loss": 0.5442, + "step": 22936 + }, + { + "epoch": 0.6297913234486546, + "grad_norm": 0.38781827688217163, + "learning_rate": 1.551802922425368e-05, + "loss": 0.504, + "step": 22937 + }, + { + "epoch": 0.6298187808896211, + "grad_norm": 0.36431658267974854, + "learning_rate": 1.5517669031000275e-05, + "loss": 0.4919, + "step": 22938 + }, + { + "epoch": 0.6298462383305876, + "grad_norm": 0.40120962262153625, + "learning_rate": 1.5517308827454658e-05, + "loss": 0.4572, + "step": 22939 + }, + { + "epoch": 0.629873695771554, + "grad_norm": 0.45476678013801575, + "learning_rate": 1.55169486136175e-05, + "loss": 0.4951, + "step": 22940 + }, + { + "epoch": 0.6299011532125206, + "grad_norm": 0.426628977060318, + "learning_rate": 1.551658838948947e-05, + "loss": 0.5363, + "step": 22941 + }, + { + "epoch": 0.629928610653487, + "grad_norm": 0.39707183837890625, + "learning_rate": 1.5516228155071243e-05, + "loss": 0.5059, + "step": 22942 + }, + { + "epoch": 0.6299560680944536, + "grad_norm": 0.39806270599365234, + "learning_rate": 1.5515867910363488e-05, + "loss": 0.5273, + "step": 22943 + }, + { + "epoch": 0.6299835255354201, + "grad_norm": 0.4168098568916321, + "learning_rate": 1.551550765536688e-05, + "loss": 0.5993, + "step": 22944 + }, + { + "epoch": 0.6300109829763866, + "grad_norm": 0.3636227548122406, + "learning_rate": 1.5515147390082093e-05, + "loss": 0.5276, + "step": 22945 + }, + { + "epoch": 0.6300384404173531, + "grad_norm": 0.380204439163208, + "learning_rate": 1.551478711450979e-05, + "loss": 0.5359, + "step": 22946 + }, + { + "epoch": 0.6300658978583196, + "grad_norm": 0.41259220242500305, + "learning_rate": 1.551442682865065e-05, + "loss": 0.4738, + "step": 22947 + }, + { + "epoch": 0.6300933552992861, + "grad_norm": 0.4089786112308502, + "learning_rate": 1.5514066532505353e-05, + "loss": 0.502, + "step": 22948 + }, + { + "epoch": 0.6301208127402526, + "grad_norm": 0.40132635831832886, + "learning_rate": 1.5513706226074554e-05, + "loss": 0.5188, + "step": 22949 + }, + { + "epoch": 0.6301482701812191, + "grad_norm": 0.47363170981407166, + "learning_rate": 1.5513345909358935e-05, + "loss": 0.6702, + "step": 22950 + }, + { + "epoch": 0.6301757276221857, + "grad_norm": 0.4880548119544983, + "learning_rate": 1.5512985582359164e-05, + "loss": 0.6107, + "step": 22951 + }, + { + "epoch": 0.6302031850631521, + "grad_norm": 0.3790552616119385, + "learning_rate": 1.5512625245075915e-05, + "loss": 0.4982, + "step": 22952 + }, + { + "epoch": 0.6302306425041186, + "grad_norm": 0.35731813311576843, + "learning_rate": 1.5512264897509862e-05, + "loss": 0.4888, + "step": 22953 + }, + { + "epoch": 0.6302580999450851, + "grad_norm": 0.45342081785202026, + "learning_rate": 1.551190453966168e-05, + "loss": 0.498, + "step": 22954 + }, + { + "epoch": 0.6302855573860516, + "grad_norm": 0.3632499575614929, + "learning_rate": 1.551154417153203e-05, + "loss": 0.4954, + "step": 22955 + }, + { + "epoch": 0.6303130148270181, + "grad_norm": 0.3739071786403656, + "learning_rate": 1.5511183793121596e-05, + "loss": 0.5076, + "step": 22956 + }, + { + "epoch": 0.6303404722679846, + "grad_norm": 0.3282206654548645, + "learning_rate": 1.5510823404431043e-05, + "loss": 0.4573, + "step": 22957 + }, + { + "epoch": 0.6303679297089512, + "grad_norm": 0.4143099784851074, + "learning_rate": 1.5510463005461044e-05, + "loss": 0.4939, + "step": 22958 + }, + { + "epoch": 0.6303953871499176, + "grad_norm": 0.3599453568458557, + "learning_rate": 1.5510102596212276e-05, + "loss": 0.5093, + "step": 22959 + }, + { + "epoch": 0.6304228445908842, + "grad_norm": 0.38897475600242615, + "learning_rate": 1.5509742176685408e-05, + "loss": 0.4864, + "step": 22960 + }, + { + "epoch": 0.6304503020318506, + "grad_norm": 0.35738980770111084, + "learning_rate": 1.550938174688111e-05, + "loss": 0.5789, + "step": 22961 + }, + { + "epoch": 0.6304777594728171, + "grad_norm": 0.36751410365104675, + "learning_rate": 1.5509021306800057e-05, + "loss": 0.5056, + "step": 22962 + }, + { + "epoch": 0.6305052169137836, + "grad_norm": 0.3869949281215668, + "learning_rate": 1.550866085644292e-05, + "loss": 0.5262, + "step": 22963 + }, + { + "epoch": 0.6305326743547501, + "grad_norm": 0.34070298075675964, + "learning_rate": 1.5508300395810377e-05, + "loss": 0.4178, + "step": 22964 + }, + { + "epoch": 0.6305601317957167, + "grad_norm": 0.4324380159378052, + "learning_rate": 1.550793992490309e-05, + "loss": 0.4132, + "step": 22965 + }, + { + "epoch": 0.6305875892366831, + "grad_norm": 0.7570326924324036, + "learning_rate": 1.550757944372174e-05, + "loss": 0.5679, + "step": 22966 + }, + { + "epoch": 0.6306150466776497, + "grad_norm": 0.3829781413078308, + "learning_rate": 1.5507218952266998e-05, + "loss": 0.506, + "step": 22967 + }, + { + "epoch": 0.6306425041186161, + "grad_norm": 0.3756467401981354, + "learning_rate": 1.5506858450539532e-05, + "loss": 0.5107, + "step": 22968 + }, + { + "epoch": 0.6306699615595827, + "grad_norm": 0.4128085672855377, + "learning_rate": 1.5506497938540017e-05, + "loss": 0.5065, + "step": 22969 + }, + { + "epoch": 0.6306974190005491, + "grad_norm": 0.44040563702583313, + "learning_rate": 1.5506137416269126e-05, + "loss": 0.4693, + "step": 22970 + }, + { + "epoch": 0.6307248764415156, + "grad_norm": 0.3799006938934326, + "learning_rate": 1.5505776883727533e-05, + "loss": 0.575, + "step": 22971 + }, + { + "epoch": 0.6307523338824822, + "grad_norm": 0.39654120802879333, + "learning_rate": 1.5505416340915914e-05, + "loss": 0.4861, + "step": 22972 + }, + { + "epoch": 0.6307797913234486, + "grad_norm": 0.41995829343795776, + "learning_rate": 1.5505055787834928e-05, + "loss": 0.5109, + "step": 22973 + }, + { + "epoch": 0.6308072487644152, + "grad_norm": 0.3409193158149719, + "learning_rate": 1.550469522448526e-05, + "loss": 0.4153, + "step": 22974 + }, + { + "epoch": 0.6308347062053816, + "grad_norm": 0.3789425194263458, + "learning_rate": 1.5504334650867577e-05, + "loss": 0.5455, + "step": 22975 + }, + { + "epoch": 0.6308621636463482, + "grad_norm": 0.37028443813323975, + "learning_rate": 1.5503974066982554e-05, + "loss": 0.57, + "step": 22976 + }, + { + "epoch": 0.6308896210873146, + "grad_norm": 0.3581285774707794, + "learning_rate": 1.550361347283086e-05, + "loss": 0.5004, + "step": 22977 + }, + { + "epoch": 0.6309170785282812, + "grad_norm": 0.3471972942352295, + "learning_rate": 1.5503252868413173e-05, + "loss": 0.5012, + "step": 22978 + }, + { + "epoch": 0.6309445359692477, + "grad_norm": 0.3664281666278839, + "learning_rate": 1.5502892253730164e-05, + "loss": 0.4903, + "step": 22979 + }, + { + "epoch": 0.6309719934102141, + "grad_norm": 0.34512823820114136, + "learning_rate": 1.5502531628782502e-05, + "loss": 0.3929, + "step": 22980 + }, + { + "epoch": 0.6309994508511807, + "grad_norm": 0.41000044345855713, + "learning_rate": 1.5502170993570863e-05, + "loss": 0.5631, + "step": 22981 + }, + { + "epoch": 0.6310269082921471, + "grad_norm": 0.38099145889282227, + "learning_rate": 1.550181034809592e-05, + "loss": 0.4801, + "step": 22982 + }, + { + "epoch": 0.6310543657331137, + "grad_norm": 0.3215654492378235, + "learning_rate": 1.5501449692358342e-05, + "loss": 0.5111, + "step": 22983 + }, + { + "epoch": 0.6310818231740801, + "grad_norm": 0.3611699640750885, + "learning_rate": 1.550108902635881e-05, + "loss": 0.4901, + "step": 22984 + }, + { + "epoch": 0.6311092806150467, + "grad_norm": 0.43625354766845703, + "learning_rate": 1.5500728350097988e-05, + "loss": 0.6227, + "step": 22985 + }, + { + "epoch": 0.6311367380560132, + "grad_norm": 0.4470072090625763, + "learning_rate": 1.5500367663576552e-05, + "loss": 0.5361, + "step": 22986 + }, + { + "epoch": 0.6311641954969797, + "grad_norm": 0.369899183511734, + "learning_rate": 1.5500006966795177e-05, + "loss": 0.5058, + "step": 22987 + }, + { + "epoch": 0.6311916529379462, + "grad_norm": 0.38127028942108154, + "learning_rate": 1.5499646259754528e-05, + "loss": 0.5528, + "step": 22988 + }, + { + "epoch": 0.6312191103789127, + "grad_norm": 0.5687307715415955, + "learning_rate": 1.5499285542455286e-05, + "loss": 0.5159, + "step": 22989 + }, + { + "epoch": 0.6312465678198792, + "grad_norm": 0.37348249554634094, + "learning_rate": 1.5498924814898124e-05, + "loss": 0.448, + "step": 22990 + }, + { + "epoch": 0.6312740252608456, + "grad_norm": 0.33262965083122253, + "learning_rate": 1.5498564077083707e-05, + "loss": 0.4209, + "step": 22991 + }, + { + "epoch": 0.6313014827018122, + "grad_norm": 0.3300248384475708, + "learning_rate": 1.549820332901272e-05, + "loss": 0.4716, + "step": 22992 + }, + { + "epoch": 0.6313289401427787, + "grad_norm": 0.34855470061302185, + "learning_rate": 1.5497842570685823e-05, + "loss": 0.5353, + "step": 22993 + }, + { + "epoch": 0.6313563975837452, + "grad_norm": 0.3763744533061981, + "learning_rate": 1.54974818021037e-05, + "loss": 0.4528, + "step": 22994 + }, + { + "epoch": 0.6313838550247117, + "grad_norm": 0.3990195393562317, + "learning_rate": 1.5497121023267014e-05, + "loss": 0.583, + "step": 22995 + }, + { + "epoch": 0.6314113124656782, + "grad_norm": 0.622748613357544, + "learning_rate": 1.5496760234176445e-05, + "loss": 0.5596, + "step": 22996 + }, + { + "epoch": 0.6314387699066447, + "grad_norm": 0.4373394250869751, + "learning_rate": 1.5496399434832665e-05, + "loss": 0.5257, + "step": 22997 + }, + { + "epoch": 0.6314662273476112, + "grad_norm": 0.37158703804016113, + "learning_rate": 1.5496038625236343e-05, + "loss": 0.597, + "step": 22998 + }, + { + "epoch": 0.6314936847885777, + "grad_norm": 0.4015958607196808, + "learning_rate": 1.5495677805388157e-05, + "loss": 0.5728, + "step": 22999 + }, + { + "epoch": 0.6315211422295443, + "grad_norm": 0.3419346809387207, + "learning_rate": 1.5495316975288776e-05, + "loss": 0.4799, + "step": 23000 + }, + { + "epoch": 0.6315485996705107, + "grad_norm": 0.44648393988609314, + "learning_rate": 1.5494956134938877e-05, + "loss": 0.5393, + "step": 23001 + }, + { + "epoch": 0.6315760571114772, + "grad_norm": 0.355996698141098, + "learning_rate": 1.549459528433913e-05, + "loss": 0.504, + "step": 23002 + }, + { + "epoch": 0.6316035145524437, + "grad_norm": 0.44878286123275757, + "learning_rate": 1.549423442349021e-05, + "loss": 0.5551, + "step": 23003 + }, + { + "epoch": 0.6316309719934102, + "grad_norm": 0.3759143054485321, + "learning_rate": 1.5493873552392788e-05, + "loss": 0.5243, + "step": 23004 + }, + { + "epoch": 0.6316584294343767, + "grad_norm": 0.3697158098220825, + "learning_rate": 1.549351267104754e-05, + "loss": 0.5109, + "step": 23005 + }, + { + "epoch": 0.6316858868753432, + "grad_norm": 0.4165758490562439, + "learning_rate": 1.5493151779455136e-05, + "loss": 0.6003, + "step": 23006 + }, + { + "epoch": 0.6317133443163098, + "grad_norm": 0.3785117268562317, + "learning_rate": 1.5492790877616252e-05, + "loss": 0.5426, + "step": 23007 + }, + { + "epoch": 0.6317408017572762, + "grad_norm": 0.3606944978237152, + "learning_rate": 1.5492429965531556e-05, + "loss": 0.5128, + "step": 23008 + }, + { + "epoch": 0.6317682591982428, + "grad_norm": 0.3882286846637726, + "learning_rate": 1.549206904320173e-05, + "loss": 0.5526, + "step": 23009 + }, + { + "epoch": 0.6317957166392092, + "grad_norm": 0.45027923583984375, + "learning_rate": 1.549170811062744e-05, + "loss": 0.5131, + "step": 23010 + }, + { + "epoch": 0.6318231740801757, + "grad_norm": 0.41854751110076904, + "learning_rate": 1.549134716780936e-05, + "loss": 0.4339, + "step": 23011 + }, + { + "epoch": 0.6318506315211422, + "grad_norm": 0.3416934013366699, + "learning_rate": 1.5490986214748168e-05, + "loss": 0.4681, + "step": 23012 + }, + { + "epoch": 0.6318780889621087, + "grad_norm": 0.31367728114128113, + "learning_rate": 1.5490625251444535e-05, + "loss": 0.4279, + "step": 23013 + }, + { + "epoch": 0.6319055464030753, + "grad_norm": 0.33447420597076416, + "learning_rate": 1.549026427789913e-05, + "loss": 0.4394, + "step": 23014 + }, + { + "epoch": 0.6319330038440417, + "grad_norm": 0.4804166853427887, + "learning_rate": 1.548990329411263e-05, + "loss": 0.5008, + "step": 23015 + }, + { + "epoch": 0.6319604612850083, + "grad_norm": 0.3724417984485626, + "learning_rate": 1.548954230008571e-05, + "loss": 0.4979, + "step": 23016 + }, + { + "epoch": 0.6319879187259747, + "grad_norm": 0.39227259159088135, + "learning_rate": 1.5489181295819046e-05, + "loss": 0.5473, + "step": 23017 + }, + { + "epoch": 0.6320153761669413, + "grad_norm": 0.38329410552978516, + "learning_rate": 1.54888202813133e-05, + "loss": 0.4833, + "step": 23018 + }, + { + "epoch": 0.6320428336079077, + "grad_norm": 0.3778405785560608, + "learning_rate": 1.5488459256569153e-05, + "loss": 0.4863, + "step": 23019 + }, + { + "epoch": 0.6320702910488742, + "grad_norm": 0.43031617999076843, + "learning_rate": 1.548809822158728e-05, + "loss": 0.5496, + "step": 23020 + }, + { + "epoch": 0.6320977484898408, + "grad_norm": 0.3574939966201782, + "learning_rate": 1.5487737176368352e-05, + "loss": 0.5407, + "step": 23021 + }, + { + "epoch": 0.6321252059308072, + "grad_norm": 0.3761540651321411, + "learning_rate": 1.5487376120913043e-05, + "loss": 0.5075, + "step": 23022 + }, + { + "epoch": 0.6321526633717738, + "grad_norm": 0.4044707417488098, + "learning_rate": 1.5487015055222022e-05, + "loss": 0.4666, + "step": 23023 + }, + { + "epoch": 0.6321801208127402, + "grad_norm": 0.3461194336414337, + "learning_rate": 1.5486653979295973e-05, + "loss": 0.4671, + "step": 23024 + }, + { + "epoch": 0.6322075782537068, + "grad_norm": 0.34024983644485474, + "learning_rate": 1.548629289313556e-05, + "loss": 0.4816, + "step": 23025 + }, + { + "epoch": 0.6322350356946732, + "grad_norm": 0.3737383186817169, + "learning_rate": 1.548593179674146e-05, + "loss": 0.5188, + "step": 23026 + }, + { + "epoch": 0.6322624931356398, + "grad_norm": 0.36025986075401306, + "learning_rate": 1.5485570690114345e-05, + "loss": 0.4496, + "step": 23027 + }, + { + "epoch": 0.6322899505766063, + "grad_norm": 0.37574565410614014, + "learning_rate": 1.548520957325489e-05, + "loss": 0.505, + "step": 23028 + }, + { + "epoch": 0.6323174080175727, + "grad_norm": 0.38083672523498535, + "learning_rate": 1.5484848446163772e-05, + "loss": 0.533, + "step": 23029 + }, + { + "epoch": 0.6323448654585393, + "grad_norm": 0.368287056684494, + "learning_rate": 1.548448730884166e-05, + "loss": 0.4947, + "step": 23030 + }, + { + "epoch": 0.6323723228995057, + "grad_norm": 0.37873774766921997, + "learning_rate": 1.5484126161289226e-05, + "loss": 0.5177, + "step": 23031 + }, + { + "epoch": 0.6323997803404723, + "grad_norm": 0.329054057598114, + "learning_rate": 1.548376500350715e-05, + "loss": 0.4408, + "step": 23032 + }, + { + "epoch": 0.6324272377814387, + "grad_norm": 0.3749113082885742, + "learning_rate": 1.5483403835496097e-05, + "loss": 0.5422, + "step": 23033 + }, + { + "epoch": 0.6324546952224053, + "grad_norm": 0.43550029397010803, + "learning_rate": 1.548304265725675e-05, + "loss": 0.5083, + "step": 23034 + }, + { + "epoch": 0.6324821526633718, + "grad_norm": 0.3979592025279999, + "learning_rate": 1.5482681468789776e-05, + "loss": 0.5593, + "step": 23035 + }, + { + "epoch": 0.6325096101043383, + "grad_norm": 0.4033834636211395, + "learning_rate": 1.5482320270095853e-05, + "loss": 0.5163, + "step": 23036 + }, + { + "epoch": 0.6325370675453048, + "grad_norm": 0.369273841381073, + "learning_rate": 1.5481959061175653e-05, + "loss": 0.5198, + "step": 23037 + }, + { + "epoch": 0.6325645249862712, + "grad_norm": 0.5434684157371521, + "learning_rate": 1.548159784202985e-05, + "loss": 0.5531, + "step": 23038 + }, + { + "epoch": 0.6325919824272378, + "grad_norm": 0.3642076253890991, + "learning_rate": 1.5481236612659116e-05, + "loss": 0.4597, + "step": 23039 + }, + { + "epoch": 0.6326194398682042, + "grad_norm": 0.3645142912864685, + "learning_rate": 1.5480875373064126e-05, + "loss": 0.4934, + "step": 23040 + }, + { + "epoch": 0.6326468973091708, + "grad_norm": 0.3129270374774933, + "learning_rate": 1.5480514123245557e-05, + "loss": 0.3625, + "step": 23041 + }, + { + "epoch": 0.6326743547501373, + "grad_norm": 0.3582443594932556, + "learning_rate": 1.5480152863204077e-05, + "loss": 0.527, + "step": 23042 + }, + { + "epoch": 0.6327018121911038, + "grad_norm": 0.35164445638656616, + "learning_rate": 1.5479791592940362e-05, + "loss": 0.5229, + "step": 23043 + }, + { + "epoch": 0.6327292696320703, + "grad_norm": 0.36163780093193054, + "learning_rate": 1.547943031245509e-05, + "loss": 0.5407, + "step": 23044 + }, + { + "epoch": 0.6327567270730368, + "grad_norm": 0.42777371406555176, + "learning_rate": 1.547906902174893e-05, + "loss": 0.4825, + "step": 23045 + }, + { + "epoch": 0.6327841845140033, + "grad_norm": 0.3543710708618164, + "learning_rate": 1.5478707720822555e-05, + "loss": 0.542, + "step": 23046 + }, + { + "epoch": 0.6328116419549698, + "grad_norm": 0.43004539608955383, + "learning_rate": 1.5478346409676646e-05, + "loss": 0.5023, + "step": 23047 + }, + { + "epoch": 0.6328390993959363, + "grad_norm": 0.42154502868652344, + "learning_rate": 1.5477985088311867e-05, + "loss": 0.5912, + "step": 23048 + }, + { + "epoch": 0.6328665568369028, + "grad_norm": 0.3852606415748596, + "learning_rate": 1.5477623756728907e-05, + "loss": 0.5186, + "step": 23049 + }, + { + "epoch": 0.6328940142778693, + "grad_norm": 0.3314886689186096, + "learning_rate": 1.547726241492842e-05, + "loss": 0.4681, + "step": 23050 + }, + { + "epoch": 0.6329214717188358, + "grad_norm": 0.4275548756122589, + "learning_rate": 1.5476901062911096e-05, + "loss": 0.5928, + "step": 23051 + }, + { + "epoch": 0.6329489291598023, + "grad_norm": 0.45936164259910583, + "learning_rate": 1.5476539700677604e-05, + "loss": 0.5208, + "step": 23052 + }, + { + "epoch": 0.6329763866007688, + "grad_norm": 0.3773582875728607, + "learning_rate": 1.5476178328228614e-05, + "loss": 0.5281, + "step": 23053 + }, + { + "epoch": 0.6330038440417353, + "grad_norm": 0.4004562795162201, + "learning_rate": 1.547581694556481e-05, + "loss": 0.5733, + "step": 23054 + }, + { + "epoch": 0.6330313014827018, + "grad_norm": 0.3871382474899292, + "learning_rate": 1.5475455552686855e-05, + "loss": 0.4875, + "step": 23055 + }, + { + "epoch": 0.6330587589236684, + "grad_norm": 0.3839006721973419, + "learning_rate": 1.5475094149595428e-05, + "loss": 0.5224, + "step": 23056 + }, + { + "epoch": 0.6330862163646348, + "grad_norm": 0.38582777976989746, + "learning_rate": 1.54747327362912e-05, + "loss": 0.4452, + "step": 23057 + }, + { + "epoch": 0.6331136738056014, + "grad_norm": 0.38230785727500916, + "learning_rate": 1.547437131277485e-05, + "loss": 0.5311, + "step": 23058 + }, + { + "epoch": 0.6331411312465678, + "grad_norm": 0.3614116609096527, + "learning_rate": 1.5474009879047055e-05, + "loss": 0.4979, + "step": 23059 + }, + { + "epoch": 0.6331685886875343, + "grad_norm": 0.45920678973197937, + "learning_rate": 1.547364843510848e-05, + "loss": 0.5171, + "step": 23060 + }, + { + "epoch": 0.6331960461285008, + "grad_norm": 0.33893638849258423, + "learning_rate": 1.5473286980959805e-05, + "loss": 0.4667, + "step": 23061 + }, + { + "epoch": 0.6332235035694673, + "grad_norm": 0.4210006892681122, + "learning_rate": 1.5472925516601702e-05, + "loss": 0.5644, + "step": 23062 + }, + { + "epoch": 0.6332509610104339, + "grad_norm": 0.3949960172176361, + "learning_rate": 1.5472564042034847e-05, + "loss": 0.4996, + "step": 23063 + }, + { + "epoch": 0.6332784184514003, + "grad_norm": 0.6855419874191284, + "learning_rate": 1.5472202557259916e-05, + "loss": 0.6427, + "step": 23064 + }, + { + "epoch": 0.6333058758923669, + "grad_norm": 0.3675002455711365, + "learning_rate": 1.5471841062277574e-05, + "loss": 0.4522, + "step": 23065 + }, + { + "epoch": 0.6333333333333333, + "grad_norm": 0.38230621814727783, + "learning_rate": 1.5471479557088506e-05, + "loss": 0.525, + "step": 23066 + }, + { + "epoch": 0.6333607907742999, + "grad_norm": 0.3966647684574127, + "learning_rate": 1.5471118041693385e-05, + "loss": 0.5063, + "step": 23067 + }, + { + "epoch": 0.6333882482152663, + "grad_norm": 0.42115357518196106, + "learning_rate": 1.5470756516092876e-05, + "loss": 0.5332, + "step": 23068 + }, + { + "epoch": 0.6334157056562328, + "grad_norm": 0.37133410573005676, + "learning_rate": 1.5470394980287666e-05, + "loss": 0.5262, + "step": 23069 + }, + { + "epoch": 0.6334431630971994, + "grad_norm": 0.4698651134967804, + "learning_rate": 1.5470033434278423e-05, + "loss": 0.6593, + "step": 23070 + }, + { + "epoch": 0.6334706205381658, + "grad_norm": 0.410562664270401, + "learning_rate": 1.5469671878065818e-05, + "loss": 0.4665, + "step": 23071 + }, + { + "epoch": 0.6334980779791324, + "grad_norm": 0.40724897384643555, + "learning_rate": 1.546931031165053e-05, + "loss": 0.498, + "step": 23072 + }, + { + "epoch": 0.6335255354200988, + "grad_norm": 0.38413506746292114, + "learning_rate": 1.5468948735033238e-05, + "loss": 0.5316, + "step": 23073 + }, + { + "epoch": 0.6335529928610654, + "grad_norm": 0.4032515585422516, + "learning_rate": 1.5468587148214605e-05, + "loss": 0.4783, + "step": 23074 + }, + { + "epoch": 0.6335804503020318, + "grad_norm": 0.36969518661499023, + "learning_rate": 1.5468225551195314e-05, + "loss": 0.4792, + "step": 23075 + }, + { + "epoch": 0.6336079077429984, + "grad_norm": 0.3691737949848175, + "learning_rate": 1.5467863943976034e-05, + "loss": 0.4191, + "step": 23076 + }, + { + "epoch": 0.6336353651839649, + "grad_norm": 0.3533030152320862, + "learning_rate": 1.546750232655745e-05, + "loss": 0.4798, + "step": 23077 + }, + { + "epoch": 0.6336628226249313, + "grad_norm": 0.40293240547180176, + "learning_rate": 1.5467140698940223e-05, + "loss": 0.5157, + "step": 23078 + }, + { + "epoch": 0.6336902800658979, + "grad_norm": 0.7748141884803772, + "learning_rate": 1.5466779061125037e-05, + "loss": 0.5787, + "step": 23079 + }, + { + "epoch": 0.6337177375068643, + "grad_norm": 0.394393652677536, + "learning_rate": 1.5466417413112562e-05, + "loss": 0.4923, + "step": 23080 + }, + { + "epoch": 0.6337451949478309, + "grad_norm": 0.43010014295578003, + "learning_rate": 1.5466055754903472e-05, + "loss": 0.447, + "step": 23081 + }, + { + "epoch": 0.6337726523887973, + "grad_norm": 0.3759283125400543, + "learning_rate": 1.5465694086498446e-05, + "loss": 0.5098, + "step": 23082 + }, + { + "epoch": 0.6338001098297639, + "grad_norm": 0.43147483468055725, + "learning_rate": 1.5465332407898155e-05, + "loss": 0.443, + "step": 23083 + }, + { + "epoch": 0.6338275672707304, + "grad_norm": 0.3488841950893402, + "learning_rate": 1.5464970719103274e-05, + "loss": 0.4621, + "step": 23084 + }, + { + "epoch": 0.6338550247116969, + "grad_norm": 0.3695021867752075, + "learning_rate": 1.546460902011448e-05, + "loss": 0.4834, + "step": 23085 + }, + { + "epoch": 0.6338824821526634, + "grad_norm": 0.3534071445465088, + "learning_rate": 1.5464247310932444e-05, + "loss": 0.4588, + "step": 23086 + }, + { + "epoch": 0.6339099395936298, + "grad_norm": 0.3558345437049866, + "learning_rate": 1.5463885591557844e-05, + "loss": 0.4027, + "step": 23087 + }, + { + "epoch": 0.6339373970345964, + "grad_norm": 0.3654100298881531, + "learning_rate": 1.5463523861991354e-05, + "loss": 0.4716, + "step": 23088 + }, + { + "epoch": 0.6339648544755628, + "grad_norm": 0.35962381958961487, + "learning_rate": 1.5463162122233648e-05, + "loss": 0.5217, + "step": 23089 + }, + { + "epoch": 0.6339923119165294, + "grad_norm": 0.38005805015563965, + "learning_rate": 1.54628003722854e-05, + "loss": 0.5347, + "step": 23090 + }, + { + "epoch": 0.6340197693574959, + "grad_norm": 0.4630787968635559, + "learning_rate": 1.5462438612147286e-05, + "loss": 0.593, + "step": 23091 + }, + { + "epoch": 0.6340472267984624, + "grad_norm": 0.3761962354183197, + "learning_rate": 1.546207684181998e-05, + "loss": 0.5427, + "step": 23092 + }, + { + "epoch": 0.6340746842394289, + "grad_norm": 0.41832759976387024, + "learning_rate": 1.546171506130416e-05, + "loss": 0.4919, + "step": 23093 + }, + { + "epoch": 0.6341021416803954, + "grad_norm": 0.38694778084754944, + "learning_rate": 1.5461353270600496e-05, + "loss": 0.4174, + "step": 23094 + }, + { + "epoch": 0.6341295991213619, + "grad_norm": 0.3563859462738037, + "learning_rate": 1.5460991469709665e-05, + "loss": 0.5107, + "step": 23095 + }, + { + "epoch": 0.6341570565623283, + "grad_norm": 0.3383978009223938, + "learning_rate": 1.5460629658632344e-05, + "loss": 0.4987, + "step": 23096 + }, + { + "epoch": 0.6341845140032949, + "grad_norm": 0.855695366859436, + "learning_rate": 1.5460267837369204e-05, + "loss": 0.642, + "step": 23097 + }, + { + "epoch": 0.6342119714442614, + "grad_norm": 0.4117930233478546, + "learning_rate": 1.5459906005920924e-05, + "loss": 0.5239, + "step": 23098 + }, + { + "epoch": 0.6342394288852279, + "grad_norm": 0.3417458236217499, + "learning_rate": 1.5459544164288173e-05, + "loss": 0.5376, + "step": 23099 + }, + { + "epoch": 0.6342668863261944, + "grad_norm": 0.3484813868999481, + "learning_rate": 1.5459182312471633e-05, + "loss": 0.4391, + "step": 23100 + }, + { + "epoch": 0.6342943437671609, + "grad_norm": 0.4191799461841583, + "learning_rate": 1.5458820450471976e-05, + "loss": 0.4705, + "step": 23101 + }, + { + "epoch": 0.6343218012081274, + "grad_norm": 0.48568323254585266, + "learning_rate": 1.5458458578289874e-05, + "loss": 0.6249, + "step": 23102 + }, + { + "epoch": 0.6343492586490939, + "grad_norm": 0.4041660726070404, + "learning_rate": 1.5458096695926003e-05, + "loss": 0.4758, + "step": 23103 + }, + { + "epoch": 0.6343767160900604, + "grad_norm": 0.37629079818725586, + "learning_rate": 1.5457734803381042e-05, + "loss": 0.4648, + "step": 23104 + }, + { + "epoch": 0.634404173531027, + "grad_norm": 0.4323202967643738, + "learning_rate": 1.5457372900655668e-05, + "loss": 0.5481, + "step": 23105 + }, + { + "epoch": 0.6344316309719934, + "grad_norm": 0.34899428486824036, + "learning_rate": 1.5457010987750546e-05, + "loss": 0.4016, + "step": 23106 + }, + { + "epoch": 0.63445908841296, + "grad_norm": 0.34941360354423523, + "learning_rate": 1.5456649064666355e-05, + "loss": 0.5425, + "step": 23107 + }, + { + "epoch": 0.6344865458539264, + "grad_norm": 0.38764163851737976, + "learning_rate": 1.5456287131403776e-05, + "loss": 0.4842, + "step": 23108 + }, + { + "epoch": 0.6345140032948929, + "grad_norm": 0.3626110255718231, + "learning_rate": 1.5455925187963483e-05, + "loss": 0.4184, + "step": 23109 + }, + { + "epoch": 0.6345414607358594, + "grad_norm": 0.3764074146747589, + "learning_rate": 1.5455563234346143e-05, + "loss": 0.5705, + "step": 23110 + }, + { + "epoch": 0.6345689181768259, + "grad_norm": 0.36458972096443176, + "learning_rate": 1.5455201270552437e-05, + "loss": 0.5418, + "step": 23111 + }, + { + "epoch": 0.6345963756177925, + "grad_norm": 0.38084959983825684, + "learning_rate": 1.5454839296583043e-05, + "loss": 0.5422, + "step": 23112 + }, + { + "epoch": 0.6346238330587589, + "grad_norm": 0.48499566316604614, + "learning_rate": 1.5454477312438626e-05, + "loss": 0.4367, + "step": 23113 + }, + { + "epoch": 0.6346512904997255, + "grad_norm": 0.40629327297210693, + "learning_rate": 1.5454115318119875e-05, + "loss": 0.5114, + "step": 23114 + }, + { + "epoch": 0.6346787479406919, + "grad_norm": 0.3993547260761261, + "learning_rate": 1.5453753313627456e-05, + "loss": 0.582, + "step": 23115 + }, + { + "epoch": 0.6347062053816585, + "grad_norm": 0.3816734254360199, + "learning_rate": 1.5453391298962043e-05, + "loss": 0.4955, + "step": 23116 + }, + { + "epoch": 0.6347336628226249, + "grad_norm": 0.40103021264076233, + "learning_rate": 1.545302927412432e-05, + "loss": 0.5222, + "step": 23117 + }, + { + "epoch": 0.6347611202635914, + "grad_norm": 0.3600994646549225, + "learning_rate": 1.5452667239114953e-05, + "loss": 0.4531, + "step": 23118 + }, + { + "epoch": 0.634788577704558, + "grad_norm": 0.36365047097206116, + "learning_rate": 1.5452305193934625e-05, + "loss": 0.4432, + "step": 23119 + }, + { + "epoch": 0.6348160351455244, + "grad_norm": 0.45382532477378845, + "learning_rate": 1.5451943138584004e-05, + "loss": 0.5106, + "step": 23120 + }, + { + "epoch": 0.634843492586491, + "grad_norm": 0.42120328545570374, + "learning_rate": 1.5451581073063772e-05, + "loss": 0.4141, + "step": 23121 + }, + { + "epoch": 0.6348709500274574, + "grad_norm": 0.3394007384777069, + "learning_rate": 1.54512189973746e-05, + "loss": 0.4927, + "step": 23122 + }, + { + "epoch": 0.634898407468424, + "grad_norm": 0.4701749384403229, + "learning_rate": 1.5450856911517163e-05, + "loss": 0.4855, + "step": 23123 + }, + { + "epoch": 0.6349258649093904, + "grad_norm": 0.3769737184047699, + "learning_rate": 1.545049481549214e-05, + "loss": 0.4802, + "step": 23124 + }, + { + "epoch": 0.634953322350357, + "grad_norm": 0.4439629316329956, + "learning_rate": 1.5450132709300203e-05, + "loss": 0.5574, + "step": 23125 + }, + { + "epoch": 0.6349807797913235, + "grad_norm": 0.3873760998249054, + "learning_rate": 1.5449770592942034e-05, + "loss": 0.455, + "step": 23126 + }, + { + "epoch": 0.6350082372322899, + "grad_norm": 0.4361985921859741, + "learning_rate": 1.5449408466418296e-05, + "loss": 0.5366, + "step": 23127 + }, + { + "epoch": 0.6350356946732565, + "grad_norm": 0.38562464714050293, + "learning_rate": 1.544904632972968e-05, + "loss": 0.4885, + "step": 23128 + }, + { + "epoch": 0.6350631521142229, + "grad_norm": 0.3935583531856537, + "learning_rate": 1.544868418287685e-05, + "loss": 0.4869, + "step": 23129 + }, + { + "epoch": 0.6350906095551895, + "grad_norm": 0.3871798515319824, + "learning_rate": 1.544832202586048e-05, + "loss": 0.537, + "step": 23130 + }, + { + "epoch": 0.6351180669961559, + "grad_norm": 0.38637059926986694, + "learning_rate": 1.5447959858681256e-05, + "loss": 0.499, + "step": 23131 + }, + { + "epoch": 0.6351455244371225, + "grad_norm": 0.3399500846862793, + "learning_rate": 1.5447597681339845e-05, + "loss": 0.4482, + "step": 23132 + }, + { + "epoch": 0.635172981878089, + "grad_norm": 0.397236168384552, + "learning_rate": 1.5447235493836928e-05, + "loss": 0.4962, + "step": 23133 + }, + { + "epoch": 0.6352004393190555, + "grad_norm": 0.397553950548172, + "learning_rate": 1.5446873296173177e-05, + "loss": 0.5098, + "step": 23134 + }, + { + "epoch": 0.635227896760022, + "grad_norm": 0.38921859860420227, + "learning_rate": 1.5446511088349272e-05, + "loss": 0.4999, + "step": 23135 + }, + { + "epoch": 0.6352553542009884, + "grad_norm": 0.4286947548389435, + "learning_rate": 1.544614887036588e-05, + "loss": 0.7311, + "step": 23136 + }, + { + "epoch": 0.635282811641955, + "grad_norm": 0.40711960196495056, + "learning_rate": 1.544578664222369e-05, + "loss": 0.5077, + "step": 23137 + }, + { + "epoch": 0.6353102690829214, + "grad_norm": 0.3852107524871826, + "learning_rate": 1.544542440392336e-05, + "loss": 0.4234, + "step": 23138 + }, + { + "epoch": 0.635337726523888, + "grad_norm": 0.373675674200058, + "learning_rate": 1.544506215546558e-05, + "loss": 0.538, + "step": 23139 + }, + { + "epoch": 0.6353651839648545, + "grad_norm": 0.3921574354171753, + "learning_rate": 1.5444699896851026e-05, + "loss": 0.4957, + "step": 23140 + }, + { + "epoch": 0.635392641405821, + "grad_norm": 0.5805567502975464, + "learning_rate": 1.544433762808036e-05, + "loss": 0.5659, + "step": 23141 + }, + { + "epoch": 0.6354200988467875, + "grad_norm": 0.660605251789093, + "learning_rate": 1.5443975349154274e-05, + "loss": 0.516, + "step": 23142 + }, + { + "epoch": 0.635447556287754, + "grad_norm": 0.3963235318660736, + "learning_rate": 1.5443613060073435e-05, + "loss": 0.5342, + "step": 23143 + }, + { + "epoch": 0.6354750137287205, + "grad_norm": 0.40800750255584717, + "learning_rate": 1.544325076083852e-05, + "loss": 0.5874, + "step": 23144 + }, + { + "epoch": 0.6355024711696869, + "grad_norm": 0.41823309659957886, + "learning_rate": 1.5442888451450202e-05, + "loss": 0.4849, + "step": 23145 + }, + { + "epoch": 0.6355299286106535, + "grad_norm": 0.38655057549476624, + "learning_rate": 1.544252613190916e-05, + "loss": 0.5422, + "step": 23146 + }, + { + "epoch": 0.63555738605162, + "grad_norm": 0.40317535400390625, + "learning_rate": 1.5442163802216076e-05, + "loss": 0.5309, + "step": 23147 + }, + { + "epoch": 0.6355848434925865, + "grad_norm": 0.4582132399082184, + "learning_rate": 1.5441801462371614e-05, + "loss": 0.5887, + "step": 23148 + }, + { + "epoch": 0.635612300933553, + "grad_norm": 0.39238813519477844, + "learning_rate": 1.5441439112376455e-05, + "loss": 0.5089, + "step": 23149 + }, + { + "epoch": 0.6356397583745195, + "grad_norm": 0.3864198327064514, + "learning_rate": 1.544107675223128e-05, + "loss": 0.4513, + "step": 23150 + }, + { + "epoch": 0.635667215815486, + "grad_norm": 0.3879215717315674, + "learning_rate": 1.5440714381936756e-05, + "loss": 0.4782, + "step": 23151 + }, + { + "epoch": 0.6356946732564525, + "grad_norm": 0.3946992754936218, + "learning_rate": 1.5440352001493564e-05, + "loss": 0.5372, + "step": 23152 + }, + { + "epoch": 0.635722130697419, + "grad_norm": 0.4273972511291504, + "learning_rate": 1.543998961090238e-05, + "loss": 0.5285, + "step": 23153 + }, + { + "epoch": 0.6357495881383856, + "grad_norm": 0.3794424831867218, + "learning_rate": 1.5439627210163878e-05, + "loss": 0.4976, + "step": 23154 + }, + { + "epoch": 0.635777045579352, + "grad_norm": 0.6093939542770386, + "learning_rate": 1.543926479927874e-05, + "loss": 0.4473, + "step": 23155 + }, + { + "epoch": 0.6358045030203185, + "grad_norm": 0.4022117555141449, + "learning_rate": 1.543890237824763e-05, + "loss": 0.5864, + "step": 23156 + }, + { + "epoch": 0.635831960461285, + "grad_norm": 0.3957201838493347, + "learning_rate": 1.5438539947071233e-05, + "loss": 0.512, + "step": 23157 + }, + { + "epoch": 0.6358594179022515, + "grad_norm": 0.4199093282222748, + "learning_rate": 1.5438177505750225e-05, + "loss": 0.5279, + "step": 23158 + }, + { + "epoch": 0.635886875343218, + "grad_norm": 0.37731701135635376, + "learning_rate": 1.5437815054285282e-05, + "loss": 0.4897, + "step": 23159 + }, + { + "epoch": 0.6359143327841845, + "grad_norm": 0.4576948881149292, + "learning_rate": 1.5437452592677072e-05, + "loss": 0.5012, + "step": 23160 + }, + { + "epoch": 0.6359417902251511, + "grad_norm": 0.3656446039676666, + "learning_rate": 1.5437090120926284e-05, + "loss": 0.4878, + "step": 23161 + }, + { + "epoch": 0.6359692476661175, + "grad_norm": 0.39502575993537903, + "learning_rate": 1.5436727639033583e-05, + "loss": 0.5432, + "step": 23162 + }, + { + "epoch": 0.6359967051070841, + "grad_norm": 0.3633093535900116, + "learning_rate": 1.5436365146999655e-05, + "loss": 0.4856, + "step": 23163 + }, + { + "epoch": 0.6360241625480505, + "grad_norm": 0.38085854053497314, + "learning_rate": 1.5436002644825164e-05, + "loss": 0.4636, + "step": 23164 + }, + { + "epoch": 0.636051619989017, + "grad_norm": 0.4096362590789795, + "learning_rate": 1.54356401325108e-05, + "loss": 0.525, + "step": 23165 + }, + { + "epoch": 0.6360790774299835, + "grad_norm": 0.41394323110580444, + "learning_rate": 1.543527761005723e-05, + "loss": 0.4131, + "step": 23166 + }, + { + "epoch": 0.63610653487095, + "grad_norm": 0.4359787702560425, + "learning_rate": 1.5434915077465132e-05, + "loss": 0.5229, + "step": 23167 + }, + { + "epoch": 0.6361339923119165, + "grad_norm": 0.38194769620895386, + "learning_rate": 1.543455253473518e-05, + "loss": 0.4845, + "step": 23168 + }, + { + "epoch": 0.636161449752883, + "grad_norm": 0.3960731029510498, + "learning_rate": 1.543418998186806e-05, + "loss": 0.668, + "step": 23169 + }, + { + "epoch": 0.6361889071938496, + "grad_norm": 0.41112372279167175, + "learning_rate": 1.5433827418864435e-05, + "loss": 0.538, + "step": 23170 + }, + { + "epoch": 0.636216364634816, + "grad_norm": 0.4057997763156891, + "learning_rate": 1.5433464845724988e-05, + "loss": 0.4708, + "step": 23171 + }, + { + "epoch": 0.6362438220757826, + "grad_norm": 0.4092215597629547, + "learning_rate": 1.5433102262450396e-05, + "loss": 0.4836, + "step": 23172 + }, + { + "epoch": 0.636271279516749, + "grad_norm": 0.37497901916503906, + "learning_rate": 1.5432739669041335e-05, + "loss": 0.5166, + "step": 23173 + }, + { + "epoch": 0.6362987369577155, + "grad_norm": 0.39989611506462097, + "learning_rate": 1.543237706549848e-05, + "loss": 0.514, + "step": 23174 + }, + { + "epoch": 0.636326194398682, + "grad_norm": 0.38425639271736145, + "learning_rate": 1.543201445182251e-05, + "loss": 0.5294, + "step": 23175 + }, + { + "epoch": 0.6363536518396485, + "grad_norm": 0.39275774359703064, + "learning_rate": 1.54316518280141e-05, + "loss": 0.486, + "step": 23176 + }, + { + "epoch": 0.6363811092806151, + "grad_norm": 0.37867262959480286, + "learning_rate": 1.543128919407392e-05, + "loss": 0.4287, + "step": 23177 + }, + { + "epoch": 0.6364085667215815, + "grad_norm": 0.3839682936668396, + "learning_rate": 1.5430926550002653e-05, + "loss": 0.5278, + "step": 23178 + }, + { + "epoch": 0.6364360241625481, + "grad_norm": 0.3572182357311249, + "learning_rate": 1.5430563895800977e-05, + "loss": 0.5789, + "step": 23179 + }, + { + "epoch": 0.6364634816035145, + "grad_norm": 0.3409055471420288, + "learning_rate": 1.5430201231469567e-05, + "loss": 0.4723, + "step": 23180 + }, + { + "epoch": 0.6364909390444811, + "grad_norm": 0.38545286655426025, + "learning_rate": 1.54298385570091e-05, + "loss": 0.4865, + "step": 23181 + }, + { + "epoch": 0.6365183964854475, + "grad_norm": 0.34843191504478455, + "learning_rate": 1.5429475872420246e-05, + "loss": 0.5104, + "step": 23182 + }, + { + "epoch": 0.636545853926414, + "grad_norm": 0.3923165202140808, + "learning_rate": 1.542911317770369e-05, + "loss": 0.521, + "step": 23183 + }, + { + "epoch": 0.6365733113673806, + "grad_norm": 0.34207576513290405, + "learning_rate": 1.5428750472860107e-05, + "loss": 0.5467, + "step": 23184 + }, + { + "epoch": 0.636600768808347, + "grad_norm": 0.41347983479499817, + "learning_rate": 1.5428387757890167e-05, + "loss": 0.5627, + "step": 23185 + }, + { + "epoch": 0.6366282262493136, + "grad_norm": 0.41610628366470337, + "learning_rate": 1.5428025032794555e-05, + "loss": 0.4585, + "step": 23186 + }, + { + "epoch": 0.63665568369028, + "grad_norm": 0.3485484719276428, + "learning_rate": 1.542766229757394e-05, + "loss": 0.4704, + "step": 23187 + }, + { + "epoch": 0.6366831411312466, + "grad_norm": 0.4595089852809906, + "learning_rate": 1.542729955222901e-05, + "loss": 0.5065, + "step": 23188 + }, + { + "epoch": 0.636710598572213, + "grad_norm": 0.35638228058815, + "learning_rate": 1.5426936796760425e-05, + "loss": 0.5323, + "step": 23189 + }, + { + "epoch": 0.6367380560131796, + "grad_norm": 0.38852035999298096, + "learning_rate": 1.5426574031168876e-05, + "loss": 0.4487, + "step": 23190 + }, + { + "epoch": 0.6367655134541461, + "grad_norm": 0.3991386592388153, + "learning_rate": 1.5426211255455035e-05, + "loss": 0.5382, + "step": 23191 + }, + { + "epoch": 0.6367929708951126, + "grad_norm": 0.39037150144577026, + "learning_rate": 1.5425848469619574e-05, + "loss": 0.5094, + "step": 23192 + }, + { + "epoch": 0.6368204283360791, + "grad_norm": 0.3789840638637543, + "learning_rate": 1.5425485673663178e-05, + "loss": 0.4757, + "step": 23193 + }, + { + "epoch": 0.6368478857770455, + "grad_norm": 0.4898328185081482, + "learning_rate": 1.542512286758652e-05, + "loss": 0.4734, + "step": 23194 + }, + { + "epoch": 0.6368753432180121, + "grad_norm": 0.4133282005786896, + "learning_rate": 1.5424760051390274e-05, + "loss": 0.4389, + "step": 23195 + }, + { + "epoch": 0.6369028006589785, + "grad_norm": 0.3740154504776001, + "learning_rate": 1.542439722507512e-05, + "loss": 0.558, + "step": 23196 + }, + { + "epoch": 0.6369302580999451, + "grad_norm": 0.5770440697669983, + "learning_rate": 1.5424034388641735e-05, + "loss": 0.4569, + "step": 23197 + }, + { + "epoch": 0.6369577155409116, + "grad_norm": 0.3746641278266907, + "learning_rate": 1.542367154209079e-05, + "loss": 0.5123, + "step": 23198 + }, + { + "epoch": 0.6369851729818781, + "grad_norm": 0.4017345607280731, + "learning_rate": 1.5423308685422968e-05, + "loss": 0.4687, + "step": 23199 + }, + { + "epoch": 0.6370126304228446, + "grad_norm": 0.3621593117713928, + "learning_rate": 1.5422945818638947e-05, + "loss": 0.5744, + "step": 23200 + }, + { + "epoch": 0.637040087863811, + "grad_norm": 0.37034872174263, + "learning_rate": 1.54225829417394e-05, + "loss": 0.5737, + "step": 23201 + }, + { + "epoch": 0.6370675453047776, + "grad_norm": 0.6495491862297058, + "learning_rate": 1.5422220054725007e-05, + "loss": 0.5349, + "step": 23202 + }, + { + "epoch": 0.637095002745744, + "grad_norm": 0.3374318480491638, + "learning_rate": 1.542185715759644e-05, + "loss": 0.4175, + "step": 23203 + }, + { + "epoch": 0.6371224601867106, + "grad_norm": 0.33278122544288635, + "learning_rate": 1.5421494250354382e-05, + "loss": 0.4305, + "step": 23204 + }, + { + "epoch": 0.6371499176276771, + "grad_norm": 0.5741167068481445, + "learning_rate": 1.5421131332999502e-05, + "loss": 0.4811, + "step": 23205 + }, + { + "epoch": 0.6371773750686436, + "grad_norm": 0.4093117415904999, + "learning_rate": 1.5420768405532486e-05, + "loss": 0.5181, + "step": 23206 + }, + { + "epoch": 0.6372048325096101, + "grad_norm": 0.3113693296909332, + "learning_rate": 1.5420405467954007e-05, + "loss": 0.4305, + "step": 23207 + }, + { + "epoch": 0.6372322899505766, + "grad_norm": 0.35866779088974, + "learning_rate": 1.542004252026474e-05, + "loss": 0.4736, + "step": 23208 + }, + { + "epoch": 0.6372597473915431, + "grad_norm": 0.3485752046108246, + "learning_rate": 1.5419679562465364e-05, + "loss": 0.4977, + "step": 23209 + }, + { + "epoch": 0.6372872048325096, + "grad_norm": 0.4487015902996063, + "learning_rate": 1.541931659455656e-05, + "loss": 0.6114, + "step": 23210 + }, + { + "epoch": 0.6373146622734761, + "grad_norm": 0.37420421838760376, + "learning_rate": 1.5418953616538996e-05, + "loss": 0.4884, + "step": 23211 + }, + { + "epoch": 0.6373421197144427, + "grad_norm": 0.36313384771347046, + "learning_rate": 1.5418590628413355e-05, + "loss": 0.4764, + "step": 23212 + }, + { + "epoch": 0.6373695771554091, + "grad_norm": 0.36471158266067505, + "learning_rate": 1.5418227630180314e-05, + "loss": 0.4562, + "step": 23213 + }, + { + "epoch": 0.6373970345963756, + "grad_norm": 0.41885319352149963, + "learning_rate": 1.541786462184055e-05, + "loss": 0.507, + "step": 23214 + }, + { + "epoch": 0.6374244920373421, + "grad_norm": 0.4322323501110077, + "learning_rate": 1.5417501603394737e-05, + "loss": 0.521, + "step": 23215 + }, + { + "epoch": 0.6374519494783086, + "grad_norm": 0.3432362675666809, + "learning_rate": 1.5417138574843554e-05, + "loss": 0.5346, + "step": 23216 + }, + { + "epoch": 0.6374794069192751, + "grad_norm": 0.35707592964172363, + "learning_rate": 1.541677553618768e-05, + "loss": 0.4464, + "step": 23217 + }, + { + "epoch": 0.6375068643602416, + "grad_norm": 0.3602023422718048, + "learning_rate": 1.541641248742779e-05, + "loss": 0.4496, + "step": 23218 + }, + { + "epoch": 0.6375343218012082, + "grad_norm": 0.3810091018676758, + "learning_rate": 1.541604942856456e-05, + "loss": 0.5673, + "step": 23219 + }, + { + "epoch": 0.6375617792421746, + "grad_norm": 0.43578702211380005, + "learning_rate": 1.5415686359598673e-05, + "loss": 0.5791, + "step": 23220 + }, + { + "epoch": 0.6375892366831412, + "grad_norm": 0.34876585006713867, + "learning_rate": 1.5415323280530804e-05, + "loss": 0.5121, + "step": 23221 + }, + { + "epoch": 0.6376166941241076, + "grad_norm": 0.4660422205924988, + "learning_rate": 1.5414960191361624e-05, + "loss": 0.5006, + "step": 23222 + }, + { + "epoch": 0.6376441515650741, + "grad_norm": 0.3958325684070587, + "learning_rate": 1.541459709209182e-05, + "loss": 0.5063, + "step": 23223 + }, + { + "epoch": 0.6376716090060406, + "grad_norm": 0.3851735293865204, + "learning_rate": 1.541423398272206e-05, + "loss": 0.4619, + "step": 23224 + }, + { + "epoch": 0.6376990664470071, + "grad_norm": 0.36440545320510864, + "learning_rate": 1.5413870863253028e-05, + "loss": 0.4488, + "step": 23225 + }, + { + "epoch": 0.6377265238879737, + "grad_norm": 0.36761894822120667, + "learning_rate": 1.5413507733685397e-05, + "loss": 0.4717, + "step": 23226 + }, + { + "epoch": 0.6377539813289401, + "grad_norm": 0.4066241383552551, + "learning_rate": 1.5413144594019846e-05, + "loss": 0.5012, + "step": 23227 + }, + { + "epoch": 0.6377814387699067, + "grad_norm": 0.3893265426158905, + "learning_rate": 1.5412781444257055e-05, + "loss": 0.4694, + "step": 23228 + }, + { + "epoch": 0.6378088962108731, + "grad_norm": 0.5100352168083191, + "learning_rate": 1.5412418284397696e-05, + "loss": 0.4352, + "step": 23229 + }, + { + "epoch": 0.6378363536518397, + "grad_norm": 0.4173066020011902, + "learning_rate": 1.5412055114442453e-05, + "loss": 0.4703, + "step": 23230 + }, + { + "epoch": 0.6378638110928061, + "grad_norm": 0.36859768629074097, + "learning_rate": 1.5411691934391997e-05, + "loss": 0.4064, + "step": 23231 + }, + { + "epoch": 0.6378912685337726, + "grad_norm": 0.47813212871551514, + "learning_rate": 1.541132874424701e-05, + "loss": 0.5561, + "step": 23232 + }, + { + "epoch": 0.6379187259747392, + "grad_norm": 0.3600277900695801, + "learning_rate": 1.541096554400817e-05, + "loss": 0.4803, + "step": 23233 + }, + { + "epoch": 0.6379461834157056, + "grad_norm": 0.380526065826416, + "learning_rate": 1.5410602333676144e-05, + "loss": 0.5922, + "step": 23234 + }, + { + "epoch": 0.6379736408566722, + "grad_norm": 0.3682764768600464, + "learning_rate": 1.5410239113251626e-05, + "loss": 0.5015, + "step": 23235 + }, + { + "epoch": 0.6380010982976386, + "grad_norm": 0.3719002902507782, + "learning_rate": 1.5409875882735282e-05, + "loss": 0.4643, + "step": 23236 + }, + { + "epoch": 0.6380285557386052, + "grad_norm": 0.4564128816127777, + "learning_rate": 1.5409512642127795e-05, + "loss": 0.5339, + "step": 23237 + }, + { + "epoch": 0.6380560131795716, + "grad_norm": 0.32599058747291565, + "learning_rate": 1.5409149391429838e-05, + "loss": 0.3546, + "step": 23238 + }, + { + "epoch": 0.6380834706205382, + "grad_norm": 0.3296312987804413, + "learning_rate": 1.540878613064209e-05, + "loss": 0.4248, + "step": 23239 + }, + { + "epoch": 0.6381109280615047, + "grad_norm": 0.5298121571540833, + "learning_rate": 1.5408422859765235e-05, + "loss": 0.4915, + "step": 23240 + }, + { + "epoch": 0.6381383855024712, + "grad_norm": 0.4769827723503113, + "learning_rate": 1.540805957879994e-05, + "loss": 0.5154, + "step": 23241 + }, + { + "epoch": 0.6381658429434377, + "grad_norm": 0.33248913288116455, + "learning_rate": 1.5407696287746888e-05, + "loss": 0.4043, + "step": 23242 + }, + { + "epoch": 0.6381933003844041, + "grad_norm": 0.3557734787464142, + "learning_rate": 1.5407332986606763e-05, + "loss": 0.4293, + "step": 23243 + }, + { + "epoch": 0.6382207578253707, + "grad_norm": 0.376846045255661, + "learning_rate": 1.5406969675380228e-05, + "loss": 0.5601, + "step": 23244 + }, + { + "epoch": 0.6382482152663371, + "grad_norm": 0.35156741738319397, + "learning_rate": 1.5406606354067974e-05, + "loss": 0.4126, + "step": 23245 + }, + { + "epoch": 0.6382756727073037, + "grad_norm": 0.3726288378238678, + "learning_rate": 1.5406243022670673e-05, + "loss": 0.5223, + "step": 23246 + }, + { + "epoch": 0.6383031301482702, + "grad_norm": 0.37260863184928894, + "learning_rate": 1.5405879681189e-05, + "loss": 0.4722, + "step": 23247 + }, + { + "epoch": 0.6383305875892367, + "grad_norm": 0.4717891812324524, + "learning_rate": 1.5405516329623638e-05, + "loss": 0.4946, + "step": 23248 + }, + { + "epoch": 0.6383580450302032, + "grad_norm": 0.3838779330253601, + "learning_rate": 1.5405152967975264e-05, + "loss": 0.4944, + "step": 23249 + }, + { + "epoch": 0.6383855024711697, + "grad_norm": 0.38444384932518005, + "learning_rate": 1.5404789596244552e-05, + "loss": 0.5144, + "step": 23250 + }, + { + "epoch": 0.6384129599121362, + "grad_norm": 0.3707401752471924, + "learning_rate": 1.5404426214432183e-05, + "loss": 0.4989, + "step": 23251 + }, + { + "epoch": 0.6384404173531026, + "grad_norm": 0.3862748146057129, + "learning_rate": 1.5404062822538835e-05, + "loss": 0.5058, + "step": 23252 + }, + { + "epoch": 0.6384678747940692, + "grad_norm": 0.393929660320282, + "learning_rate": 1.5403699420565187e-05, + "loss": 0.5495, + "step": 23253 + }, + { + "epoch": 0.6384953322350357, + "grad_norm": 0.3835124969482422, + "learning_rate": 1.5403336008511913e-05, + "loss": 0.4646, + "step": 23254 + }, + { + "epoch": 0.6385227896760022, + "grad_norm": 0.3535405099391937, + "learning_rate": 1.540297258637969e-05, + "loss": 0.454, + "step": 23255 + }, + { + "epoch": 0.6385502471169687, + "grad_norm": 0.4486088156700134, + "learning_rate": 1.5402609154169204e-05, + "loss": 0.5509, + "step": 23256 + }, + { + "epoch": 0.6385777045579352, + "grad_norm": 0.4190601408481598, + "learning_rate": 1.5402245711881122e-05, + "loss": 0.5755, + "step": 23257 + }, + { + "epoch": 0.6386051619989017, + "grad_norm": 0.40132972598075867, + "learning_rate": 1.5401882259516133e-05, + "loss": 0.5919, + "step": 23258 + }, + { + "epoch": 0.6386326194398682, + "grad_norm": 0.3933853507041931, + "learning_rate": 1.5401518797074908e-05, + "loss": 0.5684, + "step": 23259 + }, + { + "epoch": 0.6386600768808347, + "grad_norm": 0.41423895955085754, + "learning_rate": 1.5401155324558124e-05, + "loss": 0.5748, + "step": 23260 + }, + { + "epoch": 0.6386875343218013, + "grad_norm": 0.39568835496902466, + "learning_rate": 1.5400791841966466e-05, + "loss": 0.5096, + "step": 23261 + }, + { + "epoch": 0.6387149917627677, + "grad_norm": 0.394079327583313, + "learning_rate": 1.54004283493006e-05, + "loss": 0.5171, + "step": 23262 + }, + { + "epoch": 0.6387424492037342, + "grad_norm": 0.36781975626945496, + "learning_rate": 1.5400064846561218e-05, + "loss": 0.4662, + "step": 23263 + }, + { + "epoch": 0.6387699066447007, + "grad_norm": 0.3379192054271698, + "learning_rate": 1.539970133374899e-05, + "loss": 0.4272, + "step": 23264 + }, + { + "epoch": 0.6387973640856672, + "grad_norm": 1.4198195934295654, + "learning_rate": 1.5399337810864594e-05, + "loss": 0.5817, + "step": 23265 + }, + { + "epoch": 0.6388248215266337, + "grad_norm": 0.38628217577934265, + "learning_rate": 1.5398974277908713e-05, + "loss": 0.5536, + "step": 23266 + }, + { + "epoch": 0.6388522789676002, + "grad_norm": 0.38976743817329407, + "learning_rate": 1.5398610734882017e-05, + "loss": 0.4141, + "step": 23267 + }, + { + "epoch": 0.6388797364085668, + "grad_norm": 0.42068231105804443, + "learning_rate": 1.5398247181785193e-05, + "loss": 0.5873, + "step": 23268 + }, + { + "epoch": 0.6389071938495332, + "grad_norm": 0.3863702118396759, + "learning_rate": 1.5397883618618913e-05, + "loss": 0.5253, + "step": 23269 + }, + { + "epoch": 0.6389346512904998, + "grad_norm": 0.4066868722438812, + "learning_rate": 1.5397520045383857e-05, + "loss": 0.4909, + "step": 23270 + }, + { + "epoch": 0.6389621087314662, + "grad_norm": 0.34870487451553345, + "learning_rate": 1.5397156462080707e-05, + "loss": 0.4262, + "step": 23271 + }, + { + "epoch": 0.6389895661724327, + "grad_norm": 0.3678308129310608, + "learning_rate": 1.5396792868710133e-05, + "loss": 0.4281, + "step": 23272 + }, + { + "epoch": 0.6390170236133992, + "grad_norm": 0.3847922384738922, + "learning_rate": 1.539642926527282e-05, + "loss": 0.4836, + "step": 23273 + }, + { + "epoch": 0.6390444810543657, + "grad_norm": 0.38642778992652893, + "learning_rate": 1.5396065651769445e-05, + "loss": 0.5077, + "step": 23274 + }, + { + "epoch": 0.6390719384953323, + "grad_norm": 0.36486631631851196, + "learning_rate": 1.5395702028200682e-05, + "loss": 0.5455, + "step": 23275 + }, + { + "epoch": 0.6390993959362987, + "grad_norm": 0.3988831341266632, + "learning_rate": 1.5395338394567217e-05, + "loss": 0.5469, + "step": 23276 + }, + { + "epoch": 0.6391268533772653, + "grad_norm": 0.3848815858364105, + "learning_rate": 1.5394974750869723e-05, + "loss": 0.4892, + "step": 23277 + }, + { + "epoch": 0.6391543108182317, + "grad_norm": 0.40750497579574585, + "learning_rate": 1.539461109710888e-05, + "loss": 0.5512, + "step": 23278 + }, + { + "epoch": 0.6391817682591983, + "grad_norm": 0.7758901715278625, + "learning_rate": 1.539424743328536e-05, + "loss": 0.5387, + "step": 23279 + }, + { + "epoch": 0.6392092257001647, + "grad_norm": 0.3683019280433655, + "learning_rate": 1.5393883759399852e-05, + "loss": 0.5237, + "step": 23280 + }, + { + "epoch": 0.6392366831411312, + "grad_norm": 0.35037797689437866, + "learning_rate": 1.5393520075453026e-05, + "loss": 0.4326, + "step": 23281 + }, + { + "epoch": 0.6392641405820978, + "grad_norm": 0.44442519545555115, + "learning_rate": 1.5393156381445567e-05, + "loss": 0.5378, + "step": 23282 + }, + { + "epoch": 0.6392915980230642, + "grad_norm": 0.3759596347808838, + "learning_rate": 1.5392792677378148e-05, + "loss": 0.5501, + "step": 23283 + }, + { + "epoch": 0.6393190554640308, + "grad_norm": 0.37357082962989807, + "learning_rate": 1.539242896325145e-05, + "loss": 0.4619, + "step": 23284 + }, + { + "epoch": 0.6393465129049972, + "grad_norm": 0.4244522750377655, + "learning_rate": 1.539206523906615e-05, + "loss": 0.4462, + "step": 23285 + }, + { + "epoch": 0.6393739703459638, + "grad_norm": 0.34691721200942993, + "learning_rate": 1.539170150482293e-05, + "loss": 0.4598, + "step": 23286 + }, + { + "epoch": 0.6394014277869302, + "grad_norm": 0.3927154541015625, + "learning_rate": 1.5391337760522464e-05, + "loss": 0.5436, + "step": 23287 + }, + { + "epoch": 0.6394288852278968, + "grad_norm": 0.36177805066108704, + "learning_rate": 1.5390974006165432e-05, + "loss": 0.4042, + "step": 23288 + }, + { + "epoch": 0.6394563426688633, + "grad_norm": 0.3837836980819702, + "learning_rate": 1.5390610241752516e-05, + "loss": 0.5147, + "step": 23289 + }, + { + "epoch": 0.6394838001098297, + "grad_norm": 0.4072783291339874, + "learning_rate": 1.539024646728439e-05, + "loss": 0.5934, + "step": 23290 + }, + { + "epoch": 0.6395112575507963, + "grad_norm": 0.35004910826683044, + "learning_rate": 1.5389882682761726e-05, + "loss": 0.5394, + "step": 23291 + }, + { + "epoch": 0.6395387149917627, + "grad_norm": 0.37624043226242065, + "learning_rate": 1.538951888818522e-05, + "loss": 0.4936, + "step": 23292 + }, + { + "epoch": 0.6395661724327293, + "grad_norm": 0.3916052579879761, + "learning_rate": 1.5389155083555536e-05, + "loss": 0.5063, + "step": 23293 + }, + { + "epoch": 0.6395936298736957, + "grad_norm": 0.37599435448646545, + "learning_rate": 1.5388791268873363e-05, + "loss": 0.4963, + "step": 23294 + }, + { + "epoch": 0.6396210873146623, + "grad_norm": 0.3764609396457672, + "learning_rate": 1.538842744413937e-05, + "loss": 0.4917, + "step": 23295 + }, + { + "epoch": 0.6396485447556288, + "grad_norm": 0.3665120005607605, + "learning_rate": 1.5388063609354243e-05, + "loss": 0.4638, + "step": 23296 + }, + { + "epoch": 0.6396760021965953, + "grad_norm": 0.3854277729988098, + "learning_rate": 1.5387699764518656e-05, + "loss": 0.5093, + "step": 23297 + }, + { + "epoch": 0.6397034596375618, + "grad_norm": 0.37834376096725464, + "learning_rate": 1.538733590963329e-05, + "loss": 0.5255, + "step": 23298 + }, + { + "epoch": 0.6397309170785282, + "grad_norm": 0.34227702021598816, + "learning_rate": 1.5386972044698823e-05, + "loss": 0.4775, + "step": 23299 + }, + { + "epoch": 0.6397583745194948, + "grad_norm": 0.3686126470565796, + "learning_rate": 1.5386608169715934e-05, + "loss": 0.5573, + "step": 23300 + }, + { + "epoch": 0.6397858319604612, + "grad_norm": 0.35471388697624207, + "learning_rate": 1.5386244284685302e-05, + "loss": 0.5025, + "step": 23301 + }, + { + "epoch": 0.6398132894014278, + "grad_norm": 0.37887367606163025, + "learning_rate": 1.5385880389607603e-05, + "loss": 0.4929, + "step": 23302 + }, + { + "epoch": 0.6398407468423943, + "grad_norm": 0.3601284921169281, + "learning_rate": 1.538551648448352e-05, + "loss": 0.4779, + "step": 23303 + }, + { + "epoch": 0.6398682042833608, + "grad_norm": 0.3657006323337555, + "learning_rate": 1.538515256931373e-05, + "loss": 0.5136, + "step": 23304 + }, + { + "epoch": 0.6398956617243273, + "grad_norm": 0.42818209528923035, + "learning_rate": 1.5384788644098913e-05, + "loss": 0.5527, + "step": 23305 + }, + { + "epoch": 0.6399231191652938, + "grad_norm": 0.3885520100593567, + "learning_rate": 1.538442470883974e-05, + "loss": 0.5132, + "step": 23306 + }, + { + "epoch": 0.6399505766062603, + "grad_norm": 0.35363152623176575, + "learning_rate": 1.5384060763536903e-05, + "loss": 0.4647, + "step": 23307 + }, + { + "epoch": 0.6399780340472268, + "grad_norm": 0.3432157635688782, + "learning_rate": 1.5383696808191075e-05, + "loss": 0.4343, + "step": 23308 + }, + { + "epoch": 0.6400054914881933, + "grad_norm": 0.3943377733230591, + "learning_rate": 1.538333284280293e-05, + "loss": 0.4703, + "step": 23309 + }, + { + "epoch": 0.6400329489291599, + "grad_norm": 0.3469265103340149, + "learning_rate": 1.538296886737315e-05, + "loss": 0.4871, + "step": 23310 + }, + { + "epoch": 0.6400604063701263, + "grad_norm": 0.3577434718608856, + "learning_rate": 1.538260488190242e-05, + "loss": 0.4895, + "step": 23311 + }, + { + "epoch": 0.6400878638110928, + "grad_norm": 0.3555665910243988, + "learning_rate": 1.538224088639141e-05, + "loss": 0.4188, + "step": 23312 + }, + { + "epoch": 0.6401153212520593, + "grad_norm": 0.48646411299705505, + "learning_rate": 1.5381876880840807e-05, + "loss": 0.502, + "step": 23313 + }, + { + "epoch": 0.6401427786930258, + "grad_norm": 0.4505626857280731, + "learning_rate": 1.5381512865251282e-05, + "loss": 0.4049, + "step": 23314 + }, + { + "epoch": 0.6401702361339923, + "grad_norm": 0.38546234369277954, + "learning_rate": 1.5381148839623522e-05, + "loss": 0.5934, + "step": 23315 + }, + { + "epoch": 0.6401976935749588, + "grad_norm": 0.4524680972099304, + "learning_rate": 1.5380784803958198e-05, + "loss": 0.4966, + "step": 23316 + }, + { + "epoch": 0.6402251510159254, + "grad_norm": 0.5143951177597046, + "learning_rate": 1.5380420758255994e-05, + "loss": 0.5532, + "step": 23317 + }, + { + "epoch": 0.6402526084568918, + "grad_norm": 0.4236416816711426, + "learning_rate": 1.5380056702517588e-05, + "loss": 0.5405, + "step": 23318 + }, + { + "epoch": 0.6402800658978584, + "grad_norm": 0.3528895080089569, + "learning_rate": 1.5379692636743658e-05, + "loss": 0.4925, + "step": 23319 + }, + { + "epoch": 0.6403075233388248, + "grad_norm": 0.5806242227554321, + "learning_rate": 1.5379328560934888e-05, + "loss": 0.5805, + "step": 23320 + }, + { + "epoch": 0.6403349807797913, + "grad_norm": 0.3504464328289032, + "learning_rate": 1.5378964475091952e-05, + "loss": 0.4816, + "step": 23321 + }, + { + "epoch": 0.6403624382207578, + "grad_norm": 0.350191593170166, + "learning_rate": 1.5378600379215528e-05, + "loss": 0.4926, + "step": 23322 + }, + { + "epoch": 0.6403898956617243, + "grad_norm": 0.3653624951839447, + "learning_rate": 1.5378236273306303e-05, + "loss": 0.5394, + "step": 23323 + }, + { + "epoch": 0.6404173531026909, + "grad_norm": 0.3602825701236725, + "learning_rate": 1.5377872157364943e-05, + "loss": 0.4885, + "step": 23324 + }, + { + "epoch": 0.6404448105436573, + "grad_norm": 0.5661852359771729, + "learning_rate": 1.537750803139214e-05, + "loss": 0.5317, + "step": 23325 + }, + { + "epoch": 0.6404722679846239, + "grad_norm": 0.3591994643211365, + "learning_rate": 1.5377143895388567e-05, + "loss": 0.5329, + "step": 23326 + }, + { + "epoch": 0.6404997254255903, + "grad_norm": 0.3698809742927551, + "learning_rate": 1.5376779749354904e-05, + "loss": 0.4208, + "step": 23327 + }, + { + "epoch": 0.6405271828665569, + "grad_norm": 0.3434257507324219, + "learning_rate": 1.5376415593291832e-05, + "loss": 0.4384, + "step": 23328 + }, + { + "epoch": 0.6405546403075233, + "grad_norm": 0.43479785323143005, + "learning_rate": 1.5376051427200026e-05, + "loss": 0.5529, + "step": 23329 + }, + { + "epoch": 0.6405820977484898, + "grad_norm": 0.4218279719352722, + "learning_rate": 1.5375687251080173e-05, + "loss": 0.5197, + "step": 23330 + }, + { + "epoch": 0.6406095551894564, + "grad_norm": 0.3976280093193054, + "learning_rate": 1.5375323064932944e-05, + "loss": 0.5219, + "step": 23331 + }, + { + "epoch": 0.6406370126304228, + "grad_norm": 0.389387845993042, + "learning_rate": 1.5374958868759023e-05, + "loss": 0.5206, + "step": 23332 + }, + { + "epoch": 0.6406644700713894, + "grad_norm": 0.46067896485328674, + "learning_rate": 1.537459466255909e-05, + "loss": 0.5101, + "step": 23333 + }, + { + "epoch": 0.6406919275123558, + "grad_norm": 0.4199887812137604, + "learning_rate": 1.537423044633382e-05, + "loss": 0.4974, + "step": 23334 + }, + { + "epoch": 0.6407193849533224, + "grad_norm": 0.40160298347473145, + "learning_rate": 1.53738662200839e-05, + "loss": 0.5439, + "step": 23335 + }, + { + "epoch": 0.6407468423942888, + "grad_norm": 0.3799644410610199, + "learning_rate": 1.537350198381e-05, + "loss": 0.5238, + "step": 23336 + }, + { + "epoch": 0.6407742998352554, + "grad_norm": 0.39012610912323, + "learning_rate": 1.53731377375128e-05, + "loss": 0.4215, + "step": 23337 + }, + { + "epoch": 0.6408017572762219, + "grad_norm": 0.532233476638794, + "learning_rate": 1.537277348119299e-05, + "loss": 0.4558, + "step": 23338 + }, + { + "epoch": 0.6408292147171883, + "grad_norm": 0.36730891466140747, + "learning_rate": 1.537240921485124e-05, + "loss": 0.5315, + "step": 23339 + }, + { + "epoch": 0.6408566721581549, + "grad_norm": 0.4715304672718048, + "learning_rate": 1.537204493848823e-05, + "loss": 0.5732, + "step": 23340 + }, + { + "epoch": 0.6408841295991213, + "grad_norm": 0.42470186948776245, + "learning_rate": 1.5371680652104644e-05, + "loss": 0.531, + "step": 23341 + }, + { + "epoch": 0.6409115870400879, + "grad_norm": 0.3835452198982239, + "learning_rate": 1.537131635570116e-05, + "loss": 0.4862, + "step": 23342 + }, + { + "epoch": 0.6409390444810543, + "grad_norm": 0.4184708893299103, + "learning_rate": 1.5370952049278454e-05, + "loss": 0.3757, + "step": 23343 + }, + { + "epoch": 0.6409665019220209, + "grad_norm": 0.3793332874774933, + "learning_rate": 1.537058773283721e-05, + "loss": 0.5559, + "step": 23344 + }, + { + "epoch": 0.6409939593629874, + "grad_norm": 0.41226232051849365, + "learning_rate": 1.5370223406378107e-05, + "loss": 0.5638, + "step": 23345 + }, + { + "epoch": 0.6410214168039539, + "grad_norm": 0.3795441687107086, + "learning_rate": 1.536985906990182e-05, + "loss": 0.525, + "step": 23346 + }, + { + "epoch": 0.6410488742449204, + "grad_norm": 0.3820289969444275, + "learning_rate": 1.5369494723409033e-05, + "loss": 0.5082, + "step": 23347 + }, + { + "epoch": 0.6410763316858868, + "grad_norm": 0.3663162291049957, + "learning_rate": 1.5369130366900426e-05, + "loss": 0.4574, + "step": 23348 + }, + { + "epoch": 0.6411037891268534, + "grad_norm": 0.3452400267124176, + "learning_rate": 1.5368766000376677e-05, + "loss": 0.429, + "step": 23349 + }, + { + "epoch": 0.6411312465678198, + "grad_norm": 0.37639743089675903, + "learning_rate": 1.5368401623838464e-05, + "loss": 0.4719, + "step": 23350 + }, + { + "epoch": 0.6411587040087864, + "grad_norm": 0.3908580541610718, + "learning_rate": 1.5368037237286472e-05, + "loss": 0.5592, + "step": 23351 + }, + { + "epoch": 0.6411861614497529, + "grad_norm": 0.41682612895965576, + "learning_rate": 1.5367672840721378e-05, + "loss": 0.4896, + "step": 23352 + }, + { + "epoch": 0.6412136188907194, + "grad_norm": 0.36820265650749207, + "learning_rate": 1.5367308434143853e-05, + "loss": 0.5474, + "step": 23353 + }, + { + "epoch": 0.6412410763316859, + "grad_norm": 0.3869185745716095, + "learning_rate": 1.5366944017554592e-05, + "loss": 0.4291, + "step": 23354 + }, + { + "epoch": 0.6412685337726524, + "grad_norm": 0.4039987325668335, + "learning_rate": 1.5366579590954266e-05, + "loss": 0.5125, + "step": 23355 + }, + { + "epoch": 0.6412959912136189, + "grad_norm": 0.42116060853004456, + "learning_rate": 1.5366215154343555e-05, + "loss": 0.4645, + "step": 23356 + }, + { + "epoch": 0.6413234486545853, + "grad_norm": 0.39759477972984314, + "learning_rate": 1.536585070772314e-05, + "loss": 0.5255, + "step": 23357 + }, + { + "epoch": 0.6413509060955519, + "grad_norm": 0.4346941411495209, + "learning_rate": 1.5365486251093704e-05, + "loss": 0.5069, + "step": 23358 + }, + { + "epoch": 0.6413783635365184, + "grad_norm": 0.3513598144054413, + "learning_rate": 1.5365121784455922e-05, + "loss": 0.4782, + "step": 23359 + }, + { + "epoch": 0.6414058209774849, + "grad_norm": 0.33418819308280945, + "learning_rate": 1.5364757307810473e-05, + "loss": 0.5259, + "step": 23360 + }, + { + "epoch": 0.6414332784184514, + "grad_norm": 0.3269241750240326, + "learning_rate": 1.5364392821158042e-05, + "loss": 0.4267, + "step": 23361 + }, + { + "epoch": 0.6414607358594179, + "grad_norm": 0.34932941198349, + "learning_rate": 1.536402832449931e-05, + "loss": 0.4794, + "step": 23362 + }, + { + "epoch": 0.6414881933003844, + "grad_norm": 0.48010125756263733, + "learning_rate": 1.5363663817834944e-05, + "loss": 0.5432, + "step": 23363 + }, + { + "epoch": 0.6415156507413509, + "grad_norm": 0.4471402168273926, + "learning_rate": 1.536329930116564e-05, + "loss": 0.5182, + "step": 23364 + }, + { + "epoch": 0.6415431081823174, + "grad_norm": 0.41603222489356995, + "learning_rate": 1.5362934774492068e-05, + "loss": 0.5367, + "step": 23365 + }, + { + "epoch": 0.641570565623284, + "grad_norm": 0.41754236817359924, + "learning_rate": 1.536257023781491e-05, + "loss": 0.5474, + "step": 23366 + }, + { + "epoch": 0.6415980230642504, + "grad_norm": 0.34604793787002563, + "learning_rate": 1.536220569113485e-05, + "loss": 0.5166, + "step": 23367 + }, + { + "epoch": 0.641625480505217, + "grad_norm": 0.3410996198654175, + "learning_rate": 1.5361841134452563e-05, + "loss": 0.5043, + "step": 23368 + }, + { + "epoch": 0.6416529379461834, + "grad_norm": 0.4235232174396515, + "learning_rate": 1.5361476567768735e-05, + "loss": 0.5553, + "step": 23369 + }, + { + "epoch": 0.6416803953871499, + "grad_norm": 0.3439613878726959, + "learning_rate": 1.536111199108404e-05, + "loss": 0.5254, + "step": 23370 + }, + { + "epoch": 0.6417078528281164, + "grad_norm": 0.4100147485733032, + "learning_rate": 1.5360747404399156e-05, + "loss": 0.5018, + "step": 23371 + }, + { + "epoch": 0.6417353102690829, + "grad_norm": 0.3830028474330902, + "learning_rate": 1.5360382807714772e-05, + "loss": 0.507, + "step": 23372 + }, + { + "epoch": 0.6417627677100495, + "grad_norm": 0.41696926951408386, + "learning_rate": 1.536001820103156e-05, + "loss": 0.476, + "step": 23373 + }, + { + "epoch": 0.6417902251510159, + "grad_norm": 0.3740525543689728, + "learning_rate": 1.5359653584350205e-05, + "loss": 0.5955, + "step": 23374 + }, + { + "epoch": 0.6418176825919825, + "grad_norm": 0.3852023184299469, + "learning_rate": 1.5359288957671386e-05, + "loss": 0.4885, + "step": 23375 + }, + { + "epoch": 0.6418451400329489, + "grad_norm": 0.37958207726478577, + "learning_rate": 1.5358924320995782e-05, + "loss": 0.5243, + "step": 23376 + }, + { + "epoch": 0.6418725974739155, + "grad_norm": 0.39876991510391235, + "learning_rate": 1.5358559674324075e-05, + "loss": 0.5949, + "step": 23377 + }, + { + "epoch": 0.6419000549148819, + "grad_norm": 0.35154786705970764, + "learning_rate": 1.5358195017656938e-05, + "loss": 0.5289, + "step": 23378 + }, + { + "epoch": 0.6419275123558484, + "grad_norm": 0.3734380602836609, + "learning_rate": 1.5357830350995064e-05, + "loss": 0.5213, + "step": 23379 + }, + { + "epoch": 0.641954969796815, + "grad_norm": 0.3651007413864136, + "learning_rate": 1.5357465674339125e-05, + "loss": 0.4817, + "step": 23380 + }, + { + "epoch": 0.6419824272377814, + "grad_norm": 0.36430445313453674, + "learning_rate": 1.5357100987689798e-05, + "loss": 0.551, + "step": 23381 + }, + { + "epoch": 0.642009884678748, + "grad_norm": 0.3833302855491638, + "learning_rate": 1.5356736291047775e-05, + "loss": 0.6227, + "step": 23382 + }, + { + "epoch": 0.6420373421197144, + "grad_norm": 0.45691436529159546, + "learning_rate": 1.5356371584413724e-05, + "loss": 0.4588, + "step": 23383 + }, + { + "epoch": 0.642064799560681, + "grad_norm": 0.40323683619499207, + "learning_rate": 1.535600686778833e-05, + "loss": 0.5494, + "step": 23384 + }, + { + "epoch": 0.6420922570016474, + "grad_norm": 0.41318202018737793, + "learning_rate": 1.535564214117228e-05, + "loss": 0.4458, + "step": 23385 + }, + { + "epoch": 0.642119714442614, + "grad_norm": 1.0183138847351074, + "learning_rate": 1.535527740456624e-05, + "loss": 0.7262, + "step": 23386 + }, + { + "epoch": 0.6421471718835805, + "grad_norm": 0.3578435182571411, + "learning_rate": 1.5354912657970905e-05, + "loss": 0.3619, + "step": 23387 + }, + { + "epoch": 0.6421746293245469, + "grad_norm": 0.427310973405838, + "learning_rate": 1.535454790138695e-05, + "loss": 0.4965, + "step": 23388 + }, + { + "epoch": 0.6422020867655135, + "grad_norm": 0.37347230315208435, + "learning_rate": 1.5354183134815044e-05, + "loss": 0.4701, + "step": 23389 + }, + { + "epoch": 0.6422295442064799, + "grad_norm": 0.3918861746788025, + "learning_rate": 1.5353818358255886e-05, + "loss": 0.4918, + "step": 23390 + }, + { + "epoch": 0.6422570016474465, + "grad_norm": 0.37887585163116455, + "learning_rate": 1.5353453571710146e-05, + "loss": 0.5082, + "step": 23391 + }, + { + "epoch": 0.6422844590884129, + "grad_norm": 0.38466185331344604, + "learning_rate": 1.5353088775178506e-05, + "loss": 0.4603, + "step": 23392 + }, + { + "epoch": 0.6423119165293795, + "grad_norm": 0.3974919617176056, + "learning_rate": 1.535272396866165e-05, + "loss": 0.4675, + "step": 23393 + }, + { + "epoch": 0.642339373970346, + "grad_norm": 0.3649398684501648, + "learning_rate": 1.535235915216025e-05, + "loss": 0.4737, + "step": 23394 + }, + { + "epoch": 0.6423668314113125, + "grad_norm": 0.37644436955451965, + "learning_rate": 1.5351994325674995e-05, + "loss": 0.5591, + "step": 23395 + }, + { + "epoch": 0.642394288852279, + "grad_norm": 0.41149741411209106, + "learning_rate": 1.5351629489206562e-05, + "loss": 0.5762, + "step": 23396 + }, + { + "epoch": 0.6424217462932454, + "grad_norm": 0.3463708162307739, + "learning_rate": 1.535126464275563e-05, + "loss": 0.5038, + "step": 23397 + }, + { + "epoch": 0.642449203734212, + "grad_norm": 0.4213593602180481, + "learning_rate": 1.5350899786322887e-05, + "loss": 0.5895, + "step": 23398 + }, + { + "epoch": 0.6424766611751784, + "grad_norm": 0.3961278796195984, + "learning_rate": 1.5350534919909e-05, + "loss": 0.4481, + "step": 23399 + }, + { + "epoch": 0.642504118616145, + "grad_norm": 0.4040287733078003, + "learning_rate": 1.5350170043514666e-05, + "loss": 0.4712, + "step": 23400 + }, + { + "epoch": 0.6425315760571115, + "grad_norm": 0.4180543124675751, + "learning_rate": 1.5349805157140553e-05, + "loss": 0.5114, + "step": 23401 + }, + { + "epoch": 0.642559033498078, + "grad_norm": 0.3451293408870697, + "learning_rate": 1.534944026078735e-05, + "loss": 0.558, + "step": 23402 + }, + { + "epoch": 0.6425864909390445, + "grad_norm": 0.38470321893692017, + "learning_rate": 1.5349075354455726e-05, + "loss": 0.5177, + "step": 23403 + }, + { + "epoch": 0.642613948380011, + "grad_norm": 0.48813965916633606, + "learning_rate": 1.5348710438146374e-05, + "loss": 0.6072, + "step": 23404 + }, + { + "epoch": 0.6426414058209775, + "grad_norm": 0.4181462824344635, + "learning_rate": 1.534834551185997e-05, + "loss": 0.4878, + "step": 23405 + }, + { + "epoch": 0.642668863261944, + "grad_norm": 0.41156619787216187, + "learning_rate": 1.5347980575597192e-05, + "loss": 0.5549, + "step": 23406 + }, + { + "epoch": 0.6426963207029105, + "grad_norm": 0.39910459518432617, + "learning_rate": 1.5347615629358724e-05, + "loss": 0.421, + "step": 23407 + }, + { + "epoch": 0.642723778143877, + "grad_norm": 0.39041510224342346, + "learning_rate": 1.534725067314525e-05, + "loss": 0.5107, + "step": 23408 + }, + { + "epoch": 0.6427512355848435, + "grad_norm": 0.4237847328186035, + "learning_rate": 1.534688570695744e-05, + "loss": 0.5484, + "step": 23409 + }, + { + "epoch": 0.64277869302581, + "grad_norm": 0.44051671028137207, + "learning_rate": 1.534652073079599e-05, + "loss": 0.5424, + "step": 23410 + }, + { + "epoch": 0.6428061504667765, + "grad_norm": 0.38985711336135864, + "learning_rate": 1.5346155744661568e-05, + "loss": 0.4311, + "step": 23411 + }, + { + "epoch": 0.642833607907743, + "grad_norm": 0.4444248080253601, + "learning_rate": 1.5345790748554858e-05, + "loss": 0.5223, + "step": 23412 + }, + { + "epoch": 0.6428610653487095, + "grad_norm": 0.3561208248138428, + "learning_rate": 1.5345425742476544e-05, + "loss": 0.4896, + "step": 23413 + }, + { + "epoch": 0.642888522789676, + "grad_norm": 0.4085029661655426, + "learning_rate": 1.5345060726427304e-05, + "loss": 0.5492, + "step": 23414 + }, + { + "epoch": 0.6429159802306426, + "grad_norm": 0.41916653513908386, + "learning_rate": 1.5344695700407823e-05, + "loss": 0.4921, + "step": 23415 + }, + { + "epoch": 0.642943437671609, + "grad_norm": 0.3685733377933502, + "learning_rate": 1.5344330664418773e-05, + "loss": 0.5424, + "step": 23416 + }, + { + "epoch": 0.6429708951125755, + "grad_norm": 0.3824084997177124, + "learning_rate": 1.5343965618460844e-05, + "loss": 0.5628, + "step": 23417 + }, + { + "epoch": 0.642998352553542, + "grad_norm": 0.38640689849853516, + "learning_rate": 1.5343600562534712e-05, + "loss": 0.5449, + "step": 23418 + }, + { + "epoch": 0.6430258099945085, + "grad_norm": 0.42851170897483826, + "learning_rate": 1.5343235496641063e-05, + "loss": 0.5096, + "step": 23419 + }, + { + "epoch": 0.643053267435475, + "grad_norm": 0.3772341310977936, + "learning_rate": 1.534287042078057e-05, + "loss": 0.4843, + "step": 23420 + }, + { + "epoch": 0.6430807248764415, + "grad_norm": 0.3637911379337311, + "learning_rate": 1.5342505334953922e-05, + "loss": 0.4638, + "step": 23421 + }, + { + "epoch": 0.6431081823174081, + "grad_norm": 0.3767666220664978, + "learning_rate": 1.5342140239161795e-05, + "loss": 0.5778, + "step": 23422 + }, + { + "epoch": 0.6431356397583745, + "grad_norm": 0.42338645458221436, + "learning_rate": 1.534177513340487e-05, + "loss": 0.4927, + "step": 23423 + }, + { + "epoch": 0.6431630971993411, + "grad_norm": 0.39665886759757996, + "learning_rate": 1.5341410017683835e-05, + "loss": 0.4541, + "step": 23424 + }, + { + "epoch": 0.6431905546403075, + "grad_norm": 0.40056750178337097, + "learning_rate": 1.534104489199936e-05, + "loss": 0.4615, + "step": 23425 + }, + { + "epoch": 0.643218012081274, + "grad_norm": 0.3307589888572693, + "learning_rate": 1.534067975635213e-05, + "loss": 0.4521, + "step": 23426 + }, + { + "epoch": 0.6432454695222405, + "grad_norm": 0.42305514216423035, + "learning_rate": 1.534031461074283e-05, + "loss": 0.6592, + "step": 23427 + }, + { + "epoch": 0.643272926963207, + "grad_norm": 0.482235312461853, + "learning_rate": 1.533994945517214e-05, + "loss": 0.5604, + "step": 23428 + }, + { + "epoch": 0.6433003844041736, + "grad_norm": 0.3907907009124756, + "learning_rate": 1.5339584289640742e-05, + "loss": 0.5331, + "step": 23429 + }, + { + "epoch": 0.64332784184514, + "grad_norm": 0.40320682525634766, + "learning_rate": 1.533921911414931e-05, + "loss": 0.5749, + "step": 23430 + }, + { + "epoch": 0.6433552992861066, + "grad_norm": 0.3696143925189972, + "learning_rate": 1.5338853928698532e-05, + "loss": 0.5264, + "step": 23431 + }, + { + "epoch": 0.643382756727073, + "grad_norm": 0.3319754898548126, + "learning_rate": 1.5338488733289093e-05, + "loss": 0.496, + "step": 23432 + }, + { + "epoch": 0.6434102141680396, + "grad_norm": 0.4220735430717468, + "learning_rate": 1.533812352792166e-05, + "loss": 0.5118, + "step": 23433 + }, + { + "epoch": 0.643437671609006, + "grad_norm": 0.3343045115470886, + "learning_rate": 1.5337758312596926e-05, + "loss": 0.4384, + "step": 23434 + }, + { + "epoch": 0.6434651290499726, + "grad_norm": 0.3994324505329132, + "learning_rate": 1.5337393087315572e-05, + "loss": 0.4666, + "step": 23435 + }, + { + "epoch": 0.643492586490939, + "grad_norm": 0.34487485885620117, + "learning_rate": 1.533702785207827e-05, + "loss": 0.4992, + "step": 23436 + }, + { + "epoch": 0.6435200439319055, + "grad_norm": 0.4174627661705017, + "learning_rate": 1.5336662606885715e-05, + "loss": 0.4817, + "step": 23437 + }, + { + "epoch": 0.6435475013728721, + "grad_norm": 0.37813815474510193, + "learning_rate": 1.533629735173858e-05, + "loss": 0.4715, + "step": 23438 + }, + { + "epoch": 0.6435749588138385, + "grad_norm": 0.38937243819236755, + "learning_rate": 1.5335932086637543e-05, + "loss": 0.5953, + "step": 23439 + }, + { + "epoch": 0.6436024162548051, + "grad_norm": 0.37922218441963196, + "learning_rate": 1.5335566811583293e-05, + "loss": 0.5448, + "step": 23440 + }, + { + "epoch": 0.6436298736957715, + "grad_norm": 0.41336745023727417, + "learning_rate": 1.5335201526576505e-05, + "loss": 0.5324, + "step": 23441 + }, + { + "epoch": 0.6436573311367381, + "grad_norm": 0.3581755459308624, + "learning_rate": 1.5334836231617866e-05, + "loss": 0.5366, + "step": 23442 + }, + { + "epoch": 0.6436847885777045, + "grad_norm": 0.5423375368118286, + "learning_rate": 1.5334470926708056e-05, + "loss": 0.5196, + "step": 23443 + }, + { + "epoch": 0.643712246018671, + "grad_norm": 0.3933834433555603, + "learning_rate": 1.533410561184775e-05, + "loss": 0.5565, + "step": 23444 + }, + { + "epoch": 0.6437397034596376, + "grad_norm": 0.45108407735824585, + "learning_rate": 1.533374028703764e-05, + "loss": 0.4619, + "step": 23445 + }, + { + "epoch": 0.643767160900604, + "grad_norm": 0.46122512221336365, + "learning_rate": 1.53333749522784e-05, + "loss": 0.5551, + "step": 23446 + }, + { + "epoch": 0.6437946183415706, + "grad_norm": 0.379111647605896, + "learning_rate": 1.5333009607570714e-05, + "loss": 0.4998, + "step": 23447 + }, + { + "epoch": 0.643822075782537, + "grad_norm": 0.4193352162837982, + "learning_rate": 1.533264425291526e-05, + "loss": 0.5244, + "step": 23448 + }, + { + "epoch": 0.6438495332235036, + "grad_norm": 0.3484514057636261, + "learning_rate": 1.5332278888312727e-05, + "loss": 0.4978, + "step": 23449 + }, + { + "epoch": 0.64387699066447, + "grad_norm": 12.829379081726074, + "learning_rate": 1.533191351376379e-05, + "loss": 0.5269, + "step": 23450 + }, + { + "epoch": 0.6439044481054366, + "grad_norm": 0.5067495703697205, + "learning_rate": 1.5331548129269132e-05, + "loss": 0.5427, + "step": 23451 + }, + { + "epoch": 0.6439319055464031, + "grad_norm": 0.37953925132751465, + "learning_rate": 1.5331182734829434e-05, + "loss": 0.5638, + "step": 23452 + }, + { + "epoch": 0.6439593629873696, + "grad_norm": 0.41366273164749146, + "learning_rate": 1.5330817330445376e-05, + "loss": 0.6001, + "step": 23453 + }, + { + "epoch": 0.6439868204283361, + "grad_norm": 0.36196261644363403, + "learning_rate": 1.533045191611765e-05, + "loss": 0.5917, + "step": 23454 + }, + { + "epoch": 0.6440142778693025, + "grad_norm": 0.3850476145744324, + "learning_rate": 1.5330086491846926e-05, + "loss": 0.5483, + "step": 23455 + }, + { + "epoch": 0.6440417353102691, + "grad_norm": 0.5892282128334045, + "learning_rate": 1.5329721057633888e-05, + "loss": 0.4598, + "step": 23456 + }, + { + "epoch": 0.6440691927512355, + "grad_norm": 0.37163785099983215, + "learning_rate": 1.532935561347922e-05, + "loss": 0.5082, + "step": 23457 + }, + { + "epoch": 0.6440966501922021, + "grad_norm": 0.3671664893627167, + "learning_rate": 1.5328990159383606e-05, + "loss": 0.5788, + "step": 23458 + }, + { + "epoch": 0.6441241076331686, + "grad_norm": 0.3575930595397949, + "learning_rate": 1.5328624695347724e-05, + "loss": 0.5483, + "step": 23459 + }, + { + "epoch": 0.6441515650741351, + "grad_norm": 0.34670600295066833, + "learning_rate": 1.5328259221372255e-05, + "loss": 0.4266, + "step": 23460 + }, + { + "epoch": 0.6441790225151016, + "grad_norm": 0.4417199492454529, + "learning_rate": 1.532789373745788e-05, + "loss": 0.5111, + "step": 23461 + }, + { + "epoch": 0.6442064799560681, + "grad_norm": 0.43500369787216187, + "learning_rate": 1.532752824360528e-05, + "loss": 0.4647, + "step": 23462 + }, + { + "epoch": 0.6442339373970346, + "grad_norm": 0.4041222035884857, + "learning_rate": 1.5327162739815148e-05, + "loss": 0.436, + "step": 23463 + }, + { + "epoch": 0.644261394838001, + "grad_norm": 0.39275363087654114, + "learning_rate": 1.5326797226088153e-05, + "loss": 0.5491, + "step": 23464 + }, + { + "epoch": 0.6442888522789676, + "grad_norm": 0.46627873182296753, + "learning_rate": 1.5326431702424983e-05, + "loss": 0.6491, + "step": 23465 + }, + { + "epoch": 0.6443163097199341, + "grad_norm": 0.42550012469291687, + "learning_rate": 1.5326066168826313e-05, + "loss": 0.4602, + "step": 23466 + }, + { + "epoch": 0.6443437671609006, + "grad_norm": 0.38763904571533203, + "learning_rate": 1.5325700625292835e-05, + "loss": 0.5258, + "step": 23467 + }, + { + "epoch": 0.6443712246018671, + "grad_norm": 0.39235323667526245, + "learning_rate": 1.5325335071825222e-05, + "loss": 0.4593, + "step": 23468 + }, + { + "epoch": 0.6443986820428336, + "grad_norm": 0.37306147813796997, + "learning_rate": 1.5324969508424162e-05, + "loss": 0.4959, + "step": 23469 + }, + { + "epoch": 0.6444261394838001, + "grad_norm": 0.5764407515525818, + "learning_rate": 1.532460393509033e-05, + "loss": 0.5499, + "step": 23470 + }, + { + "epoch": 0.6444535969247666, + "grad_norm": 0.38954341411590576, + "learning_rate": 1.5324238351824417e-05, + "loss": 0.5589, + "step": 23471 + }, + { + "epoch": 0.6444810543657331, + "grad_norm": 0.4033588171005249, + "learning_rate": 1.5323872758627096e-05, + "loss": 0.5192, + "step": 23472 + }, + { + "epoch": 0.6445085118066997, + "grad_norm": 0.3980705440044403, + "learning_rate": 1.5323507155499056e-05, + "loss": 0.5458, + "step": 23473 + }, + { + "epoch": 0.6445359692476661, + "grad_norm": 0.3896021544933319, + "learning_rate": 1.5323141542440975e-05, + "loss": 0.5408, + "step": 23474 + }, + { + "epoch": 0.6445634266886326, + "grad_norm": 0.37329742312431335, + "learning_rate": 1.532277591945354e-05, + "loss": 0.5197, + "step": 23475 + }, + { + "epoch": 0.6445908841295991, + "grad_norm": 0.3618464171886444, + "learning_rate": 1.5322410286537424e-05, + "loss": 0.5965, + "step": 23476 + }, + { + "epoch": 0.6446183415705656, + "grad_norm": 0.3850589692592621, + "learning_rate": 1.5322044643693317e-05, + "loss": 0.4987, + "step": 23477 + }, + { + "epoch": 0.6446457990115321, + "grad_norm": 0.440790057182312, + "learning_rate": 1.5321678990921896e-05, + "loss": 0.5097, + "step": 23478 + }, + { + "epoch": 0.6446732564524986, + "grad_norm": 0.4229966104030609, + "learning_rate": 1.5321313328223845e-05, + "loss": 0.4775, + "step": 23479 + }, + { + "epoch": 0.6447007138934652, + "grad_norm": 0.42557135224342346, + "learning_rate": 1.5320947655599848e-05, + "loss": 0.4439, + "step": 23480 + }, + { + "epoch": 0.6447281713344316, + "grad_norm": 0.43500199913978577, + "learning_rate": 1.5320581973050587e-05, + "loss": 0.5613, + "step": 23481 + }, + { + "epoch": 0.6447556287753982, + "grad_norm": 0.3703666925430298, + "learning_rate": 1.532021628057674e-05, + "loss": 0.5468, + "step": 23482 + }, + { + "epoch": 0.6447830862163646, + "grad_norm": 0.35344669222831726, + "learning_rate": 1.5319850578178993e-05, + "loss": 0.5594, + "step": 23483 + }, + { + "epoch": 0.6448105436573311, + "grad_norm": 0.4030556082725525, + "learning_rate": 1.531948486585802e-05, + "loss": 0.4948, + "step": 23484 + }, + { + "epoch": 0.6448380010982976, + "grad_norm": 0.41496706008911133, + "learning_rate": 1.531911914361452e-05, + "loss": 0.4592, + "step": 23485 + }, + { + "epoch": 0.6448654585392641, + "grad_norm": 0.4118815064430237, + "learning_rate": 1.5318753411449162e-05, + "loss": 0.5553, + "step": 23486 + }, + { + "epoch": 0.6448929159802307, + "grad_norm": 0.41664260625839233, + "learning_rate": 1.5318387669362626e-05, + "loss": 0.5509, + "step": 23487 + }, + { + "epoch": 0.6449203734211971, + "grad_norm": 0.3605995774269104, + "learning_rate": 1.5318021917355607e-05, + "loss": 0.4357, + "step": 23488 + }, + { + "epoch": 0.6449478308621637, + "grad_norm": 0.3561449646949768, + "learning_rate": 1.5317656155428774e-05, + "loss": 0.5357, + "step": 23489 + }, + { + "epoch": 0.6449752883031301, + "grad_norm": 0.37047451734542847, + "learning_rate": 1.531729038358282e-05, + "loss": 0.557, + "step": 23490 + }, + { + "epoch": 0.6450027457440967, + "grad_norm": 0.36255019903182983, + "learning_rate": 1.5316924601818418e-05, + "loss": 0.4998, + "step": 23491 + }, + { + "epoch": 0.6450302031850631, + "grad_norm": 0.37985020875930786, + "learning_rate": 1.5316558810136257e-05, + "loss": 0.5254, + "step": 23492 + }, + { + "epoch": 0.6450576606260296, + "grad_norm": 0.385601282119751, + "learning_rate": 1.5316193008537016e-05, + "loss": 0.4194, + "step": 23493 + }, + { + "epoch": 0.6450851180669962, + "grad_norm": 0.39671579003334045, + "learning_rate": 1.531582719702138e-05, + "loss": 0.5085, + "step": 23494 + }, + { + "epoch": 0.6451125755079626, + "grad_norm": 0.46844983100891113, + "learning_rate": 1.5315461375590027e-05, + "loss": 0.5417, + "step": 23495 + }, + { + "epoch": 0.6451400329489292, + "grad_norm": 0.42948397994041443, + "learning_rate": 1.5315095544243644e-05, + "loss": 0.5737, + "step": 23496 + }, + { + "epoch": 0.6451674903898956, + "grad_norm": 0.3544919490814209, + "learning_rate": 1.5314729702982913e-05, + "loss": 0.515, + "step": 23497 + }, + { + "epoch": 0.6451949478308622, + "grad_norm": 0.4292404353618622, + "learning_rate": 1.531436385180851e-05, + "loss": 0.4282, + "step": 23498 + }, + { + "epoch": 0.6452224052718286, + "grad_norm": 0.48635348677635193, + "learning_rate": 1.5313997990721124e-05, + "loss": 0.5488, + "step": 23499 + }, + { + "epoch": 0.6452498627127952, + "grad_norm": 0.5099563002586365, + "learning_rate": 1.531363211972144e-05, + "loss": 0.5219, + "step": 23500 + }, + { + "epoch": 0.6452773201537617, + "grad_norm": 0.4011765718460083, + "learning_rate": 1.5313266238810133e-05, + "loss": 0.4044, + "step": 23501 + }, + { + "epoch": 0.6453047775947282, + "grad_norm": 0.3679286241531372, + "learning_rate": 1.5312900347987888e-05, + "loss": 0.4808, + "step": 23502 + }, + { + "epoch": 0.6453322350356947, + "grad_norm": 0.45455726981163025, + "learning_rate": 1.5312534447255388e-05, + "loss": 0.4893, + "step": 23503 + }, + { + "epoch": 0.6453596924766611, + "grad_norm": 0.37145766615867615, + "learning_rate": 1.5312168536613317e-05, + "loss": 0.5494, + "step": 23504 + }, + { + "epoch": 0.6453871499176277, + "grad_norm": 2.3793447017669678, + "learning_rate": 1.5311802616062353e-05, + "loss": 0.4225, + "step": 23505 + }, + { + "epoch": 0.6454146073585941, + "grad_norm": 0.36410728096961975, + "learning_rate": 1.5311436685603188e-05, + "loss": 0.4717, + "step": 23506 + }, + { + "epoch": 0.6454420647995607, + "grad_norm": 0.3831580877304077, + "learning_rate": 1.531107074523649e-05, + "loss": 0.5296, + "step": 23507 + }, + { + "epoch": 0.6454695222405272, + "grad_norm": 0.4357326626777649, + "learning_rate": 1.531070479496296e-05, + "loss": 0.5425, + "step": 23508 + }, + { + "epoch": 0.6454969796814937, + "grad_norm": 0.4528629779815674, + "learning_rate": 1.5310338834783264e-05, + "loss": 0.5998, + "step": 23509 + }, + { + "epoch": 0.6455244371224602, + "grad_norm": 0.3355613648891449, + "learning_rate": 1.530997286469809e-05, + "loss": 0.4602, + "step": 23510 + }, + { + "epoch": 0.6455518945634267, + "grad_norm": 0.4505445957183838, + "learning_rate": 1.530960688470813e-05, + "loss": 0.4825, + "step": 23511 + }, + { + "epoch": 0.6455793520043932, + "grad_norm": 0.356999009847641, + "learning_rate": 1.5309240894814047e-05, + "loss": 0.4416, + "step": 23512 + }, + { + "epoch": 0.6456068094453596, + "grad_norm": 0.35697898268699646, + "learning_rate": 1.5308874895016544e-05, + "loss": 0.5472, + "step": 23513 + }, + { + "epoch": 0.6456342668863262, + "grad_norm": 0.35243189334869385, + "learning_rate": 1.530850888531629e-05, + "loss": 0.5401, + "step": 23514 + }, + { + "epoch": 0.6456617243272927, + "grad_norm": 0.40839242935180664, + "learning_rate": 1.5308142865713976e-05, + "loss": 0.4558, + "step": 23515 + }, + { + "epoch": 0.6456891817682592, + "grad_norm": 0.9486708045005798, + "learning_rate": 1.5307776836210282e-05, + "loss": 0.4795, + "step": 23516 + }, + { + "epoch": 0.6457166392092257, + "grad_norm": 0.37303799390792847, + "learning_rate": 1.5307410796805888e-05, + "loss": 0.4964, + "step": 23517 + }, + { + "epoch": 0.6457440966501922, + "grad_norm": 0.4153828024864197, + "learning_rate": 1.5307044747501478e-05, + "loss": 0.5379, + "step": 23518 + }, + { + "epoch": 0.6457715540911587, + "grad_norm": 0.38188737630844116, + "learning_rate": 1.5306678688297736e-05, + "loss": 0.4485, + "step": 23519 + }, + { + "epoch": 0.6457990115321252, + "grad_norm": 0.43084800243377686, + "learning_rate": 1.5306312619195347e-05, + "loss": 0.4929, + "step": 23520 + }, + { + "epoch": 0.6458264689730917, + "grad_norm": 0.355805367231369, + "learning_rate": 1.5305946540194992e-05, + "loss": 0.4897, + "step": 23521 + }, + { + "epoch": 0.6458539264140583, + "grad_norm": 0.39379727840423584, + "learning_rate": 1.530558045129735e-05, + "loss": 0.4787, + "step": 23522 + }, + { + "epoch": 0.6458813838550247, + "grad_norm": 0.37184977531433105, + "learning_rate": 1.5305214352503107e-05, + "loss": 0.52, + "step": 23523 + }, + { + "epoch": 0.6459088412959912, + "grad_norm": 0.409349650144577, + "learning_rate": 1.530484824381295e-05, + "loss": 0.4183, + "step": 23524 + }, + { + "epoch": 0.6459362987369577, + "grad_norm": 0.37581634521484375, + "learning_rate": 1.5304482125227553e-05, + "loss": 0.4365, + "step": 23525 + }, + { + "epoch": 0.6459637561779242, + "grad_norm": 0.3423929512500763, + "learning_rate": 1.5304115996747605e-05, + "loss": 0.4359, + "step": 23526 + }, + { + "epoch": 0.6459912136188907, + "grad_norm": 0.33313488960266113, + "learning_rate": 1.530374985837379e-05, + "loss": 0.4322, + "step": 23527 + }, + { + "epoch": 0.6460186710598572, + "grad_norm": 0.45416757464408875, + "learning_rate": 1.5303383710106788e-05, + "loss": 0.5631, + "step": 23528 + }, + { + "epoch": 0.6460461285008238, + "grad_norm": 0.37279438972473145, + "learning_rate": 1.530301755194728e-05, + "loss": 0.5514, + "step": 23529 + }, + { + "epoch": 0.6460735859417902, + "grad_norm": 0.36055248975753784, + "learning_rate": 1.5302651383895953e-05, + "loss": 0.4654, + "step": 23530 + }, + { + "epoch": 0.6461010433827568, + "grad_norm": 0.44470393657684326, + "learning_rate": 1.5302285205953493e-05, + "loss": 0.4878, + "step": 23531 + }, + { + "epoch": 0.6461285008237232, + "grad_norm": 0.3753061890602112, + "learning_rate": 1.5301919018120574e-05, + "loss": 0.451, + "step": 23532 + }, + { + "epoch": 0.6461559582646897, + "grad_norm": 0.35062187910079956, + "learning_rate": 1.5301552820397886e-05, + "loss": 0.4896, + "step": 23533 + }, + { + "epoch": 0.6461834157056562, + "grad_norm": 0.37730610370635986, + "learning_rate": 1.530118661278611e-05, + "loss": 0.4544, + "step": 23534 + }, + { + "epoch": 0.6462108731466227, + "grad_norm": 0.32008129358291626, + "learning_rate": 1.530082039528593e-05, + "loss": 0.383, + "step": 23535 + }, + { + "epoch": 0.6462383305875893, + "grad_norm": 0.3896711766719818, + "learning_rate": 1.5300454167898025e-05, + "loss": 0.4562, + "step": 23536 + }, + { + "epoch": 0.6462657880285557, + "grad_norm": 0.37368258833885193, + "learning_rate": 1.5300087930623085e-05, + "loss": 0.5374, + "step": 23537 + }, + { + "epoch": 0.6462932454695223, + "grad_norm": 0.38598549365997314, + "learning_rate": 1.5299721683461787e-05, + "loss": 0.54, + "step": 23538 + }, + { + "epoch": 0.6463207029104887, + "grad_norm": 0.3561011254787445, + "learning_rate": 1.5299355426414815e-05, + "loss": 0.3945, + "step": 23539 + }, + { + "epoch": 0.6463481603514553, + "grad_norm": 0.40940478444099426, + "learning_rate": 1.5298989159482857e-05, + "loss": 0.5368, + "step": 23540 + }, + { + "epoch": 0.6463756177924217, + "grad_norm": 0.39513206481933594, + "learning_rate": 1.529862288266659e-05, + "loss": 0.5542, + "step": 23541 + }, + { + "epoch": 0.6464030752333882, + "grad_norm": 0.3888480067253113, + "learning_rate": 1.52982565959667e-05, + "loss": 0.5063, + "step": 23542 + }, + { + "epoch": 0.6464305326743548, + "grad_norm": 0.3695431053638458, + "learning_rate": 1.5297890299383874e-05, + "loss": 0.5022, + "step": 23543 + }, + { + "epoch": 0.6464579901153212, + "grad_norm": 0.38698798418045044, + "learning_rate": 1.529752399291879e-05, + "loss": 0.5304, + "step": 23544 + }, + { + "epoch": 0.6464854475562878, + "grad_norm": 0.4107344448566437, + "learning_rate": 1.5297157676572133e-05, + "loss": 0.5476, + "step": 23545 + }, + { + "epoch": 0.6465129049972542, + "grad_norm": 0.37237295508384705, + "learning_rate": 1.5296791350344585e-05, + "loss": 0.4807, + "step": 23546 + }, + { + "epoch": 0.6465403624382208, + "grad_norm": 0.3852366805076599, + "learning_rate": 1.5296425014236835e-05, + "loss": 0.6294, + "step": 23547 + }, + { + "epoch": 0.6465678198791872, + "grad_norm": 0.3964526653289795, + "learning_rate": 1.5296058668249557e-05, + "loss": 0.502, + "step": 23548 + }, + { + "epoch": 0.6465952773201538, + "grad_norm": 0.4374866187572479, + "learning_rate": 1.529569231238344e-05, + "loss": 0.5977, + "step": 23549 + }, + { + "epoch": 0.6466227347611203, + "grad_norm": 0.42083367705345154, + "learning_rate": 1.529532594663917e-05, + "loss": 0.5841, + "step": 23550 + }, + { + "epoch": 0.6466501922020867, + "grad_norm": 0.3813053071498871, + "learning_rate": 1.5294959571017422e-05, + "loss": 0.4733, + "step": 23551 + }, + { + "epoch": 0.6466776496430533, + "grad_norm": 0.39929884672164917, + "learning_rate": 1.529459318551889e-05, + "loss": 0.4342, + "step": 23552 + }, + { + "epoch": 0.6467051070840197, + "grad_norm": 0.3981907069683075, + "learning_rate": 1.5294226790144247e-05, + "loss": 0.5007, + "step": 23553 + }, + { + "epoch": 0.6467325645249863, + "grad_norm": 0.3797193765640259, + "learning_rate": 1.5293860384894184e-05, + "loss": 0.5482, + "step": 23554 + }, + { + "epoch": 0.6467600219659527, + "grad_norm": 0.409719318151474, + "learning_rate": 1.5293493969769383e-05, + "loss": 0.5305, + "step": 23555 + }, + { + "epoch": 0.6467874794069193, + "grad_norm": 0.3918571472167969, + "learning_rate": 1.5293127544770522e-05, + "loss": 0.5065, + "step": 23556 + }, + { + "epoch": 0.6468149368478858, + "grad_norm": 0.37884876132011414, + "learning_rate": 1.529276110989829e-05, + "loss": 0.484, + "step": 23557 + }, + { + "epoch": 0.6468423942888523, + "grad_norm": 0.40796470642089844, + "learning_rate": 1.529239466515337e-05, + "loss": 0.566, + "step": 23558 + }, + { + "epoch": 0.6468698517298188, + "grad_norm": 0.3528914153575897, + "learning_rate": 1.5292028210536448e-05, + "loss": 0.5433, + "step": 23559 + }, + { + "epoch": 0.6468973091707853, + "grad_norm": 0.3988187611103058, + "learning_rate": 1.52916617460482e-05, + "loss": 0.5925, + "step": 23560 + }, + { + "epoch": 0.6469247666117518, + "grad_norm": 0.3913251757621765, + "learning_rate": 1.5291295271689318e-05, + "loss": 0.4681, + "step": 23561 + }, + { + "epoch": 0.6469522240527182, + "grad_norm": 0.43245929479599, + "learning_rate": 1.5290928787460476e-05, + "loss": 0.5497, + "step": 23562 + }, + { + "epoch": 0.6469796814936848, + "grad_norm": 0.4246596097946167, + "learning_rate": 1.529056229336237e-05, + "loss": 0.5015, + "step": 23563 + }, + { + "epoch": 0.6470071389346513, + "grad_norm": 0.49690550565719604, + "learning_rate": 1.529019578939567e-05, + "loss": 0.4511, + "step": 23564 + }, + { + "epoch": 0.6470345963756178, + "grad_norm": 0.3775213658809662, + "learning_rate": 1.528982927556107e-05, + "loss": 0.477, + "step": 23565 + }, + { + "epoch": 0.6470620538165843, + "grad_norm": 0.36722591519355774, + "learning_rate": 1.528946275185925e-05, + "loss": 0.5042, + "step": 23566 + }, + { + "epoch": 0.6470895112575508, + "grad_norm": 0.378296434879303, + "learning_rate": 1.5289096218290892e-05, + "loss": 0.4759, + "step": 23567 + }, + { + "epoch": 0.6471169686985173, + "grad_norm": 0.34696832299232483, + "learning_rate": 1.5288729674856683e-05, + "loss": 0.4398, + "step": 23568 + }, + { + "epoch": 0.6471444261394838, + "grad_norm": 0.4349323511123657, + "learning_rate": 1.5288363121557305e-05, + "loss": 0.5371, + "step": 23569 + }, + { + "epoch": 0.6471718835804503, + "grad_norm": 0.4229699373245239, + "learning_rate": 1.5287996558393442e-05, + "loss": 0.5069, + "step": 23570 + }, + { + "epoch": 0.6471993410214169, + "grad_norm": 0.3885268568992615, + "learning_rate": 1.528762998536578e-05, + "loss": 0.4693, + "step": 23571 + }, + { + "epoch": 0.6472267984623833, + "grad_norm": 0.38908064365386963, + "learning_rate": 1.5287263402474993e-05, + "loss": 0.529, + "step": 23572 + }, + { + "epoch": 0.6472542559033498, + "grad_norm": 0.38693487644195557, + "learning_rate": 1.528689680972178e-05, + "loss": 0.4925, + "step": 23573 + }, + { + "epoch": 0.6472817133443163, + "grad_norm": 0.3703179359436035, + "learning_rate": 1.5286530207106815e-05, + "loss": 0.5105, + "step": 23574 + }, + { + "epoch": 0.6473091707852828, + "grad_norm": 0.382712185382843, + "learning_rate": 1.528616359463078e-05, + "loss": 0.558, + "step": 23575 + }, + { + "epoch": 0.6473366282262493, + "grad_norm": 0.3714483976364136, + "learning_rate": 1.528579697229437e-05, + "loss": 0.5034, + "step": 23576 + }, + { + "epoch": 0.6473640856672158, + "grad_norm": 0.405723512172699, + "learning_rate": 1.5285430340098256e-05, + "loss": 0.557, + "step": 23577 + }, + { + "epoch": 0.6473915431081824, + "grad_norm": 0.34994447231292725, + "learning_rate": 1.5285063698043127e-05, + "loss": 0.5487, + "step": 23578 + }, + { + "epoch": 0.6474190005491488, + "grad_norm": 0.34603482484817505, + "learning_rate": 1.5284697046129673e-05, + "loss": 0.5536, + "step": 23579 + }, + { + "epoch": 0.6474464579901154, + "grad_norm": 0.3981989026069641, + "learning_rate": 1.5284330384358565e-05, + "loss": 0.5228, + "step": 23580 + }, + { + "epoch": 0.6474739154310818, + "grad_norm": 0.3539542853832245, + "learning_rate": 1.52839637127305e-05, + "loss": 0.5453, + "step": 23581 + }, + { + "epoch": 0.6475013728720483, + "grad_norm": 0.35297611355781555, + "learning_rate": 1.5283597031246153e-05, + "loss": 0.4844, + "step": 23582 + }, + { + "epoch": 0.6475288303130148, + "grad_norm": 0.42301276326179504, + "learning_rate": 1.5283230339906216e-05, + "loss": 0.5968, + "step": 23583 + }, + { + "epoch": 0.6475562877539813, + "grad_norm": 0.3421790897846222, + "learning_rate": 1.5282863638711365e-05, + "loss": 0.455, + "step": 23584 + }, + { + "epoch": 0.6475837451949479, + "grad_norm": 0.40178149938583374, + "learning_rate": 1.5282496927662285e-05, + "loss": 0.413, + "step": 23585 + }, + { + "epoch": 0.6476112026359143, + "grad_norm": 0.401120126247406, + "learning_rate": 1.5282130206759664e-05, + "loss": 0.5078, + "step": 23586 + }, + { + "epoch": 0.6476386600768809, + "grad_norm": 0.3689352869987488, + "learning_rate": 1.5281763476004187e-05, + "loss": 0.5043, + "step": 23587 + }, + { + "epoch": 0.6476661175178473, + "grad_norm": 0.3661883771419525, + "learning_rate": 1.528139673539653e-05, + "loss": 0.4875, + "step": 23588 + }, + { + "epoch": 0.6476935749588139, + "grad_norm": 0.3733847737312317, + "learning_rate": 1.5281029984937386e-05, + "loss": 0.5205, + "step": 23589 + }, + { + "epoch": 0.6477210323997803, + "grad_norm": 0.3995633125305176, + "learning_rate": 1.5280663224627435e-05, + "loss": 0.57, + "step": 23590 + }, + { + "epoch": 0.6477484898407468, + "grad_norm": 0.3817859888076782, + "learning_rate": 1.528029645446736e-05, + "loss": 0.5332, + "step": 23591 + }, + { + "epoch": 0.6477759472817134, + "grad_norm": 0.4123396575450897, + "learning_rate": 1.5279929674457853e-05, + "loss": 0.5235, + "step": 23592 + }, + { + "epoch": 0.6478034047226798, + "grad_norm": 0.3818662464618683, + "learning_rate": 1.5279562884599585e-05, + "loss": 0.4773, + "step": 23593 + }, + { + "epoch": 0.6478308621636464, + "grad_norm": 0.36677008867263794, + "learning_rate": 1.527919608489325e-05, + "loss": 0.4538, + "step": 23594 + }, + { + "epoch": 0.6478583196046128, + "grad_norm": 0.37672337889671326, + "learning_rate": 1.5278829275339528e-05, + "loss": 0.538, + "step": 23595 + }, + { + "epoch": 0.6478857770455794, + "grad_norm": 0.37302064895629883, + "learning_rate": 1.5278462455939105e-05, + "loss": 0.5209, + "step": 23596 + }, + { + "epoch": 0.6479132344865458, + "grad_norm": 0.38945242762565613, + "learning_rate": 1.5278095626692666e-05, + "loss": 0.4835, + "step": 23597 + }, + { + "epoch": 0.6479406919275124, + "grad_norm": 0.4318399429321289, + "learning_rate": 1.5277728787600895e-05, + "loss": 0.4823, + "step": 23598 + }, + { + "epoch": 0.6479681493684789, + "grad_norm": 0.4127531051635742, + "learning_rate": 1.5277361938664474e-05, + "loss": 0.5598, + "step": 23599 + }, + { + "epoch": 0.6479956068094453, + "grad_norm": 0.36781755089759827, + "learning_rate": 1.5276995079884086e-05, + "loss": 0.5412, + "step": 23600 + }, + { + "epoch": 0.6480230642504119, + "grad_norm": 0.4121516942977905, + "learning_rate": 1.5276628211260424e-05, + "loss": 0.487, + "step": 23601 + }, + { + "epoch": 0.6480505216913783, + "grad_norm": 0.38877174258232117, + "learning_rate": 1.527626133279416e-05, + "loss": 0.465, + "step": 23602 + }, + { + "epoch": 0.6480779791323449, + "grad_norm": 0.3705126643180847, + "learning_rate": 1.5275894444485988e-05, + "loss": 0.5002, + "step": 23603 + }, + { + "epoch": 0.6481054365733113, + "grad_norm": 0.37879008054733276, + "learning_rate": 1.5275527546336587e-05, + "loss": 0.5341, + "step": 23604 + }, + { + "epoch": 0.6481328940142779, + "grad_norm": 0.43851184844970703, + "learning_rate": 1.5275160638346648e-05, + "loss": 0.5366, + "step": 23605 + }, + { + "epoch": 0.6481603514552444, + "grad_norm": 0.49791640043258667, + "learning_rate": 1.5274793720516846e-05, + "loss": 0.4442, + "step": 23606 + }, + { + "epoch": 0.6481878088962109, + "grad_norm": 0.4260006844997406, + "learning_rate": 1.527442679284787e-05, + "loss": 0.5403, + "step": 23607 + }, + { + "epoch": 0.6482152663371774, + "grad_norm": 0.4213089346885681, + "learning_rate": 1.5274059855340407e-05, + "loss": 0.4723, + "step": 23608 + }, + { + "epoch": 0.6482427237781438, + "grad_norm": 0.33732354640960693, + "learning_rate": 1.5273692907995136e-05, + "loss": 0.4697, + "step": 23609 + }, + { + "epoch": 0.6482701812191104, + "grad_norm": 0.48739102482795715, + "learning_rate": 1.527332595081275e-05, + "loss": 0.4883, + "step": 23610 + }, + { + "epoch": 0.6482976386600768, + "grad_norm": 0.38530057668685913, + "learning_rate": 1.5272958983793924e-05, + "loss": 0.6107, + "step": 23611 + }, + { + "epoch": 0.6483250961010434, + "grad_norm": 0.3893119990825653, + "learning_rate": 1.527259200693935e-05, + "loss": 0.5648, + "step": 23612 + }, + { + "epoch": 0.6483525535420099, + "grad_norm": 0.41314688324928284, + "learning_rate": 1.5272225020249703e-05, + "loss": 0.548, + "step": 23613 + }, + { + "epoch": 0.6483800109829764, + "grad_norm": 0.36960580945014954, + "learning_rate": 1.5271858023725678e-05, + "loss": 0.5594, + "step": 23614 + }, + { + "epoch": 0.6484074684239429, + "grad_norm": 0.41664743423461914, + "learning_rate": 1.5271491017367956e-05, + "loss": 0.5012, + "step": 23615 + }, + { + "epoch": 0.6484349258649094, + "grad_norm": 0.404263973236084, + "learning_rate": 1.527112400117722e-05, + "loss": 0.56, + "step": 23616 + }, + { + "epoch": 0.6484623833058759, + "grad_norm": 0.40553224086761475, + "learning_rate": 1.5270756975154154e-05, + "loss": 0.5104, + "step": 23617 + }, + { + "epoch": 0.6484898407468423, + "grad_norm": 0.4340897500514984, + "learning_rate": 1.5270389939299445e-05, + "loss": 0.4944, + "step": 23618 + }, + { + "epoch": 0.6485172981878089, + "grad_norm": 0.46582648158073425, + "learning_rate": 1.5270022893613777e-05, + "loss": 0.5584, + "step": 23619 + }, + { + "epoch": 0.6485447556287754, + "grad_norm": 0.36733195185661316, + "learning_rate": 1.5269655838097835e-05, + "loss": 0.5322, + "step": 23620 + }, + { + "epoch": 0.6485722130697419, + "grad_norm": 0.3981301188468933, + "learning_rate": 1.5269288772752298e-05, + "loss": 0.4716, + "step": 23621 + }, + { + "epoch": 0.6485996705107084, + "grad_norm": 0.38421449065208435, + "learning_rate": 1.526892169757786e-05, + "loss": 0.4623, + "step": 23622 + }, + { + "epoch": 0.6486271279516749, + "grad_norm": 0.36929967999458313, + "learning_rate": 1.5268554612575202e-05, + "loss": 0.533, + "step": 23623 + }, + { + "epoch": 0.6486545853926414, + "grad_norm": 0.35250210762023926, + "learning_rate": 1.5268187517745005e-05, + "loss": 0.4835, + "step": 23624 + }, + { + "epoch": 0.6486820428336079, + "grad_norm": 0.3253669738769531, + "learning_rate": 1.5267820413087958e-05, + "loss": 0.402, + "step": 23625 + }, + { + "epoch": 0.6487095002745744, + "grad_norm": 0.3759809732437134, + "learning_rate": 1.5267453298604746e-05, + "loss": 0.6091, + "step": 23626 + }, + { + "epoch": 0.648736957715541, + "grad_norm": 0.3368217647075653, + "learning_rate": 1.5267086174296053e-05, + "loss": 0.4803, + "step": 23627 + }, + { + "epoch": 0.6487644151565074, + "grad_norm": 0.4017060399055481, + "learning_rate": 1.526671904016256e-05, + "loss": 0.5113, + "step": 23628 + }, + { + "epoch": 0.648791872597474, + "grad_norm": 0.377604603767395, + "learning_rate": 1.5266351896204957e-05, + "loss": 0.5298, + "step": 23629 + }, + { + "epoch": 0.6488193300384404, + "grad_norm": 0.5177984833717346, + "learning_rate": 1.526598474242393e-05, + "loss": 0.5153, + "step": 23630 + }, + { + "epoch": 0.6488467874794069, + "grad_norm": 0.4555246829986572, + "learning_rate": 1.5265617578820156e-05, + "loss": 0.4201, + "step": 23631 + }, + { + "epoch": 0.6488742449203734, + "grad_norm": 0.3822295069694519, + "learning_rate": 1.5265250405394327e-05, + "loss": 0.5237, + "step": 23632 + }, + { + "epoch": 0.6489017023613399, + "grad_norm": 0.40762263536453247, + "learning_rate": 1.5264883222147123e-05, + "loss": 0.5813, + "step": 23633 + }, + { + "epoch": 0.6489291598023065, + "grad_norm": 0.4483424425125122, + "learning_rate": 1.5264516029079234e-05, + "loss": 0.4888, + "step": 23634 + }, + { + "epoch": 0.6489566172432729, + "grad_norm": 0.40169626474380493, + "learning_rate": 1.5264148826191342e-05, + "loss": 0.4925, + "step": 23635 + }, + { + "epoch": 0.6489840746842395, + "grad_norm": 0.3659188747406006, + "learning_rate": 1.5263781613484133e-05, + "loss": 0.5582, + "step": 23636 + }, + { + "epoch": 0.6490115321252059, + "grad_norm": 0.389707088470459, + "learning_rate": 1.5263414390958287e-05, + "loss": 0.5273, + "step": 23637 + }, + { + "epoch": 0.6490389895661725, + "grad_norm": 0.38261178135871887, + "learning_rate": 1.52630471586145e-05, + "loss": 0.4237, + "step": 23638 + }, + { + "epoch": 0.6490664470071389, + "grad_norm": 0.39575034379959106, + "learning_rate": 1.5262679916453445e-05, + "loss": 0.5897, + "step": 23639 + }, + { + "epoch": 0.6490939044481054, + "grad_norm": 0.3864888548851013, + "learning_rate": 1.5262312664475816e-05, + "loss": 0.5204, + "step": 23640 + }, + { + "epoch": 0.649121361889072, + "grad_norm": 0.4013170301914215, + "learning_rate": 1.5261945402682292e-05, + "loss": 0.5499, + "step": 23641 + }, + { + "epoch": 0.6491488193300384, + "grad_norm": 0.321617990732193, + "learning_rate": 1.526157813107356e-05, + "loss": 0.5425, + "step": 23642 + }, + { + "epoch": 0.649176276771005, + "grad_norm": 0.3522210419178009, + "learning_rate": 1.5261210849650306e-05, + "loss": 0.509, + "step": 23643 + }, + { + "epoch": 0.6492037342119714, + "grad_norm": 0.3949056565761566, + "learning_rate": 1.5260843558413215e-05, + "loss": 0.5133, + "step": 23644 + }, + { + "epoch": 0.649231191652938, + "grad_norm": 0.3825320899486542, + "learning_rate": 1.5260476257362974e-05, + "loss": 0.579, + "step": 23645 + }, + { + "epoch": 0.6492586490939044, + "grad_norm": 0.3622956871986389, + "learning_rate": 1.5260108946500262e-05, + "loss": 0.5081, + "step": 23646 + }, + { + "epoch": 0.649286106534871, + "grad_norm": 0.3925495743751526, + "learning_rate": 1.5259741625825768e-05, + "loss": 0.4774, + "step": 23647 + }, + { + "epoch": 0.6493135639758375, + "grad_norm": 0.39794591069221497, + "learning_rate": 1.525937429534018e-05, + "loss": 0.4957, + "step": 23648 + }, + { + "epoch": 0.6493410214168039, + "grad_norm": 1.2981231212615967, + "learning_rate": 1.5259006955044177e-05, + "loss": 0.4261, + "step": 23649 + }, + { + "epoch": 0.6493684788577705, + "grad_norm": 0.38356369733810425, + "learning_rate": 1.525863960493845e-05, + "loss": 0.4724, + "step": 23650 + }, + { + "epoch": 0.6493959362987369, + "grad_norm": 0.3979029059410095, + "learning_rate": 1.5258272245023681e-05, + "loss": 0.4162, + "step": 23651 + }, + { + "epoch": 0.6494233937397035, + "grad_norm": 0.4131755232810974, + "learning_rate": 1.5257904875300557e-05, + "loss": 0.4941, + "step": 23652 + }, + { + "epoch": 0.6494508511806699, + "grad_norm": 0.4643997251987457, + "learning_rate": 1.525753749576976e-05, + "loss": 0.4947, + "step": 23653 + }, + { + "epoch": 0.6494783086216365, + "grad_norm": 0.37832266092300415, + "learning_rate": 1.5257170106431982e-05, + "loss": 0.4331, + "step": 23654 + }, + { + "epoch": 0.649505766062603, + "grad_norm": 0.35687947273254395, + "learning_rate": 1.5256802707287898e-05, + "loss": 0.5234, + "step": 23655 + }, + { + "epoch": 0.6495332235035695, + "grad_norm": 0.4457745850086212, + "learning_rate": 1.5256435298338203e-05, + "loss": 0.4855, + "step": 23656 + }, + { + "epoch": 0.649560680944536, + "grad_norm": 0.38329577445983887, + "learning_rate": 1.5256067879583579e-05, + "loss": 0.5083, + "step": 23657 + }, + { + "epoch": 0.6495881383855024, + "grad_norm": 0.34335607290267944, + "learning_rate": 1.525570045102471e-05, + "loss": 0.5203, + "step": 23658 + }, + { + "epoch": 0.649615595826469, + "grad_norm": 0.3806074559688568, + "learning_rate": 1.5255333012662282e-05, + "loss": 0.4702, + "step": 23659 + }, + { + "epoch": 0.6496430532674354, + "grad_norm": 0.3775946795940399, + "learning_rate": 1.525496556449698e-05, + "loss": 0.5069, + "step": 23660 + }, + { + "epoch": 0.649670510708402, + "grad_norm": 0.40823933482170105, + "learning_rate": 1.525459810652949e-05, + "loss": 0.5247, + "step": 23661 + }, + { + "epoch": 0.6496979681493685, + "grad_norm": 0.3791608512401581, + "learning_rate": 1.5254230638760496e-05, + "loss": 0.4472, + "step": 23662 + }, + { + "epoch": 0.649725425590335, + "grad_norm": 0.332027405500412, + "learning_rate": 1.5253863161190687e-05, + "loss": 0.446, + "step": 23663 + }, + { + "epoch": 0.6497528830313015, + "grad_norm": 0.41062307357788086, + "learning_rate": 1.5253495673820746e-05, + "loss": 0.4916, + "step": 23664 + }, + { + "epoch": 0.649780340472268, + "grad_norm": 0.3737700879573822, + "learning_rate": 1.525312817665136e-05, + "loss": 0.5047, + "step": 23665 + }, + { + "epoch": 0.6498077979132345, + "grad_norm": 0.36018651723861694, + "learning_rate": 1.5252760669683211e-05, + "loss": 0.4574, + "step": 23666 + }, + { + "epoch": 0.649835255354201, + "grad_norm": 0.37488311529159546, + "learning_rate": 1.5252393152916992e-05, + "loss": 0.5458, + "step": 23667 + }, + { + "epoch": 0.6498627127951675, + "grad_norm": 0.3431636691093445, + "learning_rate": 1.5252025626353377e-05, + "loss": 0.464, + "step": 23668 + }, + { + "epoch": 0.649890170236134, + "grad_norm": 0.43569302558898926, + "learning_rate": 1.5251658089993061e-05, + "loss": 0.571, + "step": 23669 + }, + { + "epoch": 0.6499176276771005, + "grad_norm": 0.35711148381233215, + "learning_rate": 1.5251290543836725e-05, + "loss": 0.4508, + "step": 23670 + }, + { + "epoch": 0.649945085118067, + "grad_norm": 0.37078607082366943, + "learning_rate": 1.5250922987885057e-05, + "loss": 0.5079, + "step": 23671 + }, + { + "epoch": 0.6499725425590335, + "grad_norm": 0.4647006392478943, + "learning_rate": 1.5250555422138744e-05, + "loss": 0.5282, + "step": 23672 + }, + { + "epoch": 0.65, + "grad_norm": 0.35909104347229004, + "learning_rate": 1.5250187846598465e-05, + "loss": 0.5138, + "step": 23673 + }, + { + "epoch": 0.6500274574409665, + "grad_norm": 0.3401111960411072, + "learning_rate": 1.5249820261264914e-05, + "loss": 0.4618, + "step": 23674 + }, + { + "epoch": 0.650054914881933, + "grad_norm": 0.3741942346096039, + "learning_rate": 1.5249452666138769e-05, + "loss": 0.4879, + "step": 23675 + }, + { + "epoch": 0.6500823723228996, + "grad_norm": 0.39525625109672546, + "learning_rate": 1.5249085061220723e-05, + "loss": 0.5236, + "step": 23676 + }, + { + "epoch": 0.650109829763866, + "grad_norm": 0.4767204821109772, + "learning_rate": 1.5248717446511456e-05, + "loss": 0.6236, + "step": 23677 + }, + { + "epoch": 0.6501372872048325, + "grad_norm": 0.40846049785614014, + "learning_rate": 1.5248349822011657e-05, + "loss": 0.5112, + "step": 23678 + }, + { + "epoch": 0.650164744645799, + "grad_norm": 0.47721487283706665, + "learning_rate": 1.5247982187722008e-05, + "loss": 0.6002, + "step": 23679 + }, + { + "epoch": 0.6501922020867655, + "grad_norm": 0.3992398977279663, + "learning_rate": 1.5247614543643199e-05, + "loss": 0.5045, + "step": 23680 + }, + { + "epoch": 0.650219659527732, + "grad_norm": 0.38624507188796997, + "learning_rate": 1.5247246889775915e-05, + "loss": 0.5211, + "step": 23681 + }, + { + "epoch": 0.6502471169686985, + "grad_norm": 0.3930586576461792, + "learning_rate": 1.5246879226120838e-05, + "loss": 0.503, + "step": 23682 + }, + { + "epoch": 0.6502745744096651, + "grad_norm": 0.400028795003891, + "learning_rate": 1.5246511552678658e-05, + "loss": 0.5429, + "step": 23683 + }, + { + "epoch": 0.6503020318506315, + "grad_norm": 0.4955674707889557, + "learning_rate": 1.5246143869450061e-05, + "loss": 0.4973, + "step": 23684 + }, + { + "epoch": 0.6503294892915981, + "grad_norm": 0.37542468309402466, + "learning_rate": 1.5245776176435731e-05, + "loss": 0.4894, + "step": 23685 + }, + { + "epoch": 0.6503569467325645, + "grad_norm": 0.4325043261051178, + "learning_rate": 1.5245408473636352e-05, + "loss": 0.4855, + "step": 23686 + }, + { + "epoch": 0.650384404173531, + "grad_norm": 0.3471148908138275, + "learning_rate": 1.5245040761052615e-05, + "loss": 0.5137, + "step": 23687 + }, + { + "epoch": 0.6504118616144975, + "grad_norm": 0.4817054867744446, + "learning_rate": 1.5244673038685199e-05, + "loss": 0.5234, + "step": 23688 + }, + { + "epoch": 0.650439319055464, + "grad_norm": 0.38028454780578613, + "learning_rate": 1.5244305306534797e-05, + "loss": 0.4996, + "step": 23689 + }, + { + "epoch": 0.6504667764964306, + "grad_norm": 0.3933710753917694, + "learning_rate": 1.524393756460209e-05, + "loss": 0.5649, + "step": 23690 + }, + { + "epoch": 0.650494233937397, + "grad_norm": 0.39372214674949646, + "learning_rate": 1.5243569812887766e-05, + "loss": 0.5574, + "step": 23691 + }, + { + "epoch": 0.6505216913783636, + "grad_norm": 0.4059258997440338, + "learning_rate": 1.5243202051392513e-05, + "loss": 0.4947, + "step": 23692 + }, + { + "epoch": 0.65054914881933, + "grad_norm": 0.4512738883495331, + "learning_rate": 1.5242834280117013e-05, + "loss": 0.6183, + "step": 23693 + }, + { + "epoch": 0.6505766062602966, + "grad_norm": 0.34508800506591797, + "learning_rate": 1.5242466499061954e-05, + "loss": 0.4836, + "step": 23694 + }, + { + "epoch": 0.650604063701263, + "grad_norm": 0.4077144265174866, + "learning_rate": 1.5242098708228019e-05, + "loss": 0.5812, + "step": 23695 + }, + { + "epoch": 0.6506315211422296, + "grad_norm": 0.4154678285121918, + "learning_rate": 1.5241730907615902e-05, + "loss": 0.5549, + "step": 23696 + }, + { + "epoch": 0.6506589785831961, + "grad_norm": 0.3598564565181732, + "learning_rate": 1.5241363097226282e-05, + "loss": 0.4983, + "step": 23697 + }, + { + "epoch": 0.6506864360241625, + "grad_norm": 0.4105212688446045, + "learning_rate": 1.5240995277059845e-05, + "loss": 0.511, + "step": 23698 + }, + { + "epoch": 0.6507138934651291, + "grad_norm": 0.3683725595474243, + "learning_rate": 1.5240627447117281e-05, + "loss": 0.5487, + "step": 23699 + }, + { + "epoch": 0.6507413509060955, + "grad_norm": 0.45838648080825806, + "learning_rate": 1.5240259607399273e-05, + "loss": 0.578, + "step": 23700 + }, + { + "epoch": 0.6507688083470621, + "grad_norm": 0.39265739917755127, + "learning_rate": 1.5239891757906509e-05, + "loss": 0.5491, + "step": 23701 + }, + { + "epoch": 0.6507962657880285, + "grad_norm": 0.347990483045578, + "learning_rate": 1.5239523898639676e-05, + "loss": 0.4897, + "step": 23702 + }, + { + "epoch": 0.6508237232289951, + "grad_norm": 0.34755003452301025, + "learning_rate": 1.5239156029599457e-05, + "loss": 0.4917, + "step": 23703 + }, + { + "epoch": 0.6508511806699615, + "grad_norm": 0.4398002624511719, + "learning_rate": 1.5238788150786538e-05, + "loss": 0.485, + "step": 23704 + }, + { + "epoch": 0.650878638110928, + "grad_norm": 0.45074462890625, + "learning_rate": 1.523842026220161e-05, + "loss": 0.5316, + "step": 23705 + }, + { + "epoch": 0.6509060955518946, + "grad_norm": 0.35325881838798523, + "learning_rate": 1.5238052363845357e-05, + "loss": 0.4807, + "step": 23706 + }, + { + "epoch": 0.650933552992861, + "grad_norm": 0.3939119577407837, + "learning_rate": 1.5237684455718462e-05, + "loss": 0.4821, + "step": 23707 + }, + { + "epoch": 0.6509610104338276, + "grad_norm": 0.42961618304252625, + "learning_rate": 1.5237316537821616e-05, + "loss": 0.5807, + "step": 23708 + }, + { + "epoch": 0.650988467874794, + "grad_norm": 0.37778645753860474, + "learning_rate": 1.5236948610155502e-05, + "loss": 0.487, + "step": 23709 + }, + { + "epoch": 0.6510159253157606, + "grad_norm": 0.382432758808136, + "learning_rate": 1.5236580672720807e-05, + "loss": 0.4213, + "step": 23710 + }, + { + "epoch": 0.651043382756727, + "grad_norm": 0.35771504044532776, + "learning_rate": 1.5236212725518218e-05, + "loss": 0.4606, + "step": 23711 + }, + { + "epoch": 0.6510708401976936, + "grad_norm": 0.3904881179332733, + "learning_rate": 1.523584476854842e-05, + "loss": 0.5279, + "step": 23712 + }, + { + "epoch": 0.6510982976386601, + "grad_norm": 0.409147173166275, + "learning_rate": 1.52354768018121e-05, + "loss": 0.5617, + "step": 23713 + }, + { + "epoch": 0.6511257550796266, + "grad_norm": 0.40701955556869507, + "learning_rate": 1.523510882530995e-05, + "loss": 0.5721, + "step": 23714 + }, + { + "epoch": 0.6511532125205931, + "grad_norm": 0.35579943656921387, + "learning_rate": 1.5234740839042646e-05, + "loss": 0.4426, + "step": 23715 + }, + { + "epoch": 0.6511806699615595, + "grad_norm": 0.4169216752052307, + "learning_rate": 1.523437284301088e-05, + "loss": 0.583, + "step": 23716 + }, + { + "epoch": 0.6512081274025261, + "grad_norm": 0.3572644293308258, + "learning_rate": 1.5234004837215338e-05, + "loss": 0.5579, + "step": 23717 + }, + { + "epoch": 0.6512355848434925, + "grad_norm": 0.38142678141593933, + "learning_rate": 1.523363682165671e-05, + "loss": 0.5219, + "step": 23718 + }, + { + "epoch": 0.6512630422844591, + "grad_norm": 0.3415701985359192, + "learning_rate": 1.5233268796335677e-05, + "loss": 0.4586, + "step": 23719 + }, + { + "epoch": 0.6512904997254256, + "grad_norm": 0.39159443974494934, + "learning_rate": 1.5232900761252927e-05, + "loss": 0.5128, + "step": 23720 + }, + { + "epoch": 0.6513179571663921, + "grad_norm": 0.3865741193294525, + "learning_rate": 1.5232532716409148e-05, + "loss": 0.5205, + "step": 23721 + }, + { + "epoch": 0.6513454146073586, + "grad_norm": 0.38265615701675415, + "learning_rate": 1.523216466180502e-05, + "loss": 0.5212, + "step": 23722 + }, + { + "epoch": 0.6513728720483251, + "grad_norm": 0.421025812625885, + "learning_rate": 1.5231796597441241e-05, + "loss": 0.5062, + "step": 23723 + }, + { + "epoch": 0.6514003294892916, + "grad_norm": 0.6716886758804321, + "learning_rate": 1.5231428523318488e-05, + "loss": 0.5067, + "step": 23724 + }, + { + "epoch": 0.651427786930258, + "grad_norm": 0.3802092373371124, + "learning_rate": 1.5231060439437455e-05, + "loss": 0.5665, + "step": 23725 + }, + { + "epoch": 0.6514552443712246, + "grad_norm": 0.5426437854766846, + "learning_rate": 1.5230692345798823e-05, + "loss": 0.5243, + "step": 23726 + }, + { + "epoch": 0.6514827018121911, + "grad_norm": 0.370145320892334, + "learning_rate": 1.523032424240328e-05, + "loss": 0.4355, + "step": 23727 + }, + { + "epoch": 0.6515101592531576, + "grad_norm": 0.37723442912101746, + "learning_rate": 1.5229956129251513e-05, + "loss": 0.4967, + "step": 23728 + }, + { + "epoch": 0.6515376166941241, + "grad_norm": 0.35321810841560364, + "learning_rate": 1.5229588006344209e-05, + "loss": 0.4458, + "step": 23729 + }, + { + "epoch": 0.6515650741350906, + "grad_norm": 0.383277952671051, + "learning_rate": 1.5229219873682052e-05, + "loss": 0.5726, + "step": 23730 + }, + { + "epoch": 0.6515925315760571, + "grad_norm": 0.35766151547431946, + "learning_rate": 1.5228851731265734e-05, + "loss": 0.4736, + "step": 23731 + }, + { + "epoch": 0.6516199890170236, + "grad_norm": 0.6738865971565247, + "learning_rate": 1.5228483579095938e-05, + "loss": 0.5536, + "step": 23732 + }, + { + "epoch": 0.6516474464579901, + "grad_norm": 0.3710985779762268, + "learning_rate": 1.522811541717335e-05, + "loss": 0.4963, + "step": 23733 + }, + { + "epoch": 0.6516749038989567, + "grad_norm": 0.3928694725036621, + "learning_rate": 1.5227747245498661e-05, + "loss": 0.4516, + "step": 23734 + }, + { + "epoch": 0.6517023613399231, + "grad_norm": 0.3582439422607422, + "learning_rate": 1.5227379064072552e-05, + "loss": 0.4695, + "step": 23735 + }, + { + "epoch": 0.6517298187808896, + "grad_norm": 0.4069248139858246, + "learning_rate": 1.5227010872895713e-05, + "loss": 0.4552, + "step": 23736 + }, + { + "epoch": 0.6517572762218561, + "grad_norm": 0.3432716131210327, + "learning_rate": 1.5226642671968832e-05, + "loss": 0.4517, + "step": 23737 + }, + { + "epoch": 0.6517847336628226, + "grad_norm": 0.4144769310951233, + "learning_rate": 1.5226274461292594e-05, + "loss": 0.5266, + "step": 23738 + }, + { + "epoch": 0.6518121911037891, + "grad_norm": 0.4657509922981262, + "learning_rate": 1.5225906240867685e-05, + "loss": 0.5412, + "step": 23739 + }, + { + "epoch": 0.6518396485447556, + "grad_norm": 0.4205869436264038, + "learning_rate": 1.5225538010694791e-05, + "loss": 0.436, + "step": 23740 + }, + { + "epoch": 0.6518671059857222, + "grad_norm": 0.3671778440475464, + "learning_rate": 1.5225169770774605e-05, + "loss": 0.5342, + "step": 23741 + }, + { + "epoch": 0.6518945634266886, + "grad_norm": 0.4004552960395813, + "learning_rate": 1.5224801521107808e-05, + "loss": 0.5125, + "step": 23742 + }, + { + "epoch": 0.6519220208676552, + "grad_norm": 0.3999151289463043, + "learning_rate": 1.5224433261695088e-05, + "loss": 0.5571, + "step": 23743 + }, + { + "epoch": 0.6519494783086216, + "grad_norm": 0.3808857798576355, + "learning_rate": 1.5224064992537133e-05, + "loss": 0.5226, + "step": 23744 + }, + { + "epoch": 0.6519769357495881, + "grad_norm": 0.3525325357913971, + "learning_rate": 1.522369671363463e-05, + "loss": 0.4718, + "step": 23745 + }, + { + "epoch": 0.6520043931905546, + "grad_norm": 0.4057543873786926, + "learning_rate": 1.5223328424988263e-05, + "loss": 0.4446, + "step": 23746 + }, + { + "epoch": 0.6520318506315211, + "grad_norm": 0.41805851459503174, + "learning_rate": 1.5222960126598727e-05, + "loss": 0.5788, + "step": 23747 + }, + { + "epoch": 0.6520593080724877, + "grad_norm": 0.377819687128067, + "learning_rate": 1.52225918184667e-05, + "loss": 0.4348, + "step": 23748 + }, + { + "epoch": 0.6520867655134541, + "grad_norm": 0.37553781270980835, + "learning_rate": 1.522222350059287e-05, + "loss": 0.5401, + "step": 23749 + }, + { + "epoch": 0.6521142229544207, + "grad_norm": 0.4081602692604065, + "learning_rate": 1.5221855172977927e-05, + "loss": 0.4952, + "step": 23750 + }, + { + "epoch": 0.6521416803953871, + "grad_norm": 0.3945885896682739, + "learning_rate": 1.5221486835622558e-05, + "loss": 0.5132, + "step": 23751 + }, + { + "epoch": 0.6521691378363537, + "grad_norm": 0.40265387296676636, + "learning_rate": 1.5221118488527453e-05, + "loss": 0.4712, + "step": 23752 + }, + { + "epoch": 0.6521965952773201, + "grad_norm": 0.3667725920677185, + "learning_rate": 1.5220750131693294e-05, + "loss": 0.4592, + "step": 23753 + }, + { + "epoch": 0.6522240527182867, + "grad_norm": 0.3840203285217285, + "learning_rate": 1.5220381765120769e-05, + "loss": 0.5172, + "step": 23754 + }, + { + "epoch": 0.6522515101592532, + "grad_norm": 0.4408799111843109, + "learning_rate": 1.5220013388810565e-05, + "loss": 0.4808, + "step": 23755 + }, + { + "epoch": 0.6522789676002196, + "grad_norm": 0.5411415100097656, + "learning_rate": 1.5219645002763371e-05, + "loss": 0.5785, + "step": 23756 + }, + { + "epoch": 0.6523064250411862, + "grad_norm": 0.41816431283950806, + "learning_rate": 1.5219276606979874e-05, + "loss": 0.5406, + "step": 23757 + }, + { + "epoch": 0.6523338824821526, + "grad_norm": 0.4204012453556061, + "learning_rate": 1.5218908201460755e-05, + "loss": 0.5289, + "step": 23758 + }, + { + "epoch": 0.6523613399231192, + "grad_norm": 0.40524566173553467, + "learning_rate": 1.5218539786206713e-05, + "loss": 0.5269, + "step": 23759 + }, + { + "epoch": 0.6523887973640856, + "grad_norm": 0.3587714433670044, + "learning_rate": 1.5218171361218429e-05, + "loss": 0.429, + "step": 23760 + }, + { + "epoch": 0.6524162548050522, + "grad_norm": 0.4066140949726105, + "learning_rate": 1.5217802926496585e-05, + "loss": 0.5531, + "step": 23761 + }, + { + "epoch": 0.6524437122460187, + "grad_norm": 0.5565515756607056, + "learning_rate": 1.5217434482041875e-05, + "loss": 0.5525, + "step": 23762 + }, + { + "epoch": 0.6524711696869852, + "grad_norm": 0.4059676229953766, + "learning_rate": 1.5217066027854986e-05, + "loss": 0.576, + "step": 23763 + }, + { + "epoch": 0.6524986271279517, + "grad_norm": 0.3765068054199219, + "learning_rate": 1.5216697563936604e-05, + "loss": 0.5686, + "step": 23764 + }, + { + "epoch": 0.6525260845689181, + "grad_norm": 0.36149701476097107, + "learning_rate": 1.5216329090287417e-05, + "loss": 0.501, + "step": 23765 + }, + { + "epoch": 0.6525535420098847, + "grad_norm": 0.3929203748703003, + "learning_rate": 1.5215960606908104e-05, + "loss": 0.4872, + "step": 23766 + }, + { + "epoch": 0.6525809994508511, + "grad_norm": 0.36024075746536255, + "learning_rate": 1.5215592113799366e-05, + "loss": 0.4183, + "step": 23767 + }, + { + "epoch": 0.6526084568918177, + "grad_norm": 0.3582187294960022, + "learning_rate": 1.5215223610961882e-05, + "loss": 0.4814, + "step": 23768 + }, + { + "epoch": 0.6526359143327842, + "grad_norm": 0.35664743185043335, + "learning_rate": 1.5214855098396344e-05, + "loss": 0.483, + "step": 23769 + }, + { + "epoch": 0.6526633717737507, + "grad_norm": 0.3740565776824951, + "learning_rate": 1.5214486576103438e-05, + "loss": 0.447, + "step": 23770 + }, + { + "epoch": 0.6526908292147172, + "grad_norm": 0.3794468641281128, + "learning_rate": 1.5214118044083845e-05, + "loss": 0.4795, + "step": 23771 + }, + { + "epoch": 0.6527182866556837, + "grad_norm": 0.41327399015426636, + "learning_rate": 1.5213749502338262e-05, + "loss": 0.4577, + "step": 23772 + }, + { + "epoch": 0.6527457440966502, + "grad_norm": 0.3863506019115448, + "learning_rate": 1.5213380950867372e-05, + "loss": 0.482, + "step": 23773 + }, + { + "epoch": 0.6527732015376166, + "grad_norm": 0.40236327052116394, + "learning_rate": 1.5213012389671859e-05, + "loss": 0.4166, + "step": 23774 + }, + { + "epoch": 0.6528006589785832, + "grad_norm": 0.3786713778972626, + "learning_rate": 1.5212643818752418e-05, + "loss": 0.5158, + "step": 23775 + }, + { + "epoch": 0.6528281164195497, + "grad_norm": 0.4604823887348175, + "learning_rate": 1.521227523810973e-05, + "loss": 0.5837, + "step": 23776 + }, + { + "epoch": 0.6528555738605162, + "grad_norm": 0.4109199643135071, + "learning_rate": 1.5211906647744486e-05, + "loss": 0.5722, + "step": 23777 + }, + { + "epoch": 0.6528830313014827, + "grad_norm": 0.4758520722389221, + "learning_rate": 1.5211538047657373e-05, + "loss": 0.5151, + "step": 23778 + }, + { + "epoch": 0.6529104887424492, + "grad_norm": 0.3880813717842102, + "learning_rate": 1.5211169437849079e-05, + "loss": 0.4948, + "step": 23779 + }, + { + "epoch": 0.6529379461834157, + "grad_norm": 0.4055856764316559, + "learning_rate": 1.5210800818320289e-05, + "loss": 0.5317, + "step": 23780 + }, + { + "epoch": 0.6529654036243822, + "grad_norm": 0.4025108516216278, + "learning_rate": 1.5210432189071691e-05, + "loss": 0.4616, + "step": 23781 + }, + { + "epoch": 0.6529928610653487, + "grad_norm": 0.39077186584472656, + "learning_rate": 1.5210063550103978e-05, + "loss": 0.4972, + "step": 23782 + }, + { + "epoch": 0.6530203185063153, + "grad_norm": 0.4076533913612366, + "learning_rate": 1.5209694901417832e-05, + "loss": 0.5243, + "step": 23783 + }, + { + "epoch": 0.6530477759472817, + "grad_norm": 0.42377105355262756, + "learning_rate": 1.520932624301394e-05, + "loss": 0.5308, + "step": 23784 + }, + { + "epoch": 0.6530752333882482, + "grad_norm": 0.40554720163345337, + "learning_rate": 1.5208957574892995e-05, + "loss": 0.4734, + "step": 23785 + }, + { + "epoch": 0.6531026908292147, + "grad_norm": 0.39534664154052734, + "learning_rate": 1.520858889705568e-05, + "loss": 0.539, + "step": 23786 + }, + { + "epoch": 0.6531301482701812, + "grad_norm": 0.3672639727592468, + "learning_rate": 1.5208220209502684e-05, + "loss": 0.4598, + "step": 23787 + }, + { + "epoch": 0.6531576057111477, + "grad_norm": 0.363486111164093, + "learning_rate": 1.5207851512234698e-05, + "loss": 0.4702, + "step": 23788 + }, + { + "epoch": 0.6531850631521142, + "grad_norm": 0.37160325050354004, + "learning_rate": 1.5207482805252402e-05, + "loss": 0.505, + "step": 23789 + }, + { + "epoch": 0.6532125205930808, + "grad_norm": 0.36664673686027527, + "learning_rate": 1.5207114088556494e-05, + "loss": 0.4925, + "step": 23790 + }, + { + "epoch": 0.6532399780340472, + "grad_norm": 0.3830573260784149, + "learning_rate": 1.5206745362147652e-05, + "loss": 0.4389, + "step": 23791 + }, + { + "epoch": 0.6532674354750138, + "grad_norm": 0.5204707384109497, + "learning_rate": 1.520637662602657e-05, + "loss": 0.576, + "step": 23792 + }, + { + "epoch": 0.6532948929159802, + "grad_norm": 0.4023855924606323, + "learning_rate": 1.5206007880193934e-05, + "loss": 0.5501, + "step": 23793 + }, + { + "epoch": 0.6533223503569467, + "grad_norm": 0.3745805323123932, + "learning_rate": 1.5205639124650428e-05, + "loss": 0.4616, + "step": 23794 + }, + { + "epoch": 0.6533498077979132, + "grad_norm": 0.36603912711143494, + "learning_rate": 1.5205270359396748e-05, + "loss": 0.5504, + "step": 23795 + }, + { + "epoch": 0.6533772652388797, + "grad_norm": 0.37189003825187683, + "learning_rate": 1.5204901584433577e-05, + "loss": 0.5068, + "step": 23796 + }, + { + "epoch": 0.6534047226798463, + "grad_norm": 0.3633081018924713, + "learning_rate": 1.5204532799761603e-05, + "loss": 0.4207, + "step": 23797 + }, + { + "epoch": 0.6534321801208127, + "grad_norm": 0.4171067774295807, + "learning_rate": 1.5204164005381513e-05, + "loss": 0.4352, + "step": 23798 + }, + { + "epoch": 0.6534596375617793, + "grad_norm": 0.4381736218929291, + "learning_rate": 1.5203795201293997e-05, + "loss": 0.6069, + "step": 23799 + }, + { + "epoch": 0.6534870950027457, + "grad_norm": 0.3735615909099579, + "learning_rate": 1.5203426387499742e-05, + "loss": 0.4798, + "step": 23800 + }, + { + "epoch": 0.6535145524437123, + "grad_norm": 0.37482067942619324, + "learning_rate": 1.5203057563999437e-05, + "loss": 0.4397, + "step": 23801 + }, + { + "epoch": 0.6535420098846787, + "grad_norm": 0.33044013381004333, + "learning_rate": 1.5202688730793767e-05, + "loss": 0.4354, + "step": 23802 + }, + { + "epoch": 0.6535694673256452, + "grad_norm": 0.41229477524757385, + "learning_rate": 1.5202319887883424e-05, + "loss": 0.4889, + "step": 23803 + }, + { + "epoch": 0.6535969247666118, + "grad_norm": 0.349237322807312, + "learning_rate": 1.5201951035269094e-05, + "loss": 0.5141, + "step": 23804 + }, + { + "epoch": 0.6536243822075782, + "grad_norm": 0.42026081681251526, + "learning_rate": 1.5201582172951464e-05, + "loss": 0.5115, + "step": 23805 + }, + { + "epoch": 0.6536518396485448, + "grad_norm": 0.474020779132843, + "learning_rate": 1.5201213300931223e-05, + "loss": 0.5016, + "step": 23806 + }, + { + "epoch": 0.6536792970895112, + "grad_norm": 0.3715853691101074, + "learning_rate": 1.5200844419209058e-05, + "loss": 0.4883, + "step": 23807 + }, + { + "epoch": 0.6537067545304778, + "grad_norm": 0.4046780467033386, + "learning_rate": 1.5200475527785661e-05, + "loss": 0.5049, + "step": 23808 + }, + { + "epoch": 0.6537342119714442, + "grad_norm": 0.383783757686615, + "learning_rate": 1.5200106626661715e-05, + "loss": 0.5251, + "step": 23809 + }, + { + "epoch": 0.6537616694124108, + "grad_norm": 0.37583038210868835, + "learning_rate": 1.519973771583791e-05, + "loss": 0.5465, + "step": 23810 + }, + { + "epoch": 0.6537891268533773, + "grad_norm": 0.3696254789829254, + "learning_rate": 1.519936879531494e-05, + "loss": 0.4922, + "step": 23811 + }, + { + "epoch": 0.6538165842943438, + "grad_norm": 0.34464162588119507, + "learning_rate": 1.5198999865093482e-05, + "loss": 0.4619, + "step": 23812 + }, + { + "epoch": 0.6538440417353103, + "grad_norm": 0.3787131905555725, + "learning_rate": 1.5198630925174231e-05, + "loss": 0.5723, + "step": 23813 + }, + { + "epoch": 0.6538714991762767, + "grad_norm": 0.4072667956352234, + "learning_rate": 1.5198261975557876e-05, + "loss": 0.5128, + "step": 23814 + }, + { + "epoch": 0.6538989566172433, + "grad_norm": 0.40222057700157166, + "learning_rate": 1.5197893016245102e-05, + "loss": 0.4705, + "step": 23815 + }, + { + "epoch": 0.6539264140582097, + "grad_norm": 0.34019890427589417, + "learning_rate": 1.5197524047236599e-05, + "loss": 0.389, + "step": 23816 + }, + { + "epoch": 0.6539538714991763, + "grad_norm": 0.4082937240600586, + "learning_rate": 1.5197155068533054e-05, + "loss": 0.5523, + "step": 23817 + }, + { + "epoch": 0.6539813289401428, + "grad_norm": 0.4714715778827667, + "learning_rate": 1.5196786080135155e-05, + "loss": 0.5584, + "step": 23818 + }, + { + "epoch": 0.6540087863811093, + "grad_norm": 0.4458354115486145, + "learning_rate": 1.5196417082043593e-05, + "loss": 0.4666, + "step": 23819 + }, + { + "epoch": 0.6540362438220758, + "grad_norm": 0.33524900674819946, + "learning_rate": 1.5196048074259056e-05, + "loss": 0.4747, + "step": 23820 + }, + { + "epoch": 0.6540637012630423, + "grad_norm": 0.39744752645492554, + "learning_rate": 1.519567905678223e-05, + "loss": 0.5207, + "step": 23821 + }, + { + "epoch": 0.6540911587040088, + "grad_norm": 0.5068857669830322, + "learning_rate": 1.5195310029613804e-05, + "loss": 0.4709, + "step": 23822 + }, + { + "epoch": 0.6541186161449752, + "grad_norm": 0.33176591992378235, + "learning_rate": 1.5194940992754466e-05, + "loss": 0.5086, + "step": 23823 + }, + { + "epoch": 0.6541460735859418, + "grad_norm": 0.382457971572876, + "learning_rate": 1.5194571946204906e-05, + "loss": 0.4801, + "step": 23824 + }, + { + "epoch": 0.6541735310269083, + "grad_norm": 0.35402071475982666, + "learning_rate": 1.5194202889965808e-05, + "loss": 0.4487, + "step": 23825 + }, + { + "epoch": 0.6542009884678748, + "grad_norm": 0.4963783025741577, + "learning_rate": 1.5193833824037868e-05, + "loss": 0.488, + "step": 23826 + }, + { + "epoch": 0.6542284459088413, + "grad_norm": 0.3790636360645294, + "learning_rate": 1.5193464748421769e-05, + "loss": 0.4382, + "step": 23827 + }, + { + "epoch": 0.6542559033498078, + "grad_norm": 0.5410585403442383, + "learning_rate": 1.51930956631182e-05, + "loss": 0.4887, + "step": 23828 + }, + { + "epoch": 0.6542833607907743, + "grad_norm": 0.38508063554763794, + "learning_rate": 1.5192726568127851e-05, + "loss": 0.3924, + "step": 23829 + }, + { + "epoch": 0.6543108182317408, + "grad_norm": 0.39466235041618347, + "learning_rate": 1.519235746345141e-05, + "loss": 0.4942, + "step": 23830 + }, + { + "epoch": 0.6543382756727073, + "grad_norm": 0.3778545558452606, + "learning_rate": 1.5191988349089566e-05, + "loss": 0.4066, + "step": 23831 + }, + { + "epoch": 0.6543657331136739, + "grad_norm": 0.37348175048828125, + "learning_rate": 1.5191619225043004e-05, + "loss": 0.465, + "step": 23832 + }, + { + "epoch": 0.6543931905546403, + "grad_norm": 0.40360498428344727, + "learning_rate": 1.5191250091312415e-05, + "loss": 0.5081, + "step": 23833 + }, + { + "epoch": 0.6544206479956068, + "grad_norm": 0.3740587830543518, + "learning_rate": 1.519088094789849e-05, + "loss": 0.4967, + "step": 23834 + }, + { + "epoch": 0.6544481054365733, + "grad_norm": 0.3612286448478699, + "learning_rate": 1.5190511794801914e-05, + "loss": 0.4658, + "step": 23835 + }, + { + "epoch": 0.6544755628775398, + "grad_norm": 0.3687891364097595, + "learning_rate": 1.5190142632023377e-05, + "loss": 0.4652, + "step": 23836 + }, + { + "epoch": 0.6545030203185063, + "grad_norm": 0.38653191924095154, + "learning_rate": 1.5189773459563568e-05, + "loss": 0.4581, + "step": 23837 + }, + { + "epoch": 0.6545304777594728, + "grad_norm": 0.35681405663490295, + "learning_rate": 1.5189404277423175e-05, + "loss": 0.4695, + "step": 23838 + }, + { + "epoch": 0.6545579352004394, + "grad_norm": 0.33935126662254333, + "learning_rate": 1.5189035085602885e-05, + "loss": 0.4454, + "step": 23839 + }, + { + "epoch": 0.6545853926414058, + "grad_norm": 0.3510790765285492, + "learning_rate": 1.5188665884103393e-05, + "loss": 0.4684, + "step": 23840 + }, + { + "epoch": 0.6546128500823724, + "grad_norm": 0.37300312519073486, + "learning_rate": 1.5188296672925378e-05, + "loss": 0.5473, + "step": 23841 + }, + { + "epoch": 0.6546403075233388, + "grad_norm": 0.38593602180480957, + "learning_rate": 1.5187927452069535e-05, + "loss": 0.5712, + "step": 23842 + }, + { + "epoch": 0.6546677649643053, + "grad_norm": 0.3275633454322815, + "learning_rate": 1.5187558221536553e-05, + "loss": 0.4773, + "step": 23843 + }, + { + "epoch": 0.6546952224052718, + "grad_norm": 0.37738746404647827, + "learning_rate": 1.5187188981327119e-05, + "loss": 0.5524, + "step": 23844 + }, + { + "epoch": 0.6547226798462383, + "grad_norm": 0.39229831099510193, + "learning_rate": 1.5186819731441924e-05, + "loss": 0.4537, + "step": 23845 + }, + { + "epoch": 0.6547501372872049, + "grad_norm": 0.34418031573295593, + "learning_rate": 1.518645047188165e-05, + "loss": 0.4924, + "step": 23846 + }, + { + "epoch": 0.6547775947281713, + "grad_norm": 0.3606501519680023, + "learning_rate": 1.5186081202646995e-05, + "loss": 0.4855, + "step": 23847 + }, + { + "epoch": 0.6548050521691379, + "grad_norm": 0.44193190336227417, + "learning_rate": 1.5185711923738638e-05, + "loss": 0.5643, + "step": 23848 + }, + { + "epoch": 0.6548325096101043, + "grad_norm": 0.3966399133205414, + "learning_rate": 1.5185342635157278e-05, + "loss": 0.5632, + "step": 23849 + }, + { + "epoch": 0.6548599670510709, + "grad_norm": 0.3869325518608093, + "learning_rate": 1.51849733369036e-05, + "loss": 0.5197, + "step": 23850 + }, + { + "epoch": 0.6548874244920373, + "grad_norm": 0.39721745252609253, + "learning_rate": 1.5184604028978288e-05, + "loss": 0.4994, + "step": 23851 + }, + { + "epoch": 0.6549148819330038, + "grad_norm": 0.4197114408016205, + "learning_rate": 1.5184234711382036e-05, + "loss": 0.5329, + "step": 23852 + }, + { + "epoch": 0.6549423393739704, + "grad_norm": 0.3475610017776489, + "learning_rate": 1.5183865384115533e-05, + "loss": 0.5301, + "step": 23853 + }, + { + "epoch": 0.6549697968149368, + "grad_norm": 0.38975295424461365, + "learning_rate": 1.5183496047179463e-05, + "loss": 0.4781, + "step": 23854 + }, + { + "epoch": 0.6549972542559034, + "grad_norm": 0.3484359681606293, + "learning_rate": 1.5183126700574522e-05, + "loss": 0.4631, + "step": 23855 + }, + { + "epoch": 0.6550247116968698, + "grad_norm": 0.35377705097198486, + "learning_rate": 1.518275734430139e-05, + "loss": 0.518, + "step": 23856 + }, + { + "epoch": 0.6550521691378364, + "grad_norm": 0.3655840754508972, + "learning_rate": 1.5182387978360769e-05, + "loss": 0.5972, + "step": 23857 + }, + { + "epoch": 0.6550796265788028, + "grad_norm": 0.4111377000808716, + "learning_rate": 1.5182018602753337e-05, + "loss": 0.503, + "step": 23858 + }, + { + "epoch": 0.6551070840197694, + "grad_norm": 0.35863634943962097, + "learning_rate": 1.5181649217479786e-05, + "loss": 0.531, + "step": 23859 + }, + { + "epoch": 0.6551345414607359, + "grad_norm": 0.35143330693244934, + "learning_rate": 1.5181279822540806e-05, + "loss": 0.52, + "step": 23860 + }, + { + "epoch": 0.6551619989017023, + "grad_norm": 0.3911744952201843, + "learning_rate": 1.5180910417937084e-05, + "loss": 0.5612, + "step": 23861 + }, + { + "epoch": 0.6551894563426689, + "grad_norm": 0.4091092050075531, + "learning_rate": 1.5180541003669313e-05, + "loss": 0.5319, + "step": 23862 + }, + { + "epoch": 0.6552169137836353, + "grad_norm": 0.35621142387390137, + "learning_rate": 1.5180171579738178e-05, + "loss": 0.4696, + "step": 23863 + }, + { + "epoch": 0.6552443712246019, + "grad_norm": 0.3588516116142273, + "learning_rate": 1.5179802146144372e-05, + "loss": 0.5012, + "step": 23864 + }, + { + "epoch": 0.6552718286655683, + "grad_norm": 0.3860105276107788, + "learning_rate": 1.5179432702888578e-05, + "loss": 0.4704, + "step": 23865 + }, + { + "epoch": 0.6552992861065349, + "grad_norm": 0.43671151995658875, + "learning_rate": 1.517906324997149e-05, + "loss": 0.5322, + "step": 23866 + }, + { + "epoch": 0.6553267435475014, + "grad_norm": 0.4514642655849457, + "learning_rate": 1.5178693787393796e-05, + "loss": 0.6095, + "step": 23867 + }, + { + "epoch": 0.6553542009884679, + "grad_norm": 0.4700597822666168, + "learning_rate": 1.5178324315156188e-05, + "loss": 0.5468, + "step": 23868 + }, + { + "epoch": 0.6553816584294344, + "grad_norm": 0.4331531822681427, + "learning_rate": 1.517795483325935e-05, + "loss": 0.4699, + "step": 23869 + }, + { + "epoch": 0.6554091158704008, + "grad_norm": 0.3561910390853882, + "learning_rate": 1.5177585341703976e-05, + "loss": 0.4965, + "step": 23870 + }, + { + "epoch": 0.6554365733113674, + "grad_norm": 0.34720396995544434, + "learning_rate": 1.5177215840490751e-05, + "loss": 0.4987, + "step": 23871 + }, + { + "epoch": 0.6554640307523338, + "grad_norm": 0.38149508833885193, + "learning_rate": 1.5176846329620365e-05, + "loss": 0.4135, + "step": 23872 + }, + { + "epoch": 0.6554914881933004, + "grad_norm": 0.3961644768714905, + "learning_rate": 1.517647680909351e-05, + "loss": 0.5101, + "step": 23873 + }, + { + "epoch": 0.6555189456342669, + "grad_norm": 0.3593004047870636, + "learning_rate": 1.5176107278910873e-05, + "loss": 0.4887, + "step": 23874 + }, + { + "epoch": 0.6555464030752334, + "grad_norm": 0.40347328782081604, + "learning_rate": 1.5175737739073144e-05, + "loss": 0.4819, + "step": 23875 + }, + { + "epoch": 0.6555738605161999, + "grad_norm": 0.3572215139865875, + "learning_rate": 1.5175368189581013e-05, + "loss": 0.5335, + "step": 23876 + }, + { + "epoch": 0.6556013179571664, + "grad_norm": 0.5818918347358704, + "learning_rate": 1.5174998630435166e-05, + "loss": 0.4523, + "step": 23877 + }, + { + "epoch": 0.6556287753981329, + "grad_norm": 0.3931645154953003, + "learning_rate": 1.5174629061636297e-05, + "loss": 0.5136, + "step": 23878 + }, + { + "epoch": 0.6556562328390994, + "grad_norm": 0.40563544631004333, + "learning_rate": 1.5174259483185092e-05, + "loss": 0.4645, + "step": 23879 + }, + { + "epoch": 0.6556836902800659, + "grad_norm": 0.3705555498600006, + "learning_rate": 1.5173889895082243e-05, + "loss": 0.5074, + "step": 23880 + }, + { + "epoch": 0.6557111477210325, + "grad_norm": 0.3994797468185425, + "learning_rate": 1.5173520297328437e-05, + "loss": 0.4818, + "step": 23881 + }, + { + "epoch": 0.6557386051619989, + "grad_norm": 0.35491520166397095, + "learning_rate": 1.5173150689924365e-05, + "loss": 0.4476, + "step": 23882 + }, + { + "epoch": 0.6557660626029654, + "grad_norm": 0.43078482151031494, + "learning_rate": 1.5172781072870717e-05, + "loss": 0.4963, + "step": 23883 + }, + { + "epoch": 0.6557935200439319, + "grad_norm": 0.4117569029331207, + "learning_rate": 1.517241144616818e-05, + "loss": 0.5301, + "step": 23884 + }, + { + "epoch": 0.6558209774848984, + "grad_norm": 0.41313356161117554, + "learning_rate": 1.5172041809817444e-05, + "loss": 0.5432, + "step": 23885 + }, + { + "epoch": 0.6558484349258649, + "grad_norm": 0.3572746813297272, + "learning_rate": 1.51716721638192e-05, + "loss": 0.4555, + "step": 23886 + }, + { + "epoch": 0.6558758923668314, + "grad_norm": 0.3679569959640503, + "learning_rate": 1.5171302508174139e-05, + "loss": 0.5601, + "step": 23887 + }, + { + "epoch": 0.655903349807798, + "grad_norm": 0.40712982416152954, + "learning_rate": 1.5170932842882943e-05, + "loss": 0.4921, + "step": 23888 + }, + { + "epoch": 0.6559308072487644, + "grad_norm": 0.4098339378833771, + "learning_rate": 1.5170563167946312e-05, + "loss": 0.5065, + "step": 23889 + }, + { + "epoch": 0.655958264689731, + "grad_norm": 0.5623688101768494, + "learning_rate": 1.517019348336493e-05, + "loss": 0.4739, + "step": 23890 + }, + { + "epoch": 0.6559857221306974, + "grad_norm": 0.33378925919532776, + "learning_rate": 1.5169823789139483e-05, + "loss": 0.4159, + "step": 23891 + }, + { + "epoch": 0.6560131795716639, + "grad_norm": 0.5865381360054016, + "learning_rate": 1.516945408527067e-05, + "loss": 0.4695, + "step": 23892 + }, + { + "epoch": 0.6560406370126304, + "grad_norm": 0.3597528636455536, + "learning_rate": 1.516908437175917e-05, + "loss": 0.4878, + "step": 23893 + }, + { + "epoch": 0.6560680944535969, + "grad_norm": 0.40665188431739807, + "learning_rate": 1.5168714648605682e-05, + "loss": 0.5298, + "step": 23894 + }, + { + "epoch": 0.6560955518945635, + "grad_norm": 0.36472827196121216, + "learning_rate": 1.5168344915810891e-05, + "loss": 0.4748, + "step": 23895 + }, + { + "epoch": 0.6561230093355299, + "grad_norm": 0.4921271502971649, + "learning_rate": 1.5167975173375484e-05, + "loss": 0.5927, + "step": 23896 + }, + { + "epoch": 0.6561504667764965, + "grad_norm": 0.45371463894844055, + "learning_rate": 1.5167605421300158e-05, + "loss": 0.5704, + "step": 23897 + }, + { + "epoch": 0.6561779242174629, + "grad_norm": 0.36017146706581116, + "learning_rate": 1.5167235659585595e-05, + "loss": 0.4274, + "step": 23898 + }, + { + "epoch": 0.6562053816584295, + "grad_norm": 0.359554260969162, + "learning_rate": 1.5166865888232492e-05, + "loss": 0.4522, + "step": 23899 + }, + { + "epoch": 0.6562328390993959, + "grad_norm": 0.35941338539123535, + "learning_rate": 1.5166496107241532e-05, + "loss": 0.4787, + "step": 23900 + }, + { + "epoch": 0.6562602965403624, + "grad_norm": 0.368913859128952, + "learning_rate": 1.5166126316613409e-05, + "loss": 0.4793, + "step": 23901 + }, + { + "epoch": 0.656287753981329, + "grad_norm": 0.37052103877067566, + "learning_rate": 1.5165756516348812e-05, + "loss": 0.5205, + "step": 23902 + }, + { + "epoch": 0.6563152114222954, + "grad_norm": 0.42437684535980225, + "learning_rate": 1.5165386706448428e-05, + "loss": 0.5374, + "step": 23903 + }, + { + "epoch": 0.656342668863262, + "grad_norm": 0.35600847005844116, + "learning_rate": 1.5165016886912951e-05, + "loss": 0.5493, + "step": 23904 + }, + { + "epoch": 0.6563701263042284, + "grad_norm": 0.44146472215652466, + "learning_rate": 1.5164647057743069e-05, + "loss": 0.5154, + "step": 23905 + }, + { + "epoch": 0.656397583745195, + "grad_norm": 0.3685690760612488, + "learning_rate": 1.5164277218939472e-05, + "loss": 0.4844, + "step": 23906 + }, + { + "epoch": 0.6564250411861614, + "grad_norm": 0.4134138524532318, + "learning_rate": 1.5163907370502851e-05, + "loss": 0.5426, + "step": 23907 + }, + { + "epoch": 0.656452498627128, + "grad_norm": 0.41853782534599304, + "learning_rate": 1.5163537512433892e-05, + "loss": 0.5598, + "step": 23908 + }, + { + "epoch": 0.6564799560680945, + "grad_norm": 0.47246310114860535, + "learning_rate": 1.5163167644733289e-05, + "loss": 0.4777, + "step": 23909 + }, + { + "epoch": 0.6565074135090609, + "grad_norm": 0.40454962849617004, + "learning_rate": 1.5162797767401728e-05, + "loss": 0.5228, + "step": 23910 + }, + { + "epoch": 0.6565348709500275, + "grad_norm": 0.34973350167274475, + "learning_rate": 1.5162427880439908e-05, + "loss": 0.5539, + "step": 23911 + }, + { + "epoch": 0.6565623283909939, + "grad_norm": 0.40779784321784973, + "learning_rate": 1.5162057983848507e-05, + "loss": 0.4787, + "step": 23912 + }, + { + "epoch": 0.6565897858319605, + "grad_norm": 0.3787303864955902, + "learning_rate": 1.516168807762822e-05, + "loss": 0.4521, + "step": 23913 + }, + { + "epoch": 0.6566172432729269, + "grad_norm": 0.46128949522972107, + "learning_rate": 1.516131816177974e-05, + "loss": 0.6272, + "step": 23914 + }, + { + "epoch": 0.6566447007138935, + "grad_norm": 0.38078975677490234, + "learning_rate": 1.5160948236303755e-05, + "loss": 0.4702, + "step": 23915 + }, + { + "epoch": 0.65667215815486, + "grad_norm": 0.3621194064617157, + "learning_rate": 1.516057830120095e-05, + "loss": 0.4584, + "step": 23916 + }, + { + "epoch": 0.6566996155958265, + "grad_norm": 0.42416226863861084, + "learning_rate": 1.5160208356472021e-05, + "loss": 0.5008, + "step": 23917 + }, + { + "epoch": 0.656727073036793, + "grad_norm": 0.3621540367603302, + "learning_rate": 1.5159838402117654e-05, + "loss": 0.534, + "step": 23918 + }, + { + "epoch": 0.6567545304777594, + "grad_norm": 0.3991404175758362, + "learning_rate": 1.5159468438138548e-05, + "loss": 0.5303, + "step": 23919 + }, + { + "epoch": 0.656781987918726, + "grad_norm": 0.38658607006073, + "learning_rate": 1.5159098464535382e-05, + "loss": 0.4608, + "step": 23920 + }, + { + "epoch": 0.6568094453596924, + "grad_norm": 1.1730042695999146, + "learning_rate": 1.5158728481308853e-05, + "loss": 0.507, + "step": 23921 + }, + { + "epoch": 0.656836902800659, + "grad_norm": 0.5095987915992737, + "learning_rate": 1.5158358488459647e-05, + "loss": 0.5022, + "step": 23922 + }, + { + "epoch": 0.6568643602416255, + "grad_norm": 0.40227818489074707, + "learning_rate": 1.5157988485988457e-05, + "loss": 0.5288, + "step": 23923 + }, + { + "epoch": 0.656891817682592, + "grad_norm": 0.4037502706050873, + "learning_rate": 1.5157618473895972e-05, + "loss": 0.5322, + "step": 23924 + }, + { + "epoch": 0.6569192751235585, + "grad_norm": 0.41233935952186584, + "learning_rate": 1.5157248452182885e-05, + "loss": 0.5454, + "step": 23925 + }, + { + "epoch": 0.656946732564525, + "grad_norm": 0.35549062490463257, + "learning_rate": 1.5156878420849879e-05, + "loss": 0.475, + "step": 23926 + }, + { + "epoch": 0.6569741900054915, + "grad_norm": 0.3565361499786377, + "learning_rate": 1.5156508379897651e-05, + "loss": 0.4303, + "step": 23927 + }, + { + "epoch": 0.657001647446458, + "grad_norm": 0.3923676609992981, + "learning_rate": 1.515613832932689e-05, + "loss": 0.5526, + "step": 23928 + }, + { + "epoch": 0.6570291048874245, + "grad_norm": 0.42218273878097534, + "learning_rate": 1.5155768269138284e-05, + "loss": 0.5848, + "step": 23929 + }, + { + "epoch": 0.657056562328391, + "grad_norm": 0.39501747488975525, + "learning_rate": 1.515539819933253e-05, + "loss": 0.5981, + "step": 23930 + }, + { + "epoch": 0.6570840197693575, + "grad_norm": 0.4172563850879669, + "learning_rate": 1.5155028119910305e-05, + "loss": 0.6399, + "step": 23931 + }, + { + "epoch": 0.657111477210324, + "grad_norm": 0.44342079758644104, + "learning_rate": 1.515465803087231e-05, + "loss": 0.5189, + "step": 23932 + }, + { + "epoch": 0.6571389346512905, + "grad_norm": 0.3691185414791107, + "learning_rate": 1.5154287932219233e-05, + "loss": 0.4341, + "step": 23933 + }, + { + "epoch": 0.657166392092257, + "grad_norm": 0.3722602128982544, + "learning_rate": 1.5153917823951764e-05, + "loss": 0.5038, + "step": 23934 + }, + { + "epoch": 0.6571938495332235, + "grad_norm": 0.40006932616233826, + "learning_rate": 1.5153547706070595e-05, + "loss": 0.5565, + "step": 23935 + }, + { + "epoch": 0.65722130697419, + "grad_norm": 0.37805014848709106, + "learning_rate": 1.5153177578576412e-05, + "loss": 0.5111, + "step": 23936 + }, + { + "epoch": 0.6572487644151566, + "grad_norm": 0.49466150999069214, + "learning_rate": 1.515280744146991e-05, + "loss": 0.4771, + "step": 23937 + }, + { + "epoch": 0.657276221856123, + "grad_norm": 0.3885880708694458, + "learning_rate": 1.5152437294751779e-05, + "loss": 0.4808, + "step": 23938 + }, + { + "epoch": 0.6573036792970895, + "grad_norm": 0.3847009539604187, + "learning_rate": 1.5152067138422703e-05, + "loss": 0.5279, + "step": 23939 + }, + { + "epoch": 0.657331136738056, + "grad_norm": 0.3731232285499573, + "learning_rate": 1.515169697248338e-05, + "loss": 0.4419, + "step": 23940 + }, + { + "epoch": 0.6573585941790225, + "grad_norm": 0.42065170407295227, + "learning_rate": 1.5151326796934499e-05, + "loss": 0.4949, + "step": 23941 + }, + { + "epoch": 0.657386051619989, + "grad_norm": 0.39258480072021484, + "learning_rate": 1.5150956611776748e-05, + "loss": 0.477, + "step": 23942 + }, + { + "epoch": 0.6574135090609555, + "grad_norm": 0.45420950651168823, + "learning_rate": 1.515058641701082e-05, + "loss": 0.592, + "step": 23943 + }, + { + "epoch": 0.6574409665019221, + "grad_norm": 0.3520858883857727, + "learning_rate": 1.5150216212637403e-05, + "loss": 0.4525, + "step": 23944 + }, + { + "epoch": 0.6574684239428885, + "grad_norm": 0.3777257800102234, + "learning_rate": 1.5149845998657191e-05, + "loss": 0.5192, + "step": 23945 + }, + { + "epoch": 0.6574958813838551, + "grad_norm": 0.3686080276966095, + "learning_rate": 1.5149475775070872e-05, + "loss": 0.4893, + "step": 23946 + }, + { + "epoch": 0.6575233388248215, + "grad_norm": 0.39480507373809814, + "learning_rate": 1.5149105541879134e-05, + "loss": 0.5124, + "step": 23947 + }, + { + "epoch": 0.657550796265788, + "grad_norm": 0.7043114304542542, + "learning_rate": 1.5148735299082677e-05, + "loss": 0.4159, + "step": 23948 + }, + { + "epoch": 0.6575782537067545, + "grad_norm": 0.38168227672576904, + "learning_rate": 1.514836504668218e-05, + "loss": 0.4615, + "step": 23949 + }, + { + "epoch": 0.657605711147721, + "grad_norm": 0.42309117317199707, + "learning_rate": 1.5147994784678342e-05, + "loss": 0.5307, + "step": 23950 + }, + { + "epoch": 0.6576331685886876, + "grad_norm": 0.3904206454753876, + "learning_rate": 1.5147624513071849e-05, + "loss": 0.5361, + "step": 23951 + }, + { + "epoch": 0.657660626029654, + "grad_norm": 0.4133411943912506, + "learning_rate": 1.5147254231863393e-05, + "loss": 0.5704, + "step": 23952 + }, + { + "epoch": 0.6576880834706206, + "grad_norm": 0.3648108243942261, + "learning_rate": 1.5146883941053668e-05, + "loss": 0.4851, + "step": 23953 + }, + { + "epoch": 0.657715540911587, + "grad_norm": 0.3536510169506073, + "learning_rate": 1.5146513640643358e-05, + "loss": 0.5073, + "step": 23954 + }, + { + "epoch": 0.6577429983525536, + "grad_norm": 0.45780104398727417, + "learning_rate": 1.5146143330633161e-05, + "loss": 0.5176, + "step": 23955 + }, + { + "epoch": 0.65777045579352, + "grad_norm": 0.38521525263786316, + "learning_rate": 1.514577301102376e-05, + "loss": 0.4969, + "step": 23956 + }, + { + "epoch": 0.6577979132344866, + "grad_norm": 0.4498515725135803, + "learning_rate": 1.5145402681815853e-05, + "loss": 0.5313, + "step": 23957 + }, + { + "epoch": 0.6578253706754531, + "grad_norm": 0.4214504361152649, + "learning_rate": 1.5145032343010128e-05, + "loss": 0.4698, + "step": 23958 + }, + { + "epoch": 0.6578528281164195, + "grad_norm": 0.36548590660095215, + "learning_rate": 1.5144661994607272e-05, + "loss": 0.4879, + "step": 23959 + }, + { + "epoch": 0.6578802855573861, + "grad_norm": 0.34963274002075195, + "learning_rate": 1.5144291636607985e-05, + "loss": 0.5168, + "step": 23960 + }, + { + "epoch": 0.6579077429983525, + "grad_norm": 0.35828661918640137, + "learning_rate": 1.514392126901295e-05, + "loss": 0.5391, + "step": 23961 + }, + { + "epoch": 0.6579352004393191, + "grad_norm": 0.3735388517379761, + "learning_rate": 1.5143550891822859e-05, + "loss": 0.4526, + "step": 23962 + }, + { + "epoch": 0.6579626578802855, + "grad_norm": 0.3720947504043579, + "learning_rate": 1.5143180505038402e-05, + "loss": 0.5468, + "step": 23963 + }, + { + "epoch": 0.6579901153212521, + "grad_norm": 0.3825312852859497, + "learning_rate": 1.5142810108660277e-05, + "loss": 0.4602, + "step": 23964 + }, + { + "epoch": 0.6580175727622186, + "grad_norm": 0.34863603115081787, + "learning_rate": 1.5142439702689164e-05, + "loss": 0.3957, + "step": 23965 + }, + { + "epoch": 0.658045030203185, + "grad_norm": 0.443844735622406, + "learning_rate": 1.5142069287125765e-05, + "loss": 0.5104, + "step": 23966 + }, + { + "epoch": 0.6580724876441516, + "grad_norm": 0.4012451469898224, + "learning_rate": 1.5141698861970762e-05, + "loss": 0.5935, + "step": 23967 + }, + { + "epoch": 0.658099945085118, + "grad_norm": 0.37376147508621216, + "learning_rate": 1.514132842722485e-05, + "loss": 0.4681, + "step": 23968 + }, + { + "epoch": 0.6581274025260846, + "grad_norm": 0.36959168314933777, + "learning_rate": 1.5140957982888723e-05, + "loss": 0.4913, + "step": 23969 + }, + { + "epoch": 0.658154859967051, + "grad_norm": 0.38753822445869446, + "learning_rate": 1.5140587528963065e-05, + "loss": 0.5109, + "step": 23970 + }, + { + "epoch": 0.6581823174080176, + "grad_norm": 0.3941248059272766, + "learning_rate": 1.514021706544857e-05, + "loss": 0.5205, + "step": 23971 + }, + { + "epoch": 0.658209774848984, + "grad_norm": 0.43180322647094727, + "learning_rate": 1.513984659234593e-05, + "loss": 0.448, + "step": 23972 + }, + { + "epoch": 0.6582372322899506, + "grad_norm": 0.4012894332408905, + "learning_rate": 1.5139476109655839e-05, + "loss": 0.4678, + "step": 23973 + }, + { + "epoch": 0.6582646897309171, + "grad_norm": 0.3820578157901764, + "learning_rate": 1.5139105617378982e-05, + "loss": 0.4982, + "step": 23974 + }, + { + "epoch": 0.6582921471718836, + "grad_norm": 0.39036595821380615, + "learning_rate": 1.5138735115516055e-05, + "loss": 0.4335, + "step": 23975 + }, + { + "epoch": 0.6583196046128501, + "grad_norm": 0.42036300897598267, + "learning_rate": 1.5138364604067745e-05, + "loss": 0.4912, + "step": 23976 + }, + { + "epoch": 0.6583470620538165, + "grad_norm": 0.45336592197418213, + "learning_rate": 1.5137994083034743e-05, + "loss": 0.4222, + "step": 23977 + }, + { + "epoch": 0.6583745194947831, + "grad_norm": 0.38211461901664734, + "learning_rate": 1.5137623552417746e-05, + "loss": 0.4518, + "step": 23978 + }, + { + "epoch": 0.6584019769357495, + "grad_norm": 0.35518449544906616, + "learning_rate": 1.513725301221744e-05, + "loss": 0.5271, + "step": 23979 + }, + { + "epoch": 0.6584294343767161, + "grad_norm": 0.3911410868167877, + "learning_rate": 1.5136882462434517e-05, + "loss": 0.6067, + "step": 23980 + }, + { + "epoch": 0.6584568918176826, + "grad_norm": 0.41568097472190857, + "learning_rate": 1.513651190306967e-05, + "loss": 0.5425, + "step": 23981 + }, + { + "epoch": 0.6584843492586491, + "grad_norm": 0.37300705909729004, + "learning_rate": 1.5136141334123586e-05, + "loss": 0.4626, + "step": 23982 + }, + { + "epoch": 0.6585118066996156, + "grad_norm": 0.33540773391723633, + "learning_rate": 1.5135770755596962e-05, + "loss": 0.404, + "step": 23983 + }, + { + "epoch": 0.6585392641405821, + "grad_norm": 0.3623064458370209, + "learning_rate": 1.5135400167490487e-05, + "loss": 0.4151, + "step": 23984 + }, + { + "epoch": 0.6585667215815486, + "grad_norm": 0.3585006892681122, + "learning_rate": 1.5135029569804846e-05, + "loss": 0.4249, + "step": 23985 + }, + { + "epoch": 0.658594179022515, + "grad_norm": 0.3870038688182831, + "learning_rate": 1.5134658962540743e-05, + "loss": 0.4751, + "step": 23986 + }, + { + "epoch": 0.6586216364634816, + "grad_norm": 0.3566970229148865, + "learning_rate": 1.513428834569886e-05, + "loss": 0.4223, + "step": 23987 + }, + { + "epoch": 0.6586490939044481, + "grad_norm": 0.40721994638442993, + "learning_rate": 1.5133917719279886e-05, + "loss": 0.4698, + "step": 23988 + }, + { + "epoch": 0.6586765513454146, + "grad_norm": 0.3818439245223999, + "learning_rate": 1.5133547083284523e-05, + "loss": 0.5191, + "step": 23989 + }, + { + "epoch": 0.6587040087863811, + "grad_norm": 0.35108712315559387, + "learning_rate": 1.5133176437713454e-05, + "loss": 0.4659, + "step": 23990 + }, + { + "epoch": 0.6587314662273476, + "grad_norm": 0.40364962816238403, + "learning_rate": 1.5132805782567373e-05, + "loss": 0.4969, + "step": 23991 + }, + { + "epoch": 0.6587589236683141, + "grad_norm": 0.3653351962566376, + "learning_rate": 1.513243511784697e-05, + "loss": 0.5234, + "step": 23992 + }, + { + "epoch": 0.6587863811092806, + "grad_norm": 0.40441039204597473, + "learning_rate": 1.5132064443552938e-05, + "loss": 0.5877, + "step": 23993 + }, + { + "epoch": 0.6588138385502471, + "grad_norm": 0.38160017132759094, + "learning_rate": 1.5131693759685968e-05, + "loss": 0.4616, + "step": 23994 + }, + { + "epoch": 0.6588412959912137, + "grad_norm": 0.3807300925254822, + "learning_rate": 1.5131323066246753e-05, + "loss": 0.5681, + "step": 23995 + }, + { + "epoch": 0.6588687534321801, + "grad_norm": 0.3919525742530823, + "learning_rate": 1.513095236323598e-05, + "loss": 0.4571, + "step": 23996 + }, + { + "epoch": 0.6588962108731466, + "grad_norm": 0.4258916974067688, + "learning_rate": 1.5130581650654344e-05, + "loss": 0.4643, + "step": 23997 + }, + { + "epoch": 0.6589236683141131, + "grad_norm": 0.3517382740974426, + "learning_rate": 1.5130210928502536e-05, + "loss": 0.4735, + "step": 23998 + }, + { + "epoch": 0.6589511257550796, + "grad_norm": 0.3432435691356659, + "learning_rate": 1.5129840196781249e-05, + "loss": 0.477, + "step": 23999 + }, + { + "epoch": 0.6589785831960461, + "grad_norm": 0.3310431241989136, + "learning_rate": 1.5129469455491171e-05, + "loss": 0.4738, + "step": 24000 + }, + { + "epoch": 0.6590060406370126, + "grad_norm": 0.3629182279109955, + "learning_rate": 1.5129098704632994e-05, + "loss": 0.434, + "step": 24001 + }, + { + "epoch": 0.6590334980779792, + "grad_norm": 0.31795990467071533, + "learning_rate": 1.5128727944207412e-05, + "loss": 0.4147, + "step": 24002 + }, + { + "epoch": 0.6590609555189456, + "grad_norm": 0.37017470598220825, + "learning_rate": 1.5128357174215116e-05, + "loss": 0.5033, + "step": 24003 + }, + { + "epoch": 0.6590884129599122, + "grad_norm": 0.37525224685668945, + "learning_rate": 1.5127986394656797e-05, + "loss": 0.5104, + "step": 24004 + }, + { + "epoch": 0.6591158704008786, + "grad_norm": 0.35840412974357605, + "learning_rate": 1.5127615605533148e-05, + "loss": 0.4773, + "step": 24005 + }, + { + "epoch": 0.6591433278418452, + "grad_norm": 0.38165760040283203, + "learning_rate": 1.5127244806844856e-05, + "loss": 0.4435, + "step": 24006 + }, + { + "epoch": 0.6591707852828116, + "grad_norm": 0.31844136118888855, + "learning_rate": 1.5126873998592619e-05, + "loss": 0.4476, + "step": 24007 + }, + { + "epoch": 0.6591982427237781, + "grad_norm": 0.4082482159137726, + "learning_rate": 1.5126503180777126e-05, + "loss": 0.4852, + "step": 24008 + }, + { + "epoch": 0.6592257001647447, + "grad_norm": 0.3316288888454437, + "learning_rate": 1.5126132353399064e-05, + "loss": 0.4509, + "step": 24009 + }, + { + "epoch": 0.6592531576057111, + "grad_norm": 0.3549002707004547, + "learning_rate": 1.5125761516459132e-05, + "loss": 0.4472, + "step": 24010 + }, + { + "epoch": 0.6592806150466777, + "grad_norm": 0.3818439841270447, + "learning_rate": 1.5125390669958018e-05, + "loss": 0.5111, + "step": 24011 + }, + { + "epoch": 0.6593080724876441, + "grad_norm": 0.39492741227149963, + "learning_rate": 1.5125019813896416e-05, + "loss": 0.5555, + "step": 24012 + }, + { + "epoch": 0.6593355299286107, + "grad_norm": 0.41379600763320923, + "learning_rate": 1.5124648948275016e-05, + "loss": 0.5018, + "step": 24013 + }, + { + "epoch": 0.6593629873695771, + "grad_norm": 0.38908863067626953, + "learning_rate": 1.512427807309451e-05, + "loss": 0.5159, + "step": 24014 + }, + { + "epoch": 0.6593904448105437, + "grad_norm": 0.42091208696365356, + "learning_rate": 1.512390718835559e-05, + "loss": 0.4969, + "step": 24015 + }, + { + "epoch": 0.6594179022515102, + "grad_norm": 0.4098203182220459, + "learning_rate": 1.5123536294058947e-05, + "loss": 0.5016, + "step": 24016 + }, + { + "epoch": 0.6594453596924766, + "grad_norm": 0.38693326711654663, + "learning_rate": 1.5123165390205274e-05, + "loss": 0.4588, + "step": 24017 + }, + { + "epoch": 0.6594728171334432, + "grad_norm": 0.3761516511440277, + "learning_rate": 1.5122794476795263e-05, + "loss": 0.4856, + "step": 24018 + }, + { + "epoch": 0.6595002745744096, + "grad_norm": 0.3800496757030487, + "learning_rate": 1.5122423553829603e-05, + "loss": 0.5056, + "step": 24019 + }, + { + "epoch": 0.6595277320153762, + "grad_norm": 0.37697774171829224, + "learning_rate": 1.5122052621308991e-05, + "loss": 0.4705, + "step": 24020 + }, + { + "epoch": 0.6595551894563426, + "grad_norm": 0.3954748809337616, + "learning_rate": 1.5121681679234112e-05, + "loss": 0.5063, + "step": 24021 + }, + { + "epoch": 0.6595826468973092, + "grad_norm": 0.6181082129478455, + "learning_rate": 1.5121310727605667e-05, + "loss": 0.536, + "step": 24022 + }, + { + "epoch": 0.6596101043382757, + "grad_norm": 0.3828640282154083, + "learning_rate": 1.5120939766424343e-05, + "loss": 0.5675, + "step": 24023 + }, + { + "epoch": 0.6596375617792422, + "grad_norm": 0.44749152660369873, + "learning_rate": 1.512056879569083e-05, + "loss": 0.5516, + "step": 24024 + }, + { + "epoch": 0.6596650192202087, + "grad_norm": 0.45890605449676514, + "learning_rate": 1.5120197815405821e-05, + "loss": 0.5038, + "step": 24025 + }, + { + "epoch": 0.6596924766611751, + "grad_norm": 0.7149428725242615, + "learning_rate": 1.511982682557001e-05, + "loss": 0.4651, + "step": 24026 + }, + { + "epoch": 0.6597199341021417, + "grad_norm": 0.4190370738506317, + "learning_rate": 1.5119455826184087e-05, + "loss": 0.5132, + "step": 24027 + }, + { + "epoch": 0.6597473915431081, + "grad_norm": 0.3901206851005554, + "learning_rate": 1.5119084817248747e-05, + "loss": 0.5277, + "step": 24028 + }, + { + "epoch": 0.6597748489840747, + "grad_norm": 0.3569476306438446, + "learning_rate": 1.5118713798764677e-05, + "loss": 0.5044, + "step": 24029 + }, + { + "epoch": 0.6598023064250412, + "grad_norm": 0.36212247610092163, + "learning_rate": 1.5118342770732572e-05, + "loss": 0.4704, + "step": 24030 + }, + { + "epoch": 0.6598297638660077, + "grad_norm": 0.40042051672935486, + "learning_rate": 1.5117971733153129e-05, + "loss": 0.5376, + "step": 24031 + }, + { + "epoch": 0.6598572213069742, + "grad_norm": 0.37853512167930603, + "learning_rate": 1.511760068602703e-05, + "loss": 0.5069, + "step": 24032 + }, + { + "epoch": 0.6598846787479407, + "grad_norm": 0.3851795494556427, + "learning_rate": 1.5117229629354974e-05, + "loss": 0.5673, + "step": 24033 + }, + { + "epoch": 0.6599121361889072, + "grad_norm": 0.3782115578651428, + "learning_rate": 1.5116858563137654e-05, + "loss": 0.5024, + "step": 24034 + }, + { + "epoch": 0.6599395936298736, + "grad_norm": 0.4062112867832184, + "learning_rate": 1.5116487487375757e-05, + "loss": 0.4557, + "step": 24035 + }, + { + "epoch": 0.6599670510708402, + "grad_norm": 0.4363280236721039, + "learning_rate": 1.511611640206998e-05, + "loss": 0.4942, + "step": 24036 + }, + { + "epoch": 0.6599945085118067, + "grad_norm": 0.4659200608730316, + "learning_rate": 1.511574530722101e-05, + "loss": 0.5659, + "step": 24037 + }, + { + "epoch": 0.6600219659527732, + "grad_norm": 0.9206740856170654, + "learning_rate": 1.5115374202829545e-05, + "loss": 0.5659, + "step": 24038 + }, + { + "epoch": 0.6600494233937397, + "grad_norm": 0.4030377268791199, + "learning_rate": 1.5115003088896272e-05, + "loss": 0.491, + "step": 24039 + }, + { + "epoch": 0.6600768808347062, + "grad_norm": 0.4007171392440796, + "learning_rate": 1.5114631965421886e-05, + "loss": 0.5207, + "step": 24040 + }, + { + "epoch": 0.6601043382756727, + "grad_norm": 0.39342543482780457, + "learning_rate": 1.511426083240708e-05, + "loss": 0.4838, + "step": 24041 + }, + { + "epoch": 0.6601317957166392, + "grad_norm": 0.4421384632587433, + "learning_rate": 1.5113889689852546e-05, + "loss": 0.5926, + "step": 24042 + }, + { + "epoch": 0.6601592531576057, + "grad_norm": 0.35787492990493774, + "learning_rate": 1.5113518537758976e-05, + "loss": 0.4773, + "step": 24043 + }, + { + "epoch": 0.6601867105985723, + "grad_norm": 0.3687651753425598, + "learning_rate": 1.511314737612706e-05, + "loss": 0.5367, + "step": 24044 + }, + { + "epoch": 0.6602141680395387, + "grad_norm": 0.35999825596809387, + "learning_rate": 1.511277620495749e-05, + "loss": 0.4835, + "step": 24045 + }, + { + "epoch": 0.6602416254805052, + "grad_norm": 0.39089030027389526, + "learning_rate": 1.5112405024250966e-05, + "loss": 0.5618, + "step": 24046 + }, + { + "epoch": 0.6602690829214717, + "grad_norm": 0.38359716534614563, + "learning_rate": 1.5112033834008171e-05, + "loss": 0.5083, + "step": 24047 + }, + { + "epoch": 0.6602965403624382, + "grad_norm": 0.3704761266708374, + "learning_rate": 1.5111662634229803e-05, + "loss": 0.5196, + "step": 24048 + }, + { + "epoch": 0.6603239978034047, + "grad_norm": 0.3586193025112152, + "learning_rate": 1.5111291424916554e-05, + "loss": 0.4378, + "step": 24049 + }, + { + "epoch": 0.6603514552443712, + "grad_norm": 0.633192777633667, + "learning_rate": 1.511092020606911e-05, + "loss": 0.5174, + "step": 24050 + }, + { + "epoch": 0.6603789126853378, + "grad_norm": 0.42016056180000305, + "learning_rate": 1.5110548977688172e-05, + "loss": 0.5789, + "step": 24051 + }, + { + "epoch": 0.6604063701263042, + "grad_norm": 0.363089382648468, + "learning_rate": 1.5110177739774429e-05, + "loss": 0.406, + "step": 24052 + }, + { + "epoch": 0.6604338275672708, + "grad_norm": 0.35215166211128235, + "learning_rate": 1.5109806492328575e-05, + "loss": 0.4863, + "step": 24053 + }, + { + "epoch": 0.6604612850082372, + "grad_norm": 0.5428338646888733, + "learning_rate": 1.5109435235351302e-05, + "loss": 0.5706, + "step": 24054 + }, + { + "epoch": 0.6604887424492037, + "grad_norm": 0.3742062449455261, + "learning_rate": 1.5109063968843298e-05, + "loss": 0.4954, + "step": 24055 + }, + { + "epoch": 0.6605161998901702, + "grad_norm": 0.8461037278175354, + "learning_rate": 1.510869269280526e-05, + "loss": 0.5463, + "step": 24056 + }, + { + "epoch": 0.6605436573311367, + "grad_norm": 0.3577113449573517, + "learning_rate": 1.510832140723788e-05, + "loss": 0.4176, + "step": 24057 + }, + { + "epoch": 0.6605711147721033, + "grad_norm": 0.35823047161102295, + "learning_rate": 1.5107950112141851e-05, + "loss": 0.4928, + "step": 24058 + }, + { + "epoch": 0.6605985722130697, + "grad_norm": 0.362909734249115, + "learning_rate": 1.5107578807517865e-05, + "loss": 0.4787, + "step": 24059 + }, + { + "epoch": 0.6606260296540363, + "grad_norm": 0.35504424571990967, + "learning_rate": 1.5107207493366611e-05, + "loss": 0.4398, + "step": 24060 + }, + { + "epoch": 0.6606534870950027, + "grad_norm": 0.40052786469459534, + "learning_rate": 1.5106836169688788e-05, + "loss": 0.5272, + "step": 24061 + }, + { + "epoch": 0.6606809445359693, + "grad_norm": 0.36349132657051086, + "learning_rate": 1.5106464836485085e-05, + "loss": 0.5023, + "step": 24062 + }, + { + "epoch": 0.6607084019769357, + "grad_norm": 0.32683902978897095, + "learning_rate": 1.5106093493756195e-05, + "loss": 0.4705, + "step": 24063 + }, + { + "epoch": 0.6607358594179022, + "grad_norm": 0.3968454897403717, + "learning_rate": 1.5105722141502812e-05, + "loss": 0.5029, + "step": 24064 + }, + { + "epoch": 0.6607633168588688, + "grad_norm": 0.39025482535362244, + "learning_rate": 1.5105350779725627e-05, + "loss": 0.4629, + "step": 24065 + }, + { + "epoch": 0.6607907742998352, + "grad_norm": 0.4100659191608429, + "learning_rate": 1.5104979408425333e-05, + "loss": 0.496, + "step": 24066 + }, + { + "epoch": 0.6608182317408018, + "grad_norm": 0.35037732124328613, + "learning_rate": 1.5104608027602621e-05, + "loss": 0.5274, + "step": 24067 + }, + { + "epoch": 0.6608456891817682, + "grad_norm": 0.36645716428756714, + "learning_rate": 1.510423663725819e-05, + "loss": 0.4737, + "step": 24068 + }, + { + "epoch": 0.6608731466227348, + "grad_norm": 0.37400683760643005, + "learning_rate": 1.5103865237392725e-05, + "loss": 0.5377, + "step": 24069 + }, + { + "epoch": 0.6609006040637012, + "grad_norm": 0.3864007592201233, + "learning_rate": 1.5103493828006923e-05, + "loss": 0.4622, + "step": 24070 + }, + { + "epoch": 0.6609280615046678, + "grad_norm": 0.4001660645008087, + "learning_rate": 1.5103122409101479e-05, + "loss": 0.5646, + "step": 24071 + }, + { + "epoch": 0.6609555189456343, + "grad_norm": 0.40740805864334106, + "learning_rate": 1.5102750980677081e-05, + "loss": 0.504, + "step": 24072 + }, + { + "epoch": 0.6609829763866008, + "grad_norm": 0.4128975570201874, + "learning_rate": 1.5102379542734425e-05, + "loss": 0.4433, + "step": 24073 + }, + { + "epoch": 0.6610104338275673, + "grad_norm": 0.34095457196235657, + "learning_rate": 1.51020080952742e-05, + "loss": 0.4605, + "step": 24074 + }, + { + "epoch": 0.6610378912685337, + "grad_norm": 0.38530340790748596, + "learning_rate": 1.5101636638297103e-05, + "loss": 0.6284, + "step": 24075 + }, + { + "epoch": 0.6610653487095003, + "grad_norm": 0.395935595035553, + "learning_rate": 1.5101265171803828e-05, + "loss": 0.5093, + "step": 24076 + }, + { + "epoch": 0.6610928061504667, + "grad_norm": 0.4380389451980591, + "learning_rate": 1.5100893695795061e-05, + "loss": 0.5881, + "step": 24077 + }, + { + "epoch": 0.6611202635914333, + "grad_norm": 0.3459262549877167, + "learning_rate": 1.5100522210271502e-05, + "loss": 0.4551, + "step": 24078 + }, + { + "epoch": 0.6611477210323998, + "grad_norm": 0.3491075932979584, + "learning_rate": 1.510015071523384e-05, + "loss": 0.5219, + "step": 24079 + }, + { + "epoch": 0.6611751784733663, + "grad_norm": 0.40279272198677063, + "learning_rate": 1.509977921068277e-05, + "loss": 0.4988, + "step": 24080 + }, + { + "epoch": 0.6612026359143328, + "grad_norm": 0.37964364886283875, + "learning_rate": 1.5099407696618983e-05, + "loss": 0.551, + "step": 24081 + }, + { + "epoch": 0.6612300933552993, + "grad_norm": 0.40749576687812805, + "learning_rate": 1.5099036173043174e-05, + "loss": 0.5119, + "step": 24082 + }, + { + "epoch": 0.6612575507962658, + "grad_norm": 0.5994154810905457, + "learning_rate": 1.5098664639956036e-05, + "loss": 0.4453, + "step": 24083 + }, + { + "epoch": 0.6612850082372322, + "grad_norm": 0.431636780500412, + "learning_rate": 1.5098293097358258e-05, + "loss": 0.4968, + "step": 24084 + }, + { + "epoch": 0.6613124656781988, + "grad_norm": 0.38016730546951294, + "learning_rate": 1.5097921545250538e-05, + "loss": 0.4865, + "step": 24085 + }, + { + "epoch": 0.6613399231191653, + "grad_norm": 0.3413971960544586, + "learning_rate": 1.5097549983633566e-05, + "loss": 0.4759, + "step": 24086 + }, + { + "epoch": 0.6613673805601318, + "grad_norm": 0.36416012048721313, + "learning_rate": 1.509717841250804e-05, + "loss": 0.4468, + "step": 24087 + }, + { + "epoch": 0.6613948380010983, + "grad_norm": 0.37473538517951965, + "learning_rate": 1.5096806831874648e-05, + "loss": 0.4714, + "step": 24088 + }, + { + "epoch": 0.6614222954420648, + "grad_norm": 0.5513883233070374, + "learning_rate": 1.5096435241734081e-05, + "loss": 0.5142, + "step": 24089 + }, + { + "epoch": 0.6614497528830313, + "grad_norm": 0.39200666546821594, + "learning_rate": 1.509606364208704e-05, + "loss": 0.5927, + "step": 24090 + }, + { + "epoch": 0.6614772103239978, + "grad_norm": 0.36687755584716797, + "learning_rate": 1.5095692032934213e-05, + "loss": 0.493, + "step": 24091 + }, + { + "epoch": 0.6615046677649643, + "grad_norm": 0.34702202677726746, + "learning_rate": 1.5095320414276292e-05, + "loss": 0.4591, + "step": 24092 + }, + { + "epoch": 0.6615321252059309, + "grad_norm": 0.38459476828575134, + "learning_rate": 1.5094948786113975e-05, + "loss": 0.5869, + "step": 24093 + }, + { + "epoch": 0.6615595826468973, + "grad_norm": 0.4456349313259125, + "learning_rate": 1.5094577148447948e-05, + "loss": 0.5504, + "step": 24094 + }, + { + "epoch": 0.6615870400878638, + "grad_norm": 0.38392919301986694, + "learning_rate": 1.5094205501278913e-05, + "loss": 0.5209, + "step": 24095 + }, + { + "epoch": 0.6616144975288303, + "grad_norm": 0.4337863624095917, + "learning_rate": 1.5093833844607554e-05, + "loss": 0.6019, + "step": 24096 + }, + { + "epoch": 0.6616419549697968, + "grad_norm": 0.42098167538642883, + "learning_rate": 1.5093462178434575e-05, + "loss": 0.4615, + "step": 24097 + }, + { + "epoch": 0.6616694124107633, + "grad_norm": 0.5025973320007324, + "learning_rate": 1.5093090502760662e-05, + "loss": 0.5172, + "step": 24098 + }, + { + "epoch": 0.6616968698517298, + "grad_norm": 0.3784642815589905, + "learning_rate": 1.5092718817586507e-05, + "loss": 0.487, + "step": 24099 + }, + { + "epoch": 0.6617243272926964, + "grad_norm": 0.35975584387779236, + "learning_rate": 1.509234712291281e-05, + "loss": 0.434, + "step": 24100 + }, + { + "epoch": 0.6617517847336628, + "grad_norm": 0.384817510843277, + "learning_rate": 1.5091975418740257e-05, + "loss": 0.5038, + "step": 24101 + }, + { + "epoch": 0.6617792421746294, + "grad_norm": 0.5579428672790527, + "learning_rate": 1.5091603705069545e-05, + "loss": 0.535, + "step": 24102 + }, + { + "epoch": 0.6618066996155958, + "grad_norm": 0.520343542098999, + "learning_rate": 1.5091231981901367e-05, + "loss": 0.5234, + "step": 24103 + }, + { + "epoch": 0.6618341570565623, + "grad_norm": 0.3436465561389923, + "learning_rate": 1.5090860249236416e-05, + "loss": 0.4964, + "step": 24104 + }, + { + "epoch": 0.6618616144975288, + "grad_norm": 0.35006874799728394, + "learning_rate": 1.5090488507075387e-05, + "loss": 0.5023, + "step": 24105 + }, + { + "epoch": 0.6618890719384953, + "grad_norm": 0.37106871604919434, + "learning_rate": 1.5090116755418973e-05, + "loss": 0.4692, + "step": 24106 + }, + { + "epoch": 0.6619165293794619, + "grad_norm": 0.3689243197441101, + "learning_rate": 1.5089744994267866e-05, + "loss": 0.4702, + "step": 24107 + }, + { + "epoch": 0.6619439868204283, + "grad_norm": 0.4075925946235657, + "learning_rate": 1.508937322362276e-05, + "loss": 0.5174, + "step": 24108 + }, + { + "epoch": 0.6619714442613949, + "grad_norm": 0.42445439100265503, + "learning_rate": 1.5089001443484347e-05, + "loss": 0.513, + "step": 24109 + }, + { + "epoch": 0.6619989017023613, + "grad_norm": 0.37527552247047424, + "learning_rate": 1.5088629653853324e-05, + "loss": 0.4822, + "step": 24110 + }, + { + "epoch": 0.6620263591433279, + "grad_norm": 0.334225058555603, + "learning_rate": 1.5088257854730384e-05, + "loss": 0.4879, + "step": 24111 + }, + { + "epoch": 0.6620538165842943, + "grad_norm": 0.422102153301239, + "learning_rate": 1.5087886046116216e-05, + "loss": 0.5615, + "step": 24112 + }, + { + "epoch": 0.6620812740252608, + "grad_norm": 0.41751548647880554, + "learning_rate": 1.5087514228011517e-05, + "loss": 0.5649, + "step": 24113 + }, + { + "epoch": 0.6621087314662274, + "grad_norm": 0.39554914832115173, + "learning_rate": 1.508714240041698e-05, + "loss": 0.5092, + "step": 24114 + }, + { + "epoch": 0.6621361889071938, + "grad_norm": 0.43264371156692505, + "learning_rate": 1.5086770563333301e-05, + "loss": 0.4845, + "step": 24115 + }, + { + "epoch": 0.6621636463481604, + "grad_norm": 0.3488117754459381, + "learning_rate": 1.5086398716761172e-05, + "loss": 0.4437, + "step": 24116 + }, + { + "epoch": 0.6621911037891268, + "grad_norm": 0.385703444480896, + "learning_rate": 1.5086026860701281e-05, + "loss": 0.5007, + "step": 24117 + }, + { + "epoch": 0.6622185612300934, + "grad_norm": 0.41718965768814087, + "learning_rate": 1.5085654995154333e-05, + "loss": 0.4635, + "step": 24118 + }, + { + "epoch": 0.6622460186710598, + "grad_norm": 0.518252968788147, + "learning_rate": 1.5085283120121013e-05, + "loss": 0.4583, + "step": 24119 + }, + { + "epoch": 0.6622734761120264, + "grad_norm": 0.3761674463748932, + "learning_rate": 1.5084911235602016e-05, + "loss": 0.5112, + "step": 24120 + }, + { + "epoch": 0.6623009335529929, + "grad_norm": 0.336700439453125, + "learning_rate": 1.5084539341598036e-05, + "loss": 0.4203, + "step": 24121 + }, + { + "epoch": 0.6623283909939593, + "grad_norm": 0.34201329946517944, + "learning_rate": 1.5084167438109767e-05, + "loss": 0.4344, + "step": 24122 + }, + { + "epoch": 0.6623558484349259, + "grad_norm": 0.4143046736717224, + "learning_rate": 1.5083795525137906e-05, + "loss": 0.48, + "step": 24123 + }, + { + "epoch": 0.6623833058758923, + "grad_norm": 0.38507211208343506, + "learning_rate": 1.5083423602683142e-05, + "loss": 0.5533, + "step": 24124 + }, + { + "epoch": 0.6624107633168589, + "grad_norm": 0.3607304096221924, + "learning_rate": 1.5083051670746169e-05, + "loss": 0.5231, + "step": 24125 + }, + { + "epoch": 0.6624382207578253, + "grad_norm": 0.3629733920097351, + "learning_rate": 1.5082679729327685e-05, + "loss": 0.4819, + "step": 24126 + }, + { + "epoch": 0.6624656781987919, + "grad_norm": 0.40217646956443787, + "learning_rate": 1.5082307778428378e-05, + "loss": 0.4728, + "step": 24127 + }, + { + "epoch": 0.6624931356397584, + "grad_norm": 0.4167408347129822, + "learning_rate": 1.508193581804895e-05, + "loss": 0.5381, + "step": 24128 + }, + { + "epoch": 0.6625205930807249, + "grad_norm": 0.42048609256744385, + "learning_rate": 1.5081563848190084e-05, + "loss": 0.5138, + "step": 24129 + }, + { + "epoch": 0.6625480505216914, + "grad_norm": 0.3978072702884674, + "learning_rate": 1.5081191868852481e-05, + "loss": 0.5127, + "step": 24130 + }, + { + "epoch": 0.6625755079626579, + "grad_norm": 0.3801674544811249, + "learning_rate": 1.5080819880036837e-05, + "loss": 0.4829, + "step": 24131 + }, + { + "epoch": 0.6626029654036244, + "grad_norm": 0.3630140721797943, + "learning_rate": 1.508044788174384e-05, + "loss": 0.4327, + "step": 24132 + }, + { + "epoch": 0.6626304228445908, + "grad_norm": 0.4014008045196533, + "learning_rate": 1.5080075873974185e-05, + "loss": 0.5311, + "step": 24133 + }, + { + "epoch": 0.6626578802855574, + "grad_norm": 0.4049926996231079, + "learning_rate": 1.507970385672857e-05, + "loss": 0.5948, + "step": 24134 + }, + { + "epoch": 0.6626853377265239, + "grad_norm": 0.37951719760894775, + "learning_rate": 1.5079331830007681e-05, + "loss": 0.5267, + "step": 24135 + }, + { + "epoch": 0.6627127951674904, + "grad_norm": 0.36609435081481934, + "learning_rate": 1.5078959793812223e-05, + "loss": 0.512, + "step": 24136 + }, + { + "epoch": 0.6627402526084569, + "grad_norm": 0.3981935977935791, + "learning_rate": 1.5078587748142882e-05, + "loss": 0.5145, + "step": 24137 + }, + { + "epoch": 0.6627677100494234, + "grad_norm": 0.40482285618782043, + "learning_rate": 1.5078215693000351e-05, + "loss": 0.5976, + "step": 24138 + }, + { + "epoch": 0.6627951674903899, + "grad_norm": 0.3753305673599243, + "learning_rate": 1.5077843628385332e-05, + "loss": 0.5233, + "step": 24139 + }, + { + "epoch": 0.6628226249313564, + "grad_norm": 0.37663978338241577, + "learning_rate": 1.5077471554298506e-05, + "loss": 0.4939, + "step": 24140 + }, + { + "epoch": 0.6628500823723229, + "grad_norm": 0.3635084927082062, + "learning_rate": 1.5077099470740582e-05, + "loss": 0.4412, + "step": 24141 + }, + { + "epoch": 0.6628775398132895, + "grad_norm": 0.3875877261161804, + "learning_rate": 1.5076727377712245e-05, + "loss": 0.5379, + "step": 24142 + }, + { + "epoch": 0.6629049972542559, + "grad_norm": 0.37023869156837463, + "learning_rate": 1.5076355275214192e-05, + "loss": 0.5542, + "step": 24143 + }, + { + "epoch": 0.6629324546952224, + "grad_norm": 0.34797853231430054, + "learning_rate": 1.5075983163247116e-05, + "loss": 0.5922, + "step": 24144 + }, + { + "epoch": 0.6629599121361889, + "grad_norm": 0.3749619424343109, + "learning_rate": 1.5075611041811708e-05, + "loss": 0.5123, + "step": 24145 + }, + { + "epoch": 0.6629873695771554, + "grad_norm": 0.35781553387641907, + "learning_rate": 1.5075238910908669e-05, + "loss": 0.5244, + "step": 24146 + }, + { + "epoch": 0.6630148270181219, + "grad_norm": 0.36886435747146606, + "learning_rate": 1.5074866770538692e-05, + "loss": 0.5059, + "step": 24147 + }, + { + "epoch": 0.6630422844590884, + "grad_norm": 0.37600159645080566, + "learning_rate": 1.5074494620702464e-05, + "loss": 0.5023, + "step": 24148 + }, + { + "epoch": 0.663069741900055, + "grad_norm": 0.33499136567115784, + "learning_rate": 1.5074122461400687e-05, + "loss": 0.4933, + "step": 24149 + }, + { + "epoch": 0.6630971993410214, + "grad_norm": 0.4667464792728424, + "learning_rate": 1.507375029263405e-05, + "loss": 0.4926, + "step": 24150 + }, + { + "epoch": 0.663124656781988, + "grad_norm": 0.363365113735199, + "learning_rate": 1.507337811440325e-05, + "loss": 0.4143, + "step": 24151 + }, + { + "epoch": 0.6631521142229544, + "grad_norm": 0.3883298337459564, + "learning_rate": 1.5073005926708981e-05, + "loss": 0.487, + "step": 24152 + }, + { + "epoch": 0.6631795716639209, + "grad_norm": 0.3779236078262329, + "learning_rate": 1.5072633729551936e-05, + "loss": 0.4739, + "step": 24153 + }, + { + "epoch": 0.6632070291048874, + "grad_norm": 0.3439730405807495, + "learning_rate": 1.507226152293281e-05, + "loss": 0.5902, + "step": 24154 + }, + { + "epoch": 0.6632344865458539, + "grad_norm": 0.37182050943374634, + "learning_rate": 1.50718893068523e-05, + "loss": 0.5425, + "step": 24155 + }, + { + "epoch": 0.6632619439868205, + "grad_norm": 0.40336528420448303, + "learning_rate": 1.5071517081311093e-05, + "loss": 0.4826, + "step": 24156 + }, + { + "epoch": 0.6632894014277869, + "grad_norm": 0.41912999749183655, + "learning_rate": 1.5071144846309894e-05, + "loss": 0.518, + "step": 24157 + }, + { + "epoch": 0.6633168588687535, + "grad_norm": 0.4210856854915619, + "learning_rate": 1.5070772601849386e-05, + "loss": 0.5113, + "step": 24158 + }, + { + "epoch": 0.6633443163097199, + "grad_norm": 0.39987993240356445, + "learning_rate": 1.5070400347930271e-05, + "loss": 0.5336, + "step": 24159 + }, + { + "epoch": 0.6633717737506865, + "grad_norm": 0.4171293377876282, + "learning_rate": 1.507002808455324e-05, + "loss": 0.4919, + "step": 24160 + }, + { + "epoch": 0.6633992311916529, + "grad_norm": 0.4034542739391327, + "learning_rate": 1.5069655811718988e-05, + "loss": 0.563, + "step": 24161 + }, + { + "epoch": 0.6634266886326194, + "grad_norm": 0.39471226930618286, + "learning_rate": 1.5069283529428213e-05, + "loss": 0.4632, + "step": 24162 + }, + { + "epoch": 0.663454146073586, + "grad_norm": 0.3965429663658142, + "learning_rate": 1.5068911237681606e-05, + "loss": 0.6324, + "step": 24163 + }, + { + "epoch": 0.6634816035145524, + "grad_norm": 0.3571731150150299, + "learning_rate": 1.5068538936479859e-05, + "loss": 0.517, + "step": 24164 + }, + { + "epoch": 0.663509060955519, + "grad_norm": 0.37974628806114197, + "learning_rate": 1.506816662582367e-05, + "loss": 0.6562, + "step": 24165 + }, + { + "epoch": 0.6635365183964854, + "grad_norm": 0.4550722539424896, + "learning_rate": 1.5067794305713732e-05, + "loss": 0.5554, + "step": 24166 + }, + { + "epoch": 0.663563975837452, + "grad_norm": 0.39317789673805237, + "learning_rate": 1.5067421976150743e-05, + "loss": 0.5683, + "step": 24167 + }, + { + "epoch": 0.6635914332784184, + "grad_norm": 0.3563327193260193, + "learning_rate": 1.5067049637135393e-05, + "loss": 0.44, + "step": 24168 + }, + { + "epoch": 0.663618890719385, + "grad_norm": 0.39784228801727295, + "learning_rate": 1.506667728866838e-05, + "loss": 0.5806, + "step": 24169 + }, + { + "epoch": 0.6636463481603515, + "grad_norm": 0.39702364802360535, + "learning_rate": 1.5066304930750393e-05, + "loss": 0.4914, + "step": 24170 + }, + { + "epoch": 0.663673805601318, + "grad_norm": 0.40982237458229065, + "learning_rate": 1.506593256338213e-05, + "loss": 0.5524, + "step": 24171 + }, + { + "epoch": 0.6637012630422845, + "grad_norm": 0.3796665668487549, + "learning_rate": 1.506556018656429e-05, + "loss": 0.5302, + "step": 24172 + }, + { + "epoch": 0.6637287204832509, + "grad_norm": 0.356151819229126, + "learning_rate": 1.5065187800297563e-05, + "loss": 0.5683, + "step": 24173 + }, + { + "epoch": 0.6637561779242175, + "grad_norm": 0.41748401522636414, + "learning_rate": 1.506481540458264e-05, + "loss": 0.4465, + "step": 24174 + }, + { + "epoch": 0.6637836353651839, + "grad_norm": 0.3835783302783966, + "learning_rate": 1.5064442999420227e-05, + "loss": 0.4899, + "step": 24175 + }, + { + "epoch": 0.6638110928061505, + "grad_norm": 0.4015693664550781, + "learning_rate": 1.5064070584811003e-05, + "loss": 0.4784, + "step": 24176 + }, + { + "epoch": 0.663838550247117, + "grad_norm": 0.3756457567214966, + "learning_rate": 1.5063698160755675e-05, + "loss": 0.5327, + "step": 24177 + }, + { + "epoch": 0.6638660076880835, + "grad_norm": 0.3549119830131531, + "learning_rate": 1.5063325727254933e-05, + "loss": 0.506, + "step": 24178 + }, + { + "epoch": 0.66389346512905, + "grad_norm": 0.3947664201259613, + "learning_rate": 1.5062953284309474e-05, + "loss": 0.5255, + "step": 24179 + }, + { + "epoch": 0.6639209225700164, + "grad_norm": 0.35491350293159485, + "learning_rate": 1.5062580831919991e-05, + "loss": 0.431, + "step": 24180 + }, + { + "epoch": 0.663948380010983, + "grad_norm": 0.3664568364620209, + "learning_rate": 1.5062208370087178e-05, + "loss": 0.5761, + "step": 24181 + }, + { + "epoch": 0.6639758374519494, + "grad_norm": 0.5195244550704956, + "learning_rate": 1.5061835898811732e-05, + "loss": 0.4687, + "step": 24182 + }, + { + "epoch": 0.664003294892916, + "grad_norm": 0.3594239056110382, + "learning_rate": 1.5061463418094347e-05, + "loss": 0.4965, + "step": 24183 + }, + { + "epoch": 0.6640307523338825, + "grad_norm": 0.31815096735954285, + "learning_rate": 1.5061090927935715e-05, + "loss": 0.4363, + "step": 24184 + }, + { + "epoch": 0.664058209774849, + "grad_norm": 0.3921823501586914, + "learning_rate": 1.5060718428336534e-05, + "loss": 0.4861, + "step": 24185 + }, + { + "epoch": 0.6640856672158155, + "grad_norm": 0.6466066241264343, + "learning_rate": 1.5060345919297499e-05, + "loss": 0.4584, + "step": 24186 + }, + { + "epoch": 0.664113124656782, + "grad_norm": 0.3789427578449249, + "learning_rate": 1.5059973400819302e-05, + "loss": 0.4593, + "step": 24187 + }, + { + "epoch": 0.6641405820977485, + "grad_norm": 0.4090641438961029, + "learning_rate": 1.505960087290264e-05, + "loss": 0.4895, + "step": 24188 + }, + { + "epoch": 0.664168039538715, + "grad_norm": 0.3521094024181366, + "learning_rate": 1.5059228335548209e-05, + "loss": 0.4475, + "step": 24189 + }, + { + "epoch": 0.6641954969796815, + "grad_norm": 0.370278537273407, + "learning_rate": 1.50588557887567e-05, + "loss": 0.5097, + "step": 24190 + }, + { + "epoch": 0.664222954420648, + "grad_norm": 0.40206268429756165, + "learning_rate": 1.5058483232528813e-05, + "loss": 0.5239, + "step": 24191 + }, + { + "epoch": 0.6642504118616145, + "grad_norm": 0.3939264416694641, + "learning_rate": 1.5058110666865238e-05, + "loss": 0.4175, + "step": 24192 + }, + { + "epoch": 0.664277869302581, + "grad_norm": 0.38880443572998047, + "learning_rate": 1.5057738091766674e-05, + "loss": 0.4847, + "step": 24193 + }, + { + "epoch": 0.6643053267435475, + "grad_norm": 0.3916959762573242, + "learning_rate": 1.5057365507233814e-05, + "loss": 0.5073, + "step": 24194 + }, + { + "epoch": 0.664332784184514, + "grad_norm": 0.3489813804626465, + "learning_rate": 1.5056992913267352e-05, + "loss": 0.4986, + "step": 24195 + }, + { + "epoch": 0.6643602416254805, + "grad_norm": 0.3755294680595398, + "learning_rate": 1.5056620309867987e-05, + "loss": 0.4871, + "step": 24196 + }, + { + "epoch": 0.664387699066447, + "grad_norm": 0.3563813269138336, + "learning_rate": 1.5056247697036406e-05, + "loss": 0.4945, + "step": 24197 + }, + { + "epoch": 0.6644151565074136, + "grad_norm": 0.37481462955474854, + "learning_rate": 1.5055875074773313e-05, + "loss": 0.5441, + "step": 24198 + }, + { + "epoch": 0.66444261394838, + "grad_norm": 0.5117475390434265, + "learning_rate": 1.5055502443079401e-05, + "loss": 0.5517, + "step": 24199 + }, + { + "epoch": 0.6644700713893466, + "grad_norm": 0.36590078473091125, + "learning_rate": 1.505512980195536e-05, + "loss": 0.5025, + "step": 24200 + }, + { + "epoch": 0.664497528830313, + "grad_norm": 0.368334025144577, + "learning_rate": 1.505475715140189e-05, + "loss": 0.4817, + "step": 24201 + }, + { + "epoch": 0.6645249862712795, + "grad_norm": 0.380587637424469, + "learning_rate": 1.5054384491419685e-05, + "loss": 0.4824, + "step": 24202 + }, + { + "epoch": 0.664552443712246, + "grad_norm": 0.36474063992500305, + "learning_rate": 1.5054011822009438e-05, + "loss": 0.5386, + "step": 24203 + }, + { + "epoch": 0.6645799011532125, + "grad_norm": 0.343766450881958, + "learning_rate": 1.5053639143171847e-05, + "loss": 0.4076, + "step": 24204 + }, + { + "epoch": 0.6646073585941791, + "grad_norm": 0.43854156136512756, + "learning_rate": 1.5053266454907607e-05, + "loss": 0.5044, + "step": 24205 + }, + { + "epoch": 0.6646348160351455, + "grad_norm": 0.39683741331100464, + "learning_rate": 1.5052893757217411e-05, + "loss": 0.465, + "step": 24206 + }, + { + "epoch": 0.6646622734761121, + "grad_norm": 0.3657856285572052, + "learning_rate": 1.5052521050101954e-05, + "loss": 0.5207, + "step": 24207 + }, + { + "epoch": 0.6646897309170785, + "grad_norm": 0.4090490937232971, + "learning_rate": 1.5052148333561936e-05, + "loss": 0.4543, + "step": 24208 + }, + { + "epoch": 0.664717188358045, + "grad_norm": 0.4072422683238983, + "learning_rate": 1.5051775607598047e-05, + "loss": 0.5824, + "step": 24209 + }, + { + "epoch": 0.6647446457990115, + "grad_norm": 0.3620085120201111, + "learning_rate": 1.5051402872210985e-05, + "loss": 0.4899, + "step": 24210 + }, + { + "epoch": 0.664772103239978, + "grad_norm": 0.3452054560184479, + "learning_rate": 1.5051030127401442e-05, + "loss": 0.5201, + "step": 24211 + }, + { + "epoch": 0.6647995606809446, + "grad_norm": 0.34123384952545166, + "learning_rate": 1.5050657373170121e-05, + "loss": 0.4407, + "step": 24212 + }, + { + "epoch": 0.664827018121911, + "grad_norm": 0.43103909492492676, + "learning_rate": 1.5050284609517707e-05, + "loss": 0.6199, + "step": 24213 + }, + { + "epoch": 0.6648544755628776, + "grad_norm": 0.3763953447341919, + "learning_rate": 1.5049911836444903e-05, + "loss": 0.5346, + "step": 24214 + }, + { + "epoch": 0.664881933003844, + "grad_norm": 0.44751787185668945, + "learning_rate": 1.5049539053952398e-05, + "loss": 0.4734, + "step": 24215 + }, + { + "epoch": 0.6649093904448106, + "grad_norm": 0.4253011643886566, + "learning_rate": 1.5049166262040894e-05, + "loss": 0.5102, + "step": 24216 + }, + { + "epoch": 0.664936847885777, + "grad_norm": 0.37465086579322815, + "learning_rate": 1.5048793460711086e-05, + "loss": 0.5047, + "step": 24217 + }, + { + "epoch": 0.6649643053267436, + "grad_norm": 0.43068674206733704, + "learning_rate": 1.5048420649963662e-05, + "loss": 0.5131, + "step": 24218 + }, + { + "epoch": 0.6649917627677101, + "grad_norm": 0.39951497316360474, + "learning_rate": 1.5048047829799326e-05, + "loss": 0.4901, + "step": 24219 + }, + { + "epoch": 0.6650192202086765, + "grad_norm": 0.5702389478683472, + "learning_rate": 1.5047675000218766e-05, + "loss": 0.4603, + "step": 24220 + }, + { + "epoch": 0.6650466776496431, + "grad_norm": 0.35309115052223206, + "learning_rate": 1.5047302161222684e-05, + "loss": 0.4338, + "step": 24221 + }, + { + "epoch": 0.6650741350906095, + "grad_norm": 0.38658371567726135, + "learning_rate": 1.504692931281177e-05, + "loss": 0.4785, + "step": 24222 + }, + { + "epoch": 0.6651015925315761, + "grad_norm": 0.3986166715621948, + "learning_rate": 1.5046556454986725e-05, + "loss": 0.4921, + "step": 24223 + }, + { + "epoch": 0.6651290499725425, + "grad_norm": 0.37376898527145386, + "learning_rate": 1.504618358774824e-05, + "loss": 0.6043, + "step": 24224 + }, + { + "epoch": 0.6651565074135091, + "grad_norm": 0.44485223293304443, + "learning_rate": 1.5045810711097009e-05, + "loss": 0.5251, + "step": 24225 + }, + { + "epoch": 0.6651839648544756, + "grad_norm": 0.35929742455482483, + "learning_rate": 1.5045437825033736e-05, + "loss": 0.4902, + "step": 24226 + }, + { + "epoch": 0.6652114222954421, + "grad_norm": 0.4207545220851898, + "learning_rate": 1.504506492955911e-05, + "loss": 0.4933, + "step": 24227 + }, + { + "epoch": 0.6652388797364086, + "grad_norm": 0.39709773659706116, + "learning_rate": 1.5044692024673824e-05, + "loss": 0.4969, + "step": 24228 + }, + { + "epoch": 0.665266337177375, + "grad_norm": 0.4719196557998657, + "learning_rate": 1.504431911037858e-05, + "loss": 0.54, + "step": 24229 + }, + { + "epoch": 0.6652937946183416, + "grad_norm": 0.40913572907447815, + "learning_rate": 1.5043946186674072e-05, + "loss": 0.4606, + "step": 24230 + }, + { + "epoch": 0.665321252059308, + "grad_norm": 0.3766596019268036, + "learning_rate": 1.5043573253560992e-05, + "loss": 0.3952, + "step": 24231 + }, + { + "epoch": 0.6653487095002746, + "grad_norm": 0.42699214816093445, + "learning_rate": 1.5043200311040041e-05, + "loss": 0.5142, + "step": 24232 + }, + { + "epoch": 0.6653761669412411, + "grad_norm": 0.3776165246963501, + "learning_rate": 1.5042827359111908e-05, + "loss": 0.5193, + "step": 24233 + }, + { + "epoch": 0.6654036243822076, + "grad_norm": 0.38419708609580994, + "learning_rate": 1.5042454397777295e-05, + "loss": 0.3944, + "step": 24234 + }, + { + "epoch": 0.6654310818231741, + "grad_norm": 0.3634811043739319, + "learning_rate": 1.5042081427036896e-05, + "loss": 0.4968, + "step": 24235 + }, + { + "epoch": 0.6654585392641406, + "grad_norm": 0.40527984499931335, + "learning_rate": 1.5041708446891404e-05, + "loss": 0.4491, + "step": 24236 + }, + { + "epoch": 0.6654859967051071, + "grad_norm": 0.40782490372657776, + "learning_rate": 1.5041335457341517e-05, + "loss": 0.577, + "step": 24237 + }, + { + "epoch": 0.6655134541460735, + "grad_norm": 0.41175195574760437, + "learning_rate": 1.504096245838793e-05, + "loss": 0.5168, + "step": 24238 + }, + { + "epoch": 0.6655409115870401, + "grad_norm": 0.4158669114112854, + "learning_rate": 1.5040589450031342e-05, + "loss": 0.4742, + "step": 24239 + }, + { + "epoch": 0.6655683690280065, + "grad_norm": 0.3490431308746338, + "learning_rate": 1.5040216432272444e-05, + "loss": 0.4117, + "step": 24240 + }, + { + "epoch": 0.6655958264689731, + "grad_norm": 0.41116976737976074, + "learning_rate": 1.503984340511193e-05, + "loss": 0.5726, + "step": 24241 + }, + { + "epoch": 0.6656232839099396, + "grad_norm": 0.38836508989334106, + "learning_rate": 1.5039470368550507e-05, + "loss": 0.5151, + "step": 24242 + }, + { + "epoch": 0.6656507413509061, + "grad_norm": 0.3445570468902588, + "learning_rate": 1.5039097322588859e-05, + "loss": 0.4741, + "step": 24243 + }, + { + "epoch": 0.6656781987918726, + "grad_norm": 0.45580556988716125, + "learning_rate": 1.5038724267227686e-05, + "loss": 0.5107, + "step": 24244 + }, + { + "epoch": 0.6657056562328391, + "grad_norm": 0.38449615240097046, + "learning_rate": 1.5038351202467686e-05, + "loss": 0.388, + "step": 24245 + }, + { + "epoch": 0.6657331136738056, + "grad_norm": 0.3545491397380829, + "learning_rate": 1.503797812830955e-05, + "loss": 0.4464, + "step": 24246 + }, + { + "epoch": 0.665760571114772, + "grad_norm": 0.43351510167121887, + "learning_rate": 1.5037605044753979e-05, + "loss": 0.5531, + "step": 24247 + }, + { + "epoch": 0.6657880285557386, + "grad_norm": 0.36498621106147766, + "learning_rate": 1.5037231951801665e-05, + "loss": 0.4834, + "step": 24248 + }, + { + "epoch": 0.6658154859967051, + "grad_norm": 0.3675636947154999, + "learning_rate": 1.5036858849453308e-05, + "loss": 0.5325, + "step": 24249 + }, + { + "epoch": 0.6658429434376716, + "grad_norm": 0.45448777079582214, + "learning_rate": 1.5036485737709598e-05, + "loss": 0.5457, + "step": 24250 + }, + { + "epoch": 0.6658704008786381, + "grad_norm": 0.4208862781524658, + "learning_rate": 1.503611261657124e-05, + "loss": 0.4814, + "step": 24251 + }, + { + "epoch": 0.6658978583196046, + "grad_norm": 0.3787975013256073, + "learning_rate": 1.5035739486038922e-05, + "loss": 0.4756, + "step": 24252 + }, + { + "epoch": 0.6659253157605711, + "grad_norm": 0.3726511299610138, + "learning_rate": 1.503536634611334e-05, + "loss": 0.4438, + "step": 24253 + }, + { + "epoch": 0.6659527732015376, + "grad_norm": 0.41570496559143066, + "learning_rate": 1.5034993196795195e-05, + "loss": 0.4989, + "step": 24254 + }, + { + "epoch": 0.6659802306425041, + "grad_norm": 0.34305539727211, + "learning_rate": 1.5034620038085181e-05, + "loss": 0.4679, + "step": 24255 + }, + { + "epoch": 0.6660076880834707, + "grad_norm": 0.37321698665618896, + "learning_rate": 1.5034246869983994e-05, + "loss": 0.4885, + "step": 24256 + }, + { + "epoch": 0.6660351455244371, + "grad_norm": 0.42823684215545654, + "learning_rate": 1.5033873692492331e-05, + "loss": 0.533, + "step": 24257 + }, + { + "epoch": 0.6660626029654036, + "grad_norm": 0.3562394380569458, + "learning_rate": 1.5033500505610886e-05, + "loss": 0.492, + "step": 24258 + }, + { + "epoch": 0.6660900604063701, + "grad_norm": 0.39827442169189453, + "learning_rate": 1.5033127309340355e-05, + "loss": 0.5359, + "step": 24259 + }, + { + "epoch": 0.6661175178473366, + "grad_norm": 0.3797565698623657, + "learning_rate": 1.5032754103681436e-05, + "loss": 0.5106, + "step": 24260 + }, + { + "epoch": 0.6661449752883031, + "grad_norm": 0.34880509972572327, + "learning_rate": 1.503238088863482e-05, + "loss": 0.4892, + "step": 24261 + }, + { + "epoch": 0.6661724327292696, + "grad_norm": 0.32279735803604126, + "learning_rate": 1.5032007664201213e-05, + "loss": 0.4818, + "step": 24262 + }, + { + "epoch": 0.6661998901702362, + "grad_norm": 0.33914434909820557, + "learning_rate": 1.5031634430381306e-05, + "loss": 0.5107, + "step": 24263 + }, + { + "epoch": 0.6662273476112026, + "grad_norm": 0.3743007779121399, + "learning_rate": 1.5031261187175792e-05, + "loss": 0.5172, + "step": 24264 + }, + { + "epoch": 0.6662548050521692, + "grad_norm": 0.3478195369243622, + "learning_rate": 1.503088793458537e-05, + "loss": 0.4637, + "step": 24265 + }, + { + "epoch": 0.6662822624931356, + "grad_norm": 0.3925755023956299, + "learning_rate": 1.5030514672610738e-05, + "loss": 0.537, + "step": 24266 + }, + { + "epoch": 0.6663097199341022, + "grad_norm": 0.3946317136287689, + "learning_rate": 1.503014140125259e-05, + "loss": 0.5593, + "step": 24267 + }, + { + "epoch": 0.6663371773750686, + "grad_norm": 0.3520088195800781, + "learning_rate": 1.5029768120511623e-05, + "loss": 0.4634, + "step": 24268 + }, + { + "epoch": 0.6663646348160351, + "grad_norm": 0.370897501707077, + "learning_rate": 1.5029394830388535e-05, + "loss": 0.5052, + "step": 24269 + }, + { + "epoch": 0.6663920922570017, + "grad_norm": 0.4122379720211029, + "learning_rate": 1.5029021530884016e-05, + "loss": 0.5165, + "step": 24270 + }, + { + "epoch": 0.6664195496979681, + "grad_norm": 0.4931597411632538, + "learning_rate": 1.502864822199877e-05, + "loss": 0.417, + "step": 24271 + }, + { + "epoch": 0.6664470071389347, + "grad_norm": 0.4112676978111267, + "learning_rate": 1.5028274903733486e-05, + "loss": 0.5693, + "step": 24272 + }, + { + "epoch": 0.6664744645799011, + "grad_norm": 0.36541077494621277, + "learning_rate": 1.5027901576088871e-05, + "loss": 0.509, + "step": 24273 + }, + { + "epoch": 0.6665019220208677, + "grad_norm": 0.4211333990097046, + "learning_rate": 1.5027528239065611e-05, + "loss": 0.5073, + "step": 24274 + }, + { + "epoch": 0.6665293794618341, + "grad_norm": 2.5447514057159424, + "learning_rate": 1.5027154892664406e-05, + "loss": 0.5188, + "step": 24275 + }, + { + "epoch": 0.6665568369028007, + "grad_norm": 0.34201720356941223, + "learning_rate": 1.5026781536885955e-05, + "loss": 0.4568, + "step": 24276 + }, + { + "epoch": 0.6665842943437672, + "grad_norm": 0.5979773998260498, + "learning_rate": 1.502640817173095e-05, + "loss": 0.4701, + "step": 24277 + }, + { + "epoch": 0.6666117517847336, + "grad_norm": 0.3147057890892029, + "learning_rate": 1.502603479720009e-05, + "loss": 0.4287, + "step": 24278 + }, + { + "epoch": 0.6666392092257002, + "grad_norm": 0.5007888078689575, + "learning_rate": 1.5025661413294073e-05, + "loss": 0.4065, + "step": 24279 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.40072354674339294, + "learning_rate": 1.5025288020013588e-05, + "loss": 0.4746, + "step": 24280 + }, + { + "epoch": 0.6666941241076332, + "grad_norm": 0.46675577759742737, + "learning_rate": 1.5024914617359341e-05, + "loss": 0.5428, + "step": 24281 + }, + { + "epoch": 0.6667215815485996, + "grad_norm": 0.4897279441356659, + "learning_rate": 1.5024541205332024e-05, + "loss": 0.6035, + "step": 24282 + }, + { + "epoch": 0.6667490389895662, + "grad_norm": 0.3675212860107422, + "learning_rate": 1.5024167783932336e-05, + "loss": 0.4935, + "step": 24283 + }, + { + "epoch": 0.6667764964305327, + "grad_norm": 0.4407653510570526, + "learning_rate": 1.502379435316097e-05, + "loss": 0.6273, + "step": 24284 + }, + { + "epoch": 0.6668039538714992, + "grad_norm": 0.41590872406959534, + "learning_rate": 1.5023420913018622e-05, + "loss": 0.6079, + "step": 24285 + }, + { + "epoch": 0.6668314113124657, + "grad_norm": 0.3302535116672516, + "learning_rate": 1.5023047463505994e-05, + "loss": 0.479, + "step": 24286 + }, + { + "epoch": 0.6668588687534321, + "grad_norm": 0.41366657614707947, + "learning_rate": 1.5022674004623773e-05, + "loss": 0.5534, + "step": 24287 + }, + { + "epoch": 0.6668863261943987, + "grad_norm": 0.3608993589878082, + "learning_rate": 1.5022300536372669e-05, + "loss": 0.4465, + "step": 24288 + }, + { + "epoch": 0.6669137836353651, + "grad_norm": 0.37615910172462463, + "learning_rate": 1.502192705875337e-05, + "loss": 0.4666, + "step": 24289 + }, + { + "epoch": 0.6669412410763317, + "grad_norm": 0.3864412009716034, + "learning_rate": 1.5021553571766574e-05, + "loss": 0.5269, + "step": 24290 + }, + { + "epoch": 0.6669686985172982, + "grad_norm": 0.49482518434524536, + "learning_rate": 1.5021180075412978e-05, + "loss": 0.5628, + "step": 24291 + }, + { + "epoch": 0.6669961559582647, + "grad_norm": 0.6335674524307251, + "learning_rate": 1.5020806569693278e-05, + "loss": 0.4642, + "step": 24292 + }, + { + "epoch": 0.6670236133992312, + "grad_norm": 0.3880801796913147, + "learning_rate": 1.5020433054608168e-05, + "loss": 0.5422, + "step": 24293 + }, + { + "epoch": 0.6670510708401977, + "grad_norm": 0.36019986867904663, + "learning_rate": 1.5020059530158354e-05, + "loss": 0.4627, + "step": 24294 + }, + { + "epoch": 0.6670785282811642, + "grad_norm": 0.39428842067718506, + "learning_rate": 1.5019685996344522e-05, + "loss": 0.5287, + "step": 24295 + }, + { + "epoch": 0.6671059857221306, + "grad_norm": 0.36318179965019226, + "learning_rate": 1.5019312453167377e-05, + "loss": 0.5142, + "step": 24296 + }, + { + "epoch": 0.6671334431630972, + "grad_norm": 0.4445419907569885, + "learning_rate": 1.501893890062761e-05, + "loss": 0.5263, + "step": 24297 + }, + { + "epoch": 0.6671609006040637, + "grad_norm": 0.4042070508003235, + "learning_rate": 1.501856533872592e-05, + "loss": 0.5395, + "step": 24298 + }, + { + "epoch": 0.6671883580450302, + "grad_norm": 0.4005545675754547, + "learning_rate": 1.5018191767463006e-05, + "loss": 0.4429, + "step": 24299 + }, + { + "epoch": 0.6672158154859967, + "grad_norm": 0.40798091888427734, + "learning_rate": 1.501781818683956e-05, + "loss": 0.4967, + "step": 24300 + }, + { + "epoch": 0.6672432729269632, + "grad_norm": 0.38643401861190796, + "learning_rate": 1.5017444596856283e-05, + "loss": 0.5507, + "step": 24301 + }, + { + "epoch": 0.6672707303679297, + "grad_norm": 0.3932947814464569, + "learning_rate": 1.5017070997513871e-05, + "loss": 0.5038, + "step": 24302 + }, + { + "epoch": 0.6672981878088962, + "grad_norm": 0.3761448860168457, + "learning_rate": 1.501669738881302e-05, + "loss": 0.496, + "step": 24303 + }, + { + "epoch": 0.6673256452498627, + "grad_norm": 0.3224492371082306, + "learning_rate": 1.5016323770754429e-05, + "loss": 0.4321, + "step": 24304 + }, + { + "epoch": 0.6673531026908293, + "grad_norm": 0.38647928833961487, + "learning_rate": 1.5015950143338791e-05, + "loss": 0.413, + "step": 24305 + }, + { + "epoch": 0.6673805601317957, + "grad_norm": 0.37665998935699463, + "learning_rate": 1.5015576506566802e-05, + "loss": 0.5239, + "step": 24306 + }, + { + "epoch": 0.6674080175727622, + "grad_norm": 0.3810124695301056, + "learning_rate": 1.5015202860439168e-05, + "loss": 0.467, + "step": 24307 + }, + { + "epoch": 0.6674354750137287, + "grad_norm": 0.41939398646354675, + "learning_rate": 1.5014829204956576e-05, + "loss": 0.6226, + "step": 24308 + }, + { + "epoch": 0.6674629324546952, + "grad_norm": 0.33680832386016846, + "learning_rate": 1.5014455540119727e-05, + "loss": 0.4647, + "step": 24309 + }, + { + "epoch": 0.6674903898956617, + "grad_norm": 0.378614604473114, + "learning_rate": 1.5014081865929323e-05, + "loss": 0.5379, + "step": 24310 + }, + { + "epoch": 0.6675178473366282, + "grad_norm": 0.40570586919784546, + "learning_rate": 1.5013708182386052e-05, + "loss": 0.5612, + "step": 24311 + }, + { + "epoch": 0.6675453047775948, + "grad_norm": 0.4285777509212494, + "learning_rate": 1.5013334489490613e-05, + "loss": 0.5702, + "step": 24312 + }, + { + "epoch": 0.6675727622185612, + "grad_norm": 0.3776269555091858, + "learning_rate": 1.5012960787243706e-05, + "loss": 0.4954, + "step": 24313 + }, + { + "epoch": 0.6676002196595278, + "grad_norm": 0.4037593603134155, + "learning_rate": 1.501258707564603e-05, + "loss": 0.4616, + "step": 24314 + }, + { + "epoch": 0.6676276771004942, + "grad_norm": 0.3557252585887909, + "learning_rate": 1.5012213354698278e-05, + "loss": 0.4646, + "step": 24315 + }, + { + "epoch": 0.6676551345414607, + "grad_norm": 0.390337735414505, + "learning_rate": 1.5011839624401148e-05, + "loss": 0.472, + "step": 24316 + }, + { + "epoch": 0.6676825919824272, + "grad_norm": 0.41521748900413513, + "learning_rate": 1.5011465884755339e-05, + "loss": 0.4235, + "step": 24317 + }, + { + "epoch": 0.6677100494233937, + "grad_norm": 0.8316264152526855, + "learning_rate": 1.5011092135761542e-05, + "loss": 0.5215, + "step": 24318 + }, + { + "epoch": 0.6677375068643603, + "grad_norm": 0.43116140365600586, + "learning_rate": 1.5010718377420466e-05, + "loss": 0.4586, + "step": 24319 + }, + { + "epoch": 0.6677649643053267, + "grad_norm": 0.3652154207229614, + "learning_rate": 1.5010344609732796e-05, + "loss": 0.4281, + "step": 24320 + }, + { + "epoch": 0.6677924217462933, + "grad_norm": 0.4387313425540924, + "learning_rate": 1.5009970832699234e-05, + "loss": 0.4702, + "step": 24321 + }, + { + "epoch": 0.6678198791872597, + "grad_norm": 0.3595532774925232, + "learning_rate": 1.500959704632048e-05, + "loss": 0.5312, + "step": 24322 + }, + { + "epoch": 0.6678473366282263, + "grad_norm": 0.33936193585395813, + "learning_rate": 1.500922325059723e-05, + "loss": 0.5302, + "step": 24323 + }, + { + "epoch": 0.6678747940691927, + "grad_norm": 0.3752181828022003, + "learning_rate": 1.5008849445530174e-05, + "loss": 0.5571, + "step": 24324 + }, + { + "epoch": 0.6679022515101593, + "grad_norm": 0.34363141655921936, + "learning_rate": 1.500847563112002e-05, + "loss": 0.4118, + "step": 24325 + }, + { + "epoch": 0.6679297089511258, + "grad_norm": 0.3626946210861206, + "learning_rate": 1.5008101807367457e-05, + "loss": 0.4215, + "step": 24326 + }, + { + "epoch": 0.6679571663920922, + "grad_norm": 0.4208669066429138, + "learning_rate": 1.5007727974273189e-05, + "loss": 0.5038, + "step": 24327 + }, + { + "epoch": 0.6679846238330588, + "grad_norm": 0.3616136610507965, + "learning_rate": 1.500735413183791e-05, + "loss": 0.4944, + "step": 24328 + }, + { + "epoch": 0.6680120812740252, + "grad_norm": 0.4310722351074219, + "learning_rate": 1.5006980280062315e-05, + "loss": 0.6103, + "step": 24329 + }, + { + "epoch": 0.6680395387149918, + "grad_norm": 0.3598319888114929, + "learning_rate": 1.5006606418947105e-05, + "loss": 0.4811, + "step": 24330 + }, + { + "epoch": 0.6680669961559582, + "grad_norm": 0.3842642307281494, + "learning_rate": 1.5006232548492974e-05, + "loss": 0.4406, + "step": 24331 + }, + { + "epoch": 0.6680944535969248, + "grad_norm": 0.38358089327812195, + "learning_rate": 1.5005858668700626e-05, + "loss": 0.5383, + "step": 24332 + }, + { + "epoch": 0.6681219110378913, + "grad_norm": 0.33139070868492126, + "learning_rate": 1.500548477957075e-05, + "loss": 0.4286, + "step": 24333 + }, + { + "epoch": 0.6681493684788578, + "grad_norm": 0.43517187237739563, + "learning_rate": 1.5005110881104048e-05, + "loss": 0.5903, + "step": 24334 + }, + { + "epoch": 0.6681768259198243, + "grad_norm": 0.40206030011177063, + "learning_rate": 1.5004736973301217e-05, + "loss": 0.4927, + "step": 24335 + }, + { + "epoch": 0.6682042833607907, + "grad_norm": 0.34805840253829956, + "learning_rate": 1.5004363056162955e-05, + "loss": 0.487, + "step": 24336 + }, + { + "epoch": 0.6682317408017573, + "grad_norm": 0.38826602697372437, + "learning_rate": 1.5003989129689957e-05, + "loss": 0.4662, + "step": 24337 + }, + { + "epoch": 0.6682591982427237, + "grad_norm": 0.41704583168029785, + "learning_rate": 1.5003615193882925e-05, + "loss": 0.5347, + "step": 24338 + }, + { + "epoch": 0.6682866556836903, + "grad_norm": 0.3648903965950012, + "learning_rate": 1.5003241248742551e-05, + "loss": 0.5107, + "step": 24339 + }, + { + "epoch": 0.6683141131246568, + "grad_norm": 0.3982028365135193, + "learning_rate": 1.5002867294269537e-05, + "loss": 0.5505, + "step": 24340 + }, + { + "epoch": 0.6683415705656233, + "grad_norm": 0.40073609352111816, + "learning_rate": 1.500249333046458e-05, + "loss": 0.5213, + "step": 24341 + }, + { + "epoch": 0.6683690280065898, + "grad_norm": 0.40653330087661743, + "learning_rate": 1.5002119357328372e-05, + "loss": 0.5462, + "step": 24342 + }, + { + "epoch": 0.6683964854475563, + "grad_norm": 0.37435877323150635, + "learning_rate": 1.5001745374861618e-05, + "loss": 0.4803, + "step": 24343 + }, + { + "epoch": 0.6684239428885228, + "grad_norm": 0.4322631359100342, + "learning_rate": 1.5001371383065012e-05, + "loss": 0.4894, + "step": 24344 + }, + { + "epoch": 0.6684514003294892, + "grad_norm": 0.36270809173583984, + "learning_rate": 1.5000997381939253e-05, + "loss": 0.5413, + "step": 24345 + }, + { + "epoch": 0.6684788577704558, + "grad_norm": 0.41866815090179443, + "learning_rate": 1.5000623371485037e-05, + "loss": 0.5551, + "step": 24346 + }, + { + "epoch": 0.6685063152114223, + "grad_norm": 0.37811413407325745, + "learning_rate": 1.500024935170306e-05, + "loss": 0.4978, + "step": 24347 + }, + { + "epoch": 0.6685337726523888, + "grad_norm": 0.4079609811306, + "learning_rate": 1.4999875322594026e-05, + "loss": 0.5723, + "step": 24348 + }, + { + "epoch": 0.6685612300933553, + "grad_norm": 0.48508164286613464, + "learning_rate": 1.4999501284158626e-05, + "loss": 0.6176, + "step": 24349 + }, + { + "epoch": 0.6685886875343218, + "grad_norm": 0.4689445495605469, + "learning_rate": 1.4999127236397562e-05, + "loss": 0.4416, + "step": 24350 + }, + { + "epoch": 0.6686161449752883, + "grad_norm": 0.3810408115386963, + "learning_rate": 1.499875317931153e-05, + "loss": 0.4832, + "step": 24351 + }, + { + "epoch": 0.6686436024162548, + "grad_norm": 0.3728795349597931, + "learning_rate": 1.4998379112901226e-05, + "loss": 0.5562, + "step": 24352 + }, + { + "epoch": 0.6686710598572213, + "grad_norm": 0.43987923860549927, + "learning_rate": 1.4998005037167351e-05, + "loss": 0.5487, + "step": 24353 + }, + { + "epoch": 0.6686985172981879, + "grad_norm": 0.3972318768501282, + "learning_rate": 1.4997630952110604e-05, + "loss": 0.5483, + "step": 24354 + }, + { + "epoch": 0.6687259747391543, + "grad_norm": 0.3873419463634491, + "learning_rate": 1.4997256857731677e-05, + "loss": 0.5411, + "step": 24355 + }, + { + "epoch": 0.6687534321801208, + "grad_norm": 0.3939681351184845, + "learning_rate": 1.4996882754031272e-05, + "loss": 0.5142, + "step": 24356 + }, + { + "epoch": 0.6687808896210873, + "grad_norm": 0.5004730224609375, + "learning_rate": 1.4996508641010085e-05, + "loss": 0.5284, + "step": 24357 + }, + { + "epoch": 0.6688083470620538, + "grad_norm": 0.3302323520183563, + "learning_rate": 1.4996134518668814e-05, + "loss": 0.4774, + "step": 24358 + }, + { + "epoch": 0.6688358045030203, + "grad_norm": 0.3861036002635956, + "learning_rate": 1.4995760387008163e-05, + "loss": 0.4676, + "step": 24359 + }, + { + "epoch": 0.6688632619439868, + "grad_norm": 0.3524801731109619, + "learning_rate": 1.4995386246028818e-05, + "loss": 0.5578, + "step": 24360 + }, + { + "epoch": 0.6688907193849534, + "grad_norm": 0.4333026111125946, + "learning_rate": 1.4995012095731487e-05, + "loss": 0.5156, + "step": 24361 + }, + { + "epoch": 0.6689181768259198, + "grad_norm": 0.42605918645858765, + "learning_rate": 1.499463793611686e-05, + "loss": 0.5665, + "step": 24362 + }, + { + "epoch": 0.6689456342668864, + "grad_norm": 0.3261209726333618, + "learning_rate": 1.4994263767185642e-05, + "loss": 0.4203, + "step": 24363 + }, + { + "epoch": 0.6689730917078528, + "grad_norm": 0.3765048086643219, + "learning_rate": 1.499388958893853e-05, + "loss": 0.5693, + "step": 24364 + }, + { + "epoch": 0.6690005491488193, + "grad_norm": 0.41863781213760376, + "learning_rate": 1.4993515401376217e-05, + "loss": 0.5141, + "step": 24365 + }, + { + "epoch": 0.6690280065897858, + "grad_norm": 0.43369126319885254, + "learning_rate": 1.4993141204499406e-05, + "loss": 0.475, + "step": 24366 + }, + { + "epoch": 0.6690554640307523, + "grad_norm": 0.36454156041145325, + "learning_rate": 1.4992766998308792e-05, + "loss": 0.5366, + "step": 24367 + }, + { + "epoch": 0.6690829214717189, + "grad_norm": 0.4030226469039917, + "learning_rate": 1.4992392782805074e-05, + "loss": 0.5922, + "step": 24368 + }, + { + "epoch": 0.6691103789126853, + "grad_norm": 0.4193758964538574, + "learning_rate": 1.499201855798895e-05, + "loss": 0.5467, + "step": 24369 + }, + { + "epoch": 0.6691378363536519, + "grad_norm": 0.4341091811656952, + "learning_rate": 1.4991644323861119e-05, + "loss": 0.5709, + "step": 24370 + }, + { + "epoch": 0.6691652937946183, + "grad_norm": 0.4614722430706024, + "learning_rate": 1.4991270080422278e-05, + "loss": 0.3962, + "step": 24371 + }, + { + "epoch": 0.6691927512355849, + "grad_norm": 0.41258805990219116, + "learning_rate": 1.4990895827673124e-05, + "loss": 0.5031, + "step": 24372 + }, + { + "epoch": 0.6692202086765513, + "grad_norm": 0.39752405881881714, + "learning_rate": 1.4990521565614357e-05, + "loss": 0.4428, + "step": 24373 + }, + { + "epoch": 0.6692476661175178, + "grad_norm": 0.3943370282649994, + "learning_rate": 1.4990147294246675e-05, + "loss": 0.5469, + "step": 24374 + }, + { + "epoch": 0.6692751235584844, + "grad_norm": 0.39323973655700684, + "learning_rate": 1.4989773013570773e-05, + "loss": 0.5471, + "step": 24375 + }, + { + "epoch": 0.6693025809994508, + "grad_norm": 0.4107409417629242, + "learning_rate": 1.4989398723587357e-05, + "loss": 0.4745, + "step": 24376 + }, + { + "epoch": 0.6693300384404174, + "grad_norm": 0.45797106623649597, + "learning_rate": 1.4989024424297118e-05, + "loss": 0.5897, + "step": 24377 + }, + { + "epoch": 0.6693574958813838, + "grad_norm": 0.4222685396671295, + "learning_rate": 1.4988650115700754e-05, + "loss": 0.5418, + "step": 24378 + }, + { + "epoch": 0.6693849533223504, + "grad_norm": 0.4189116060733795, + "learning_rate": 1.4988275797798966e-05, + "loss": 0.503, + "step": 24379 + }, + { + "epoch": 0.6694124107633168, + "grad_norm": 0.36321568489074707, + "learning_rate": 1.4987901470592453e-05, + "loss": 0.485, + "step": 24380 + }, + { + "epoch": 0.6694398682042834, + "grad_norm": 0.44860923290252686, + "learning_rate": 1.498752713408191e-05, + "loss": 0.5239, + "step": 24381 + }, + { + "epoch": 0.6694673256452499, + "grad_norm": 0.40222999453544617, + "learning_rate": 1.4987152788268037e-05, + "loss": 0.427, + "step": 24382 + }, + { + "epoch": 0.6694947830862163, + "grad_norm": 0.4308027923107147, + "learning_rate": 1.4986778433151534e-05, + "loss": 0.5102, + "step": 24383 + }, + { + "epoch": 0.6695222405271829, + "grad_norm": 0.3822615146636963, + "learning_rate": 1.4986404068733099e-05, + "loss": 0.4638, + "step": 24384 + }, + { + "epoch": 0.6695496979681493, + "grad_norm": 0.3790890872478485, + "learning_rate": 1.4986029695013426e-05, + "loss": 0.4502, + "step": 24385 + }, + { + "epoch": 0.6695771554091159, + "grad_norm": 0.3913233280181885, + "learning_rate": 1.4985655311993217e-05, + "loss": 0.5257, + "step": 24386 + }, + { + "epoch": 0.6696046128500823, + "grad_norm": 0.4571707844734192, + "learning_rate": 1.4985280919673169e-05, + "loss": 0.48, + "step": 24387 + }, + { + "epoch": 0.6696320702910489, + "grad_norm": 0.37993302941322327, + "learning_rate": 1.4984906518053981e-05, + "loss": 0.472, + "step": 24388 + }, + { + "epoch": 0.6696595277320154, + "grad_norm": 0.34993407130241394, + "learning_rate": 1.4984532107136354e-05, + "loss": 0.4845, + "step": 24389 + }, + { + "epoch": 0.6696869851729819, + "grad_norm": 0.3929523825645447, + "learning_rate": 1.4984157686920981e-05, + "loss": 0.4319, + "step": 24390 + }, + { + "epoch": 0.6697144426139484, + "grad_norm": 0.3905077278614044, + "learning_rate": 1.4983783257408565e-05, + "loss": 0.5104, + "step": 24391 + }, + { + "epoch": 0.6697419000549149, + "grad_norm": 0.36483293771743774, + "learning_rate": 1.4983408818599801e-05, + "loss": 0.4944, + "step": 24392 + }, + { + "epoch": 0.6697693574958814, + "grad_norm": 0.35506492853164673, + "learning_rate": 1.498303437049539e-05, + "loss": 0.5292, + "step": 24393 + }, + { + "epoch": 0.6697968149368478, + "grad_norm": 0.3880969285964966, + "learning_rate": 1.498265991309603e-05, + "loss": 0.4419, + "step": 24394 + }, + { + "epoch": 0.6698242723778144, + "grad_norm": 0.3575671315193176, + "learning_rate": 1.4982285446402416e-05, + "loss": 0.4682, + "step": 24395 + }, + { + "epoch": 0.6698517298187809, + "grad_norm": 0.3916985094547272, + "learning_rate": 1.498191097041525e-05, + "loss": 0.4762, + "step": 24396 + }, + { + "epoch": 0.6698791872597474, + "grad_norm": 0.31731340289115906, + "learning_rate": 1.4981536485135233e-05, + "loss": 0.4522, + "step": 24397 + }, + { + "epoch": 0.6699066447007139, + "grad_norm": 0.3951798677444458, + "learning_rate": 1.498116199056306e-05, + "loss": 0.495, + "step": 24398 + }, + { + "epoch": 0.6699341021416804, + "grad_norm": 0.3886653482913971, + "learning_rate": 1.498078748669943e-05, + "loss": 0.5202, + "step": 24399 + }, + { + "epoch": 0.6699615595826469, + "grad_norm": 0.3919002115726471, + "learning_rate": 1.498041297354504e-05, + "loss": 0.5527, + "step": 24400 + }, + { + "epoch": 0.6699890170236134, + "grad_norm": 0.37803444266319275, + "learning_rate": 1.4980038451100589e-05, + "loss": 0.4434, + "step": 24401 + }, + { + "epoch": 0.6700164744645799, + "grad_norm": 0.3771652579307556, + "learning_rate": 1.497966391936678e-05, + "loss": 0.4572, + "step": 24402 + }, + { + "epoch": 0.6700439319055465, + "grad_norm": 0.33237597346305847, + "learning_rate": 1.4979289378344309e-05, + "loss": 0.4936, + "step": 24403 + }, + { + "epoch": 0.6700713893465129, + "grad_norm": 0.37814798951148987, + "learning_rate": 1.4978914828033871e-05, + "loss": 0.5242, + "step": 24404 + }, + { + "epoch": 0.6700988467874794, + "grad_norm": 0.4509678781032562, + "learning_rate": 1.4978540268436171e-05, + "loss": 0.511, + "step": 24405 + }, + { + "epoch": 0.6701263042284459, + "grad_norm": 0.39572659134864807, + "learning_rate": 1.4978165699551901e-05, + "loss": 0.427, + "step": 24406 + }, + { + "epoch": 0.6701537616694124, + "grad_norm": 0.32596614956855774, + "learning_rate": 1.4977791121381766e-05, + "loss": 0.4426, + "step": 24407 + }, + { + "epoch": 0.6701812191103789, + "grad_norm": 0.3713546097278595, + "learning_rate": 1.4977416533926461e-05, + "loss": 0.4539, + "step": 24408 + }, + { + "epoch": 0.6702086765513454, + "grad_norm": 0.44320282340049744, + "learning_rate": 1.4977041937186683e-05, + "loss": 0.5377, + "step": 24409 + }, + { + "epoch": 0.670236133992312, + "grad_norm": 0.36684906482696533, + "learning_rate": 1.4976667331163138e-05, + "loss": 0.4875, + "step": 24410 + }, + { + "epoch": 0.6702635914332784, + "grad_norm": 0.3943157494068146, + "learning_rate": 1.4976292715856515e-05, + "loss": 0.5146, + "step": 24411 + }, + { + "epoch": 0.670291048874245, + "grad_norm": 0.8033331632614136, + "learning_rate": 1.497591809126752e-05, + "loss": 0.4514, + "step": 24412 + }, + { + "epoch": 0.6703185063152114, + "grad_norm": 0.3910847008228302, + "learning_rate": 1.497554345739685e-05, + "loss": 0.5179, + "step": 24413 + }, + { + "epoch": 0.6703459637561779, + "grad_norm": 0.33453473448753357, + "learning_rate": 1.4975168814245204e-05, + "loss": 0.4003, + "step": 24414 + }, + { + "epoch": 0.6703734211971444, + "grad_norm": 0.37554192543029785, + "learning_rate": 1.497479416181328e-05, + "loss": 0.5238, + "step": 24415 + }, + { + "epoch": 0.6704008786381109, + "grad_norm": 0.38779062032699585, + "learning_rate": 1.4974419500101778e-05, + "loss": 0.5791, + "step": 24416 + }, + { + "epoch": 0.6704283360790775, + "grad_norm": 0.41506287455558777, + "learning_rate": 1.4974044829111393e-05, + "loss": 0.5785, + "step": 24417 + }, + { + "epoch": 0.6704557935200439, + "grad_norm": 0.3559380769729614, + "learning_rate": 1.497367014884283e-05, + "loss": 0.4849, + "step": 24418 + }, + { + "epoch": 0.6704832509610105, + "grad_norm": 0.4134364426136017, + "learning_rate": 1.4973295459296781e-05, + "loss": 0.5252, + "step": 24419 + }, + { + "epoch": 0.6705107084019769, + "grad_norm": 0.42875584959983826, + "learning_rate": 1.4972920760473954e-05, + "loss": 0.4843, + "step": 24420 + }, + { + "epoch": 0.6705381658429435, + "grad_norm": 0.39449357986450195, + "learning_rate": 1.497254605237504e-05, + "loss": 0.531, + "step": 24421 + }, + { + "epoch": 0.6705656232839099, + "grad_norm": 0.34989890456199646, + "learning_rate": 1.4972171335000737e-05, + "loss": 0.4463, + "step": 24422 + }, + { + "epoch": 0.6705930807248764, + "grad_norm": 0.3672311007976532, + "learning_rate": 1.4971796608351753e-05, + "loss": 0.4691, + "step": 24423 + }, + { + "epoch": 0.670620538165843, + "grad_norm": 0.38913196325302124, + "learning_rate": 1.497142187242878e-05, + "loss": 0.4898, + "step": 24424 + }, + { + "epoch": 0.6706479956068094, + "grad_norm": 0.43850991129875183, + "learning_rate": 1.4971047127232517e-05, + "loss": 0.5865, + "step": 24425 + }, + { + "epoch": 0.670675453047776, + "grad_norm": 0.3611348867416382, + "learning_rate": 1.4970672372763666e-05, + "loss": 0.4693, + "step": 24426 + }, + { + "epoch": 0.6707029104887424, + "grad_norm": 0.40363195538520813, + "learning_rate": 1.4970297609022923e-05, + "loss": 0.4555, + "step": 24427 + }, + { + "epoch": 0.670730367929709, + "grad_norm": 0.36001938581466675, + "learning_rate": 1.4969922836010991e-05, + "loss": 0.4449, + "step": 24428 + }, + { + "epoch": 0.6707578253706754, + "grad_norm": 0.4865620732307434, + "learning_rate": 1.4969548053728565e-05, + "loss": 0.4578, + "step": 24429 + }, + { + "epoch": 0.670785282811642, + "grad_norm": 0.35550743341445923, + "learning_rate": 1.4969173262176347e-05, + "loss": 0.4586, + "step": 24430 + }, + { + "epoch": 0.6708127402526085, + "grad_norm": 0.36922210454940796, + "learning_rate": 1.4968798461355032e-05, + "loss": 0.5186, + "step": 24431 + }, + { + "epoch": 0.670840197693575, + "grad_norm": 0.3907538056373596, + "learning_rate": 1.4968423651265322e-05, + "loss": 0.5083, + "step": 24432 + }, + { + "epoch": 0.6708676551345415, + "grad_norm": 0.38454726338386536, + "learning_rate": 1.4968048831907921e-05, + "loss": 0.5063, + "step": 24433 + }, + { + "epoch": 0.6708951125755079, + "grad_norm": 0.3746526837348938, + "learning_rate": 1.496767400328352e-05, + "loss": 0.5778, + "step": 24434 + }, + { + "epoch": 0.6709225700164745, + "grad_norm": 0.4165246784687042, + "learning_rate": 1.4967299165392821e-05, + "loss": 0.488, + "step": 24435 + }, + { + "epoch": 0.6709500274574409, + "grad_norm": 0.4082239270210266, + "learning_rate": 1.4966924318236524e-05, + "loss": 0.4991, + "step": 24436 + }, + { + "epoch": 0.6709774848984075, + "grad_norm": 0.39807015657424927, + "learning_rate": 1.496654946181533e-05, + "loss": 0.569, + "step": 24437 + }, + { + "epoch": 0.671004942339374, + "grad_norm": 0.3520061671733856, + "learning_rate": 1.4966174596129935e-05, + "loss": 0.5158, + "step": 24438 + }, + { + "epoch": 0.6710323997803405, + "grad_norm": 0.4027259349822998, + "learning_rate": 1.4965799721181038e-05, + "loss": 0.5252, + "step": 24439 + }, + { + "epoch": 0.671059857221307, + "grad_norm": 0.3892516493797302, + "learning_rate": 1.496542483696934e-05, + "loss": 0.4177, + "step": 24440 + }, + { + "epoch": 0.6710873146622734, + "grad_norm": 0.4236079752445221, + "learning_rate": 1.496504994349554e-05, + "loss": 0.4811, + "step": 24441 + }, + { + "epoch": 0.67111477210324, + "grad_norm": 0.4508568346500397, + "learning_rate": 1.496467504076034e-05, + "loss": 0.5621, + "step": 24442 + }, + { + "epoch": 0.6711422295442064, + "grad_norm": 0.40316540002822876, + "learning_rate": 1.496430012876443e-05, + "loss": 0.5559, + "step": 24443 + }, + { + "epoch": 0.671169686985173, + "grad_norm": 0.3428434431552887, + "learning_rate": 1.4963925207508523e-05, + "loss": 0.5307, + "step": 24444 + }, + { + "epoch": 0.6711971444261395, + "grad_norm": 0.45570889115333557, + "learning_rate": 1.4963550276993308e-05, + "loss": 0.545, + "step": 24445 + }, + { + "epoch": 0.671224601867106, + "grad_norm": 0.41465601325035095, + "learning_rate": 1.4963175337219487e-05, + "loss": 0.5224, + "step": 24446 + }, + { + "epoch": 0.6712520593080725, + "grad_norm": 0.37135088443756104, + "learning_rate": 1.4962800388187761e-05, + "loss": 0.5557, + "step": 24447 + }, + { + "epoch": 0.671279516749039, + "grad_norm": 0.3866599500179291, + "learning_rate": 1.4962425429898826e-05, + "loss": 0.4375, + "step": 24448 + }, + { + "epoch": 0.6713069741900055, + "grad_norm": 0.48393958806991577, + "learning_rate": 1.4962050462353388e-05, + "loss": 0.485, + "step": 24449 + }, + { + "epoch": 0.671334431630972, + "grad_norm": 0.4009033143520355, + "learning_rate": 1.4961675485552141e-05, + "loss": 0.4335, + "step": 24450 + }, + { + "epoch": 0.6713618890719385, + "grad_norm": 0.4112264811992645, + "learning_rate": 1.4961300499495783e-05, + "loss": 0.4858, + "step": 24451 + }, + { + "epoch": 0.671389346512905, + "grad_norm": 0.40976840257644653, + "learning_rate": 1.4960925504185018e-05, + "loss": 0.4333, + "step": 24452 + }, + { + "epoch": 0.6714168039538715, + "grad_norm": 0.39261680841445923, + "learning_rate": 1.4960550499620543e-05, + "loss": 0.474, + "step": 24453 + }, + { + "epoch": 0.671444261394838, + "grad_norm": 0.38440343737602234, + "learning_rate": 1.496017548580306e-05, + "loss": 0.5004, + "step": 24454 + }, + { + "epoch": 0.6714717188358045, + "grad_norm": 0.4346105456352234, + "learning_rate": 1.4959800462733264e-05, + "loss": 0.5456, + "step": 24455 + }, + { + "epoch": 0.671499176276771, + "grad_norm": 0.39084070920944214, + "learning_rate": 1.4959425430411858e-05, + "loss": 0.5208, + "step": 24456 + }, + { + "epoch": 0.6715266337177375, + "grad_norm": 0.39274176955223083, + "learning_rate": 1.495905038883954e-05, + "loss": 0.4488, + "step": 24457 + }, + { + "epoch": 0.671554091158704, + "grad_norm": 0.4990578591823578, + "learning_rate": 1.495867533801701e-05, + "loss": 0.4589, + "step": 24458 + }, + { + "epoch": 0.6715815485996706, + "grad_norm": 0.398885577917099, + "learning_rate": 1.495830027794497e-05, + "loss": 0.4637, + "step": 24459 + }, + { + "epoch": 0.671609006040637, + "grad_norm": 0.35534748435020447, + "learning_rate": 1.4957925208624118e-05, + "loss": 0.4969, + "step": 24460 + }, + { + "epoch": 0.6716364634816036, + "grad_norm": 0.34399113059043884, + "learning_rate": 1.495755013005515e-05, + "loss": 0.4393, + "step": 24461 + }, + { + "epoch": 0.67166392092257, + "grad_norm": 0.3440386950969696, + "learning_rate": 1.495717504223877e-05, + "loss": 0.4292, + "step": 24462 + }, + { + "epoch": 0.6716913783635365, + "grad_norm": 0.3939066529273987, + "learning_rate": 1.4956799945175674e-05, + "loss": 0.491, + "step": 24463 + }, + { + "epoch": 0.671718835804503, + "grad_norm": 0.34361159801483154, + "learning_rate": 1.495642483886657e-05, + "loss": 0.4754, + "step": 24464 + }, + { + "epoch": 0.6717462932454695, + "grad_norm": 0.8950289487838745, + "learning_rate": 1.495604972331215e-05, + "loss": 0.4859, + "step": 24465 + }, + { + "epoch": 0.6717737506864361, + "grad_norm": 0.38327756524086, + "learning_rate": 1.4955674598513113e-05, + "loss": 0.4449, + "step": 24466 + }, + { + "epoch": 0.6718012081274025, + "grad_norm": 0.41009992361068726, + "learning_rate": 1.4955299464470164e-05, + "loss": 0.4658, + "step": 24467 + }, + { + "epoch": 0.6718286655683691, + "grad_norm": 0.38031160831451416, + "learning_rate": 1.4954924321183994e-05, + "loss": 0.4434, + "step": 24468 + }, + { + "epoch": 0.6718561230093355, + "grad_norm": 0.4130900204181671, + "learning_rate": 1.4954549168655315e-05, + "loss": 0.5147, + "step": 24469 + }, + { + "epoch": 0.671883580450302, + "grad_norm": 0.38960033655166626, + "learning_rate": 1.4954174006884818e-05, + "loss": 0.4447, + "step": 24470 + }, + { + "epoch": 0.6719110378912685, + "grad_norm": 0.36396655440330505, + "learning_rate": 1.4953798835873205e-05, + "loss": 0.4915, + "step": 24471 + }, + { + "epoch": 0.671938495332235, + "grad_norm": 0.4284327030181885, + "learning_rate": 1.4953423655621178e-05, + "loss": 0.4925, + "step": 24472 + }, + { + "epoch": 0.6719659527732016, + "grad_norm": 0.4440813660621643, + "learning_rate": 1.4953048466129434e-05, + "loss": 0.503, + "step": 24473 + }, + { + "epoch": 0.671993410214168, + "grad_norm": 0.4893396496772766, + "learning_rate": 1.4952673267398674e-05, + "loss": 0.5462, + "step": 24474 + }, + { + "epoch": 0.6720208676551346, + "grad_norm": 0.3778676688671112, + "learning_rate": 1.4952298059429597e-05, + "loss": 0.4204, + "step": 24475 + }, + { + "epoch": 0.672048325096101, + "grad_norm": 0.4116414785385132, + "learning_rate": 1.4951922842222901e-05, + "loss": 0.4551, + "step": 24476 + }, + { + "epoch": 0.6720757825370676, + "grad_norm": 0.3751869201660156, + "learning_rate": 1.4951547615779294e-05, + "loss": 0.4486, + "step": 24477 + }, + { + "epoch": 0.672103239978034, + "grad_norm": 0.40163102746009827, + "learning_rate": 1.4951172380099467e-05, + "loss": 0.5748, + "step": 24478 + }, + { + "epoch": 0.6721306974190006, + "grad_norm": 0.34393271803855896, + "learning_rate": 1.4950797135184123e-05, + "loss": 0.4346, + "step": 24479 + }, + { + "epoch": 0.6721581548599671, + "grad_norm": 0.4224848449230194, + "learning_rate": 1.4950421881033964e-05, + "loss": 0.5778, + "step": 24480 + }, + { + "epoch": 0.6721856123009335, + "grad_norm": 0.4111120104789734, + "learning_rate": 1.4950046617649684e-05, + "loss": 0.4551, + "step": 24481 + }, + { + "epoch": 0.6722130697419001, + "grad_norm": 0.7008498311042786, + "learning_rate": 1.4949671345031992e-05, + "loss": 0.5095, + "step": 24482 + }, + { + "epoch": 0.6722405271828665, + "grad_norm": 0.3964710831642151, + "learning_rate": 1.4949296063181584e-05, + "loss": 0.5372, + "step": 24483 + }, + { + "epoch": 0.6722679846238331, + "grad_norm": 0.3755912482738495, + "learning_rate": 1.4948920772099156e-05, + "loss": 0.5495, + "step": 24484 + }, + { + "epoch": 0.6722954420647995, + "grad_norm": 0.3712335526943207, + "learning_rate": 1.4948545471785411e-05, + "loss": 0.4229, + "step": 24485 + }, + { + "epoch": 0.6723228995057661, + "grad_norm": 0.3628402352333069, + "learning_rate": 1.494817016224105e-05, + "loss": 0.4856, + "step": 24486 + }, + { + "epoch": 0.6723503569467326, + "grad_norm": 0.3742866516113281, + "learning_rate": 1.4947794843466774e-05, + "loss": 0.5166, + "step": 24487 + }, + { + "epoch": 0.6723778143876991, + "grad_norm": 0.37783434987068176, + "learning_rate": 1.494741951546328e-05, + "loss": 0.5001, + "step": 24488 + }, + { + "epoch": 0.6724052718286656, + "grad_norm": 0.42667174339294434, + "learning_rate": 1.494704417823127e-05, + "loss": 0.5584, + "step": 24489 + }, + { + "epoch": 0.672432729269632, + "grad_norm": 0.33719876408576965, + "learning_rate": 1.4946668831771443e-05, + "loss": 0.5239, + "step": 24490 + }, + { + "epoch": 0.6724601867105986, + "grad_norm": 0.37199804186820984, + "learning_rate": 1.4946293476084501e-05, + "loss": 0.4333, + "step": 24491 + }, + { + "epoch": 0.672487644151565, + "grad_norm": 0.3581751883029938, + "learning_rate": 1.4945918111171141e-05, + "loss": 0.5307, + "step": 24492 + }, + { + "epoch": 0.6725151015925316, + "grad_norm": 0.37696823477745056, + "learning_rate": 1.4945542737032066e-05, + "loss": 0.5229, + "step": 24493 + }, + { + "epoch": 0.6725425590334981, + "grad_norm": 0.3762693703174591, + "learning_rate": 1.4945167353667975e-05, + "loss": 0.5006, + "step": 24494 + }, + { + "epoch": 0.6725700164744646, + "grad_norm": 0.4108847975730896, + "learning_rate": 1.494479196107957e-05, + "loss": 0.4629, + "step": 24495 + }, + { + "epoch": 0.6725974739154311, + "grad_norm": 0.4466383159160614, + "learning_rate": 1.494441655926755e-05, + "loss": 0.5142, + "step": 24496 + }, + { + "epoch": 0.6726249313563976, + "grad_norm": 0.39571741223335266, + "learning_rate": 1.4944041148232612e-05, + "loss": 0.5531, + "step": 24497 + }, + { + "epoch": 0.6726523887973641, + "grad_norm": 0.4102534353733063, + "learning_rate": 1.4943665727975463e-05, + "loss": 0.4842, + "step": 24498 + }, + { + "epoch": 0.6726798462383305, + "grad_norm": 0.39369282126426697, + "learning_rate": 1.4943290298496798e-05, + "loss": 0.5472, + "step": 24499 + }, + { + "epoch": 0.6727073036792971, + "grad_norm": 0.3298470973968506, + "learning_rate": 1.494291485979732e-05, + "loss": 0.4777, + "step": 24500 + }, + { + "epoch": 0.6727347611202636, + "grad_norm": 0.5239458084106445, + "learning_rate": 1.4942539411877727e-05, + "loss": 0.5037, + "step": 24501 + }, + { + "epoch": 0.6727622185612301, + "grad_norm": 0.31752511858940125, + "learning_rate": 1.494216395473872e-05, + "loss": 0.4761, + "step": 24502 + }, + { + "epoch": 0.6727896760021966, + "grad_norm": 0.3949616849422455, + "learning_rate": 1.4941788488381002e-05, + "loss": 0.5349, + "step": 24503 + }, + { + "epoch": 0.6728171334431631, + "grad_norm": 0.38686633110046387, + "learning_rate": 1.4941413012805267e-05, + "loss": 0.5575, + "step": 24504 + }, + { + "epoch": 0.6728445908841296, + "grad_norm": 0.3982712924480438, + "learning_rate": 1.4941037528012224e-05, + "loss": 0.5018, + "step": 24505 + }, + { + "epoch": 0.6728720483250961, + "grad_norm": 0.3536010980606079, + "learning_rate": 1.4940662034002569e-05, + "loss": 0.4696, + "step": 24506 + }, + { + "epoch": 0.6728995057660626, + "grad_norm": 0.40320783853530884, + "learning_rate": 1.4940286530776998e-05, + "loss": 0.4809, + "step": 24507 + }, + { + "epoch": 0.672926963207029, + "grad_norm": 0.3977469503879547, + "learning_rate": 1.493991101833622e-05, + "loss": 0.5186, + "step": 24508 + }, + { + "epoch": 0.6729544206479956, + "grad_norm": 0.34845396876335144, + "learning_rate": 1.493953549668093e-05, + "loss": 0.4973, + "step": 24509 + }, + { + "epoch": 0.6729818780889621, + "grad_norm": 0.37814903259277344, + "learning_rate": 1.4939159965811829e-05, + "loss": 0.524, + "step": 24510 + }, + { + "epoch": 0.6730093355299286, + "grad_norm": 0.35990217328071594, + "learning_rate": 1.493878442572962e-05, + "loss": 0.4931, + "step": 24511 + }, + { + "epoch": 0.6730367929708951, + "grad_norm": 0.3558020293712616, + "learning_rate": 1.4938408876435e-05, + "loss": 0.4619, + "step": 24512 + }, + { + "epoch": 0.6730642504118616, + "grad_norm": 0.36216166615486145, + "learning_rate": 1.4938033317928672e-05, + "loss": 0.4477, + "step": 24513 + }, + { + "epoch": 0.6730917078528281, + "grad_norm": 0.3496515452861786, + "learning_rate": 1.4937657750211339e-05, + "loss": 0.4388, + "step": 24514 + }, + { + "epoch": 0.6731191652937946, + "grad_norm": 0.4190749228000641, + "learning_rate": 1.4937282173283692e-05, + "loss": 0.4998, + "step": 24515 + }, + { + "epoch": 0.6731466227347611, + "grad_norm": 0.37252727150917053, + "learning_rate": 1.4936906587146443e-05, + "loss": 0.48, + "step": 24516 + }, + { + "epoch": 0.6731740801757277, + "grad_norm": 0.430812269449234, + "learning_rate": 1.4936530991800284e-05, + "loss": 0.4833, + "step": 24517 + }, + { + "epoch": 0.6732015376166941, + "grad_norm": 0.3485341966152191, + "learning_rate": 1.4936155387245921e-05, + "loss": 0.4742, + "step": 24518 + }, + { + "epoch": 0.6732289950576607, + "grad_norm": 0.3503608703613281, + "learning_rate": 1.4935779773484054e-05, + "loss": 0.5123, + "step": 24519 + }, + { + "epoch": 0.6732564524986271, + "grad_norm": 0.3595551550388336, + "learning_rate": 1.493540415051538e-05, + "loss": 0.4135, + "step": 24520 + }, + { + "epoch": 0.6732839099395936, + "grad_norm": 0.5299739241600037, + "learning_rate": 1.4935028518340604e-05, + "loss": 0.5142, + "step": 24521 + }, + { + "epoch": 0.6733113673805601, + "grad_norm": 0.4050653278827667, + "learning_rate": 1.4934652876960425e-05, + "loss": 0.541, + "step": 24522 + }, + { + "epoch": 0.6733388248215266, + "grad_norm": 0.364177942276001, + "learning_rate": 1.493427722637554e-05, + "loss": 0.5486, + "step": 24523 + }, + { + "epoch": 0.6733662822624932, + "grad_norm": 0.3974637985229492, + "learning_rate": 1.4933901566586658e-05, + "loss": 0.4585, + "step": 24524 + }, + { + "epoch": 0.6733937397034596, + "grad_norm": 0.36399203538894653, + "learning_rate": 1.493352589759447e-05, + "loss": 0.3959, + "step": 24525 + }, + { + "epoch": 0.6734211971444262, + "grad_norm": 0.43902236223220825, + "learning_rate": 1.4933150219399686e-05, + "loss": 0.4614, + "step": 24526 + }, + { + "epoch": 0.6734486545853926, + "grad_norm": 0.4430224299430847, + "learning_rate": 1.4932774532003001e-05, + "loss": 0.5336, + "step": 24527 + }, + { + "epoch": 0.6734761120263592, + "grad_norm": 0.37345507740974426, + "learning_rate": 1.4932398835405115e-05, + "loss": 0.5204, + "step": 24528 + }, + { + "epoch": 0.6735035694673256, + "grad_norm": 0.4007030129432678, + "learning_rate": 1.4932023129606732e-05, + "loss": 0.511, + "step": 24529 + }, + { + "epoch": 0.6735310269082921, + "grad_norm": 0.40268856287002563, + "learning_rate": 1.4931647414608551e-05, + "loss": 0.5501, + "step": 24530 + }, + { + "epoch": 0.6735584843492587, + "grad_norm": 0.46880510449409485, + "learning_rate": 1.4931271690411276e-05, + "loss": 0.519, + "step": 24531 + }, + { + "epoch": 0.6735859417902251, + "grad_norm": 0.47139108180999756, + "learning_rate": 1.4930895957015606e-05, + "loss": 0.5584, + "step": 24532 + }, + { + "epoch": 0.6736133992311917, + "grad_norm": 0.40465277433395386, + "learning_rate": 1.4930520214422237e-05, + "loss": 0.5799, + "step": 24533 + }, + { + "epoch": 0.6736408566721581, + "grad_norm": 0.332698792219162, + "learning_rate": 1.4930144462631877e-05, + "loss": 0.3716, + "step": 24534 + }, + { + "epoch": 0.6736683141131247, + "grad_norm": 0.4443698227405548, + "learning_rate": 1.4929768701645222e-05, + "loss": 0.5167, + "step": 24535 + }, + { + "epoch": 0.6736957715540911, + "grad_norm": 0.3921014666557312, + "learning_rate": 1.4929392931462979e-05, + "loss": 0.4969, + "step": 24536 + }, + { + "epoch": 0.6737232289950577, + "grad_norm": 0.3766387104988098, + "learning_rate": 1.4929017152085843e-05, + "loss": 0.606, + "step": 24537 + }, + { + "epoch": 0.6737506864360242, + "grad_norm": 0.34511616826057434, + "learning_rate": 1.4928641363514513e-05, + "loss": 0.5163, + "step": 24538 + }, + { + "epoch": 0.6737781438769906, + "grad_norm": 0.37016546726226807, + "learning_rate": 1.4928265565749697e-05, + "loss": 0.5239, + "step": 24539 + }, + { + "epoch": 0.6738056013179572, + "grad_norm": 0.4538094699382782, + "learning_rate": 1.4927889758792095e-05, + "loss": 0.612, + "step": 24540 + }, + { + "epoch": 0.6738330587589236, + "grad_norm": 0.404949814081192, + "learning_rate": 1.4927513942642402e-05, + "loss": 0.4993, + "step": 24541 + }, + { + "epoch": 0.6738605161998902, + "grad_norm": 0.37416964769363403, + "learning_rate": 1.4927138117301328e-05, + "loss": 0.451, + "step": 24542 + }, + { + "epoch": 0.6738879736408566, + "grad_norm": 0.4025149345397949, + "learning_rate": 1.4926762282769563e-05, + "loss": 0.5023, + "step": 24543 + }, + { + "epoch": 0.6739154310818232, + "grad_norm": 0.3611920177936554, + "learning_rate": 1.4926386439047817e-05, + "loss": 0.5549, + "step": 24544 + }, + { + "epoch": 0.6739428885227897, + "grad_norm": 0.3956725001335144, + "learning_rate": 1.4926010586136787e-05, + "loss": 0.4103, + "step": 24545 + }, + { + "epoch": 0.6739703459637562, + "grad_norm": 0.39659959077835083, + "learning_rate": 1.4925634724037175e-05, + "loss": 0.5993, + "step": 24546 + }, + { + "epoch": 0.6739978034047227, + "grad_norm": 0.3755542039871216, + "learning_rate": 1.4925258852749682e-05, + "loss": 0.4324, + "step": 24547 + }, + { + "epoch": 0.6740252608456891, + "grad_norm": 0.3986935019493103, + "learning_rate": 1.492488297227501e-05, + "loss": 0.5585, + "step": 24548 + }, + { + "epoch": 0.6740527182866557, + "grad_norm": 0.4030296206474304, + "learning_rate": 1.492450708261386e-05, + "loss": 0.5238, + "step": 24549 + }, + { + "epoch": 0.6740801757276221, + "grad_norm": 0.35807281732559204, + "learning_rate": 1.4924131183766932e-05, + "loss": 0.4775, + "step": 24550 + }, + { + "epoch": 0.6741076331685887, + "grad_norm": 0.3918784558773041, + "learning_rate": 1.4923755275734926e-05, + "loss": 0.4318, + "step": 24551 + }, + { + "epoch": 0.6741350906095552, + "grad_norm": 0.3889288604259491, + "learning_rate": 1.4923379358518548e-05, + "loss": 0.4331, + "step": 24552 + }, + { + "epoch": 0.6741625480505217, + "grad_norm": 0.3840411305427551, + "learning_rate": 1.4923003432118492e-05, + "loss": 0.4858, + "step": 24553 + }, + { + "epoch": 0.6741900054914882, + "grad_norm": 0.38037073612213135, + "learning_rate": 1.4922627496535466e-05, + "loss": 0.5236, + "step": 24554 + }, + { + "epoch": 0.6742174629324547, + "grad_norm": 0.37249642610549927, + "learning_rate": 1.4922251551770171e-05, + "loss": 0.5037, + "step": 24555 + }, + { + "epoch": 0.6742449203734212, + "grad_norm": 0.3978208601474762, + "learning_rate": 1.4921875597823303e-05, + "loss": 0.4535, + "step": 24556 + }, + { + "epoch": 0.6742723778143876, + "grad_norm": 0.5610777735710144, + "learning_rate": 1.4921499634695567e-05, + "loss": 0.5917, + "step": 24557 + }, + { + "epoch": 0.6742998352553542, + "grad_norm": 0.3878350257873535, + "learning_rate": 1.4921123662387662e-05, + "loss": 0.453, + "step": 24558 + }, + { + "epoch": 0.6743272926963207, + "grad_norm": 0.3919682204723358, + "learning_rate": 1.4920747680900292e-05, + "loss": 0.5592, + "step": 24559 + }, + { + "epoch": 0.6743547501372872, + "grad_norm": 0.3920331299304962, + "learning_rate": 1.4920371690234156e-05, + "loss": 0.4849, + "step": 24560 + }, + { + "epoch": 0.6743822075782537, + "grad_norm": 0.3460105359554291, + "learning_rate": 1.4919995690389958e-05, + "loss": 0.5209, + "step": 24561 + }, + { + "epoch": 0.6744096650192202, + "grad_norm": 0.38608741760253906, + "learning_rate": 1.4919619681368396e-05, + "loss": 0.5291, + "step": 24562 + }, + { + "epoch": 0.6744371224601867, + "grad_norm": 0.35018467903137207, + "learning_rate": 1.4919243663170176e-05, + "loss": 0.4278, + "step": 24563 + }, + { + "epoch": 0.6744645799011532, + "grad_norm": 0.32496705651283264, + "learning_rate": 1.4918867635795993e-05, + "loss": 0.4549, + "step": 24564 + }, + { + "epoch": 0.6744920373421197, + "grad_norm": 0.33646532893180847, + "learning_rate": 1.4918491599246554e-05, + "loss": 0.4681, + "step": 24565 + }, + { + "epoch": 0.6745194947830863, + "grad_norm": 0.370693564414978, + "learning_rate": 1.4918115553522556e-05, + "loss": 0.4586, + "step": 24566 + }, + { + "epoch": 0.6745469522240527, + "grad_norm": 0.3917047083377838, + "learning_rate": 1.4917739498624706e-05, + "loss": 0.6157, + "step": 24567 + }, + { + "epoch": 0.6745744096650192, + "grad_norm": 0.34217947721481323, + "learning_rate": 1.4917363434553702e-05, + "loss": 0.4138, + "step": 24568 + }, + { + "epoch": 0.6746018671059857, + "grad_norm": 0.35845229029655457, + "learning_rate": 1.4916987361310243e-05, + "loss": 0.4271, + "step": 24569 + }, + { + "epoch": 0.6746293245469522, + "grad_norm": 0.36230671405792236, + "learning_rate": 1.4916611278895036e-05, + "loss": 0.5158, + "step": 24570 + }, + { + "epoch": 0.6746567819879187, + "grad_norm": 0.38496971130371094, + "learning_rate": 1.491623518730878e-05, + "loss": 0.5548, + "step": 24571 + }, + { + "epoch": 0.6746842394288852, + "grad_norm": 0.4383469521999359, + "learning_rate": 1.4915859086552173e-05, + "loss": 0.5212, + "step": 24572 + }, + { + "epoch": 0.6747116968698518, + "grad_norm": 0.4856886565685272, + "learning_rate": 1.4915482976625924e-05, + "loss": 0.5637, + "step": 24573 + }, + { + "epoch": 0.6747391543108182, + "grad_norm": 0.36719903349876404, + "learning_rate": 1.4915106857530725e-05, + "loss": 0.4087, + "step": 24574 + }, + { + "epoch": 0.6747666117517848, + "grad_norm": 0.3883417546749115, + "learning_rate": 1.4914730729267289e-05, + "loss": 0.4336, + "step": 24575 + }, + { + "epoch": 0.6747940691927512, + "grad_norm": 0.40620318055152893, + "learning_rate": 1.4914354591836308e-05, + "loss": 0.4924, + "step": 24576 + }, + { + "epoch": 0.6748215266337177, + "grad_norm": 0.45186689496040344, + "learning_rate": 1.4913978445238488e-05, + "loss": 0.4614, + "step": 24577 + }, + { + "epoch": 0.6748489840746842, + "grad_norm": 0.4127728044986725, + "learning_rate": 1.4913602289474529e-05, + "loss": 0.4723, + "step": 24578 + }, + { + "epoch": 0.6748764415156507, + "grad_norm": 0.38014960289001465, + "learning_rate": 1.4913226124545135e-05, + "loss": 0.5423, + "step": 24579 + }, + { + "epoch": 0.6749038989566173, + "grad_norm": 0.38368716835975647, + "learning_rate": 1.4912849950451004e-05, + "loss": 0.524, + "step": 24580 + }, + { + "epoch": 0.6749313563975837, + "grad_norm": 0.34034568071365356, + "learning_rate": 1.4912473767192842e-05, + "loss": 0.475, + "step": 24581 + }, + { + "epoch": 0.6749588138385503, + "grad_norm": 0.3314669132232666, + "learning_rate": 1.491209757477135e-05, + "loss": 0.4378, + "step": 24582 + }, + { + "epoch": 0.6749862712795167, + "grad_norm": 0.4210059344768524, + "learning_rate": 1.4911721373187225e-05, + "loss": 0.5095, + "step": 24583 + }, + { + "epoch": 0.6750137287204833, + "grad_norm": 0.3827277421951294, + "learning_rate": 1.491134516244117e-05, + "loss": 0.4634, + "step": 24584 + }, + { + "epoch": 0.6750411861614497, + "grad_norm": 0.36526167392730713, + "learning_rate": 1.4910968942533894e-05, + "loss": 0.5349, + "step": 24585 + }, + { + "epoch": 0.6750686436024163, + "grad_norm": 0.32987093925476074, + "learning_rate": 1.4910592713466093e-05, + "loss": 0.4919, + "step": 24586 + }, + { + "epoch": 0.6750961010433828, + "grad_norm": 0.37047910690307617, + "learning_rate": 1.4910216475238466e-05, + "loss": 0.4371, + "step": 24587 + }, + { + "epoch": 0.6751235584843492, + "grad_norm": 0.4140540361404419, + "learning_rate": 1.4909840227851722e-05, + "loss": 0.5127, + "step": 24588 + }, + { + "epoch": 0.6751510159253158, + "grad_norm": 0.46208202838897705, + "learning_rate": 1.4909463971306557e-05, + "loss": 0.5287, + "step": 24589 + }, + { + "epoch": 0.6751784733662822, + "grad_norm": 0.3833446800708771, + "learning_rate": 1.4909087705603673e-05, + "loss": 0.4806, + "step": 24590 + }, + { + "epoch": 0.6752059308072488, + "grad_norm": 0.3414084017276764, + "learning_rate": 1.4908711430743776e-05, + "loss": 0.524, + "step": 24591 + }, + { + "epoch": 0.6752333882482152, + "grad_norm": 0.3484494686126709, + "learning_rate": 1.4908335146727563e-05, + "loss": 0.4932, + "step": 24592 + }, + { + "epoch": 0.6752608456891818, + "grad_norm": 0.3920177221298218, + "learning_rate": 1.4907958853555742e-05, + "loss": 0.4606, + "step": 24593 + }, + { + "epoch": 0.6752883031301483, + "grad_norm": 0.47357261180877686, + "learning_rate": 1.490758255122901e-05, + "loss": 0.4883, + "step": 24594 + }, + { + "epoch": 0.6753157605711148, + "grad_norm": 0.3538281321525574, + "learning_rate": 1.490720623974807e-05, + "loss": 0.45, + "step": 24595 + }, + { + "epoch": 0.6753432180120813, + "grad_norm": 0.41389501094818115, + "learning_rate": 1.4906829919113623e-05, + "loss": 0.5301, + "step": 24596 + }, + { + "epoch": 0.6753706754530477, + "grad_norm": 0.40059810876846313, + "learning_rate": 1.490645358932637e-05, + "loss": 0.608, + "step": 24597 + }, + { + "epoch": 0.6753981328940143, + "grad_norm": 0.41472533345222473, + "learning_rate": 1.4906077250387022e-05, + "loss": 0.5225, + "step": 24598 + }, + { + "epoch": 0.6754255903349807, + "grad_norm": 0.3730088770389557, + "learning_rate": 1.4905700902296269e-05, + "loss": 0.5635, + "step": 24599 + }, + { + "epoch": 0.6754530477759473, + "grad_norm": 0.3968581259250641, + "learning_rate": 1.4905324545054817e-05, + "loss": 0.4474, + "step": 24600 + }, + { + "epoch": 0.6754805052169138, + "grad_norm": 0.44245412945747375, + "learning_rate": 1.4904948178663374e-05, + "loss": 0.5852, + "step": 24601 + }, + { + "epoch": 0.6755079626578803, + "grad_norm": 0.3946479856967926, + "learning_rate": 1.4904571803122634e-05, + "loss": 0.4995, + "step": 24602 + }, + { + "epoch": 0.6755354200988468, + "grad_norm": 0.40508314967155457, + "learning_rate": 1.4904195418433302e-05, + "loss": 0.5048, + "step": 24603 + }, + { + "epoch": 0.6755628775398133, + "grad_norm": 0.4574253559112549, + "learning_rate": 1.4903819024596082e-05, + "loss": 0.5316, + "step": 24604 + }, + { + "epoch": 0.6755903349807798, + "grad_norm": 0.4047321081161499, + "learning_rate": 1.4903442621611673e-05, + "loss": 0.5592, + "step": 24605 + }, + { + "epoch": 0.6756177924217462, + "grad_norm": 0.3417544364929199, + "learning_rate": 1.4903066209480779e-05, + "loss": 0.4515, + "step": 24606 + }, + { + "epoch": 0.6756452498627128, + "grad_norm": 0.3838084638118744, + "learning_rate": 1.49026897882041e-05, + "loss": 0.557, + "step": 24607 + }, + { + "epoch": 0.6756727073036793, + "grad_norm": 0.4075060784816742, + "learning_rate": 1.4902313357782343e-05, + "loss": 0.5663, + "step": 24608 + }, + { + "epoch": 0.6757001647446458, + "grad_norm": 0.4222567081451416, + "learning_rate": 1.4901936918216206e-05, + "loss": 0.5671, + "step": 24609 + }, + { + "epoch": 0.6757276221856123, + "grad_norm": 0.41118401288986206, + "learning_rate": 1.4901560469506389e-05, + "loss": 0.5282, + "step": 24610 + }, + { + "epoch": 0.6757550796265788, + "grad_norm": 0.3938857316970825, + "learning_rate": 1.49011840116536e-05, + "loss": 0.5166, + "step": 24611 + }, + { + "epoch": 0.6757825370675453, + "grad_norm": 0.3827604055404663, + "learning_rate": 1.4900807544658538e-05, + "loss": 0.5455, + "step": 24612 + }, + { + "epoch": 0.6758099945085118, + "grad_norm": 0.3926313817501068, + "learning_rate": 1.4900431068521908e-05, + "loss": 0.4973, + "step": 24613 + }, + { + "epoch": 0.6758374519494783, + "grad_norm": 0.3724226951599121, + "learning_rate": 1.4900054583244408e-05, + "loss": 0.4499, + "step": 24614 + }, + { + "epoch": 0.6758649093904449, + "grad_norm": 0.423447847366333, + "learning_rate": 1.4899678088826741e-05, + "loss": 0.5387, + "step": 24615 + }, + { + "epoch": 0.6758923668314113, + "grad_norm": 0.3710342049598694, + "learning_rate": 1.4899301585269611e-05, + "loss": 0.462, + "step": 24616 + }, + { + "epoch": 0.6759198242723778, + "grad_norm": 0.361422598361969, + "learning_rate": 1.4898925072573723e-05, + "loss": 0.4674, + "step": 24617 + }, + { + "epoch": 0.6759472817133443, + "grad_norm": 0.3752308487892151, + "learning_rate": 1.4898548550739773e-05, + "loss": 0.4755, + "step": 24618 + }, + { + "epoch": 0.6759747391543108, + "grad_norm": 0.37381258606910706, + "learning_rate": 1.4898172019768469e-05, + "loss": 0.4472, + "step": 24619 + }, + { + "epoch": 0.6760021965952773, + "grad_norm": 0.339123010635376, + "learning_rate": 1.4897795479660509e-05, + "loss": 0.4967, + "step": 24620 + }, + { + "epoch": 0.6760296540362438, + "grad_norm": 0.35449087619781494, + "learning_rate": 1.4897418930416598e-05, + "loss": 0.5171, + "step": 24621 + }, + { + "epoch": 0.6760571114772104, + "grad_norm": 0.3871387243270874, + "learning_rate": 1.4897042372037436e-05, + "loss": 0.4505, + "step": 24622 + }, + { + "epoch": 0.6760845689181768, + "grad_norm": 0.38298478722572327, + "learning_rate": 1.4896665804523728e-05, + "loss": 0.4688, + "step": 24623 + }, + { + "epoch": 0.6761120263591434, + "grad_norm": 0.39064833521842957, + "learning_rate": 1.4896289227876174e-05, + "loss": 0.4934, + "step": 24624 + }, + { + "epoch": 0.6761394838001098, + "grad_norm": 0.38540512323379517, + "learning_rate": 1.489591264209548e-05, + "loss": 0.5237, + "step": 24625 + }, + { + "epoch": 0.6761669412410763, + "grad_norm": 0.3764270842075348, + "learning_rate": 1.489553604718235e-05, + "loss": 0.4997, + "step": 24626 + }, + { + "epoch": 0.6761943986820428, + "grad_norm": 1.1259453296661377, + "learning_rate": 1.4895159443137477e-05, + "loss": 0.5908, + "step": 24627 + }, + { + "epoch": 0.6762218561230093, + "grad_norm": 0.41269755363464355, + "learning_rate": 1.489478282996157e-05, + "loss": 0.5677, + "step": 24628 + }, + { + "epoch": 0.6762493135639759, + "grad_norm": 0.378048837184906, + "learning_rate": 1.489440620765533e-05, + "loss": 0.5304, + "step": 24629 + }, + { + "epoch": 0.6762767710049423, + "grad_norm": 0.42348745465278625, + "learning_rate": 1.4894029576219464e-05, + "loss": 0.5175, + "step": 24630 + }, + { + "epoch": 0.6763042284459089, + "grad_norm": 0.33571645617485046, + "learning_rate": 1.489365293565467e-05, + "loss": 0.4423, + "step": 24631 + }, + { + "epoch": 0.6763316858868753, + "grad_norm": 0.41025876998901367, + "learning_rate": 1.4893276285961648e-05, + "loss": 0.4845, + "step": 24632 + }, + { + "epoch": 0.6763591433278419, + "grad_norm": 0.38076502084732056, + "learning_rate": 1.4892899627141109e-05, + "loss": 0.5086, + "step": 24633 + }, + { + "epoch": 0.6763866007688083, + "grad_norm": 0.46431028842926025, + "learning_rate": 1.4892522959193745e-05, + "loss": 0.5989, + "step": 24634 + }, + { + "epoch": 0.6764140582097748, + "grad_norm": 0.4151221513748169, + "learning_rate": 1.4892146282120268e-05, + "loss": 0.5386, + "step": 24635 + }, + { + "epoch": 0.6764415156507414, + "grad_norm": 0.36836618185043335, + "learning_rate": 1.4891769595921377e-05, + "loss": 0.4415, + "step": 24636 + }, + { + "epoch": 0.6764689730917078, + "grad_norm": 0.40889889001846313, + "learning_rate": 1.4891392900597772e-05, + "loss": 0.4884, + "step": 24637 + }, + { + "epoch": 0.6764964305326744, + "grad_norm": 0.37919095158576965, + "learning_rate": 1.4891016196150161e-05, + "loss": 0.5247, + "step": 24638 + }, + { + "epoch": 0.6765238879736408, + "grad_norm": 0.3906339704990387, + "learning_rate": 1.4890639482579242e-05, + "loss": 0.5675, + "step": 24639 + }, + { + "epoch": 0.6765513454146074, + "grad_norm": 0.3761000633239746, + "learning_rate": 1.4890262759885718e-05, + "loss": 0.406, + "step": 24640 + }, + { + "epoch": 0.6765788028555738, + "grad_norm": 0.4928313195705414, + "learning_rate": 1.4889886028070295e-05, + "loss": 0.5413, + "step": 24641 + }, + { + "epoch": 0.6766062602965404, + "grad_norm": 0.39812812209129333, + "learning_rate": 1.4889509287133674e-05, + "loss": 0.5857, + "step": 24642 + }, + { + "epoch": 0.6766337177375069, + "grad_norm": 0.41760751605033875, + "learning_rate": 1.4889132537076558e-05, + "loss": 0.5174, + "step": 24643 + }, + { + "epoch": 0.6766611751784734, + "grad_norm": 0.3205573856830597, + "learning_rate": 1.4888755777899647e-05, + "loss": 0.4519, + "step": 24644 + }, + { + "epoch": 0.6766886326194399, + "grad_norm": 0.40193575620651245, + "learning_rate": 1.4888379009603648e-05, + "loss": 0.5573, + "step": 24645 + }, + { + "epoch": 0.6767160900604063, + "grad_norm": 0.40479370951652527, + "learning_rate": 1.488800223218926e-05, + "loss": 0.5312, + "step": 24646 + }, + { + "epoch": 0.6767435475013729, + "grad_norm": 0.40116778016090393, + "learning_rate": 1.4887625445657193e-05, + "loss": 0.5737, + "step": 24647 + }, + { + "epoch": 0.6767710049423393, + "grad_norm": 0.48819276690483093, + "learning_rate": 1.4887248650008142e-05, + "loss": 0.5088, + "step": 24648 + }, + { + "epoch": 0.6767984623833059, + "grad_norm": 0.37710481882095337, + "learning_rate": 1.488687184524281e-05, + "loss": 0.5945, + "step": 24649 + }, + { + "epoch": 0.6768259198242724, + "grad_norm": 0.627784013748169, + "learning_rate": 1.4886495031361903e-05, + "loss": 0.5013, + "step": 24650 + }, + { + "epoch": 0.6768533772652389, + "grad_norm": 0.4297949969768524, + "learning_rate": 1.4886118208366125e-05, + "loss": 0.567, + "step": 24651 + }, + { + "epoch": 0.6768808347062054, + "grad_norm": 0.4072975516319275, + "learning_rate": 1.4885741376256175e-05, + "loss": 0.5192, + "step": 24652 + }, + { + "epoch": 0.6769082921471719, + "grad_norm": 0.3537621796131134, + "learning_rate": 1.488536453503276e-05, + "loss": 0.4993, + "step": 24653 + }, + { + "epoch": 0.6769357495881384, + "grad_norm": 0.3731457591056824, + "learning_rate": 1.4884987684696579e-05, + "loss": 0.4527, + "step": 24654 + }, + { + "epoch": 0.6769632070291048, + "grad_norm": 0.4117981791496277, + "learning_rate": 1.4884610825248338e-05, + "loss": 0.5099, + "step": 24655 + }, + { + "epoch": 0.6769906644700714, + "grad_norm": 0.4026380479335785, + "learning_rate": 1.488423395668874e-05, + "loss": 0.4926, + "step": 24656 + }, + { + "epoch": 0.6770181219110379, + "grad_norm": 0.3824782073497772, + "learning_rate": 1.4883857079018485e-05, + "loss": 0.4945, + "step": 24657 + }, + { + "epoch": 0.6770455793520044, + "grad_norm": 0.3629313111305237, + "learning_rate": 1.4883480192238277e-05, + "loss": 0.4808, + "step": 24658 + }, + { + "epoch": 0.6770730367929709, + "grad_norm": 0.3595391809940338, + "learning_rate": 1.4883103296348822e-05, + "loss": 0.4426, + "step": 24659 + }, + { + "epoch": 0.6771004942339374, + "grad_norm": 0.6131923794746399, + "learning_rate": 1.4882726391350821e-05, + "loss": 0.5036, + "step": 24660 + }, + { + "epoch": 0.6771279516749039, + "grad_norm": 0.3421216309070587, + "learning_rate": 1.4882349477244975e-05, + "loss": 0.4825, + "step": 24661 + }, + { + "epoch": 0.6771554091158704, + "grad_norm": 0.2953258454799652, + "learning_rate": 1.488197255403199e-05, + "loss": 0.3599, + "step": 24662 + }, + { + "epoch": 0.6771828665568369, + "grad_norm": 0.34559378027915955, + "learning_rate": 1.4881595621712567e-05, + "loss": 0.4903, + "step": 24663 + }, + { + "epoch": 0.6772103239978035, + "grad_norm": 0.3611229658126831, + "learning_rate": 1.4881218680287415e-05, + "loss": 0.4677, + "step": 24664 + }, + { + "epoch": 0.6772377814387699, + "grad_norm": 0.42003607749938965, + "learning_rate": 1.4880841729757225e-05, + "loss": 0.5121, + "step": 24665 + }, + { + "epoch": 0.6772652388797364, + "grad_norm": 0.3831963539123535, + "learning_rate": 1.488046477012271e-05, + "loss": 0.4612, + "step": 24666 + }, + { + "epoch": 0.6772926963207029, + "grad_norm": 0.3545961081981659, + "learning_rate": 1.4880087801384573e-05, + "loss": 0.5236, + "step": 24667 + }, + { + "epoch": 0.6773201537616694, + "grad_norm": 0.5288163423538208, + "learning_rate": 1.4879710823543514e-05, + "loss": 0.4867, + "step": 24668 + }, + { + "epoch": 0.6773476112026359, + "grad_norm": 0.35817283391952515, + "learning_rate": 1.4879333836600236e-05, + "loss": 0.461, + "step": 24669 + }, + { + "epoch": 0.6773750686436024, + "grad_norm": 0.3856397569179535, + "learning_rate": 1.4878956840555441e-05, + "loss": 0.5019, + "step": 24670 + }, + { + "epoch": 0.677402526084569, + "grad_norm": 0.3809085190296173, + "learning_rate": 1.4878579835409837e-05, + "loss": 0.533, + "step": 24671 + }, + { + "epoch": 0.6774299835255354, + "grad_norm": 0.33630844950675964, + "learning_rate": 1.4878202821164122e-05, + "loss": 0.4322, + "step": 24672 + }, + { + "epoch": 0.677457440966502, + "grad_norm": 0.3207458257675171, + "learning_rate": 1.4877825797819005e-05, + "loss": 0.515, + "step": 24673 + }, + { + "epoch": 0.6774848984074684, + "grad_norm": 0.4093596041202545, + "learning_rate": 1.4877448765375184e-05, + "loss": 0.501, + "step": 24674 + }, + { + "epoch": 0.6775123558484349, + "grad_norm": 0.36584851145744324, + "learning_rate": 1.4877071723833365e-05, + "loss": 0.4966, + "step": 24675 + }, + { + "epoch": 0.6775398132894014, + "grad_norm": 0.5227502584457397, + "learning_rate": 1.4876694673194252e-05, + "loss": 0.5177, + "step": 24676 + }, + { + "epoch": 0.6775672707303679, + "grad_norm": 0.3756561875343323, + "learning_rate": 1.4876317613458543e-05, + "loss": 0.5299, + "step": 24677 + }, + { + "epoch": 0.6775947281713345, + "grad_norm": 0.3682920038700104, + "learning_rate": 1.487594054462695e-05, + "loss": 0.5348, + "step": 24678 + }, + { + "epoch": 0.6776221856123009, + "grad_norm": 0.34742066264152527, + "learning_rate": 1.4875563466700169e-05, + "loss": 0.4761, + "step": 24679 + }, + { + "epoch": 0.6776496430532675, + "grad_norm": 0.43169623613357544, + "learning_rate": 1.4875186379678908e-05, + "loss": 0.5757, + "step": 24680 + }, + { + "epoch": 0.6776771004942339, + "grad_norm": 0.37947484850883484, + "learning_rate": 1.4874809283563867e-05, + "loss": 0.4821, + "step": 24681 + }, + { + "epoch": 0.6777045579352005, + "grad_norm": 0.4154565930366516, + "learning_rate": 1.487443217835575e-05, + "loss": 0.5336, + "step": 24682 + }, + { + "epoch": 0.6777320153761669, + "grad_norm": 0.3759317994117737, + "learning_rate": 1.4874055064055261e-05, + "loss": 0.4997, + "step": 24683 + }, + { + "epoch": 0.6777594728171334, + "grad_norm": 0.3883313238620758, + "learning_rate": 1.4873677940663107e-05, + "loss": 0.5253, + "step": 24684 + }, + { + "epoch": 0.6777869302581, + "grad_norm": 0.3828660845756531, + "learning_rate": 1.4873300808179985e-05, + "loss": 0.5104, + "step": 24685 + }, + { + "epoch": 0.6778143876990664, + "grad_norm": 0.4011501669883728, + "learning_rate": 1.4872923666606602e-05, + "loss": 0.5339, + "step": 24686 + }, + { + "epoch": 0.677841845140033, + "grad_norm": 0.3488782048225403, + "learning_rate": 1.4872546515943661e-05, + "loss": 0.4998, + "step": 24687 + }, + { + "epoch": 0.6778693025809994, + "grad_norm": 0.3759072422981262, + "learning_rate": 1.4872169356191867e-05, + "loss": 0.4991, + "step": 24688 + }, + { + "epoch": 0.677896760021966, + "grad_norm": 0.47305694222450256, + "learning_rate": 1.4871792187351921e-05, + "loss": 0.6008, + "step": 24689 + }, + { + "epoch": 0.6779242174629324, + "grad_norm": 0.3556036353111267, + "learning_rate": 1.4871415009424528e-05, + "loss": 0.4323, + "step": 24690 + }, + { + "epoch": 0.677951674903899, + "grad_norm": 0.42531007528305054, + "learning_rate": 1.4871037822410392e-05, + "loss": 0.561, + "step": 24691 + }, + { + "epoch": 0.6779791323448655, + "grad_norm": 0.36813661456108093, + "learning_rate": 1.4870660626310216e-05, + "loss": 0.4902, + "step": 24692 + }, + { + "epoch": 0.678006589785832, + "grad_norm": 0.37318286299705505, + "learning_rate": 1.48702834211247e-05, + "loss": 0.4899, + "step": 24693 + }, + { + "epoch": 0.6780340472267985, + "grad_norm": 0.3967891037464142, + "learning_rate": 1.4869906206854556e-05, + "loss": 0.5505, + "step": 24694 + }, + { + "epoch": 0.6780615046677649, + "grad_norm": 0.4000481367111206, + "learning_rate": 1.486952898350048e-05, + "loss": 0.4045, + "step": 24695 + }, + { + "epoch": 0.6780889621087315, + "grad_norm": 0.3878704905509949, + "learning_rate": 1.4869151751063178e-05, + "loss": 0.6026, + "step": 24696 + }, + { + "epoch": 0.6781164195496979, + "grad_norm": 0.3805018663406372, + "learning_rate": 1.4868774509543355e-05, + "loss": 0.5186, + "step": 24697 + }, + { + "epoch": 0.6781438769906645, + "grad_norm": 0.35620933771133423, + "learning_rate": 1.486839725894171e-05, + "loss": 0.5231, + "step": 24698 + }, + { + "epoch": 0.678171334431631, + "grad_norm": 0.3495666980743408, + "learning_rate": 1.4868019999258957e-05, + "loss": 0.4321, + "step": 24699 + }, + { + "epoch": 0.6781987918725975, + "grad_norm": 0.35241419076919556, + "learning_rate": 1.4867642730495787e-05, + "loss": 0.4532, + "step": 24700 + }, + { + "epoch": 0.678226249313564, + "grad_norm": 0.37175247073173523, + "learning_rate": 1.4867265452652913e-05, + "loss": 0.5061, + "step": 24701 + }, + { + "epoch": 0.6782537067545304, + "grad_norm": 0.3732503056526184, + "learning_rate": 1.4866888165731034e-05, + "loss": 0.456, + "step": 24702 + }, + { + "epoch": 0.678281164195497, + "grad_norm": 0.4279842972755432, + "learning_rate": 1.4866510869730855e-05, + "loss": 0.482, + "step": 24703 + }, + { + "epoch": 0.6783086216364634, + "grad_norm": 0.38725391030311584, + "learning_rate": 1.4866133564653083e-05, + "loss": 0.5526, + "step": 24704 + }, + { + "epoch": 0.67833607907743, + "grad_norm": 0.358897864818573, + "learning_rate": 1.4865756250498416e-05, + "loss": 0.4947, + "step": 24705 + }, + { + "epoch": 0.6783635365183965, + "grad_norm": 0.3751176595687866, + "learning_rate": 1.486537892726756e-05, + "loss": 0.5463, + "step": 24706 + }, + { + "epoch": 0.678390993959363, + "grad_norm": 0.372969388961792, + "learning_rate": 1.4865001594961221e-05, + "loss": 0.5352, + "step": 24707 + }, + { + "epoch": 0.6784184514003295, + "grad_norm": 0.3516174256801605, + "learning_rate": 1.48646242535801e-05, + "loss": 0.5204, + "step": 24708 + }, + { + "epoch": 0.678445908841296, + "grad_norm": 0.4268017113208771, + "learning_rate": 1.4864246903124904e-05, + "loss": 0.5965, + "step": 24709 + }, + { + "epoch": 0.6784733662822625, + "grad_norm": 0.3818952441215515, + "learning_rate": 1.4863869543596337e-05, + "loss": 0.5956, + "step": 24710 + }, + { + "epoch": 0.678500823723229, + "grad_norm": 0.43440333008766174, + "learning_rate": 1.4863492174995095e-05, + "loss": 0.5548, + "step": 24711 + }, + { + "epoch": 0.6785282811641955, + "grad_norm": 0.37717920541763306, + "learning_rate": 1.4863114797321891e-05, + "loss": 0.5345, + "step": 24712 + }, + { + "epoch": 0.678555738605162, + "grad_norm": 0.38694632053375244, + "learning_rate": 1.4862737410577427e-05, + "loss": 0.4599, + "step": 24713 + }, + { + "epoch": 0.6785831960461285, + "grad_norm": 0.3523053824901581, + "learning_rate": 1.4862360014762404e-05, + "loss": 0.4339, + "step": 24714 + }, + { + "epoch": 0.678610653487095, + "grad_norm": 0.3841882050037384, + "learning_rate": 1.4861982609877527e-05, + "loss": 0.5559, + "step": 24715 + }, + { + "epoch": 0.6786381109280615, + "grad_norm": 0.3694884181022644, + "learning_rate": 1.48616051959235e-05, + "loss": 0.4902, + "step": 24716 + }, + { + "epoch": 0.678665568369028, + "grad_norm": 0.38973236083984375, + "learning_rate": 1.4861227772901032e-05, + "loss": 0.5725, + "step": 24717 + }, + { + "epoch": 0.6786930258099945, + "grad_norm": 0.4293643534183502, + "learning_rate": 1.4860850340810822e-05, + "loss": 0.4838, + "step": 24718 + }, + { + "epoch": 0.678720483250961, + "grad_norm": 0.32226577401161194, + "learning_rate": 1.4860472899653571e-05, + "loss": 0.469, + "step": 24719 + }, + { + "epoch": 0.6787479406919276, + "grad_norm": 0.39230573177337646, + "learning_rate": 1.486009544942999e-05, + "loss": 0.4656, + "step": 24720 + }, + { + "epoch": 0.678775398132894, + "grad_norm": 0.3882876932621002, + "learning_rate": 1.4859717990140776e-05, + "loss": 0.5951, + "step": 24721 + }, + { + "epoch": 0.6788028555738606, + "grad_norm": 0.3995455801486969, + "learning_rate": 1.485934052178664e-05, + "loss": 0.5415, + "step": 24722 + }, + { + "epoch": 0.678830313014827, + "grad_norm": 0.4224449396133423, + "learning_rate": 1.4858963044368284e-05, + "loss": 0.506, + "step": 24723 + }, + { + "epoch": 0.6788577704557935, + "grad_norm": 0.45586255192756653, + "learning_rate": 1.4858585557886408e-05, + "loss": 0.5372, + "step": 24724 + }, + { + "epoch": 0.67888522789676, + "grad_norm": 0.41354838013648987, + "learning_rate": 1.4858208062341721e-05, + "loss": 0.5182, + "step": 24725 + }, + { + "epoch": 0.6789126853377265, + "grad_norm": 0.3628770411014557, + "learning_rate": 1.4857830557734927e-05, + "loss": 0.4627, + "step": 24726 + }, + { + "epoch": 0.6789401427786931, + "grad_norm": 0.37641704082489014, + "learning_rate": 1.4857453044066726e-05, + "loss": 0.4879, + "step": 24727 + }, + { + "epoch": 0.6789676002196595, + "grad_norm": 0.3610899746417999, + "learning_rate": 1.4857075521337826e-05, + "loss": 0.4745, + "step": 24728 + }, + { + "epoch": 0.6789950576606261, + "grad_norm": 0.4320964813232422, + "learning_rate": 1.4856697989548927e-05, + "loss": 0.5422, + "step": 24729 + }, + { + "epoch": 0.6790225151015925, + "grad_norm": 0.386313796043396, + "learning_rate": 1.4856320448700739e-05, + "loss": 0.5062, + "step": 24730 + }, + { + "epoch": 0.679049972542559, + "grad_norm": 0.39313608407974243, + "learning_rate": 1.4855942898793965e-05, + "loss": 0.4889, + "step": 24731 + }, + { + "epoch": 0.6790774299835255, + "grad_norm": 0.3889591097831726, + "learning_rate": 1.4855565339829303e-05, + "loss": 0.3975, + "step": 24732 + }, + { + "epoch": 0.679104887424492, + "grad_norm": 0.3713637590408325, + "learning_rate": 1.4855187771807466e-05, + "loss": 0.4693, + "step": 24733 + }, + { + "epoch": 0.6791323448654586, + "grad_norm": 0.386711984872818, + "learning_rate": 1.4854810194729152e-05, + "loss": 0.4752, + "step": 24734 + }, + { + "epoch": 0.679159802306425, + "grad_norm": 0.4015325903892517, + "learning_rate": 1.4854432608595069e-05, + "loss": 0.5516, + "step": 24735 + }, + { + "epoch": 0.6791872597473916, + "grad_norm": 0.3431996703147888, + "learning_rate": 1.4854055013405918e-05, + "loss": 0.4807, + "step": 24736 + }, + { + "epoch": 0.679214717188358, + "grad_norm": 0.3879076838493347, + "learning_rate": 1.4853677409162406e-05, + "loss": 0.5259, + "step": 24737 + }, + { + "epoch": 0.6792421746293246, + "grad_norm": 0.3491518199443817, + "learning_rate": 1.4853299795865236e-05, + "loss": 0.4694, + "step": 24738 + }, + { + "epoch": 0.679269632070291, + "grad_norm": 0.37226054072380066, + "learning_rate": 1.4852922173515114e-05, + "loss": 0.4995, + "step": 24739 + }, + { + "epoch": 0.6792970895112576, + "grad_norm": 0.418663889169693, + "learning_rate": 1.4852544542112744e-05, + "loss": 0.4902, + "step": 24740 + }, + { + "epoch": 0.6793245469522241, + "grad_norm": 0.3524758815765381, + "learning_rate": 1.485216690165883e-05, + "loss": 0.4686, + "step": 24741 + }, + { + "epoch": 0.6793520043931905, + "grad_norm": 0.38901662826538086, + "learning_rate": 1.4851789252154074e-05, + "loss": 0.4399, + "step": 24742 + }, + { + "epoch": 0.6793794618341571, + "grad_norm": 0.43476101756095886, + "learning_rate": 1.4851411593599183e-05, + "loss": 0.5006, + "step": 24743 + }, + { + "epoch": 0.6794069192751235, + "grad_norm": 0.3728569746017456, + "learning_rate": 1.4851033925994862e-05, + "loss": 0.4891, + "step": 24744 + }, + { + "epoch": 0.6794343767160901, + "grad_norm": 0.36587709188461304, + "learning_rate": 1.4850656249341813e-05, + "loss": 0.4751, + "step": 24745 + }, + { + "epoch": 0.6794618341570565, + "grad_norm": 0.41170018911361694, + "learning_rate": 1.4850278563640743e-05, + "loss": 0.5321, + "step": 24746 + }, + { + "epoch": 0.6794892915980231, + "grad_norm": 0.3987009823322296, + "learning_rate": 1.4849900868892354e-05, + "loss": 0.4182, + "step": 24747 + }, + { + "epoch": 0.6795167490389896, + "grad_norm": 0.3998626172542572, + "learning_rate": 1.4849523165097352e-05, + "loss": 0.5608, + "step": 24748 + }, + { + "epoch": 0.6795442064799561, + "grad_norm": 0.46267759799957275, + "learning_rate": 1.4849145452256447e-05, + "loss": 0.5438, + "step": 24749 + }, + { + "epoch": 0.6795716639209226, + "grad_norm": 0.3884490132331848, + "learning_rate": 1.4848767730370331e-05, + "loss": 0.5015, + "step": 24750 + }, + { + "epoch": 0.679599121361889, + "grad_norm": 0.4451427757740021, + "learning_rate": 1.484838999943972e-05, + "loss": 0.5458, + "step": 24751 + }, + { + "epoch": 0.6796265788028556, + "grad_norm": 0.3769061863422394, + "learning_rate": 1.484801225946531e-05, + "loss": 0.5113, + "step": 24752 + }, + { + "epoch": 0.679654036243822, + "grad_norm": 0.41998496651649475, + "learning_rate": 1.4847634510447812e-05, + "loss": 0.5882, + "step": 24753 + }, + { + "epoch": 0.6796814936847886, + "grad_norm": 0.5420153141021729, + "learning_rate": 1.4847256752387931e-05, + "loss": 0.4638, + "step": 24754 + }, + { + "epoch": 0.6797089511257551, + "grad_norm": 0.39680346846580505, + "learning_rate": 1.4846878985286366e-05, + "loss": 0.5539, + "step": 24755 + }, + { + "epoch": 0.6797364085667216, + "grad_norm": 0.38108667731285095, + "learning_rate": 1.4846501209143824e-05, + "loss": 0.4766, + "step": 24756 + }, + { + "epoch": 0.6797638660076881, + "grad_norm": 0.4224685728549957, + "learning_rate": 1.4846123423961015e-05, + "loss": 0.5303, + "step": 24757 + }, + { + "epoch": 0.6797913234486546, + "grad_norm": 0.3879458010196686, + "learning_rate": 1.4845745629738634e-05, + "loss": 0.4765, + "step": 24758 + }, + { + "epoch": 0.6798187808896211, + "grad_norm": 0.3626798987388611, + "learning_rate": 1.4845367826477396e-05, + "loss": 0.5577, + "step": 24759 + }, + { + "epoch": 0.6798462383305875, + "grad_norm": 0.3651992380619049, + "learning_rate": 1.4844990014177994e-05, + "loss": 0.4387, + "step": 24760 + }, + { + "epoch": 0.6798736957715541, + "grad_norm": 0.41993337869644165, + "learning_rate": 1.4844612192841143e-05, + "loss": 0.499, + "step": 24761 + }, + { + "epoch": 0.6799011532125206, + "grad_norm": 0.6609918475151062, + "learning_rate": 1.4844234362467545e-05, + "loss": 0.4649, + "step": 24762 + }, + { + "epoch": 0.6799286106534871, + "grad_norm": 0.4617288410663605, + "learning_rate": 1.4843856523057902e-05, + "loss": 0.479, + "step": 24763 + }, + { + "epoch": 0.6799560680944536, + "grad_norm": 0.414126455783844, + "learning_rate": 1.4843478674612924e-05, + "loss": 0.4752, + "step": 24764 + }, + { + "epoch": 0.6799835255354201, + "grad_norm": 0.37222009897232056, + "learning_rate": 1.4843100817133306e-05, + "loss": 0.4388, + "step": 24765 + }, + { + "epoch": 0.6800109829763866, + "grad_norm": 0.38828524947166443, + "learning_rate": 1.4842722950619764e-05, + "loss": 0.5248, + "step": 24766 + }, + { + "epoch": 0.6800384404173531, + "grad_norm": 0.4090743362903595, + "learning_rate": 1.4842345075072998e-05, + "loss": 0.5264, + "step": 24767 + }, + { + "epoch": 0.6800658978583196, + "grad_norm": 0.40971800684928894, + "learning_rate": 1.4841967190493711e-05, + "loss": 0.549, + "step": 24768 + }, + { + "epoch": 0.6800933552992862, + "grad_norm": 0.3782045543193817, + "learning_rate": 1.484158929688261e-05, + "loss": 0.5415, + "step": 24769 + }, + { + "epoch": 0.6801208127402526, + "grad_norm": 0.40254074335098267, + "learning_rate": 1.48412113942404e-05, + "loss": 0.5044, + "step": 24770 + }, + { + "epoch": 0.6801482701812192, + "grad_norm": 0.3977079391479492, + "learning_rate": 1.4840833482567786e-05, + "loss": 0.4948, + "step": 24771 + }, + { + "epoch": 0.6801757276221856, + "grad_norm": 0.36784598231315613, + "learning_rate": 1.4840455561865475e-05, + "loss": 0.5194, + "step": 24772 + }, + { + "epoch": 0.6802031850631521, + "grad_norm": 0.41661709547042847, + "learning_rate": 1.4840077632134167e-05, + "loss": 0.5307, + "step": 24773 + }, + { + "epoch": 0.6802306425041186, + "grad_norm": 0.35944920778274536, + "learning_rate": 1.4839699693374567e-05, + "loss": 0.4671, + "step": 24774 + }, + { + "epoch": 0.6802580999450851, + "grad_norm": 0.4112045466899872, + "learning_rate": 1.4839321745587387e-05, + "loss": 0.5018, + "step": 24775 + }, + { + "epoch": 0.6802855573860516, + "grad_norm": 0.41675832867622375, + "learning_rate": 1.4838943788773325e-05, + "loss": 0.5168, + "step": 24776 + }, + { + "epoch": 0.6803130148270181, + "grad_norm": 0.42622271180152893, + "learning_rate": 1.483856582293309e-05, + "loss": 0.5926, + "step": 24777 + }, + { + "epoch": 0.6803404722679847, + "grad_norm": 0.4079679548740387, + "learning_rate": 1.4838187848067382e-05, + "loss": 0.5387, + "step": 24778 + }, + { + "epoch": 0.6803679297089511, + "grad_norm": 0.4241170883178711, + "learning_rate": 1.4837809864176913e-05, + "loss": 0.5272, + "step": 24779 + }, + { + "epoch": 0.6803953871499177, + "grad_norm": 0.3668062388896942, + "learning_rate": 1.4837431871262385e-05, + "loss": 0.5025, + "step": 24780 + }, + { + "epoch": 0.6804228445908841, + "grad_norm": 0.3498862385749817, + "learning_rate": 1.4837053869324498e-05, + "loss": 0.4955, + "step": 24781 + }, + { + "epoch": 0.6804503020318506, + "grad_norm": 0.3379315137863159, + "learning_rate": 1.4836675858363964e-05, + "loss": 0.5317, + "step": 24782 + }, + { + "epoch": 0.6804777594728171, + "grad_norm": 0.35673996806144714, + "learning_rate": 1.4836297838381489e-05, + "loss": 0.4594, + "step": 24783 + }, + { + "epoch": 0.6805052169137836, + "grad_norm": 0.39776644110679626, + "learning_rate": 1.483591980937777e-05, + "loss": 0.4776, + "step": 24784 + }, + { + "epoch": 0.6805326743547502, + "grad_norm": 0.3943544924259186, + "learning_rate": 1.483554177135352e-05, + "loss": 0.5366, + "step": 24785 + }, + { + "epoch": 0.6805601317957166, + "grad_norm": 0.3750261962413788, + "learning_rate": 1.483516372430944e-05, + "loss": 0.4671, + "step": 24786 + }, + { + "epoch": 0.6805875892366832, + "grad_norm": 0.40222543478012085, + "learning_rate": 1.4834785668246238e-05, + "loss": 0.4992, + "step": 24787 + }, + { + "epoch": 0.6806150466776496, + "grad_norm": 0.37992438673973083, + "learning_rate": 1.4834407603164617e-05, + "loss": 0.5811, + "step": 24788 + }, + { + "epoch": 0.6806425041186162, + "grad_norm": 0.3561042547225952, + "learning_rate": 1.4834029529065283e-05, + "loss": 0.4847, + "step": 24789 + }, + { + "epoch": 0.6806699615595826, + "grad_norm": 0.4806372821331024, + "learning_rate": 1.4833651445948943e-05, + "loss": 0.5443, + "step": 24790 + }, + { + "epoch": 0.6806974190005491, + "grad_norm": 0.3604779839515686, + "learning_rate": 1.4833273353816296e-05, + "loss": 0.4748, + "step": 24791 + }, + { + "epoch": 0.6807248764415157, + "grad_norm": 0.4138298034667969, + "learning_rate": 1.4832895252668054e-05, + "loss": 0.4999, + "step": 24792 + }, + { + "epoch": 0.6807523338824821, + "grad_norm": 0.36065396666526794, + "learning_rate": 1.4832517142504918e-05, + "loss": 0.4658, + "step": 24793 + }, + { + "epoch": 0.6807797913234487, + "grad_norm": 0.4247809648513794, + "learning_rate": 1.4832139023327599e-05, + "loss": 0.5228, + "step": 24794 + }, + { + "epoch": 0.6808072487644151, + "grad_norm": 0.38223543763160706, + "learning_rate": 1.4831760895136795e-05, + "loss": 0.5051, + "step": 24795 + }, + { + "epoch": 0.6808347062053817, + "grad_norm": 0.39497119188308716, + "learning_rate": 1.4831382757933217e-05, + "loss": 0.5363, + "step": 24796 + }, + { + "epoch": 0.6808621636463481, + "grad_norm": 0.35188817977905273, + "learning_rate": 1.4831004611717567e-05, + "loss": 0.4574, + "step": 24797 + }, + { + "epoch": 0.6808896210873147, + "grad_norm": 0.3510240614414215, + "learning_rate": 1.4830626456490551e-05, + "loss": 0.5603, + "step": 24798 + }, + { + "epoch": 0.6809170785282812, + "grad_norm": 0.3709449768066406, + "learning_rate": 1.4830248292252876e-05, + "loss": 0.518, + "step": 24799 + }, + { + "epoch": 0.6809445359692476, + "grad_norm": 0.3718576431274414, + "learning_rate": 1.4829870119005247e-05, + "loss": 0.4625, + "step": 24800 + }, + { + "epoch": 0.6809719934102142, + "grad_norm": 0.3791601061820984, + "learning_rate": 1.4829491936748368e-05, + "loss": 0.4699, + "step": 24801 + }, + { + "epoch": 0.6809994508511806, + "grad_norm": 0.3657032549381256, + "learning_rate": 1.4829113745482944e-05, + "loss": 0.4952, + "step": 24802 + }, + { + "epoch": 0.6810269082921472, + "grad_norm": 0.3757285475730896, + "learning_rate": 1.482873554520968e-05, + "loss": 0.4656, + "step": 24803 + }, + { + "epoch": 0.6810543657331136, + "grad_norm": 0.3871396780014038, + "learning_rate": 1.4828357335929288e-05, + "loss": 0.4707, + "step": 24804 + }, + { + "epoch": 0.6810818231740802, + "grad_norm": 0.36886411905288696, + "learning_rate": 1.4827979117642467e-05, + "loss": 0.5038, + "step": 24805 + }, + { + "epoch": 0.6811092806150467, + "grad_norm": 0.5465626120567322, + "learning_rate": 1.4827600890349923e-05, + "loss": 0.5637, + "step": 24806 + }, + { + "epoch": 0.6811367380560132, + "grad_norm": 0.4028051793575287, + "learning_rate": 1.4827222654052363e-05, + "loss": 0.5625, + "step": 24807 + }, + { + "epoch": 0.6811641954969797, + "grad_norm": 0.3903573751449585, + "learning_rate": 1.482684440875049e-05, + "loss": 0.4736, + "step": 24808 + }, + { + "epoch": 0.6811916529379461, + "grad_norm": 0.38079503178596497, + "learning_rate": 1.4826466154445014e-05, + "loss": 0.4752, + "step": 24809 + }, + { + "epoch": 0.6812191103789127, + "grad_norm": 0.33930355310440063, + "learning_rate": 1.4826087891136636e-05, + "loss": 0.4692, + "step": 24810 + }, + { + "epoch": 0.6812465678198791, + "grad_norm": 0.42364850640296936, + "learning_rate": 1.4825709618826065e-05, + "loss": 0.5114, + "step": 24811 + }, + { + "epoch": 0.6812740252608457, + "grad_norm": 0.41698595881462097, + "learning_rate": 1.4825331337514007e-05, + "loss": 0.5794, + "step": 24812 + }, + { + "epoch": 0.6813014827018122, + "grad_norm": 0.3929669260978699, + "learning_rate": 1.4824953047201163e-05, + "loss": 0.4573, + "step": 24813 + }, + { + "epoch": 0.6813289401427787, + "grad_norm": 0.3830530345439911, + "learning_rate": 1.4824574747888245e-05, + "loss": 0.4742, + "step": 24814 + }, + { + "epoch": 0.6813563975837452, + "grad_norm": 0.3926469087600708, + "learning_rate": 1.482419643957595e-05, + "loss": 0.6256, + "step": 24815 + }, + { + "epoch": 0.6813838550247117, + "grad_norm": 0.3491367995738983, + "learning_rate": 1.4823818122264994e-05, + "loss": 0.4926, + "step": 24816 + }, + { + "epoch": 0.6814113124656782, + "grad_norm": 0.4047195315361023, + "learning_rate": 1.4823439795956079e-05, + "loss": 0.4939, + "step": 24817 + }, + { + "epoch": 0.6814387699066446, + "grad_norm": 0.4140149652957916, + "learning_rate": 1.4823061460649904e-05, + "loss": 0.5182, + "step": 24818 + }, + { + "epoch": 0.6814662273476112, + "grad_norm": 0.39249858260154724, + "learning_rate": 1.4822683116347183e-05, + "loss": 0.4697, + "step": 24819 + }, + { + "epoch": 0.6814936847885777, + "grad_norm": 0.3508499264717102, + "learning_rate": 1.4822304763048619e-05, + "loss": 0.5878, + "step": 24820 + }, + { + "epoch": 0.6815211422295442, + "grad_norm": 1.8966386318206787, + "learning_rate": 1.4821926400754915e-05, + "loss": 0.5219, + "step": 24821 + }, + { + "epoch": 0.6815485996705107, + "grad_norm": 0.403727650642395, + "learning_rate": 1.4821548029466782e-05, + "loss": 0.4543, + "step": 24822 + }, + { + "epoch": 0.6815760571114772, + "grad_norm": 0.3967725932598114, + "learning_rate": 1.482116964918492e-05, + "loss": 0.4663, + "step": 24823 + }, + { + "epoch": 0.6816035145524437, + "grad_norm": 0.40517550706863403, + "learning_rate": 1.4820791259910041e-05, + "loss": 0.5123, + "step": 24824 + }, + { + "epoch": 0.6816309719934102, + "grad_norm": 0.4405979812145233, + "learning_rate": 1.4820412861642845e-05, + "loss": 0.555, + "step": 24825 + }, + { + "epoch": 0.6816584294343767, + "grad_norm": 0.4646373391151428, + "learning_rate": 1.4820034454384045e-05, + "loss": 0.5118, + "step": 24826 + }, + { + "epoch": 0.6816858868753433, + "grad_norm": 0.39963871240615845, + "learning_rate": 1.4819656038134337e-05, + "loss": 0.5276, + "step": 24827 + }, + { + "epoch": 0.6817133443163097, + "grad_norm": 0.4460827112197876, + "learning_rate": 1.4819277612894436e-05, + "loss": 0.4293, + "step": 24828 + }, + { + "epoch": 0.6817408017572762, + "grad_norm": 0.5707235336303711, + "learning_rate": 1.4818899178665044e-05, + "loss": 0.4611, + "step": 24829 + }, + { + "epoch": 0.6817682591982427, + "grad_norm": 0.37080761790275574, + "learning_rate": 1.4818520735446863e-05, + "loss": 0.5063, + "step": 24830 + }, + { + "epoch": 0.6817957166392092, + "grad_norm": 0.3917675018310547, + "learning_rate": 1.4818142283240609e-05, + "loss": 0.5086, + "step": 24831 + }, + { + "epoch": 0.6818231740801757, + "grad_norm": 0.4136042892932892, + "learning_rate": 1.4817763822046977e-05, + "loss": 0.5181, + "step": 24832 + }, + { + "epoch": 0.6818506315211422, + "grad_norm": 0.37907809019088745, + "learning_rate": 1.481738535186668e-05, + "loss": 0.5442, + "step": 24833 + }, + { + "epoch": 0.6818780889621088, + "grad_norm": 0.4064784049987793, + "learning_rate": 1.4817006872700421e-05, + "loss": 0.4985, + "step": 24834 + }, + { + "epoch": 0.6819055464030752, + "grad_norm": 1.4483965635299683, + "learning_rate": 1.4816628384548906e-05, + "loss": 0.5151, + "step": 24835 + }, + { + "epoch": 0.6819330038440418, + "grad_norm": 0.46663758158683777, + "learning_rate": 1.4816249887412844e-05, + "loss": 0.5539, + "step": 24836 + }, + { + "epoch": 0.6819604612850082, + "grad_norm": 0.44579198956489563, + "learning_rate": 1.4815871381292939e-05, + "loss": 0.4706, + "step": 24837 + }, + { + "epoch": 0.6819879187259748, + "grad_norm": 0.38031381368637085, + "learning_rate": 1.4815492866189894e-05, + "loss": 0.4931, + "step": 24838 + }, + { + "epoch": 0.6820153761669412, + "grad_norm": 0.3751586079597473, + "learning_rate": 1.481511434210442e-05, + "loss": 0.5085, + "step": 24839 + }, + { + "epoch": 0.6820428336079077, + "grad_norm": 0.3458572030067444, + "learning_rate": 1.4814735809037219e-05, + "loss": 0.4341, + "step": 24840 + }, + { + "epoch": 0.6820702910488743, + "grad_norm": 0.44269365072250366, + "learning_rate": 1.4814357266989e-05, + "loss": 0.5144, + "step": 24841 + }, + { + "epoch": 0.6820977484898407, + "grad_norm": 0.32083529233932495, + "learning_rate": 1.481397871596047e-05, + "loss": 0.3412, + "step": 24842 + }, + { + "epoch": 0.6821252059308073, + "grad_norm": 1.5561447143554688, + "learning_rate": 1.481360015595233e-05, + "loss": 0.4517, + "step": 24843 + }, + { + "epoch": 0.6821526633717737, + "grad_norm": 0.3980329930782318, + "learning_rate": 1.4813221586965292e-05, + "loss": 0.5683, + "step": 24844 + }, + { + "epoch": 0.6821801208127403, + "grad_norm": 0.6409361958503723, + "learning_rate": 1.4812843009000058e-05, + "loss": 0.5434, + "step": 24845 + }, + { + "epoch": 0.6822075782537067, + "grad_norm": 0.34522053599357605, + "learning_rate": 1.4812464422057335e-05, + "loss": 0.4681, + "step": 24846 + }, + { + "epoch": 0.6822350356946733, + "grad_norm": 0.37633946537971497, + "learning_rate": 1.4812085826137834e-05, + "loss": 0.5096, + "step": 24847 + }, + { + "epoch": 0.6822624931356398, + "grad_norm": 0.44478827714920044, + "learning_rate": 1.4811707221242253e-05, + "loss": 0.5422, + "step": 24848 + }, + { + "epoch": 0.6822899505766062, + "grad_norm": 0.3703426718711853, + "learning_rate": 1.4811328607371305e-05, + "loss": 0.4298, + "step": 24849 + }, + { + "epoch": 0.6823174080175728, + "grad_norm": 0.3889651298522949, + "learning_rate": 1.4810949984525693e-05, + "loss": 0.4828, + "step": 24850 + }, + { + "epoch": 0.6823448654585392, + "grad_norm": 0.4192187786102295, + "learning_rate": 1.4810571352706121e-05, + "loss": 0.4349, + "step": 24851 + }, + { + "epoch": 0.6823723228995058, + "grad_norm": 0.36454305052757263, + "learning_rate": 1.48101927119133e-05, + "loss": 0.4337, + "step": 24852 + }, + { + "epoch": 0.6823997803404722, + "grad_norm": 0.4030281901359558, + "learning_rate": 1.4809814062147934e-05, + "loss": 0.5325, + "step": 24853 + }, + { + "epoch": 0.6824272377814388, + "grad_norm": 0.36381566524505615, + "learning_rate": 1.480943540341073e-05, + "loss": 0.5145, + "step": 24854 + }, + { + "epoch": 0.6824546952224053, + "grad_norm": 0.42357510328292847, + "learning_rate": 1.4809056735702396e-05, + "loss": 0.503, + "step": 24855 + }, + { + "epoch": 0.6824821526633718, + "grad_norm": 0.3834179639816284, + "learning_rate": 1.4808678059023632e-05, + "loss": 0.5132, + "step": 24856 + }, + { + "epoch": 0.6825096101043383, + "grad_norm": 0.4150472581386566, + "learning_rate": 1.4808299373375153e-05, + "loss": 0.5936, + "step": 24857 + }, + { + "epoch": 0.6825370675453047, + "grad_norm": 0.495029091835022, + "learning_rate": 1.4807920678757657e-05, + "loss": 0.4828, + "step": 24858 + }, + { + "epoch": 0.6825645249862713, + "grad_norm": 0.4056919515132904, + "learning_rate": 1.4807541975171857e-05, + "loss": 0.4918, + "step": 24859 + }, + { + "epoch": 0.6825919824272377, + "grad_norm": 0.3829820454120636, + "learning_rate": 1.4807163262618459e-05, + "loss": 0.5496, + "step": 24860 + }, + { + "epoch": 0.6826194398682043, + "grad_norm": 0.3589872419834137, + "learning_rate": 1.4806784541098162e-05, + "loss": 0.533, + "step": 24861 + }, + { + "epoch": 0.6826468973091708, + "grad_norm": 0.3868025243282318, + "learning_rate": 1.4806405810611682e-05, + "loss": 0.58, + "step": 24862 + }, + { + "epoch": 0.6826743547501373, + "grad_norm": 0.4043683111667633, + "learning_rate": 1.4806027071159716e-05, + "loss": 0.4895, + "step": 24863 + }, + { + "epoch": 0.6827018121911038, + "grad_norm": 0.3368467688560486, + "learning_rate": 1.480564832274298e-05, + "loss": 0.4189, + "step": 24864 + }, + { + "epoch": 0.6827292696320703, + "grad_norm": 0.37265080213546753, + "learning_rate": 1.4805269565362177e-05, + "loss": 0.5494, + "step": 24865 + }, + { + "epoch": 0.6827567270730368, + "grad_norm": 0.4464718699455261, + "learning_rate": 1.4804890799018008e-05, + "loss": 0.5403, + "step": 24866 + }, + { + "epoch": 0.6827841845140032, + "grad_norm": 0.3574119806289673, + "learning_rate": 1.4804512023711188e-05, + "loss": 0.4588, + "step": 24867 + }, + { + "epoch": 0.6828116419549698, + "grad_norm": 0.37792500853538513, + "learning_rate": 1.4804133239442419e-05, + "loss": 0.4836, + "step": 24868 + }, + { + "epoch": 0.6828390993959363, + "grad_norm": 0.3386591374874115, + "learning_rate": 1.4803754446212408e-05, + "loss": 0.4961, + "step": 24869 + }, + { + "epoch": 0.6828665568369028, + "grad_norm": 0.5056691765785217, + "learning_rate": 1.480337564402186e-05, + "loss": 0.6012, + "step": 24870 + }, + { + "epoch": 0.6828940142778693, + "grad_norm": 0.33615219593048096, + "learning_rate": 1.4802996832871484e-05, + "loss": 0.4586, + "step": 24871 + }, + { + "epoch": 0.6829214717188358, + "grad_norm": 0.4249819219112396, + "learning_rate": 1.4802618012761988e-05, + "loss": 0.5109, + "step": 24872 + }, + { + "epoch": 0.6829489291598023, + "grad_norm": 0.36838701367378235, + "learning_rate": 1.4802239183694076e-05, + "loss": 0.4545, + "step": 24873 + }, + { + "epoch": 0.6829763866007688, + "grad_norm": 0.39959385991096497, + "learning_rate": 1.4801860345668452e-05, + "loss": 0.4946, + "step": 24874 + }, + { + "epoch": 0.6830038440417353, + "grad_norm": 0.35120922327041626, + "learning_rate": 1.480148149868583e-05, + "loss": 0.3723, + "step": 24875 + }, + { + "epoch": 0.6830313014827019, + "grad_norm": 0.3476572334766388, + "learning_rate": 1.480110264274691e-05, + "loss": 0.5063, + "step": 24876 + }, + { + "epoch": 0.6830587589236683, + "grad_norm": 0.4200665056705475, + "learning_rate": 1.4800723777852404e-05, + "loss": 0.5149, + "step": 24877 + }, + { + "epoch": 0.6830862163646348, + "grad_norm": 0.3658219873905182, + "learning_rate": 1.4800344904003014e-05, + "loss": 0.5494, + "step": 24878 + }, + { + "epoch": 0.6831136738056013, + "grad_norm": 0.34370124340057373, + "learning_rate": 1.4799966021199448e-05, + "loss": 0.4804, + "step": 24879 + }, + { + "epoch": 0.6831411312465678, + "grad_norm": 0.40713876485824585, + "learning_rate": 1.4799587129442414e-05, + "loss": 0.5802, + "step": 24880 + }, + { + "epoch": 0.6831685886875343, + "grad_norm": 0.7041587829589844, + "learning_rate": 1.479920822873262e-05, + "loss": 0.5646, + "step": 24881 + }, + { + "epoch": 0.6831960461285008, + "grad_norm": 0.364563524723053, + "learning_rate": 1.479882931907077e-05, + "loss": 0.3966, + "step": 24882 + }, + { + "epoch": 0.6832235035694674, + "grad_norm": 0.3685877323150635, + "learning_rate": 1.4798450400457571e-05, + "loss": 0.4529, + "step": 24883 + }, + { + "epoch": 0.6832509610104338, + "grad_norm": 0.36175569891929626, + "learning_rate": 1.479807147289373e-05, + "loss": 0.4592, + "step": 24884 + }, + { + "epoch": 0.6832784184514004, + "grad_norm": 0.3668641149997711, + "learning_rate": 1.4797692536379953e-05, + "loss": 0.4945, + "step": 24885 + }, + { + "epoch": 0.6833058758923668, + "grad_norm": 0.3530903458595276, + "learning_rate": 1.4797313590916952e-05, + "loss": 0.4972, + "step": 24886 + }, + { + "epoch": 0.6833333333333333, + "grad_norm": 0.38182833790779114, + "learning_rate": 1.4796934636505428e-05, + "loss": 0.5429, + "step": 24887 + }, + { + "epoch": 0.6833607907742998, + "grad_norm": 0.3649163842201233, + "learning_rate": 1.4796555673146089e-05, + "loss": 0.4949, + "step": 24888 + }, + { + "epoch": 0.6833882482152663, + "grad_norm": 0.3764351010322571, + "learning_rate": 1.4796176700839641e-05, + "loss": 0.4988, + "step": 24889 + }, + { + "epoch": 0.6834157056562329, + "grad_norm": 0.44681215286254883, + "learning_rate": 1.4795797719586798e-05, + "loss": 0.5556, + "step": 24890 + }, + { + "epoch": 0.6834431630971993, + "grad_norm": 0.40046921372413635, + "learning_rate": 1.479541872938826e-05, + "loss": 0.4293, + "step": 24891 + }, + { + "epoch": 0.6834706205381659, + "grad_norm": 0.3907008171081543, + "learning_rate": 1.4795039730244734e-05, + "loss": 0.4954, + "step": 24892 + }, + { + "epoch": 0.6834980779791323, + "grad_norm": 0.40889349579811096, + "learning_rate": 1.4794660722156931e-05, + "loss": 0.5006, + "step": 24893 + }, + { + "epoch": 0.6835255354200989, + "grad_norm": 0.3955991566181183, + "learning_rate": 1.479428170512555e-05, + "loss": 0.5082, + "step": 24894 + }, + { + "epoch": 0.6835529928610653, + "grad_norm": 0.40025994181632996, + "learning_rate": 1.4793902679151308e-05, + "loss": 0.5652, + "step": 24895 + }, + { + "epoch": 0.6835804503020319, + "grad_norm": 0.40693941712379456, + "learning_rate": 1.4793523644234909e-05, + "loss": 0.4547, + "step": 24896 + }, + { + "epoch": 0.6836079077429984, + "grad_norm": 0.36389395594596863, + "learning_rate": 1.4793144600377054e-05, + "loss": 0.4543, + "step": 24897 + }, + { + "epoch": 0.6836353651839648, + "grad_norm": 0.4055582284927368, + "learning_rate": 1.4792765547578456e-05, + "loss": 0.4703, + "step": 24898 + }, + { + "epoch": 0.6836628226249314, + "grad_norm": 0.3808901011943817, + "learning_rate": 1.4792386485839822e-05, + "loss": 0.4959, + "step": 24899 + }, + { + "epoch": 0.6836902800658978, + "grad_norm": 0.4612281620502472, + "learning_rate": 1.4792007415161855e-05, + "loss": 0.5233, + "step": 24900 + }, + { + "epoch": 0.6837177375068644, + "grad_norm": 0.3687084913253784, + "learning_rate": 1.4791628335545269e-05, + "loss": 0.4798, + "step": 24901 + }, + { + "epoch": 0.6837451949478308, + "grad_norm": 0.39728397130966187, + "learning_rate": 1.4791249246990763e-05, + "loss": 0.5932, + "step": 24902 + }, + { + "epoch": 0.6837726523887974, + "grad_norm": 0.4013398289680481, + "learning_rate": 1.479087014949905e-05, + "loss": 0.5159, + "step": 24903 + }, + { + "epoch": 0.6838001098297639, + "grad_norm": 0.37396004796028137, + "learning_rate": 1.4790491043070833e-05, + "loss": 0.4705, + "step": 24904 + }, + { + "epoch": 0.6838275672707304, + "grad_norm": 0.40817540884017944, + "learning_rate": 1.4790111927706823e-05, + "loss": 0.4784, + "step": 24905 + }, + { + "epoch": 0.6838550247116969, + "grad_norm": 0.3609885275363922, + "learning_rate": 1.4789732803407726e-05, + "loss": 0.4982, + "step": 24906 + }, + { + "epoch": 0.6838824821526633, + "grad_norm": 0.42398396134376526, + "learning_rate": 1.4789353670174247e-05, + "loss": 0.5199, + "step": 24907 + }, + { + "epoch": 0.6839099395936299, + "grad_norm": 0.3661375939846039, + "learning_rate": 1.4788974528007096e-05, + "loss": 0.4282, + "step": 24908 + }, + { + "epoch": 0.6839373970345963, + "grad_norm": 0.35768160223960876, + "learning_rate": 1.478859537690698e-05, + "loss": 0.4536, + "step": 24909 + }, + { + "epoch": 0.6839648544755629, + "grad_norm": 0.3608910143375397, + "learning_rate": 1.4788216216874603e-05, + "loss": 0.507, + "step": 24910 + }, + { + "epoch": 0.6839923119165294, + "grad_norm": 0.43883541226387024, + "learning_rate": 1.4787837047910676e-05, + "loss": 0.4287, + "step": 24911 + }, + { + "epoch": 0.6840197693574959, + "grad_norm": 0.4053664803504944, + "learning_rate": 1.4787457870015901e-05, + "loss": 0.463, + "step": 24912 + }, + { + "epoch": 0.6840472267984624, + "grad_norm": 0.3748384118080139, + "learning_rate": 1.4787078683190994e-05, + "loss": 0.6243, + "step": 24913 + }, + { + "epoch": 0.6840746842394289, + "grad_norm": 0.42392557859420776, + "learning_rate": 1.4786699487436655e-05, + "loss": 0.5504, + "step": 24914 + }, + { + "epoch": 0.6841021416803954, + "grad_norm": 0.3693647086620331, + "learning_rate": 1.4786320282753595e-05, + "loss": 0.5149, + "step": 24915 + }, + { + "epoch": 0.6841295991213618, + "grad_norm": 0.3919825851917267, + "learning_rate": 1.478594106914252e-05, + "loss": 0.5066, + "step": 24916 + }, + { + "epoch": 0.6841570565623284, + "grad_norm": 0.42959752678871155, + "learning_rate": 1.4785561846604138e-05, + "loss": 0.4099, + "step": 24917 + }, + { + "epoch": 0.6841845140032949, + "grad_norm": 0.3968738615512848, + "learning_rate": 1.4785182615139153e-05, + "loss": 0.522, + "step": 24918 + }, + { + "epoch": 0.6842119714442614, + "grad_norm": 0.3526715040206909, + "learning_rate": 1.4784803374748278e-05, + "loss": 0.4596, + "step": 24919 + }, + { + "epoch": 0.6842394288852279, + "grad_norm": 0.413411021232605, + "learning_rate": 1.4784424125432216e-05, + "loss": 0.5313, + "step": 24920 + }, + { + "epoch": 0.6842668863261944, + "grad_norm": 0.42681244015693665, + "learning_rate": 1.4784044867191676e-05, + "loss": 0.5624, + "step": 24921 + }, + { + "epoch": 0.6842943437671609, + "grad_norm": 0.4470168650150299, + "learning_rate": 1.4783665600027365e-05, + "loss": 0.4721, + "step": 24922 + }, + { + "epoch": 0.6843218012081274, + "grad_norm": 0.34993165731430054, + "learning_rate": 1.4783286323939991e-05, + "loss": 0.4156, + "step": 24923 + }, + { + "epoch": 0.6843492586490939, + "grad_norm": 0.42912545800209045, + "learning_rate": 1.4782907038930266e-05, + "loss": 0.5474, + "step": 24924 + }, + { + "epoch": 0.6843767160900605, + "grad_norm": 0.3474637567996979, + "learning_rate": 1.4782527744998888e-05, + "loss": 0.5479, + "step": 24925 + }, + { + "epoch": 0.6844041735310269, + "grad_norm": 0.44261348247528076, + "learning_rate": 1.4782148442146569e-05, + "loss": 0.4871, + "step": 24926 + }, + { + "epoch": 0.6844316309719934, + "grad_norm": 0.35779350996017456, + "learning_rate": 1.4781769130374017e-05, + "loss": 0.4876, + "step": 24927 + }, + { + "epoch": 0.6844590884129599, + "grad_norm": 0.45802047848701477, + "learning_rate": 1.478138980968194e-05, + "loss": 0.5213, + "step": 24928 + }, + { + "epoch": 0.6844865458539264, + "grad_norm": 0.5102986693382263, + "learning_rate": 1.4781010480071048e-05, + "loss": 0.5165, + "step": 24929 + }, + { + "epoch": 0.6845140032948929, + "grad_norm": 0.42031463980674744, + "learning_rate": 1.4780631141542043e-05, + "loss": 0.5854, + "step": 24930 + }, + { + "epoch": 0.6845414607358594, + "grad_norm": 0.3348855972290039, + "learning_rate": 1.4780251794095635e-05, + "loss": 0.5283, + "step": 24931 + }, + { + "epoch": 0.684568918176826, + "grad_norm": 0.4121032655239105, + "learning_rate": 1.4779872437732531e-05, + "loss": 0.4891, + "step": 24932 + }, + { + "epoch": 0.6845963756177924, + "grad_norm": 0.3642995357513428, + "learning_rate": 1.4779493072453439e-05, + "loss": 0.5107, + "step": 24933 + }, + { + "epoch": 0.684623833058759, + "grad_norm": 0.39876437187194824, + "learning_rate": 1.4779113698259072e-05, + "loss": 0.4427, + "step": 24934 + }, + { + "epoch": 0.6846512904997254, + "grad_norm": 0.47884130477905273, + "learning_rate": 1.4778734315150127e-05, + "loss": 0.5315, + "step": 24935 + }, + { + "epoch": 0.684678747940692, + "grad_norm": 0.3604498505592346, + "learning_rate": 1.4778354923127317e-05, + "loss": 0.4786, + "step": 24936 + }, + { + "epoch": 0.6847062053816584, + "grad_norm": 0.38389894366264343, + "learning_rate": 1.4777975522191355e-05, + "loss": 0.492, + "step": 24937 + }, + { + "epoch": 0.6847336628226249, + "grad_norm": 0.4527512490749359, + "learning_rate": 1.4777596112342938e-05, + "loss": 0.5591, + "step": 24938 + }, + { + "epoch": 0.6847611202635915, + "grad_norm": 0.3925826847553253, + "learning_rate": 1.4777216693582783e-05, + "loss": 0.6062, + "step": 24939 + }, + { + "epoch": 0.6847885777045579, + "grad_norm": 0.34460556507110596, + "learning_rate": 1.4776837265911594e-05, + "loss": 0.4643, + "step": 24940 + }, + { + "epoch": 0.6848160351455245, + "grad_norm": 0.37968993186950684, + "learning_rate": 1.4776457829330078e-05, + "loss": 0.4267, + "step": 24941 + }, + { + "epoch": 0.6848434925864909, + "grad_norm": 0.4676034152507782, + "learning_rate": 1.4776078383838945e-05, + "loss": 0.5278, + "step": 24942 + }, + { + "epoch": 0.6848709500274575, + "grad_norm": 0.38904330134391785, + "learning_rate": 1.47756989294389e-05, + "loss": 0.5015, + "step": 24943 + }, + { + "epoch": 0.6848984074684239, + "grad_norm": 0.3668583929538727, + "learning_rate": 1.4775319466130653e-05, + "loss": 0.522, + "step": 24944 + }, + { + "epoch": 0.6849258649093904, + "grad_norm": 0.39829540252685547, + "learning_rate": 1.4774939993914911e-05, + "loss": 0.4656, + "step": 24945 + }, + { + "epoch": 0.684953322350357, + "grad_norm": 0.3840712606906891, + "learning_rate": 1.4774560512792379e-05, + "loss": 0.4565, + "step": 24946 + }, + { + "epoch": 0.6849807797913234, + "grad_norm": 0.41839128732681274, + "learning_rate": 1.4774181022763775e-05, + "loss": 0.5362, + "step": 24947 + }, + { + "epoch": 0.68500823723229, + "grad_norm": 0.3668530583381653, + "learning_rate": 1.4773801523829795e-05, + "loss": 0.5099, + "step": 24948 + }, + { + "epoch": 0.6850356946732564, + "grad_norm": 0.3699326813220978, + "learning_rate": 1.477342201599115e-05, + "loss": 0.5032, + "step": 24949 + }, + { + "epoch": 0.685063152114223, + "grad_norm": 0.4102562963962555, + "learning_rate": 1.4773042499248552e-05, + "loss": 0.4784, + "step": 24950 + }, + { + "epoch": 0.6850906095551894, + "grad_norm": 0.45973747968673706, + "learning_rate": 1.4772662973602705e-05, + "loss": 0.4985, + "step": 24951 + }, + { + "epoch": 0.685118066996156, + "grad_norm": 0.40723615884780884, + "learning_rate": 1.4772283439054318e-05, + "loss": 0.4512, + "step": 24952 + }, + { + "epoch": 0.6851455244371225, + "grad_norm": 0.3389741778373718, + "learning_rate": 1.47719038956041e-05, + "loss": 0.5405, + "step": 24953 + }, + { + "epoch": 0.685172981878089, + "grad_norm": 0.40978842973709106, + "learning_rate": 1.477152434325276e-05, + "loss": 0.4712, + "step": 24954 + }, + { + "epoch": 0.6852004393190555, + "grad_norm": 0.38378196954727173, + "learning_rate": 1.4771144782001002e-05, + "loss": 0.5406, + "step": 24955 + }, + { + "epoch": 0.6852278967600219, + "grad_norm": 0.43732205033302307, + "learning_rate": 1.4770765211849535e-05, + "loss": 0.4949, + "step": 24956 + }, + { + "epoch": 0.6852553542009885, + "grad_norm": 0.4060264527797699, + "learning_rate": 1.4770385632799071e-05, + "loss": 0.4806, + "step": 24957 + }, + { + "epoch": 0.6852828116419549, + "grad_norm": 0.42101046442985535, + "learning_rate": 1.4770006044850316e-05, + "loss": 0.4894, + "step": 24958 + }, + { + "epoch": 0.6853102690829215, + "grad_norm": 0.3874875605106354, + "learning_rate": 1.4769626448003974e-05, + "loss": 0.5152, + "step": 24959 + }, + { + "epoch": 0.685337726523888, + "grad_norm": 0.3824084997177124, + "learning_rate": 1.4769246842260759e-05, + "loss": 0.5424, + "step": 24960 + }, + { + "epoch": 0.6853651839648545, + "grad_norm": 0.4230633080005646, + "learning_rate": 1.4768867227621375e-05, + "loss": 0.4938, + "step": 24961 + }, + { + "epoch": 0.685392641405821, + "grad_norm": 0.42961710691452026, + "learning_rate": 1.4768487604086529e-05, + "loss": 0.5427, + "step": 24962 + }, + { + "epoch": 0.6854200988467875, + "grad_norm": 0.37059178948402405, + "learning_rate": 1.4768107971656938e-05, + "loss": 0.5728, + "step": 24963 + }, + { + "epoch": 0.685447556287754, + "grad_norm": 0.37713146209716797, + "learning_rate": 1.4767728330333299e-05, + "loss": 0.5318, + "step": 24964 + }, + { + "epoch": 0.6854750137287204, + "grad_norm": 0.40062010288238525, + "learning_rate": 1.4767348680116326e-05, + "loss": 0.5003, + "step": 24965 + }, + { + "epoch": 0.685502471169687, + "grad_norm": 0.45412859320640564, + "learning_rate": 1.476696902100673e-05, + "loss": 0.5091, + "step": 24966 + }, + { + "epoch": 0.6855299286106535, + "grad_norm": 0.3741428852081299, + "learning_rate": 1.476658935300521e-05, + "loss": 0.4517, + "step": 24967 + }, + { + "epoch": 0.68555738605162, + "grad_norm": 0.425538569688797, + "learning_rate": 1.4766209676112482e-05, + "loss": 0.5778, + "step": 24968 + }, + { + "epoch": 0.6855848434925865, + "grad_norm": 0.4259178638458252, + "learning_rate": 1.4765829990329251e-05, + "loss": 0.5469, + "step": 24969 + }, + { + "epoch": 0.685612300933553, + "grad_norm": 0.5256428122520447, + "learning_rate": 1.4765450295656225e-05, + "loss": 0.5168, + "step": 24970 + }, + { + "epoch": 0.6856397583745195, + "grad_norm": 0.34909167885780334, + "learning_rate": 1.4765070592094116e-05, + "loss": 0.5063, + "step": 24971 + }, + { + "epoch": 0.685667215815486, + "grad_norm": 0.38031238317489624, + "learning_rate": 1.4764690879643626e-05, + "loss": 0.5727, + "step": 24972 + }, + { + "epoch": 0.6856946732564525, + "grad_norm": 0.4337478280067444, + "learning_rate": 1.476431115830547e-05, + "loss": 0.5525, + "step": 24973 + }, + { + "epoch": 0.685722130697419, + "grad_norm": 0.35913437604904175, + "learning_rate": 1.476393142808035e-05, + "loss": 0.4606, + "step": 24974 + }, + { + "epoch": 0.6857495881383855, + "grad_norm": 0.4607742130756378, + "learning_rate": 1.476355168896898e-05, + "loss": 0.5573, + "step": 24975 + }, + { + "epoch": 0.685777045579352, + "grad_norm": 0.4378810226917267, + "learning_rate": 1.4763171940972068e-05, + "loss": 0.5009, + "step": 24976 + }, + { + "epoch": 0.6858045030203185, + "grad_norm": 0.379414826631546, + "learning_rate": 1.4762792184090316e-05, + "loss": 0.5106, + "step": 24977 + }, + { + "epoch": 0.685831960461285, + "grad_norm": 0.34290239214897156, + "learning_rate": 1.4762412418324437e-05, + "loss": 0.4643, + "step": 24978 + }, + { + "epoch": 0.6858594179022515, + "grad_norm": 0.4200463593006134, + "learning_rate": 1.4762032643675137e-05, + "loss": 0.5193, + "step": 24979 + }, + { + "epoch": 0.685886875343218, + "grad_norm": 0.46877843141555786, + "learning_rate": 1.4761652860143132e-05, + "loss": 0.4636, + "step": 24980 + }, + { + "epoch": 0.6859143327841846, + "grad_norm": 0.3822246193885803, + "learning_rate": 1.476127306772912e-05, + "loss": 0.4352, + "step": 24981 + }, + { + "epoch": 0.685941790225151, + "grad_norm": 0.3386247158050537, + "learning_rate": 1.4760893266433816e-05, + "loss": 0.5464, + "step": 24982 + }, + { + "epoch": 0.6859692476661176, + "grad_norm": 1.7205625772476196, + "learning_rate": 1.4760513456257925e-05, + "loss": 0.5874, + "step": 24983 + }, + { + "epoch": 0.685996705107084, + "grad_norm": 0.3882298171520233, + "learning_rate": 1.4760133637202159e-05, + "loss": 0.3966, + "step": 24984 + }, + { + "epoch": 0.6860241625480505, + "grad_norm": 0.36815860867500305, + "learning_rate": 1.4759753809267223e-05, + "loss": 0.4937, + "step": 24985 + }, + { + "epoch": 0.686051619989017, + "grad_norm": 0.36421552300453186, + "learning_rate": 1.4759373972453828e-05, + "loss": 0.4762, + "step": 24986 + }, + { + "epoch": 0.6860790774299835, + "grad_norm": 0.4105118215084076, + "learning_rate": 1.4758994126762681e-05, + "loss": 0.4799, + "step": 24987 + }, + { + "epoch": 0.6861065348709501, + "grad_norm": 0.34058263897895813, + "learning_rate": 1.4758614272194491e-05, + "loss": 0.5007, + "step": 24988 + }, + { + "epoch": 0.6861339923119165, + "grad_norm": 0.4137830436229706, + "learning_rate": 1.4758234408749967e-05, + "loss": 0.5466, + "step": 24989 + }, + { + "epoch": 0.6861614497528831, + "grad_norm": 0.4226836562156677, + "learning_rate": 1.4757854536429817e-05, + "loss": 0.4598, + "step": 24990 + }, + { + "epoch": 0.6861889071938495, + "grad_norm": 0.3843311667442322, + "learning_rate": 1.475747465523475e-05, + "loss": 0.4898, + "step": 24991 + }, + { + "epoch": 0.6862163646348161, + "grad_norm": 0.4199639856815338, + "learning_rate": 1.4757094765165473e-05, + "loss": 0.5183, + "step": 24992 + }, + { + "epoch": 0.6862438220757825, + "grad_norm": 0.35397061705589294, + "learning_rate": 1.47567148662227e-05, + "loss": 0.4597, + "step": 24993 + }, + { + "epoch": 0.686271279516749, + "grad_norm": 0.4356652796268463, + "learning_rate": 1.475633495840713e-05, + "loss": 0.5085, + "step": 24994 + }, + { + "epoch": 0.6862987369577156, + "grad_norm": 0.36105191707611084, + "learning_rate": 1.475595504171948e-05, + "loss": 0.5558, + "step": 24995 + }, + { + "epoch": 0.686326194398682, + "grad_norm": 0.36948299407958984, + "learning_rate": 1.4755575116160455e-05, + "loss": 0.4741, + "step": 24996 + }, + { + "epoch": 0.6863536518396486, + "grad_norm": 0.3623158037662506, + "learning_rate": 1.4755195181730767e-05, + "loss": 0.3846, + "step": 24997 + }, + { + "epoch": 0.686381109280615, + "grad_norm": 0.41395583748817444, + "learning_rate": 1.4754815238431122e-05, + "loss": 0.4677, + "step": 24998 + }, + { + "epoch": 0.6864085667215816, + "grad_norm": 0.3729924261569977, + "learning_rate": 1.4754435286262224e-05, + "loss": 0.5122, + "step": 24999 + }, + { + "epoch": 0.686436024162548, + "grad_norm": 0.3699617385864258, + "learning_rate": 1.4754055325224794e-05, + "loss": 0.4486, + "step": 25000 + }, + { + "epoch": 0.6864634816035146, + "grad_norm": 0.3920876681804657, + "learning_rate": 1.4753675355319527e-05, + "loss": 0.5503, + "step": 25001 + }, + { + "epoch": 0.6864909390444811, + "grad_norm": 0.45700857043266296, + "learning_rate": 1.4753295376547143e-05, + "loss": 0.565, + "step": 25002 + }, + { + "epoch": 0.6865183964854475, + "grad_norm": 0.34269505739212036, + "learning_rate": 1.4752915388908342e-05, + "loss": 0.4277, + "step": 25003 + }, + { + "epoch": 0.6865458539264141, + "grad_norm": 0.3658309280872345, + "learning_rate": 1.4752535392403841e-05, + "loss": 0.4448, + "step": 25004 + }, + { + "epoch": 0.6865733113673805, + "grad_norm": 0.37079334259033203, + "learning_rate": 1.4752155387034344e-05, + "loss": 0.4879, + "step": 25005 + }, + { + "epoch": 0.6866007688083471, + "grad_norm": 0.3612517714500427, + "learning_rate": 1.4751775372800557e-05, + "loss": 0.4967, + "step": 25006 + }, + { + "epoch": 0.6866282262493135, + "grad_norm": 0.3654747009277344, + "learning_rate": 1.4751395349703197e-05, + "loss": 0.4719, + "step": 25007 + }, + { + "epoch": 0.6866556836902801, + "grad_norm": 0.37965530157089233, + "learning_rate": 1.4751015317742963e-05, + "loss": 0.5241, + "step": 25008 + }, + { + "epoch": 0.6866831411312466, + "grad_norm": 0.3599485456943512, + "learning_rate": 1.4750635276920573e-05, + "loss": 0.4624, + "step": 25009 + }, + { + "epoch": 0.6867105985722131, + "grad_norm": 0.3858427405357361, + "learning_rate": 1.475025522723673e-05, + "loss": 0.5038, + "step": 25010 + }, + { + "epoch": 0.6867380560131796, + "grad_norm": 0.4101352095603943, + "learning_rate": 1.4749875168692147e-05, + "loss": 0.5477, + "step": 25011 + }, + { + "epoch": 0.686765513454146, + "grad_norm": 0.4274534285068512, + "learning_rate": 1.474949510128753e-05, + "loss": 0.5704, + "step": 25012 + }, + { + "epoch": 0.6867929708951126, + "grad_norm": 0.3857730031013489, + "learning_rate": 1.4749115025023588e-05, + "loss": 0.5318, + "step": 25013 + }, + { + "epoch": 0.686820428336079, + "grad_norm": 2.4252800941467285, + "learning_rate": 1.4748734939901033e-05, + "loss": 0.411, + "step": 25014 + }, + { + "epoch": 0.6868478857770456, + "grad_norm": 0.35186588764190674, + "learning_rate": 1.474835484592057e-05, + "loss": 0.4944, + "step": 25015 + }, + { + "epoch": 0.6868753432180121, + "grad_norm": 0.4326329231262207, + "learning_rate": 1.4747974743082906e-05, + "loss": 0.5832, + "step": 25016 + }, + { + "epoch": 0.6869028006589786, + "grad_norm": 0.4257397949695587, + "learning_rate": 1.474759463138876e-05, + "loss": 0.5567, + "step": 25017 + }, + { + "epoch": 0.6869302580999451, + "grad_norm": 0.3894282281398773, + "learning_rate": 1.474721451083883e-05, + "loss": 0.5232, + "step": 25018 + }, + { + "epoch": 0.6869577155409116, + "grad_norm": 0.3541971743106842, + "learning_rate": 1.4746834381433833e-05, + "loss": 0.4159, + "step": 25019 + }, + { + "epoch": 0.6869851729818781, + "grad_norm": 0.41042444109916687, + "learning_rate": 1.4746454243174475e-05, + "loss": 0.543, + "step": 25020 + }, + { + "epoch": 0.6870126304228446, + "grad_norm": 0.40615513920783997, + "learning_rate": 1.4746074096061463e-05, + "loss": 0.5442, + "step": 25021 + }, + { + "epoch": 0.6870400878638111, + "grad_norm": 0.44850075244903564, + "learning_rate": 1.474569394009551e-05, + "loss": 0.5657, + "step": 25022 + }, + { + "epoch": 0.6870675453047776, + "grad_norm": 0.42442286014556885, + "learning_rate": 1.4745313775277322e-05, + "loss": 0.5105, + "step": 25023 + }, + { + "epoch": 0.6870950027457441, + "grad_norm": 0.42426684498786926, + "learning_rate": 1.4744933601607608e-05, + "loss": 0.4948, + "step": 25024 + }, + { + "epoch": 0.6871224601867106, + "grad_norm": 0.3726227283477783, + "learning_rate": 1.474455341908708e-05, + "loss": 0.4652, + "step": 25025 + }, + { + "epoch": 0.6871499176276771, + "grad_norm": 0.3685840368270874, + "learning_rate": 1.4744173227716446e-05, + "loss": 0.5292, + "step": 25026 + }, + { + "epoch": 0.6871773750686436, + "grad_norm": 0.30504414439201355, + "learning_rate": 1.4743793027496414e-05, + "loss": 0.4059, + "step": 25027 + }, + { + "epoch": 0.6872048325096101, + "grad_norm": 0.3608452379703522, + "learning_rate": 1.4743412818427694e-05, + "loss": 0.4795, + "step": 25028 + }, + { + "epoch": 0.6872322899505766, + "grad_norm": 0.37962475419044495, + "learning_rate": 1.4743032600510994e-05, + "loss": 0.508, + "step": 25029 + }, + { + "epoch": 0.6872597473915432, + "grad_norm": 0.4278847873210907, + "learning_rate": 1.4742652373747026e-05, + "loss": 0.4813, + "step": 25030 + }, + { + "epoch": 0.6872872048325096, + "grad_norm": 0.4042535722255707, + "learning_rate": 1.4742272138136496e-05, + "loss": 0.5658, + "step": 25031 + }, + { + "epoch": 0.6873146622734762, + "grad_norm": 0.3782712519168854, + "learning_rate": 1.4741891893680116e-05, + "loss": 0.5645, + "step": 25032 + }, + { + "epoch": 0.6873421197144426, + "grad_norm": 0.37286460399627686, + "learning_rate": 1.4741511640378593e-05, + "loss": 0.4959, + "step": 25033 + }, + { + "epoch": 0.6873695771554091, + "grad_norm": 0.40477001667022705, + "learning_rate": 1.474113137823264e-05, + "loss": 0.5579, + "step": 25034 + }, + { + "epoch": 0.6873970345963756, + "grad_norm": 0.3944624364376068, + "learning_rate": 1.4740751107242964e-05, + "loss": 0.5203, + "step": 25035 + }, + { + "epoch": 0.6874244920373421, + "grad_norm": 0.35118967294692993, + "learning_rate": 1.474037082741027e-05, + "loss": 0.4067, + "step": 25036 + }, + { + "epoch": 0.6874519494783087, + "grad_norm": 0.4388546645641327, + "learning_rate": 1.4739990538735276e-05, + "loss": 0.4794, + "step": 25037 + }, + { + "epoch": 0.6874794069192751, + "grad_norm": 0.3888486325740814, + "learning_rate": 1.4739610241218684e-05, + "loss": 0.5317, + "step": 25038 + }, + { + "epoch": 0.6875068643602417, + "grad_norm": 0.3496399223804474, + "learning_rate": 1.4739229934861206e-05, + "loss": 0.4152, + "step": 25039 + }, + { + "epoch": 0.6875343218012081, + "grad_norm": 0.5843207836151123, + "learning_rate": 1.4738849619663555e-05, + "loss": 0.5576, + "step": 25040 + }, + { + "epoch": 0.6875617792421747, + "grad_norm": 0.36091458797454834, + "learning_rate": 1.4738469295626435e-05, + "loss": 0.5032, + "step": 25041 + }, + { + "epoch": 0.6875892366831411, + "grad_norm": 0.4014441668987274, + "learning_rate": 1.4738088962750556e-05, + "loss": 0.4403, + "step": 25042 + }, + { + "epoch": 0.6876166941241076, + "grad_norm": 0.4212038516998291, + "learning_rate": 1.473770862103663e-05, + "loss": 0.5211, + "step": 25043 + }, + { + "epoch": 0.6876441515650741, + "grad_norm": 0.4082971513271332, + "learning_rate": 1.4737328270485364e-05, + "loss": 0.4951, + "step": 25044 + }, + { + "epoch": 0.6876716090060406, + "grad_norm": 0.38707464933395386, + "learning_rate": 1.4736947911097472e-05, + "loss": 0.5072, + "step": 25045 + }, + { + "epoch": 0.6876990664470072, + "grad_norm": 0.378704309463501, + "learning_rate": 1.4736567542873658e-05, + "loss": 0.4963, + "step": 25046 + }, + { + "epoch": 0.6877265238879736, + "grad_norm": 0.3576977252960205, + "learning_rate": 1.4736187165814633e-05, + "loss": 0.4656, + "step": 25047 + }, + { + "epoch": 0.6877539813289402, + "grad_norm": 0.39872992038726807, + "learning_rate": 1.4735806779921109e-05, + "loss": 0.5016, + "step": 25048 + }, + { + "epoch": 0.6877814387699066, + "grad_norm": 0.3615678548812866, + "learning_rate": 1.4735426385193792e-05, + "loss": 0.4991, + "step": 25049 + }, + { + "epoch": 0.6878088962108732, + "grad_norm": 0.34810179471969604, + "learning_rate": 1.4735045981633396e-05, + "loss": 0.5623, + "step": 25050 + }, + { + "epoch": 0.6878363536518396, + "grad_norm": 0.3768642246723175, + "learning_rate": 1.4734665569240627e-05, + "loss": 0.4401, + "step": 25051 + }, + { + "epoch": 0.6878638110928061, + "grad_norm": 0.3954802453517914, + "learning_rate": 1.4734285148016195e-05, + "loss": 0.5877, + "step": 25052 + }, + { + "epoch": 0.6878912685337727, + "grad_norm": 0.44435742497444153, + "learning_rate": 1.4733904717960812e-05, + "loss": 0.4486, + "step": 25053 + }, + { + "epoch": 0.6879187259747391, + "grad_norm": 0.3506354093551636, + "learning_rate": 1.4733524279075184e-05, + "loss": 0.4998, + "step": 25054 + }, + { + "epoch": 0.6879461834157057, + "grad_norm": 0.5023093223571777, + "learning_rate": 1.4733143831360021e-05, + "loss": 0.5652, + "step": 25055 + }, + { + "epoch": 0.6879736408566721, + "grad_norm": 0.49816906452178955, + "learning_rate": 1.4732763374816038e-05, + "loss": 0.4692, + "step": 25056 + }, + { + "epoch": 0.6880010982976387, + "grad_norm": 0.37674686312675476, + "learning_rate": 1.4732382909443936e-05, + "loss": 0.5367, + "step": 25057 + }, + { + "epoch": 0.6880285557386051, + "grad_norm": 0.4826428294181824, + "learning_rate": 1.4732002435244432e-05, + "loss": 0.4539, + "step": 25058 + }, + { + "epoch": 0.6880560131795717, + "grad_norm": 0.43274232745170593, + "learning_rate": 1.4731621952218236e-05, + "loss": 0.5841, + "step": 25059 + }, + { + "epoch": 0.6880834706205382, + "grad_norm": 0.34822770953178406, + "learning_rate": 1.473124146036605e-05, + "loss": 0.4099, + "step": 25060 + }, + { + "epoch": 0.6881109280615046, + "grad_norm": 0.4351721405982971, + "learning_rate": 1.473086095968859e-05, + "loss": 0.5288, + "step": 25061 + }, + { + "epoch": 0.6881383855024712, + "grad_norm": 0.4239504337310791, + "learning_rate": 1.4730480450186566e-05, + "loss": 0.4811, + "step": 25062 + }, + { + "epoch": 0.6881658429434376, + "grad_norm": 0.42835599184036255, + "learning_rate": 1.4730099931860687e-05, + "loss": 0.4789, + "step": 25063 + }, + { + "epoch": 0.6881933003844042, + "grad_norm": 0.37714943289756775, + "learning_rate": 1.472971940471166e-05, + "loss": 0.5495, + "step": 25064 + }, + { + "epoch": 0.6882207578253706, + "grad_norm": 0.3670966327190399, + "learning_rate": 1.4729338868740198e-05, + "loss": 0.5114, + "step": 25065 + }, + { + "epoch": 0.6882482152663372, + "grad_norm": 0.4600536823272705, + "learning_rate": 1.4728958323947006e-05, + "loss": 0.61, + "step": 25066 + }, + { + "epoch": 0.6882756727073037, + "grad_norm": 0.36692866683006287, + "learning_rate": 1.4728577770332801e-05, + "loss": 0.4459, + "step": 25067 + }, + { + "epoch": 0.6883031301482702, + "grad_norm": 0.4146535396575928, + "learning_rate": 1.472819720789829e-05, + "loss": 0.4765, + "step": 25068 + }, + { + "epoch": 0.6883305875892367, + "grad_norm": 0.4122669994831085, + "learning_rate": 1.4727816636644182e-05, + "loss": 0.5305, + "step": 25069 + }, + { + "epoch": 0.6883580450302031, + "grad_norm": 0.365607887506485, + "learning_rate": 1.4727436056571183e-05, + "loss": 0.4116, + "step": 25070 + }, + { + "epoch": 0.6883855024711697, + "grad_norm": 0.40892621874809265, + "learning_rate": 1.472705546768001e-05, + "loss": 0.482, + "step": 25071 + }, + { + "epoch": 0.6884129599121361, + "grad_norm": 0.37252146005630493, + "learning_rate": 1.4726674869971368e-05, + "loss": 0.4766, + "step": 25072 + }, + { + "epoch": 0.6884404173531027, + "grad_norm": 6.623183727264404, + "learning_rate": 1.4726294263445971e-05, + "loss": 0.4764, + "step": 25073 + }, + { + "epoch": 0.6884678747940692, + "grad_norm": 0.38651081919670105, + "learning_rate": 1.4725913648104527e-05, + "loss": 0.4928, + "step": 25074 + }, + { + "epoch": 0.6884953322350357, + "grad_norm": 0.410879522562027, + "learning_rate": 1.4725533023947743e-05, + "loss": 0.4696, + "step": 25075 + }, + { + "epoch": 0.6885227896760022, + "grad_norm": 0.3840910792350769, + "learning_rate": 1.4725152390976334e-05, + "loss": 0.5055, + "step": 25076 + }, + { + "epoch": 0.6885502471169687, + "grad_norm": 0.3773084282875061, + "learning_rate": 1.4724771749191007e-05, + "loss": 0.443, + "step": 25077 + }, + { + "epoch": 0.6885777045579352, + "grad_norm": 0.35352739691734314, + "learning_rate": 1.4724391098592471e-05, + "loss": 0.5276, + "step": 25078 + }, + { + "epoch": 0.6886051619989016, + "grad_norm": 0.3392471373081207, + "learning_rate": 1.4724010439181442e-05, + "loss": 0.4511, + "step": 25079 + }, + { + "epoch": 0.6886326194398682, + "grad_norm": 0.41726890206336975, + "learning_rate": 1.4723629770958621e-05, + "loss": 0.4964, + "step": 25080 + }, + { + "epoch": 0.6886600768808347, + "grad_norm": 0.32720935344696045, + "learning_rate": 1.4723249093924725e-05, + "loss": 0.4174, + "step": 25081 + }, + { + "epoch": 0.6886875343218012, + "grad_norm": 0.4577919840812683, + "learning_rate": 1.4722868408080464e-05, + "loss": 0.5203, + "step": 25082 + }, + { + "epoch": 0.6887149917627677, + "grad_norm": 0.4065510928630829, + "learning_rate": 1.4722487713426544e-05, + "loss": 0.5171, + "step": 25083 + }, + { + "epoch": 0.6887424492037342, + "grad_norm": 0.359680712223053, + "learning_rate": 1.4722107009963677e-05, + "loss": 0.4451, + "step": 25084 + }, + { + "epoch": 0.6887699066447007, + "grad_norm": 0.37972524762153625, + "learning_rate": 1.4721726297692573e-05, + "loss": 0.5947, + "step": 25085 + }, + { + "epoch": 0.6887973640856672, + "grad_norm": 0.3927869200706482, + "learning_rate": 1.4721345576613944e-05, + "loss": 0.4311, + "step": 25086 + }, + { + "epoch": 0.6888248215266337, + "grad_norm": 0.41468921303749084, + "learning_rate": 1.4720964846728498e-05, + "loss": 0.4945, + "step": 25087 + }, + { + "epoch": 0.6888522789676003, + "grad_norm": 0.31542977690696716, + "learning_rate": 1.4720584108036944e-05, + "loss": 0.3842, + "step": 25088 + }, + { + "epoch": 0.6888797364085667, + "grad_norm": 0.3769542872905731, + "learning_rate": 1.4720203360539996e-05, + "loss": 0.4527, + "step": 25089 + }, + { + "epoch": 0.6889071938495333, + "grad_norm": 0.35488808155059814, + "learning_rate": 1.4719822604238362e-05, + "loss": 0.4428, + "step": 25090 + }, + { + "epoch": 0.6889346512904997, + "grad_norm": 0.3445208966732025, + "learning_rate": 1.4719441839132751e-05, + "loss": 0.3965, + "step": 25091 + }, + { + "epoch": 0.6889621087314662, + "grad_norm": 0.4206531047821045, + "learning_rate": 1.4719061065223876e-05, + "loss": 0.4906, + "step": 25092 + }, + { + "epoch": 0.6889895661724327, + "grad_norm": 0.37973538041114807, + "learning_rate": 1.4718680282512448e-05, + "loss": 0.4633, + "step": 25093 + }, + { + "epoch": 0.6890170236133992, + "grad_norm": 0.39888596534729004, + "learning_rate": 1.4718299490999173e-05, + "loss": 0.4291, + "step": 25094 + }, + { + "epoch": 0.6890444810543658, + "grad_norm": 0.40059205889701843, + "learning_rate": 1.4717918690684763e-05, + "loss": 0.5751, + "step": 25095 + }, + { + "epoch": 0.6890719384953322, + "grad_norm": 0.4870827794075012, + "learning_rate": 1.4717537881569928e-05, + "loss": 0.4746, + "step": 25096 + }, + { + "epoch": 0.6890993959362988, + "grad_norm": 0.37847352027893066, + "learning_rate": 1.4717157063655383e-05, + "loss": 0.4501, + "step": 25097 + }, + { + "epoch": 0.6891268533772652, + "grad_norm": 0.3859655261039734, + "learning_rate": 1.471677623694183e-05, + "loss": 0.5017, + "step": 25098 + }, + { + "epoch": 0.6891543108182318, + "grad_norm": 0.34948059916496277, + "learning_rate": 1.4716395401429986e-05, + "loss": 0.4668, + "step": 25099 + }, + { + "epoch": 0.6891817682591982, + "grad_norm": 0.3598960041999817, + "learning_rate": 1.4716014557120562e-05, + "loss": 0.4752, + "step": 25100 + }, + { + "epoch": 0.6892092257001647, + "grad_norm": 0.3713364899158478, + "learning_rate": 1.471563370401426e-05, + "loss": 0.4527, + "step": 25101 + }, + { + "epoch": 0.6892366831411313, + "grad_norm": 0.4153587520122528, + "learning_rate": 1.4715252842111801e-05, + "loss": 0.5141, + "step": 25102 + }, + { + "epoch": 0.6892641405820977, + "grad_norm": 0.3808860778808594, + "learning_rate": 1.4714871971413889e-05, + "loss": 0.483, + "step": 25103 + }, + { + "epoch": 0.6892915980230643, + "grad_norm": 0.3795967996120453, + "learning_rate": 1.4714491091921235e-05, + "loss": 0.5017, + "step": 25104 + }, + { + "epoch": 0.6893190554640307, + "grad_norm": 0.4091937243938446, + "learning_rate": 1.4714110203634551e-05, + "loss": 0.4184, + "step": 25105 + }, + { + "epoch": 0.6893465129049973, + "grad_norm": 0.3974437713623047, + "learning_rate": 1.4713729306554548e-05, + "loss": 0.4831, + "step": 25106 + }, + { + "epoch": 0.6893739703459637, + "grad_norm": 0.3688446283340454, + "learning_rate": 1.4713348400681932e-05, + "loss": 0.5266, + "step": 25107 + }, + { + "epoch": 0.6894014277869303, + "grad_norm": 0.43463316559791565, + "learning_rate": 1.4712967486017421e-05, + "loss": 0.4446, + "step": 25108 + }, + { + "epoch": 0.6894288852278968, + "grad_norm": 0.3839513659477234, + "learning_rate": 1.471258656256172e-05, + "loss": 0.5215, + "step": 25109 + }, + { + "epoch": 0.6894563426688632, + "grad_norm": 0.3581148087978363, + "learning_rate": 1.4712205630315539e-05, + "loss": 0.4172, + "step": 25110 + }, + { + "epoch": 0.6894838001098298, + "grad_norm": 0.357438862323761, + "learning_rate": 1.4711824689279592e-05, + "loss": 0.4471, + "step": 25111 + }, + { + "epoch": 0.6895112575507962, + "grad_norm": 0.3794468343257904, + "learning_rate": 1.471144373945459e-05, + "loss": 0.4891, + "step": 25112 + }, + { + "epoch": 0.6895387149917628, + "grad_norm": 0.46722954511642456, + "learning_rate": 1.471106278084124e-05, + "loss": 0.5057, + "step": 25113 + }, + { + "epoch": 0.6895661724327292, + "grad_norm": 0.4025157690048218, + "learning_rate": 1.4710681813440253e-05, + "loss": 0.5166, + "step": 25114 + }, + { + "epoch": 0.6895936298736958, + "grad_norm": 0.42446282505989075, + "learning_rate": 1.4710300837252343e-05, + "loss": 0.4754, + "step": 25115 + }, + { + "epoch": 0.6896210873146623, + "grad_norm": 0.3991166353225708, + "learning_rate": 1.4709919852278219e-05, + "loss": 0.5271, + "step": 25116 + }, + { + "epoch": 0.6896485447556288, + "grad_norm": 0.3417428135871887, + "learning_rate": 1.4709538858518588e-05, + "loss": 0.5012, + "step": 25117 + }, + { + "epoch": 0.6896760021965953, + "grad_norm": 0.39597266912460327, + "learning_rate": 1.4709157855974168e-05, + "loss": 0.54, + "step": 25118 + }, + { + "epoch": 0.6897034596375617, + "grad_norm": 0.3713369369506836, + "learning_rate": 1.4708776844645663e-05, + "loss": 0.4756, + "step": 25119 + }, + { + "epoch": 0.6897309170785283, + "grad_norm": 0.33983322978019714, + "learning_rate": 1.4708395824533785e-05, + "loss": 0.3708, + "step": 25120 + }, + { + "epoch": 0.6897583745194947, + "grad_norm": 0.39624738693237305, + "learning_rate": 1.4708014795639249e-05, + "loss": 0.5013, + "step": 25121 + }, + { + "epoch": 0.6897858319604613, + "grad_norm": 0.3773285448551178, + "learning_rate": 1.4707633757962762e-05, + "loss": 0.5156, + "step": 25122 + }, + { + "epoch": 0.6898132894014278, + "grad_norm": 0.37116116285324097, + "learning_rate": 1.4707252711505036e-05, + "loss": 0.5224, + "step": 25123 + }, + { + "epoch": 0.6898407468423943, + "grad_norm": 0.3811199963092804, + "learning_rate": 1.470687165626678e-05, + "loss": 0.5417, + "step": 25124 + }, + { + "epoch": 0.6898682042833608, + "grad_norm": 0.39893168210983276, + "learning_rate": 1.4706490592248706e-05, + "loss": 0.3728, + "step": 25125 + }, + { + "epoch": 0.6898956617243273, + "grad_norm": 0.33234381675720215, + "learning_rate": 1.4706109519451528e-05, + "loss": 0.4073, + "step": 25126 + }, + { + "epoch": 0.6899231191652938, + "grad_norm": 0.4006821811199188, + "learning_rate": 1.4705728437875947e-05, + "loss": 0.4716, + "step": 25127 + }, + { + "epoch": 0.6899505766062602, + "grad_norm": 0.35408464074134827, + "learning_rate": 1.4705347347522687e-05, + "loss": 0.4999, + "step": 25128 + }, + { + "epoch": 0.6899780340472268, + "grad_norm": 0.3397924304008484, + "learning_rate": 1.470496624839245e-05, + "loss": 0.5501, + "step": 25129 + }, + { + "epoch": 0.6900054914881933, + "grad_norm": 0.32466262578964233, + "learning_rate": 1.470458514048595e-05, + "loss": 0.372, + "step": 25130 + }, + { + "epoch": 0.6900329489291598, + "grad_norm": 0.34637022018432617, + "learning_rate": 1.4704204023803899e-05, + "loss": 0.5191, + "step": 25131 + }, + { + "epoch": 0.6900604063701263, + "grad_norm": 0.36118608713150024, + "learning_rate": 1.4703822898347003e-05, + "loss": 0.4445, + "step": 25132 + }, + { + "epoch": 0.6900878638110928, + "grad_norm": 0.3964383602142334, + "learning_rate": 1.4703441764115975e-05, + "loss": 0.5861, + "step": 25133 + }, + { + "epoch": 0.6901153212520593, + "grad_norm": 0.4415261149406433, + "learning_rate": 1.470306062111153e-05, + "loss": 0.5025, + "step": 25134 + }, + { + "epoch": 0.6901427786930258, + "grad_norm": 0.4176766872406006, + "learning_rate": 1.4702679469334373e-05, + "loss": 0.4964, + "step": 25135 + }, + { + "epoch": 0.6901702361339923, + "grad_norm": 0.33790937066078186, + "learning_rate": 1.4702298308785222e-05, + "loss": 0.4865, + "step": 25136 + }, + { + "epoch": 0.6901976935749589, + "grad_norm": 0.41459140181541443, + "learning_rate": 1.4701917139464778e-05, + "loss": 0.5087, + "step": 25137 + }, + { + "epoch": 0.6902251510159253, + "grad_norm": 0.7533536553382874, + "learning_rate": 1.4701535961373763e-05, + "loss": 0.452, + "step": 25138 + }, + { + "epoch": 0.6902526084568918, + "grad_norm": 0.39161553978919983, + "learning_rate": 1.4701154774512883e-05, + "loss": 0.4935, + "step": 25139 + }, + { + "epoch": 0.6902800658978583, + "grad_norm": 0.3953689932823181, + "learning_rate": 1.4700773578882846e-05, + "loss": 0.5239, + "step": 25140 + }, + { + "epoch": 0.6903075233388248, + "grad_norm": 0.39209139347076416, + "learning_rate": 1.470039237448437e-05, + "loss": 0.4791, + "step": 25141 + }, + { + "epoch": 0.6903349807797913, + "grad_norm": 0.37539637088775635, + "learning_rate": 1.4700011161318155e-05, + "loss": 0.4776, + "step": 25142 + }, + { + "epoch": 0.6903624382207578, + "grad_norm": 0.35168296098709106, + "learning_rate": 1.4699629939384926e-05, + "loss": 0.4261, + "step": 25143 + }, + { + "epoch": 0.6903898956617244, + "grad_norm": 0.39977067708969116, + "learning_rate": 1.4699248708685383e-05, + "loss": 0.4836, + "step": 25144 + }, + { + "epoch": 0.6904173531026908, + "grad_norm": 0.4935271739959717, + "learning_rate": 1.4698867469220241e-05, + "loss": 0.5367, + "step": 25145 + }, + { + "epoch": 0.6904448105436574, + "grad_norm": 0.39314594864845276, + "learning_rate": 1.4698486220990215e-05, + "loss": 0.4944, + "step": 25146 + }, + { + "epoch": 0.6904722679846238, + "grad_norm": 0.39134353399276733, + "learning_rate": 1.4698104963996012e-05, + "loss": 0.4885, + "step": 25147 + }, + { + "epoch": 0.6904997254255903, + "grad_norm": 0.45482149720191956, + "learning_rate": 1.4697723698238342e-05, + "loss": 0.4756, + "step": 25148 + }, + { + "epoch": 0.6905271828665568, + "grad_norm": 0.37733834981918335, + "learning_rate": 1.4697342423717919e-05, + "loss": 0.5128, + "step": 25149 + }, + { + "epoch": 0.6905546403075233, + "grad_norm": 0.37441515922546387, + "learning_rate": 1.469696114043545e-05, + "loss": 0.4776, + "step": 25150 + }, + { + "epoch": 0.6905820977484899, + "grad_norm": 0.3624413013458252, + "learning_rate": 1.4696579848391654e-05, + "loss": 0.5185, + "step": 25151 + }, + { + "epoch": 0.6906095551894563, + "grad_norm": 0.38542863726615906, + "learning_rate": 1.4696198547587237e-05, + "loss": 0.5966, + "step": 25152 + }, + { + "epoch": 0.6906370126304229, + "grad_norm": 0.3669089674949646, + "learning_rate": 1.469581723802291e-05, + "loss": 0.4863, + "step": 25153 + }, + { + "epoch": 0.6906644700713893, + "grad_norm": 0.46713951230049133, + "learning_rate": 1.4695435919699383e-05, + "loss": 0.5836, + "step": 25154 + }, + { + "epoch": 0.6906919275123559, + "grad_norm": 0.4087623655796051, + "learning_rate": 1.469505459261737e-05, + "loss": 0.5799, + "step": 25155 + }, + { + "epoch": 0.6907193849533223, + "grad_norm": 0.3530656099319458, + "learning_rate": 1.4694673256777585e-05, + "loss": 0.4848, + "step": 25156 + }, + { + "epoch": 0.6907468423942889, + "grad_norm": 0.4121893048286438, + "learning_rate": 1.4694291912180734e-05, + "loss": 0.5801, + "step": 25157 + }, + { + "epoch": 0.6907742998352554, + "grad_norm": 0.3783740997314453, + "learning_rate": 1.469391055882753e-05, + "loss": 0.5875, + "step": 25158 + }, + { + "epoch": 0.6908017572762218, + "grad_norm": 0.3836696743965149, + "learning_rate": 1.4693529196718683e-05, + "loss": 0.4768, + "step": 25159 + }, + { + "epoch": 0.6908292147171884, + "grad_norm": 0.3699340522289276, + "learning_rate": 1.4693147825854908e-05, + "loss": 0.5046, + "step": 25160 + }, + { + "epoch": 0.6908566721581548, + "grad_norm": 0.3765628933906555, + "learning_rate": 1.4692766446236914e-05, + "loss": 0.5463, + "step": 25161 + }, + { + "epoch": 0.6908841295991214, + "grad_norm": 0.3935123085975647, + "learning_rate": 1.4692385057865414e-05, + "loss": 0.4037, + "step": 25162 + }, + { + "epoch": 0.6909115870400878, + "grad_norm": 0.44081082940101624, + "learning_rate": 1.4692003660741116e-05, + "loss": 0.6421, + "step": 25163 + }, + { + "epoch": 0.6909390444810544, + "grad_norm": 0.4446166157722473, + "learning_rate": 1.4691622254864736e-05, + "loss": 0.5896, + "step": 25164 + }, + { + "epoch": 0.6909665019220209, + "grad_norm": 0.40320149064064026, + "learning_rate": 1.4691240840236982e-05, + "loss": 0.6174, + "step": 25165 + }, + { + "epoch": 0.6909939593629874, + "grad_norm": 0.5162468552589417, + "learning_rate": 1.4690859416858567e-05, + "loss": 0.4752, + "step": 25166 + }, + { + "epoch": 0.6910214168039539, + "grad_norm": 0.414180725812912, + "learning_rate": 1.46904779847302e-05, + "loss": 0.5809, + "step": 25167 + }, + { + "epoch": 0.6910488742449203, + "grad_norm": 0.37775689363479614, + "learning_rate": 1.4690096543852599e-05, + "loss": 0.503, + "step": 25168 + }, + { + "epoch": 0.6910763316858869, + "grad_norm": 0.3934357166290283, + "learning_rate": 1.468971509422647e-05, + "loss": 0.486, + "step": 25169 + }, + { + "epoch": 0.6911037891268533, + "grad_norm": 0.44130611419677734, + "learning_rate": 1.4689333635852521e-05, + "loss": 0.6161, + "step": 25170 + }, + { + "epoch": 0.6911312465678199, + "grad_norm": 0.3749227523803711, + "learning_rate": 1.4688952168731472e-05, + "loss": 0.5143, + "step": 25171 + }, + { + "epoch": 0.6911587040087864, + "grad_norm": 0.36254534125328064, + "learning_rate": 1.4688570692864028e-05, + "loss": 0.4728, + "step": 25172 + }, + { + "epoch": 0.6911861614497529, + "grad_norm": 0.3468341827392578, + "learning_rate": 1.4688189208250906e-05, + "loss": 0.5047, + "step": 25173 + }, + { + "epoch": 0.6912136188907194, + "grad_norm": 0.4492226541042328, + "learning_rate": 1.4687807714892815e-05, + "loss": 0.5027, + "step": 25174 + }, + { + "epoch": 0.6912410763316859, + "grad_norm": 0.4727918803691864, + "learning_rate": 1.4687426212790464e-05, + "loss": 0.4858, + "step": 25175 + }, + { + "epoch": 0.6912685337726524, + "grad_norm": 0.39062702655792236, + "learning_rate": 1.4687044701944568e-05, + "loss": 0.4804, + "step": 25176 + }, + { + "epoch": 0.6912959912136188, + "grad_norm": 0.40380164980888367, + "learning_rate": 1.4686663182355836e-05, + "loss": 0.537, + "step": 25177 + }, + { + "epoch": 0.6913234486545854, + "grad_norm": 0.380109041929245, + "learning_rate": 1.4686281654024986e-05, + "loss": 0.4731, + "step": 25178 + }, + { + "epoch": 0.6913509060955519, + "grad_norm": 0.3816669285297394, + "learning_rate": 1.468590011695272e-05, + "loss": 0.4819, + "step": 25179 + }, + { + "epoch": 0.6913783635365184, + "grad_norm": 0.4698818624019623, + "learning_rate": 1.4685518571139759e-05, + "loss": 0.4723, + "step": 25180 + }, + { + "epoch": 0.6914058209774849, + "grad_norm": 0.40228191018104553, + "learning_rate": 1.4685137016586808e-05, + "loss": 0.4878, + "step": 25181 + }, + { + "epoch": 0.6914332784184514, + "grad_norm": 0.38203513622283936, + "learning_rate": 1.4684755453294582e-05, + "loss": 0.52, + "step": 25182 + }, + { + "epoch": 0.6914607358594179, + "grad_norm": 0.36494672298431396, + "learning_rate": 1.4684373881263792e-05, + "loss": 0.4669, + "step": 25183 + }, + { + "epoch": 0.6914881933003844, + "grad_norm": 0.33518001437187195, + "learning_rate": 1.4683992300495147e-05, + "loss": 0.4987, + "step": 25184 + }, + { + "epoch": 0.6915156507413509, + "grad_norm": 0.3928460478782654, + "learning_rate": 1.4683610710989365e-05, + "loss": 0.5583, + "step": 25185 + }, + { + "epoch": 0.6915431081823175, + "grad_norm": 0.3606477379798889, + "learning_rate": 1.4683229112747153e-05, + "loss": 0.5413, + "step": 25186 + }, + { + "epoch": 0.6915705656232839, + "grad_norm": 0.3750912845134735, + "learning_rate": 1.4682847505769222e-05, + "loss": 0.432, + "step": 25187 + }, + { + "epoch": 0.6915980230642504, + "grad_norm": 0.4567478895187378, + "learning_rate": 1.4682465890056288e-05, + "loss": 0.6373, + "step": 25188 + }, + { + "epoch": 0.6916254805052169, + "grad_norm": 0.37875521183013916, + "learning_rate": 1.468208426560906e-05, + "loss": 0.5336, + "step": 25189 + }, + { + "epoch": 0.6916529379461834, + "grad_norm": 0.3795166313648224, + "learning_rate": 1.468170263242825e-05, + "loss": 0.4831, + "step": 25190 + }, + { + "epoch": 0.6916803953871499, + "grad_norm": 0.37683531641960144, + "learning_rate": 1.4681320990514571e-05, + "loss": 0.547, + "step": 25191 + }, + { + "epoch": 0.6917078528281164, + "grad_norm": 0.3660186231136322, + "learning_rate": 1.4680939339868733e-05, + "loss": 0.57, + "step": 25192 + }, + { + "epoch": 0.691735310269083, + "grad_norm": 0.4044761061668396, + "learning_rate": 1.4680557680491452e-05, + "loss": 0.4682, + "step": 25193 + }, + { + "epoch": 0.6917627677100494, + "grad_norm": 0.3722977042198181, + "learning_rate": 1.4680176012383433e-05, + "loss": 0.4952, + "step": 25194 + }, + { + "epoch": 0.691790225151016, + "grad_norm": 0.3607144057750702, + "learning_rate": 1.4679794335545397e-05, + "loss": 0.4785, + "step": 25195 + }, + { + "epoch": 0.6918176825919824, + "grad_norm": 0.37791478633880615, + "learning_rate": 1.4679412649978049e-05, + "loss": 0.4927, + "step": 25196 + }, + { + "epoch": 0.691845140032949, + "grad_norm": 0.401456356048584, + "learning_rate": 1.46790309556821e-05, + "loss": 0.5625, + "step": 25197 + }, + { + "epoch": 0.6918725974739154, + "grad_norm": 0.4532018005847931, + "learning_rate": 1.4678649252658267e-05, + "loss": 0.5409, + "step": 25198 + }, + { + "epoch": 0.6919000549148819, + "grad_norm": 0.4127953350543976, + "learning_rate": 1.467826754090726e-05, + "loss": 0.6087, + "step": 25199 + }, + { + "epoch": 0.6919275123558485, + "grad_norm": 0.38508889079093933, + "learning_rate": 1.4677885820429793e-05, + "loss": 0.4267, + "step": 25200 + }, + { + "epoch": 0.6919549697968149, + "grad_norm": 0.3701131343841553, + "learning_rate": 1.4677504091226576e-05, + "loss": 0.5291, + "step": 25201 + }, + { + "epoch": 0.6919824272377815, + "grad_norm": 0.37522387504577637, + "learning_rate": 1.4677122353298317e-05, + "loss": 0.4914, + "step": 25202 + }, + { + "epoch": 0.6920098846787479, + "grad_norm": 0.4703931510448456, + "learning_rate": 1.4676740606645734e-05, + "loss": 0.584, + "step": 25203 + }, + { + "epoch": 0.6920373421197145, + "grad_norm": 0.3892243206501007, + "learning_rate": 1.4676358851269536e-05, + "loss": 0.4829, + "step": 25204 + }, + { + "epoch": 0.6920647995606809, + "grad_norm": 0.3794500529766083, + "learning_rate": 1.467597708717044e-05, + "loss": 0.4882, + "step": 25205 + }, + { + "epoch": 0.6920922570016474, + "grad_norm": 0.3992704451084137, + "learning_rate": 1.467559531434915e-05, + "loss": 0.4585, + "step": 25206 + }, + { + "epoch": 0.692119714442614, + "grad_norm": 0.3610994219779968, + "learning_rate": 1.4675213532806382e-05, + "loss": 0.5107, + "step": 25207 + }, + { + "epoch": 0.6921471718835804, + "grad_norm": 0.5334581136703491, + "learning_rate": 1.4674831742542851e-05, + "loss": 0.4696, + "step": 25208 + }, + { + "epoch": 0.692174629324547, + "grad_norm": 0.3638720214366913, + "learning_rate": 1.467444994355927e-05, + "loss": 0.5806, + "step": 25209 + }, + { + "epoch": 0.6922020867655134, + "grad_norm": 0.3518778681755066, + "learning_rate": 1.4674068135856343e-05, + "loss": 0.4581, + "step": 25210 + }, + { + "epoch": 0.69222954420648, + "grad_norm": 0.43035128712654114, + "learning_rate": 1.467368631943479e-05, + "loss": 0.4631, + "step": 25211 + }, + { + "epoch": 0.6922570016474464, + "grad_norm": 0.34792494773864746, + "learning_rate": 1.4673304494295317e-05, + "loss": 0.5073, + "step": 25212 + }, + { + "epoch": 0.692284459088413, + "grad_norm": 0.37970224022865295, + "learning_rate": 1.4672922660438641e-05, + "loss": 0.5415, + "step": 25213 + }, + { + "epoch": 0.6923119165293795, + "grad_norm": 0.38961437344551086, + "learning_rate": 1.4672540817865474e-05, + "loss": 0.5444, + "step": 25214 + }, + { + "epoch": 0.692339373970346, + "grad_norm": 0.4083119034767151, + "learning_rate": 1.4672158966576527e-05, + "loss": 0.4804, + "step": 25215 + }, + { + "epoch": 0.6923668314113125, + "grad_norm": 0.3417511284351349, + "learning_rate": 1.4671777106572511e-05, + "loss": 0.4263, + "step": 25216 + }, + { + "epoch": 0.6923942888522789, + "grad_norm": 0.4251704514026642, + "learning_rate": 1.467139523785414e-05, + "loss": 0.6044, + "step": 25217 + }, + { + "epoch": 0.6924217462932455, + "grad_norm": 0.3595302999019623, + "learning_rate": 1.4671013360422126e-05, + "loss": 0.4547, + "step": 25218 + }, + { + "epoch": 0.6924492037342119, + "grad_norm": 0.3667217791080475, + "learning_rate": 1.4670631474277184e-05, + "loss": 0.5544, + "step": 25219 + }, + { + "epoch": 0.6924766611751785, + "grad_norm": 0.3650515675544739, + "learning_rate": 1.467024957942002e-05, + "loss": 0.4541, + "step": 25220 + }, + { + "epoch": 0.692504118616145, + "grad_norm": 0.38099753856658936, + "learning_rate": 1.466986767585135e-05, + "loss": 0.4635, + "step": 25221 + }, + { + "epoch": 0.6925315760571115, + "grad_norm": 0.4211656153202057, + "learning_rate": 1.4669485763571887e-05, + "loss": 0.5122, + "step": 25222 + }, + { + "epoch": 0.692559033498078, + "grad_norm": 0.4039934575557709, + "learning_rate": 1.4669103842582345e-05, + "loss": 0.5063, + "step": 25223 + }, + { + "epoch": 0.6925864909390445, + "grad_norm": 0.39328083395957947, + "learning_rate": 1.4668721912883433e-05, + "loss": 0.4823, + "step": 25224 + }, + { + "epoch": 0.692613948380011, + "grad_norm": 0.3448216915130615, + "learning_rate": 1.4668339974475862e-05, + "loss": 0.5185, + "step": 25225 + }, + { + "epoch": 0.6926414058209774, + "grad_norm": 0.4108954966068268, + "learning_rate": 1.466795802736035e-05, + "loss": 0.4397, + "step": 25226 + }, + { + "epoch": 0.692668863261944, + "grad_norm": 0.49058103561401367, + "learning_rate": 1.4667576071537605e-05, + "loss": 0.4965, + "step": 25227 + }, + { + "epoch": 0.6926963207029105, + "grad_norm": 0.34978529810905457, + "learning_rate": 1.466719410700834e-05, + "loss": 0.4537, + "step": 25228 + }, + { + "epoch": 0.692723778143877, + "grad_norm": 0.35640397667884827, + "learning_rate": 1.4666812133773271e-05, + "loss": 0.3783, + "step": 25229 + }, + { + "epoch": 0.6927512355848435, + "grad_norm": 0.373700350522995, + "learning_rate": 1.4666430151833107e-05, + "loss": 0.4836, + "step": 25230 + }, + { + "epoch": 0.69277869302581, + "grad_norm": 0.38002943992614746, + "learning_rate": 1.466604816118856e-05, + "loss": 0.4481, + "step": 25231 + }, + { + "epoch": 0.6928061504667765, + "grad_norm": 0.38804709911346436, + "learning_rate": 1.4665666161840347e-05, + "loss": 0.5381, + "step": 25232 + }, + { + "epoch": 0.692833607907743, + "grad_norm": 0.4911561608314514, + "learning_rate": 1.4665284153789175e-05, + "loss": 0.522, + "step": 25233 + }, + { + "epoch": 0.6928610653487095, + "grad_norm": 0.3446517586708069, + "learning_rate": 1.466490213703576e-05, + "loss": 0.5039, + "step": 25234 + }, + { + "epoch": 0.692888522789676, + "grad_norm": 0.4756913185119629, + "learning_rate": 1.4664520111580812e-05, + "loss": 0.5168, + "step": 25235 + }, + { + "epoch": 0.6929159802306425, + "grad_norm": 0.39775654673576355, + "learning_rate": 1.4664138077425045e-05, + "loss": 0.5365, + "step": 25236 + }, + { + "epoch": 0.692943437671609, + "grad_norm": 0.39302608370780945, + "learning_rate": 1.4663756034569174e-05, + "loss": 0.5126, + "step": 25237 + }, + { + "epoch": 0.6929708951125755, + "grad_norm": 0.3990744352340698, + "learning_rate": 1.466337398301391e-05, + "loss": 0.4602, + "step": 25238 + }, + { + "epoch": 0.692998352553542, + "grad_norm": 0.39376354217529297, + "learning_rate": 1.4662991922759963e-05, + "loss": 0.5681, + "step": 25239 + }, + { + "epoch": 0.6930258099945085, + "grad_norm": 0.41882070899009705, + "learning_rate": 1.4662609853808047e-05, + "loss": 0.4373, + "step": 25240 + }, + { + "epoch": 0.693053267435475, + "grad_norm": 0.41154372692108154, + "learning_rate": 1.4662227776158877e-05, + "loss": 0.4955, + "step": 25241 + }, + { + "epoch": 0.6930807248764416, + "grad_norm": 0.3723752498626709, + "learning_rate": 1.4661845689813165e-05, + "loss": 0.4783, + "step": 25242 + }, + { + "epoch": 0.693108182317408, + "grad_norm": 0.38918331265449524, + "learning_rate": 1.4661463594771622e-05, + "loss": 0.5411, + "step": 25243 + }, + { + "epoch": 0.6931356397583746, + "grad_norm": 0.37669679522514343, + "learning_rate": 1.4661081491034963e-05, + "loss": 0.494, + "step": 25244 + }, + { + "epoch": 0.693163097199341, + "grad_norm": 0.3923669457435608, + "learning_rate": 1.4660699378603899e-05, + "loss": 0.5222, + "step": 25245 + }, + { + "epoch": 0.6931905546403075, + "grad_norm": 0.3321725130081177, + "learning_rate": 1.466031725747914e-05, + "loss": 0.4621, + "step": 25246 + }, + { + "epoch": 0.693218012081274, + "grad_norm": 0.38330796360969543, + "learning_rate": 1.4659935127661407e-05, + "loss": 0.4775, + "step": 25247 + }, + { + "epoch": 0.6932454695222405, + "grad_norm": 0.37005865573883057, + "learning_rate": 1.4659552989151403e-05, + "loss": 0.5532, + "step": 25248 + }, + { + "epoch": 0.6932729269632071, + "grad_norm": 0.39477023482322693, + "learning_rate": 1.4659170841949848e-05, + "loss": 0.4919, + "step": 25249 + }, + { + "epoch": 0.6933003844041735, + "grad_norm": 0.3802751302719116, + "learning_rate": 1.4658788686057453e-05, + "loss": 0.5338, + "step": 25250 + }, + { + "epoch": 0.6933278418451401, + "grad_norm": 0.3821379542350769, + "learning_rate": 1.4658406521474928e-05, + "loss": 0.5943, + "step": 25251 + }, + { + "epoch": 0.6933552992861065, + "grad_norm": 0.35306933522224426, + "learning_rate": 1.465802434820299e-05, + "loss": 0.4939, + "step": 25252 + }, + { + "epoch": 0.6933827567270731, + "grad_norm": 0.4965232312679291, + "learning_rate": 1.4657642166242347e-05, + "loss": 0.5358, + "step": 25253 + }, + { + "epoch": 0.6934102141680395, + "grad_norm": 0.42576315999031067, + "learning_rate": 1.4657259975593718e-05, + "loss": 0.5183, + "step": 25254 + }, + { + "epoch": 0.693437671609006, + "grad_norm": 0.4263446033000946, + "learning_rate": 1.4656877776257812e-05, + "loss": 0.4374, + "step": 25255 + }, + { + "epoch": 0.6934651290499726, + "grad_norm": 0.3645632565021515, + "learning_rate": 1.465649556823534e-05, + "loss": 0.39, + "step": 25256 + }, + { + "epoch": 0.693492586490939, + "grad_norm": 0.3877979516983032, + "learning_rate": 1.4656113351527022e-05, + "loss": 0.4432, + "step": 25257 + }, + { + "epoch": 0.6935200439319056, + "grad_norm": 0.4157184362411499, + "learning_rate": 1.4655731126133566e-05, + "loss": 0.5022, + "step": 25258 + }, + { + "epoch": 0.693547501372872, + "grad_norm": 0.3689919412136078, + "learning_rate": 1.465534889205568e-05, + "loss": 0.4733, + "step": 25259 + }, + { + "epoch": 0.6935749588138386, + "grad_norm": 0.3436318635940552, + "learning_rate": 1.465496664929409e-05, + "loss": 0.4639, + "step": 25260 + }, + { + "epoch": 0.693602416254805, + "grad_norm": 0.4040205478668213, + "learning_rate": 1.4654584397849496e-05, + "loss": 0.4947, + "step": 25261 + }, + { + "epoch": 0.6936298736957716, + "grad_norm": 0.36143758893013, + "learning_rate": 1.4654202137722617e-05, + "loss": 0.4977, + "step": 25262 + }, + { + "epoch": 0.6936573311367381, + "grad_norm": 0.38464444875717163, + "learning_rate": 1.465381986891417e-05, + "loss": 0.5376, + "step": 25263 + }, + { + "epoch": 0.6936847885777045, + "grad_norm": 0.9891734719276428, + "learning_rate": 1.4653437591424857e-05, + "loss": 0.4806, + "step": 25264 + }, + { + "epoch": 0.6937122460186711, + "grad_norm": 0.507115364074707, + "learning_rate": 1.4653055305255404e-05, + "loss": 0.5676, + "step": 25265 + }, + { + "epoch": 0.6937397034596375, + "grad_norm": 0.39354366064071655, + "learning_rate": 1.4652673010406514e-05, + "loss": 0.5126, + "step": 25266 + }, + { + "epoch": 0.6937671609006041, + "grad_norm": 0.37439870834350586, + "learning_rate": 1.4652290706878905e-05, + "loss": 0.4475, + "step": 25267 + }, + { + "epoch": 0.6937946183415705, + "grad_norm": 0.4412074089050293, + "learning_rate": 1.4651908394673289e-05, + "loss": 0.478, + "step": 25268 + }, + { + "epoch": 0.6938220757825371, + "grad_norm": 0.36869552731513977, + "learning_rate": 1.465152607379038e-05, + "loss": 0.4779, + "step": 25269 + }, + { + "epoch": 0.6938495332235036, + "grad_norm": 0.38996580243110657, + "learning_rate": 1.465114374423089e-05, + "loss": 0.4829, + "step": 25270 + }, + { + "epoch": 0.6938769906644701, + "grad_norm": 0.3765906095504761, + "learning_rate": 1.4650761405995528e-05, + "loss": 0.4821, + "step": 25271 + }, + { + "epoch": 0.6939044481054366, + "grad_norm": 0.3175306022167206, + "learning_rate": 1.4650379059085017e-05, + "loss": 0.4082, + "step": 25272 + }, + { + "epoch": 0.693931905546403, + "grad_norm": 0.4054983854293823, + "learning_rate": 1.4649996703500064e-05, + "loss": 0.5091, + "step": 25273 + }, + { + "epoch": 0.6939593629873696, + "grad_norm": 0.39155060052871704, + "learning_rate": 1.4649614339241382e-05, + "loss": 0.52, + "step": 25274 + }, + { + "epoch": 0.693986820428336, + "grad_norm": 0.3766295611858368, + "learning_rate": 1.4649231966309684e-05, + "loss": 0.4237, + "step": 25275 + }, + { + "epoch": 0.6940142778693026, + "grad_norm": 0.3961699903011322, + "learning_rate": 1.4648849584705688e-05, + "loss": 0.49, + "step": 25276 + }, + { + "epoch": 0.6940417353102691, + "grad_norm": 0.4775618612766266, + "learning_rate": 1.4648467194430101e-05, + "loss": 0.5444, + "step": 25277 + }, + { + "epoch": 0.6940691927512356, + "grad_norm": 0.4622125029563904, + "learning_rate": 1.4648084795483641e-05, + "loss": 0.6007, + "step": 25278 + }, + { + "epoch": 0.6940966501922021, + "grad_norm": 0.41597622632980347, + "learning_rate": 1.4647702387867018e-05, + "loss": 0.5353, + "step": 25279 + }, + { + "epoch": 0.6941241076331686, + "grad_norm": 0.3501005172729492, + "learning_rate": 1.4647319971580946e-05, + "loss": 0.511, + "step": 25280 + }, + { + "epoch": 0.6941515650741351, + "grad_norm": 0.3766581118106842, + "learning_rate": 1.4646937546626141e-05, + "loss": 0.5035, + "step": 25281 + }, + { + "epoch": 0.6941790225151016, + "grad_norm": 0.4425783157348633, + "learning_rate": 1.464655511300331e-05, + "loss": 0.5265, + "step": 25282 + }, + { + "epoch": 0.6942064799560681, + "grad_norm": 0.36914992332458496, + "learning_rate": 1.4646172670713177e-05, + "loss": 0.4661, + "step": 25283 + }, + { + "epoch": 0.6942339373970347, + "grad_norm": 0.37693244218826294, + "learning_rate": 1.4645790219756444e-05, + "loss": 0.5499, + "step": 25284 + }, + { + "epoch": 0.6942613948380011, + "grad_norm": 0.37350693345069885, + "learning_rate": 1.4645407760133834e-05, + "loss": 0.5183, + "step": 25285 + }, + { + "epoch": 0.6942888522789676, + "grad_norm": 0.3501913249492645, + "learning_rate": 1.4645025291846054e-05, + "loss": 0.4275, + "step": 25286 + }, + { + "epoch": 0.6943163097199341, + "grad_norm": 0.34879228472709656, + "learning_rate": 1.4644642814893817e-05, + "loss": 0.4898, + "step": 25287 + }, + { + "epoch": 0.6943437671609006, + "grad_norm": 0.45376527309417725, + "learning_rate": 1.4644260329277841e-05, + "loss": 0.513, + "step": 25288 + }, + { + "epoch": 0.6943712246018671, + "grad_norm": 0.3733663856983185, + "learning_rate": 1.4643877834998836e-05, + "loss": 0.4905, + "step": 25289 + }, + { + "epoch": 0.6943986820428336, + "grad_norm": 0.34017378091812134, + "learning_rate": 1.4643495332057518e-05, + "loss": 0.4182, + "step": 25290 + }, + { + "epoch": 0.6944261394838002, + "grad_norm": 0.4982990622520447, + "learning_rate": 1.4643112820454598e-05, + "loss": 0.5068, + "step": 25291 + }, + { + "epoch": 0.6944535969247666, + "grad_norm": 0.38283804059028625, + "learning_rate": 1.464273030019079e-05, + "loss": 0.4342, + "step": 25292 + }, + { + "epoch": 0.6944810543657332, + "grad_norm": 0.36872217059135437, + "learning_rate": 1.4642347771266811e-05, + "loss": 0.5398, + "step": 25293 + }, + { + "epoch": 0.6945085118066996, + "grad_norm": 0.39132872223854065, + "learning_rate": 1.464196523368337e-05, + "loss": 0.521, + "step": 25294 + }, + { + "epoch": 0.6945359692476661, + "grad_norm": 0.3570835590362549, + "learning_rate": 1.4641582687441182e-05, + "loss": 0.4955, + "step": 25295 + }, + { + "epoch": 0.6945634266886326, + "grad_norm": 0.3967655301094055, + "learning_rate": 1.4641200132540962e-05, + "loss": 0.4441, + "step": 25296 + }, + { + "epoch": 0.6945908841295991, + "grad_norm": 0.3841409981250763, + "learning_rate": 1.464081756898342e-05, + "loss": 0.4826, + "step": 25297 + }, + { + "epoch": 0.6946183415705657, + "grad_norm": 0.45963919162750244, + "learning_rate": 1.4640434996769274e-05, + "loss": 0.4966, + "step": 25298 + }, + { + "epoch": 0.6946457990115321, + "grad_norm": 0.3364951014518738, + "learning_rate": 1.4640052415899238e-05, + "loss": 0.5372, + "step": 25299 + }, + { + "epoch": 0.6946732564524987, + "grad_norm": 0.33859264850616455, + "learning_rate": 1.4639669826374019e-05, + "loss": 0.5137, + "step": 25300 + }, + { + "epoch": 0.6947007138934651, + "grad_norm": 0.36878830194473267, + "learning_rate": 1.4639287228194339e-05, + "loss": 0.487, + "step": 25301 + }, + { + "epoch": 0.6947281713344317, + "grad_norm": 0.3932752013206482, + "learning_rate": 1.4638904621360902e-05, + "loss": 0.5497, + "step": 25302 + }, + { + "epoch": 0.6947556287753981, + "grad_norm": 0.381727397441864, + "learning_rate": 1.4638522005874433e-05, + "loss": 0.4869, + "step": 25303 + }, + { + "epoch": 0.6947830862163646, + "grad_norm": 0.40487241744995117, + "learning_rate": 1.4638139381735638e-05, + "loss": 0.5001, + "step": 25304 + }, + { + "epoch": 0.6948105436573312, + "grad_norm": 0.3610488772392273, + "learning_rate": 1.4637756748945231e-05, + "loss": 0.6001, + "step": 25305 + }, + { + "epoch": 0.6948380010982976, + "grad_norm": 0.681302547454834, + "learning_rate": 1.463737410750393e-05, + "loss": 0.476, + "step": 25306 + }, + { + "epoch": 0.6948654585392642, + "grad_norm": 0.359510600566864, + "learning_rate": 1.4636991457412446e-05, + "loss": 0.5343, + "step": 25307 + }, + { + "epoch": 0.6948929159802306, + "grad_norm": 0.39557838439941406, + "learning_rate": 1.463660879867149e-05, + "loss": 0.4517, + "step": 25308 + }, + { + "epoch": 0.6949203734211972, + "grad_norm": 0.42404815554618835, + "learning_rate": 1.4636226131281784e-05, + "loss": 0.5471, + "step": 25309 + }, + { + "epoch": 0.6949478308621636, + "grad_norm": 0.35788264870643616, + "learning_rate": 1.463584345524403e-05, + "loss": 0.4769, + "step": 25310 + }, + { + "epoch": 0.6949752883031302, + "grad_norm": 0.39822208881378174, + "learning_rate": 1.4635460770558953e-05, + "loss": 0.581, + "step": 25311 + }, + { + "epoch": 0.6950027457440966, + "grad_norm": 0.38095206022262573, + "learning_rate": 1.4635078077227263e-05, + "loss": 0.4934, + "step": 25312 + }, + { + "epoch": 0.6950302031850631, + "grad_norm": 0.44227394461631775, + "learning_rate": 1.4634695375249669e-05, + "loss": 0.4983, + "step": 25313 + }, + { + "epoch": 0.6950576606260297, + "grad_norm": 0.36248454451560974, + "learning_rate": 1.4634312664626893e-05, + "loss": 0.4201, + "step": 25314 + }, + { + "epoch": 0.6950851180669961, + "grad_norm": 1.1304601430892944, + "learning_rate": 1.463392994535964e-05, + "loss": 0.6357, + "step": 25315 + }, + { + "epoch": 0.6951125755079627, + "grad_norm": 0.4404352307319641, + "learning_rate": 1.4633547217448631e-05, + "loss": 0.491, + "step": 25316 + }, + { + "epoch": 0.6951400329489291, + "grad_norm": 0.3608398735523224, + "learning_rate": 1.463316448089458e-05, + "loss": 0.4893, + "step": 25317 + }, + { + "epoch": 0.6951674903898957, + "grad_norm": 0.3433651924133301, + "learning_rate": 1.4632781735698196e-05, + "loss": 0.4357, + "step": 25318 + }, + { + "epoch": 0.6951949478308621, + "grad_norm": 0.3370068371295929, + "learning_rate": 1.4632398981860196e-05, + "loss": 0.5421, + "step": 25319 + }, + { + "epoch": 0.6952224052718287, + "grad_norm": 0.4155050814151764, + "learning_rate": 1.4632016219381293e-05, + "loss": 0.5557, + "step": 25320 + }, + { + "epoch": 0.6952498627127952, + "grad_norm": 0.3878953456878662, + "learning_rate": 1.4631633448262202e-05, + "loss": 0.4911, + "step": 25321 + }, + { + "epoch": 0.6952773201537616, + "grad_norm": 0.4021739065647125, + "learning_rate": 1.4631250668503637e-05, + "loss": 0.5855, + "step": 25322 + }, + { + "epoch": 0.6953047775947282, + "grad_norm": 0.37154150009155273, + "learning_rate": 1.4630867880106308e-05, + "loss": 0.5045, + "step": 25323 + }, + { + "epoch": 0.6953322350356946, + "grad_norm": 0.38938480615615845, + "learning_rate": 1.4630485083070936e-05, + "loss": 0.5404, + "step": 25324 + }, + { + "epoch": 0.6953596924766612, + "grad_norm": 0.3544134199619293, + "learning_rate": 1.4630102277398233e-05, + "loss": 0.505, + "step": 25325 + }, + { + "epoch": 0.6953871499176276, + "grad_norm": 0.522505521774292, + "learning_rate": 1.4629719463088904e-05, + "loss": 0.4818, + "step": 25326 + }, + { + "epoch": 0.6954146073585942, + "grad_norm": 0.43122386932373047, + "learning_rate": 1.4629336640143679e-05, + "loss": 0.5599, + "step": 25327 + }, + { + "epoch": 0.6954420647995607, + "grad_norm": 0.3991270959377289, + "learning_rate": 1.4628953808563258e-05, + "loss": 0.5873, + "step": 25328 + }, + { + "epoch": 0.6954695222405272, + "grad_norm": 0.39553025364875793, + "learning_rate": 1.4628570968348364e-05, + "loss": 0.5059, + "step": 25329 + }, + { + "epoch": 0.6954969796814937, + "grad_norm": 0.47177019715309143, + "learning_rate": 1.4628188119499707e-05, + "loss": 0.6603, + "step": 25330 + }, + { + "epoch": 0.6955244371224601, + "grad_norm": 0.45925551652908325, + "learning_rate": 1.4627805262018e-05, + "loss": 0.5599, + "step": 25331 + }, + { + "epoch": 0.6955518945634267, + "grad_norm": 0.3627833425998688, + "learning_rate": 1.4627422395903964e-05, + "loss": 0.5625, + "step": 25332 + }, + { + "epoch": 0.6955793520043931, + "grad_norm": 0.37874358892440796, + "learning_rate": 1.4627039521158302e-05, + "loss": 0.4893, + "step": 25333 + }, + { + "epoch": 0.6956068094453597, + "grad_norm": 0.40369677543640137, + "learning_rate": 1.4626656637781738e-05, + "loss": 0.4307, + "step": 25334 + }, + { + "epoch": 0.6956342668863262, + "grad_norm": 0.5013852715492249, + "learning_rate": 1.4626273745774983e-05, + "loss": 0.5441, + "step": 25335 + }, + { + "epoch": 0.6956617243272927, + "grad_norm": 0.38237547874450684, + "learning_rate": 1.462589084513875e-05, + "loss": 0.3777, + "step": 25336 + }, + { + "epoch": 0.6956891817682592, + "grad_norm": 0.3825632333755493, + "learning_rate": 1.4625507935873756e-05, + "loss": 0.5852, + "step": 25337 + }, + { + "epoch": 0.6957166392092257, + "grad_norm": 0.3818103075027466, + "learning_rate": 1.4625125017980712e-05, + "loss": 0.5004, + "step": 25338 + }, + { + "epoch": 0.6957440966501922, + "grad_norm": 0.43543270230293274, + "learning_rate": 1.4624742091460333e-05, + "loss": 0.5584, + "step": 25339 + }, + { + "epoch": 0.6957715540911587, + "grad_norm": 0.3585946559906006, + "learning_rate": 1.4624359156313335e-05, + "loss": 0.5109, + "step": 25340 + }, + { + "epoch": 0.6957990115321252, + "grad_norm": 0.36217913031578064, + "learning_rate": 1.462397621254043e-05, + "loss": 0.4498, + "step": 25341 + }, + { + "epoch": 0.6958264689730917, + "grad_norm": 0.4188084900379181, + "learning_rate": 1.4623593260142335e-05, + "loss": 0.5033, + "step": 25342 + }, + { + "epoch": 0.6958539264140582, + "grad_norm": 0.42253798246383667, + "learning_rate": 1.462321029911976e-05, + "loss": 0.5178, + "step": 25343 + }, + { + "epoch": 0.6958813838550247, + "grad_norm": 0.37353643774986267, + "learning_rate": 1.4622827329473427e-05, + "loss": 0.5293, + "step": 25344 + }, + { + "epoch": 0.6959088412959912, + "grad_norm": 0.3775566816329956, + "learning_rate": 1.4622444351204042e-05, + "loss": 0.4313, + "step": 25345 + }, + { + "epoch": 0.6959362987369577, + "grad_norm": 0.43346893787384033, + "learning_rate": 1.4622061364312323e-05, + "loss": 0.468, + "step": 25346 + }, + { + "epoch": 0.6959637561779242, + "grad_norm": 0.3822588622570038, + "learning_rate": 1.4621678368798986e-05, + "loss": 0.4988, + "step": 25347 + }, + { + "epoch": 0.6959912136188907, + "grad_norm": 0.3968234360218048, + "learning_rate": 1.4621295364664743e-05, + "loss": 0.4992, + "step": 25348 + }, + { + "epoch": 0.6960186710598573, + "grad_norm": 0.39415308833122253, + "learning_rate": 1.4620912351910308e-05, + "loss": 0.4947, + "step": 25349 + }, + { + "epoch": 0.6960461285008237, + "grad_norm": 0.36490535736083984, + "learning_rate": 1.46205293305364e-05, + "loss": 0.5293, + "step": 25350 + }, + { + "epoch": 0.6960735859417903, + "grad_norm": 0.4414251744747162, + "learning_rate": 1.4620146300543727e-05, + "loss": 0.5424, + "step": 25351 + }, + { + "epoch": 0.6961010433827567, + "grad_norm": 0.43288469314575195, + "learning_rate": 1.4619763261933008e-05, + "loss": 0.4863, + "step": 25352 + }, + { + "epoch": 0.6961285008237232, + "grad_norm": 0.4547356963157654, + "learning_rate": 1.4619380214704954e-05, + "loss": 0.4757, + "step": 25353 + }, + { + "epoch": 0.6961559582646897, + "grad_norm": 0.6719582080841064, + "learning_rate": 1.4618997158860284e-05, + "loss": 0.5723, + "step": 25354 + }, + { + "epoch": 0.6961834157056562, + "grad_norm": 0.3890974223613739, + "learning_rate": 1.4618614094399712e-05, + "loss": 0.5295, + "step": 25355 + }, + { + "epoch": 0.6962108731466228, + "grad_norm": 0.3563663363456726, + "learning_rate": 1.4618231021323946e-05, + "loss": 0.4951, + "step": 25356 + }, + { + "epoch": 0.6962383305875892, + "grad_norm": 0.4012490212917328, + "learning_rate": 1.4617847939633709e-05, + "loss": 0.4457, + "step": 25357 + }, + { + "epoch": 0.6962657880285558, + "grad_norm": 0.3936764597892761, + "learning_rate": 1.4617464849329708e-05, + "loss": 0.569, + "step": 25358 + }, + { + "epoch": 0.6962932454695222, + "grad_norm": 0.38228994607925415, + "learning_rate": 1.4617081750412662e-05, + "loss": 0.4749, + "step": 25359 + }, + { + "epoch": 0.6963207029104888, + "grad_norm": 0.49958130717277527, + "learning_rate": 1.4616698642883287e-05, + "loss": 0.6399, + "step": 25360 + }, + { + "epoch": 0.6963481603514552, + "grad_norm": 0.4254586696624756, + "learning_rate": 1.4616315526742296e-05, + "loss": 0.5756, + "step": 25361 + }, + { + "epoch": 0.6963756177924217, + "grad_norm": 0.38669806718826294, + "learning_rate": 1.4615932401990403e-05, + "loss": 0.5336, + "step": 25362 + }, + { + "epoch": 0.6964030752333883, + "grad_norm": 0.37593865394592285, + "learning_rate": 1.461554926862832e-05, + "loss": 0.5053, + "step": 25363 + }, + { + "epoch": 0.6964305326743547, + "grad_norm": 0.5282707810401917, + "learning_rate": 1.4615166126656766e-05, + "loss": 0.5421, + "step": 25364 + }, + { + "epoch": 0.6964579901153213, + "grad_norm": 0.3744141161441803, + "learning_rate": 1.4614782976076454e-05, + "loss": 0.4873, + "step": 25365 + }, + { + "epoch": 0.6964854475562877, + "grad_norm": 0.39034080505371094, + "learning_rate": 1.46143998168881e-05, + "loss": 0.5006, + "step": 25366 + }, + { + "epoch": 0.6965129049972543, + "grad_norm": 0.34547728300094604, + "learning_rate": 1.4614016649092418e-05, + "loss": 0.4647, + "step": 25367 + }, + { + "epoch": 0.6965403624382207, + "grad_norm": 0.3805381655693054, + "learning_rate": 1.4613633472690118e-05, + "loss": 0.4734, + "step": 25368 + }, + { + "epoch": 0.6965678198791873, + "grad_norm": 0.41958561539649963, + "learning_rate": 1.4613250287681924e-05, + "loss": 0.5373, + "step": 25369 + }, + { + "epoch": 0.6965952773201538, + "grad_norm": 0.46847084164619446, + "learning_rate": 1.4612867094068543e-05, + "loss": 0.5214, + "step": 25370 + }, + { + "epoch": 0.6966227347611202, + "grad_norm": 0.3500595986843109, + "learning_rate": 1.4612483891850695e-05, + "loss": 0.4928, + "step": 25371 + }, + { + "epoch": 0.6966501922020868, + "grad_norm": 0.33767861127853394, + "learning_rate": 1.4612100681029091e-05, + "loss": 0.4669, + "step": 25372 + }, + { + "epoch": 0.6966776496430532, + "grad_norm": 0.4210420548915863, + "learning_rate": 1.4611717461604446e-05, + "loss": 0.5056, + "step": 25373 + }, + { + "epoch": 0.6967051070840198, + "grad_norm": 0.37151041626930237, + "learning_rate": 1.4611334233577478e-05, + "loss": 0.5013, + "step": 25374 + }, + { + "epoch": 0.6967325645249862, + "grad_norm": 0.36425137519836426, + "learning_rate": 1.46109509969489e-05, + "loss": 0.5092, + "step": 25375 + }, + { + "epoch": 0.6967600219659528, + "grad_norm": 0.4395934045314789, + "learning_rate": 1.4610567751719427e-05, + "loss": 0.4686, + "step": 25376 + }, + { + "epoch": 0.6967874794069193, + "grad_norm": 0.4124169647693634, + "learning_rate": 1.461018449788977e-05, + "loss": 0.462, + "step": 25377 + }, + { + "epoch": 0.6968149368478858, + "grad_norm": 0.3974516987800598, + "learning_rate": 1.4609801235460653e-05, + "loss": 0.5503, + "step": 25378 + }, + { + "epoch": 0.6968423942888523, + "grad_norm": 0.43092894554138184, + "learning_rate": 1.4609417964432782e-05, + "loss": 0.4488, + "step": 25379 + }, + { + "epoch": 0.6968698517298187, + "grad_norm": 0.35248851776123047, + "learning_rate": 1.4609034684806875e-05, + "loss": 0.463, + "step": 25380 + }, + { + "epoch": 0.6968973091707853, + "grad_norm": 0.38552138209342957, + "learning_rate": 1.4608651396583649e-05, + "loss": 0.5011, + "step": 25381 + }, + { + "epoch": 0.6969247666117517, + "grad_norm": 0.3823942542076111, + "learning_rate": 1.4608268099763816e-05, + "loss": 0.5015, + "step": 25382 + }, + { + "epoch": 0.6969522240527183, + "grad_norm": 0.3486464321613312, + "learning_rate": 1.4607884794348093e-05, + "loss": 0.4782, + "step": 25383 + }, + { + "epoch": 0.6969796814936848, + "grad_norm": 0.3770047724246979, + "learning_rate": 1.4607501480337193e-05, + "loss": 0.4927, + "step": 25384 + }, + { + "epoch": 0.6970071389346513, + "grad_norm": 0.3807443082332611, + "learning_rate": 1.4607118157731831e-05, + "loss": 0.45, + "step": 25385 + }, + { + "epoch": 0.6970345963756178, + "grad_norm": 0.39712586998939514, + "learning_rate": 1.4606734826532729e-05, + "loss": 0.4707, + "step": 25386 + }, + { + "epoch": 0.6970620538165843, + "grad_norm": 0.3702531158924103, + "learning_rate": 1.4606351486740592e-05, + "loss": 0.4357, + "step": 25387 + }, + { + "epoch": 0.6970895112575508, + "grad_norm": 0.46723392605781555, + "learning_rate": 1.460596813835614e-05, + "loss": 0.444, + "step": 25388 + }, + { + "epoch": 0.6971169686985172, + "grad_norm": 0.4044448137283325, + "learning_rate": 1.4605584781380087e-05, + "loss": 0.4749, + "step": 25389 + }, + { + "epoch": 0.6971444261394838, + "grad_norm": 0.40213897824287415, + "learning_rate": 1.4605201415813147e-05, + "loss": 0.4802, + "step": 25390 + }, + { + "epoch": 0.6971718835804503, + "grad_norm": 0.4053410589694977, + "learning_rate": 1.4604818041656039e-05, + "loss": 0.472, + "step": 25391 + }, + { + "epoch": 0.6971993410214168, + "grad_norm": 0.36085158586502075, + "learning_rate": 1.4604434658909476e-05, + "loss": 0.4822, + "step": 25392 + }, + { + "epoch": 0.6972267984623833, + "grad_norm": 0.36807844042778015, + "learning_rate": 1.4604051267574171e-05, + "loss": 0.4882, + "step": 25393 + }, + { + "epoch": 0.6972542559033498, + "grad_norm": 0.3604799509048462, + "learning_rate": 1.460366786765084e-05, + "loss": 0.5556, + "step": 25394 + }, + { + "epoch": 0.6972817133443163, + "grad_norm": 0.3477962613105774, + "learning_rate": 1.4603284459140202e-05, + "loss": 0.4845, + "step": 25395 + }, + { + "epoch": 0.6973091707852828, + "grad_norm": 0.36576423048973083, + "learning_rate": 1.460290104204297e-05, + "loss": 0.4659, + "step": 25396 + }, + { + "epoch": 0.6973366282262493, + "grad_norm": 0.40345698595046997, + "learning_rate": 1.4602517616359854e-05, + "loss": 0.4915, + "step": 25397 + }, + { + "epoch": 0.6973640856672159, + "grad_norm": 0.39943066239356995, + "learning_rate": 1.4602134182091577e-05, + "loss": 0.5471, + "step": 25398 + }, + { + "epoch": 0.6973915431081823, + "grad_norm": 0.3715866804122925, + "learning_rate": 1.4601750739238851e-05, + "loss": 0.4938, + "step": 25399 + }, + { + "epoch": 0.6974190005491488, + "grad_norm": 0.4035443067550659, + "learning_rate": 1.460136728780239e-05, + "loss": 0.5001, + "step": 25400 + }, + { + "epoch": 0.6974464579901153, + "grad_norm": 0.3853027820587158, + "learning_rate": 1.460098382778291e-05, + "loss": 0.515, + "step": 25401 + }, + { + "epoch": 0.6974739154310818, + "grad_norm": 0.3632504343986511, + "learning_rate": 1.4600600359181127e-05, + "loss": 0.4341, + "step": 25402 + }, + { + "epoch": 0.6975013728720483, + "grad_norm": 0.40799659490585327, + "learning_rate": 1.4600216881997758e-05, + "loss": 0.5214, + "step": 25403 + }, + { + "epoch": 0.6975288303130148, + "grad_norm": 0.4270766079425812, + "learning_rate": 1.4599833396233514e-05, + "loss": 0.5095, + "step": 25404 + }, + { + "epoch": 0.6975562877539814, + "grad_norm": 0.39089149236679077, + "learning_rate": 1.4599449901889114e-05, + "loss": 0.447, + "step": 25405 + }, + { + "epoch": 0.6975837451949478, + "grad_norm": 0.39415526390075684, + "learning_rate": 1.4599066398965269e-05, + "loss": 0.4844, + "step": 25406 + }, + { + "epoch": 0.6976112026359144, + "grad_norm": 0.35627275705337524, + "learning_rate": 1.4598682887462701e-05, + "loss": 0.4882, + "step": 25407 + }, + { + "epoch": 0.6976386600768808, + "grad_norm": 0.4121449589729309, + "learning_rate": 1.459829936738212e-05, + "loss": 0.4814, + "step": 25408 + }, + { + "epoch": 0.6976661175178474, + "grad_norm": 0.39693117141723633, + "learning_rate": 1.4597915838724244e-05, + "loss": 0.4702, + "step": 25409 + }, + { + "epoch": 0.6976935749588138, + "grad_norm": 0.3664967119693756, + "learning_rate": 1.4597532301489788e-05, + "loss": 0.4528, + "step": 25410 + }, + { + "epoch": 0.6977210323997803, + "grad_norm": 0.37770670652389526, + "learning_rate": 1.4597148755679462e-05, + "loss": 0.4451, + "step": 25411 + }, + { + "epoch": 0.6977484898407469, + "grad_norm": 0.34469226002693176, + "learning_rate": 1.4596765201293992e-05, + "loss": 0.3914, + "step": 25412 + }, + { + "epoch": 0.6977759472817133, + "grad_norm": 0.37485289573669434, + "learning_rate": 1.4596381638334088e-05, + "loss": 0.446, + "step": 25413 + }, + { + "epoch": 0.6978034047226799, + "grad_norm": 0.340489000082016, + "learning_rate": 1.459599806680046e-05, + "loss": 0.5094, + "step": 25414 + }, + { + "epoch": 0.6978308621636463, + "grad_norm": 0.3640578091144562, + "learning_rate": 1.4595614486693835e-05, + "loss": 0.5321, + "step": 25415 + }, + { + "epoch": 0.6978583196046129, + "grad_norm": 0.35629913210868835, + "learning_rate": 1.4595230898014916e-05, + "loss": 0.5058, + "step": 25416 + }, + { + "epoch": 0.6978857770455793, + "grad_norm": 0.37914976477622986, + "learning_rate": 1.4594847300764428e-05, + "loss": 0.4503, + "step": 25417 + }, + { + "epoch": 0.6979132344865459, + "grad_norm": 0.43085458874702454, + "learning_rate": 1.4594463694943084e-05, + "loss": 0.5518, + "step": 25418 + }, + { + "epoch": 0.6979406919275124, + "grad_norm": 0.3899019658565521, + "learning_rate": 1.4594080080551597e-05, + "loss": 0.5519, + "step": 25419 + }, + { + "epoch": 0.6979681493684788, + "grad_norm": 0.4156707525253296, + "learning_rate": 1.4593696457590688e-05, + "loss": 0.5862, + "step": 25420 + }, + { + "epoch": 0.6979956068094454, + "grad_norm": 0.34225592017173767, + "learning_rate": 1.4593312826061063e-05, + "loss": 0.4772, + "step": 25421 + }, + { + "epoch": 0.6980230642504118, + "grad_norm": 0.35079413652420044, + "learning_rate": 1.4592929185963447e-05, + "loss": 0.4031, + "step": 25422 + }, + { + "epoch": 0.6980505216913784, + "grad_norm": 0.3509799540042877, + "learning_rate": 1.4592545537298555e-05, + "loss": 0.3967, + "step": 25423 + }, + { + "epoch": 0.6980779791323448, + "grad_norm": 0.35817137360572815, + "learning_rate": 1.4592161880067096e-05, + "loss": 0.4591, + "step": 25424 + }, + { + "epoch": 0.6981054365733114, + "grad_norm": 0.3844546675682068, + "learning_rate": 1.4591778214269789e-05, + "loss": 0.4986, + "step": 25425 + }, + { + "epoch": 0.6981328940142779, + "grad_norm": 0.45115259289741516, + "learning_rate": 1.4591394539907348e-05, + "loss": 0.529, + "step": 25426 + }, + { + "epoch": 0.6981603514552444, + "grad_norm": 0.43783843517303467, + "learning_rate": 1.4591010856980496e-05, + "loss": 0.5832, + "step": 25427 + }, + { + "epoch": 0.6981878088962109, + "grad_norm": 0.355221688747406, + "learning_rate": 1.4590627165489944e-05, + "loss": 0.5, + "step": 25428 + }, + { + "epoch": 0.6982152663371773, + "grad_norm": 0.37167999148368835, + "learning_rate": 1.4590243465436402e-05, + "loss": 0.5238, + "step": 25429 + }, + { + "epoch": 0.6982427237781439, + "grad_norm": 0.4129979908466339, + "learning_rate": 1.4589859756820594e-05, + "loss": 0.4964, + "step": 25430 + }, + { + "epoch": 0.6982701812191103, + "grad_norm": 0.41145846247673035, + "learning_rate": 1.4589476039643233e-05, + "loss": 0.5889, + "step": 25431 + }, + { + "epoch": 0.6982976386600769, + "grad_norm": 0.3577579855918884, + "learning_rate": 1.4589092313905034e-05, + "loss": 0.4973, + "step": 25432 + }, + { + "epoch": 0.6983250961010434, + "grad_norm": 0.3763614296913147, + "learning_rate": 1.4588708579606714e-05, + "loss": 0.4417, + "step": 25433 + }, + { + "epoch": 0.6983525535420099, + "grad_norm": 0.38437125086784363, + "learning_rate": 1.4588324836748986e-05, + "loss": 0.6009, + "step": 25434 + }, + { + "epoch": 0.6983800109829764, + "grad_norm": 0.3477506935596466, + "learning_rate": 1.458794108533257e-05, + "loss": 0.4205, + "step": 25435 + }, + { + "epoch": 0.6984074684239429, + "grad_norm": 0.3894840478897095, + "learning_rate": 1.4587557325358177e-05, + "loss": 0.484, + "step": 25436 + }, + { + "epoch": 0.6984349258649094, + "grad_norm": 0.6417276263237, + "learning_rate": 1.4587173556826526e-05, + "loss": 0.4433, + "step": 25437 + }, + { + "epoch": 0.6984623833058758, + "grad_norm": 0.5560301542282104, + "learning_rate": 1.4586789779738333e-05, + "loss": 0.5942, + "step": 25438 + }, + { + "epoch": 0.6984898407468424, + "grad_norm": 0.4187886416912079, + "learning_rate": 1.4586405994094313e-05, + "loss": 0.6353, + "step": 25439 + }, + { + "epoch": 0.6985172981878089, + "grad_norm": 0.44966015219688416, + "learning_rate": 1.4586022199895182e-05, + "loss": 0.5811, + "step": 25440 + }, + { + "epoch": 0.6985447556287754, + "grad_norm": 0.357273668050766, + "learning_rate": 1.4585638397141657e-05, + "loss": 0.5718, + "step": 25441 + }, + { + "epoch": 0.6985722130697419, + "grad_norm": 0.40705162286758423, + "learning_rate": 1.458525458583445e-05, + "loss": 0.549, + "step": 25442 + }, + { + "epoch": 0.6985996705107084, + "grad_norm": 0.4409189522266388, + "learning_rate": 1.4584870765974282e-05, + "loss": 0.5284, + "step": 25443 + }, + { + "epoch": 0.6986271279516749, + "grad_norm": 0.3913683593273163, + "learning_rate": 1.4584486937561866e-05, + "loss": 0.5282, + "step": 25444 + }, + { + "epoch": 0.6986545853926414, + "grad_norm": 0.4199334681034088, + "learning_rate": 1.458410310059792e-05, + "loss": 0.511, + "step": 25445 + }, + { + "epoch": 0.6986820428336079, + "grad_norm": 0.3714434802532196, + "learning_rate": 1.4583719255083155e-05, + "loss": 0.4918, + "step": 25446 + }, + { + "epoch": 0.6987095002745745, + "grad_norm": 0.3949292004108429, + "learning_rate": 1.4583335401018293e-05, + "loss": 0.5284, + "step": 25447 + }, + { + "epoch": 0.6987369577155409, + "grad_norm": 0.36550769209861755, + "learning_rate": 1.4582951538404048e-05, + "loss": 0.4871, + "step": 25448 + }, + { + "epoch": 0.6987644151565074, + "grad_norm": 0.361356258392334, + "learning_rate": 1.4582567667241137e-05, + "loss": 0.488, + "step": 25449 + }, + { + "epoch": 0.6987918725974739, + "grad_norm": 0.4206015467643738, + "learning_rate": 1.4582183787530273e-05, + "loss": 0.4468, + "step": 25450 + }, + { + "epoch": 0.6988193300384404, + "grad_norm": 0.342547208070755, + "learning_rate": 1.4581799899272173e-05, + "loss": 0.5168, + "step": 25451 + }, + { + "epoch": 0.6988467874794069, + "grad_norm": 0.4114772379398346, + "learning_rate": 1.4581416002467554e-05, + "loss": 0.5461, + "step": 25452 + }, + { + "epoch": 0.6988742449203734, + "grad_norm": 0.38687655329704285, + "learning_rate": 1.4581032097117134e-05, + "loss": 0.4838, + "step": 25453 + }, + { + "epoch": 0.69890170236134, + "grad_norm": 0.34125709533691406, + "learning_rate": 1.4580648183221626e-05, + "loss": 0.5352, + "step": 25454 + }, + { + "epoch": 0.6989291598023064, + "grad_norm": 0.398270845413208, + "learning_rate": 1.4580264260781744e-05, + "loss": 0.5313, + "step": 25455 + }, + { + "epoch": 0.698956617243273, + "grad_norm": 0.38955777883529663, + "learning_rate": 1.4579880329798213e-05, + "loss": 0.6164, + "step": 25456 + }, + { + "epoch": 0.6989840746842394, + "grad_norm": 0.3907632529735565, + "learning_rate": 1.4579496390271739e-05, + "loss": 0.472, + "step": 25457 + }, + { + "epoch": 0.699011532125206, + "grad_norm": 0.4551410675048828, + "learning_rate": 1.4579112442203046e-05, + "loss": 0.4656, + "step": 25458 + }, + { + "epoch": 0.6990389895661724, + "grad_norm": 0.39492297172546387, + "learning_rate": 1.4578728485592845e-05, + "loss": 0.5435, + "step": 25459 + }, + { + "epoch": 0.6990664470071389, + "grad_norm": 0.39289143681526184, + "learning_rate": 1.4578344520441853e-05, + "loss": 0.5264, + "step": 25460 + }, + { + "epoch": 0.6990939044481055, + "grad_norm": 0.39825108647346497, + "learning_rate": 1.457796054675079e-05, + "loss": 0.5373, + "step": 25461 + }, + { + "epoch": 0.6991213618890719, + "grad_norm": 0.4253794550895691, + "learning_rate": 1.4577576564520368e-05, + "loss": 0.5739, + "step": 25462 + }, + { + "epoch": 0.6991488193300385, + "grad_norm": 0.41354668140411377, + "learning_rate": 1.4577192573751302e-05, + "loss": 0.5114, + "step": 25463 + }, + { + "epoch": 0.6991762767710049, + "grad_norm": 0.6106164455413818, + "learning_rate": 1.4576808574444314e-05, + "loss": 0.4972, + "step": 25464 + }, + { + "epoch": 0.6992037342119715, + "grad_norm": 0.3712099492549896, + "learning_rate": 1.4576424566600116e-05, + "loss": 0.491, + "step": 25465 + }, + { + "epoch": 0.6992311916529379, + "grad_norm": 0.4822063744068146, + "learning_rate": 1.4576040550219428e-05, + "loss": 0.4949, + "step": 25466 + }, + { + "epoch": 0.6992586490939044, + "grad_norm": 0.3936867415904999, + "learning_rate": 1.4575656525302964e-05, + "loss": 0.5418, + "step": 25467 + }, + { + "epoch": 0.699286106534871, + "grad_norm": 0.3470097482204437, + "learning_rate": 1.4575272491851435e-05, + "loss": 0.4388, + "step": 25468 + }, + { + "epoch": 0.6993135639758374, + "grad_norm": 0.37351739406585693, + "learning_rate": 1.4574888449865567e-05, + "loss": 0.5053, + "step": 25469 + }, + { + "epoch": 0.699341021416804, + "grad_norm": 0.4169138967990875, + "learning_rate": 1.457450439934607e-05, + "loss": 0.5411, + "step": 25470 + }, + { + "epoch": 0.6993684788577704, + "grad_norm": 1.3586455583572388, + "learning_rate": 1.4574120340293663e-05, + "loss": 0.5479, + "step": 25471 + }, + { + "epoch": 0.699395936298737, + "grad_norm": 0.3889513611793518, + "learning_rate": 1.4573736272709065e-05, + "loss": 0.4562, + "step": 25472 + }, + { + "epoch": 0.6994233937397034, + "grad_norm": 0.4128541946411133, + "learning_rate": 1.4573352196592983e-05, + "loss": 0.5425, + "step": 25473 + }, + { + "epoch": 0.69945085118067, + "grad_norm": 0.3986850678920746, + "learning_rate": 1.4572968111946143e-05, + "loss": 0.4688, + "step": 25474 + }, + { + "epoch": 0.6994783086216365, + "grad_norm": 0.39681771397590637, + "learning_rate": 1.4572584018769259e-05, + "loss": 0.5502, + "step": 25475 + }, + { + "epoch": 0.699505766062603, + "grad_norm": 0.3734547793865204, + "learning_rate": 1.4572199917063044e-05, + "loss": 0.5884, + "step": 25476 + }, + { + "epoch": 0.6995332235035695, + "grad_norm": 0.4484544098377228, + "learning_rate": 1.4571815806828218e-05, + "loss": 0.4151, + "step": 25477 + }, + { + "epoch": 0.6995606809445359, + "grad_norm": 0.40805771946907043, + "learning_rate": 1.4571431688065494e-05, + "loss": 0.605, + "step": 25478 + }, + { + "epoch": 0.6995881383855025, + "grad_norm": 0.4716414511203766, + "learning_rate": 1.4571047560775593e-05, + "loss": 0.4715, + "step": 25479 + }, + { + "epoch": 0.6996155958264689, + "grad_norm": 0.4433923065662384, + "learning_rate": 1.457066342495923e-05, + "loss": 0.5876, + "step": 25480 + }, + { + "epoch": 0.6996430532674355, + "grad_norm": 0.34977322816848755, + "learning_rate": 1.4570279280617119e-05, + "loss": 0.4731, + "step": 25481 + }, + { + "epoch": 0.699670510708402, + "grad_norm": 0.37862730026245117, + "learning_rate": 1.4569895127749982e-05, + "loss": 0.5158, + "step": 25482 + }, + { + "epoch": 0.6996979681493685, + "grad_norm": 0.3817298114299774, + "learning_rate": 1.4569510966358526e-05, + "loss": 0.545, + "step": 25483 + }, + { + "epoch": 0.699725425590335, + "grad_norm": 0.3889427185058594, + "learning_rate": 1.456912679644348e-05, + "loss": 0.5485, + "step": 25484 + }, + { + "epoch": 0.6997528830313015, + "grad_norm": 0.3915839195251465, + "learning_rate": 1.4568742618005552e-05, + "loss": 0.4928, + "step": 25485 + }, + { + "epoch": 0.699780340472268, + "grad_norm": 0.36893731355667114, + "learning_rate": 1.456835843104546e-05, + "loss": 0.4595, + "step": 25486 + }, + { + "epoch": 0.6998077979132344, + "grad_norm": 0.40549057722091675, + "learning_rate": 1.4567974235563922e-05, + "loss": 0.5017, + "step": 25487 + }, + { + "epoch": 0.699835255354201, + "grad_norm": 0.3457392752170563, + "learning_rate": 1.456759003156165e-05, + "loss": 0.4983, + "step": 25488 + }, + { + "epoch": 0.6998627127951675, + "grad_norm": 0.4409995675086975, + "learning_rate": 1.4567205819039371e-05, + "loss": 0.6225, + "step": 25489 + }, + { + "epoch": 0.699890170236134, + "grad_norm": 0.3928782045841217, + "learning_rate": 1.4566821597997795e-05, + "loss": 0.4683, + "step": 25490 + }, + { + "epoch": 0.6999176276771005, + "grad_norm": 0.36442095041275024, + "learning_rate": 1.4566437368437636e-05, + "loss": 0.4939, + "step": 25491 + }, + { + "epoch": 0.699945085118067, + "grad_norm": 0.3952120244503021, + "learning_rate": 1.4566053130359617e-05, + "loss": 0.4593, + "step": 25492 + }, + { + "epoch": 0.6999725425590335, + "grad_norm": 0.32815104722976685, + "learning_rate": 1.456566888376445e-05, + "loss": 0.409, + "step": 25493 + }, + { + "epoch": 0.7, + "grad_norm": 0.44275471568107605, + "learning_rate": 1.4565284628652851e-05, + "loss": 0.5578, + "step": 25494 + }, + { + "epoch": 0.7000274574409665, + "grad_norm": 0.41010984778404236, + "learning_rate": 1.4564900365025542e-05, + "loss": 0.5546, + "step": 25495 + }, + { + "epoch": 0.700054914881933, + "grad_norm": 0.39667388796806335, + "learning_rate": 1.4564516092883236e-05, + "loss": 0.4838, + "step": 25496 + }, + { + "epoch": 0.7000823723228995, + "grad_norm": 0.414629727602005, + "learning_rate": 1.4564131812226652e-05, + "loss": 0.4735, + "step": 25497 + }, + { + "epoch": 0.700109829763866, + "grad_norm": 0.40252256393432617, + "learning_rate": 1.4563747523056504e-05, + "loss": 0.5028, + "step": 25498 + }, + { + "epoch": 0.7001372872048325, + "grad_norm": 0.44975876808166504, + "learning_rate": 1.456336322537351e-05, + "loss": 0.5288, + "step": 25499 + }, + { + "epoch": 0.700164744645799, + "grad_norm": 0.35001280903816223, + "learning_rate": 1.4562978919178388e-05, + "loss": 0.3927, + "step": 25500 + }, + { + "epoch": 0.7001922020867655, + "grad_norm": 0.4171823561191559, + "learning_rate": 1.4562594604471851e-05, + "loss": 0.5573, + "step": 25501 + }, + { + "epoch": 0.700219659527732, + "grad_norm": 0.3928503692150116, + "learning_rate": 1.4562210281254624e-05, + "loss": 0.5539, + "step": 25502 + }, + { + "epoch": 0.7002471169686986, + "grad_norm": 0.39245885610580444, + "learning_rate": 1.4561825949527417e-05, + "loss": 0.5552, + "step": 25503 + }, + { + "epoch": 0.700274574409665, + "grad_norm": 0.395622581243515, + "learning_rate": 1.4561441609290948e-05, + "loss": 0.5678, + "step": 25504 + }, + { + "epoch": 0.7003020318506316, + "grad_norm": 0.3703465759754181, + "learning_rate": 1.4561057260545935e-05, + "loss": 0.5397, + "step": 25505 + }, + { + "epoch": 0.700329489291598, + "grad_norm": 0.39561647176742554, + "learning_rate": 1.4560672903293094e-05, + "loss": 0.5331, + "step": 25506 + }, + { + "epoch": 0.7003569467325645, + "grad_norm": 0.38219231367111206, + "learning_rate": 1.456028853753314e-05, + "loss": 0.5533, + "step": 25507 + }, + { + "epoch": 0.700384404173531, + "grad_norm": 0.4668722450733185, + "learning_rate": 1.4559904163266795e-05, + "loss": 0.5284, + "step": 25508 + }, + { + "epoch": 0.7004118616144975, + "grad_norm": 0.43465766310691833, + "learning_rate": 1.4559519780494773e-05, + "loss": 0.4835, + "step": 25509 + }, + { + "epoch": 0.7004393190554641, + "grad_norm": 0.4053919315338135, + "learning_rate": 1.4559135389217794e-05, + "loss": 0.4844, + "step": 25510 + }, + { + "epoch": 0.7004667764964305, + "grad_norm": 0.37558043003082275, + "learning_rate": 1.455875098943657e-05, + "loss": 0.4119, + "step": 25511 + }, + { + "epoch": 0.7004942339373971, + "grad_norm": 0.4137763977050781, + "learning_rate": 1.4558366581151819e-05, + "loss": 0.5691, + "step": 25512 + }, + { + "epoch": 0.7005216913783635, + "grad_norm": 0.365683913230896, + "learning_rate": 1.4557982164364261e-05, + "loss": 0.4583, + "step": 25513 + }, + { + "epoch": 0.7005491488193301, + "grad_norm": 0.5254682302474976, + "learning_rate": 1.455759773907461e-05, + "loss": 0.4846, + "step": 25514 + }, + { + "epoch": 0.7005766062602965, + "grad_norm": 0.34630414843559265, + "learning_rate": 1.4557213305283585e-05, + "loss": 0.4335, + "step": 25515 + }, + { + "epoch": 0.700604063701263, + "grad_norm": 0.4035494327545166, + "learning_rate": 1.4556828862991908e-05, + "loss": 0.4658, + "step": 25516 + }, + { + "epoch": 0.7006315211422296, + "grad_norm": 0.39087194204330444, + "learning_rate": 1.4556444412200283e-05, + "loss": 0.563, + "step": 25517 + }, + { + "epoch": 0.700658978583196, + "grad_norm": 0.4403948485851288, + "learning_rate": 1.455605995290944e-05, + "loss": 0.5627, + "step": 25518 + }, + { + "epoch": 0.7006864360241626, + "grad_norm": 0.39401742815971375, + "learning_rate": 1.4555675485120089e-05, + "loss": 0.5103, + "step": 25519 + }, + { + "epoch": 0.700713893465129, + "grad_norm": 0.3294680714607239, + "learning_rate": 1.455529100883295e-05, + "loss": 0.4399, + "step": 25520 + }, + { + "epoch": 0.7007413509060956, + "grad_norm": 0.45106953382492065, + "learning_rate": 1.455490652404874e-05, + "loss": 0.4513, + "step": 25521 + }, + { + "epoch": 0.700768808347062, + "grad_norm": 0.38094329833984375, + "learning_rate": 1.4554522030768171e-05, + "loss": 0.52, + "step": 25522 + }, + { + "epoch": 0.7007962657880286, + "grad_norm": 0.3357788920402527, + "learning_rate": 1.4554137528991972e-05, + "loss": 0.494, + "step": 25523 + }, + { + "epoch": 0.7008237232289951, + "grad_norm": 0.4036049246788025, + "learning_rate": 1.4553753018720849e-05, + "loss": 0.5181, + "step": 25524 + }, + { + "epoch": 0.7008511806699615, + "grad_norm": 0.34515562653541565, + "learning_rate": 1.4553368499955523e-05, + "loss": 0.4078, + "step": 25525 + }, + { + "epoch": 0.7008786381109281, + "grad_norm": 0.5524396300315857, + "learning_rate": 1.4552983972696712e-05, + "loss": 0.5571, + "step": 25526 + }, + { + "epoch": 0.7009060955518945, + "grad_norm": 0.4001401662826538, + "learning_rate": 1.4552599436945132e-05, + "loss": 0.4668, + "step": 25527 + }, + { + "epoch": 0.7009335529928611, + "grad_norm": 0.3967103660106659, + "learning_rate": 1.4552214892701502e-05, + "loss": 0.4778, + "step": 25528 + }, + { + "epoch": 0.7009610104338275, + "grad_norm": 0.4463861584663391, + "learning_rate": 1.4551830339966537e-05, + "loss": 0.4796, + "step": 25529 + }, + { + "epoch": 0.7009884678747941, + "grad_norm": 0.35379305481910706, + "learning_rate": 1.4551445778740957e-05, + "loss": 0.4753, + "step": 25530 + }, + { + "epoch": 0.7010159253157606, + "grad_norm": 0.39269736409187317, + "learning_rate": 1.4551061209025478e-05, + "loss": 0.5051, + "step": 25531 + }, + { + "epoch": 0.7010433827567271, + "grad_norm": 0.40198907256126404, + "learning_rate": 1.4550676630820819e-05, + "loss": 0.5353, + "step": 25532 + }, + { + "epoch": 0.7010708401976936, + "grad_norm": 0.4116518497467041, + "learning_rate": 1.4550292044127694e-05, + "loss": 0.543, + "step": 25533 + }, + { + "epoch": 0.70109829763866, + "grad_norm": 0.3985337018966675, + "learning_rate": 1.454990744894682e-05, + "loss": 0.5066, + "step": 25534 + }, + { + "epoch": 0.7011257550796266, + "grad_norm": 0.35581669211387634, + "learning_rate": 1.454952284527892e-05, + "loss": 0.4877, + "step": 25535 + }, + { + "epoch": 0.701153212520593, + "grad_norm": 0.3826042413711548, + "learning_rate": 1.4549138233124707e-05, + "loss": 0.4641, + "step": 25536 + }, + { + "epoch": 0.7011806699615596, + "grad_norm": 0.4104309380054474, + "learning_rate": 1.4548753612484896e-05, + "loss": 0.5685, + "step": 25537 + }, + { + "epoch": 0.7012081274025261, + "grad_norm": 0.41666179895401, + "learning_rate": 1.454836898336021e-05, + "loss": 0.5691, + "step": 25538 + }, + { + "epoch": 0.7012355848434926, + "grad_norm": 0.39096251130104065, + "learning_rate": 1.4547984345751365e-05, + "loss": 0.5303, + "step": 25539 + }, + { + "epoch": 0.7012630422844591, + "grad_norm": 0.4020645320415497, + "learning_rate": 1.4547599699659079e-05, + "loss": 0.4741, + "step": 25540 + }, + { + "epoch": 0.7012904997254256, + "grad_norm": 0.4154307246208191, + "learning_rate": 1.4547215045084065e-05, + "loss": 0.5128, + "step": 25541 + }, + { + "epoch": 0.7013179571663921, + "grad_norm": 0.38484203815460205, + "learning_rate": 1.4546830382027045e-05, + "loss": 0.543, + "step": 25542 + }, + { + "epoch": 0.7013454146073586, + "grad_norm": 0.33228689432144165, + "learning_rate": 1.4546445710488734e-05, + "loss": 0.4587, + "step": 25543 + }, + { + "epoch": 0.7013728720483251, + "grad_norm": 0.4225093424320221, + "learning_rate": 1.4546061030469852e-05, + "loss": 0.5432, + "step": 25544 + }, + { + "epoch": 0.7014003294892917, + "grad_norm": 0.41656234860420227, + "learning_rate": 1.4545676341971114e-05, + "loss": 0.5205, + "step": 25545 + }, + { + "epoch": 0.7014277869302581, + "grad_norm": 0.4305850863456726, + "learning_rate": 1.454529164499324e-05, + "loss": 0.5787, + "step": 25546 + }, + { + "epoch": 0.7014552443712246, + "grad_norm": 0.39565590023994446, + "learning_rate": 1.4544906939536944e-05, + "loss": 0.4778, + "step": 25547 + }, + { + "epoch": 0.7014827018121911, + "grad_norm": 0.3816887140274048, + "learning_rate": 1.4544522225602951e-05, + "loss": 0.5187, + "step": 25548 + }, + { + "epoch": 0.7015101592531576, + "grad_norm": 0.37928593158721924, + "learning_rate": 1.4544137503191969e-05, + "loss": 0.5063, + "step": 25549 + }, + { + "epoch": 0.7015376166941241, + "grad_norm": 0.3695752024650574, + "learning_rate": 1.4543752772304722e-05, + "loss": 0.5127, + "step": 25550 + }, + { + "epoch": 0.7015650741350906, + "grad_norm": 0.3623110353946686, + "learning_rate": 1.4543368032941924e-05, + "loss": 0.4892, + "step": 25551 + }, + { + "epoch": 0.7015925315760572, + "grad_norm": 0.3767757713794708, + "learning_rate": 1.4542983285104297e-05, + "loss": 0.4772, + "step": 25552 + }, + { + "epoch": 0.7016199890170236, + "grad_norm": 0.39002251625061035, + "learning_rate": 1.4542598528792557e-05, + "loss": 0.5077, + "step": 25553 + }, + { + "epoch": 0.7016474464579902, + "grad_norm": 0.4232649803161621, + "learning_rate": 1.4542213764007418e-05, + "loss": 0.4417, + "step": 25554 + }, + { + "epoch": 0.7016749038989566, + "grad_norm": 0.3804721236228943, + "learning_rate": 1.4541828990749605e-05, + "loss": 0.4642, + "step": 25555 + }, + { + "epoch": 0.7017023613399231, + "grad_norm": 0.37860339879989624, + "learning_rate": 1.4541444209019826e-05, + "loss": 0.572, + "step": 25556 + }, + { + "epoch": 0.7017298187808896, + "grad_norm": 0.3334372341632843, + "learning_rate": 1.4541059418818808e-05, + "loss": 0.4386, + "step": 25557 + }, + { + "epoch": 0.7017572762218561, + "grad_norm": 0.3732791543006897, + "learning_rate": 1.4540674620147261e-05, + "loss": 0.5359, + "step": 25558 + }, + { + "epoch": 0.7017847336628227, + "grad_norm": 0.37556856870651245, + "learning_rate": 1.454028981300591e-05, + "loss": 0.5223, + "step": 25559 + }, + { + "epoch": 0.7018121911037891, + "grad_norm": 0.32337796688079834, + "learning_rate": 1.4539904997395468e-05, + "loss": 0.4151, + "step": 25560 + }, + { + "epoch": 0.7018396485447557, + "grad_norm": 0.410470575094223, + "learning_rate": 1.4539520173316654e-05, + "loss": 0.5381, + "step": 25561 + }, + { + "epoch": 0.7018671059857221, + "grad_norm": 0.3592793047428131, + "learning_rate": 1.4539135340770188e-05, + "loss": 0.4025, + "step": 25562 + }, + { + "epoch": 0.7018945634266887, + "grad_norm": 0.4087314009666443, + "learning_rate": 1.4538750499756785e-05, + "loss": 0.6024, + "step": 25563 + }, + { + "epoch": 0.7019220208676551, + "grad_norm": 0.35463646054267883, + "learning_rate": 1.4538365650277162e-05, + "loss": 0.4124, + "step": 25564 + }, + { + "epoch": 0.7019494783086216, + "grad_norm": 0.41876786947250366, + "learning_rate": 1.453798079233204e-05, + "loss": 0.4653, + "step": 25565 + }, + { + "epoch": 0.7019769357495882, + "grad_norm": 0.35154515504837036, + "learning_rate": 1.4537595925922134e-05, + "loss": 0.4721, + "step": 25566 + }, + { + "epoch": 0.7020043931905546, + "grad_norm": 0.3559425175189972, + "learning_rate": 1.4537211051048167e-05, + "loss": 0.4896, + "step": 25567 + }, + { + "epoch": 0.7020318506315212, + "grad_norm": 0.361256867647171, + "learning_rate": 1.4536826167710852e-05, + "loss": 0.4437, + "step": 25568 + }, + { + "epoch": 0.7020593080724876, + "grad_norm": 0.5027250051498413, + "learning_rate": 1.4536441275910904e-05, + "loss": 0.5362, + "step": 25569 + }, + { + "epoch": 0.7020867655134542, + "grad_norm": 0.3667948544025421, + "learning_rate": 1.453605637564905e-05, + "loss": 0.5205, + "step": 25570 + }, + { + "epoch": 0.7021142229544206, + "grad_norm": 0.4688922166824341, + "learning_rate": 1.4535671466926e-05, + "loss": 0.501, + "step": 25571 + }, + { + "epoch": 0.7021416803953872, + "grad_norm": 0.36227062344551086, + "learning_rate": 1.4535286549742478e-05, + "loss": 0.407, + "step": 25572 + }, + { + "epoch": 0.7021691378363537, + "grad_norm": 0.3392520546913147, + "learning_rate": 1.4534901624099198e-05, + "loss": 0.3981, + "step": 25573 + }, + { + "epoch": 0.7021965952773201, + "grad_norm": 0.39412832260131836, + "learning_rate": 1.4534516689996878e-05, + "loss": 0.5246, + "step": 25574 + }, + { + "epoch": 0.7022240527182867, + "grad_norm": 0.39351391792297363, + "learning_rate": 1.4534131747436237e-05, + "loss": 0.4999, + "step": 25575 + }, + { + "epoch": 0.7022515101592531, + "grad_norm": 0.35336238145828247, + "learning_rate": 1.4533746796417994e-05, + "loss": 0.5567, + "step": 25576 + }, + { + "epoch": 0.7022789676002197, + "grad_norm": 0.4432460367679596, + "learning_rate": 1.4533361836942868e-05, + "loss": 0.6227, + "step": 25577 + }, + { + "epoch": 0.7023064250411861, + "grad_norm": 0.4296761453151703, + "learning_rate": 1.4532976869011572e-05, + "loss": 0.4503, + "step": 25578 + }, + { + "epoch": 0.7023338824821527, + "grad_norm": 0.4211810827255249, + "learning_rate": 1.4532591892624828e-05, + "loss": 0.4204, + "step": 25579 + }, + { + "epoch": 0.7023613399231191, + "grad_norm": 0.33984047174453735, + "learning_rate": 1.4532206907783356e-05, + "loss": 0.4243, + "step": 25580 + }, + { + "epoch": 0.7023887973640857, + "grad_norm": 0.37419456243515015, + "learning_rate": 1.4531821914487869e-05, + "loss": 0.4865, + "step": 25581 + }, + { + "epoch": 0.7024162548050522, + "grad_norm": 0.3900434076786041, + "learning_rate": 1.453143691273909e-05, + "loss": 0.4746, + "step": 25582 + }, + { + "epoch": 0.7024437122460186, + "grad_norm": 0.43376314640045166, + "learning_rate": 1.4531051902537734e-05, + "loss": 0.4498, + "step": 25583 + }, + { + "epoch": 0.7024711696869852, + "grad_norm": 0.42061108350753784, + "learning_rate": 1.4530666883884518e-05, + "loss": 0.5837, + "step": 25584 + }, + { + "epoch": 0.7024986271279516, + "grad_norm": 0.6189656257629395, + "learning_rate": 1.4530281856780164e-05, + "loss": 0.5164, + "step": 25585 + }, + { + "epoch": 0.7025260845689182, + "grad_norm": 0.4034634232521057, + "learning_rate": 1.4529896821225388e-05, + "loss": 0.6029, + "step": 25586 + }, + { + "epoch": 0.7025535420098846, + "grad_norm": 1.0585527420043945, + "learning_rate": 1.452951177722091e-05, + "loss": 0.4641, + "step": 25587 + }, + { + "epoch": 0.7025809994508512, + "grad_norm": 0.3717815577983856, + "learning_rate": 1.4529126724767447e-05, + "loss": 0.5409, + "step": 25588 + }, + { + "epoch": 0.7026084568918177, + "grad_norm": 0.5228464603424072, + "learning_rate": 1.4528741663865714e-05, + "loss": 0.4318, + "step": 25589 + }, + { + "epoch": 0.7026359143327842, + "grad_norm": 0.3814222514629364, + "learning_rate": 1.4528356594516435e-05, + "loss": 0.4768, + "step": 25590 + }, + { + "epoch": 0.7026633717737507, + "grad_norm": 0.4083000421524048, + "learning_rate": 1.4527971516720325e-05, + "loss": 0.5827, + "step": 25591 + }, + { + "epoch": 0.7026908292147171, + "grad_norm": 0.37474197149276733, + "learning_rate": 1.4527586430478102e-05, + "loss": 0.5117, + "step": 25592 + }, + { + "epoch": 0.7027182866556837, + "grad_norm": 0.4106961786746979, + "learning_rate": 1.4527201335790486e-05, + "loss": 0.5974, + "step": 25593 + }, + { + "epoch": 0.7027457440966501, + "grad_norm": 0.3558419644832611, + "learning_rate": 1.4526816232658197e-05, + "loss": 0.5242, + "step": 25594 + }, + { + "epoch": 0.7027732015376167, + "grad_norm": 0.3599098324775696, + "learning_rate": 1.4526431121081947e-05, + "loss": 0.5219, + "step": 25595 + }, + { + "epoch": 0.7028006589785832, + "grad_norm": 0.33500590920448303, + "learning_rate": 1.4526046001062461e-05, + "loss": 0.4254, + "step": 25596 + }, + { + "epoch": 0.7028281164195497, + "grad_norm": 0.34247061610221863, + "learning_rate": 1.4525660872600453e-05, + "loss": 0.4618, + "step": 25597 + }, + { + "epoch": 0.7028555738605162, + "grad_norm": 0.40571045875549316, + "learning_rate": 1.4525275735696644e-05, + "loss": 0.5539, + "step": 25598 + }, + { + "epoch": 0.7028830313014827, + "grad_norm": 0.41074541211128235, + "learning_rate": 1.452489059035175e-05, + "loss": 0.4878, + "step": 25599 + }, + { + "epoch": 0.7029104887424492, + "grad_norm": 0.37267449498176575, + "learning_rate": 1.4524505436566493e-05, + "loss": 0.4228, + "step": 25600 + }, + { + "epoch": 0.7029379461834157, + "grad_norm": 0.5731538534164429, + "learning_rate": 1.4524120274341588e-05, + "loss": 0.5077, + "step": 25601 + }, + { + "epoch": 0.7029654036243822, + "grad_norm": 0.4136224389076233, + "learning_rate": 1.4523735103677756e-05, + "loss": 0.5979, + "step": 25602 + }, + { + "epoch": 0.7029928610653488, + "grad_norm": 0.40230220556259155, + "learning_rate": 1.4523349924575715e-05, + "loss": 0.5509, + "step": 25603 + }, + { + "epoch": 0.7030203185063152, + "grad_norm": 0.3627816438674927, + "learning_rate": 1.4522964737036181e-05, + "loss": 0.4381, + "step": 25604 + }, + { + "epoch": 0.7030477759472817, + "grad_norm": 0.38819822669029236, + "learning_rate": 1.4522579541059874e-05, + "loss": 0.4852, + "step": 25605 + }, + { + "epoch": 0.7030752333882482, + "grad_norm": 0.42598363757133484, + "learning_rate": 1.4522194336647512e-05, + "loss": 0.5109, + "step": 25606 + }, + { + "epoch": 0.7031026908292147, + "grad_norm": 0.36737725138664246, + "learning_rate": 1.4521809123799815e-05, + "loss": 0.4938, + "step": 25607 + }, + { + "epoch": 0.7031301482701812, + "grad_norm": 0.3718014359474182, + "learning_rate": 1.4521423902517502e-05, + "loss": 0.4206, + "step": 25608 + }, + { + "epoch": 0.7031576057111477, + "grad_norm": 0.4484901428222656, + "learning_rate": 1.4521038672801288e-05, + "loss": 0.4296, + "step": 25609 + }, + { + "epoch": 0.7031850631521143, + "grad_norm": 0.3786068260669708, + "learning_rate": 1.4520653434651895e-05, + "loss": 0.5819, + "step": 25610 + }, + { + "epoch": 0.7032125205930807, + "grad_norm": 0.3906787037849426, + "learning_rate": 1.4520268188070042e-05, + "loss": 0.4294, + "step": 25611 + }, + { + "epoch": 0.7032399780340473, + "grad_norm": 0.4383326768875122, + "learning_rate": 1.4519882933056443e-05, + "loss": 0.5258, + "step": 25612 + }, + { + "epoch": 0.7032674354750137, + "grad_norm": 0.43764373660087585, + "learning_rate": 1.4519497669611824e-05, + "loss": 0.5359, + "step": 25613 + }, + { + "epoch": 0.7032948929159802, + "grad_norm": 0.35168763995170593, + "learning_rate": 1.4519112397736898e-05, + "loss": 0.4438, + "step": 25614 + }, + { + "epoch": 0.7033223503569467, + "grad_norm": 0.3711172044277191, + "learning_rate": 1.4518727117432383e-05, + "loss": 0.5329, + "step": 25615 + }, + { + "epoch": 0.7033498077979132, + "grad_norm": 0.3994397521018982, + "learning_rate": 1.4518341828699e-05, + "loss": 0.4727, + "step": 25616 + }, + { + "epoch": 0.7033772652388798, + "grad_norm": 0.34934115409851074, + "learning_rate": 1.451795653153747e-05, + "loss": 0.4681, + "step": 25617 + }, + { + "epoch": 0.7034047226798462, + "grad_norm": 0.34250280261039734, + "learning_rate": 1.4517571225948506e-05, + "loss": 0.4884, + "step": 25618 + }, + { + "epoch": 0.7034321801208128, + "grad_norm": 0.4589095115661621, + "learning_rate": 1.4517185911932835e-05, + "loss": 0.542, + "step": 25619 + }, + { + "epoch": 0.7034596375617792, + "grad_norm": 0.36239370703697205, + "learning_rate": 1.4516800589491165e-05, + "loss": 0.4984, + "step": 25620 + }, + { + "epoch": 0.7034870950027458, + "grad_norm": 0.34629982709884644, + "learning_rate": 1.451641525862422e-05, + "loss": 0.4649, + "step": 25621 + }, + { + "epoch": 0.7035145524437122, + "grad_norm": 0.38144567608833313, + "learning_rate": 1.4516029919332724e-05, + "loss": 0.5291, + "step": 25622 + }, + { + "epoch": 0.7035420098846787, + "grad_norm": 0.383162260055542, + "learning_rate": 1.4515644571617387e-05, + "loss": 0.5606, + "step": 25623 + }, + { + "epoch": 0.7035694673256453, + "grad_norm": 0.37346625328063965, + "learning_rate": 1.4515259215478932e-05, + "loss": 0.4641, + "step": 25624 + }, + { + "epoch": 0.7035969247666117, + "grad_norm": 0.39530083537101746, + "learning_rate": 1.4514873850918076e-05, + "loss": 0.4764, + "step": 25625 + }, + { + "epoch": 0.7036243822075783, + "grad_norm": 0.3469306230545044, + "learning_rate": 1.4514488477935542e-05, + "loss": 0.4742, + "step": 25626 + }, + { + "epoch": 0.7036518396485447, + "grad_norm": 0.3829055726528168, + "learning_rate": 1.4514103096532046e-05, + "loss": 0.5122, + "step": 25627 + }, + { + "epoch": 0.7036792970895113, + "grad_norm": 0.34749579429626465, + "learning_rate": 1.4513717706708309e-05, + "loss": 0.4326, + "step": 25628 + }, + { + "epoch": 0.7037067545304777, + "grad_norm": 0.3661653995513916, + "learning_rate": 1.4513332308465045e-05, + "loss": 0.4628, + "step": 25629 + }, + { + "epoch": 0.7037342119714443, + "grad_norm": 0.439898818731308, + "learning_rate": 1.4512946901802974e-05, + "loss": 0.5724, + "step": 25630 + }, + { + "epoch": 0.7037616694124108, + "grad_norm": 0.37449410557746887, + "learning_rate": 1.451256148672282e-05, + "loss": 0.4641, + "step": 25631 + }, + { + "epoch": 0.7037891268533772, + "grad_norm": 0.4128553569316864, + "learning_rate": 1.4512176063225298e-05, + "loss": 0.5863, + "step": 25632 + }, + { + "epoch": 0.7038165842943438, + "grad_norm": 0.4747392237186432, + "learning_rate": 1.4511790631311126e-05, + "loss": 0.5474, + "step": 25633 + }, + { + "epoch": 0.7038440417353102, + "grad_norm": 0.4205755591392517, + "learning_rate": 1.4511405190981025e-05, + "loss": 0.537, + "step": 25634 + }, + { + "epoch": 0.7038714991762768, + "grad_norm": 0.37671807408332825, + "learning_rate": 1.4511019742235714e-05, + "loss": 0.4643, + "step": 25635 + }, + { + "epoch": 0.7038989566172432, + "grad_norm": 0.4256541132926941, + "learning_rate": 1.451063428507591e-05, + "loss": 0.5761, + "step": 25636 + }, + { + "epoch": 0.7039264140582098, + "grad_norm": 0.3496798276901245, + "learning_rate": 1.4510248819502337e-05, + "loss": 0.5236, + "step": 25637 + }, + { + "epoch": 0.7039538714991763, + "grad_norm": 0.3632848858833313, + "learning_rate": 1.4509863345515707e-05, + "loss": 0.4441, + "step": 25638 + }, + { + "epoch": 0.7039813289401428, + "grad_norm": 0.40795382857322693, + "learning_rate": 1.4509477863116744e-05, + "loss": 0.533, + "step": 25639 + }, + { + "epoch": 0.7040087863811093, + "grad_norm": 0.5161058306694031, + "learning_rate": 1.4509092372306166e-05, + "loss": 0.5042, + "step": 25640 + }, + { + "epoch": 0.7040362438220757, + "grad_norm": 0.3534725606441498, + "learning_rate": 1.4508706873084691e-05, + "loss": 0.5134, + "step": 25641 + }, + { + "epoch": 0.7040637012630423, + "grad_norm": 0.3642263412475586, + "learning_rate": 1.4508321365453038e-05, + "loss": 0.5275, + "step": 25642 + }, + { + "epoch": 0.7040911587040087, + "grad_norm": 0.38733094930648804, + "learning_rate": 1.4507935849411925e-05, + "loss": 0.5905, + "step": 25643 + }, + { + "epoch": 0.7041186161449753, + "grad_norm": 0.4156807065010071, + "learning_rate": 1.4507550324962076e-05, + "loss": 0.5182, + "step": 25644 + }, + { + "epoch": 0.7041460735859418, + "grad_norm": 0.37364086508750916, + "learning_rate": 1.4507164792104206e-05, + "loss": 0.5202, + "step": 25645 + }, + { + "epoch": 0.7041735310269083, + "grad_norm": 0.3490923047065735, + "learning_rate": 1.4506779250839037e-05, + "loss": 0.4464, + "step": 25646 + }, + { + "epoch": 0.7042009884678748, + "grad_norm": 0.3800398111343384, + "learning_rate": 1.4506393701167284e-05, + "loss": 0.4617, + "step": 25647 + }, + { + "epoch": 0.7042284459088413, + "grad_norm": 0.3761318027973175, + "learning_rate": 1.4506008143089669e-05, + "loss": 0.4826, + "step": 25648 + }, + { + "epoch": 0.7042559033498078, + "grad_norm": 0.37143296003341675, + "learning_rate": 1.450562257660691e-05, + "loss": 0.4963, + "step": 25649 + }, + { + "epoch": 0.7042833607907742, + "grad_norm": 0.3475790321826935, + "learning_rate": 1.4505237001719727e-05, + "loss": 0.4575, + "step": 25650 + }, + { + "epoch": 0.7043108182317408, + "grad_norm": 0.31979092955589294, + "learning_rate": 1.4504851418428838e-05, + "loss": 0.4743, + "step": 25651 + }, + { + "epoch": 0.7043382756727073, + "grad_norm": 0.37928786873817444, + "learning_rate": 1.4504465826734966e-05, + "loss": 0.5174, + "step": 25652 + }, + { + "epoch": 0.7043657331136738, + "grad_norm": 0.38302531838417053, + "learning_rate": 1.4504080226638828e-05, + "loss": 0.5332, + "step": 25653 + }, + { + "epoch": 0.7043931905546403, + "grad_norm": 0.41552409529685974, + "learning_rate": 1.450369461814114e-05, + "loss": 0.597, + "step": 25654 + }, + { + "epoch": 0.7044206479956068, + "grad_norm": 0.4232742190361023, + "learning_rate": 1.4503309001242626e-05, + "loss": 0.5567, + "step": 25655 + }, + { + "epoch": 0.7044481054365733, + "grad_norm": 0.38879749178886414, + "learning_rate": 1.4502923375944002e-05, + "loss": 0.5338, + "step": 25656 + }, + { + "epoch": 0.7044755628775398, + "grad_norm": 0.5789538025856018, + "learning_rate": 1.450253774224599e-05, + "loss": 0.5065, + "step": 25657 + }, + { + "epoch": 0.7045030203185063, + "grad_norm": 0.36123889684677124, + "learning_rate": 1.450215210014931e-05, + "loss": 0.5241, + "step": 25658 + }, + { + "epoch": 0.7045304777594729, + "grad_norm": 0.3551064133644104, + "learning_rate": 1.4501766449654675e-05, + "loss": 0.475, + "step": 25659 + }, + { + "epoch": 0.7045579352004393, + "grad_norm": 0.3896539509296417, + "learning_rate": 1.450138079076281e-05, + "loss": 0.4842, + "step": 25660 + }, + { + "epoch": 0.7045853926414058, + "grad_norm": 0.3786185383796692, + "learning_rate": 1.4500995123474436e-05, + "loss": 0.5168, + "step": 25661 + }, + { + "epoch": 0.7046128500823723, + "grad_norm": 0.39876487851142883, + "learning_rate": 1.4500609447790265e-05, + "loss": 0.4985, + "step": 25662 + }, + { + "epoch": 0.7046403075233388, + "grad_norm": 0.3739904761314392, + "learning_rate": 1.4500223763711026e-05, + "loss": 0.532, + "step": 25663 + }, + { + "epoch": 0.7046677649643053, + "grad_norm": 0.38699647784233093, + "learning_rate": 1.4499838071237431e-05, + "loss": 0.4889, + "step": 25664 + }, + { + "epoch": 0.7046952224052718, + "grad_norm": 0.33833956718444824, + "learning_rate": 1.4499452370370202e-05, + "loss": 0.4362, + "step": 25665 + }, + { + "epoch": 0.7047226798462384, + "grad_norm": 0.4008192718029022, + "learning_rate": 1.4499066661110057e-05, + "loss": 0.4949, + "step": 25666 + }, + { + "epoch": 0.7047501372872048, + "grad_norm": 0.381436824798584, + "learning_rate": 1.4498680943457717e-05, + "loss": 0.449, + "step": 25667 + }, + { + "epoch": 0.7047775947281714, + "grad_norm": 0.43844908475875854, + "learning_rate": 1.4498295217413902e-05, + "loss": 0.4962, + "step": 25668 + }, + { + "epoch": 0.7048050521691378, + "grad_norm": 0.32793542742729187, + "learning_rate": 1.449790948297933e-05, + "loss": 0.4906, + "step": 25669 + }, + { + "epoch": 0.7048325096101044, + "grad_norm": 0.3963126838207245, + "learning_rate": 1.4497523740154722e-05, + "loss": 0.5174, + "step": 25670 + }, + { + "epoch": 0.7048599670510708, + "grad_norm": 0.40752270817756653, + "learning_rate": 1.4497137988940798e-05, + "loss": 0.4838, + "step": 25671 + }, + { + "epoch": 0.7048874244920373, + "grad_norm": 0.3942354917526245, + "learning_rate": 1.4496752229338275e-05, + "loss": 0.4657, + "step": 25672 + }, + { + "epoch": 0.7049148819330039, + "grad_norm": 0.3837500810623169, + "learning_rate": 1.4496366461347875e-05, + "loss": 0.4252, + "step": 25673 + }, + { + "epoch": 0.7049423393739703, + "grad_norm": 0.3776382505893707, + "learning_rate": 1.4495980684970311e-05, + "loss": 0.5714, + "step": 25674 + }, + { + "epoch": 0.7049697968149369, + "grad_norm": 0.44987764954566956, + "learning_rate": 1.4495594900206316e-05, + "loss": 0.5105, + "step": 25675 + }, + { + "epoch": 0.7049972542559033, + "grad_norm": 0.37573108077049255, + "learning_rate": 1.4495209107056599e-05, + "loss": 0.4366, + "step": 25676 + }, + { + "epoch": 0.7050247116968699, + "grad_norm": 0.356988787651062, + "learning_rate": 1.4494823305521879e-05, + "loss": 0.4971, + "step": 25677 + }, + { + "epoch": 0.7050521691378363, + "grad_norm": 0.5029333233833313, + "learning_rate": 1.4494437495602881e-05, + "loss": 0.419, + "step": 25678 + }, + { + "epoch": 0.7050796265788029, + "grad_norm": 0.6312275528907776, + "learning_rate": 1.4494051677300326e-05, + "loss": 0.4725, + "step": 25679 + }, + { + "epoch": 0.7051070840197694, + "grad_norm": 0.3834789991378784, + "learning_rate": 1.4493665850614927e-05, + "loss": 0.4791, + "step": 25680 + }, + { + "epoch": 0.7051345414607358, + "grad_norm": 0.482816606760025, + "learning_rate": 1.4493280015547406e-05, + "loss": 0.4919, + "step": 25681 + }, + { + "epoch": 0.7051619989017024, + "grad_norm": 0.42107242345809937, + "learning_rate": 1.4492894172098484e-05, + "loss": 0.4614, + "step": 25682 + }, + { + "epoch": 0.7051894563426688, + "grad_norm": 0.34619423747062683, + "learning_rate": 1.4492508320268882e-05, + "loss": 0.413, + "step": 25683 + }, + { + "epoch": 0.7052169137836354, + "grad_norm": 0.3370210528373718, + "learning_rate": 1.449212246005932e-05, + "loss": 0.4787, + "step": 25684 + }, + { + "epoch": 0.7052443712246018, + "grad_norm": 0.40019407868385315, + "learning_rate": 1.449173659147051e-05, + "loss": 0.5185, + "step": 25685 + }, + { + "epoch": 0.7052718286655684, + "grad_norm": 0.43080300092697144, + "learning_rate": 1.4491350714503182e-05, + "loss": 0.5265, + "step": 25686 + }, + { + "epoch": 0.7052992861065349, + "grad_norm": 0.4275834858417511, + "learning_rate": 1.4490964829158052e-05, + "loss": 0.6374, + "step": 25687 + }, + { + "epoch": 0.7053267435475014, + "grad_norm": 0.3616642355918884, + "learning_rate": 1.4490578935435838e-05, + "loss": 0.488, + "step": 25688 + }, + { + "epoch": 0.7053542009884679, + "grad_norm": 0.410146564245224, + "learning_rate": 1.4490193033337262e-05, + "loss": 0.5554, + "step": 25689 + }, + { + "epoch": 0.7053816584294343, + "grad_norm": 0.3768078684806824, + "learning_rate": 1.448980712286304e-05, + "loss": 0.5111, + "step": 25690 + }, + { + "epoch": 0.7054091158704009, + "grad_norm": 0.33338791131973267, + "learning_rate": 1.4489421204013896e-05, + "loss": 0.4547, + "step": 25691 + }, + { + "epoch": 0.7054365733113673, + "grad_norm": 0.36915135383605957, + "learning_rate": 1.4489035276790549e-05, + "loss": 0.508, + "step": 25692 + }, + { + "epoch": 0.7054640307523339, + "grad_norm": 0.35876166820526123, + "learning_rate": 1.4488649341193719e-05, + "loss": 0.4499, + "step": 25693 + }, + { + "epoch": 0.7054914881933004, + "grad_norm": 0.341897189617157, + "learning_rate": 1.4488263397224126e-05, + "loss": 0.4691, + "step": 25694 + }, + { + "epoch": 0.7055189456342669, + "grad_norm": 0.37152573466300964, + "learning_rate": 1.4487877444882486e-05, + "loss": 0.4995, + "step": 25695 + }, + { + "epoch": 0.7055464030752334, + "grad_norm": 0.4035491347312927, + "learning_rate": 1.4487491484169525e-05, + "loss": 0.5153, + "step": 25696 + }, + { + "epoch": 0.7055738605161999, + "grad_norm": 0.37194037437438965, + "learning_rate": 1.4487105515085959e-05, + "loss": 0.4509, + "step": 25697 + }, + { + "epoch": 0.7056013179571664, + "grad_norm": 0.5227673649787903, + "learning_rate": 1.448671953763251e-05, + "loss": 0.501, + "step": 25698 + }, + { + "epoch": 0.7056287753981328, + "grad_norm": 0.38403379917144775, + "learning_rate": 1.4486333551809897e-05, + "loss": 0.6119, + "step": 25699 + }, + { + "epoch": 0.7056562328390994, + "grad_norm": 0.3858312964439392, + "learning_rate": 1.4485947557618838e-05, + "loss": 0.5314, + "step": 25700 + }, + { + "epoch": 0.705683690280066, + "grad_norm": 0.3897850215435028, + "learning_rate": 1.4485561555060059e-05, + "loss": 0.4101, + "step": 25701 + }, + { + "epoch": 0.7057111477210324, + "grad_norm": 0.35661229491233826, + "learning_rate": 1.4485175544134273e-05, + "loss": 0.5197, + "step": 25702 + }, + { + "epoch": 0.7057386051619989, + "grad_norm": 0.4033012390136719, + "learning_rate": 1.4484789524842204e-05, + "loss": 0.4788, + "step": 25703 + }, + { + "epoch": 0.7057660626029654, + "grad_norm": 0.336189329624176, + "learning_rate": 1.4484403497184572e-05, + "loss": 0.4763, + "step": 25704 + }, + { + "epoch": 0.7057935200439319, + "grad_norm": 0.4546240568161011, + "learning_rate": 1.4484017461162094e-05, + "loss": 0.4977, + "step": 25705 + }, + { + "epoch": 0.7058209774848984, + "grad_norm": 0.4221002459526062, + "learning_rate": 1.4483631416775494e-05, + "loss": 0.6043, + "step": 25706 + }, + { + "epoch": 0.7058484349258649, + "grad_norm": 0.38541996479034424, + "learning_rate": 1.4483245364025491e-05, + "loss": 0.5197, + "step": 25707 + }, + { + "epoch": 0.7058758923668315, + "grad_norm": 0.4084306061267853, + "learning_rate": 1.4482859302912801e-05, + "loss": 0.4485, + "step": 25708 + }, + { + "epoch": 0.7059033498077979, + "grad_norm": 0.4060688614845276, + "learning_rate": 1.4482473233438152e-05, + "loss": 0.5711, + "step": 25709 + }, + { + "epoch": 0.7059308072487644, + "grad_norm": 0.39270877838134766, + "learning_rate": 1.4482087155602257e-05, + "loss": 0.5145, + "step": 25710 + }, + { + "epoch": 0.7059582646897309, + "grad_norm": 0.355991929769516, + "learning_rate": 1.448170106940584e-05, + "loss": 0.453, + "step": 25711 + }, + { + "epoch": 0.7059857221306974, + "grad_norm": 0.42438971996307373, + "learning_rate": 1.4481314974849622e-05, + "loss": 0.4171, + "step": 25712 + }, + { + "epoch": 0.7060131795716639, + "grad_norm": 0.3883151113986969, + "learning_rate": 1.4480928871934317e-05, + "loss": 0.4488, + "step": 25713 + }, + { + "epoch": 0.7060406370126304, + "grad_norm": 0.3715982437133789, + "learning_rate": 1.4480542760660653e-05, + "loss": 0.4877, + "step": 25714 + }, + { + "epoch": 0.706068094453597, + "grad_norm": 0.45923733711242676, + "learning_rate": 1.4480156641029344e-05, + "loss": 0.5273, + "step": 25715 + }, + { + "epoch": 0.7060955518945634, + "grad_norm": 0.4255702793598175, + "learning_rate": 1.4479770513041117e-05, + "loss": 0.5369, + "step": 25716 + }, + { + "epoch": 0.70612300933553, + "grad_norm": 0.4855153560638428, + "learning_rate": 1.4479384376696685e-05, + "loss": 0.5134, + "step": 25717 + }, + { + "epoch": 0.7061504667764964, + "grad_norm": 0.37968909740448, + "learning_rate": 1.447899823199677e-05, + "loss": 0.4735, + "step": 25718 + }, + { + "epoch": 0.706177924217463, + "grad_norm": 0.3487691879272461, + "learning_rate": 1.4478612078942099e-05, + "loss": 0.4684, + "step": 25719 + }, + { + "epoch": 0.7062053816584294, + "grad_norm": 0.4061296284198761, + "learning_rate": 1.4478225917533381e-05, + "loss": 0.5009, + "step": 25720 + }, + { + "epoch": 0.7062328390993959, + "grad_norm": 0.3874371349811554, + "learning_rate": 1.4477839747771347e-05, + "loss": 0.4659, + "step": 25721 + }, + { + "epoch": 0.7062602965403625, + "grad_norm": 0.3507485091686249, + "learning_rate": 1.447745356965671e-05, + "loss": 0.4963, + "step": 25722 + }, + { + "epoch": 0.7062877539813289, + "grad_norm": 0.43081483244895935, + "learning_rate": 1.4477067383190196e-05, + "loss": 0.5389, + "step": 25723 + }, + { + "epoch": 0.7063152114222955, + "grad_norm": 0.3848617374897003, + "learning_rate": 1.447668118837252e-05, + "loss": 0.5021, + "step": 25724 + }, + { + "epoch": 0.7063426688632619, + "grad_norm": 0.36143532395362854, + "learning_rate": 1.4476294985204405e-05, + "loss": 0.5079, + "step": 25725 + }, + { + "epoch": 0.7063701263042285, + "grad_norm": 0.3673800230026245, + "learning_rate": 1.4475908773686574e-05, + "loss": 0.5014, + "step": 25726 + }, + { + "epoch": 0.7063975837451949, + "grad_norm": 0.44015592336654663, + "learning_rate": 1.4475522553819739e-05, + "loss": 0.5735, + "step": 25727 + }, + { + "epoch": 0.7064250411861615, + "grad_norm": 0.40906164050102234, + "learning_rate": 1.447513632560463e-05, + "loss": 0.4852, + "step": 25728 + }, + { + "epoch": 0.706452498627128, + "grad_norm": 0.42395922541618347, + "learning_rate": 1.4474750089041964e-05, + "loss": 0.5996, + "step": 25729 + }, + { + "epoch": 0.7064799560680944, + "grad_norm": 0.38385912775993347, + "learning_rate": 1.4474363844132458e-05, + "loss": 0.4663, + "step": 25730 + }, + { + "epoch": 0.706507413509061, + "grad_norm": 0.33122003078460693, + "learning_rate": 1.4473977590876838e-05, + "loss": 0.4348, + "step": 25731 + }, + { + "epoch": 0.7065348709500274, + "grad_norm": 0.3811768889427185, + "learning_rate": 1.447359132927582e-05, + "loss": 0.4455, + "step": 25732 + }, + { + "epoch": 0.706562328390994, + "grad_norm": 0.4336438775062561, + "learning_rate": 1.4473205059330129e-05, + "loss": 0.545, + "step": 25733 + }, + { + "epoch": 0.7065897858319604, + "grad_norm": 0.32426828145980835, + "learning_rate": 1.447281878104048e-05, + "loss": 0.4015, + "step": 25734 + }, + { + "epoch": 0.706617243272927, + "grad_norm": 0.40139874815940857, + "learning_rate": 1.4472432494407595e-05, + "loss": 0.5196, + "step": 25735 + }, + { + "epoch": 0.7066447007138935, + "grad_norm": 0.3931628465652466, + "learning_rate": 1.4472046199432202e-05, + "loss": 0.4771, + "step": 25736 + }, + { + "epoch": 0.70667215815486, + "grad_norm": 0.3732810616493225, + "learning_rate": 1.4471659896115009e-05, + "loss": 0.5287, + "step": 25737 + }, + { + "epoch": 0.7066996155958265, + "grad_norm": 0.39896053075790405, + "learning_rate": 1.4471273584456746e-05, + "loss": 0.5576, + "step": 25738 + }, + { + "epoch": 0.7067270730367929, + "grad_norm": 0.4081512987613678, + "learning_rate": 1.4470887264458132e-05, + "loss": 0.5608, + "step": 25739 + }, + { + "epoch": 0.7067545304777595, + "grad_norm": 0.38027241826057434, + "learning_rate": 1.4470500936119885e-05, + "loss": 0.5332, + "step": 25740 + }, + { + "epoch": 0.7067819879187259, + "grad_norm": 0.3419736325740814, + "learning_rate": 1.4470114599442729e-05, + "loss": 0.4611, + "step": 25741 + }, + { + "epoch": 0.7068094453596925, + "grad_norm": 0.4779621660709381, + "learning_rate": 1.4469728254427377e-05, + "loss": 0.5786, + "step": 25742 + }, + { + "epoch": 0.706836902800659, + "grad_norm": 0.36675217747688293, + "learning_rate": 1.446934190107456e-05, + "loss": 0.4825, + "step": 25743 + }, + { + "epoch": 0.7068643602416255, + "grad_norm": 0.4539373815059662, + "learning_rate": 1.4468955539384992e-05, + "loss": 0.6122, + "step": 25744 + }, + { + "epoch": 0.706891817682592, + "grad_norm": 0.5645896196365356, + "learning_rate": 1.4468569169359397e-05, + "loss": 0.564, + "step": 25745 + }, + { + "epoch": 0.7069192751235585, + "grad_norm": 0.37369534373283386, + "learning_rate": 1.4468182790998495e-05, + "loss": 0.4622, + "step": 25746 + }, + { + "epoch": 0.706946732564525, + "grad_norm": 0.38322997093200684, + "learning_rate": 1.4467796404303003e-05, + "loss": 0.5072, + "step": 25747 + }, + { + "epoch": 0.7069741900054914, + "grad_norm": 0.3349263072013855, + "learning_rate": 1.4467410009273648e-05, + "loss": 0.3805, + "step": 25748 + }, + { + "epoch": 0.707001647446458, + "grad_norm": 0.3882410228252411, + "learning_rate": 1.4467023605911143e-05, + "loss": 0.5204, + "step": 25749 + }, + { + "epoch": 0.7070291048874245, + "grad_norm": 0.38654112815856934, + "learning_rate": 1.4466637194216218e-05, + "loss": 0.5152, + "step": 25750 + }, + { + "epoch": 0.707056562328391, + "grad_norm": 0.4016451835632324, + "learning_rate": 1.4466250774189588e-05, + "loss": 0.476, + "step": 25751 + }, + { + "epoch": 0.7070840197693575, + "grad_norm": 0.38211220502853394, + "learning_rate": 1.4465864345831973e-05, + "loss": 0.4882, + "step": 25752 + }, + { + "epoch": 0.707111477210324, + "grad_norm": 0.337505578994751, + "learning_rate": 1.4465477909144097e-05, + "loss": 0.4075, + "step": 25753 + }, + { + "epoch": 0.7071389346512905, + "grad_norm": 0.39918753504753113, + "learning_rate": 1.4465091464126677e-05, + "loss": 0.4603, + "step": 25754 + }, + { + "epoch": 0.707166392092257, + "grad_norm": 0.40793853998184204, + "learning_rate": 1.446470501078044e-05, + "loss": 0.462, + "step": 25755 + }, + { + "epoch": 0.7071938495332235, + "grad_norm": 0.39319825172424316, + "learning_rate": 1.4464318549106102e-05, + "loss": 0.5569, + "step": 25756 + }, + { + "epoch": 0.7072213069741901, + "grad_norm": 0.39530667662620544, + "learning_rate": 1.4463932079104383e-05, + "loss": 0.5546, + "step": 25757 + }, + { + "epoch": 0.7072487644151565, + "grad_norm": 0.500117301940918, + "learning_rate": 1.446354560077601e-05, + "loss": 0.5559, + "step": 25758 + }, + { + "epoch": 0.707276221856123, + "grad_norm": 0.5452039241790771, + "learning_rate": 1.4463159114121698e-05, + "loss": 0.5991, + "step": 25759 + }, + { + "epoch": 0.7073036792970895, + "grad_norm": 0.3802798092365265, + "learning_rate": 1.446277261914217e-05, + "loss": 0.511, + "step": 25760 + }, + { + "epoch": 0.707331136738056, + "grad_norm": 0.3717237710952759, + "learning_rate": 1.4462386115838145e-05, + "loss": 0.4862, + "step": 25761 + }, + { + "epoch": 0.7073585941790225, + "grad_norm": 0.3931743800640106, + "learning_rate": 1.4461999604210347e-05, + "loss": 0.5458, + "step": 25762 + }, + { + "epoch": 0.707386051619989, + "grad_norm": 0.3521151542663574, + "learning_rate": 1.4461613084259496e-05, + "loss": 0.5007, + "step": 25763 + }, + { + "epoch": 0.7074135090609556, + "grad_norm": 0.41678494215011597, + "learning_rate": 1.4461226555986312e-05, + "loss": 0.5594, + "step": 25764 + }, + { + "epoch": 0.707440966501922, + "grad_norm": 0.373597115278244, + "learning_rate": 1.4460840019391515e-05, + "loss": 0.4758, + "step": 25765 + }, + { + "epoch": 0.7074684239428886, + "grad_norm": 0.3464244306087494, + "learning_rate": 1.446045347447583e-05, + "loss": 0.4404, + "step": 25766 + }, + { + "epoch": 0.707495881383855, + "grad_norm": 0.43420642614364624, + "learning_rate": 1.4460066921239975e-05, + "loss": 0.5269, + "step": 25767 + }, + { + "epoch": 0.7075233388248215, + "grad_norm": 0.3888736665248871, + "learning_rate": 1.445968035968467e-05, + "loss": 0.5315, + "step": 25768 + }, + { + "epoch": 0.707550796265788, + "grad_norm": 0.42245182394981384, + "learning_rate": 1.4459293789810642e-05, + "loss": 0.4392, + "step": 25769 + }, + { + "epoch": 0.7075782537067545, + "grad_norm": 0.4308229386806488, + "learning_rate": 1.4458907211618604e-05, + "loss": 0.485, + "step": 25770 + }, + { + "epoch": 0.7076057111477211, + "grad_norm": 0.4681887626647949, + "learning_rate": 1.4458520625109282e-05, + "loss": 0.592, + "step": 25771 + }, + { + "epoch": 0.7076331685886875, + "grad_norm": 0.4080866277217865, + "learning_rate": 1.4458134030283397e-05, + "loss": 0.4988, + "step": 25772 + }, + { + "epoch": 0.7076606260296541, + "grad_norm": 0.40526530146598816, + "learning_rate": 1.4457747427141669e-05, + "loss": 0.5694, + "step": 25773 + }, + { + "epoch": 0.7076880834706205, + "grad_norm": 0.38375622034072876, + "learning_rate": 1.4457360815684817e-05, + "loss": 0.4885, + "step": 25774 + }, + { + "epoch": 0.7077155409115871, + "grad_norm": 0.3987455368041992, + "learning_rate": 1.4456974195913565e-05, + "loss": 0.5158, + "step": 25775 + }, + { + "epoch": 0.7077429983525535, + "grad_norm": 0.37412500381469727, + "learning_rate": 1.4456587567828636e-05, + "loss": 0.4185, + "step": 25776 + }, + { + "epoch": 0.70777045579352, + "grad_norm": 0.3820159435272217, + "learning_rate": 1.4456200931430748e-05, + "loss": 0.5422, + "step": 25777 + }, + { + "epoch": 0.7077979132344866, + "grad_norm": 0.357799232006073, + "learning_rate": 1.4455814286720622e-05, + "loss": 0.5764, + "step": 25778 + }, + { + "epoch": 0.707825370675453, + "grad_norm": 0.36209723353385925, + "learning_rate": 1.4455427633698981e-05, + "loss": 0.5209, + "step": 25779 + }, + { + "epoch": 0.7078528281164196, + "grad_norm": 0.43998652696609497, + "learning_rate": 1.4455040972366545e-05, + "loss": 0.5779, + "step": 25780 + }, + { + "epoch": 0.707880285557386, + "grad_norm": 0.3826634883880615, + "learning_rate": 1.4454654302724036e-05, + "loss": 0.522, + "step": 25781 + }, + { + "epoch": 0.7079077429983526, + "grad_norm": 0.4328444004058838, + "learning_rate": 1.4454267624772176e-05, + "loss": 0.6581, + "step": 25782 + }, + { + "epoch": 0.707935200439319, + "grad_norm": 0.36680397391319275, + "learning_rate": 1.4453880938511681e-05, + "loss": 0.4429, + "step": 25783 + }, + { + "epoch": 0.7079626578802856, + "grad_norm": 0.39536669850349426, + "learning_rate": 1.4453494243943283e-05, + "loss": 0.5179, + "step": 25784 + }, + { + "epoch": 0.7079901153212521, + "grad_norm": 0.3626229763031006, + "learning_rate": 1.4453107541067692e-05, + "loss": 0.4557, + "step": 25785 + }, + { + "epoch": 0.7080175727622185, + "grad_norm": 0.38307052850723267, + "learning_rate": 1.4452720829885635e-05, + "loss": 0.5401, + "step": 25786 + }, + { + "epoch": 0.7080450302031851, + "grad_norm": 0.38652434945106506, + "learning_rate": 1.4452334110397834e-05, + "loss": 0.4866, + "step": 25787 + }, + { + "epoch": 0.7080724876441515, + "grad_norm": 0.41213810443878174, + "learning_rate": 1.4451947382605007e-05, + "loss": 0.477, + "step": 25788 + }, + { + "epoch": 0.7080999450851181, + "grad_norm": 0.40793541073799133, + "learning_rate": 1.4451560646507878e-05, + "loss": 0.5188, + "step": 25789 + }, + { + "epoch": 0.7081274025260845, + "grad_norm": 0.4347650408744812, + "learning_rate": 1.445117390210717e-05, + "loss": 0.5449, + "step": 25790 + }, + { + "epoch": 0.7081548599670511, + "grad_norm": 0.3900369107723236, + "learning_rate": 1.44507871494036e-05, + "loss": 0.479, + "step": 25791 + }, + { + "epoch": 0.7081823174080176, + "grad_norm": 0.4668372571468353, + "learning_rate": 1.4450400388397891e-05, + "loss": 0.5207, + "step": 25792 + }, + { + "epoch": 0.7082097748489841, + "grad_norm": 0.39451760053634644, + "learning_rate": 1.4450013619090763e-05, + "loss": 0.4333, + "step": 25793 + }, + { + "epoch": 0.7082372322899506, + "grad_norm": 0.49405157566070557, + "learning_rate": 1.4449626841482944e-05, + "loss": 0.4786, + "step": 25794 + }, + { + "epoch": 0.708264689730917, + "grad_norm": 0.32388219237327576, + "learning_rate": 1.444924005557515e-05, + "loss": 0.4228, + "step": 25795 + }, + { + "epoch": 0.7082921471718836, + "grad_norm": 0.43572863936424255, + "learning_rate": 1.4448853261368099e-05, + "loss": 0.5313, + "step": 25796 + }, + { + "epoch": 0.70831960461285, + "grad_norm": 0.3893767297267914, + "learning_rate": 1.4448466458862521e-05, + "loss": 0.4867, + "step": 25797 + }, + { + "epoch": 0.7083470620538166, + "grad_norm": 0.4757401943206787, + "learning_rate": 1.444807964805913e-05, + "loss": 0.513, + "step": 25798 + }, + { + "epoch": 0.7083745194947831, + "grad_norm": 0.4045049846172333, + "learning_rate": 1.4447692828958652e-05, + "loss": 0.5479, + "step": 25799 + }, + { + "epoch": 0.7084019769357496, + "grad_norm": 0.33715349435806274, + "learning_rate": 1.444730600156181e-05, + "loss": 0.3917, + "step": 25800 + }, + { + "epoch": 0.7084294343767161, + "grad_norm": 0.4399476945400238, + "learning_rate": 1.444691916586932e-05, + "loss": 0.5505, + "step": 25801 + }, + { + "epoch": 0.7084568918176826, + "grad_norm": 0.38126063346862793, + "learning_rate": 1.4446532321881907e-05, + "loss": 0.5112, + "step": 25802 + }, + { + "epoch": 0.7084843492586491, + "grad_norm": 0.35870254039764404, + "learning_rate": 1.4446145469600292e-05, + "loss": 0.5016, + "step": 25803 + }, + { + "epoch": 0.7085118066996156, + "grad_norm": 0.3618539571762085, + "learning_rate": 1.4445758609025195e-05, + "loss": 0.4817, + "step": 25804 + }, + { + "epoch": 0.7085392641405821, + "grad_norm": 0.3373320698738098, + "learning_rate": 1.4445371740157343e-05, + "loss": 0.4371, + "step": 25805 + }, + { + "epoch": 0.7085667215815487, + "grad_norm": 0.42204996943473816, + "learning_rate": 1.4444984862997451e-05, + "loss": 0.4789, + "step": 25806 + }, + { + "epoch": 0.7085941790225151, + "grad_norm": 0.3369329571723938, + "learning_rate": 1.4444597977546244e-05, + "loss": 0.5282, + "step": 25807 + }, + { + "epoch": 0.7086216364634816, + "grad_norm": 0.3729742169380188, + "learning_rate": 1.4444211083804445e-05, + "loss": 0.5663, + "step": 25808 + }, + { + "epoch": 0.7086490939044481, + "grad_norm": 0.3488827347755432, + "learning_rate": 1.4443824181772772e-05, + "loss": 0.4834, + "step": 25809 + }, + { + "epoch": 0.7086765513454146, + "grad_norm": 0.480985552072525, + "learning_rate": 1.444343727145195e-05, + "loss": 0.5354, + "step": 25810 + }, + { + "epoch": 0.7087040087863811, + "grad_norm": 0.38537535071372986, + "learning_rate": 1.4443050352842698e-05, + "loss": 0.5036, + "step": 25811 + }, + { + "epoch": 0.7087314662273476, + "grad_norm": 0.44684240221977234, + "learning_rate": 1.444266342594574e-05, + "loss": 0.543, + "step": 25812 + }, + { + "epoch": 0.7087589236683142, + "grad_norm": 0.4180106818675995, + "learning_rate": 1.4442276490761798e-05, + "loss": 0.4191, + "step": 25813 + }, + { + "epoch": 0.7087863811092806, + "grad_norm": 0.3970363140106201, + "learning_rate": 1.444188954729159e-05, + "loss": 0.4795, + "step": 25814 + }, + { + "epoch": 0.7088138385502472, + "grad_norm": 0.34595996141433716, + "learning_rate": 1.444150259553584e-05, + "loss": 0.4438, + "step": 25815 + }, + { + "epoch": 0.7088412959912136, + "grad_norm": 0.39089435338974, + "learning_rate": 1.444111563549527e-05, + "loss": 0.547, + "step": 25816 + }, + { + "epoch": 0.7088687534321801, + "grad_norm": 0.37585216760635376, + "learning_rate": 1.4440728667170606e-05, + "loss": 0.579, + "step": 25817 + }, + { + "epoch": 0.7088962108731466, + "grad_norm": 0.36819952726364136, + "learning_rate": 1.4440341690562562e-05, + "loss": 0.5601, + "step": 25818 + }, + { + "epoch": 0.7089236683141131, + "grad_norm": 0.4637983739376068, + "learning_rate": 1.4439954705671864e-05, + "loss": 0.5443, + "step": 25819 + }, + { + "epoch": 0.7089511257550797, + "grad_norm": 0.38356813788414, + "learning_rate": 1.4439567712499234e-05, + "loss": 0.4348, + "step": 25820 + }, + { + "epoch": 0.7089785831960461, + "grad_norm": 0.3750327527523041, + "learning_rate": 1.4439180711045395e-05, + "loss": 0.4895, + "step": 25821 + }, + { + "epoch": 0.7090060406370127, + "grad_norm": 0.4449750781059265, + "learning_rate": 1.4438793701311063e-05, + "loss": 0.4585, + "step": 25822 + }, + { + "epoch": 0.7090334980779791, + "grad_norm": 0.3264961540699005, + "learning_rate": 1.4438406683296968e-05, + "loss": 0.4985, + "step": 25823 + }, + { + "epoch": 0.7090609555189457, + "grad_norm": 0.4114227294921875, + "learning_rate": 1.4438019657003826e-05, + "loss": 0.5352, + "step": 25824 + }, + { + "epoch": 0.7090884129599121, + "grad_norm": 0.4186139404773712, + "learning_rate": 1.443763262243236e-05, + "loss": 0.5633, + "step": 25825 + }, + { + "epoch": 0.7091158704008786, + "grad_norm": 0.4439699947834015, + "learning_rate": 1.4437245579583296e-05, + "loss": 0.4674, + "step": 25826 + }, + { + "epoch": 0.7091433278418452, + "grad_norm": 0.3628106415271759, + "learning_rate": 1.443685852845735e-05, + "loss": 0.4199, + "step": 25827 + }, + { + "epoch": 0.7091707852828116, + "grad_norm": 0.3921641409397125, + "learning_rate": 1.4436471469055247e-05, + "loss": 0.5196, + "step": 25828 + }, + { + "epoch": 0.7091982427237782, + "grad_norm": 0.4399005174636841, + "learning_rate": 1.443608440137771e-05, + "loss": 0.4564, + "step": 25829 + }, + { + "epoch": 0.7092257001647446, + "grad_norm": 0.438305139541626, + "learning_rate": 1.4435697325425459e-05, + "loss": 0.5609, + "step": 25830 + }, + { + "epoch": 0.7092531576057112, + "grad_norm": 0.3977104127407074, + "learning_rate": 1.4435310241199218e-05, + "loss": 0.6207, + "step": 25831 + }, + { + "epoch": 0.7092806150466776, + "grad_norm": 0.3645772635936737, + "learning_rate": 1.4434923148699704e-05, + "loss": 0.5425, + "step": 25832 + }, + { + "epoch": 0.7093080724876442, + "grad_norm": 0.3777216374874115, + "learning_rate": 1.4434536047927647e-05, + "loss": 0.4752, + "step": 25833 + }, + { + "epoch": 0.7093355299286107, + "grad_norm": 0.34704485535621643, + "learning_rate": 1.4434148938883764e-05, + "loss": 0.4338, + "step": 25834 + }, + { + "epoch": 0.7093629873695771, + "grad_norm": 0.380829393863678, + "learning_rate": 1.4433761821568778e-05, + "loss": 0.5339, + "step": 25835 + }, + { + "epoch": 0.7093904448105437, + "grad_norm": 0.43411070108413696, + "learning_rate": 1.443337469598341e-05, + "loss": 0.5468, + "step": 25836 + }, + { + "epoch": 0.7094179022515101, + "grad_norm": 0.3334331512451172, + "learning_rate": 1.4432987562128382e-05, + "loss": 0.4828, + "step": 25837 + }, + { + "epoch": 0.7094453596924767, + "grad_norm": 0.3748095631599426, + "learning_rate": 1.443260042000442e-05, + "loss": 0.5057, + "step": 25838 + }, + { + "epoch": 0.7094728171334431, + "grad_norm": 0.3970426321029663, + "learning_rate": 1.4432213269612245e-05, + "loss": 0.5899, + "step": 25839 + }, + { + "epoch": 0.7095002745744097, + "grad_norm": 0.35481947660446167, + "learning_rate": 1.4431826110952574e-05, + "loss": 0.4512, + "step": 25840 + }, + { + "epoch": 0.7095277320153762, + "grad_norm": 0.3363458514213562, + "learning_rate": 1.4431438944026135e-05, + "loss": 0.471, + "step": 25841 + }, + { + "epoch": 0.7095551894563427, + "grad_norm": 0.3688531219959259, + "learning_rate": 1.4431051768833647e-05, + "loss": 0.4241, + "step": 25842 + }, + { + "epoch": 0.7095826468973092, + "grad_norm": 0.38773712515830994, + "learning_rate": 1.4430664585375834e-05, + "loss": 0.517, + "step": 25843 + }, + { + "epoch": 0.7096101043382756, + "grad_norm": 0.42674520611763, + "learning_rate": 1.4430277393653418e-05, + "loss": 0.5195, + "step": 25844 + }, + { + "epoch": 0.7096375617792422, + "grad_norm": 0.4060184061527252, + "learning_rate": 1.4429890193667117e-05, + "loss": 0.5087, + "step": 25845 + }, + { + "epoch": 0.7096650192202086, + "grad_norm": 0.4025982916355133, + "learning_rate": 1.442950298541766e-05, + "loss": 0.5271, + "step": 25846 + }, + { + "epoch": 0.7096924766611752, + "grad_norm": 0.3356834948062897, + "learning_rate": 1.4429115768905764e-05, + "loss": 0.3966, + "step": 25847 + }, + { + "epoch": 0.7097199341021416, + "grad_norm": 0.3872230648994446, + "learning_rate": 1.4428728544132157e-05, + "loss": 0.509, + "step": 25848 + }, + { + "epoch": 0.7097473915431082, + "grad_norm": 0.388248085975647, + "learning_rate": 1.4428341311097555e-05, + "loss": 0.5706, + "step": 25849 + }, + { + "epoch": 0.7097748489840747, + "grad_norm": 0.40677523612976074, + "learning_rate": 1.4427954069802685e-05, + "loss": 0.5736, + "step": 25850 + }, + { + "epoch": 0.7098023064250412, + "grad_norm": 0.4282175302505493, + "learning_rate": 1.4427566820248267e-05, + "loss": 0.5892, + "step": 25851 + }, + { + "epoch": 0.7098297638660077, + "grad_norm": 0.3885265290737152, + "learning_rate": 1.4427179562435022e-05, + "loss": 0.4792, + "step": 25852 + }, + { + "epoch": 0.7098572213069742, + "grad_norm": 0.3750806450843811, + "learning_rate": 1.4426792296363674e-05, + "loss": 0.5074, + "step": 25853 + }, + { + "epoch": 0.7098846787479407, + "grad_norm": 0.36252644658088684, + "learning_rate": 1.4426405022034949e-05, + "loss": 0.4572, + "step": 25854 + }, + { + "epoch": 0.7099121361889071, + "grad_norm": 0.3991866409778595, + "learning_rate": 1.4426017739449563e-05, + "loss": 0.5363, + "step": 25855 + }, + { + "epoch": 0.7099395936298737, + "grad_norm": 0.41951021552085876, + "learning_rate": 1.4425630448608242e-05, + "loss": 0.3796, + "step": 25856 + }, + { + "epoch": 0.7099670510708402, + "grad_norm": 0.3674119710922241, + "learning_rate": 1.442524314951171e-05, + "loss": 0.4847, + "step": 25857 + }, + { + "epoch": 0.7099945085118067, + "grad_norm": 0.45130351185798645, + "learning_rate": 1.4424855842160683e-05, + "loss": 0.4912, + "step": 25858 + }, + { + "epoch": 0.7100219659527732, + "grad_norm": 0.41289767622947693, + "learning_rate": 1.442446852655589e-05, + "loss": 0.5049, + "step": 25859 + }, + { + "epoch": 0.7100494233937397, + "grad_norm": 0.44206786155700684, + "learning_rate": 1.4424081202698048e-05, + "loss": 0.4921, + "step": 25860 + }, + { + "epoch": 0.7100768808347062, + "grad_norm": 0.37867385149002075, + "learning_rate": 1.4423693870587886e-05, + "loss": 0.4639, + "step": 25861 + }, + { + "epoch": 0.7101043382756727, + "grad_norm": 0.4461255669593811, + "learning_rate": 1.4423306530226124e-05, + "loss": 0.5506, + "step": 25862 + }, + { + "epoch": 0.7101317957166392, + "grad_norm": 0.35779985785484314, + "learning_rate": 1.442291918161348e-05, + "loss": 0.5065, + "step": 25863 + }, + { + "epoch": 0.7101592531576058, + "grad_norm": 3.286142587661743, + "learning_rate": 1.4422531824750683e-05, + "loss": 0.5818, + "step": 25864 + }, + { + "epoch": 0.7101867105985722, + "grad_norm": 0.4205411970615387, + "learning_rate": 1.4422144459638452e-05, + "loss": 0.5353, + "step": 25865 + }, + { + "epoch": 0.7102141680395387, + "grad_norm": 0.4121982157230377, + "learning_rate": 1.4421757086277511e-05, + "loss": 0.4407, + "step": 25866 + }, + { + "epoch": 0.7102416254805052, + "grad_norm": 0.42069026827812195, + "learning_rate": 1.442136970466858e-05, + "loss": 0.5813, + "step": 25867 + }, + { + "epoch": 0.7102690829214717, + "grad_norm": 0.34720295667648315, + "learning_rate": 1.4420982314812382e-05, + "loss": 0.4975, + "step": 25868 + }, + { + "epoch": 0.7102965403624382, + "grad_norm": 0.38738787174224854, + "learning_rate": 1.4420594916709643e-05, + "loss": 0.4326, + "step": 25869 + }, + { + "epoch": 0.7103239978034047, + "grad_norm": 0.4035623073577881, + "learning_rate": 1.4420207510361083e-05, + "loss": 0.5888, + "step": 25870 + }, + { + "epoch": 0.7103514552443713, + "grad_norm": 0.4032056927680969, + "learning_rate": 1.4419820095767426e-05, + "loss": 0.5085, + "step": 25871 + }, + { + "epoch": 0.7103789126853377, + "grad_norm": 0.35014891624450684, + "learning_rate": 1.4419432672929395e-05, + "loss": 0.4893, + "step": 25872 + }, + { + "epoch": 0.7104063701263043, + "grad_norm": 0.4209508001804352, + "learning_rate": 1.4419045241847708e-05, + "loss": 0.4982, + "step": 25873 + }, + { + "epoch": 0.7104338275672707, + "grad_norm": 0.3537967801094055, + "learning_rate": 1.4418657802523094e-05, + "loss": 0.5071, + "step": 25874 + }, + { + "epoch": 0.7104612850082372, + "grad_norm": 0.3859637379646301, + "learning_rate": 1.4418270354956273e-05, + "loss": 0.4997, + "step": 25875 + }, + { + "epoch": 0.7104887424492037, + "grad_norm": 0.37590491771698, + "learning_rate": 1.4417882899147966e-05, + "loss": 0.5022, + "step": 25876 + }, + { + "epoch": 0.7105161998901702, + "grad_norm": 0.5707791447639465, + "learning_rate": 1.4417495435098899e-05, + "loss": 0.5083, + "step": 25877 + }, + { + "epoch": 0.7105436573311368, + "grad_norm": 0.35279977321624756, + "learning_rate": 1.4417107962809792e-05, + "loss": 0.442, + "step": 25878 + }, + { + "epoch": 0.7105711147721032, + "grad_norm": 0.39908552169799805, + "learning_rate": 1.4416720482281368e-05, + "loss": 0.51, + "step": 25879 + }, + { + "epoch": 0.7105985722130698, + "grad_norm": 0.4755646586418152, + "learning_rate": 1.4416332993514353e-05, + "loss": 0.538, + "step": 25880 + }, + { + "epoch": 0.7106260296540362, + "grad_norm": 0.38870757818222046, + "learning_rate": 1.4415945496509465e-05, + "loss": 0.4796, + "step": 25881 + }, + { + "epoch": 0.7106534870950028, + "grad_norm": 0.3827751874923706, + "learning_rate": 1.4415557991267432e-05, + "loss": 0.4864, + "step": 25882 + }, + { + "epoch": 0.7106809445359692, + "grad_norm": 0.3704669177532196, + "learning_rate": 1.4415170477788973e-05, + "loss": 0.586, + "step": 25883 + }, + { + "epoch": 0.7107084019769357, + "grad_norm": 0.3682456612586975, + "learning_rate": 1.4414782956074809e-05, + "loss": 0.4704, + "step": 25884 + }, + { + "epoch": 0.7107358594179023, + "grad_norm": 0.401678204536438, + "learning_rate": 1.4414395426125669e-05, + "loss": 0.5773, + "step": 25885 + }, + { + "epoch": 0.7107633168588687, + "grad_norm": 0.3607645630836487, + "learning_rate": 1.4414007887942272e-05, + "loss": 0.4821, + "step": 25886 + }, + { + "epoch": 0.7107907742998353, + "grad_norm": 0.39781495928764343, + "learning_rate": 1.4413620341525341e-05, + "loss": 0.4894, + "step": 25887 + }, + { + "epoch": 0.7108182317408017, + "grad_norm": 0.38592299818992615, + "learning_rate": 1.4413232786875602e-05, + "loss": 0.5221, + "step": 25888 + }, + { + "epoch": 0.7108456891817683, + "grad_norm": 0.4252718985080719, + "learning_rate": 1.4412845223993772e-05, + "loss": 0.5244, + "step": 25889 + }, + { + "epoch": 0.7108731466227347, + "grad_norm": 0.38716769218444824, + "learning_rate": 1.4412457652880579e-05, + "loss": 0.5318, + "step": 25890 + }, + { + "epoch": 0.7109006040637013, + "grad_norm": 0.39136409759521484, + "learning_rate": 1.4412070073536741e-05, + "loss": 0.4811, + "step": 25891 + }, + { + "epoch": 0.7109280615046678, + "grad_norm": 0.3492092490196228, + "learning_rate": 1.4411682485962987e-05, + "loss": 0.483, + "step": 25892 + }, + { + "epoch": 0.7109555189456342, + "grad_norm": 0.33289024233818054, + "learning_rate": 1.4411294890160039e-05, + "loss": 0.4, + "step": 25893 + }, + { + "epoch": 0.7109829763866008, + "grad_norm": 0.4194401502609253, + "learning_rate": 1.4410907286128614e-05, + "loss": 0.5308, + "step": 25894 + }, + { + "epoch": 0.7110104338275672, + "grad_norm": 0.4035889506340027, + "learning_rate": 1.4410519673869444e-05, + "loss": 0.5295, + "step": 25895 + }, + { + "epoch": 0.7110378912685338, + "grad_norm": 0.3732859790325165, + "learning_rate": 1.441013205338324e-05, + "loss": 0.5064, + "step": 25896 + }, + { + "epoch": 0.7110653487095002, + "grad_norm": 0.4064289629459381, + "learning_rate": 1.4409744424670738e-05, + "loss": 0.5661, + "step": 25897 + }, + { + "epoch": 0.7110928061504668, + "grad_norm": 0.3807610869407654, + "learning_rate": 1.4409356787732653e-05, + "loss": 0.4305, + "step": 25898 + }, + { + "epoch": 0.7111202635914333, + "grad_norm": 0.39028415083885193, + "learning_rate": 1.4408969142569712e-05, + "loss": 0.53, + "step": 25899 + }, + { + "epoch": 0.7111477210323998, + "grad_norm": 0.4621504247188568, + "learning_rate": 1.4408581489182634e-05, + "loss": 0.5023, + "step": 25900 + }, + { + "epoch": 0.7111751784733663, + "grad_norm": 1.576985239982605, + "learning_rate": 1.4408193827572143e-05, + "loss": 0.5212, + "step": 25901 + }, + { + "epoch": 0.7112026359143327, + "grad_norm": 0.37873151898384094, + "learning_rate": 1.4407806157738969e-05, + "loss": 0.4957, + "step": 25902 + }, + { + "epoch": 0.7112300933552993, + "grad_norm": 0.39273035526275635, + "learning_rate": 1.4407418479683826e-05, + "loss": 0.4546, + "step": 25903 + }, + { + "epoch": 0.7112575507962657, + "grad_norm": 0.38402411341667175, + "learning_rate": 1.4407030793407442e-05, + "loss": 0.5313, + "step": 25904 + }, + { + "epoch": 0.7112850082372323, + "grad_norm": 0.35619717836380005, + "learning_rate": 1.440664309891054e-05, + "loss": 0.5005, + "step": 25905 + }, + { + "epoch": 0.7113124656781988, + "grad_norm": 0.4717952013015747, + "learning_rate": 1.4406255396193838e-05, + "loss": 0.5155, + "step": 25906 + }, + { + "epoch": 0.7113399231191653, + "grad_norm": 0.39893174171447754, + "learning_rate": 1.4405867685258069e-05, + "loss": 0.5246, + "step": 25907 + }, + { + "epoch": 0.7113673805601318, + "grad_norm": 0.37765154242515564, + "learning_rate": 1.4405479966103945e-05, + "loss": 0.4267, + "step": 25908 + }, + { + "epoch": 0.7113948380010983, + "grad_norm": 0.39481621980667114, + "learning_rate": 1.44050922387322e-05, + "loss": 0.4379, + "step": 25909 + }, + { + "epoch": 0.7114222954420648, + "grad_norm": 0.38604483008384705, + "learning_rate": 1.440470450314355e-05, + "loss": 0.5663, + "step": 25910 + }, + { + "epoch": 0.7114497528830313, + "grad_norm": 0.3876763880252838, + "learning_rate": 1.4404316759338719e-05, + "loss": 0.4947, + "step": 25911 + }, + { + "epoch": 0.7114772103239978, + "grad_norm": 0.36547011137008667, + "learning_rate": 1.4403929007318433e-05, + "loss": 0.4833, + "step": 25912 + }, + { + "epoch": 0.7115046677649643, + "grad_norm": 0.8799788355827332, + "learning_rate": 1.4403541247083413e-05, + "loss": 0.4775, + "step": 25913 + }, + { + "epoch": 0.7115321252059308, + "grad_norm": 0.3627760410308838, + "learning_rate": 1.4403153478634384e-05, + "loss": 0.4307, + "step": 25914 + }, + { + "epoch": 0.7115595826468973, + "grad_norm": 0.4181090295314789, + "learning_rate": 1.4402765701972066e-05, + "loss": 0.5294, + "step": 25915 + }, + { + "epoch": 0.7115870400878638, + "grad_norm": 0.3783631920814514, + "learning_rate": 1.4402377917097188e-05, + "loss": 0.4117, + "step": 25916 + }, + { + "epoch": 0.7116144975288303, + "grad_norm": 0.3377611041069031, + "learning_rate": 1.440199012401047e-05, + "loss": 0.4415, + "step": 25917 + }, + { + "epoch": 0.7116419549697968, + "grad_norm": 0.3880821764469147, + "learning_rate": 1.4401602322712633e-05, + "loss": 0.5722, + "step": 25918 + }, + { + "epoch": 0.7116694124107633, + "grad_norm": 0.49683985114097595, + "learning_rate": 1.4401214513204405e-05, + "loss": 0.4794, + "step": 25919 + }, + { + "epoch": 0.7116968698517299, + "grad_norm": 0.4280538856983185, + "learning_rate": 1.4400826695486505e-05, + "loss": 0.5164, + "step": 25920 + }, + { + "epoch": 0.7117243272926963, + "grad_norm": 0.8664463758468628, + "learning_rate": 1.4400438869559659e-05, + "loss": 0.5216, + "step": 25921 + }, + { + "epoch": 0.7117517847336629, + "grad_norm": 0.460968017578125, + "learning_rate": 1.4400051035424594e-05, + "loss": 0.4418, + "step": 25922 + }, + { + "epoch": 0.7117792421746293, + "grad_norm": 0.3797134459018707, + "learning_rate": 1.4399663193082023e-05, + "loss": 0.5978, + "step": 25923 + }, + { + "epoch": 0.7118066996155958, + "grad_norm": 0.34685707092285156, + "learning_rate": 1.4399275342532681e-05, + "loss": 0.4997, + "step": 25924 + }, + { + "epoch": 0.7118341570565623, + "grad_norm": 0.3786543905735016, + "learning_rate": 1.4398887483777284e-05, + "loss": 0.4936, + "step": 25925 + }, + { + "epoch": 0.7118616144975288, + "grad_norm": 0.3833673894405365, + "learning_rate": 1.439849961681656e-05, + "loss": 0.4858, + "step": 25926 + }, + { + "epoch": 0.7118890719384954, + "grad_norm": 0.3846262991428375, + "learning_rate": 1.439811174165123e-05, + "loss": 0.4914, + "step": 25927 + }, + { + "epoch": 0.7119165293794618, + "grad_norm": 0.3826737403869629, + "learning_rate": 1.4397723858282016e-05, + "loss": 0.5051, + "step": 25928 + }, + { + "epoch": 0.7119439868204284, + "grad_norm": 0.38408300280570984, + "learning_rate": 1.4397335966709644e-05, + "loss": 0.4322, + "step": 25929 + }, + { + "epoch": 0.7119714442613948, + "grad_norm": 0.4126847982406616, + "learning_rate": 1.4396948066934838e-05, + "loss": 0.5163, + "step": 25930 + }, + { + "epoch": 0.7119989017023614, + "grad_norm": 0.4045517146587372, + "learning_rate": 1.4396560158958319e-05, + "loss": 0.4843, + "step": 25931 + }, + { + "epoch": 0.7120263591433278, + "grad_norm": 0.3896666467189789, + "learning_rate": 1.4396172242780813e-05, + "loss": 0.4487, + "step": 25932 + }, + { + "epoch": 0.7120538165842943, + "grad_norm": 0.3857246935367584, + "learning_rate": 1.439578431840304e-05, + "loss": 0.4191, + "step": 25933 + }, + { + "epoch": 0.7120812740252609, + "grad_norm": 0.3503515124320984, + "learning_rate": 1.439539638582573e-05, + "loss": 0.4054, + "step": 25934 + }, + { + "epoch": 0.7121087314662273, + "grad_norm": 0.4055585563182831, + "learning_rate": 1.4395008445049603e-05, + "loss": 0.4603, + "step": 25935 + }, + { + "epoch": 0.7121361889071939, + "grad_norm": 0.5107809901237488, + "learning_rate": 1.4394620496075382e-05, + "loss": 0.5577, + "step": 25936 + }, + { + "epoch": 0.7121636463481603, + "grad_norm": 0.43938422203063965, + "learning_rate": 1.4394232538903792e-05, + "loss": 0.5277, + "step": 25937 + }, + { + "epoch": 0.7121911037891269, + "grad_norm": 0.36351537704467773, + "learning_rate": 1.4393844573535551e-05, + "loss": 0.4813, + "step": 25938 + }, + { + "epoch": 0.7122185612300933, + "grad_norm": 0.3506568372249603, + "learning_rate": 1.4393456599971392e-05, + "loss": 0.3828, + "step": 25939 + }, + { + "epoch": 0.7122460186710599, + "grad_norm": 0.391205370426178, + "learning_rate": 1.4393068618212032e-05, + "loss": 0.4972, + "step": 25940 + }, + { + "epoch": 0.7122734761120264, + "grad_norm": 0.39560946822166443, + "learning_rate": 1.43926806282582e-05, + "loss": 0.4712, + "step": 25941 + }, + { + "epoch": 0.7123009335529928, + "grad_norm": 0.41738152503967285, + "learning_rate": 1.4392292630110616e-05, + "loss": 0.6272, + "step": 25942 + }, + { + "epoch": 0.7123283909939594, + "grad_norm": 0.4293467700481415, + "learning_rate": 1.4391904623770002e-05, + "loss": 0.5164, + "step": 25943 + }, + { + "epoch": 0.7123558484349258, + "grad_norm": 0.3723197281360626, + "learning_rate": 1.4391516609237083e-05, + "loss": 0.5208, + "step": 25944 + }, + { + "epoch": 0.7123833058758924, + "grad_norm": 0.41635605692863464, + "learning_rate": 1.4391128586512589e-05, + "loss": 0.4522, + "step": 25945 + }, + { + "epoch": 0.7124107633168588, + "grad_norm": 0.38310515880584717, + "learning_rate": 1.4390740555597234e-05, + "loss": 0.548, + "step": 25946 + }, + { + "epoch": 0.7124382207578254, + "grad_norm": 0.4137367606163025, + "learning_rate": 1.439035251649175e-05, + "loss": 0.4877, + "step": 25947 + }, + { + "epoch": 0.7124656781987919, + "grad_norm": 0.4245050251483917, + "learning_rate": 1.4389964469196853e-05, + "loss": 0.4451, + "step": 25948 + }, + { + "epoch": 0.7124931356397584, + "grad_norm": 0.41657742857933044, + "learning_rate": 1.4389576413713275e-05, + "loss": 0.495, + "step": 25949 + }, + { + "epoch": 0.7125205930807249, + "grad_norm": 0.4260657727718353, + "learning_rate": 1.4389188350041736e-05, + "loss": 0.5358, + "step": 25950 + }, + { + "epoch": 0.7125480505216913, + "grad_norm": 0.3994043171405792, + "learning_rate": 1.4388800278182957e-05, + "loss": 0.4597, + "step": 25951 + }, + { + "epoch": 0.7125755079626579, + "grad_norm": 0.37952253222465515, + "learning_rate": 1.4388412198137666e-05, + "loss": 0.4271, + "step": 25952 + }, + { + "epoch": 0.7126029654036243, + "grad_norm": 0.3841443955898285, + "learning_rate": 1.4388024109906586e-05, + "loss": 0.5913, + "step": 25953 + }, + { + "epoch": 0.7126304228445909, + "grad_norm": 0.37923842668533325, + "learning_rate": 1.4387636013490442e-05, + "loss": 0.4927, + "step": 25954 + }, + { + "epoch": 0.7126578802855574, + "grad_norm": 0.38640761375427246, + "learning_rate": 1.4387247908889954e-05, + "loss": 0.4822, + "step": 25955 + }, + { + "epoch": 0.7126853377265239, + "grad_norm": 0.4377199411392212, + "learning_rate": 1.438685979610585e-05, + "loss": 0.6094, + "step": 25956 + }, + { + "epoch": 0.7127127951674904, + "grad_norm": 0.41739317774772644, + "learning_rate": 1.4386471675138852e-05, + "loss": 0.4461, + "step": 25957 + }, + { + "epoch": 0.7127402526084569, + "grad_norm": 0.36272579431533813, + "learning_rate": 1.4386083545989682e-05, + "loss": 0.4606, + "step": 25958 + }, + { + "epoch": 0.7127677100494234, + "grad_norm": 0.4073275923728943, + "learning_rate": 1.438569540865907e-05, + "loss": 0.5624, + "step": 25959 + }, + { + "epoch": 0.7127951674903898, + "grad_norm": 0.4696483016014099, + "learning_rate": 1.4385307263147734e-05, + "loss": 0.5, + "step": 25960 + }, + { + "epoch": 0.7128226249313564, + "grad_norm": 0.39894652366638184, + "learning_rate": 1.4384919109456402e-05, + "loss": 0.4527, + "step": 25961 + }, + { + "epoch": 0.712850082372323, + "grad_norm": 0.38506248593330383, + "learning_rate": 1.4384530947585795e-05, + "loss": 0.4546, + "step": 25962 + }, + { + "epoch": 0.7128775398132894, + "grad_norm": 0.41508010029792786, + "learning_rate": 1.438414277753664e-05, + "loss": 0.5727, + "step": 25963 + }, + { + "epoch": 0.7129049972542559, + "grad_norm": 0.6940839290618896, + "learning_rate": 1.4383754599309656e-05, + "loss": 0.4917, + "step": 25964 + }, + { + "epoch": 0.7129324546952224, + "grad_norm": 0.43948104977607727, + "learning_rate": 1.4383366412905576e-05, + "loss": 0.4622, + "step": 25965 + }, + { + "epoch": 0.7129599121361889, + "grad_norm": 0.4594871997833252, + "learning_rate": 1.4382978218325111e-05, + "loss": 0.4819, + "step": 25966 + }, + { + "epoch": 0.7129873695771554, + "grad_norm": 0.38914597034454346, + "learning_rate": 1.4382590015569e-05, + "loss": 0.5076, + "step": 25967 + }, + { + "epoch": 0.7130148270181219, + "grad_norm": 0.4657936096191406, + "learning_rate": 1.438220180463796e-05, + "loss": 0.4284, + "step": 25968 + }, + { + "epoch": 0.7130422844590885, + "grad_norm": 0.48969766497612, + "learning_rate": 1.438181358553271e-05, + "loss": 0.5413, + "step": 25969 + }, + { + "epoch": 0.7130697419000549, + "grad_norm": 0.44247207045555115, + "learning_rate": 1.438142535825398e-05, + "loss": 0.4644, + "step": 25970 + }, + { + "epoch": 0.7130971993410214, + "grad_norm": 0.4502282738685608, + "learning_rate": 1.4381037122802492e-05, + "loss": 0.4569, + "step": 25971 + }, + { + "epoch": 0.7131246567819879, + "grad_norm": 0.43277135491371155, + "learning_rate": 1.4380648879178976e-05, + "loss": 0.4819, + "step": 25972 + }, + { + "epoch": 0.7131521142229544, + "grad_norm": 0.3291560709476471, + "learning_rate": 1.438026062738415e-05, + "loss": 0.3899, + "step": 25973 + }, + { + "epoch": 0.7131795716639209, + "grad_norm": 0.4028421938419342, + "learning_rate": 1.437987236741874e-05, + "loss": 0.5276, + "step": 25974 + }, + { + "epoch": 0.7132070291048874, + "grad_norm": 0.3960241377353668, + "learning_rate": 1.4379484099283469e-05, + "loss": 0.4851, + "step": 25975 + }, + { + "epoch": 0.713234486545854, + "grad_norm": 0.3833281993865967, + "learning_rate": 1.4379095822979064e-05, + "loss": 0.4818, + "step": 25976 + }, + { + "epoch": 0.7132619439868204, + "grad_norm": 0.396078884601593, + "learning_rate": 1.4378707538506245e-05, + "loss": 0.5456, + "step": 25977 + }, + { + "epoch": 0.713289401427787, + "grad_norm": 0.32703641057014465, + "learning_rate": 1.4378319245865743e-05, + "loss": 0.4434, + "step": 25978 + }, + { + "epoch": 0.7133168588687534, + "grad_norm": 0.3778613209724426, + "learning_rate": 1.4377930945058273e-05, + "loss": 0.5207, + "step": 25979 + }, + { + "epoch": 0.71334431630972, + "grad_norm": 0.3476533591747284, + "learning_rate": 1.437754263608457e-05, + "loss": 0.442, + "step": 25980 + }, + { + "epoch": 0.7133717737506864, + "grad_norm": 0.4215749502182007, + "learning_rate": 1.437715431894535e-05, + "loss": 0.5703, + "step": 25981 + }, + { + "epoch": 0.7133992311916529, + "grad_norm": 0.36284834146499634, + "learning_rate": 1.437676599364134e-05, + "loss": 0.4192, + "step": 25982 + }, + { + "epoch": 0.7134266886326195, + "grad_norm": 0.3717990219593048, + "learning_rate": 1.4376377660173266e-05, + "loss": 0.489, + "step": 25983 + }, + { + "epoch": 0.7134541460735859, + "grad_norm": 0.39584022760391235, + "learning_rate": 1.4375989318541848e-05, + "loss": 0.5279, + "step": 25984 + }, + { + "epoch": 0.7134816035145525, + "grad_norm": 0.4071667790412903, + "learning_rate": 1.4375600968747816e-05, + "loss": 0.5037, + "step": 25985 + }, + { + "epoch": 0.7135090609555189, + "grad_norm": 0.3223027288913727, + "learning_rate": 1.4375212610791891e-05, + "loss": 0.4133, + "step": 25986 + }, + { + "epoch": 0.7135365183964855, + "grad_norm": 0.4755399227142334, + "learning_rate": 1.4374824244674797e-05, + "loss": 0.5483, + "step": 25987 + }, + { + "epoch": 0.7135639758374519, + "grad_norm": 0.460130900144577, + "learning_rate": 1.4374435870397262e-05, + "loss": 0.5069, + "step": 25988 + }, + { + "epoch": 0.7135914332784185, + "grad_norm": 0.39262843132019043, + "learning_rate": 1.4374047487960004e-05, + "loss": 0.5673, + "step": 25989 + }, + { + "epoch": 0.713618890719385, + "grad_norm": 0.427388459444046, + "learning_rate": 1.4373659097363754e-05, + "loss": 0.4231, + "step": 25990 + }, + { + "epoch": 0.7136463481603514, + "grad_norm": 0.42621687054634094, + "learning_rate": 1.4373270698609234e-05, + "loss": 0.5288, + "step": 25991 + }, + { + "epoch": 0.713673805601318, + "grad_norm": 0.4201515018939972, + "learning_rate": 1.4372882291697168e-05, + "loss": 0.543, + "step": 25992 + }, + { + "epoch": 0.7137012630422844, + "grad_norm": 0.363801509141922, + "learning_rate": 1.4372493876628281e-05, + "loss": 0.5238, + "step": 25993 + }, + { + "epoch": 0.713728720483251, + "grad_norm": 0.38467860221862793, + "learning_rate": 1.4372105453403297e-05, + "loss": 0.4905, + "step": 25994 + }, + { + "epoch": 0.7137561779242174, + "grad_norm": 0.3743225634098053, + "learning_rate": 1.437171702202294e-05, + "loss": 0.5376, + "step": 25995 + }, + { + "epoch": 0.713783635365184, + "grad_norm": 0.32865023612976074, + "learning_rate": 1.4371328582487938e-05, + "loss": 0.4581, + "step": 25996 + }, + { + "epoch": 0.7138110928061505, + "grad_norm": 0.39075711369514465, + "learning_rate": 1.437094013479901e-05, + "loss": 0.5274, + "step": 25997 + }, + { + "epoch": 0.713838550247117, + "grad_norm": 0.40577179193496704, + "learning_rate": 1.4370551678956886e-05, + "loss": 0.4905, + "step": 25998 + }, + { + "epoch": 0.7138660076880835, + "grad_norm": 0.4174309968948364, + "learning_rate": 1.4370163214962289e-05, + "loss": 0.5331, + "step": 25999 + }, + { + "epoch": 0.7138934651290499, + "grad_norm": 0.3949275016784668, + "learning_rate": 1.4369774742815938e-05, + "loss": 0.4953, + "step": 26000 + }, + { + "epoch": 0.7139209225700165, + "grad_norm": 0.38455671072006226, + "learning_rate": 1.4369386262518568e-05, + "loss": 0.5754, + "step": 26001 + }, + { + "epoch": 0.7139483800109829, + "grad_norm": 0.45652082562446594, + "learning_rate": 1.4368997774070894e-05, + "loss": 0.4518, + "step": 26002 + }, + { + "epoch": 0.7139758374519495, + "grad_norm": 0.36224669218063354, + "learning_rate": 1.4368609277473646e-05, + "loss": 0.4747, + "step": 26003 + }, + { + "epoch": 0.714003294892916, + "grad_norm": 0.38411861658096313, + "learning_rate": 1.436822077272755e-05, + "loss": 0.5422, + "step": 26004 + }, + { + "epoch": 0.7140307523338825, + "grad_norm": 0.40852829813957214, + "learning_rate": 1.4367832259833326e-05, + "loss": 0.4866, + "step": 26005 + }, + { + "epoch": 0.714058209774849, + "grad_norm": 0.48855963349342346, + "learning_rate": 1.43674437387917e-05, + "loss": 0.5617, + "step": 26006 + }, + { + "epoch": 0.7140856672158155, + "grad_norm": 0.3702276051044464, + "learning_rate": 1.43670552096034e-05, + "loss": 0.4608, + "step": 26007 + }, + { + "epoch": 0.714113124656782, + "grad_norm": 0.49852994084358215, + "learning_rate": 1.4366666672269145e-05, + "loss": 0.4755, + "step": 26008 + }, + { + "epoch": 0.7141405820977484, + "grad_norm": 0.4262365400791168, + "learning_rate": 1.4366278126789666e-05, + "loss": 0.6589, + "step": 26009 + }, + { + "epoch": 0.714168039538715, + "grad_norm": 0.4021655023097992, + "learning_rate": 1.4365889573165681e-05, + "loss": 0.506, + "step": 26010 + }, + { + "epoch": 0.7141954969796815, + "grad_norm": 0.41766127943992615, + "learning_rate": 1.4365501011397922e-05, + "loss": 0.4721, + "step": 26011 + }, + { + "epoch": 0.714222954420648, + "grad_norm": 0.4753163456916809, + "learning_rate": 1.436511244148711e-05, + "loss": 0.5897, + "step": 26012 + }, + { + "epoch": 0.7142504118616145, + "grad_norm": 0.3903539180755615, + "learning_rate": 1.4364723863433968e-05, + "loss": 0.4906, + "step": 26013 + }, + { + "epoch": 0.714277869302581, + "grad_norm": 0.3684806823730469, + "learning_rate": 1.4364335277239223e-05, + "loss": 0.5362, + "step": 26014 + }, + { + "epoch": 0.7143053267435475, + "grad_norm": 0.3942180275917053, + "learning_rate": 1.43639466829036e-05, + "loss": 0.4893, + "step": 26015 + }, + { + "epoch": 0.714332784184514, + "grad_norm": 0.35558828711509705, + "learning_rate": 1.4363558080427824e-05, + "loss": 0.5058, + "step": 26016 + }, + { + "epoch": 0.7143602416254805, + "grad_norm": 0.35713696479797363, + "learning_rate": 1.4363169469812621e-05, + "loss": 0.458, + "step": 26017 + }, + { + "epoch": 0.7143876990664471, + "grad_norm": 0.34736618399620056, + "learning_rate": 1.4362780851058713e-05, + "loss": 0.4978, + "step": 26018 + }, + { + "epoch": 0.7144151565074135, + "grad_norm": 0.3738850951194763, + "learning_rate": 1.4362392224166829e-05, + "loss": 0.4561, + "step": 26019 + }, + { + "epoch": 0.71444261394838, + "grad_norm": 0.360006183385849, + "learning_rate": 1.4362003589137685e-05, + "loss": 0.4539, + "step": 26020 + }, + { + "epoch": 0.7144700713893465, + "grad_norm": 0.37713584303855896, + "learning_rate": 1.4361614945972018e-05, + "loss": 0.4806, + "step": 26021 + }, + { + "epoch": 0.714497528830313, + "grad_norm": 0.46365079283714294, + "learning_rate": 1.4361226294670545e-05, + "loss": 0.559, + "step": 26022 + }, + { + "epoch": 0.7145249862712795, + "grad_norm": 0.38509702682495117, + "learning_rate": 1.4360837635233993e-05, + "loss": 0.4922, + "step": 26023 + }, + { + "epoch": 0.714552443712246, + "grad_norm": 0.4964011609554291, + "learning_rate": 1.4360448967663087e-05, + "loss": 0.5858, + "step": 26024 + }, + { + "epoch": 0.7145799011532126, + "grad_norm": 0.4346403479576111, + "learning_rate": 1.4360060291958552e-05, + "loss": 0.4809, + "step": 26025 + }, + { + "epoch": 0.714607358594179, + "grad_norm": 0.3773009181022644, + "learning_rate": 1.4359671608121112e-05, + "loss": 0.5492, + "step": 26026 + }, + { + "epoch": 0.7146348160351456, + "grad_norm": 0.3958376348018646, + "learning_rate": 1.4359282916151495e-05, + "loss": 0.4844, + "step": 26027 + }, + { + "epoch": 0.714662273476112, + "grad_norm": 0.3867471218109131, + "learning_rate": 1.4358894216050421e-05, + "loss": 0.4285, + "step": 26028 + }, + { + "epoch": 0.7146897309170785, + "grad_norm": 0.3897256851196289, + "learning_rate": 1.435850550781862e-05, + "loss": 0.5382, + "step": 26029 + }, + { + "epoch": 0.714717188358045, + "grad_norm": 0.4148065447807312, + "learning_rate": 1.4358116791456818e-05, + "loss": 0.5384, + "step": 26030 + }, + { + "epoch": 0.7147446457990115, + "grad_norm": 0.3845970630645752, + "learning_rate": 1.4357728066965733e-05, + "loss": 0.4896, + "step": 26031 + }, + { + "epoch": 0.7147721032399781, + "grad_norm": 0.36382463574409485, + "learning_rate": 1.4357339334346097e-05, + "loss": 0.4894, + "step": 26032 + }, + { + "epoch": 0.7147995606809445, + "grad_norm": 0.3579001724720001, + "learning_rate": 1.435695059359863e-05, + "loss": 0.4506, + "step": 26033 + }, + { + "epoch": 0.7148270181219111, + "grad_norm": 0.3949882686138153, + "learning_rate": 1.4356561844724062e-05, + "loss": 0.4519, + "step": 26034 + }, + { + "epoch": 0.7148544755628775, + "grad_norm": 0.3821321129798889, + "learning_rate": 1.4356173087723114e-05, + "loss": 0.5271, + "step": 26035 + }, + { + "epoch": 0.7148819330038441, + "grad_norm": 0.4146723747253418, + "learning_rate": 1.4355784322596513e-05, + "loss": 0.5351, + "step": 26036 + }, + { + "epoch": 0.7149093904448105, + "grad_norm": 0.3883277475833893, + "learning_rate": 1.4355395549344984e-05, + "loss": 0.5089, + "step": 26037 + }, + { + "epoch": 0.714936847885777, + "grad_norm": 0.3695624768733978, + "learning_rate": 1.4355006767969252e-05, + "loss": 0.4451, + "step": 26038 + }, + { + "epoch": 0.7149643053267436, + "grad_norm": 0.33791589736938477, + "learning_rate": 1.4354617978470043e-05, + "loss": 0.4692, + "step": 26039 + }, + { + "epoch": 0.71499176276771, + "grad_norm": 0.35020723938941956, + "learning_rate": 1.4354229180848083e-05, + "loss": 0.4214, + "step": 26040 + }, + { + "epoch": 0.7150192202086766, + "grad_norm": 0.34675848484039307, + "learning_rate": 1.4353840375104093e-05, + "loss": 0.4471, + "step": 26041 + }, + { + "epoch": 0.715046677649643, + "grad_norm": 0.37643393874168396, + "learning_rate": 1.4353451561238803e-05, + "loss": 0.4881, + "step": 26042 + }, + { + "epoch": 0.7150741350906096, + "grad_norm": 0.38813480734825134, + "learning_rate": 1.4353062739252937e-05, + "loss": 0.534, + "step": 26043 + }, + { + "epoch": 0.715101592531576, + "grad_norm": 0.343219131231308, + "learning_rate": 1.4352673909147217e-05, + "loss": 0.4204, + "step": 26044 + }, + { + "epoch": 0.7151290499725426, + "grad_norm": 1.6274243593215942, + "learning_rate": 1.4352285070922374e-05, + "loss": 0.5529, + "step": 26045 + }, + { + "epoch": 0.7151565074135091, + "grad_norm": 0.3580920100212097, + "learning_rate": 1.4351896224579127e-05, + "loss": 0.4163, + "step": 26046 + }, + { + "epoch": 0.7151839648544756, + "grad_norm": 0.4271029233932495, + "learning_rate": 1.4351507370118208e-05, + "loss": 0.4626, + "step": 26047 + }, + { + "epoch": 0.7152114222954421, + "grad_norm": 0.4010319411754608, + "learning_rate": 1.4351118507540338e-05, + "loss": 0.4866, + "step": 26048 + }, + { + "epoch": 0.7152388797364085, + "grad_norm": 0.3428836762905121, + "learning_rate": 1.4350729636846242e-05, + "loss": 0.5174, + "step": 26049 + }, + { + "epoch": 0.7152663371773751, + "grad_norm": 0.4119150936603546, + "learning_rate": 1.4350340758036647e-05, + "loss": 0.4791, + "step": 26050 + }, + { + "epoch": 0.7152937946183415, + "grad_norm": 0.4236863851547241, + "learning_rate": 1.4349951871112277e-05, + "loss": 0.4695, + "step": 26051 + }, + { + "epoch": 0.7153212520593081, + "grad_norm": 0.3825666010379791, + "learning_rate": 1.4349562976073861e-05, + "loss": 0.5353, + "step": 26052 + }, + { + "epoch": 0.7153487095002746, + "grad_norm": 0.5928026437759399, + "learning_rate": 1.434917407292212e-05, + "loss": 0.4978, + "step": 26053 + }, + { + "epoch": 0.7153761669412411, + "grad_norm": 0.39703139662742615, + "learning_rate": 1.434878516165778e-05, + "loss": 0.5088, + "step": 26054 + }, + { + "epoch": 0.7154036243822076, + "grad_norm": 0.4349009394645691, + "learning_rate": 1.4348396242281573e-05, + "loss": 0.4828, + "step": 26055 + }, + { + "epoch": 0.715431081823174, + "grad_norm": 0.35437270998954773, + "learning_rate": 1.4348007314794216e-05, + "loss": 0.4957, + "step": 26056 + }, + { + "epoch": 0.7154585392641406, + "grad_norm": 0.3615390360355377, + "learning_rate": 1.4347618379196436e-05, + "loss": 0.5743, + "step": 26057 + }, + { + "epoch": 0.715485996705107, + "grad_norm": 0.3424055278301239, + "learning_rate": 1.4347229435488962e-05, + "loss": 0.4815, + "step": 26058 + }, + { + "epoch": 0.7155134541460736, + "grad_norm": 0.36199507117271423, + "learning_rate": 1.4346840483672515e-05, + "loss": 0.4992, + "step": 26059 + }, + { + "epoch": 0.7155409115870401, + "grad_norm": 0.3768567740917206, + "learning_rate": 1.4346451523747827e-05, + "loss": 0.5407, + "step": 26060 + }, + { + "epoch": 0.7155683690280066, + "grad_norm": 0.4903936982154846, + "learning_rate": 1.4346062555715619e-05, + "loss": 0.5462, + "step": 26061 + }, + { + "epoch": 0.7155958264689731, + "grad_norm": 0.36411523818969727, + "learning_rate": 1.4345673579576614e-05, + "loss": 0.4862, + "step": 26062 + }, + { + "epoch": 0.7156232839099396, + "grad_norm": 0.48901668190956116, + "learning_rate": 1.4345284595331543e-05, + "loss": 0.5784, + "step": 26063 + }, + { + "epoch": 0.7156507413509061, + "grad_norm": 0.401764452457428, + "learning_rate": 1.4344895602981129e-05, + "loss": 0.5223, + "step": 26064 + }, + { + "epoch": 0.7156781987918726, + "grad_norm": 0.4186842739582062, + "learning_rate": 1.4344506602526099e-05, + "loss": 0.578, + "step": 26065 + }, + { + "epoch": 0.7157056562328391, + "grad_norm": 0.39163926243782043, + "learning_rate": 1.4344117593967177e-05, + "loss": 0.5653, + "step": 26066 + }, + { + "epoch": 0.7157331136738057, + "grad_norm": 0.3941185474395752, + "learning_rate": 1.4343728577305088e-05, + "loss": 0.4998, + "step": 26067 + }, + { + "epoch": 0.7157605711147721, + "grad_norm": 0.37268924713134766, + "learning_rate": 1.4343339552540561e-05, + "loss": 0.4307, + "step": 26068 + }, + { + "epoch": 0.7157880285557386, + "grad_norm": 0.36848315596580505, + "learning_rate": 1.434295051967432e-05, + "loss": 0.4918, + "step": 26069 + }, + { + "epoch": 0.7158154859967051, + "grad_norm": 0.43074172735214233, + "learning_rate": 1.4342561478707086e-05, + "loss": 0.5924, + "step": 26070 + }, + { + "epoch": 0.7158429434376716, + "grad_norm": 2.2466816902160645, + "learning_rate": 1.4342172429639592e-05, + "loss": 0.458, + "step": 26071 + }, + { + "epoch": 0.7158704008786381, + "grad_norm": 0.3757255971431732, + "learning_rate": 1.434178337247256e-05, + "loss": 0.5188, + "step": 26072 + }, + { + "epoch": 0.7158978583196046, + "grad_norm": 0.32748883962631226, + "learning_rate": 1.4341394307206717e-05, + "loss": 0.4603, + "step": 26073 + }, + { + "epoch": 0.7159253157605712, + "grad_norm": 0.3900916874408722, + "learning_rate": 1.4341005233842787e-05, + "loss": 0.4578, + "step": 26074 + }, + { + "epoch": 0.7159527732015376, + "grad_norm": 0.43124040961265564, + "learning_rate": 1.4340616152381497e-05, + "loss": 0.4966, + "step": 26075 + }, + { + "epoch": 0.7159802306425042, + "grad_norm": 0.3590388298034668, + "learning_rate": 1.4340227062823572e-05, + "loss": 0.4929, + "step": 26076 + }, + { + "epoch": 0.7160076880834706, + "grad_norm": 0.3887551426887512, + "learning_rate": 1.4339837965169738e-05, + "loss": 0.5729, + "step": 26077 + }, + { + "epoch": 0.7160351455244371, + "grad_norm": 0.38222411274909973, + "learning_rate": 1.4339448859420721e-05, + "loss": 0.5335, + "step": 26078 + }, + { + "epoch": 0.7160626029654036, + "grad_norm": 0.4081416726112366, + "learning_rate": 1.433905974557725e-05, + "loss": 0.5226, + "step": 26079 + }, + { + "epoch": 0.7160900604063701, + "grad_norm": 0.38909003138542175, + "learning_rate": 1.4338670623640046e-05, + "loss": 0.5727, + "step": 26080 + }, + { + "epoch": 0.7161175178473367, + "grad_norm": 0.39402592182159424, + "learning_rate": 1.4338281493609835e-05, + "loss": 0.5244, + "step": 26081 + }, + { + "epoch": 0.7161449752883031, + "grad_norm": 0.39477553963661194, + "learning_rate": 1.4337892355487345e-05, + "loss": 0.4593, + "step": 26082 + }, + { + "epoch": 0.7161724327292697, + "grad_norm": 0.4092545509338379, + "learning_rate": 1.4337503209273302e-05, + "loss": 0.5247, + "step": 26083 + }, + { + "epoch": 0.7161998901702361, + "grad_norm": 0.6482234597206116, + "learning_rate": 1.433711405496843e-05, + "loss": 0.6255, + "step": 26084 + }, + { + "epoch": 0.7162273476112027, + "grad_norm": 0.405996173620224, + "learning_rate": 1.4336724892573457e-05, + "loss": 0.4935, + "step": 26085 + }, + { + "epoch": 0.7162548050521691, + "grad_norm": 0.37049180269241333, + "learning_rate": 1.433633572208911e-05, + "loss": 0.5238, + "step": 26086 + }, + { + "epoch": 0.7162822624931356, + "grad_norm": 0.37465107440948486, + "learning_rate": 1.4335946543516108e-05, + "loss": 0.5259, + "step": 26087 + }, + { + "epoch": 0.7163097199341022, + "grad_norm": 0.3569576144218445, + "learning_rate": 1.4335557356855185e-05, + "loss": 0.4734, + "step": 26088 + }, + { + "epoch": 0.7163371773750686, + "grad_norm": 0.41351377964019775, + "learning_rate": 1.4335168162107063e-05, + "loss": 0.574, + "step": 26089 + }, + { + "epoch": 0.7163646348160352, + "grad_norm": 0.3936741352081299, + "learning_rate": 1.4334778959272468e-05, + "loss": 0.4813, + "step": 26090 + }, + { + "epoch": 0.7163920922570016, + "grad_norm": 0.36563870310783386, + "learning_rate": 1.433438974835213e-05, + "loss": 0.5472, + "step": 26091 + }, + { + "epoch": 0.7164195496979682, + "grad_norm": 0.35716190934181213, + "learning_rate": 1.4334000529346768e-05, + "loss": 0.378, + "step": 26092 + }, + { + "epoch": 0.7164470071389346, + "grad_norm": 0.33149388432502747, + "learning_rate": 1.4333611302257113e-05, + "loss": 0.464, + "step": 26093 + }, + { + "epoch": 0.7164744645799012, + "grad_norm": 0.4030951261520386, + "learning_rate": 1.433322206708389e-05, + "loss": 0.5381, + "step": 26094 + }, + { + "epoch": 0.7165019220208677, + "grad_norm": 0.42903417348861694, + "learning_rate": 1.4332832823827824e-05, + "loss": 0.5041, + "step": 26095 + }, + { + "epoch": 0.7165293794618341, + "grad_norm": 0.37395361065864563, + "learning_rate": 1.4332443572489643e-05, + "loss": 0.4914, + "step": 26096 + }, + { + "epoch": 0.7165568369028007, + "grad_norm": 0.360299289226532, + "learning_rate": 1.4332054313070071e-05, + "loss": 0.4776, + "step": 26097 + }, + { + "epoch": 0.7165842943437671, + "grad_norm": 0.40636587142944336, + "learning_rate": 1.4331665045569839e-05, + "loss": 0.5004, + "step": 26098 + }, + { + "epoch": 0.7166117517847337, + "grad_norm": 0.40199699997901917, + "learning_rate": 1.4331275769989664e-05, + "loss": 0.5059, + "step": 26099 + }, + { + "epoch": 0.7166392092257001, + "grad_norm": 0.3748112618923187, + "learning_rate": 1.4330886486330279e-05, + "loss": 0.5354, + "step": 26100 + }, + { + "epoch": 0.7166666666666667, + "grad_norm": 0.36844587326049805, + "learning_rate": 1.4330497194592407e-05, + "loss": 0.5141, + "step": 26101 + }, + { + "epoch": 0.7166941241076332, + "grad_norm": 0.36240458488464355, + "learning_rate": 1.4330107894776779e-05, + "loss": 0.5151, + "step": 26102 + }, + { + "epoch": 0.7167215815485997, + "grad_norm": 0.4110575318336487, + "learning_rate": 1.4329718586884118e-05, + "loss": 0.5385, + "step": 26103 + }, + { + "epoch": 0.7167490389895662, + "grad_norm": 0.37489038705825806, + "learning_rate": 1.4329329270915146e-05, + "loss": 0.6121, + "step": 26104 + }, + { + "epoch": 0.7167764964305327, + "grad_norm": 0.4682330787181854, + "learning_rate": 1.4328939946870597e-05, + "loss": 0.5265, + "step": 26105 + }, + { + "epoch": 0.7168039538714992, + "grad_norm": 0.385666161775589, + "learning_rate": 1.432855061475119e-05, + "loss": 0.5543, + "step": 26106 + }, + { + "epoch": 0.7168314113124656, + "grad_norm": 0.3662683367729187, + "learning_rate": 1.4328161274557657e-05, + "loss": 0.4047, + "step": 26107 + }, + { + "epoch": 0.7168588687534322, + "grad_norm": 0.3850827217102051, + "learning_rate": 1.432777192629072e-05, + "loss": 0.4766, + "step": 26108 + }, + { + "epoch": 0.7168863261943987, + "grad_norm": 0.4758588969707489, + "learning_rate": 1.432738256995111e-05, + "loss": 0.5628, + "step": 26109 + }, + { + "epoch": 0.7169137836353652, + "grad_norm": 0.41869208216667175, + "learning_rate": 1.4326993205539547e-05, + "loss": 0.5503, + "step": 26110 + }, + { + "epoch": 0.7169412410763317, + "grad_norm": 0.39144888520240784, + "learning_rate": 1.4326603833056762e-05, + "loss": 0.5089, + "step": 26111 + }, + { + "epoch": 0.7169686985172982, + "grad_norm": 0.387212872505188, + "learning_rate": 1.4326214452503481e-05, + "loss": 0.5347, + "step": 26112 + }, + { + "epoch": 0.7169961559582647, + "grad_norm": 0.39412763714790344, + "learning_rate": 1.4325825063880427e-05, + "loss": 0.5182, + "step": 26113 + }, + { + "epoch": 0.7170236133992312, + "grad_norm": 0.4453129172325134, + "learning_rate": 1.4325435667188333e-05, + "loss": 0.5811, + "step": 26114 + }, + { + "epoch": 0.7170510708401977, + "grad_norm": 0.35040122270584106, + "learning_rate": 1.4325046262427917e-05, + "loss": 0.5189, + "step": 26115 + }, + { + "epoch": 0.7170785282811641, + "grad_norm": 0.4012080729007721, + "learning_rate": 1.432465684959991e-05, + "loss": 0.5791, + "step": 26116 + }, + { + "epoch": 0.7171059857221307, + "grad_norm": 0.482459157705307, + "learning_rate": 1.4324267428705039e-05, + "loss": 0.4512, + "step": 26117 + }, + { + "epoch": 0.7171334431630972, + "grad_norm": 0.4126095473766327, + "learning_rate": 1.4323877999744027e-05, + "loss": 0.4473, + "step": 26118 + }, + { + "epoch": 0.7171609006040637, + "grad_norm": 0.37289637327194214, + "learning_rate": 1.4323488562717604e-05, + "loss": 0.4951, + "step": 26119 + }, + { + "epoch": 0.7171883580450302, + "grad_norm": 0.5719515681266785, + "learning_rate": 1.4323099117626495e-05, + "loss": 0.6058, + "step": 26120 + }, + { + "epoch": 0.7172158154859967, + "grad_norm": 0.4207122027873993, + "learning_rate": 1.4322709664471423e-05, + "loss": 0.5131, + "step": 26121 + }, + { + "epoch": 0.7172432729269632, + "grad_norm": 0.34789028763771057, + "learning_rate": 1.4322320203253121e-05, + "loss": 0.4525, + "step": 26122 + }, + { + "epoch": 0.7172707303679297, + "grad_norm": 0.39910948276519775, + "learning_rate": 1.4321930733972314e-05, + "loss": 0.5194, + "step": 26123 + }, + { + "epoch": 0.7172981878088962, + "grad_norm": 0.37317660450935364, + "learning_rate": 1.4321541256629725e-05, + "loss": 0.4822, + "step": 26124 + }, + { + "epoch": 0.7173256452498628, + "grad_norm": 0.3971939980983734, + "learning_rate": 1.4321151771226082e-05, + "loss": 0.5052, + "step": 26125 + }, + { + "epoch": 0.7173531026908292, + "grad_norm": 0.5035853981971741, + "learning_rate": 1.432076227776211e-05, + "loss": 0.4542, + "step": 26126 + }, + { + "epoch": 0.7173805601317957, + "grad_norm": 0.3951180577278137, + "learning_rate": 1.4320372776238541e-05, + "loss": 0.5678, + "step": 26127 + }, + { + "epoch": 0.7174080175727622, + "grad_norm": 0.4150985777378082, + "learning_rate": 1.4319983266656098e-05, + "loss": 0.5039, + "step": 26128 + }, + { + "epoch": 0.7174354750137287, + "grad_norm": 0.39717182517051697, + "learning_rate": 1.4319593749015504e-05, + "loss": 0.4831, + "step": 26129 + }, + { + "epoch": 0.7174629324546952, + "grad_norm": 0.37337180972099304, + "learning_rate": 1.4319204223317491e-05, + "loss": 0.5665, + "step": 26130 + }, + { + "epoch": 0.7174903898956617, + "grad_norm": 0.4071110486984253, + "learning_rate": 1.4318814689562783e-05, + "loss": 0.5334, + "step": 26131 + }, + { + "epoch": 0.7175178473366283, + "grad_norm": 0.42442798614501953, + "learning_rate": 1.4318425147752106e-05, + "loss": 0.4944, + "step": 26132 + }, + { + "epoch": 0.7175453047775947, + "grad_norm": 0.3565179109573364, + "learning_rate": 1.4318035597886192e-05, + "loss": 0.4892, + "step": 26133 + }, + { + "epoch": 0.7175727622185613, + "grad_norm": 0.41918933391571045, + "learning_rate": 1.4317646039965758e-05, + "loss": 0.4814, + "step": 26134 + }, + { + "epoch": 0.7176002196595277, + "grad_norm": 0.3607025146484375, + "learning_rate": 1.4317256473991539e-05, + "loss": 0.5235, + "step": 26135 + }, + { + "epoch": 0.7176276771004942, + "grad_norm": 0.4720178544521332, + "learning_rate": 1.4316866899964258e-05, + "loss": 0.5301, + "step": 26136 + }, + { + "epoch": 0.7176551345414607, + "grad_norm": 0.3793657124042511, + "learning_rate": 1.4316477317884643e-05, + "loss": 0.5271, + "step": 26137 + }, + { + "epoch": 0.7176825919824272, + "grad_norm": 0.4030263125896454, + "learning_rate": 1.431608772775342e-05, + "loss": 0.5631, + "step": 26138 + }, + { + "epoch": 0.7177100494233938, + "grad_norm": 0.37038934230804443, + "learning_rate": 1.4315698129571316e-05, + "loss": 0.4733, + "step": 26139 + }, + { + "epoch": 0.7177375068643602, + "grad_norm": 0.38974541425704956, + "learning_rate": 1.4315308523339058e-05, + "loss": 0.4833, + "step": 26140 + }, + { + "epoch": 0.7177649643053268, + "grad_norm": 0.3655814826488495, + "learning_rate": 1.4314918909057372e-05, + "loss": 0.5154, + "step": 26141 + }, + { + "epoch": 0.7177924217462932, + "grad_norm": 0.3481271266937256, + "learning_rate": 1.4314529286726984e-05, + "loss": 0.4369, + "step": 26142 + }, + { + "epoch": 0.7178198791872598, + "grad_norm": 0.44555598497390747, + "learning_rate": 1.4314139656348625e-05, + "loss": 0.536, + "step": 26143 + }, + { + "epoch": 0.7178473366282262, + "grad_norm": 0.5637860894203186, + "learning_rate": 1.4313750017923014e-05, + "loss": 0.5152, + "step": 26144 + }, + { + "epoch": 0.7178747940691927, + "grad_norm": 0.36215564608573914, + "learning_rate": 1.4313360371450886e-05, + "loss": 0.4575, + "step": 26145 + }, + { + "epoch": 0.7179022515101593, + "grad_norm": 0.5309955477714539, + "learning_rate": 1.4312970716932964e-05, + "loss": 0.5299, + "step": 26146 + }, + { + "epoch": 0.7179297089511257, + "grad_norm": 0.3564058244228363, + "learning_rate": 1.4312581054369971e-05, + "loss": 0.4414, + "step": 26147 + }, + { + "epoch": 0.7179571663920923, + "grad_norm": 0.35266149044036865, + "learning_rate": 1.4312191383762643e-05, + "loss": 0.4349, + "step": 26148 + }, + { + "epoch": 0.7179846238330587, + "grad_norm": 0.4178427755832672, + "learning_rate": 1.4311801705111701e-05, + "loss": 0.5109, + "step": 26149 + }, + { + "epoch": 0.7180120812740253, + "grad_norm": 0.418233186006546, + "learning_rate": 1.431141201841787e-05, + "loss": 0.5721, + "step": 26150 + }, + { + "epoch": 0.7180395387149917, + "grad_norm": 0.5421519875526428, + "learning_rate": 1.431102232368188e-05, + "loss": 0.445, + "step": 26151 + }, + { + "epoch": 0.7180669961559583, + "grad_norm": 0.38479962944984436, + "learning_rate": 1.4310632620904458e-05, + "loss": 0.4325, + "step": 26152 + }, + { + "epoch": 0.7180944535969248, + "grad_norm": 0.397297203540802, + "learning_rate": 1.4310242910086331e-05, + "loss": 0.5345, + "step": 26153 + }, + { + "epoch": 0.7181219110378912, + "grad_norm": 0.45248350501060486, + "learning_rate": 1.4309853191228226e-05, + "loss": 0.601, + "step": 26154 + }, + { + "epoch": 0.7181493684788578, + "grad_norm": 0.3964867889881134, + "learning_rate": 1.4309463464330868e-05, + "loss": 0.4727, + "step": 26155 + }, + { + "epoch": 0.7181768259198242, + "grad_norm": 0.362616628408432, + "learning_rate": 1.4309073729394987e-05, + "loss": 0.4373, + "step": 26156 + }, + { + "epoch": 0.7182042833607908, + "grad_norm": 0.3739689588546753, + "learning_rate": 1.4308683986421305e-05, + "loss": 0.4566, + "step": 26157 + }, + { + "epoch": 0.7182317408017572, + "grad_norm": 0.4261053800582886, + "learning_rate": 1.4308294235410553e-05, + "loss": 0.4762, + "step": 26158 + }, + { + "epoch": 0.7182591982427238, + "grad_norm": 0.35194164514541626, + "learning_rate": 1.430790447636346e-05, + "loss": 0.4912, + "step": 26159 + }, + { + "epoch": 0.7182866556836903, + "grad_norm": 0.3615327775478363, + "learning_rate": 1.4307514709280748e-05, + "loss": 0.4551, + "step": 26160 + }, + { + "epoch": 0.7183141131246568, + "grad_norm": 0.332665354013443, + "learning_rate": 1.4307124934163149e-05, + "loss": 0.4949, + "step": 26161 + }, + { + "epoch": 0.7183415705656233, + "grad_norm": 0.4085514545440674, + "learning_rate": 1.4306735151011384e-05, + "loss": 0.5758, + "step": 26162 + }, + { + "epoch": 0.7183690280065897, + "grad_norm": 0.3623743951320648, + "learning_rate": 1.4306345359826183e-05, + "loss": 0.4946, + "step": 26163 + }, + { + "epoch": 0.7183964854475563, + "grad_norm": 0.37114787101745605, + "learning_rate": 1.4305955560608275e-05, + "loss": 0.5396, + "step": 26164 + }, + { + "epoch": 0.7184239428885227, + "grad_norm": 0.5669533610343933, + "learning_rate": 1.4305565753358386e-05, + "loss": 0.567, + "step": 26165 + }, + { + "epoch": 0.7184514003294893, + "grad_norm": 0.3448384702205658, + "learning_rate": 1.4305175938077242e-05, + "loss": 0.4902, + "step": 26166 + }, + { + "epoch": 0.7184788577704558, + "grad_norm": 0.38355425000190735, + "learning_rate": 1.4304786114765572e-05, + "loss": 0.4901, + "step": 26167 + }, + { + "epoch": 0.7185063152114223, + "grad_norm": 0.45099496841430664, + "learning_rate": 1.4304396283424101e-05, + "loss": 0.4608, + "step": 26168 + }, + { + "epoch": 0.7185337726523888, + "grad_norm": 0.43905749917030334, + "learning_rate": 1.4304006444053555e-05, + "loss": 0.5486, + "step": 26169 + }, + { + "epoch": 0.7185612300933553, + "grad_norm": 0.45028647780418396, + "learning_rate": 1.4303616596654665e-05, + "loss": 0.5635, + "step": 26170 + }, + { + "epoch": 0.7185886875343218, + "grad_norm": 0.44524693489074707, + "learning_rate": 1.4303226741228158e-05, + "loss": 0.5334, + "step": 26171 + }, + { + "epoch": 0.7186161449752883, + "grad_norm": 0.38128823041915894, + "learning_rate": 1.430283687777476e-05, + "loss": 0.4802, + "step": 26172 + }, + { + "epoch": 0.7186436024162548, + "grad_norm": 0.390337198972702, + "learning_rate": 1.4302447006295195e-05, + "loss": 0.463, + "step": 26173 + }, + { + "epoch": 0.7186710598572214, + "grad_norm": 0.40497079491615295, + "learning_rate": 1.4302057126790194e-05, + "loss": 0.6039, + "step": 26174 + }, + { + "epoch": 0.7186985172981878, + "grad_norm": 0.45977380871772766, + "learning_rate": 1.4301667239260484e-05, + "loss": 0.5255, + "step": 26175 + }, + { + "epoch": 0.7187259747391543, + "grad_norm": 0.35989248752593994, + "learning_rate": 1.4301277343706792e-05, + "loss": 0.4479, + "step": 26176 + }, + { + "epoch": 0.7187534321801208, + "grad_norm": 0.3945430815219879, + "learning_rate": 1.4300887440129848e-05, + "loss": 0.4702, + "step": 26177 + }, + { + "epoch": 0.7187808896210873, + "grad_norm": 0.304027259349823, + "learning_rate": 1.4300497528530368e-05, + "loss": 0.394, + "step": 26178 + }, + { + "epoch": 0.7188083470620538, + "grad_norm": 0.38538888096809387, + "learning_rate": 1.4300107608909095e-05, + "loss": 0.4594, + "step": 26179 + }, + { + "epoch": 0.7188358045030203, + "grad_norm": 0.4191279709339142, + "learning_rate": 1.4299717681266747e-05, + "loss": 0.5421, + "step": 26180 + }, + { + "epoch": 0.7188632619439869, + "grad_norm": 0.38736531138420105, + "learning_rate": 1.429932774560405e-05, + "loss": 0.5317, + "step": 26181 + }, + { + "epoch": 0.7188907193849533, + "grad_norm": 0.38019874691963196, + "learning_rate": 1.429893780192174e-05, + "loss": 0.5431, + "step": 26182 + }, + { + "epoch": 0.7189181768259199, + "grad_norm": 0.42159005999565125, + "learning_rate": 1.4298547850220534e-05, + "loss": 0.5049, + "step": 26183 + }, + { + "epoch": 0.7189456342668863, + "grad_norm": 0.5824725031852722, + "learning_rate": 1.4298157890501166e-05, + "loss": 0.4632, + "step": 26184 + }, + { + "epoch": 0.7189730917078528, + "grad_norm": 0.4003783166408539, + "learning_rate": 1.4297767922764363e-05, + "loss": 0.6163, + "step": 26185 + }, + { + "epoch": 0.7190005491488193, + "grad_norm": 0.43775486946105957, + "learning_rate": 1.4297377947010848e-05, + "loss": 0.6225, + "step": 26186 + }, + { + "epoch": 0.7190280065897858, + "grad_norm": 0.4314621686935425, + "learning_rate": 1.4296987963241356e-05, + "loss": 0.4625, + "step": 26187 + }, + { + "epoch": 0.7190554640307524, + "grad_norm": 0.39199307560920715, + "learning_rate": 1.4296597971456607e-05, + "loss": 0.4791, + "step": 26188 + }, + { + "epoch": 0.7190829214717188, + "grad_norm": 0.37466001510620117, + "learning_rate": 1.4296207971657331e-05, + "loss": 0.4416, + "step": 26189 + }, + { + "epoch": 0.7191103789126854, + "grad_norm": 0.3806830644607544, + "learning_rate": 1.4295817963844259e-05, + "loss": 0.4978, + "step": 26190 + }, + { + "epoch": 0.7191378363536518, + "grad_norm": 0.3389679789543152, + "learning_rate": 1.4295427948018112e-05, + "loss": 0.4997, + "step": 26191 + }, + { + "epoch": 0.7191652937946184, + "grad_norm": 0.36870241165161133, + "learning_rate": 1.4295037924179625e-05, + "loss": 0.463, + "step": 26192 + }, + { + "epoch": 0.7191927512355848, + "grad_norm": 0.36981818079948425, + "learning_rate": 1.4294647892329519e-05, + "loss": 0.4972, + "step": 26193 + }, + { + "epoch": 0.7192202086765513, + "grad_norm": 0.4398452639579773, + "learning_rate": 1.4294257852468522e-05, + "loss": 0.5845, + "step": 26194 + }, + { + "epoch": 0.7192476661175179, + "grad_norm": 0.3703881800174713, + "learning_rate": 1.4293867804597366e-05, + "loss": 0.5184, + "step": 26195 + }, + { + "epoch": 0.7192751235584843, + "grad_norm": 0.43894556164741516, + "learning_rate": 1.4293477748716776e-05, + "loss": 0.4012, + "step": 26196 + }, + { + "epoch": 0.7193025809994509, + "grad_norm": 0.4664405882358551, + "learning_rate": 1.429308768482748e-05, + "loss": 0.4257, + "step": 26197 + }, + { + "epoch": 0.7193300384404173, + "grad_norm": 0.58287113904953, + "learning_rate": 1.4292697612930205e-05, + "loss": 0.5843, + "step": 26198 + }, + { + "epoch": 0.7193574958813839, + "grad_norm": 0.4222634434700012, + "learning_rate": 1.4292307533025678e-05, + "loss": 0.4701, + "step": 26199 + }, + { + "epoch": 0.7193849533223503, + "grad_norm": 0.404619038105011, + "learning_rate": 1.4291917445114627e-05, + "loss": 0.4638, + "step": 26200 + }, + { + "epoch": 0.7194124107633169, + "grad_norm": 0.4166834354400635, + "learning_rate": 1.429152734919778e-05, + "loss": 0.4896, + "step": 26201 + }, + { + "epoch": 0.7194398682042834, + "grad_norm": 0.4430655837059021, + "learning_rate": 1.4291137245275868e-05, + "loss": 0.5691, + "step": 26202 + }, + { + "epoch": 0.7194673256452498, + "grad_norm": 0.4023185968399048, + "learning_rate": 1.4290747133349614e-05, + "loss": 0.4897, + "step": 26203 + }, + { + "epoch": 0.7194947830862164, + "grad_norm": 0.37929847836494446, + "learning_rate": 1.4290357013419745e-05, + "loss": 0.4634, + "step": 26204 + }, + { + "epoch": 0.7195222405271828, + "grad_norm": 0.36354824900627136, + "learning_rate": 1.4289966885486992e-05, + "loss": 0.4756, + "step": 26205 + }, + { + "epoch": 0.7195496979681494, + "grad_norm": 0.41010090708732605, + "learning_rate": 1.4289576749552081e-05, + "loss": 0.5014, + "step": 26206 + }, + { + "epoch": 0.7195771554091158, + "grad_norm": 0.3832513689994812, + "learning_rate": 1.4289186605615744e-05, + "loss": 0.5108, + "step": 26207 + }, + { + "epoch": 0.7196046128500824, + "grad_norm": 0.43626484274864197, + "learning_rate": 1.4288796453678702e-05, + "loss": 0.513, + "step": 26208 + }, + { + "epoch": 0.7196320702910489, + "grad_norm": 0.31584861874580383, + "learning_rate": 1.4288406293741685e-05, + "loss": 0.4582, + "step": 26209 + }, + { + "epoch": 0.7196595277320154, + "grad_norm": 0.3873485028743744, + "learning_rate": 1.4288016125805424e-05, + "loss": 0.5044, + "step": 26210 + }, + { + "epoch": 0.7196869851729819, + "grad_norm": 0.37661442160606384, + "learning_rate": 1.4287625949870643e-05, + "loss": 0.5308, + "step": 26211 + }, + { + "epoch": 0.7197144426139483, + "grad_norm": 0.34133851528167725, + "learning_rate": 1.428723576593807e-05, + "loss": 0.5497, + "step": 26212 + }, + { + "epoch": 0.7197419000549149, + "grad_norm": 0.3546529710292816, + "learning_rate": 1.4286845574008434e-05, + "loss": 0.4419, + "step": 26213 + }, + { + "epoch": 0.7197693574958813, + "grad_norm": 0.436663419008255, + "learning_rate": 1.4286455374082463e-05, + "loss": 0.5149, + "step": 26214 + }, + { + "epoch": 0.7197968149368479, + "grad_norm": 0.44005313515663147, + "learning_rate": 1.4286065166160888e-05, + "loss": 0.53, + "step": 26215 + }, + { + "epoch": 0.7198242723778144, + "grad_norm": 0.4178759753704071, + "learning_rate": 1.428567495024443e-05, + "loss": 0.5713, + "step": 26216 + }, + { + "epoch": 0.7198517298187809, + "grad_norm": 0.37635666131973267, + "learning_rate": 1.4285284726333822e-05, + "loss": 0.4466, + "step": 26217 + }, + { + "epoch": 0.7198791872597474, + "grad_norm": 0.3842974305152893, + "learning_rate": 1.4284894494429789e-05, + "loss": 0.5565, + "step": 26218 + }, + { + "epoch": 0.7199066447007139, + "grad_norm": 0.39772120118141174, + "learning_rate": 1.428450425453306e-05, + "loss": 0.4554, + "step": 26219 + }, + { + "epoch": 0.7199341021416804, + "grad_norm": 0.3840694725513458, + "learning_rate": 1.4284114006644365e-05, + "loss": 0.4778, + "step": 26220 + }, + { + "epoch": 0.7199615595826468, + "grad_norm": 0.3528640866279602, + "learning_rate": 1.4283723750764429e-05, + "loss": 0.4299, + "step": 26221 + }, + { + "epoch": 0.7199890170236134, + "grad_norm": 0.4639025926589966, + "learning_rate": 1.428333348689398e-05, + "loss": 0.5386, + "step": 26222 + }, + { + "epoch": 0.72001647446458, + "grad_norm": 0.37705302238464355, + "learning_rate": 1.4282943215033748e-05, + "loss": 0.5376, + "step": 26223 + }, + { + "epoch": 0.7200439319055464, + "grad_norm": 0.37812814116477966, + "learning_rate": 1.4282552935184462e-05, + "loss": 0.556, + "step": 26224 + }, + { + "epoch": 0.7200713893465129, + "grad_norm": 0.4301338195800781, + "learning_rate": 1.4282162647346844e-05, + "loss": 0.5536, + "step": 26225 + }, + { + "epoch": 0.7200988467874794, + "grad_norm": 0.3351184129714966, + "learning_rate": 1.428177235152163e-05, + "loss": 0.4926, + "step": 26226 + }, + { + "epoch": 0.7201263042284459, + "grad_norm": 0.3775240182876587, + "learning_rate": 1.428138204770954e-05, + "loss": 0.5207, + "step": 26227 + }, + { + "epoch": 0.7201537616694124, + "grad_norm": 0.41062137484550476, + "learning_rate": 1.428099173591131e-05, + "loss": 0.4929, + "step": 26228 + }, + { + "epoch": 0.7201812191103789, + "grad_norm": 0.4157417416572571, + "learning_rate": 1.4280601416127662e-05, + "loss": 0.5623, + "step": 26229 + }, + { + "epoch": 0.7202086765513455, + "grad_norm": 0.3604680001735687, + "learning_rate": 1.4280211088359324e-05, + "loss": 0.4481, + "step": 26230 + }, + { + "epoch": 0.7202361339923119, + "grad_norm": 0.3841487467288971, + "learning_rate": 1.427982075260703e-05, + "loss": 0.5473, + "step": 26231 + }, + { + "epoch": 0.7202635914332784, + "grad_norm": 0.3956283628940582, + "learning_rate": 1.4279430408871502e-05, + "loss": 0.4776, + "step": 26232 + }, + { + "epoch": 0.7202910488742449, + "grad_norm": 0.3674543499946594, + "learning_rate": 1.427904005715347e-05, + "loss": 0.4937, + "step": 26233 + }, + { + "epoch": 0.7203185063152114, + "grad_norm": 0.4042946696281433, + "learning_rate": 1.4278649697453664e-05, + "loss": 0.4807, + "step": 26234 + }, + { + "epoch": 0.7203459637561779, + "grad_norm": 0.4877842664718628, + "learning_rate": 1.427825932977281e-05, + "loss": 0.5296, + "step": 26235 + }, + { + "epoch": 0.7203734211971444, + "grad_norm": 0.41079410910606384, + "learning_rate": 1.4277868954111637e-05, + "loss": 0.5069, + "step": 26236 + }, + { + "epoch": 0.720400878638111, + "grad_norm": 0.43223297595977783, + "learning_rate": 1.4277478570470872e-05, + "loss": 0.4927, + "step": 26237 + }, + { + "epoch": 0.7204283360790774, + "grad_norm": 0.36788058280944824, + "learning_rate": 1.4277088178851249e-05, + "loss": 0.5992, + "step": 26238 + }, + { + "epoch": 0.720455793520044, + "grad_norm": 0.4175417721271515, + "learning_rate": 1.4276697779253488e-05, + "loss": 0.5572, + "step": 26239 + }, + { + "epoch": 0.7204832509610104, + "grad_norm": 0.38129922747612, + "learning_rate": 1.427630737167832e-05, + "loss": 0.52, + "step": 26240 + }, + { + "epoch": 0.720510708401977, + "grad_norm": 0.3569270670413971, + "learning_rate": 1.4275916956126475e-05, + "loss": 0.438, + "step": 26241 + }, + { + "epoch": 0.7205381658429434, + "grad_norm": 0.38436394929885864, + "learning_rate": 1.427552653259868e-05, + "loss": 0.4727, + "step": 26242 + }, + { + "epoch": 0.7205656232839099, + "grad_norm": 0.39693009853363037, + "learning_rate": 1.4275136101095664e-05, + "loss": 0.4976, + "step": 26243 + }, + { + "epoch": 0.7205930807248765, + "grad_norm": 0.4520387351512909, + "learning_rate": 1.4274745661618152e-05, + "loss": 0.5197, + "step": 26244 + }, + { + "epoch": 0.7206205381658429, + "grad_norm": 0.3893456757068634, + "learning_rate": 1.4274355214166877e-05, + "loss": 0.5074, + "step": 26245 + }, + { + "epoch": 0.7206479956068095, + "grad_norm": 0.3761766254901886, + "learning_rate": 1.4273964758742565e-05, + "loss": 0.4832, + "step": 26246 + }, + { + "epoch": 0.7206754530477759, + "grad_norm": 0.41545554995536804, + "learning_rate": 1.4273574295345947e-05, + "loss": 0.5893, + "step": 26247 + }, + { + "epoch": 0.7207029104887425, + "grad_norm": 0.43613356351852417, + "learning_rate": 1.4273183823977745e-05, + "loss": 0.5737, + "step": 26248 + }, + { + "epoch": 0.7207303679297089, + "grad_norm": 0.5463149547576904, + "learning_rate": 1.4272793344638693e-05, + "loss": 0.4763, + "step": 26249 + }, + { + "epoch": 0.7207578253706755, + "grad_norm": 0.36588752269744873, + "learning_rate": 1.4272402857329517e-05, + "loss": 0.5348, + "step": 26250 + }, + { + "epoch": 0.720785282811642, + "grad_norm": 0.5114498734474182, + "learning_rate": 1.427201236205095e-05, + "loss": 0.5291, + "step": 26251 + }, + { + "epoch": 0.7208127402526084, + "grad_norm": 0.3683733344078064, + "learning_rate": 1.4271621858803713e-05, + "loss": 0.4701, + "step": 26252 + }, + { + "epoch": 0.720840197693575, + "grad_norm": 0.3885875344276428, + "learning_rate": 1.4271231347588536e-05, + "loss": 0.5034, + "step": 26253 + }, + { + "epoch": 0.7208676551345414, + "grad_norm": 0.41927212476730347, + "learning_rate": 1.4270840828406154e-05, + "loss": 0.5062, + "step": 26254 + }, + { + "epoch": 0.720895112575508, + "grad_norm": 0.5036165714263916, + "learning_rate": 1.4270450301257288e-05, + "loss": 0.5628, + "step": 26255 + }, + { + "epoch": 0.7209225700164744, + "grad_norm": 0.4025128185749054, + "learning_rate": 1.4270059766142668e-05, + "loss": 0.4765, + "step": 26256 + }, + { + "epoch": 0.720950027457441, + "grad_norm": 0.3120341897010803, + "learning_rate": 1.4269669223063027e-05, + "loss": 0.5093, + "step": 26257 + }, + { + "epoch": 0.7209774848984075, + "grad_norm": 0.42347878217697144, + "learning_rate": 1.4269278672019087e-05, + "loss": 0.516, + "step": 26258 + }, + { + "epoch": 0.721004942339374, + "grad_norm": 0.37198248505592346, + "learning_rate": 1.4268888113011583e-05, + "loss": 0.4174, + "step": 26259 + }, + { + "epoch": 0.7210323997803405, + "grad_norm": 0.4406661093235016, + "learning_rate": 1.4268497546041235e-05, + "loss": 0.5398, + "step": 26260 + }, + { + "epoch": 0.7210598572213069, + "grad_norm": 0.3685888350009918, + "learning_rate": 1.4268106971108781e-05, + "loss": 0.4405, + "step": 26261 + }, + { + "epoch": 0.7210873146622735, + "grad_norm": 0.4906046986579895, + "learning_rate": 1.4267716388214947e-05, + "loss": 0.5081, + "step": 26262 + }, + { + "epoch": 0.7211147721032399, + "grad_norm": 0.34039852023124695, + "learning_rate": 1.4267325797360457e-05, + "loss": 0.4349, + "step": 26263 + }, + { + "epoch": 0.7211422295442065, + "grad_norm": 0.3763958215713501, + "learning_rate": 1.4266935198546042e-05, + "loss": 0.5451, + "step": 26264 + }, + { + "epoch": 0.721169686985173, + "grad_norm": 0.3860439360141754, + "learning_rate": 1.4266544591772432e-05, + "loss": 0.4771, + "step": 26265 + }, + { + "epoch": 0.7211971444261395, + "grad_norm": 0.3366050124168396, + "learning_rate": 1.4266153977040354e-05, + "loss": 0.4724, + "step": 26266 + }, + { + "epoch": 0.721224601867106, + "grad_norm": 0.3330809772014618, + "learning_rate": 1.4265763354350538e-05, + "loss": 0.4616, + "step": 26267 + }, + { + "epoch": 0.7212520593080725, + "grad_norm": 0.38502225279808044, + "learning_rate": 1.4265372723703709e-05, + "loss": 0.47, + "step": 26268 + }, + { + "epoch": 0.721279516749039, + "grad_norm": 0.35724079608917236, + "learning_rate": 1.4264982085100603e-05, + "loss": 0.4877, + "step": 26269 + }, + { + "epoch": 0.7213069741900054, + "grad_norm": 0.4185185134410858, + "learning_rate": 1.426459143854194e-05, + "loss": 0.4841, + "step": 26270 + }, + { + "epoch": 0.721334431630972, + "grad_norm": 0.39674848318099976, + "learning_rate": 1.4264200784028456e-05, + "loss": 0.4899, + "step": 26271 + }, + { + "epoch": 0.7213618890719385, + "grad_norm": 0.4386468529701233, + "learning_rate": 1.4263810121560878e-05, + "loss": 0.4655, + "step": 26272 + }, + { + "epoch": 0.721389346512905, + "grad_norm": 0.4280010461807251, + "learning_rate": 1.4263419451139929e-05, + "loss": 0.4762, + "step": 26273 + }, + { + "epoch": 0.7214168039538715, + "grad_norm": 0.368112176656723, + "learning_rate": 1.4263028772766342e-05, + "loss": 0.4967, + "step": 26274 + }, + { + "epoch": 0.721444261394838, + "grad_norm": 0.3781695067882538, + "learning_rate": 1.4262638086440848e-05, + "loss": 0.511, + "step": 26275 + }, + { + "epoch": 0.7214717188358045, + "grad_norm": 0.40472105145454407, + "learning_rate": 1.4262247392164175e-05, + "loss": 0.4738, + "step": 26276 + }, + { + "epoch": 0.721499176276771, + "grad_norm": 0.4131077826023102, + "learning_rate": 1.4261856689937046e-05, + "loss": 0.5635, + "step": 26277 + }, + { + "epoch": 0.7215266337177375, + "grad_norm": 0.38940805196762085, + "learning_rate": 1.4261465979760196e-05, + "loss": 0.4663, + "step": 26278 + }, + { + "epoch": 0.7215540911587041, + "grad_norm": 0.39356133341789246, + "learning_rate": 1.4261075261634354e-05, + "loss": 0.537, + "step": 26279 + }, + { + "epoch": 0.7215815485996705, + "grad_norm": 0.46857911348342896, + "learning_rate": 1.4260684535560244e-05, + "loss": 0.5535, + "step": 26280 + }, + { + "epoch": 0.721609006040637, + "grad_norm": 0.6023870706558228, + "learning_rate": 1.4260293801538598e-05, + "loss": 0.5408, + "step": 26281 + }, + { + "epoch": 0.7216364634816035, + "grad_norm": 0.396579384803772, + "learning_rate": 1.4259903059570144e-05, + "loss": 0.4941, + "step": 26282 + }, + { + "epoch": 0.72166392092257, + "grad_norm": 0.38739538192749023, + "learning_rate": 1.4259512309655612e-05, + "loss": 0.4934, + "step": 26283 + }, + { + "epoch": 0.7216913783635365, + "grad_norm": 0.400272399187088, + "learning_rate": 1.4259121551795732e-05, + "loss": 0.4158, + "step": 26284 + }, + { + "epoch": 0.721718835804503, + "grad_norm": 0.37733325362205505, + "learning_rate": 1.4258730785991226e-05, + "loss": 0.4928, + "step": 26285 + }, + { + "epoch": 0.7217462932454696, + "grad_norm": 0.3831734359264374, + "learning_rate": 1.4258340012242833e-05, + "loss": 0.4003, + "step": 26286 + }, + { + "epoch": 0.721773750686436, + "grad_norm": 0.3889709413051605, + "learning_rate": 1.4257949230551274e-05, + "loss": 0.5343, + "step": 26287 + }, + { + "epoch": 0.7218012081274026, + "grad_norm": 0.4269602596759796, + "learning_rate": 1.425755844091728e-05, + "loss": 0.5037, + "step": 26288 + }, + { + "epoch": 0.721828665568369, + "grad_norm": 0.3762735426425934, + "learning_rate": 1.4257167643341582e-05, + "loss": 0.4708, + "step": 26289 + }, + { + "epoch": 0.7218561230093355, + "grad_norm": 0.42619529366493225, + "learning_rate": 1.4256776837824908e-05, + "loss": 0.5279, + "step": 26290 + }, + { + "epoch": 0.721883580450302, + "grad_norm": 0.41751664876937866, + "learning_rate": 1.4256386024367986e-05, + "loss": 0.484, + "step": 26291 + }, + { + "epoch": 0.7219110378912685, + "grad_norm": 0.4200271666049957, + "learning_rate": 1.4255995202971544e-05, + "loss": 0.5013, + "step": 26292 + }, + { + "epoch": 0.7219384953322351, + "grad_norm": 0.3393397629261017, + "learning_rate": 1.4255604373636317e-05, + "loss": 0.4235, + "step": 26293 + }, + { + "epoch": 0.7219659527732015, + "grad_norm": 0.4236258566379547, + "learning_rate": 1.4255213536363023e-05, + "loss": 0.5022, + "step": 26294 + }, + { + "epoch": 0.7219934102141681, + "grad_norm": 0.3938595950603485, + "learning_rate": 1.4254822691152403e-05, + "loss": 0.4811, + "step": 26295 + }, + { + "epoch": 0.7220208676551345, + "grad_norm": 0.36631831526756287, + "learning_rate": 1.425443183800518e-05, + "loss": 0.5581, + "step": 26296 + }, + { + "epoch": 0.7220483250961011, + "grad_norm": 0.38633090257644653, + "learning_rate": 1.425404097692208e-05, + "loss": 0.5025, + "step": 26297 + }, + { + "epoch": 0.7220757825370675, + "grad_norm": 0.32096973061561584, + "learning_rate": 1.4253650107903843e-05, + "loss": 0.442, + "step": 26298 + }, + { + "epoch": 0.722103239978034, + "grad_norm": 0.41667404770851135, + "learning_rate": 1.4253259230951184e-05, + "loss": 0.4959, + "step": 26299 + }, + { + "epoch": 0.7221306974190006, + "grad_norm": 0.3770669102668762, + "learning_rate": 1.4252868346064843e-05, + "loss": 0.4902, + "step": 26300 + }, + { + "epoch": 0.722158154859967, + "grad_norm": 0.36138156056404114, + "learning_rate": 1.4252477453245546e-05, + "loss": 0.4942, + "step": 26301 + }, + { + "epoch": 0.7221856123009336, + "grad_norm": 0.3463192582130432, + "learning_rate": 1.4252086552494015e-05, + "loss": 0.4573, + "step": 26302 + }, + { + "epoch": 0.7222130697419, + "grad_norm": 0.33310213685035706, + "learning_rate": 1.4251695643810992e-05, + "loss": 0.389, + "step": 26303 + }, + { + "epoch": 0.7222405271828666, + "grad_norm": 0.38002684712409973, + "learning_rate": 1.4251304727197199e-05, + "loss": 0.4751, + "step": 26304 + }, + { + "epoch": 0.722267984623833, + "grad_norm": 0.38640910387039185, + "learning_rate": 1.4250913802653364e-05, + "loss": 0.4876, + "step": 26305 + }, + { + "epoch": 0.7222954420647996, + "grad_norm": 0.36124387383461, + "learning_rate": 1.4250522870180218e-05, + "loss": 0.5134, + "step": 26306 + }, + { + "epoch": 0.7223228995057661, + "grad_norm": 0.4217796325683594, + "learning_rate": 1.4250131929778489e-05, + "loss": 0.4264, + "step": 26307 + }, + { + "epoch": 0.7223503569467326, + "grad_norm": 0.46357014775276184, + "learning_rate": 1.424974098144891e-05, + "loss": 0.4757, + "step": 26308 + }, + { + "epoch": 0.7223778143876991, + "grad_norm": 0.5685821771621704, + "learning_rate": 1.4249350025192206e-05, + "loss": 0.5384, + "step": 26309 + }, + { + "epoch": 0.7224052718286655, + "grad_norm": 0.40026241540908813, + "learning_rate": 1.4248959061009108e-05, + "loss": 0.4831, + "step": 26310 + }, + { + "epoch": 0.7224327292696321, + "grad_norm": 0.35808658599853516, + "learning_rate": 1.4248568088900348e-05, + "loss": 0.4117, + "step": 26311 + }, + { + "epoch": 0.7224601867105985, + "grad_norm": 0.352862149477005, + "learning_rate": 1.4248177108866648e-05, + "loss": 0.4932, + "step": 26312 + }, + { + "epoch": 0.7224876441515651, + "grad_norm": 0.3899402618408203, + "learning_rate": 1.4247786120908745e-05, + "loss": 0.5392, + "step": 26313 + }, + { + "epoch": 0.7225151015925316, + "grad_norm": 0.36253198981285095, + "learning_rate": 1.4247395125027365e-05, + "loss": 0.5089, + "step": 26314 + }, + { + "epoch": 0.7225425590334981, + "grad_norm": 0.4078875780105591, + "learning_rate": 1.4247004121223236e-05, + "loss": 0.5274, + "step": 26315 + }, + { + "epoch": 0.7225700164744646, + "grad_norm": 0.36778631806373596, + "learning_rate": 1.4246613109497092e-05, + "loss": 0.5192, + "step": 26316 + }, + { + "epoch": 0.722597473915431, + "grad_norm": 0.38470765948295593, + "learning_rate": 1.4246222089849653e-05, + "loss": 0.4711, + "step": 26317 + }, + { + "epoch": 0.7226249313563976, + "grad_norm": 0.3613913953304291, + "learning_rate": 1.4245831062281662e-05, + "loss": 0.4448, + "step": 26318 + }, + { + "epoch": 0.722652388797364, + "grad_norm": 0.40048375725746155, + "learning_rate": 1.4245440026793836e-05, + "loss": 0.484, + "step": 26319 + }, + { + "epoch": 0.7226798462383306, + "grad_norm": 0.4068031311035156, + "learning_rate": 1.4245048983386908e-05, + "loss": 0.5118, + "step": 26320 + }, + { + "epoch": 0.7227073036792971, + "grad_norm": 0.3690464496612549, + "learning_rate": 1.4244657932061614e-05, + "loss": 0.4878, + "step": 26321 + }, + { + "epoch": 0.7227347611202636, + "grad_norm": 0.41541072726249695, + "learning_rate": 1.4244266872818677e-05, + "loss": 0.5736, + "step": 26322 + }, + { + "epoch": 0.7227622185612301, + "grad_norm": 0.38711100816726685, + "learning_rate": 1.4243875805658824e-05, + "loss": 0.4552, + "step": 26323 + }, + { + "epoch": 0.7227896760021966, + "grad_norm": 0.36797118186950684, + "learning_rate": 1.4243484730582793e-05, + "loss": 0.4941, + "step": 26324 + }, + { + "epoch": 0.7228171334431631, + "grad_norm": 0.3849070370197296, + "learning_rate": 1.4243093647591305e-05, + "loss": 0.4861, + "step": 26325 + }, + { + "epoch": 0.7228445908841296, + "grad_norm": 0.41786113381385803, + "learning_rate": 1.4242702556685095e-05, + "loss": 0.5294, + "step": 26326 + }, + { + "epoch": 0.7228720483250961, + "grad_norm": 0.4417918920516968, + "learning_rate": 1.424231145786489e-05, + "loss": 0.5263, + "step": 26327 + }, + { + "epoch": 0.7228995057660627, + "grad_norm": 0.41903501749038696, + "learning_rate": 1.424192035113142e-05, + "loss": 0.5224, + "step": 26328 + }, + { + "epoch": 0.7229269632070291, + "grad_norm": 0.34714922308921814, + "learning_rate": 1.4241529236485414e-05, + "loss": 0.4611, + "step": 26329 + }, + { + "epoch": 0.7229544206479956, + "grad_norm": 0.3831663727760315, + "learning_rate": 1.4241138113927602e-05, + "loss": 0.4607, + "step": 26330 + }, + { + "epoch": 0.7229818780889621, + "grad_norm": 0.48073646426200867, + "learning_rate": 1.4240746983458715e-05, + "loss": 0.4751, + "step": 26331 + }, + { + "epoch": 0.7230093355299286, + "grad_norm": 0.3660992681980133, + "learning_rate": 1.4240355845079483e-05, + "loss": 0.4304, + "step": 26332 + }, + { + "epoch": 0.7230367929708951, + "grad_norm": 0.5809625387191772, + "learning_rate": 1.4239964698790632e-05, + "loss": 0.4821, + "step": 26333 + }, + { + "epoch": 0.7230642504118616, + "grad_norm": 0.3969602882862091, + "learning_rate": 1.4239573544592892e-05, + "loss": 0.4137, + "step": 26334 + }, + { + "epoch": 0.7230917078528282, + "grad_norm": 0.39329320192337036, + "learning_rate": 1.4239182382486997e-05, + "loss": 0.5173, + "step": 26335 + }, + { + "epoch": 0.7231191652937946, + "grad_norm": 0.3918006718158722, + "learning_rate": 1.4238791212473673e-05, + "loss": 0.4828, + "step": 26336 + }, + { + "epoch": 0.7231466227347612, + "grad_norm": 0.37745431065559387, + "learning_rate": 1.423840003455365e-05, + "loss": 0.5126, + "step": 26337 + }, + { + "epoch": 0.7231740801757276, + "grad_norm": 0.42074286937713623, + "learning_rate": 1.4238008848727659e-05, + "loss": 0.5237, + "step": 26338 + }, + { + "epoch": 0.7232015376166941, + "grad_norm": 0.34583741426467896, + "learning_rate": 1.4237617654996428e-05, + "loss": 0.5167, + "step": 26339 + }, + { + "epoch": 0.7232289950576606, + "grad_norm": 0.8585256934165955, + "learning_rate": 1.4237226453360688e-05, + "loss": 0.4866, + "step": 26340 + }, + { + "epoch": 0.7232564524986271, + "grad_norm": 0.3312419652938843, + "learning_rate": 1.4236835243821168e-05, + "loss": 0.3702, + "step": 26341 + }, + { + "epoch": 0.7232839099395937, + "grad_norm": 0.4142915904521942, + "learning_rate": 1.42364440263786e-05, + "loss": 0.5588, + "step": 26342 + }, + { + "epoch": 0.7233113673805601, + "grad_norm": 0.3874225318431854, + "learning_rate": 1.4236052801033708e-05, + "loss": 0.5469, + "step": 26343 + }, + { + "epoch": 0.7233388248215267, + "grad_norm": 0.42192745208740234, + "learning_rate": 1.4235661567787228e-05, + "loss": 0.5976, + "step": 26344 + }, + { + "epoch": 0.7233662822624931, + "grad_norm": 0.3531978130340576, + "learning_rate": 1.423527032663989e-05, + "loss": 0.4392, + "step": 26345 + }, + { + "epoch": 0.7233937397034597, + "grad_norm": 0.3849564790725708, + "learning_rate": 1.4234879077592413e-05, + "loss": 0.4405, + "step": 26346 + }, + { + "epoch": 0.7234211971444261, + "grad_norm": 0.3676339089870453, + "learning_rate": 1.4234487820645542e-05, + "loss": 0.5264, + "step": 26347 + }, + { + "epoch": 0.7234486545853926, + "grad_norm": 0.3764999806880951, + "learning_rate": 1.4234096555799996e-05, + "loss": 0.5167, + "step": 26348 + }, + { + "epoch": 0.7234761120263592, + "grad_norm": 0.42065373063087463, + "learning_rate": 1.4233705283056511e-05, + "loss": 0.5164, + "step": 26349 + }, + { + "epoch": 0.7235035694673256, + "grad_norm": 0.37902164459228516, + "learning_rate": 1.4233314002415814e-05, + "loss": 0.4738, + "step": 26350 + }, + { + "epoch": 0.7235310269082922, + "grad_norm": 0.39323294162750244, + "learning_rate": 1.4232922713878633e-05, + "loss": 0.4883, + "step": 26351 + }, + { + "epoch": 0.7235584843492586, + "grad_norm": 0.3537800908088684, + "learning_rate": 1.4232531417445705e-05, + "loss": 0.4579, + "step": 26352 + }, + { + "epoch": 0.7235859417902252, + "grad_norm": 0.38359153270721436, + "learning_rate": 1.4232140113117753e-05, + "loss": 0.4997, + "step": 26353 + }, + { + "epoch": 0.7236133992311916, + "grad_norm": 0.4274642765522003, + "learning_rate": 1.4231748800895506e-05, + "loss": 0.4861, + "step": 26354 + }, + { + "epoch": 0.7236408566721582, + "grad_norm": 0.35325971245765686, + "learning_rate": 1.4231357480779702e-05, + "loss": 0.4932, + "step": 26355 + }, + { + "epoch": 0.7236683141131247, + "grad_norm": 0.4098687469959259, + "learning_rate": 1.4230966152771059e-05, + "loss": 0.5025, + "step": 26356 + }, + { + "epoch": 0.7236957715540911, + "grad_norm": 0.39559587836265564, + "learning_rate": 1.423057481687032e-05, + "loss": 0.4989, + "step": 26357 + }, + { + "epoch": 0.7237232289950577, + "grad_norm": 0.41673314571380615, + "learning_rate": 1.4230183473078208e-05, + "loss": 0.5464, + "step": 26358 + }, + { + "epoch": 0.7237506864360241, + "grad_norm": 0.34742486476898193, + "learning_rate": 1.422979212139545e-05, + "loss": 0.4663, + "step": 26359 + }, + { + "epoch": 0.7237781438769907, + "grad_norm": 0.3894139528274536, + "learning_rate": 1.4229400761822781e-05, + "loss": 0.5462, + "step": 26360 + }, + { + "epoch": 0.7238056013179571, + "grad_norm": 0.36407729983329773, + "learning_rate": 1.4229009394360929e-05, + "loss": 0.4054, + "step": 26361 + }, + { + "epoch": 0.7238330587589237, + "grad_norm": 0.35093462467193604, + "learning_rate": 1.4228618019010627e-05, + "loss": 0.4662, + "step": 26362 + }, + { + "epoch": 0.7238605161998902, + "grad_norm": 0.44433826208114624, + "learning_rate": 1.4228226635772605e-05, + "loss": 0.5486, + "step": 26363 + }, + { + "epoch": 0.7238879736408567, + "grad_norm": 0.37154632806777954, + "learning_rate": 1.4227835244647583e-05, + "loss": 0.4859, + "step": 26364 + }, + { + "epoch": 0.7239154310818232, + "grad_norm": 0.3874884247779846, + "learning_rate": 1.4227443845636306e-05, + "loss": 0.5098, + "step": 26365 + }, + { + "epoch": 0.7239428885227897, + "grad_norm": 0.35846519470214844, + "learning_rate": 1.4227052438739496e-05, + "loss": 0.4561, + "step": 26366 + }, + { + "epoch": 0.7239703459637562, + "grad_norm": 0.38736188411712646, + "learning_rate": 1.422666102395788e-05, + "loss": 0.5045, + "step": 26367 + }, + { + "epoch": 0.7239978034047226, + "grad_norm": 0.36723482608795166, + "learning_rate": 1.4226269601292196e-05, + "loss": 0.4816, + "step": 26368 + }, + { + "epoch": 0.7240252608456892, + "grad_norm": 0.37435993552207947, + "learning_rate": 1.4225878170743169e-05, + "loss": 0.4169, + "step": 26369 + }, + { + "epoch": 0.7240527182866557, + "grad_norm": 0.4375900328159332, + "learning_rate": 1.4225486732311531e-05, + "loss": 0.4907, + "step": 26370 + }, + { + "epoch": 0.7240801757276222, + "grad_norm": 0.3844525218009949, + "learning_rate": 1.4225095285998012e-05, + "loss": 0.5113, + "step": 26371 + }, + { + "epoch": 0.7241076331685887, + "grad_norm": 0.37552109360694885, + "learning_rate": 1.422470383180334e-05, + "loss": 0.4801, + "step": 26372 + }, + { + "epoch": 0.7241350906095552, + "grad_norm": 0.3887079656124115, + "learning_rate": 1.4224312369728248e-05, + "loss": 0.5155, + "step": 26373 + }, + { + "epoch": 0.7241625480505217, + "grad_norm": 0.40262892842292786, + "learning_rate": 1.4223920899773465e-05, + "loss": 0.446, + "step": 26374 + }, + { + "epoch": 0.7241900054914882, + "grad_norm": 0.40675559639930725, + "learning_rate": 1.4223529421939721e-05, + "loss": 0.5028, + "step": 26375 + }, + { + "epoch": 0.7242174629324547, + "grad_norm": 0.3714660704135895, + "learning_rate": 1.4223137936227748e-05, + "loss": 0.5056, + "step": 26376 + }, + { + "epoch": 0.7242449203734213, + "grad_norm": 0.40309402346611023, + "learning_rate": 1.4222746442638274e-05, + "loss": 0.5118, + "step": 26377 + }, + { + "epoch": 0.7242723778143877, + "grad_norm": 0.4292336702346802, + "learning_rate": 1.422235494117203e-05, + "loss": 0.4984, + "step": 26378 + }, + { + "epoch": 0.7242998352553542, + "grad_norm": 0.3892662525177002, + "learning_rate": 1.4221963431829745e-05, + "loss": 0.5143, + "step": 26379 + }, + { + "epoch": 0.7243272926963207, + "grad_norm": 0.38032689690589905, + "learning_rate": 1.4221571914612153e-05, + "loss": 0.4583, + "step": 26380 + }, + { + "epoch": 0.7243547501372872, + "grad_norm": 0.34581825137138367, + "learning_rate": 1.4221180389519984e-05, + "loss": 0.4716, + "step": 26381 + }, + { + "epoch": 0.7243822075782537, + "grad_norm": 0.37776103615760803, + "learning_rate": 1.4220788856553962e-05, + "loss": 0.4926, + "step": 26382 + }, + { + "epoch": 0.7244096650192202, + "grad_norm": 0.43873611092567444, + "learning_rate": 1.4220397315714825e-05, + "loss": 0.5137, + "step": 26383 + }, + { + "epoch": 0.7244371224601867, + "grad_norm": 0.36577561497688293, + "learning_rate": 1.4220005767003297e-05, + "loss": 0.5511, + "step": 26384 + }, + { + "epoch": 0.7244645799011532, + "grad_norm": 0.43140238523483276, + "learning_rate": 1.4219614210420111e-05, + "loss": 0.5508, + "step": 26385 + }, + { + "epoch": 0.7244920373421198, + "grad_norm": 0.3570996820926666, + "learning_rate": 1.4219222645966002e-05, + "loss": 0.5081, + "step": 26386 + }, + { + "epoch": 0.7245194947830862, + "grad_norm": 0.4277471899986267, + "learning_rate": 1.4218831073641693e-05, + "loss": 0.5797, + "step": 26387 + }, + { + "epoch": 0.7245469522240527, + "grad_norm": 0.4457041621208191, + "learning_rate": 1.4218439493447918e-05, + "loss": 0.572, + "step": 26388 + }, + { + "epoch": 0.7245744096650192, + "grad_norm": 0.4033837616443634, + "learning_rate": 1.4218047905385408e-05, + "loss": 0.4866, + "step": 26389 + }, + { + "epoch": 0.7246018671059857, + "grad_norm": 0.3534661829471588, + "learning_rate": 1.421765630945489e-05, + "loss": 0.479, + "step": 26390 + }, + { + "epoch": 0.7246293245469522, + "grad_norm": 0.41968095302581787, + "learning_rate": 1.42172647056571e-05, + "loss": 0.5048, + "step": 26391 + }, + { + "epoch": 0.7246567819879187, + "grad_norm": 0.3813953995704651, + "learning_rate": 1.4216873093992763e-05, + "loss": 0.4959, + "step": 26392 + }, + { + "epoch": 0.7246842394288853, + "grad_norm": 0.417678564786911, + "learning_rate": 1.4216481474462613e-05, + "loss": 0.5247, + "step": 26393 + }, + { + "epoch": 0.7247116968698517, + "grad_norm": 0.37071678042411804, + "learning_rate": 1.421608984706738e-05, + "loss": 0.473, + "step": 26394 + }, + { + "epoch": 0.7247391543108183, + "grad_norm": 0.39828938245773315, + "learning_rate": 1.4215698211807791e-05, + "loss": 0.5874, + "step": 26395 + }, + { + "epoch": 0.7247666117517847, + "grad_norm": 0.40159937739372253, + "learning_rate": 1.421530656868458e-05, + "loss": 0.5622, + "step": 26396 + }, + { + "epoch": 0.7247940691927512, + "grad_norm": 0.43385884165763855, + "learning_rate": 1.4214914917698482e-05, + "loss": 0.5691, + "step": 26397 + }, + { + "epoch": 0.7248215266337177, + "grad_norm": 0.4976484775543213, + "learning_rate": 1.4214523258850216e-05, + "loss": 0.5757, + "step": 26398 + }, + { + "epoch": 0.7248489840746842, + "grad_norm": 0.4641304612159729, + "learning_rate": 1.4214131592140521e-05, + "loss": 0.4672, + "step": 26399 + }, + { + "epoch": 0.7248764415156508, + "grad_norm": 0.3834673762321472, + "learning_rate": 1.4213739917570127e-05, + "loss": 0.5402, + "step": 26400 + }, + { + "epoch": 0.7249038989566172, + "grad_norm": 0.3498471975326538, + "learning_rate": 1.4213348235139761e-05, + "loss": 0.4811, + "step": 26401 + }, + { + "epoch": 0.7249313563975838, + "grad_norm": 0.42082828283309937, + "learning_rate": 1.4212956544850158e-05, + "loss": 0.5085, + "step": 26402 + }, + { + "epoch": 0.7249588138385502, + "grad_norm": 0.45856621861457825, + "learning_rate": 1.4212564846702045e-05, + "loss": 0.6257, + "step": 26403 + }, + { + "epoch": 0.7249862712795168, + "grad_norm": 0.3876584470272064, + "learning_rate": 1.4212173140696156e-05, + "loss": 0.5957, + "step": 26404 + }, + { + "epoch": 0.7250137287204832, + "grad_norm": 0.40300464630126953, + "learning_rate": 1.4211781426833216e-05, + "loss": 0.5562, + "step": 26405 + }, + { + "epoch": 0.7250411861614497, + "grad_norm": 0.39774537086486816, + "learning_rate": 1.4211389705113963e-05, + "loss": 0.4741, + "step": 26406 + }, + { + "epoch": 0.7250686436024163, + "grad_norm": 0.3788633346557617, + "learning_rate": 1.4210997975539123e-05, + "loss": 0.466, + "step": 26407 + }, + { + "epoch": 0.7250961010433827, + "grad_norm": 0.37986791133880615, + "learning_rate": 1.4210606238109426e-05, + "loss": 0.4441, + "step": 26408 + }, + { + "epoch": 0.7251235584843493, + "grad_norm": 0.36719101667404175, + "learning_rate": 1.4210214492825607e-05, + "loss": 0.4855, + "step": 26409 + }, + { + "epoch": 0.7251510159253157, + "grad_norm": 0.38099488615989685, + "learning_rate": 1.4209822739688393e-05, + "loss": 0.5348, + "step": 26410 + }, + { + "epoch": 0.7251784733662823, + "grad_norm": 0.8786234855651855, + "learning_rate": 1.4209430978698515e-05, + "loss": 0.4423, + "step": 26411 + }, + { + "epoch": 0.7252059308072487, + "grad_norm": 0.3889460861682892, + "learning_rate": 1.4209039209856708e-05, + "loss": 0.5789, + "step": 26412 + }, + { + "epoch": 0.7252333882482153, + "grad_norm": 0.3521285951137543, + "learning_rate": 1.4208647433163696e-05, + "loss": 0.5161, + "step": 26413 + }, + { + "epoch": 0.7252608456891818, + "grad_norm": 0.36205610632896423, + "learning_rate": 1.4208255648620215e-05, + "loss": 0.496, + "step": 26414 + }, + { + "epoch": 0.7252883031301482, + "grad_norm": 0.4542630910873413, + "learning_rate": 1.4207863856226994e-05, + "loss": 0.5158, + "step": 26415 + }, + { + "epoch": 0.7253157605711148, + "grad_norm": 0.4029260277748108, + "learning_rate": 1.4207472055984764e-05, + "loss": 0.5135, + "step": 26416 + }, + { + "epoch": 0.7253432180120812, + "grad_norm": 0.3832293450832367, + "learning_rate": 1.4207080247894257e-05, + "loss": 0.5343, + "step": 26417 + }, + { + "epoch": 0.7253706754530478, + "grad_norm": 0.44434553384780884, + "learning_rate": 1.4206688431956198e-05, + "loss": 0.4721, + "step": 26418 + }, + { + "epoch": 0.7253981328940142, + "grad_norm": 0.40917137265205383, + "learning_rate": 1.4206296608171325e-05, + "loss": 0.4936, + "step": 26419 + }, + { + "epoch": 0.7254255903349808, + "grad_norm": 0.41396626830101013, + "learning_rate": 1.4205904776540368e-05, + "loss": 0.5648, + "step": 26420 + }, + { + "epoch": 0.7254530477759473, + "grad_norm": 0.4792381525039673, + "learning_rate": 1.4205512937064055e-05, + "loss": 0.6015, + "step": 26421 + }, + { + "epoch": 0.7254805052169138, + "grad_norm": 0.4989106059074402, + "learning_rate": 1.4205121089743118e-05, + "loss": 0.4385, + "step": 26422 + }, + { + "epoch": 0.7255079626578803, + "grad_norm": 0.37714317440986633, + "learning_rate": 1.4204729234578288e-05, + "loss": 0.531, + "step": 26423 + }, + { + "epoch": 0.7255354200988468, + "grad_norm": 0.40086984634399414, + "learning_rate": 1.4204337371570296e-05, + "loss": 0.4412, + "step": 26424 + }, + { + "epoch": 0.7255628775398133, + "grad_norm": 0.4098984897136688, + "learning_rate": 1.4203945500719874e-05, + "loss": 0.485, + "step": 26425 + }, + { + "epoch": 0.7255903349807797, + "grad_norm": 0.3646160662174225, + "learning_rate": 1.4203553622027751e-05, + "loss": 0.4863, + "step": 26426 + }, + { + "epoch": 0.7256177924217463, + "grad_norm": 0.3762587904930115, + "learning_rate": 1.420316173549466e-05, + "loss": 0.4519, + "step": 26427 + }, + { + "epoch": 0.7256452498627128, + "grad_norm": 0.3749825656414032, + "learning_rate": 1.4202769841121329e-05, + "loss": 0.4246, + "step": 26428 + }, + { + "epoch": 0.7256727073036793, + "grad_norm": 0.32484063506126404, + "learning_rate": 1.4202377938908489e-05, + "loss": 0.5229, + "step": 26429 + }, + { + "epoch": 0.7257001647446458, + "grad_norm": 0.3740861713886261, + "learning_rate": 1.4201986028856878e-05, + "loss": 0.4569, + "step": 26430 + }, + { + "epoch": 0.7257276221856123, + "grad_norm": 0.5718126893043518, + "learning_rate": 1.4201594110967218e-05, + "loss": 0.5988, + "step": 26431 + }, + { + "epoch": 0.7257550796265788, + "grad_norm": 0.3922485113143921, + "learning_rate": 1.4201202185240247e-05, + "loss": 0.491, + "step": 26432 + }, + { + "epoch": 0.7257825370675453, + "grad_norm": 0.3829801678657532, + "learning_rate": 1.4200810251676691e-05, + "loss": 0.5354, + "step": 26433 + }, + { + "epoch": 0.7258099945085118, + "grad_norm": 0.4154652953147888, + "learning_rate": 1.4200418310277284e-05, + "loss": 0.5481, + "step": 26434 + }, + { + "epoch": 0.7258374519494784, + "grad_norm": 0.3663593828678131, + "learning_rate": 1.4200026361042756e-05, + "loss": 0.4595, + "step": 26435 + }, + { + "epoch": 0.7258649093904448, + "grad_norm": 0.37983477115631104, + "learning_rate": 1.4199634403973836e-05, + "loss": 0.3936, + "step": 26436 + }, + { + "epoch": 0.7258923668314113, + "grad_norm": 0.43202972412109375, + "learning_rate": 1.419924243907126e-05, + "loss": 0.519, + "step": 26437 + }, + { + "epoch": 0.7259198242723778, + "grad_norm": 0.3615007698535919, + "learning_rate": 1.4198850466335758e-05, + "loss": 0.4495, + "step": 26438 + }, + { + "epoch": 0.7259472817133443, + "grad_norm": 0.4083486795425415, + "learning_rate": 1.4198458485768057e-05, + "loss": 0.5636, + "step": 26439 + }, + { + "epoch": 0.7259747391543108, + "grad_norm": 0.38871923089027405, + "learning_rate": 1.4198066497368893e-05, + "loss": 0.5016, + "step": 26440 + }, + { + "epoch": 0.7260021965952773, + "grad_norm": 0.4324597716331482, + "learning_rate": 1.4197674501138993e-05, + "loss": 0.4988, + "step": 26441 + }, + { + "epoch": 0.7260296540362439, + "grad_norm": 0.40918484330177307, + "learning_rate": 1.4197282497079092e-05, + "loss": 0.5969, + "step": 26442 + }, + { + "epoch": 0.7260571114772103, + "grad_norm": 0.361136794090271, + "learning_rate": 1.419689048518992e-05, + "loss": 0.447, + "step": 26443 + }, + { + "epoch": 0.7260845689181769, + "grad_norm": 0.41849544644355774, + "learning_rate": 1.4196498465472206e-05, + "loss": 0.5575, + "step": 26444 + }, + { + "epoch": 0.7261120263591433, + "grad_norm": 0.38336381316185, + "learning_rate": 1.4196106437926685e-05, + "loss": 0.587, + "step": 26445 + }, + { + "epoch": 0.7261394838001098, + "grad_norm": 0.3666728734970093, + "learning_rate": 1.4195714402554082e-05, + "loss": 0.4826, + "step": 26446 + }, + { + "epoch": 0.7261669412410763, + "grad_norm": 0.44882965087890625, + "learning_rate": 1.4195322359355137e-05, + "loss": 0.4838, + "step": 26447 + }, + { + "epoch": 0.7261943986820428, + "grad_norm": 0.3574516177177429, + "learning_rate": 1.4194930308330576e-05, + "loss": 0.4894, + "step": 26448 + }, + { + "epoch": 0.7262218561230094, + "grad_norm": 0.3355248272418976, + "learning_rate": 1.4194538249481129e-05, + "loss": 0.4776, + "step": 26449 + }, + { + "epoch": 0.7262493135639758, + "grad_norm": 0.422640323638916, + "learning_rate": 1.4194146182807531e-05, + "loss": 0.5326, + "step": 26450 + }, + { + "epoch": 0.7262767710049424, + "grad_norm": 0.35390058159828186, + "learning_rate": 1.4193754108310512e-05, + "loss": 0.4399, + "step": 26451 + }, + { + "epoch": 0.7263042284459088, + "grad_norm": 0.39028510451316833, + "learning_rate": 1.4193362025990803e-05, + "loss": 0.5629, + "step": 26452 + }, + { + "epoch": 0.7263316858868754, + "grad_norm": 0.4464449882507324, + "learning_rate": 1.4192969935849134e-05, + "loss": 0.4543, + "step": 26453 + }, + { + "epoch": 0.7263591433278418, + "grad_norm": 0.4140593409538269, + "learning_rate": 1.4192577837886239e-05, + "loss": 0.4759, + "step": 26454 + }, + { + "epoch": 0.7263866007688083, + "grad_norm": 0.3974490761756897, + "learning_rate": 1.419218573210285e-05, + "loss": 0.5187, + "step": 26455 + }, + { + "epoch": 0.7264140582097749, + "grad_norm": 0.35428494215011597, + "learning_rate": 1.4191793618499692e-05, + "loss": 0.502, + "step": 26456 + }, + { + "epoch": 0.7264415156507413, + "grad_norm": 0.444654256105423, + "learning_rate": 1.4191401497077504e-05, + "loss": 0.4694, + "step": 26457 + }, + { + "epoch": 0.7264689730917079, + "grad_norm": 0.3769027292728424, + "learning_rate": 1.4191009367837015e-05, + "loss": 0.4875, + "step": 26458 + }, + { + "epoch": 0.7264964305326743, + "grad_norm": 0.3933315575122833, + "learning_rate": 1.4190617230778955e-05, + "loss": 0.5882, + "step": 26459 + }, + { + "epoch": 0.7265238879736409, + "grad_norm": 0.3647533357143402, + "learning_rate": 1.4190225085904057e-05, + "loss": 0.4623, + "step": 26460 + }, + { + "epoch": 0.7265513454146073, + "grad_norm": 0.45978331565856934, + "learning_rate": 1.418983293321305e-05, + "loss": 0.5435, + "step": 26461 + }, + { + "epoch": 0.7265788028555739, + "grad_norm": 0.37823987007141113, + "learning_rate": 1.4189440772706671e-05, + "loss": 0.4919, + "step": 26462 + }, + { + "epoch": 0.7266062602965404, + "grad_norm": 0.4116574823856354, + "learning_rate": 1.4189048604385645e-05, + "loss": 0.5491, + "step": 26463 + }, + { + "epoch": 0.7266337177375068, + "grad_norm": 0.44545435905456543, + "learning_rate": 1.4188656428250707e-05, + "loss": 0.4653, + "step": 26464 + }, + { + "epoch": 0.7266611751784734, + "grad_norm": 0.33950814604759216, + "learning_rate": 1.4188264244302587e-05, + "loss": 0.5353, + "step": 26465 + }, + { + "epoch": 0.7266886326194398, + "grad_norm": 0.3806222081184387, + "learning_rate": 1.4187872052542018e-05, + "loss": 0.492, + "step": 26466 + }, + { + "epoch": 0.7267160900604064, + "grad_norm": 0.4128456711769104, + "learning_rate": 1.4187479852969732e-05, + "loss": 0.5316, + "step": 26467 + }, + { + "epoch": 0.7267435475013728, + "grad_norm": 0.35050854086875916, + "learning_rate": 1.4187087645586459e-05, + "loss": 0.5336, + "step": 26468 + }, + { + "epoch": 0.7267710049423394, + "grad_norm": 0.41721194982528687, + "learning_rate": 1.4186695430392932e-05, + "loss": 0.4958, + "step": 26469 + }, + { + "epoch": 0.7267984623833059, + "grad_norm": 0.464465469121933, + "learning_rate": 1.418630320738988e-05, + "loss": 0.5014, + "step": 26470 + }, + { + "epoch": 0.7268259198242724, + "grad_norm": 0.35035431385040283, + "learning_rate": 1.4185910976578037e-05, + "loss": 0.4107, + "step": 26471 + }, + { + "epoch": 0.7268533772652389, + "grad_norm": 0.37879011034965515, + "learning_rate": 1.4185518737958135e-05, + "loss": 0.5833, + "step": 26472 + }, + { + "epoch": 0.7268808347062053, + "grad_norm": 0.3867303431034088, + "learning_rate": 1.4185126491530902e-05, + "loss": 0.4525, + "step": 26473 + }, + { + "epoch": 0.7269082921471719, + "grad_norm": 0.37824559211730957, + "learning_rate": 1.4184734237297076e-05, + "loss": 0.4968, + "step": 26474 + }, + { + "epoch": 0.7269357495881383, + "grad_norm": 0.3510550260543823, + "learning_rate": 1.418434197525738e-05, + "loss": 0.3955, + "step": 26475 + }, + { + "epoch": 0.7269632070291049, + "grad_norm": 0.403104305267334, + "learning_rate": 1.4183949705412556e-05, + "loss": 0.4759, + "step": 26476 + }, + { + "epoch": 0.7269906644700714, + "grad_norm": 0.4203472435474396, + "learning_rate": 1.418355742776333e-05, + "loss": 0.5627, + "step": 26477 + }, + { + "epoch": 0.7270181219110379, + "grad_norm": 0.3917500674724579, + "learning_rate": 1.4183165142310432e-05, + "loss": 0.4908, + "step": 26478 + }, + { + "epoch": 0.7270455793520044, + "grad_norm": 0.3582918345928192, + "learning_rate": 1.4182772849054596e-05, + "loss": 0.495, + "step": 26479 + }, + { + "epoch": 0.7270730367929709, + "grad_norm": 0.3930503726005554, + "learning_rate": 1.4182380547996553e-05, + "loss": 0.4926, + "step": 26480 + }, + { + "epoch": 0.7271004942339374, + "grad_norm": 0.36117374897003174, + "learning_rate": 1.4181988239137038e-05, + "loss": 0.5062, + "step": 26481 + }, + { + "epoch": 0.7271279516749038, + "grad_norm": 0.4584674537181854, + "learning_rate": 1.418159592247678e-05, + "loss": 0.4922, + "step": 26482 + }, + { + "epoch": 0.7271554091158704, + "grad_norm": 0.3861490488052368, + "learning_rate": 1.4181203598016508e-05, + "loss": 0.4786, + "step": 26483 + }, + { + "epoch": 0.727182866556837, + "grad_norm": 0.37113747000694275, + "learning_rate": 1.418081126575696e-05, + "loss": 0.4695, + "step": 26484 + }, + { + "epoch": 0.7272103239978034, + "grad_norm": 0.36588913202285767, + "learning_rate": 1.4180418925698862e-05, + "loss": 0.5311, + "step": 26485 + }, + { + "epoch": 0.7272377814387699, + "grad_norm": 0.3983186185359955, + "learning_rate": 1.4180026577842952e-05, + "loss": 0.5221, + "step": 26486 + }, + { + "epoch": 0.7272652388797364, + "grad_norm": 0.3702963590621948, + "learning_rate": 1.4179634222189956e-05, + "loss": 0.5131, + "step": 26487 + }, + { + "epoch": 0.7272926963207029, + "grad_norm": 0.42811235785484314, + "learning_rate": 1.4179241858740606e-05, + "loss": 0.5307, + "step": 26488 + }, + { + "epoch": 0.7273201537616694, + "grad_norm": 0.42560893297195435, + "learning_rate": 1.417884948749564e-05, + "loss": 0.5298, + "step": 26489 + }, + { + "epoch": 0.7273476112026359, + "grad_norm": 0.37565577030181885, + "learning_rate": 1.4178457108455785e-05, + "loss": 0.4312, + "step": 26490 + }, + { + "epoch": 0.7273750686436025, + "grad_norm": 0.4072927236557007, + "learning_rate": 1.4178064721621771e-05, + "loss": 0.4909, + "step": 26491 + }, + { + "epoch": 0.7274025260845689, + "grad_norm": 0.39482831954956055, + "learning_rate": 1.4177672326994337e-05, + "loss": 0.5077, + "step": 26492 + }, + { + "epoch": 0.7274299835255355, + "grad_norm": 0.3971184194087982, + "learning_rate": 1.4177279924574207e-05, + "loss": 0.5124, + "step": 26493 + }, + { + "epoch": 0.7274574409665019, + "grad_norm": 0.3772537410259247, + "learning_rate": 1.4176887514362122e-05, + "loss": 0.4509, + "step": 26494 + }, + { + "epoch": 0.7274848984074684, + "grad_norm": 0.39377671480178833, + "learning_rate": 1.4176495096358804e-05, + "loss": 0.4754, + "step": 26495 + }, + { + "epoch": 0.7275123558484349, + "grad_norm": 0.3578818142414093, + "learning_rate": 1.417610267056499e-05, + "loss": 0.5896, + "step": 26496 + }, + { + "epoch": 0.7275398132894014, + "grad_norm": 0.3803613781929016, + "learning_rate": 1.4175710236981412e-05, + "loss": 0.4972, + "step": 26497 + }, + { + "epoch": 0.727567270730368, + "grad_norm": 0.3479051887989044, + "learning_rate": 1.4175317795608802e-05, + "loss": 0.4934, + "step": 26498 + }, + { + "epoch": 0.7275947281713344, + "grad_norm": 0.3572935461997986, + "learning_rate": 1.4174925346447892e-05, + "loss": 0.4743, + "step": 26499 + }, + { + "epoch": 0.727622185612301, + "grad_norm": 0.43500110507011414, + "learning_rate": 1.4174532889499415e-05, + "loss": 0.5439, + "step": 26500 + }, + { + "epoch": 0.7276496430532674, + "grad_norm": 0.42536523938179016, + "learning_rate": 1.41741404247641e-05, + "loss": 0.5395, + "step": 26501 + }, + { + "epoch": 0.727677100494234, + "grad_norm": 0.3965587615966797, + "learning_rate": 1.417374795224268e-05, + "loss": 0.5299, + "step": 26502 + }, + { + "epoch": 0.7277045579352004, + "grad_norm": 0.3151324987411499, + "learning_rate": 1.417335547193589e-05, + "loss": 0.3839, + "step": 26503 + }, + { + "epoch": 0.7277320153761669, + "grad_norm": 0.36532846093177795, + "learning_rate": 1.4172962983844461e-05, + "loss": 0.4491, + "step": 26504 + }, + { + "epoch": 0.7277594728171335, + "grad_norm": 0.37355268001556396, + "learning_rate": 1.4172570487969122e-05, + "loss": 0.5088, + "step": 26505 + }, + { + "epoch": 0.7277869302580999, + "grad_norm": 0.3800819218158722, + "learning_rate": 1.4172177984310606e-05, + "loss": 0.4695, + "step": 26506 + }, + { + "epoch": 0.7278143876990665, + "grad_norm": 0.44021841883659363, + "learning_rate": 1.417178547286965e-05, + "loss": 0.5277, + "step": 26507 + }, + { + "epoch": 0.7278418451400329, + "grad_norm": 0.41313350200653076, + "learning_rate": 1.4171392953646981e-05, + "loss": 0.469, + "step": 26508 + }, + { + "epoch": 0.7278693025809995, + "grad_norm": 0.34809181094169617, + "learning_rate": 1.4171000426643332e-05, + "loss": 0.4887, + "step": 26509 + }, + { + "epoch": 0.7278967600219659, + "grad_norm": 0.4057377576828003, + "learning_rate": 1.4170607891859435e-05, + "loss": 0.5353, + "step": 26510 + }, + { + "epoch": 0.7279242174629325, + "grad_norm": 0.3681914210319519, + "learning_rate": 1.4170215349296026e-05, + "loss": 0.5192, + "step": 26511 + }, + { + "epoch": 0.727951674903899, + "grad_norm": 0.384828120470047, + "learning_rate": 1.4169822798953834e-05, + "loss": 0.5605, + "step": 26512 + }, + { + "epoch": 0.7279791323448654, + "grad_norm": 0.3783910870552063, + "learning_rate": 1.4169430240833592e-05, + "loss": 0.3986, + "step": 26513 + }, + { + "epoch": 0.728006589785832, + "grad_norm": 0.34458988904953003, + "learning_rate": 1.4169037674936028e-05, + "loss": 0.5224, + "step": 26514 + }, + { + "epoch": 0.7280340472267984, + "grad_norm": 0.3403850793838501, + "learning_rate": 1.4168645101261882e-05, + "loss": 0.4469, + "step": 26515 + }, + { + "epoch": 0.728061504667765, + "grad_norm": 0.49941548705101013, + "learning_rate": 1.416825251981188e-05, + "loss": 0.6053, + "step": 26516 + }, + { + "epoch": 0.7280889621087314, + "grad_norm": 0.38647252321243286, + "learning_rate": 1.4167859930586759e-05, + "loss": 0.5266, + "step": 26517 + }, + { + "epoch": 0.728116419549698, + "grad_norm": 0.6274833083152771, + "learning_rate": 1.4167467333587247e-05, + "loss": 0.4544, + "step": 26518 + }, + { + "epoch": 0.7281438769906645, + "grad_norm": 0.45255839824676514, + "learning_rate": 1.4167074728814081e-05, + "loss": 0.6464, + "step": 26519 + }, + { + "epoch": 0.728171334431631, + "grad_norm": 0.3711800277233124, + "learning_rate": 1.416668211626799e-05, + "loss": 0.4887, + "step": 26520 + }, + { + "epoch": 0.7281987918725975, + "grad_norm": 0.37301263213157654, + "learning_rate": 1.4166289495949705e-05, + "loss": 0.5297, + "step": 26521 + }, + { + "epoch": 0.728226249313564, + "grad_norm": 0.41086140275001526, + "learning_rate": 1.4165896867859961e-05, + "loss": 0.4161, + "step": 26522 + }, + { + "epoch": 0.7282537067545305, + "grad_norm": 0.3640088737010956, + "learning_rate": 1.416550423199949e-05, + "loss": 0.5215, + "step": 26523 + }, + { + "epoch": 0.7282811641954969, + "grad_norm": 0.39346370100975037, + "learning_rate": 1.4165111588369022e-05, + "loss": 0.5225, + "step": 26524 + }, + { + "epoch": 0.7283086216364635, + "grad_norm": 0.3535346984863281, + "learning_rate": 1.4164718936969296e-05, + "loss": 0.4657, + "step": 26525 + }, + { + "epoch": 0.72833607907743, + "grad_norm": 0.3901219964027405, + "learning_rate": 1.4164326277801039e-05, + "loss": 0.5341, + "step": 26526 + }, + { + "epoch": 0.7283635365183965, + "grad_norm": 0.4328969120979309, + "learning_rate": 1.4163933610864981e-05, + "loss": 0.5041, + "step": 26527 + }, + { + "epoch": 0.728390993959363, + "grad_norm": 0.36454060673713684, + "learning_rate": 1.416354093616186e-05, + "loss": 0.5042, + "step": 26528 + }, + { + "epoch": 0.7284184514003295, + "grad_norm": 0.41108620166778564, + "learning_rate": 1.4163148253692407e-05, + "loss": 0.4659, + "step": 26529 + }, + { + "epoch": 0.728445908841296, + "grad_norm": 0.45896053314208984, + "learning_rate": 1.4162755563457353e-05, + "loss": 0.5698, + "step": 26530 + }, + { + "epoch": 0.7284733662822624, + "grad_norm": 0.38824862241744995, + "learning_rate": 1.4162362865457433e-05, + "loss": 0.5385, + "step": 26531 + }, + { + "epoch": 0.728500823723229, + "grad_norm": 0.40727198123931885, + "learning_rate": 1.4161970159693375e-05, + "loss": 0.5304, + "step": 26532 + }, + { + "epoch": 0.7285282811641955, + "grad_norm": 0.4097726345062256, + "learning_rate": 1.4161577446165915e-05, + "loss": 0.5577, + "step": 26533 + }, + { + "epoch": 0.728555738605162, + "grad_norm": 0.4027020335197449, + "learning_rate": 1.4161184724875786e-05, + "loss": 0.4932, + "step": 26534 + }, + { + "epoch": 0.7285831960461285, + "grad_norm": 0.40310585498809814, + "learning_rate": 1.4160791995823717e-05, + "loss": 0.523, + "step": 26535 + }, + { + "epoch": 0.728610653487095, + "grad_norm": 0.39297324419021606, + "learning_rate": 1.4160399259010447e-05, + "loss": 0.4424, + "step": 26536 + }, + { + "epoch": 0.7286381109280615, + "grad_norm": 0.39155375957489014, + "learning_rate": 1.41600065144367e-05, + "loss": 0.5157, + "step": 26537 + }, + { + "epoch": 0.728665568369028, + "grad_norm": 0.4629780650138855, + "learning_rate": 1.4159613762103215e-05, + "loss": 0.4922, + "step": 26538 + }, + { + "epoch": 0.7286930258099945, + "grad_norm": 0.37035366892814636, + "learning_rate": 1.4159221002010726e-05, + "loss": 0.4656, + "step": 26539 + }, + { + "epoch": 0.7287204832509611, + "grad_norm": 0.3666989207267761, + "learning_rate": 1.4158828234159958e-05, + "loss": 0.5405, + "step": 26540 + }, + { + "epoch": 0.7287479406919275, + "grad_norm": 0.38648420572280884, + "learning_rate": 1.415843545855165e-05, + "loss": 0.4527, + "step": 26541 + }, + { + "epoch": 0.728775398132894, + "grad_norm": 0.39043840765953064, + "learning_rate": 1.4158042675186529e-05, + "loss": 0.5284, + "step": 26542 + }, + { + "epoch": 0.7288028555738605, + "grad_norm": 0.35405468940734863, + "learning_rate": 1.4157649884065337e-05, + "loss": 0.4679, + "step": 26543 + }, + { + "epoch": 0.728830313014827, + "grad_norm": 0.338649719953537, + "learning_rate": 1.4157257085188799e-05, + "loss": 0.4629, + "step": 26544 + }, + { + "epoch": 0.7288577704557935, + "grad_norm": 0.37344124913215637, + "learning_rate": 1.4156864278557647e-05, + "loss": 0.467, + "step": 26545 + }, + { + "epoch": 0.72888522789676, + "grad_norm": 0.3812141716480255, + "learning_rate": 1.4156471464172618e-05, + "loss": 0.4193, + "step": 26546 + }, + { + "epoch": 0.7289126853377266, + "grad_norm": 0.3628295063972473, + "learning_rate": 1.4156078642034443e-05, + "loss": 0.5669, + "step": 26547 + }, + { + "epoch": 0.728940142778693, + "grad_norm": 0.4500172436237335, + "learning_rate": 1.4155685812143856e-05, + "loss": 0.5307, + "step": 26548 + }, + { + "epoch": 0.7289676002196596, + "grad_norm": 0.38682615756988525, + "learning_rate": 1.4155292974501588e-05, + "loss": 0.5448, + "step": 26549 + }, + { + "epoch": 0.728995057660626, + "grad_norm": 0.4714806079864502, + "learning_rate": 1.4154900129108373e-05, + "loss": 0.5412, + "step": 26550 + }, + { + "epoch": 0.7290225151015925, + "grad_norm": 0.4793491065502167, + "learning_rate": 1.415450727596494e-05, + "loss": 0.4279, + "step": 26551 + }, + { + "epoch": 0.729049972542559, + "grad_norm": 0.3594540059566498, + "learning_rate": 1.4154114415072032e-05, + "loss": 0.5011, + "step": 26552 + }, + { + "epoch": 0.7290774299835255, + "grad_norm": 0.3965149223804474, + "learning_rate": 1.4153721546430369e-05, + "loss": 0.4649, + "step": 26553 + }, + { + "epoch": 0.7291048874244921, + "grad_norm": 0.35032832622528076, + "learning_rate": 1.4153328670040692e-05, + "loss": 0.4267, + "step": 26554 + }, + { + "epoch": 0.7291323448654585, + "grad_norm": 0.3988913297653198, + "learning_rate": 1.4152935785903727e-05, + "loss": 0.4928, + "step": 26555 + }, + { + "epoch": 0.7291598023064251, + "grad_norm": 0.4012396037578583, + "learning_rate": 1.4152542894020217e-05, + "loss": 0.4708, + "step": 26556 + }, + { + "epoch": 0.7291872597473915, + "grad_norm": 0.36177119612693787, + "learning_rate": 1.4152149994390887e-05, + "loss": 0.5796, + "step": 26557 + }, + { + "epoch": 0.7292147171883581, + "grad_norm": 0.40994319319725037, + "learning_rate": 1.4151757087016472e-05, + "loss": 0.4861, + "step": 26558 + }, + { + "epoch": 0.7292421746293245, + "grad_norm": 0.38693487644195557, + "learning_rate": 1.4151364171897703e-05, + "loss": 0.4376, + "step": 26559 + }, + { + "epoch": 0.729269632070291, + "grad_norm": 0.7607212066650391, + "learning_rate": 1.4150971249035316e-05, + "loss": 0.4897, + "step": 26560 + }, + { + "epoch": 0.7292970895112576, + "grad_norm": 0.6499704718589783, + "learning_rate": 1.4150578318430044e-05, + "loss": 0.5245, + "step": 26561 + }, + { + "epoch": 0.729324546952224, + "grad_norm": 0.42308396100997925, + "learning_rate": 1.4150185380082619e-05, + "loss": 0.4657, + "step": 26562 + }, + { + "epoch": 0.7293520043931906, + "grad_norm": 0.3374638855457306, + "learning_rate": 1.4149792433993773e-05, + "loss": 0.4844, + "step": 26563 + }, + { + "epoch": 0.729379461834157, + "grad_norm": 0.3751237094402313, + "learning_rate": 1.414939948016424e-05, + "loss": 0.5307, + "step": 26564 + }, + { + "epoch": 0.7294069192751236, + "grad_norm": 0.35915347933769226, + "learning_rate": 1.4149006518594752e-05, + "loss": 0.5347, + "step": 26565 + }, + { + "epoch": 0.72943437671609, + "grad_norm": 0.4206906855106354, + "learning_rate": 1.4148613549286042e-05, + "loss": 0.5219, + "step": 26566 + }, + { + "epoch": 0.7294618341570566, + "grad_norm": 0.3614136278629303, + "learning_rate": 1.4148220572238846e-05, + "loss": 0.4042, + "step": 26567 + }, + { + "epoch": 0.7294892915980231, + "grad_norm": 0.41736340522766113, + "learning_rate": 1.414782758745389e-05, + "loss": 0.5065, + "step": 26568 + }, + { + "epoch": 0.7295167490389896, + "grad_norm": 0.4216974377632141, + "learning_rate": 1.4147434594931918e-05, + "loss": 0.5325, + "step": 26569 + }, + { + "epoch": 0.7295442064799561, + "grad_norm": 0.36988237500190735, + "learning_rate": 1.4147041594673654e-05, + "loss": 0.5293, + "step": 26570 + }, + { + "epoch": 0.7295716639209225, + "grad_norm": 0.42117854952812195, + "learning_rate": 1.4146648586679831e-05, + "loss": 0.4906, + "step": 26571 + }, + { + "epoch": 0.7295991213618891, + "grad_norm": 0.41042831540107727, + "learning_rate": 1.4146255570951188e-05, + "loss": 0.4677, + "step": 26572 + }, + { + "epoch": 0.7296265788028555, + "grad_norm": 0.42743924260139465, + "learning_rate": 1.4145862547488454e-05, + "loss": 0.4856, + "step": 26573 + }, + { + "epoch": 0.7296540362438221, + "grad_norm": 0.39331531524658203, + "learning_rate": 1.4145469516292366e-05, + "loss": 0.5033, + "step": 26574 + }, + { + "epoch": 0.7296814936847886, + "grad_norm": 0.3990926444530487, + "learning_rate": 1.4145076477363651e-05, + "loss": 0.5321, + "step": 26575 + }, + { + "epoch": 0.7297089511257551, + "grad_norm": 0.3844515383243561, + "learning_rate": 1.4144683430703045e-05, + "loss": 0.559, + "step": 26576 + }, + { + "epoch": 0.7297364085667216, + "grad_norm": 0.37143275141716003, + "learning_rate": 1.4144290376311282e-05, + "loss": 0.509, + "step": 26577 + }, + { + "epoch": 0.7297638660076881, + "grad_norm": 0.3957463204860687, + "learning_rate": 1.4143897314189096e-05, + "loss": 0.5124, + "step": 26578 + }, + { + "epoch": 0.7297913234486546, + "grad_norm": 0.36681613326072693, + "learning_rate": 1.4143504244337218e-05, + "loss": 0.5988, + "step": 26579 + }, + { + "epoch": 0.729818780889621, + "grad_norm": 0.40924209356307983, + "learning_rate": 1.4143111166756385e-05, + "loss": 0.5429, + "step": 26580 + }, + { + "epoch": 0.7298462383305876, + "grad_norm": 0.3682584762573242, + "learning_rate": 1.4142718081447324e-05, + "loss": 0.4523, + "step": 26581 + }, + { + "epoch": 0.7298736957715541, + "grad_norm": 0.3872680366039276, + "learning_rate": 1.4142324988410772e-05, + "loss": 0.4398, + "step": 26582 + }, + { + "epoch": 0.7299011532125206, + "grad_norm": 0.3861006200313568, + "learning_rate": 1.4141931887647465e-05, + "loss": 0.5088, + "step": 26583 + }, + { + "epoch": 0.7299286106534871, + "grad_norm": 0.3463515639305115, + "learning_rate": 1.4141538779158128e-05, + "loss": 0.4563, + "step": 26584 + }, + { + "epoch": 0.7299560680944536, + "grad_norm": 0.4558379650115967, + "learning_rate": 1.4141145662943503e-05, + "loss": 0.5964, + "step": 26585 + }, + { + "epoch": 0.7299835255354201, + "grad_norm": 0.34973520040512085, + "learning_rate": 1.4140752539004316e-05, + "loss": 0.4399, + "step": 26586 + }, + { + "epoch": 0.7300109829763866, + "grad_norm": 0.3631007969379425, + "learning_rate": 1.414035940734131e-05, + "loss": 0.5333, + "step": 26587 + }, + { + "epoch": 0.7300384404173531, + "grad_norm": 0.4125973582267761, + "learning_rate": 1.4139966267955207e-05, + "loss": 0.5464, + "step": 26588 + }, + { + "epoch": 0.7300658978583197, + "grad_norm": 0.34898966550827026, + "learning_rate": 1.4139573120846745e-05, + "loss": 0.4814, + "step": 26589 + }, + { + "epoch": 0.7300933552992861, + "grad_norm": 0.3678034245967865, + "learning_rate": 1.4139179966016662e-05, + "loss": 0.4951, + "step": 26590 + }, + { + "epoch": 0.7301208127402526, + "grad_norm": 0.3277418315410614, + "learning_rate": 1.4138786803465685e-05, + "loss": 0.3801, + "step": 26591 + }, + { + "epoch": 0.7301482701812191, + "grad_norm": 0.36244693398475647, + "learning_rate": 1.413839363319455e-05, + "loss": 0.4644, + "step": 26592 + }, + { + "epoch": 0.7301757276221856, + "grad_norm": 0.40801718831062317, + "learning_rate": 1.413800045520399e-05, + "loss": 0.5557, + "step": 26593 + }, + { + "epoch": 0.7302031850631521, + "grad_norm": 0.4273216724395752, + "learning_rate": 1.413760726949474e-05, + "loss": 0.5198, + "step": 26594 + }, + { + "epoch": 0.7302306425041186, + "grad_norm": 0.4020334780216217, + "learning_rate": 1.4137214076067532e-05, + "loss": 0.5638, + "step": 26595 + }, + { + "epoch": 0.7302580999450852, + "grad_norm": 0.3610541820526123, + "learning_rate": 1.4136820874923094e-05, + "loss": 0.5159, + "step": 26596 + }, + { + "epoch": 0.7302855573860516, + "grad_norm": 0.39594796299934387, + "learning_rate": 1.4136427666062169e-05, + "loss": 0.5206, + "step": 26597 + }, + { + "epoch": 0.7303130148270182, + "grad_norm": 0.39101001620292664, + "learning_rate": 1.4136034449485487e-05, + "loss": 0.4311, + "step": 26598 + }, + { + "epoch": 0.7303404722679846, + "grad_norm": 0.3728178143501282, + "learning_rate": 1.413564122519378e-05, + "loss": 0.5061, + "step": 26599 + }, + { + "epoch": 0.7303679297089511, + "grad_norm": 0.376911461353302, + "learning_rate": 1.4135247993187781e-05, + "loss": 0.5727, + "step": 26600 + }, + { + "epoch": 0.7303953871499176, + "grad_norm": 0.40042656660079956, + "learning_rate": 1.4134854753468226e-05, + "loss": 0.4835, + "step": 26601 + }, + { + "epoch": 0.7304228445908841, + "grad_norm": 0.44072577357292175, + "learning_rate": 1.4134461506035846e-05, + "loss": 0.5519, + "step": 26602 + }, + { + "epoch": 0.7304503020318507, + "grad_norm": 0.3764120638370514, + "learning_rate": 1.4134068250891377e-05, + "loss": 0.4893, + "step": 26603 + }, + { + "epoch": 0.7304777594728171, + "grad_norm": 0.36523425579071045, + "learning_rate": 1.4133674988035548e-05, + "loss": 0.4685, + "step": 26604 + }, + { + "epoch": 0.7305052169137837, + "grad_norm": 0.34030604362487793, + "learning_rate": 1.41332817174691e-05, + "loss": 0.4473, + "step": 26605 + }, + { + "epoch": 0.7305326743547501, + "grad_norm": 0.3979475200176239, + "learning_rate": 1.413288843919276e-05, + "loss": 0.5462, + "step": 26606 + }, + { + "epoch": 0.7305601317957167, + "grad_norm": 0.3781273663043976, + "learning_rate": 1.4132495153207264e-05, + "loss": 0.5008, + "step": 26607 + }, + { + "epoch": 0.7305875892366831, + "grad_norm": 0.41187605261802673, + "learning_rate": 1.4132101859513347e-05, + "loss": 0.4673, + "step": 26608 + }, + { + "epoch": 0.7306150466776496, + "grad_norm": 0.42054057121276855, + "learning_rate": 1.4131708558111741e-05, + "loss": 0.5041, + "step": 26609 + }, + { + "epoch": 0.7306425041186162, + "grad_norm": 0.36657437682151794, + "learning_rate": 1.413131524900318e-05, + "loss": 0.5214, + "step": 26610 + }, + { + "epoch": 0.7306699615595826, + "grad_norm": 0.3942070007324219, + "learning_rate": 1.4130921932188398e-05, + "loss": 0.5069, + "step": 26611 + }, + { + "epoch": 0.7306974190005492, + "grad_norm": 0.45702075958251953, + "learning_rate": 1.4130528607668124e-05, + "loss": 0.5386, + "step": 26612 + }, + { + "epoch": 0.7307248764415156, + "grad_norm": 0.4739861786365509, + "learning_rate": 1.41301352754431e-05, + "loss": 0.4841, + "step": 26613 + }, + { + "epoch": 0.7307523338824822, + "grad_norm": 0.3973667621612549, + "learning_rate": 1.4129741935514055e-05, + "loss": 0.3918, + "step": 26614 + }, + { + "epoch": 0.7307797913234486, + "grad_norm": 0.3771193027496338, + "learning_rate": 1.4129348587881721e-05, + "loss": 0.4598, + "step": 26615 + }, + { + "epoch": 0.7308072487644152, + "grad_norm": 0.42019081115722656, + "learning_rate": 1.4128955232546838e-05, + "loss": 0.5464, + "step": 26616 + }, + { + "epoch": 0.7308347062053817, + "grad_norm": 0.37588950991630554, + "learning_rate": 1.4128561869510131e-05, + "loss": 0.5691, + "step": 26617 + }, + { + "epoch": 0.7308621636463482, + "grad_norm": 0.3414279520511627, + "learning_rate": 1.4128168498772341e-05, + "loss": 0.4597, + "step": 26618 + }, + { + "epoch": 0.7308896210873147, + "grad_norm": 0.37602299451828003, + "learning_rate": 1.41277751203342e-05, + "loss": 0.5049, + "step": 26619 + }, + { + "epoch": 0.7309170785282811, + "grad_norm": 0.37521424889564514, + "learning_rate": 1.4127381734196437e-05, + "loss": 0.4413, + "step": 26620 + }, + { + "epoch": 0.7309445359692477, + "grad_norm": 0.5217202305793762, + "learning_rate": 1.4126988340359796e-05, + "loss": 0.4992, + "step": 26621 + }, + { + "epoch": 0.7309719934102141, + "grad_norm": 0.37709757685661316, + "learning_rate": 1.4126594938824998e-05, + "loss": 0.4308, + "step": 26622 + }, + { + "epoch": 0.7309994508511807, + "grad_norm": 0.3669458329677582, + "learning_rate": 1.4126201529592786e-05, + "loss": 0.5002, + "step": 26623 + }, + { + "epoch": 0.7310269082921472, + "grad_norm": 0.4073140621185303, + "learning_rate": 1.4125808112663894e-05, + "loss": 0.4133, + "step": 26624 + }, + { + "epoch": 0.7310543657331137, + "grad_norm": 0.4262892007827759, + "learning_rate": 1.412541468803905e-05, + "loss": 0.6321, + "step": 26625 + }, + { + "epoch": 0.7310818231740802, + "grad_norm": 0.3695288896560669, + "learning_rate": 1.4125021255718991e-05, + "loss": 0.4906, + "step": 26626 + }, + { + "epoch": 0.7311092806150467, + "grad_norm": 0.42302626371383667, + "learning_rate": 1.4124627815704452e-05, + "loss": 0.3965, + "step": 26627 + }, + { + "epoch": 0.7311367380560132, + "grad_norm": 0.3469468355178833, + "learning_rate": 1.4124234367996165e-05, + "loss": 0.4854, + "step": 26628 + }, + { + "epoch": 0.7311641954969796, + "grad_norm": 0.45364734530448914, + "learning_rate": 1.4123840912594866e-05, + "loss": 0.5305, + "step": 26629 + }, + { + "epoch": 0.7311916529379462, + "grad_norm": 0.3702292740345001, + "learning_rate": 1.4123447449501286e-05, + "loss": 0.5682, + "step": 26630 + }, + { + "epoch": 0.7312191103789127, + "grad_norm": 0.36429956555366516, + "learning_rate": 1.4123053978716162e-05, + "loss": 0.4649, + "step": 26631 + }, + { + "epoch": 0.7312465678198792, + "grad_norm": 0.36547327041625977, + "learning_rate": 1.4122660500240222e-05, + "loss": 0.5376, + "step": 26632 + }, + { + "epoch": 0.7312740252608457, + "grad_norm": 0.4035506844520569, + "learning_rate": 1.412226701407421e-05, + "loss": 0.5301, + "step": 26633 + }, + { + "epoch": 0.7313014827018122, + "grad_norm": 0.34333980083465576, + "learning_rate": 1.4121873520218851e-05, + "loss": 0.3999, + "step": 26634 + }, + { + "epoch": 0.7313289401427787, + "grad_norm": 0.36641931533813477, + "learning_rate": 1.4121480018674883e-05, + "loss": 0.5865, + "step": 26635 + }, + { + "epoch": 0.7313563975837452, + "grad_norm": 0.40156421065330505, + "learning_rate": 1.4121086509443041e-05, + "loss": 0.5058, + "step": 26636 + }, + { + "epoch": 0.7313838550247117, + "grad_norm": 0.3742820620536804, + "learning_rate": 1.4120692992524054e-05, + "loss": 0.544, + "step": 26637 + }, + { + "epoch": 0.7314113124656783, + "grad_norm": 0.4717273712158203, + "learning_rate": 1.4120299467918664e-05, + "loss": 0.4797, + "step": 26638 + }, + { + "epoch": 0.7314387699066447, + "grad_norm": 0.43366363644599915, + "learning_rate": 1.4119905935627597e-05, + "loss": 0.5514, + "step": 26639 + }, + { + "epoch": 0.7314662273476112, + "grad_norm": 0.3691422939300537, + "learning_rate": 1.411951239565159e-05, + "loss": 0.4288, + "step": 26640 + }, + { + "epoch": 0.7314936847885777, + "grad_norm": 0.35187941789627075, + "learning_rate": 1.411911884799138e-05, + "loss": 0.4482, + "step": 26641 + }, + { + "epoch": 0.7315211422295442, + "grad_norm": 0.3669714331626892, + "learning_rate": 1.4118725292647697e-05, + "loss": 0.4393, + "step": 26642 + }, + { + "epoch": 0.7315485996705107, + "grad_norm": 0.384246826171875, + "learning_rate": 1.4118331729621277e-05, + "loss": 0.511, + "step": 26643 + }, + { + "epoch": 0.7315760571114772, + "grad_norm": 0.40885427594184875, + "learning_rate": 1.4117938158912854e-05, + "loss": 0.5906, + "step": 26644 + }, + { + "epoch": 0.7316035145524438, + "grad_norm": 0.38156941533088684, + "learning_rate": 1.4117544580523163e-05, + "loss": 0.5341, + "step": 26645 + }, + { + "epoch": 0.7316309719934102, + "grad_norm": 0.38611382246017456, + "learning_rate": 1.4117150994452937e-05, + "loss": 0.5292, + "step": 26646 + }, + { + "epoch": 0.7316584294343768, + "grad_norm": 0.3753373622894287, + "learning_rate": 1.4116757400702907e-05, + "loss": 0.4356, + "step": 26647 + }, + { + "epoch": 0.7316858868753432, + "grad_norm": 0.3972727954387665, + "learning_rate": 1.4116363799273816e-05, + "loss": 0.5588, + "step": 26648 + }, + { + "epoch": 0.7317133443163097, + "grad_norm": 0.39224064350128174, + "learning_rate": 1.4115970190166387e-05, + "loss": 0.5327, + "step": 26649 + }, + { + "epoch": 0.7317408017572762, + "grad_norm": 0.35966992378234863, + "learning_rate": 1.4115576573381364e-05, + "loss": 0.5228, + "step": 26650 + }, + { + "epoch": 0.7317682591982427, + "grad_norm": 0.5135764479637146, + "learning_rate": 1.4115182948919475e-05, + "loss": 0.5464, + "step": 26651 + }, + { + "epoch": 0.7317957166392092, + "grad_norm": 0.5260113477706909, + "learning_rate": 1.4114789316781458e-05, + "loss": 0.5305, + "step": 26652 + }, + { + "epoch": 0.7318231740801757, + "grad_norm": 0.35490211844444275, + "learning_rate": 1.4114395676968047e-05, + "loss": 0.531, + "step": 26653 + }, + { + "epoch": 0.7318506315211423, + "grad_norm": 0.3917534351348877, + "learning_rate": 1.4114002029479972e-05, + "loss": 0.5294, + "step": 26654 + }, + { + "epoch": 0.7318780889621087, + "grad_norm": 0.42279505729675293, + "learning_rate": 1.4113608374317973e-05, + "loss": 0.5256, + "step": 26655 + }, + { + "epoch": 0.7319055464030753, + "grad_norm": 0.3340851068496704, + "learning_rate": 1.4113214711482779e-05, + "loss": 0.4211, + "step": 26656 + }, + { + "epoch": 0.7319330038440417, + "grad_norm": 0.4368550777435303, + "learning_rate": 1.4112821040975126e-05, + "loss": 0.532, + "step": 26657 + }, + { + "epoch": 0.7319604612850082, + "grad_norm": 0.4259128272533417, + "learning_rate": 1.4112427362795752e-05, + "loss": 0.4526, + "step": 26658 + }, + { + "epoch": 0.7319879187259747, + "grad_norm": 0.41899076104164124, + "learning_rate": 1.4112033676945387e-05, + "loss": 0.4457, + "step": 26659 + }, + { + "epoch": 0.7320153761669412, + "grad_norm": 0.38266968727111816, + "learning_rate": 1.4111639983424768e-05, + "loss": 0.5027, + "step": 26660 + }, + { + "epoch": 0.7320428336079078, + "grad_norm": 0.3534713685512543, + "learning_rate": 1.4111246282234626e-05, + "loss": 0.5048, + "step": 26661 + }, + { + "epoch": 0.7320702910488742, + "grad_norm": 0.4523453712463379, + "learning_rate": 1.4110852573375697e-05, + "loss": 0.4822, + "step": 26662 + }, + { + "epoch": 0.7320977484898408, + "grad_norm": 0.4091130495071411, + "learning_rate": 1.4110458856848719e-05, + "loss": 0.4893, + "step": 26663 + }, + { + "epoch": 0.7321252059308072, + "grad_norm": 0.4428843557834625, + "learning_rate": 1.4110065132654421e-05, + "loss": 0.5753, + "step": 26664 + }, + { + "epoch": 0.7321526633717738, + "grad_norm": 0.3675065040588379, + "learning_rate": 1.4109671400793541e-05, + "loss": 0.5534, + "step": 26665 + }, + { + "epoch": 0.7321801208127402, + "grad_norm": 0.4975585341453552, + "learning_rate": 1.4109277661266809e-05, + "loss": 0.5463, + "step": 26666 + }, + { + "epoch": 0.7322075782537067, + "grad_norm": 0.35465702414512634, + "learning_rate": 1.4108883914074966e-05, + "loss": 0.5995, + "step": 26667 + }, + { + "epoch": 0.7322350356946733, + "grad_norm": 0.35120564699172974, + "learning_rate": 1.4108490159218745e-05, + "loss": 0.4265, + "step": 26668 + }, + { + "epoch": 0.7322624931356397, + "grad_norm": 0.4252427816390991, + "learning_rate": 1.4108096396698874e-05, + "loss": 0.4855, + "step": 26669 + }, + { + "epoch": 0.7322899505766063, + "grad_norm": 0.4417116641998291, + "learning_rate": 1.4107702626516096e-05, + "loss": 0.4766, + "step": 26670 + }, + { + "epoch": 0.7323174080175727, + "grad_norm": 0.3743552565574646, + "learning_rate": 1.4107308848671138e-05, + "loss": 0.4706, + "step": 26671 + }, + { + "epoch": 0.7323448654585393, + "grad_norm": 0.4142800569534302, + "learning_rate": 1.410691506316474e-05, + "loss": 0.5337, + "step": 26672 + }, + { + "epoch": 0.7323723228995057, + "grad_norm": 0.3921546936035156, + "learning_rate": 1.4106521269997636e-05, + "loss": 0.488, + "step": 26673 + }, + { + "epoch": 0.7323997803404723, + "grad_norm": 0.37586069107055664, + "learning_rate": 1.4106127469170556e-05, + "loss": 0.5124, + "step": 26674 + }, + { + "epoch": 0.7324272377814388, + "grad_norm": 0.3704080283641815, + "learning_rate": 1.410573366068424e-05, + "loss": 0.4697, + "step": 26675 + }, + { + "epoch": 0.7324546952224052, + "grad_norm": 0.355331689119339, + "learning_rate": 1.4105339844539418e-05, + "loss": 0.5386, + "step": 26676 + }, + { + "epoch": 0.7324821526633718, + "grad_norm": 0.4029296338558197, + "learning_rate": 1.4104946020736829e-05, + "loss": 0.435, + "step": 26677 + }, + { + "epoch": 0.7325096101043382, + "grad_norm": 0.375123530626297, + "learning_rate": 1.4104552189277207e-05, + "loss": 0.4799, + "step": 26678 + }, + { + "epoch": 0.7325370675453048, + "grad_norm": 0.43076229095458984, + "learning_rate": 1.410415835016128e-05, + "loss": 0.4997, + "step": 26679 + }, + { + "epoch": 0.7325645249862712, + "grad_norm": 0.3566029965877533, + "learning_rate": 1.4103764503389794e-05, + "loss": 0.5157, + "step": 26680 + }, + { + "epoch": 0.7325919824272378, + "grad_norm": 0.4659750759601593, + "learning_rate": 1.4103370648963475e-05, + "loss": 0.5435, + "step": 26681 + }, + { + "epoch": 0.7326194398682043, + "grad_norm": 0.3504939675331116, + "learning_rate": 1.4102976786883059e-05, + "loss": 0.4683, + "step": 26682 + }, + { + "epoch": 0.7326468973091708, + "grad_norm": 0.43005064129829407, + "learning_rate": 1.4102582917149281e-05, + "loss": 0.5061, + "step": 26683 + }, + { + "epoch": 0.7326743547501373, + "grad_norm": 0.3728664815425873, + "learning_rate": 1.4102189039762878e-05, + "loss": 0.4543, + "step": 26684 + }, + { + "epoch": 0.7327018121911038, + "grad_norm": 0.3582504987716675, + "learning_rate": 1.4101795154724582e-05, + "loss": 0.4898, + "step": 26685 + }, + { + "epoch": 0.7327292696320703, + "grad_norm": 0.4484447240829468, + "learning_rate": 1.4101401262035132e-05, + "loss": 0.5573, + "step": 26686 + }, + { + "epoch": 0.7327567270730367, + "grad_norm": 0.3991978168487549, + "learning_rate": 1.4101007361695256e-05, + "loss": 0.3768, + "step": 26687 + }, + { + "epoch": 0.7327841845140033, + "grad_norm": 0.3809034526348114, + "learning_rate": 1.4100613453705694e-05, + "loss": 0.5598, + "step": 26688 + }, + { + "epoch": 0.7328116419549698, + "grad_norm": 0.4048158824443817, + "learning_rate": 1.410021953806718e-05, + "loss": 0.4241, + "step": 26689 + }, + { + "epoch": 0.7328390993959363, + "grad_norm": 0.3699227273464203, + "learning_rate": 1.4099825614780445e-05, + "loss": 0.5024, + "step": 26690 + }, + { + "epoch": 0.7328665568369028, + "grad_norm": 0.48359525203704834, + "learning_rate": 1.409943168384623e-05, + "loss": 0.4765, + "step": 26691 + }, + { + "epoch": 0.7328940142778693, + "grad_norm": 0.3565784692764282, + "learning_rate": 1.4099037745265265e-05, + "loss": 0.4381, + "step": 26692 + }, + { + "epoch": 0.7329214717188358, + "grad_norm": 0.4451216757297516, + "learning_rate": 1.4098643799038287e-05, + "loss": 0.6161, + "step": 26693 + }, + { + "epoch": 0.7329489291598023, + "grad_norm": 0.37974485754966736, + "learning_rate": 1.409824984516603e-05, + "loss": 0.4794, + "step": 26694 + }, + { + "epoch": 0.7329763866007688, + "grad_norm": 0.3507250249385834, + "learning_rate": 1.4097855883649227e-05, + "loss": 0.4746, + "step": 26695 + }, + { + "epoch": 0.7330038440417354, + "grad_norm": 0.38968220353126526, + "learning_rate": 1.4097461914488617e-05, + "loss": 0.4762, + "step": 26696 + }, + { + "epoch": 0.7330313014827018, + "grad_norm": 0.36109769344329834, + "learning_rate": 1.4097067937684931e-05, + "loss": 0.5282, + "step": 26697 + }, + { + "epoch": 0.7330587589236683, + "grad_norm": 0.36757415533065796, + "learning_rate": 1.4096673953238908e-05, + "loss": 0.4548, + "step": 26698 + }, + { + "epoch": 0.7330862163646348, + "grad_norm": 0.3779936134815216, + "learning_rate": 1.4096279961151282e-05, + "loss": 0.5052, + "step": 26699 + }, + { + "epoch": 0.7331136738056013, + "grad_norm": 0.37249383330345154, + "learning_rate": 1.4095885961422782e-05, + "loss": 0.5192, + "step": 26700 + }, + { + "epoch": 0.7331411312465678, + "grad_norm": 0.4277748167514801, + "learning_rate": 1.409549195405415e-05, + "loss": 0.5789, + "step": 26701 + }, + { + "epoch": 0.7331685886875343, + "grad_norm": 0.4231413006782532, + "learning_rate": 1.4095097939046118e-05, + "loss": 0.5703, + "step": 26702 + }, + { + "epoch": 0.7331960461285009, + "grad_norm": 0.39980870485305786, + "learning_rate": 1.4094703916399422e-05, + "loss": 0.4315, + "step": 26703 + }, + { + "epoch": 0.7332235035694673, + "grad_norm": 0.4135701060295105, + "learning_rate": 1.4094309886114795e-05, + "loss": 0.5196, + "step": 26704 + }, + { + "epoch": 0.7332509610104339, + "grad_norm": 0.3548392355442047, + "learning_rate": 1.4093915848192974e-05, + "loss": 0.4487, + "step": 26705 + }, + { + "epoch": 0.7332784184514003, + "grad_norm": 0.34857165813446045, + "learning_rate": 1.4093521802634693e-05, + "loss": 0.3435, + "step": 26706 + }, + { + "epoch": 0.7333058758923668, + "grad_norm": 0.40449267625808716, + "learning_rate": 1.4093127749440687e-05, + "loss": 0.488, + "step": 26707 + }, + { + "epoch": 0.7333333333333333, + "grad_norm": 0.3974066972732544, + "learning_rate": 1.4092733688611692e-05, + "loss": 0.4478, + "step": 26708 + }, + { + "epoch": 0.7333607907742998, + "grad_norm": 0.41632896661758423, + "learning_rate": 1.4092339620148442e-05, + "loss": 0.4021, + "step": 26709 + }, + { + "epoch": 0.7333882482152664, + "grad_norm": 0.4103683829307556, + "learning_rate": 1.4091945544051672e-05, + "loss": 0.4985, + "step": 26710 + }, + { + "epoch": 0.7334157056562328, + "grad_norm": 0.3618229031562805, + "learning_rate": 1.4091551460322119e-05, + "loss": 0.5356, + "step": 26711 + }, + { + "epoch": 0.7334431630971994, + "grad_norm": 0.3905039131641388, + "learning_rate": 1.4091157368960517e-05, + "loss": 0.4706, + "step": 26712 + }, + { + "epoch": 0.7334706205381658, + "grad_norm": 0.4260636568069458, + "learning_rate": 1.4090763269967597e-05, + "loss": 0.436, + "step": 26713 + }, + { + "epoch": 0.7334980779791324, + "grad_norm": 0.5614686012268066, + "learning_rate": 1.4090369163344104e-05, + "loss": 0.4346, + "step": 26714 + }, + { + "epoch": 0.7335255354200988, + "grad_norm": 0.37865978479385376, + "learning_rate": 1.4089975049090762e-05, + "loss": 0.4942, + "step": 26715 + }, + { + "epoch": 0.7335529928610653, + "grad_norm": 0.34778186678886414, + "learning_rate": 1.4089580927208312e-05, + "loss": 0.4524, + "step": 26716 + }, + { + "epoch": 0.7335804503020319, + "grad_norm": 0.41114431619644165, + "learning_rate": 1.4089186797697492e-05, + "loss": 0.595, + "step": 26717 + }, + { + "epoch": 0.7336079077429983, + "grad_norm": 0.3985481560230255, + "learning_rate": 1.408879266055903e-05, + "loss": 0.5265, + "step": 26718 + }, + { + "epoch": 0.7336353651839649, + "grad_norm": 0.4016360342502594, + "learning_rate": 1.4088398515793666e-05, + "loss": 0.5423, + "step": 26719 + }, + { + "epoch": 0.7336628226249313, + "grad_norm": 0.4575062394142151, + "learning_rate": 1.4088004363402133e-05, + "loss": 0.5209, + "step": 26720 + }, + { + "epoch": 0.7336902800658979, + "grad_norm": 0.3783665895462036, + "learning_rate": 1.4087610203385166e-05, + "loss": 0.5247, + "step": 26721 + }, + { + "epoch": 0.7337177375068643, + "grad_norm": 0.34201475977897644, + "learning_rate": 1.4087216035743505e-05, + "loss": 0.4794, + "step": 26722 + }, + { + "epoch": 0.7337451949478309, + "grad_norm": 0.5068672299385071, + "learning_rate": 1.4086821860477877e-05, + "loss": 0.4763, + "step": 26723 + }, + { + "epoch": 0.7337726523887974, + "grad_norm": 0.3796377182006836, + "learning_rate": 1.4086427677589026e-05, + "loss": 0.4679, + "step": 26724 + }, + { + "epoch": 0.7338001098297638, + "grad_norm": 0.5012624263763428, + "learning_rate": 1.4086033487077681e-05, + "loss": 0.4855, + "step": 26725 + }, + { + "epoch": 0.7338275672707304, + "grad_norm": 0.37655067443847656, + "learning_rate": 1.408563928894458e-05, + "loss": 0.4846, + "step": 26726 + }, + { + "epoch": 0.7338550247116968, + "grad_norm": 0.38984498381614685, + "learning_rate": 1.4085245083190457e-05, + "loss": 0.477, + "step": 26727 + }, + { + "epoch": 0.7338824821526634, + "grad_norm": 0.40606385469436646, + "learning_rate": 1.4084850869816048e-05, + "loss": 0.4923, + "step": 26728 + }, + { + "epoch": 0.7339099395936298, + "grad_norm": 0.37682977318763733, + "learning_rate": 1.408445664882209e-05, + "loss": 0.5079, + "step": 26729 + }, + { + "epoch": 0.7339373970345964, + "grad_norm": 0.3731168806552887, + "learning_rate": 1.4084062420209317e-05, + "loss": 0.4486, + "step": 26730 + }, + { + "epoch": 0.7339648544755629, + "grad_norm": 0.3683261573314667, + "learning_rate": 1.4083668183978461e-05, + "loss": 0.4619, + "step": 26731 + }, + { + "epoch": 0.7339923119165294, + "grad_norm": 0.3711625337600708, + "learning_rate": 1.4083273940130263e-05, + "loss": 0.4432, + "step": 26732 + }, + { + "epoch": 0.7340197693574959, + "grad_norm": 0.35676294565200806, + "learning_rate": 1.4082879688665455e-05, + "loss": 0.4879, + "step": 26733 + }, + { + "epoch": 0.7340472267984623, + "grad_norm": 0.386452317237854, + "learning_rate": 1.4082485429584774e-05, + "loss": 0.4884, + "step": 26734 + }, + { + "epoch": 0.7340746842394289, + "grad_norm": 0.4428277313709259, + "learning_rate": 1.4082091162888953e-05, + "loss": 0.4806, + "step": 26735 + }, + { + "epoch": 0.7341021416803953, + "grad_norm": 0.4111708104610443, + "learning_rate": 1.4081696888578731e-05, + "loss": 0.5101, + "step": 26736 + }, + { + "epoch": 0.7341295991213619, + "grad_norm": 0.4626118242740631, + "learning_rate": 1.408130260665484e-05, + "loss": 0.545, + "step": 26737 + }, + { + "epoch": 0.7341570565623284, + "grad_norm": 0.36632686853408813, + "learning_rate": 1.4080908317118019e-05, + "loss": 0.5265, + "step": 26738 + }, + { + "epoch": 0.7341845140032949, + "grad_norm": 0.4038887321949005, + "learning_rate": 1.4080514019968998e-05, + "loss": 0.4233, + "step": 26739 + }, + { + "epoch": 0.7342119714442614, + "grad_norm": 0.3883209228515625, + "learning_rate": 1.408011971520852e-05, + "loss": 0.5228, + "step": 26740 + }, + { + "epoch": 0.7342394288852279, + "grad_norm": 0.37469372153282166, + "learning_rate": 1.4079725402837315e-05, + "loss": 0.4838, + "step": 26741 + }, + { + "epoch": 0.7342668863261944, + "grad_norm": 0.36086317896842957, + "learning_rate": 1.407933108285612e-05, + "loss": 0.4578, + "step": 26742 + }, + { + "epoch": 0.7342943437671609, + "grad_norm": 0.40030696988105774, + "learning_rate": 1.407893675526567e-05, + "loss": 0.5545, + "step": 26743 + }, + { + "epoch": 0.7343218012081274, + "grad_norm": 0.3916415274143219, + "learning_rate": 1.4078542420066704e-05, + "loss": 0.4515, + "step": 26744 + }, + { + "epoch": 0.734349258649094, + "grad_norm": 0.44614145159721375, + "learning_rate": 1.4078148077259952e-05, + "loss": 0.5019, + "step": 26745 + }, + { + "epoch": 0.7343767160900604, + "grad_norm": 0.4030715823173523, + "learning_rate": 1.407775372684615e-05, + "loss": 0.4345, + "step": 26746 + }, + { + "epoch": 0.7344041735310269, + "grad_norm": 0.39367079734802246, + "learning_rate": 1.407735936882604e-05, + "loss": 0.5357, + "step": 26747 + }, + { + "epoch": 0.7344316309719934, + "grad_norm": 0.41018959879875183, + "learning_rate": 1.4076965003200352e-05, + "loss": 0.4775, + "step": 26748 + }, + { + "epoch": 0.7344590884129599, + "grad_norm": 0.3469686210155487, + "learning_rate": 1.407657062996982e-05, + "loss": 0.4466, + "step": 26749 + }, + { + "epoch": 0.7344865458539264, + "grad_norm": 0.5237682461738586, + "learning_rate": 1.4076176249135187e-05, + "loss": 0.5632, + "step": 26750 + }, + { + "epoch": 0.7345140032948929, + "grad_norm": 0.3647957146167755, + "learning_rate": 1.4075781860697182e-05, + "loss": 0.5484, + "step": 26751 + }, + { + "epoch": 0.7345414607358595, + "grad_norm": 0.4532521367073059, + "learning_rate": 1.4075387464656544e-05, + "loss": 0.4751, + "step": 26752 + }, + { + "epoch": 0.7345689181768259, + "grad_norm": 0.5405377745628357, + "learning_rate": 1.4074993061014008e-05, + "loss": 0.4761, + "step": 26753 + }, + { + "epoch": 0.7345963756177925, + "grad_norm": 0.472959965467453, + "learning_rate": 1.4074598649770307e-05, + "loss": 0.6116, + "step": 26754 + }, + { + "epoch": 0.7346238330587589, + "grad_norm": 0.37921208143234253, + "learning_rate": 1.4074204230926185e-05, + "loss": 0.4894, + "step": 26755 + }, + { + "epoch": 0.7346512904997254, + "grad_norm": 0.37806081771850586, + "learning_rate": 1.4073809804482368e-05, + "loss": 0.4929, + "step": 26756 + }, + { + "epoch": 0.7346787479406919, + "grad_norm": 0.3266824781894684, + "learning_rate": 1.4073415370439594e-05, + "loss": 0.4875, + "step": 26757 + }, + { + "epoch": 0.7347062053816584, + "grad_norm": 0.4415694773197174, + "learning_rate": 1.4073020928798602e-05, + "loss": 0.5264, + "step": 26758 + }, + { + "epoch": 0.734733662822625, + "grad_norm": 0.37810978293418884, + "learning_rate": 1.4072626479560123e-05, + "loss": 0.4675, + "step": 26759 + }, + { + "epoch": 0.7347611202635914, + "grad_norm": 0.4063452482223511, + "learning_rate": 1.4072232022724901e-05, + "loss": 0.5168, + "step": 26760 + }, + { + "epoch": 0.734788577704558, + "grad_norm": 0.3630560636520386, + "learning_rate": 1.4071837558293664e-05, + "loss": 0.5019, + "step": 26761 + }, + { + "epoch": 0.7348160351455244, + "grad_norm": 0.35066670179367065, + "learning_rate": 1.407144308626715e-05, + "loss": 0.4511, + "step": 26762 + }, + { + "epoch": 0.734843492586491, + "grad_norm": 0.47871726751327515, + "learning_rate": 1.4071048606646094e-05, + "loss": 0.4671, + "step": 26763 + }, + { + "epoch": 0.7348709500274574, + "grad_norm": 0.36331501603126526, + "learning_rate": 1.4070654119431235e-05, + "loss": 0.4429, + "step": 26764 + }, + { + "epoch": 0.7348984074684239, + "grad_norm": 0.40536069869995117, + "learning_rate": 1.4070259624623306e-05, + "loss": 0.4696, + "step": 26765 + }, + { + "epoch": 0.7349258649093905, + "grad_norm": 0.44533032178878784, + "learning_rate": 1.4069865122223046e-05, + "loss": 0.5336, + "step": 26766 + }, + { + "epoch": 0.7349533223503569, + "grad_norm": 0.3424697816371918, + "learning_rate": 1.4069470612231184e-05, + "loss": 0.421, + "step": 26767 + }, + { + "epoch": 0.7349807797913235, + "grad_norm": 0.45720845460891724, + "learning_rate": 1.4069076094648464e-05, + "loss": 0.5469, + "step": 26768 + }, + { + "epoch": 0.7350082372322899, + "grad_norm": 0.35980600118637085, + "learning_rate": 1.4068681569475615e-05, + "loss": 0.499, + "step": 26769 + }, + { + "epoch": 0.7350356946732565, + "grad_norm": 0.4075120687484741, + "learning_rate": 1.4068287036713381e-05, + "loss": 0.5372, + "step": 26770 + }, + { + "epoch": 0.7350631521142229, + "grad_norm": 0.37598979473114014, + "learning_rate": 1.4067892496362492e-05, + "loss": 0.4659, + "step": 26771 + }, + { + "epoch": 0.7350906095551895, + "grad_norm": 0.3727644681930542, + "learning_rate": 1.4067497948423683e-05, + "loss": 0.5116, + "step": 26772 + }, + { + "epoch": 0.735118066996156, + "grad_norm": 0.40593764185905457, + "learning_rate": 1.4067103392897694e-05, + "loss": 0.5518, + "step": 26773 + }, + { + "epoch": 0.7351455244371224, + "grad_norm": 0.45456749200820923, + "learning_rate": 1.4066708829785258e-05, + "loss": 0.5449, + "step": 26774 + }, + { + "epoch": 0.735172981878089, + "grad_norm": 0.414571613073349, + "learning_rate": 1.4066314259087112e-05, + "loss": 0.5349, + "step": 26775 + }, + { + "epoch": 0.7352004393190554, + "grad_norm": 0.48014166951179504, + "learning_rate": 1.4065919680803994e-05, + "loss": 0.5015, + "step": 26776 + }, + { + "epoch": 0.735227896760022, + "grad_norm": 0.36388012766838074, + "learning_rate": 1.4065525094936635e-05, + "loss": 0.5005, + "step": 26777 + }, + { + "epoch": 0.7352553542009884, + "grad_norm": 0.42333975434303284, + "learning_rate": 1.4065130501485777e-05, + "loss": 0.5066, + "step": 26778 + }, + { + "epoch": 0.735282811641955, + "grad_norm": 0.3746768534183502, + "learning_rate": 1.4064735900452153e-05, + "loss": 0.483, + "step": 26779 + }, + { + "epoch": 0.7353102690829215, + "grad_norm": 0.3555462062358856, + "learning_rate": 1.4064341291836497e-05, + "loss": 0.4283, + "step": 26780 + }, + { + "epoch": 0.735337726523888, + "grad_norm": 0.38079363107681274, + "learning_rate": 1.4063946675639549e-05, + "loss": 0.4989, + "step": 26781 + }, + { + "epoch": 0.7353651839648545, + "grad_norm": 0.38357651233673096, + "learning_rate": 1.406355205186204e-05, + "loss": 0.4783, + "step": 26782 + }, + { + "epoch": 0.735392641405821, + "grad_norm": 0.3867793679237366, + "learning_rate": 1.4063157420504716e-05, + "loss": 0.5737, + "step": 26783 + }, + { + "epoch": 0.7354200988467875, + "grad_norm": 0.4419555366039276, + "learning_rate": 1.4062762781568301e-05, + "loss": 0.535, + "step": 26784 + }, + { + "epoch": 0.7354475562877539, + "grad_norm": 0.36043888330459595, + "learning_rate": 1.4062368135053539e-05, + "loss": 0.5035, + "step": 26785 + }, + { + "epoch": 0.7354750137287205, + "grad_norm": 0.4382515251636505, + "learning_rate": 1.4061973480961164e-05, + "loss": 0.4206, + "step": 26786 + }, + { + "epoch": 0.735502471169687, + "grad_norm": 0.3858183026313782, + "learning_rate": 1.4061578819291914e-05, + "loss": 0.4363, + "step": 26787 + }, + { + "epoch": 0.7355299286106535, + "grad_norm": 0.40409815311431885, + "learning_rate": 1.4061184150046518e-05, + "loss": 0.5754, + "step": 26788 + }, + { + "epoch": 0.73555738605162, + "grad_norm": 0.34124770760536194, + "learning_rate": 1.4060789473225723e-05, + "loss": 0.4872, + "step": 26789 + }, + { + "epoch": 0.7355848434925865, + "grad_norm": 0.4785085916519165, + "learning_rate": 1.4060394788830254e-05, + "loss": 0.6009, + "step": 26790 + }, + { + "epoch": 0.735612300933553, + "grad_norm": 0.34383562207221985, + "learning_rate": 1.4060000096860858e-05, + "loss": 0.4518, + "step": 26791 + }, + { + "epoch": 0.7356397583745194, + "grad_norm": 0.3602442443370819, + "learning_rate": 1.4059605397318263e-05, + "loss": 0.4322, + "step": 26792 + }, + { + "epoch": 0.735667215815486, + "grad_norm": 0.4535205066204071, + "learning_rate": 1.405921069020321e-05, + "loss": 0.4844, + "step": 26793 + }, + { + "epoch": 0.7356946732564525, + "grad_norm": 0.3411996364593506, + "learning_rate": 1.4058815975516432e-05, + "loss": 0.3879, + "step": 26794 + }, + { + "epoch": 0.735722130697419, + "grad_norm": 0.3886256515979767, + "learning_rate": 1.4058421253258665e-05, + "loss": 0.4875, + "step": 26795 + }, + { + "epoch": 0.7357495881383855, + "grad_norm": 0.4088708460330963, + "learning_rate": 1.405802652343065e-05, + "loss": 0.4866, + "step": 26796 + }, + { + "epoch": 0.735777045579352, + "grad_norm": 0.3228149116039276, + "learning_rate": 1.4057631786033121e-05, + "loss": 0.4509, + "step": 26797 + }, + { + "epoch": 0.7358045030203185, + "grad_norm": 0.39626410603523254, + "learning_rate": 1.4057237041066811e-05, + "loss": 0.43, + "step": 26798 + }, + { + "epoch": 0.735831960461285, + "grad_norm": 0.371433287858963, + "learning_rate": 1.405684228853246e-05, + "loss": 0.5376, + "step": 26799 + }, + { + "epoch": 0.7358594179022515, + "grad_norm": 0.3786960542201996, + "learning_rate": 1.4056447528430803e-05, + "loss": 0.5216, + "step": 26800 + }, + { + "epoch": 0.7358868753432181, + "grad_norm": 0.3950761556625366, + "learning_rate": 1.4056052760762577e-05, + "loss": 0.4345, + "step": 26801 + }, + { + "epoch": 0.7359143327841845, + "grad_norm": 0.3593626916408539, + "learning_rate": 1.4055657985528522e-05, + "loss": 0.4782, + "step": 26802 + }, + { + "epoch": 0.735941790225151, + "grad_norm": 0.3727959096431732, + "learning_rate": 1.4055263202729363e-05, + "loss": 0.4961, + "step": 26803 + }, + { + "epoch": 0.7359692476661175, + "grad_norm": 0.335500568151474, + "learning_rate": 1.405486841236585e-05, + "loss": 0.4309, + "step": 26804 + }, + { + "epoch": 0.735996705107084, + "grad_norm": 0.46572306752204895, + "learning_rate": 1.405447361443871e-05, + "loss": 0.5871, + "step": 26805 + }, + { + "epoch": 0.7360241625480505, + "grad_norm": 0.4017355144023895, + "learning_rate": 1.4054078808948684e-05, + "loss": 0.5151, + "step": 26806 + }, + { + "epoch": 0.736051619989017, + "grad_norm": 0.3752233386039734, + "learning_rate": 1.4053683995896505e-05, + "loss": 0.4665, + "step": 26807 + }, + { + "epoch": 0.7360790774299836, + "grad_norm": 0.40134963393211365, + "learning_rate": 1.4053289175282914e-05, + "loss": 0.5131, + "step": 26808 + }, + { + "epoch": 0.73610653487095, + "grad_norm": 0.5097197890281677, + "learning_rate": 1.4052894347108646e-05, + "loss": 0.4892, + "step": 26809 + }, + { + "epoch": 0.7361339923119166, + "grad_norm": 0.4008578360080719, + "learning_rate": 1.4052499511374433e-05, + "loss": 0.5355, + "step": 26810 + }, + { + "epoch": 0.736161449752883, + "grad_norm": 0.4043530523777008, + "learning_rate": 1.4052104668081016e-05, + "loss": 0.5077, + "step": 26811 + }, + { + "epoch": 0.7361889071938496, + "grad_norm": 0.5712657570838928, + "learning_rate": 1.4051709817229133e-05, + "loss": 0.556, + "step": 26812 + }, + { + "epoch": 0.736216364634816, + "grad_norm": 0.3757840394973755, + "learning_rate": 1.4051314958819514e-05, + "loss": 0.486, + "step": 26813 + }, + { + "epoch": 0.7362438220757825, + "grad_norm": 0.3828083872795105, + "learning_rate": 1.4050920092852903e-05, + "loss": 0.5365, + "step": 26814 + }, + { + "epoch": 0.7362712795167491, + "grad_norm": 0.3400215804576874, + "learning_rate": 1.405052521933003e-05, + "loss": 0.5048, + "step": 26815 + }, + { + "epoch": 0.7362987369577155, + "grad_norm": 0.3809468150138855, + "learning_rate": 1.405013033825164e-05, + "loss": 0.4293, + "step": 26816 + }, + { + "epoch": 0.7363261943986821, + "grad_norm": 0.3395894169807434, + "learning_rate": 1.404973544961846e-05, + "loss": 0.4598, + "step": 26817 + }, + { + "epoch": 0.7363536518396485, + "grad_norm": 0.37438374757766724, + "learning_rate": 1.404934055343123e-05, + "loss": 0.4621, + "step": 26818 + }, + { + "epoch": 0.7363811092806151, + "grad_norm": 0.47816231846809387, + "learning_rate": 1.4048945649690691e-05, + "loss": 0.5332, + "step": 26819 + }, + { + "epoch": 0.7364085667215815, + "grad_norm": 0.35645508766174316, + "learning_rate": 1.4048550738397573e-05, + "loss": 0.4698, + "step": 26820 + }, + { + "epoch": 0.736436024162548, + "grad_norm": 0.4041135311126709, + "learning_rate": 1.4048155819552617e-05, + "loss": 0.5493, + "step": 26821 + }, + { + "epoch": 0.7364634816035146, + "grad_norm": 0.3920229375362396, + "learning_rate": 1.4047760893156561e-05, + "loss": 0.4296, + "step": 26822 + }, + { + "epoch": 0.736490939044481, + "grad_norm": 0.462161660194397, + "learning_rate": 1.4047365959210135e-05, + "loss": 0.5646, + "step": 26823 + }, + { + "epoch": 0.7365183964854476, + "grad_norm": 0.3791539967060089, + "learning_rate": 1.4046971017714082e-05, + "loss": 0.5487, + "step": 26824 + }, + { + "epoch": 0.736545853926414, + "grad_norm": 0.36192288994789124, + "learning_rate": 1.4046576068669135e-05, + "loss": 0.4647, + "step": 26825 + }, + { + "epoch": 0.7365733113673806, + "grad_norm": 0.42394378781318665, + "learning_rate": 1.4046181112076034e-05, + "loss": 0.4502, + "step": 26826 + }, + { + "epoch": 0.736600768808347, + "grad_norm": 0.38123819231987, + "learning_rate": 1.4045786147935512e-05, + "loss": 0.5483, + "step": 26827 + }, + { + "epoch": 0.7366282262493136, + "grad_norm": 0.41510316729545593, + "learning_rate": 1.4045391176248306e-05, + "loss": 0.523, + "step": 26828 + }, + { + "epoch": 0.7366556836902801, + "grad_norm": 0.4080808758735657, + "learning_rate": 1.404499619701516e-05, + "loss": 0.4025, + "step": 26829 + }, + { + "epoch": 0.7366831411312466, + "grad_norm": 0.3901219666004181, + "learning_rate": 1.4044601210236798e-05, + "loss": 0.5374, + "step": 26830 + }, + { + "epoch": 0.7367105985722131, + "grad_norm": 0.3654945492744446, + "learning_rate": 1.4044206215913968e-05, + "loss": 0.4977, + "step": 26831 + }, + { + "epoch": 0.7367380560131795, + "grad_norm": 0.4243938624858856, + "learning_rate": 1.4043811214047402e-05, + "loss": 0.4552, + "step": 26832 + }, + { + "epoch": 0.7367655134541461, + "grad_norm": 0.37865304946899414, + "learning_rate": 1.4043416204637838e-05, + "loss": 0.4619, + "step": 26833 + }, + { + "epoch": 0.7367929708951125, + "grad_norm": 0.42752042412757874, + "learning_rate": 1.4043021187686013e-05, + "loss": 0.5033, + "step": 26834 + }, + { + "epoch": 0.7368204283360791, + "grad_norm": 0.37474361062049866, + "learning_rate": 1.404262616319266e-05, + "loss": 0.4843, + "step": 26835 + }, + { + "epoch": 0.7368478857770456, + "grad_norm": 0.39080795645713806, + "learning_rate": 1.4042231131158521e-05, + "loss": 0.4643, + "step": 26836 + }, + { + "epoch": 0.7368753432180121, + "grad_norm": 0.4225553274154663, + "learning_rate": 1.4041836091584331e-05, + "loss": 0.5605, + "step": 26837 + }, + { + "epoch": 0.7369028006589786, + "grad_norm": 0.49672991037368774, + "learning_rate": 1.4041441044470827e-05, + "loss": 0.5722, + "step": 26838 + }, + { + "epoch": 0.7369302580999451, + "grad_norm": 0.363634318113327, + "learning_rate": 1.4041045989818744e-05, + "loss": 0.4386, + "step": 26839 + }, + { + "epoch": 0.7369577155409116, + "grad_norm": 0.39900293946266174, + "learning_rate": 1.404065092762882e-05, + "loss": 0.4836, + "step": 26840 + }, + { + "epoch": 0.736985172981878, + "grad_norm": 0.3871653079986572, + "learning_rate": 1.4040255857901797e-05, + "loss": 0.4954, + "step": 26841 + }, + { + "epoch": 0.7370126304228446, + "grad_norm": 0.4468710124492645, + "learning_rate": 1.4039860780638402e-05, + "loss": 0.5223, + "step": 26842 + }, + { + "epoch": 0.7370400878638111, + "grad_norm": 0.3423226773738861, + "learning_rate": 1.403946569583938e-05, + "loss": 0.4436, + "step": 26843 + }, + { + "epoch": 0.7370675453047776, + "grad_norm": 0.38789188861846924, + "learning_rate": 1.4039070603505465e-05, + "loss": 0.5919, + "step": 26844 + }, + { + "epoch": 0.7370950027457441, + "grad_norm": 0.3712608516216278, + "learning_rate": 1.4038675503637393e-05, + "loss": 0.4264, + "step": 26845 + }, + { + "epoch": 0.7371224601867106, + "grad_norm": 0.38093405961990356, + "learning_rate": 1.4038280396235903e-05, + "loss": 0.4715, + "step": 26846 + }, + { + "epoch": 0.7371499176276771, + "grad_norm": 0.3771340847015381, + "learning_rate": 1.4037885281301731e-05, + "loss": 0.4793, + "step": 26847 + }, + { + "epoch": 0.7371773750686436, + "grad_norm": 0.4227500259876251, + "learning_rate": 1.4037490158835617e-05, + "loss": 0.477, + "step": 26848 + }, + { + "epoch": 0.7372048325096101, + "grad_norm": 0.39581841230392456, + "learning_rate": 1.403709502883829e-05, + "loss": 0.5122, + "step": 26849 + }, + { + "epoch": 0.7372322899505767, + "grad_norm": 0.3972928822040558, + "learning_rate": 1.4036699891310495e-05, + "loss": 0.5747, + "step": 26850 + }, + { + "epoch": 0.7372597473915431, + "grad_norm": 0.4198804199695587, + "learning_rate": 1.4036304746252966e-05, + "loss": 0.5469, + "step": 26851 + }, + { + "epoch": 0.7372872048325096, + "grad_norm": 0.3642120063304901, + "learning_rate": 1.403590959366644e-05, + "loss": 0.4829, + "step": 26852 + }, + { + "epoch": 0.7373146622734761, + "grad_norm": 0.3882497251033783, + "learning_rate": 1.4035514433551655e-05, + "loss": 0.5109, + "step": 26853 + }, + { + "epoch": 0.7373421197144426, + "grad_norm": 0.4718131721019745, + "learning_rate": 1.4035119265909349e-05, + "loss": 0.5367, + "step": 26854 + }, + { + "epoch": 0.7373695771554091, + "grad_norm": 0.38770103454589844, + "learning_rate": 1.4034724090740256e-05, + "loss": 0.4127, + "step": 26855 + }, + { + "epoch": 0.7373970345963756, + "grad_norm": 0.377190500497818, + "learning_rate": 1.4034328908045115e-05, + "loss": 0.5243, + "step": 26856 + }, + { + "epoch": 0.7374244920373422, + "grad_norm": 0.4333525598049164, + "learning_rate": 1.4033933717824664e-05, + "loss": 0.5192, + "step": 26857 + }, + { + "epoch": 0.7374519494783086, + "grad_norm": 0.4396527111530304, + "learning_rate": 1.4033538520079638e-05, + "loss": 0.4861, + "step": 26858 + }, + { + "epoch": 0.7374794069192752, + "grad_norm": 0.37734219431877136, + "learning_rate": 1.4033143314810778e-05, + "loss": 0.4707, + "step": 26859 + }, + { + "epoch": 0.7375068643602416, + "grad_norm": 0.37134724855422974, + "learning_rate": 1.4032748102018813e-05, + "loss": 0.5001, + "step": 26860 + }, + { + "epoch": 0.7375343218012081, + "grad_norm": 0.5436025261878967, + "learning_rate": 1.403235288170449e-05, + "loss": 0.5384, + "step": 26861 + }, + { + "epoch": 0.7375617792421746, + "grad_norm": 0.40164047479629517, + "learning_rate": 1.403195765386854e-05, + "loss": 0.546, + "step": 26862 + }, + { + "epoch": 0.7375892366831411, + "grad_norm": 0.38459765911102295, + "learning_rate": 1.4031562418511704e-05, + "loss": 0.5496, + "step": 26863 + }, + { + "epoch": 0.7376166941241077, + "grad_norm": 0.3972252905368805, + "learning_rate": 1.4031167175634716e-05, + "loss": 0.5411, + "step": 26864 + }, + { + "epoch": 0.7376441515650741, + "grad_norm": 0.3950878977775574, + "learning_rate": 1.4030771925238314e-05, + "loss": 0.5337, + "step": 26865 + }, + { + "epoch": 0.7376716090060407, + "grad_norm": 0.39425697922706604, + "learning_rate": 1.4030376667323238e-05, + "loss": 0.4519, + "step": 26866 + }, + { + "epoch": 0.7376990664470071, + "grad_norm": 0.36517825722694397, + "learning_rate": 1.4029981401890222e-05, + "loss": 0.4502, + "step": 26867 + }, + { + "epoch": 0.7377265238879737, + "grad_norm": 0.3860747218132019, + "learning_rate": 1.4029586128940004e-05, + "loss": 0.4663, + "step": 26868 + }, + { + "epoch": 0.7377539813289401, + "grad_norm": 0.47391536831855774, + "learning_rate": 1.4029190848473322e-05, + "loss": 0.5615, + "step": 26869 + }, + { + "epoch": 0.7377814387699067, + "grad_norm": 0.3787294626235962, + "learning_rate": 1.4028795560490913e-05, + "loss": 0.4764, + "step": 26870 + }, + { + "epoch": 0.7378088962108732, + "grad_norm": 0.3876148462295532, + "learning_rate": 1.4028400264993517e-05, + "loss": 0.4974, + "step": 26871 + }, + { + "epoch": 0.7378363536518396, + "grad_norm": 0.4248621165752411, + "learning_rate": 1.4028004961981867e-05, + "loss": 0.6069, + "step": 26872 + }, + { + "epoch": 0.7378638110928062, + "grad_norm": 0.4256625175476074, + "learning_rate": 1.40276096514567e-05, + "loss": 0.4587, + "step": 26873 + }, + { + "epoch": 0.7378912685337726, + "grad_norm": 0.3804173469543457, + "learning_rate": 1.4027214333418757e-05, + "loss": 0.4536, + "step": 26874 + }, + { + "epoch": 0.7379187259747392, + "grad_norm": 0.43846410512924194, + "learning_rate": 1.4026819007868773e-05, + "loss": 0.5697, + "step": 26875 + }, + { + "epoch": 0.7379461834157056, + "grad_norm": 0.3968181610107422, + "learning_rate": 1.4026423674807488e-05, + "loss": 0.5266, + "step": 26876 + }, + { + "epoch": 0.7379736408566722, + "grad_norm": 0.3713231682777405, + "learning_rate": 1.402602833423564e-05, + "loss": 0.5643, + "step": 26877 + }, + { + "epoch": 0.7380010982976387, + "grad_norm": 0.3924883008003235, + "learning_rate": 1.402563298615396e-05, + "loss": 0.5078, + "step": 26878 + }, + { + "epoch": 0.7380285557386052, + "grad_norm": 0.3911442458629608, + "learning_rate": 1.4025237630563192e-05, + "loss": 0.5714, + "step": 26879 + }, + { + "epoch": 0.7380560131795717, + "grad_norm": 0.3872321546077728, + "learning_rate": 1.402484226746407e-05, + "loss": 0.4613, + "step": 26880 + }, + { + "epoch": 0.7380834706205381, + "grad_norm": 0.3668537437915802, + "learning_rate": 1.4024446896857331e-05, + "loss": 0.5565, + "step": 26881 + }, + { + "epoch": 0.7381109280615047, + "grad_norm": 0.447473406791687, + "learning_rate": 1.4024051518743718e-05, + "loss": 0.5889, + "step": 26882 + }, + { + "epoch": 0.7381383855024711, + "grad_norm": 0.4213132858276367, + "learning_rate": 1.4023656133123959e-05, + "loss": 0.5611, + "step": 26883 + }, + { + "epoch": 0.7381658429434377, + "grad_norm": 0.35202112793922424, + "learning_rate": 1.4023260739998804e-05, + "loss": 0.4745, + "step": 26884 + }, + { + "epoch": 0.7381933003844042, + "grad_norm": 0.3622756600379944, + "learning_rate": 1.402286533936898e-05, + "loss": 0.498, + "step": 26885 + }, + { + "epoch": 0.7382207578253707, + "grad_norm": 0.41173428297042847, + "learning_rate": 1.4022469931235226e-05, + "loss": 0.4599, + "step": 26886 + }, + { + "epoch": 0.7382482152663372, + "grad_norm": 0.35925057530403137, + "learning_rate": 1.4022074515598286e-05, + "loss": 0.5048, + "step": 26887 + }, + { + "epoch": 0.7382756727073037, + "grad_norm": 0.37363913655281067, + "learning_rate": 1.4021679092458888e-05, + "loss": 0.4914, + "step": 26888 + }, + { + "epoch": 0.7383031301482702, + "grad_norm": 0.39672181010246277, + "learning_rate": 1.4021283661817781e-05, + "loss": 0.534, + "step": 26889 + }, + { + "epoch": 0.7383305875892366, + "grad_norm": 0.3558575212955475, + "learning_rate": 1.4020888223675694e-05, + "loss": 0.4711, + "step": 26890 + }, + { + "epoch": 0.7383580450302032, + "grad_norm": 0.3770904839038849, + "learning_rate": 1.4020492778033368e-05, + "loss": 0.4182, + "step": 26891 + }, + { + "epoch": 0.7383855024711697, + "grad_norm": 1.0287595987319946, + "learning_rate": 1.4020097324891537e-05, + "loss": 0.4297, + "step": 26892 + }, + { + "epoch": 0.7384129599121362, + "grad_norm": 0.36092114448547363, + "learning_rate": 1.4019701864250943e-05, + "loss": 0.5392, + "step": 26893 + }, + { + "epoch": 0.7384404173531027, + "grad_norm": 0.6335458159446716, + "learning_rate": 1.4019306396112321e-05, + "loss": 0.4896, + "step": 26894 + }, + { + "epoch": 0.7384678747940692, + "grad_norm": 0.41617828607559204, + "learning_rate": 1.4018910920476414e-05, + "loss": 0.545, + "step": 26895 + }, + { + "epoch": 0.7384953322350357, + "grad_norm": 3.185993194580078, + "learning_rate": 1.4018515437343952e-05, + "loss": 0.4745, + "step": 26896 + }, + { + "epoch": 0.7385227896760022, + "grad_norm": 0.3929463028907776, + "learning_rate": 1.4018119946715678e-05, + "loss": 0.4456, + "step": 26897 + }, + { + "epoch": 0.7385502471169687, + "grad_norm": 0.41995003819465637, + "learning_rate": 1.4017724448592327e-05, + "loss": 0.5387, + "step": 26898 + }, + { + "epoch": 0.7385777045579353, + "grad_norm": 0.4150804877281189, + "learning_rate": 1.4017328942974635e-05, + "loss": 0.4559, + "step": 26899 + }, + { + "epoch": 0.7386051619989017, + "grad_norm": 0.38101813197135925, + "learning_rate": 1.4016933429863346e-05, + "loss": 0.4486, + "step": 26900 + }, + { + "epoch": 0.7386326194398682, + "grad_norm": 0.3665793538093567, + "learning_rate": 1.4016537909259191e-05, + "loss": 0.4496, + "step": 26901 + }, + { + "epoch": 0.7386600768808347, + "grad_norm": 0.4896700978279114, + "learning_rate": 1.4016142381162913e-05, + "loss": 0.6119, + "step": 26902 + }, + { + "epoch": 0.7386875343218012, + "grad_norm": 0.4386366307735443, + "learning_rate": 1.4015746845575249e-05, + "loss": 0.4467, + "step": 26903 + }, + { + "epoch": 0.7387149917627677, + "grad_norm": 0.3772076964378357, + "learning_rate": 1.4015351302496932e-05, + "loss": 0.4884, + "step": 26904 + }, + { + "epoch": 0.7387424492037342, + "grad_norm": 0.34964191913604736, + "learning_rate": 1.4014955751928705e-05, + "loss": 0.4984, + "step": 26905 + }, + { + "epoch": 0.7387699066447008, + "grad_norm": 0.4497126340866089, + "learning_rate": 1.4014560193871303e-05, + "loss": 0.4908, + "step": 26906 + }, + { + "epoch": 0.7387973640856672, + "grad_norm": 0.3677826523780823, + "learning_rate": 1.4014164628325466e-05, + "loss": 0.3728, + "step": 26907 + }, + { + "epoch": 0.7388248215266338, + "grad_norm": 0.37968766689300537, + "learning_rate": 1.4013769055291932e-05, + "loss": 0.4846, + "step": 26908 + }, + { + "epoch": 0.7388522789676002, + "grad_norm": 0.3600018322467804, + "learning_rate": 1.4013373474771435e-05, + "loss": 0.3895, + "step": 26909 + }, + { + "epoch": 0.7388797364085667, + "grad_norm": 0.3752509653568268, + "learning_rate": 1.4012977886764718e-05, + "loss": 0.5828, + "step": 26910 + }, + { + "epoch": 0.7389071938495332, + "grad_norm": 0.4137454628944397, + "learning_rate": 1.4012582291272516e-05, + "loss": 0.4445, + "step": 26911 + }, + { + "epoch": 0.7389346512904997, + "grad_norm": 0.36200979351997375, + "learning_rate": 1.4012186688295566e-05, + "loss": 0.4829, + "step": 26912 + }, + { + "epoch": 0.7389621087314663, + "grad_norm": 0.4001387655735016, + "learning_rate": 1.4011791077834609e-05, + "loss": 0.4656, + "step": 26913 + }, + { + "epoch": 0.7389895661724327, + "grad_norm": 0.4600948989391327, + "learning_rate": 1.4011395459890378e-05, + "loss": 0.4824, + "step": 26914 + }, + { + "epoch": 0.7390170236133993, + "grad_norm": 0.38273128867149353, + "learning_rate": 1.4010999834463616e-05, + "loss": 0.5034, + "step": 26915 + }, + { + "epoch": 0.7390444810543657, + "grad_norm": 0.38257551193237305, + "learning_rate": 1.4010604201555063e-05, + "loss": 0.4281, + "step": 26916 + }, + { + "epoch": 0.7390719384953323, + "grad_norm": 0.359611451625824, + "learning_rate": 1.4010208561165449e-05, + "loss": 0.4987, + "step": 26917 + }, + { + "epoch": 0.7390993959362987, + "grad_norm": 0.33377569913864136, + "learning_rate": 1.4009812913295515e-05, + "loss": 0.5034, + "step": 26918 + }, + { + "epoch": 0.7391268533772652, + "grad_norm": 0.5776461958885193, + "learning_rate": 1.4009417257946001e-05, + "loss": 0.4336, + "step": 26919 + }, + { + "epoch": 0.7391543108182317, + "grad_norm": 0.36637192964553833, + "learning_rate": 1.4009021595117648e-05, + "loss": 0.5662, + "step": 26920 + }, + { + "epoch": 0.7391817682591982, + "grad_norm": 0.45175227522850037, + "learning_rate": 1.4008625924811185e-05, + "loss": 0.5161, + "step": 26921 + }, + { + "epoch": 0.7392092257001648, + "grad_norm": 0.40861761569976807, + "learning_rate": 1.4008230247027358e-05, + "loss": 0.4665, + "step": 26922 + }, + { + "epoch": 0.7392366831411312, + "grad_norm": 0.3550074100494385, + "learning_rate": 1.4007834561766902e-05, + "loss": 0.4674, + "step": 26923 + }, + { + "epoch": 0.7392641405820978, + "grad_norm": 0.3695279359817505, + "learning_rate": 1.4007438869030554e-05, + "loss": 0.4523, + "step": 26924 + }, + { + "epoch": 0.7392915980230642, + "grad_norm": 0.37004032731056213, + "learning_rate": 1.4007043168819056e-05, + "loss": 0.4861, + "step": 26925 + }, + { + "epoch": 0.7393190554640308, + "grad_norm": 0.38590580224990845, + "learning_rate": 1.4006647461133143e-05, + "loss": 0.4698, + "step": 26926 + }, + { + "epoch": 0.7393465129049972, + "grad_norm": 0.40374884009361267, + "learning_rate": 1.400625174597355e-05, + "loss": 0.5441, + "step": 26927 + }, + { + "epoch": 0.7393739703459637, + "grad_norm": 0.4075644910335541, + "learning_rate": 1.4005856023341025e-05, + "loss": 0.4611, + "step": 26928 + }, + { + "epoch": 0.7394014277869303, + "grad_norm": 0.41321998834609985, + "learning_rate": 1.4005460293236297e-05, + "loss": 0.4346, + "step": 26929 + }, + { + "epoch": 0.7394288852278967, + "grad_norm": 0.4256496727466583, + "learning_rate": 1.4005064555660106e-05, + "loss": 0.5977, + "step": 26930 + }, + { + "epoch": 0.7394563426688633, + "grad_norm": 0.33093923330307007, + "learning_rate": 1.4004668810613193e-05, + "loss": 0.5213, + "step": 26931 + }, + { + "epoch": 0.7394838001098297, + "grad_norm": 0.38425198197364807, + "learning_rate": 1.4004273058096294e-05, + "loss": 0.5047, + "step": 26932 + }, + { + "epoch": 0.7395112575507963, + "grad_norm": 0.3922576308250427, + "learning_rate": 1.4003877298110146e-05, + "loss": 0.4393, + "step": 26933 + }, + { + "epoch": 0.7395387149917627, + "grad_norm": 0.37931686639785767, + "learning_rate": 1.4003481530655491e-05, + "loss": 0.4704, + "step": 26934 + }, + { + "epoch": 0.7395661724327293, + "grad_norm": 0.4024805426597595, + "learning_rate": 1.4003085755733064e-05, + "loss": 0.5363, + "step": 26935 + }, + { + "epoch": 0.7395936298736958, + "grad_norm": 0.4000808894634247, + "learning_rate": 1.4002689973343605e-05, + "loss": 0.5339, + "step": 26936 + }, + { + "epoch": 0.7396210873146623, + "grad_norm": 0.4065569341182709, + "learning_rate": 1.4002294183487852e-05, + "loss": 0.485, + "step": 26937 + }, + { + "epoch": 0.7396485447556288, + "grad_norm": 0.3905651271343231, + "learning_rate": 1.4001898386166544e-05, + "loss": 0.5601, + "step": 26938 + }, + { + "epoch": 0.7396760021965952, + "grad_norm": 0.4125363230705261, + "learning_rate": 1.4001502581380417e-05, + "loss": 0.4413, + "step": 26939 + }, + { + "epoch": 0.7397034596375618, + "grad_norm": 1.035237193107605, + "learning_rate": 1.4001106769130211e-05, + "loss": 0.5391, + "step": 26940 + }, + { + "epoch": 0.7397309170785282, + "grad_norm": 0.3474701941013336, + "learning_rate": 1.4000710949416663e-05, + "loss": 0.5034, + "step": 26941 + }, + { + "epoch": 0.7397583745194948, + "grad_norm": 0.38741669058799744, + "learning_rate": 1.4000315122240515e-05, + "loss": 0.4733, + "step": 26942 + }, + { + "epoch": 0.7397858319604613, + "grad_norm": 0.4131896197795868, + "learning_rate": 1.3999919287602497e-05, + "loss": 0.4786, + "step": 26943 + }, + { + "epoch": 0.7398132894014278, + "grad_norm": 0.3835640251636505, + "learning_rate": 1.3999523445503358e-05, + "loss": 0.5531, + "step": 26944 + }, + { + "epoch": 0.7398407468423943, + "grad_norm": 0.4858456552028656, + "learning_rate": 1.3999127595943828e-05, + "loss": 0.5689, + "step": 26945 + }, + { + "epoch": 0.7398682042833608, + "grad_norm": 0.3768438994884491, + "learning_rate": 1.3998731738924652e-05, + "loss": 0.5192, + "step": 26946 + }, + { + "epoch": 0.7398956617243273, + "grad_norm": 0.3518599271774292, + "learning_rate": 1.3998335874446562e-05, + "loss": 0.46, + "step": 26947 + }, + { + "epoch": 0.7399231191652937, + "grad_norm": 0.3840305209159851, + "learning_rate": 1.39979400025103e-05, + "loss": 0.4731, + "step": 26948 + }, + { + "epoch": 0.7399505766062603, + "grad_norm": 0.4118255376815796, + "learning_rate": 1.3997544123116606e-05, + "loss": 0.4935, + "step": 26949 + }, + { + "epoch": 0.7399780340472268, + "grad_norm": 0.4147377014160156, + "learning_rate": 1.3997148236266213e-05, + "loss": 0.4052, + "step": 26950 + }, + { + "epoch": 0.7400054914881933, + "grad_norm": 0.38787901401519775, + "learning_rate": 1.3996752341959866e-05, + "loss": 0.5542, + "step": 26951 + }, + { + "epoch": 0.7400329489291598, + "grad_norm": 0.4729222357273102, + "learning_rate": 1.39963564401983e-05, + "loss": 0.4429, + "step": 26952 + }, + { + "epoch": 0.7400604063701263, + "grad_norm": 0.4370659589767456, + "learning_rate": 1.399596053098225e-05, + "loss": 0.5768, + "step": 26953 + }, + { + "epoch": 0.7400878638110928, + "grad_norm": 0.4059261083602905, + "learning_rate": 1.3995564614312463e-05, + "loss": 0.4678, + "step": 26954 + }, + { + "epoch": 0.7401153212520593, + "grad_norm": 0.4078056812286377, + "learning_rate": 1.399516869018967e-05, + "loss": 0.5788, + "step": 26955 + }, + { + "epoch": 0.7401427786930258, + "grad_norm": 0.3330034911632538, + "learning_rate": 1.3994772758614612e-05, + "loss": 0.4323, + "step": 26956 + }, + { + "epoch": 0.7401702361339924, + "grad_norm": 0.39782974123954773, + "learning_rate": 1.3994376819588032e-05, + "loss": 0.5074, + "step": 26957 + }, + { + "epoch": 0.7401976935749588, + "grad_norm": 0.3972403407096863, + "learning_rate": 1.3993980873110658e-05, + "loss": 0.5037, + "step": 26958 + }, + { + "epoch": 0.7402251510159253, + "grad_norm": 0.4074924886226654, + "learning_rate": 1.3993584919183238e-05, + "loss": 0.6275, + "step": 26959 + }, + { + "epoch": 0.7402526084568918, + "grad_norm": 0.4455668032169342, + "learning_rate": 1.3993188957806509e-05, + "loss": 0.492, + "step": 26960 + }, + { + "epoch": 0.7402800658978583, + "grad_norm": 0.3915172517299652, + "learning_rate": 1.3992792988981206e-05, + "loss": 0.4534, + "step": 26961 + }, + { + "epoch": 0.7403075233388248, + "grad_norm": 0.41746827960014343, + "learning_rate": 1.399239701270807e-05, + "loss": 0.5172, + "step": 26962 + }, + { + "epoch": 0.7403349807797913, + "grad_norm": 0.38396212458610535, + "learning_rate": 1.3992001028987841e-05, + "loss": 0.5713, + "step": 26963 + }, + { + "epoch": 0.7403624382207579, + "grad_norm": 0.4991645812988281, + "learning_rate": 1.3991605037821255e-05, + "loss": 0.5662, + "step": 26964 + }, + { + "epoch": 0.7403898956617243, + "grad_norm": 0.4520252048969269, + "learning_rate": 1.3991209039209052e-05, + "loss": 0.5531, + "step": 26965 + }, + { + "epoch": 0.7404173531026909, + "grad_norm": 0.36058536171913147, + "learning_rate": 1.3990813033151969e-05, + "loss": 0.4766, + "step": 26966 + }, + { + "epoch": 0.7404448105436573, + "grad_norm": 0.38229936361312866, + "learning_rate": 1.3990417019650747e-05, + "loss": 0.521, + "step": 26967 + }, + { + "epoch": 0.7404722679846238, + "grad_norm": 0.46099817752838135, + "learning_rate": 1.3990020998706122e-05, + "loss": 0.5066, + "step": 26968 + }, + { + "epoch": 0.7404997254255903, + "grad_norm": 0.34967130422592163, + "learning_rate": 1.3989624970318836e-05, + "loss": 0.4916, + "step": 26969 + }, + { + "epoch": 0.7405271828665568, + "grad_norm": 0.37066882848739624, + "learning_rate": 1.3989228934489627e-05, + "loss": 0.522, + "step": 26970 + }, + { + "epoch": 0.7405546403075234, + "grad_norm": 0.3502979874610901, + "learning_rate": 1.398883289121923e-05, + "loss": 0.4554, + "step": 26971 + }, + { + "epoch": 0.7405820977484898, + "grad_norm": 0.37085363268852234, + "learning_rate": 1.398843684050839e-05, + "loss": 0.5997, + "step": 26972 + }, + { + "epoch": 0.7406095551894564, + "grad_norm": 0.3762287497520447, + "learning_rate": 1.3988040782357841e-05, + "loss": 0.4961, + "step": 26973 + }, + { + "epoch": 0.7406370126304228, + "grad_norm": 0.4316796064376831, + "learning_rate": 1.3987644716768321e-05, + "loss": 0.4849, + "step": 26974 + }, + { + "epoch": 0.7406644700713894, + "grad_norm": 0.3795869052410126, + "learning_rate": 1.3987248643740575e-05, + "loss": 0.531, + "step": 26975 + }, + { + "epoch": 0.7406919275123558, + "grad_norm": 0.45316386222839355, + "learning_rate": 1.3986852563275333e-05, + "loss": 0.6502, + "step": 26976 + }, + { + "epoch": 0.7407193849533223, + "grad_norm": 0.40596747398376465, + "learning_rate": 1.398645647537334e-05, + "loss": 0.4478, + "step": 26977 + }, + { + "epoch": 0.7407468423942889, + "grad_norm": 0.38838255405426025, + "learning_rate": 1.3986060380035335e-05, + "loss": 0.4654, + "step": 26978 + }, + { + "epoch": 0.7407742998352553, + "grad_norm": 0.3709757626056671, + "learning_rate": 1.3985664277262055e-05, + "loss": 0.5165, + "step": 26979 + }, + { + "epoch": 0.7408017572762219, + "grad_norm": 0.406777560710907, + "learning_rate": 1.3985268167054237e-05, + "loss": 0.4421, + "step": 26980 + }, + { + "epoch": 0.7408292147171883, + "grad_norm": 0.399940550327301, + "learning_rate": 1.3984872049412623e-05, + "loss": 0.5169, + "step": 26981 + }, + { + "epoch": 0.7408566721581549, + "grad_norm": 0.4080324172973633, + "learning_rate": 1.3984475924337951e-05, + "loss": 0.5316, + "step": 26982 + }, + { + "epoch": 0.7408841295991213, + "grad_norm": 0.36110150814056396, + "learning_rate": 1.3984079791830963e-05, + "loss": 0.501, + "step": 26983 + }, + { + "epoch": 0.7409115870400879, + "grad_norm": 0.34707656502723694, + "learning_rate": 1.3983683651892389e-05, + "loss": 0.5373, + "step": 26984 + }, + { + "epoch": 0.7409390444810544, + "grad_norm": 0.38079214096069336, + "learning_rate": 1.3983287504522976e-05, + "loss": 0.5472, + "step": 26985 + }, + { + "epoch": 0.7409665019220208, + "grad_norm": 0.37639397382736206, + "learning_rate": 1.398289134972346e-05, + "loss": 0.4688, + "step": 26986 + }, + { + "epoch": 0.7409939593629874, + "grad_norm": 0.4476366341114044, + "learning_rate": 1.398249518749458e-05, + "loss": 0.5715, + "step": 26987 + }, + { + "epoch": 0.7410214168039538, + "grad_norm": 0.37277206778526306, + "learning_rate": 1.3982099017837079e-05, + "loss": 0.5241, + "step": 26988 + }, + { + "epoch": 0.7410488742449204, + "grad_norm": 0.4106844663619995, + "learning_rate": 1.3981702840751687e-05, + "loss": 0.5212, + "step": 26989 + }, + { + "epoch": 0.7410763316858868, + "grad_norm": 0.5333192348480225, + "learning_rate": 1.3981306656239153e-05, + "loss": 0.6125, + "step": 26990 + }, + { + "epoch": 0.7411037891268534, + "grad_norm": 0.4036438465118408, + "learning_rate": 1.3980910464300208e-05, + "loss": 0.5118, + "step": 26991 + }, + { + "epoch": 0.7411312465678199, + "grad_norm": 0.38734087347984314, + "learning_rate": 1.3980514264935596e-05, + "loss": 0.5278, + "step": 26992 + }, + { + "epoch": 0.7411587040087864, + "grad_norm": 0.3706018030643463, + "learning_rate": 1.3980118058146055e-05, + "loss": 0.453, + "step": 26993 + }, + { + "epoch": 0.7411861614497529, + "grad_norm": 0.3623046278953552, + "learning_rate": 1.397972184393232e-05, + "loss": 0.4827, + "step": 26994 + }, + { + "epoch": 0.7412136188907194, + "grad_norm": 0.39502736926078796, + "learning_rate": 1.3979325622295139e-05, + "loss": 0.5576, + "step": 26995 + }, + { + "epoch": 0.7412410763316859, + "grad_norm": 0.3824015259742737, + "learning_rate": 1.3978929393235241e-05, + "loss": 0.6003, + "step": 26996 + }, + { + "epoch": 0.7412685337726523, + "grad_norm": 0.40936264395713806, + "learning_rate": 1.3978533156753373e-05, + "loss": 0.5178, + "step": 26997 + }, + { + "epoch": 0.7412959912136189, + "grad_norm": 0.39788392186164856, + "learning_rate": 1.3978136912850271e-05, + "loss": 0.5091, + "step": 26998 + }, + { + "epoch": 0.7413234486545854, + "grad_norm": 0.4086301624774933, + "learning_rate": 1.3977740661526672e-05, + "loss": 0.4624, + "step": 26999 + }, + { + "epoch": 0.7413509060955519, + "grad_norm": 0.38847866654396057, + "learning_rate": 1.3977344402783317e-05, + "loss": 0.4791, + "step": 27000 + }, + { + "epoch": 0.7413783635365184, + "grad_norm": 0.36396417021751404, + "learning_rate": 1.3976948136620947e-05, + "loss": 0.4861, + "step": 27001 + }, + { + "epoch": 0.7414058209774849, + "grad_norm": 0.4006044566631317, + "learning_rate": 1.3976551863040298e-05, + "loss": 0.5493, + "step": 27002 + }, + { + "epoch": 0.7414332784184514, + "grad_norm": 0.4012194573879242, + "learning_rate": 1.3976155582042112e-05, + "loss": 0.502, + "step": 27003 + }, + { + "epoch": 0.7414607358594179, + "grad_norm": 0.6170787811279297, + "learning_rate": 1.3975759293627124e-05, + "loss": 0.3974, + "step": 27004 + }, + { + "epoch": 0.7414881933003844, + "grad_norm": 0.4621216952800751, + "learning_rate": 1.397536299779608e-05, + "loss": 0.5754, + "step": 27005 + }, + { + "epoch": 0.741515650741351, + "grad_norm": 0.3639104664325714, + "learning_rate": 1.397496669454971e-05, + "loss": 0.5249, + "step": 27006 + }, + { + "epoch": 0.7415431081823174, + "grad_norm": 0.40076789259910583, + "learning_rate": 1.3974570383888764e-05, + "loss": 0.4694, + "step": 27007 + }, + { + "epoch": 0.7415705656232839, + "grad_norm": 0.3661685287952423, + "learning_rate": 1.3974174065813975e-05, + "loss": 0.4826, + "step": 27008 + }, + { + "epoch": 0.7415980230642504, + "grad_norm": 0.3940727114677429, + "learning_rate": 1.3973777740326078e-05, + "loss": 0.5932, + "step": 27009 + }, + { + "epoch": 0.7416254805052169, + "grad_norm": 0.5018472075462341, + "learning_rate": 1.3973381407425821e-05, + "loss": 0.5114, + "step": 27010 + }, + { + "epoch": 0.7416529379461834, + "grad_norm": 0.7577658891677856, + "learning_rate": 1.3972985067113938e-05, + "loss": 0.4143, + "step": 27011 + }, + { + "epoch": 0.7416803953871499, + "grad_norm": 0.34429749846458435, + "learning_rate": 1.3972588719391172e-05, + "loss": 0.5219, + "step": 27012 + }, + { + "epoch": 0.7417078528281165, + "grad_norm": 0.37247148156166077, + "learning_rate": 1.3972192364258258e-05, + "loss": 0.5266, + "step": 27013 + }, + { + "epoch": 0.7417353102690829, + "grad_norm": 0.3828836977481842, + "learning_rate": 1.3971796001715938e-05, + "loss": 0.4829, + "step": 27014 + }, + { + "epoch": 0.7417627677100495, + "grad_norm": 0.38407185673713684, + "learning_rate": 1.3971399631764952e-05, + "loss": 0.511, + "step": 27015 + }, + { + "epoch": 0.7417902251510159, + "grad_norm": 0.3723770081996918, + "learning_rate": 1.3971003254406038e-05, + "loss": 0.4689, + "step": 27016 + }, + { + "epoch": 0.7418176825919824, + "grad_norm": 0.34597229957580566, + "learning_rate": 1.3970606869639936e-05, + "loss": 0.4586, + "step": 27017 + }, + { + "epoch": 0.7418451400329489, + "grad_norm": 0.3791463375091553, + "learning_rate": 1.3970210477467381e-05, + "loss": 0.5616, + "step": 27018 + }, + { + "epoch": 0.7418725974739154, + "grad_norm": 0.5693525075912476, + "learning_rate": 1.3969814077889121e-05, + "loss": 0.5547, + "step": 27019 + }, + { + "epoch": 0.741900054914882, + "grad_norm": 0.6281541585922241, + "learning_rate": 1.3969417670905889e-05, + "loss": 0.5113, + "step": 27020 + }, + { + "epoch": 0.7419275123558484, + "grad_norm": 0.3776805102825165, + "learning_rate": 1.3969021256518424e-05, + "loss": 0.5099, + "step": 27021 + }, + { + "epoch": 0.741954969796815, + "grad_norm": 0.39486807584762573, + "learning_rate": 1.396862483472747e-05, + "loss": 0.5539, + "step": 27022 + }, + { + "epoch": 0.7419824272377814, + "grad_norm": 0.40166792273521423, + "learning_rate": 1.3968228405533764e-05, + "loss": 0.5669, + "step": 27023 + }, + { + "epoch": 0.742009884678748, + "grad_norm": 0.3637751340866089, + "learning_rate": 1.3967831968938046e-05, + "loss": 0.4277, + "step": 27024 + }, + { + "epoch": 0.7420373421197144, + "grad_norm": 0.3840588629245758, + "learning_rate": 1.3967435524941056e-05, + "loss": 0.5955, + "step": 27025 + }, + { + "epoch": 0.7420647995606809, + "grad_norm": 0.4896114766597748, + "learning_rate": 1.3967039073543528e-05, + "loss": 0.5564, + "step": 27026 + }, + { + "epoch": 0.7420922570016475, + "grad_norm": 0.4592808485031128, + "learning_rate": 1.3966642614746209e-05, + "loss": 0.5053, + "step": 27027 + }, + { + "epoch": 0.7421197144426139, + "grad_norm": 0.380420982837677, + "learning_rate": 1.3966246148549834e-05, + "loss": 0.5416, + "step": 27028 + }, + { + "epoch": 0.7421471718835805, + "grad_norm": 0.4022286534309387, + "learning_rate": 1.3965849674955148e-05, + "loss": 0.4983, + "step": 27029 + }, + { + "epoch": 0.7421746293245469, + "grad_norm": 0.40779808163642883, + "learning_rate": 1.3965453193962881e-05, + "loss": 0.5306, + "step": 27030 + }, + { + "epoch": 0.7422020867655135, + "grad_norm": 0.3884509801864624, + "learning_rate": 1.396505670557378e-05, + "loss": 0.3986, + "step": 27031 + }, + { + "epoch": 0.7422295442064799, + "grad_norm": 0.3529687225818634, + "learning_rate": 1.3964660209788585e-05, + "loss": 0.4739, + "step": 27032 + }, + { + "epoch": 0.7422570016474465, + "grad_norm": 0.3728959262371063, + "learning_rate": 1.3964263706608032e-05, + "loss": 0.4908, + "step": 27033 + }, + { + "epoch": 0.742284459088413, + "grad_norm": 0.3877026438713074, + "learning_rate": 1.3963867196032863e-05, + "loss": 0.4273, + "step": 27034 + }, + { + "epoch": 0.7423119165293794, + "grad_norm": 0.365773469209671, + "learning_rate": 1.3963470678063815e-05, + "loss": 0.5108, + "step": 27035 + }, + { + "epoch": 0.742339373970346, + "grad_norm": 0.4517700970172882, + "learning_rate": 1.396307415270163e-05, + "loss": 0.6046, + "step": 27036 + }, + { + "epoch": 0.7423668314113124, + "grad_norm": 0.3713710308074951, + "learning_rate": 1.3962677619947048e-05, + "loss": 0.5133, + "step": 27037 + }, + { + "epoch": 0.742394288852279, + "grad_norm": 0.4072043001651764, + "learning_rate": 1.3962281079800803e-05, + "loss": 0.5265, + "step": 27038 + }, + { + "epoch": 0.7424217462932454, + "grad_norm": 0.41244199872016907, + "learning_rate": 1.3961884532263646e-05, + "loss": 0.4787, + "step": 27039 + }, + { + "epoch": 0.742449203734212, + "grad_norm": 0.3624032735824585, + "learning_rate": 1.3961487977336306e-05, + "loss": 0.4867, + "step": 27040 + }, + { + "epoch": 0.7424766611751785, + "grad_norm": 0.34586301445961, + "learning_rate": 1.3961091415019525e-05, + "loss": 0.5436, + "step": 27041 + }, + { + "epoch": 0.742504118616145, + "grad_norm": 0.41804537177085876, + "learning_rate": 1.3960694845314047e-05, + "loss": 0.5208, + "step": 27042 + }, + { + "epoch": 0.7425315760571115, + "grad_norm": 0.4163196086883545, + "learning_rate": 1.3960298268220606e-05, + "loss": 0.5266, + "step": 27043 + }, + { + "epoch": 0.742559033498078, + "grad_norm": 0.38681161403656006, + "learning_rate": 1.3959901683739946e-05, + "loss": 0.4889, + "step": 27044 + }, + { + "epoch": 0.7425864909390445, + "grad_norm": 0.4204488694667816, + "learning_rate": 1.395950509187281e-05, + "loss": 0.5392, + "step": 27045 + }, + { + "epoch": 0.7426139483800109, + "grad_norm": 0.34840127825737, + "learning_rate": 1.3959108492619925e-05, + "loss": 0.4505, + "step": 27046 + }, + { + "epoch": 0.7426414058209775, + "grad_norm": 0.3483383357524872, + "learning_rate": 1.3958711885982045e-05, + "loss": 0.4448, + "step": 27047 + }, + { + "epoch": 0.742668863261944, + "grad_norm": 0.4208407998085022, + "learning_rate": 1.3958315271959903e-05, + "loss": 0.5637, + "step": 27048 + }, + { + "epoch": 0.7426963207029105, + "grad_norm": 0.445615291595459, + "learning_rate": 1.395791865055424e-05, + "loss": 0.553, + "step": 27049 + }, + { + "epoch": 0.742723778143877, + "grad_norm": 0.45298632979393005, + "learning_rate": 1.3957522021765794e-05, + "loss": 0.5358, + "step": 27050 + }, + { + "epoch": 0.7427512355848435, + "grad_norm": 0.3946841061115265, + "learning_rate": 1.3957125385595307e-05, + "loss": 0.4639, + "step": 27051 + }, + { + "epoch": 0.74277869302581, + "grad_norm": 0.37979593873023987, + "learning_rate": 1.395672874204352e-05, + "loss": 0.5293, + "step": 27052 + }, + { + "epoch": 0.7428061504667764, + "grad_norm": 0.413076788187027, + "learning_rate": 1.3956332091111168e-05, + "loss": 0.4377, + "step": 27053 + }, + { + "epoch": 0.742833607907743, + "grad_norm": 0.36655354499816895, + "learning_rate": 1.3955935432798995e-05, + "loss": 0.5263, + "step": 27054 + }, + { + "epoch": 0.7428610653487095, + "grad_norm": 0.39434880018234253, + "learning_rate": 1.395553876710774e-05, + "loss": 0.533, + "step": 27055 + }, + { + "epoch": 0.742888522789676, + "grad_norm": 0.38567689061164856, + "learning_rate": 1.3955142094038141e-05, + "loss": 0.533, + "step": 27056 + }, + { + "epoch": 0.7429159802306425, + "grad_norm": 0.5730993151664734, + "learning_rate": 1.3954745413590942e-05, + "loss": 0.4025, + "step": 27057 + }, + { + "epoch": 0.742943437671609, + "grad_norm": 0.3477962017059326, + "learning_rate": 1.3954348725766882e-05, + "loss": 0.4427, + "step": 27058 + }, + { + "epoch": 0.7429708951125755, + "grad_norm": 0.39207765460014343, + "learning_rate": 1.3953952030566696e-05, + "loss": 0.4823, + "step": 27059 + }, + { + "epoch": 0.742998352553542, + "grad_norm": 0.4089158773422241, + "learning_rate": 1.3953555327991128e-05, + "loss": 0.505, + "step": 27060 + }, + { + "epoch": 0.7430258099945085, + "grad_norm": 0.6506774425506592, + "learning_rate": 1.3953158618040918e-05, + "loss": 0.4924, + "step": 27061 + }, + { + "epoch": 0.7430532674354751, + "grad_norm": 0.40461763739585876, + "learning_rate": 1.3952761900716807e-05, + "loss": 0.5104, + "step": 27062 + }, + { + "epoch": 0.7430807248764415, + "grad_norm": 0.40663713216781616, + "learning_rate": 1.3952365176019533e-05, + "loss": 0.5684, + "step": 27063 + }, + { + "epoch": 0.743108182317408, + "grad_norm": 0.4110662341117859, + "learning_rate": 1.3951968443949834e-05, + "loss": 0.501, + "step": 27064 + }, + { + "epoch": 0.7431356397583745, + "grad_norm": 0.43960919976234436, + "learning_rate": 1.3951571704508457e-05, + "loss": 0.6208, + "step": 27065 + }, + { + "epoch": 0.743163097199341, + "grad_norm": 0.41747182607650757, + "learning_rate": 1.3951174957696135e-05, + "loss": 0.4687, + "step": 27066 + }, + { + "epoch": 0.7431905546403075, + "grad_norm": 0.3488908112049103, + "learning_rate": 1.3950778203513608e-05, + "loss": 0.4216, + "step": 27067 + }, + { + "epoch": 0.743218012081274, + "grad_norm": 0.3998335003852844, + "learning_rate": 1.3950381441961623e-05, + "loss": 0.4383, + "step": 27068 + }, + { + "epoch": 0.7432454695222406, + "grad_norm": 0.36452800035476685, + "learning_rate": 1.3949984673040913e-05, + "loss": 0.4885, + "step": 27069 + }, + { + "epoch": 0.743272926963207, + "grad_norm": 0.40931764245033264, + "learning_rate": 1.3949587896752223e-05, + "loss": 0.5214, + "step": 27070 + }, + { + "epoch": 0.7433003844041736, + "grad_norm": 0.36774489283561707, + "learning_rate": 1.3949191113096292e-05, + "loss": 0.5405, + "step": 27071 + }, + { + "epoch": 0.74332784184514, + "grad_norm": 0.39834725856781006, + "learning_rate": 1.3948794322073856e-05, + "loss": 0.4999, + "step": 27072 + }, + { + "epoch": 0.7433552992861066, + "grad_norm": 0.43503931164741516, + "learning_rate": 1.394839752368566e-05, + "loss": 0.5113, + "step": 27073 + }, + { + "epoch": 0.743382756727073, + "grad_norm": 0.40426960587501526, + "learning_rate": 1.394800071793244e-05, + "loss": 0.4931, + "step": 27074 + }, + { + "epoch": 0.7434102141680395, + "grad_norm": 0.3584212064743042, + "learning_rate": 1.3947603904814941e-05, + "loss": 0.424, + "step": 27075 + }, + { + "epoch": 0.7434376716090061, + "grad_norm": 0.4627923369407654, + "learning_rate": 1.3947207084333902e-05, + "loss": 0.4763, + "step": 27076 + }, + { + "epoch": 0.7434651290499725, + "grad_norm": 0.431939959526062, + "learning_rate": 1.394681025649006e-05, + "loss": 0.4371, + "step": 27077 + }, + { + "epoch": 0.7434925864909391, + "grad_norm": 0.37147459387779236, + "learning_rate": 1.3946413421284157e-05, + "loss": 0.4794, + "step": 27078 + }, + { + "epoch": 0.7435200439319055, + "grad_norm": 0.374906450510025, + "learning_rate": 1.3946016578716932e-05, + "loss": 0.4879, + "step": 27079 + }, + { + "epoch": 0.7435475013728721, + "grad_norm": 0.38800692558288574, + "learning_rate": 1.3945619728789131e-05, + "loss": 0.5119, + "step": 27080 + }, + { + "epoch": 0.7435749588138385, + "grad_norm": 0.4813782572746277, + "learning_rate": 1.3945222871501486e-05, + "loss": 0.5977, + "step": 27081 + }, + { + "epoch": 0.743602416254805, + "grad_norm": 0.3642158508300781, + "learning_rate": 1.3944826006854743e-05, + "loss": 0.4433, + "step": 27082 + }, + { + "epoch": 0.7436298736957716, + "grad_norm": 0.3974789083003998, + "learning_rate": 1.394442913484964e-05, + "loss": 0.529, + "step": 27083 + }, + { + "epoch": 0.743657331136738, + "grad_norm": 0.37380459904670715, + "learning_rate": 1.3944032255486919e-05, + "loss": 0.4875, + "step": 27084 + }, + { + "epoch": 0.7436847885777046, + "grad_norm": 0.3741873502731323, + "learning_rate": 1.3943635368767317e-05, + "loss": 0.5714, + "step": 27085 + }, + { + "epoch": 0.743712246018671, + "grad_norm": 0.3588683307170868, + "learning_rate": 1.3943238474691578e-05, + "loss": 0.4343, + "step": 27086 + }, + { + "epoch": 0.7437397034596376, + "grad_norm": 0.37900128960609436, + "learning_rate": 1.3942841573260439e-05, + "loss": 0.4288, + "step": 27087 + }, + { + "epoch": 0.743767160900604, + "grad_norm": 0.3489559292793274, + "learning_rate": 1.3942444664474644e-05, + "loss": 0.4004, + "step": 27088 + }, + { + "epoch": 0.7437946183415706, + "grad_norm": 0.4358506500720978, + "learning_rate": 1.3942047748334932e-05, + "loss": 0.3661, + "step": 27089 + }, + { + "epoch": 0.7438220757825371, + "grad_norm": 0.3340131640434265, + "learning_rate": 1.3941650824842038e-05, + "loss": 0.4497, + "step": 27090 + }, + { + "epoch": 0.7438495332235036, + "grad_norm": 0.36977794766426086, + "learning_rate": 1.394125389399671e-05, + "loss": 0.4955, + "step": 27091 + }, + { + "epoch": 0.7438769906644701, + "grad_norm": 0.45542672276496887, + "learning_rate": 1.394085695579969e-05, + "loss": 0.4593, + "step": 27092 + }, + { + "epoch": 0.7439044481054365, + "grad_norm": 0.47883540391921997, + "learning_rate": 1.3940460010251708e-05, + "loss": 0.5406, + "step": 27093 + }, + { + "epoch": 0.7439319055464031, + "grad_norm": 0.4001176655292511, + "learning_rate": 1.3940063057353515e-05, + "loss": 0.5008, + "step": 27094 + }, + { + "epoch": 0.7439593629873695, + "grad_norm": 0.41670510172843933, + "learning_rate": 1.3939666097105843e-05, + "loss": 0.5625, + "step": 27095 + }, + { + "epoch": 0.7439868204283361, + "grad_norm": 0.371585488319397, + "learning_rate": 1.3939269129509438e-05, + "loss": 0.4543, + "step": 27096 + }, + { + "epoch": 0.7440142778693026, + "grad_norm": 0.3722008168697357, + "learning_rate": 1.3938872154565037e-05, + "loss": 0.5741, + "step": 27097 + }, + { + "epoch": 0.7440417353102691, + "grad_norm": 0.4285443127155304, + "learning_rate": 1.3938475172273385e-05, + "loss": 0.517, + "step": 27098 + }, + { + "epoch": 0.7440691927512356, + "grad_norm": 0.38197964429855347, + "learning_rate": 1.3938078182635217e-05, + "loss": 0.4778, + "step": 27099 + }, + { + "epoch": 0.7440966501922021, + "grad_norm": 0.35393184423446655, + "learning_rate": 1.3937681185651277e-05, + "loss": 0.4518, + "step": 27100 + }, + { + "epoch": 0.7441241076331686, + "grad_norm": 0.4424516558647156, + "learning_rate": 1.3937284181322308e-05, + "loss": 0.4988, + "step": 27101 + }, + { + "epoch": 0.744151565074135, + "grad_norm": 0.3476366102695465, + "learning_rate": 1.3936887169649046e-05, + "loss": 0.4822, + "step": 27102 + }, + { + "epoch": 0.7441790225151016, + "grad_norm": 0.4049743413925171, + "learning_rate": 1.3936490150632232e-05, + "loss": 0.4526, + "step": 27103 + }, + { + "epoch": 0.7442064799560681, + "grad_norm": 0.379192590713501, + "learning_rate": 1.3936093124272608e-05, + "loss": 0.482, + "step": 27104 + }, + { + "epoch": 0.7442339373970346, + "grad_norm": 0.41520941257476807, + "learning_rate": 1.3935696090570909e-05, + "loss": 0.4772, + "step": 27105 + }, + { + "epoch": 0.7442613948380011, + "grad_norm": 0.41623514890670776, + "learning_rate": 1.393529904952789e-05, + "loss": 0.4814, + "step": 27106 + }, + { + "epoch": 0.7442888522789676, + "grad_norm": 0.38903263211250305, + "learning_rate": 1.3934902001144277e-05, + "loss": 0.5082, + "step": 27107 + }, + { + "epoch": 0.7443163097199341, + "grad_norm": 0.3993878662586212, + "learning_rate": 1.3934504945420814e-05, + "loss": 0.5242, + "step": 27108 + }, + { + "epoch": 0.7443437671609006, + "grad_norm": 0.3858012557029724, + "learning_rate": 1.3934107882358245e-05, + "loss": 0.4838, + "step": 27109 + }, + { + "epoch": 0.7443712246018671, + "grad_norm": 0.3855363428592682, + "learning_rate": 1.393371081195731e-05, + "loss": 0.4414, + "step": 27110 + }, + { + "epoch": 0.7443986820428337, + "grad_norm": 0.3909245729446411, + "learning_rate": 1.393331373421875e-05, + "loss": 0.4464, + "step": 27111 + }, + { + "epoch": 0.7444261394838001, + "grad_norm": 0.4376932382583618, + "learning_rate": 1.3932916649143305e-05, + "loss": 0.5515, + "step": 27112 + }, + { + "epoch": 0.7444535969247666, + "grad_norm": 0.5710103511810303, + "learning_rate": 1.3932519556731711e-05, + "loss": 0.4287, + "step": 27113 + }, + { + "epoch": 0.7444810543657331, + "grad_norm": 0.3659098744392395, + "learning_rate": 1.3932122456984717e-05, + "loss": 0.4851, + "step": 27114 + }, + { + "epoch": 0.7445085118066996, + "grad_norm": 0.523100733757019, + "learning_rate": 1.3931725349903059e-05, + "loss": 0.4958, + "step": 27115 + }, + { + "epoch": 0.7445359692476661, + "grad_norm": 0.3838779926300049, + "learning_rate": 1.3931328235487475e-05, + "loss": 0.479, + "step": 27116 + }, + { + "epoch": 0.7445634266886326, + "grad_norm": 0.3862573206424713, + "learning_rate": 1.3930931113738716e-05, + "loss": 0.4632, + "step": 27117 + }, + { + "epoch": 0.7445908841295992, + "grad_norm": 0.4086238443851471, + "learning_rate": 1.3930533984657509e-05, + "loss": 0.5311, + "step": 27118 + }, + { + "epoch": 0.7446183415705656, + "grad_norm": 0.4274047911167145, + "learning_rate": 1.3930136848244605e-05, + "loss": 0.4777, + "step": 27119 + }, + { + "epoch": 0.7446457990115322, + "grad_norm": 0.46751153469085693, + "learning_rate": 1.392973970450074e-05, + "loss": 0.5909, + "step": 27120 + }, + { + "epoch": 0.7446732564524986, + "grad_norm": 0.3500654995441437, + "learning_rate": 1.3929342553426658e-05, + "loss": 0.444, + "step": 27121 + }, + { + "epoch": 0.7447007138934651, + "grad_norm": 0.40573498606681824, + "learning_rate": 1.3928945395023096e-05, + "loss": 0.508, + "step": 27122 + }, + { + "epoch": 0.7447281713344316, + "grad_norm": 0.3551639914512634, + "learning_rate": 1.3928548229290797e-05, + "loss": 0.5032, + "step": 27123 + }, + { + "epoch": 0.7447556287753981, + "grad_norm": 0.38197723031044006, + "learning_rate": 1.3928151056230502e-05, + "loss": 0.5257, + "step": 27124 + }, + { + "epoch": 0.7447830862163647, + "grad_norm": 0.38350850343704224, + "learning_rate": 1.3927753875842952e-05, + "loss": 0.5198, + "step": 27125 + }, + { + "epoch": 0.7448105436573311, + "grad_norm": 0.3775874078273773, + "learning_rate": 1.3927356688128888e-05, + "loss": 0.5118, + "step": 27126 + }, + { + "epoch": 0.7448380010982977, + "grad_norm": 0.39955732226371765, + "learning_rate": 1.392695949308905e-05, + "loss": 0.4732, + "step": 27127 + }, + { + "epoch": 0.7448654585392641, + "grad_norm": 0.3672311007976532, + "learning_rate": 1.3926562290724177e-05, + "loss": 0.4945, + "step": 27128 + }, + { + "epoch": 0.7448929159802307, + "grad_norm": 0.3316587209701538, + "learning_rate": 1.3926165081035015e-05, + "loss": 0.439, + "step": 27129 + }, + { + "epoch": 0.7449203734211971, + "grad_norm": 0.3736424446105957, + "learning_rate": 1.3925767864022302e-05, + "loss": 0.453, + "step": 27130 + }, + { + "epoch": 0.7449478308621637, + "grad_norm": 0.4106175899505615, + "learning_rate": 1.3925370639686776e-05, + "loss": 0.4689, + "step": 27131 + }, + { + "epoch": 0.7449752883031302, + "grad_norm": 0.40685001015663147, + "learning_rate": 1.3924973408029185e-05, + "loss": 0.5874, + "step": 27132 + }, + { + "epoch": 0.7450027457440966, + "grad_norm": 0.34905654191970825, + "learning_rate": 1.3924576169050266e-05, + "loss": 0.5361, + "step": 27133 + }, + { + "epoch": 0.7450302031850632, + "grad_norm": 0.37440919876098633, + "learning_rate": 1.3924178922750755e-05, + "loss": 0.4592, + "step": 27134 + }, + { + "epoch": 0.7450576606260296, + "grad_norm": 0.36881229281425476, + "learning_rate": 1.3923781669131402e-05, + "loss": 0.4391, + "step": 27135 + }, + { + "epoch": 0.7450851180669962, + "grad_norm": 0.38177669048309326, + "learning_rate": 1.3923384408192942e-05, + "loss": 0.5695, + "step": 27136 + }, + { + "epoch": 0.7451125755079626, + "grad_norm": 0.3537244498729706, + "learning_rate": 1.3922987139936117e-05, + "loss": 0.4993, + "step": 27137 + }, + { + "epoch": 0.7451400329489292, + "grad_norm": 0.4073982238769531, + "learning_rate": 1.392258986436167e-05, + "loss": 0.5166, + "step": 27138 + }, + { + "epoch": 0.7451674903898957, + "grad_norm": 0.4546234607696533, + "learning_rate": 1.3922192581470343e-05, + "loss": 0.4864, + "step": 27139 + }, + { + "epoch": 0.7451949478308622, + "grad_norm": 0.36803680658340454, + "learning_rate": 1.3921795291262871e-05, + "loss": 0.5067, + "step": 27140 + }, + { + "epoch": 0.7452224052718287, + "grad_norm": 0.410756915807724, + "learning_rate": 1.3921397993740003e-05, + "loss": 0.5143, + "step": 27141 + }, + { + "epoch": 0.7452498627127951, + "grad_norm": 0.35265594720840454, + "learning_rate": 1.3921000688902475e-05, + "loss": 0.4577, + "step": 27142 + }, + { + "epoch": 0.7452773201537617, + "grad_norm": 0.4021552801132202, + "learning_rate": 1.3920603376751029e-05, + "loss": 0.5464, + "step": 27143 + }, + { + "epoch": 0.7453047775947281, + "grad_norm": 0.44017112255096436, + "learning_rate": 1.3920206057286404e-05, + "loss": 0.5355, + "step": 27144 + }, + { + "epoch": 0.7453322350356947, + "grad_norm": 0.4080444872379303, + "learning_rate": 1.3919808730509346e-05, + "loss": 0.496, + "step": 27145 + }, + { + "epoch": 0.7453596924766612, + "grad_norm": 0.3905821442604065, + "learning_rate": 1.3919411396420595e-05, + "loss": 0.4314, + "step": 27146 + }, + { + "epoch": 0.7453871499176277, + "grad_norm": 0.36063605546951294, + "learning_rate": 1.3919014055020887e-05, + "loss": 0.4612, + "step": 27147 + }, + { + "epoch": 0.7454146073585942, + "grad_norm": 0.4225355088710785, + "learning_rate": 1.391861670631097e-05, + "loss": 0.4897, + "step": 27148 + }, + { + "epoch": 0.7454420647995607, + "grad_norm": 0.39107781648635864, + "learning_rate": 1.391821935029158e-05, + "loss": 0.5227, + "step": 27149 + }, + { + "epoch": 0.7454695222405272, + "grad_norm": 0.36338916420936584, + "learning_rate": 1.3917821986963463e-05, + "loss": 0.4211, + "step": 27150 + }, + { + "epoch": 0.7454969796814936, + "grad_norm": 0.46212705969810486, + "learning_rate": 1.3917424616327358e-05, + "loss": 0.5134, + "step": 27151 + }, + { + "epoch": 0.7455244371224602, + "grad_norm": 0.4817342460155487, + "learning_rate": 1.3917027238384002e-05, + "loss": 0.4462, + "step": 27152 + }, + { + "epoch": 0.7455518945634267, + "grad_norm": 0.37376728653907776, + "learning_rate": 1.3916629853134143e-05, + "loss": 0.4296, + "step": 27153 + }, + { + "epoch": 0.7455793520043932, + "grad_norm": 0.3929413855075836, + "learning_rate": 1.3916232460578515e-05, + "loss": 0.5084, + "step": 27154 + }, + { + "epoch": 0.7456068094453597, + "grad_norm": 0.4164959490299225, + "learning_rate": 1.391583506071787e-05, + "loss": 0.4763, + "step": 27155 + }, + { + "epoch": 0.7456342668863262, + "grad_norm": 0.3522765040397644, + "learning_rate": 1.391543765355294e-05, + "loss": 0.4968, + "step": 27156 + }, + { + "epoch": 0.7456617243272927, + "grad_norm": 0.417506605386734, + "learning_rate": 1.3915040239084468e-05, + "loss": 0.5443, + "step": 27157 + }, + { + "epoch": 0.7456891817682592, + "grad_norm": 0.37671589851379395, + "learning_rate": 1.3914642817313198e-05, + "loss": 0.5092, + "step": 27158 + }, + { + "epoch": 0.7457166392092257, + "grad_norm": 0.4278164207935333, + "learning_rate": 1.3914245388239867e-05, + "loss": 0.5458, + "step": 27159 + }, + { + "epoch": 0.7457440966501923, + "grad_norm": 0.38732007145881653, + "learning_rate": 1.3913847951865222e-05, + "loss": 0.5723, + "step": 27160 + }, + { + "epoch": 0.7457715540911587, + "grad_norm": 0.4173907935619354, + "learning_rate": 1.391345050819e-05, + "loss": 0.5911, + "step": 27161 + }, + { + "epoch": 0.7457990115321252, + "grad_norm": 0.3393164277076721, + "learning_rate": 1.3913053057214945e-05, + "loss": 0.5143, + "step": 27162 + }, + { + "epoch": 0.7458264689730917, + "grad_norm": 0.4084787964820862, + "learning_rate": 1.3912655598940797e-05, + "loss": 0.5384, + "step": 27163 + }, + { + "epoch": 0.7458539264140582, + "grad_norm": 0.35454392433166504, + "learning_rate": 1.3912258133368298e-05, + "loss": 0.4794, + "step": 27164 + }, + { + "epoch": 0.7458813838550247, + "grad_norm": 0.37580403685569763, + "learning_rate": 1.3911860660498186e-05, + "loss": 0.4753, + "step": 27165 + }, + { + "epoch": 0.7459088412959912, + "grad_norm": 0.3833920955657959, + "learning_rate": 1.3911463180331208e-05, + "loss": 0.5385, + "step": 27166 + }, + { + "epoch": 0.7459362987369578, + "grad_norm": 0.47670644521713257, + "learning_rate": 1.39110656928681e-05, + "loss": 0.4627, + "step": 27167 + }, + { + "epoch": 0.7459637561779242, + "grad_norm": 0.40480056405067444, + "learning_rate": 1.391066819810961e-05, + "loss": 0.4288, + "step": 27168 + }, + { + "epoch": 0.7459912136188908, + "grad_norm": 0.3918590843677521, + "learning_rate": 1.3910270696056474e-05, + "loss": 0.5279, + "step": 27169 + }, + { + "epoch": 0.7460186710598572, + "grad_norm": 0.3651442229747772, + "learning_rate": 1.3909873186709435e-05, + "loss": 0.5582, + "step": 27170 + }, + { + "epoch": 0.7460461285008237, + "grad_norm": 0.3872045874595642, + "learning_rate": 1.3909475670069234e-05, + "loss": 0.4849, + "step": 27171 + }, + { + "epoch": 0.7460735859417902, + "grad_norm": 0.38970157504081726, + "learning_rate": 1.3909078146136613e-05, + "loss": 0.5024, + "step": 27172 + }, + { + "epoch": 0.7461010433827567, + "grad_norm": 0.4299333989620209, + "learning_rate": 1.3908680614912316e-05, + "loss": 0.4279, + "step": 27173 + }, + { + "epoch": 0.7461285008237233, + "grad_norm": 0.3810559809207916, + "learning_rate": 1.390828307639708e-05, + "loss": 0.5369, + "step": 27174 + }, + { + "epoch": 0.7461559582646897, + "grad_norm": 0.4064956605434418, + "learning_rate": 1.3907885530591647e-05, + "loss": 0.4505, + "step": 27175 + }, + { + "epoch": 0.7461834157056563, + "grad_norm": 0.4144935607910156, + "learning_rate": 1.3907487977496765e-05, + "loss": 0.4898, + "step": 27176 + }, + { + "epoch": 0.7462108731466227, + "grad_norm": 0.8397083282470703, + "learning_rate": 1.3907090417113164e-05, + "loss": 0.4445, + "step": 27177 + }, + { + "epoch": 0.7462383305875893, + "grad_norm": 0.47813528776168823, + "learning_rate": 1.3906692849441599e-05, + "loss": 0.4286, + "step": 27178 + }, + { + "epoch": 0.7462657880285557, + "grad_norm": 0.39944225549697876, + "learning_rate": 1.3906295274482804e-05, + "loss": 0.4939, + "step": 27179 + }, + { + "epoch": 0.7462932454695222, + "grad_norm": 0.3795377314090729, + "learning_rate": 1.3905897692237517e-05, + "loss": 0.5604, + "step": 27180 + }, + { + "epoch": 0.7463207029104888, + "grad_norm": 0.4075329899787903, + "learning_rate": 1.3905500102706491e-05, + "loss": 0.4266, + "step": 27181 + }, + { + "epoch": 0.7463481603514552, + "grad_norm": 0.3287191092967987, + "learning_rate": 1.3905102505890455e-05, + "loss": 0.4576, + "step": 27182 + }, + { + "epoch": 0.7463756177924218, + "grad_norm": 0.46525388956069946, + "learning_rate": 1.3904704901790158e-05, + "loss": 0.5392, + "step": 27183 + }, + { + "epoch": 0.7464030752333882, + "grad_norm": 0.3898729979991913, + "learning_rate": 1.3904307290406343e-05, + "loss": 0.525, + "step": 27184 + }, + { + "epoch": 0.7464305326743548, + "grad_norm": 0.45892661809921265, + "learning_rate": 1.3903909671739744e-05, + "loss": 0.4987, + "step": 27185 + }, + { + "epoch": 0.7464579901153212, + "grad_norm": 0.4826240837574005, + "learning_rate": 1.390351204579111e-05, + "loss": 0.5486, + "step": 27186 + }, + { + "epoch": 0.7464854475562878, + "grad_norm": 0.38185274600982666, + "learning_rate": 1.390311441256118e-05, + "loss": 0.4884, + "step": 27187 + }, + { + "epoch": 0.7465129049972542, + "grad_norm": 0.6604644060134888, + "learning_rate": 1.3902716772050698e-05, + "loss": 0.4779, + "step": 27188 + }, + { + "epoch": 0.7465403624382208, + "grad_norm": 0.4946640133857727, + "learning_rate": 1.3902319124260402e-05, + "loss": 0.5538, + "step": 27189 + }, + { + "epoch": 0.7465678198791873, + "grad_norm": 0.4191156029701233, + "learning_rate": 1.3901921469191034e-05, + "loss": 0.4597, + "step": 27190 + }, + { + "epoch": 0.7465952773201537, + "grad_norm": 0.42399370670318604, + "learning_rate": 1.3901523806843338e-05, + "loss": 0.4322, + "step": 27191 + }, + { + "epoch": 0.7466227347611203, + "grad_norm": 0.3724319636821747, + "learning_rate": 1.3901126137218053e-05, + "loss": 0.582, + "step": 27192 + }, + { + "epoch": 0.7466501922020867, + "grad_norm": 0.4776276648044586, + "learning_rate": 1.3900728460315927e-05, + "loss": 0.4866, + "step": 27193 + }, + { + "epoch": 0.7466776496430533, + "grad_norm": 0.3981255888938904, + "learning_rate": 1.3900330776137694e-05, + "loss": 0.5099, + "step": 27194 + }, + { + "epoch": 0.7467051070840197, + "grad_norm": 0.3810243010520935, + "learning_rate": 1.38999330846841e-05, + "loss": 0.5041, + "step": 27195 + }, + { + "epoch": 0.7467325645249863, + "grad_norm": 0.33832311630249023, + "learning_rate": 1.3899535385955887e-05, + "loss": 0.4311, + "step": 27196 + }, + { + "epoch": 0.7467600219659528, + "grad_norm": 0.3368035554885864, + "learning_rate": 1.3899137679953794e-05, + "loss": 0.4817, + "step": 27197 + }, + { + "epoch": 0.7467874794069193, + "grad_norm": 0.3735591173171997, + "learning_rate": 1.3898739966678567e-05, + "loss": 0.4288, + "step": 27198 + }, + { + "epoch": 0.7468149368478858, + "grad_norm": 0.35658788681030273, + "learning_rate": 1.3898342246130944e-05, + "loss": 0.4453, + "step": 27199 + }, + { + "epoch": 0.7468423942888522, + "grad_norm": 0.5987304449081421, + "learning_rate": 1.389794451831167e-05, + "loss": 0.584, + "step": 27200 + }, + { + "epoch": 0.7468698517298188, + "grad_norm": 0.47466161847114563, + "learning_rate": 1.3897546783221484e-05, + "loss": 0.4885, + "step": 27201 + }, + { + "epoch": 0.7468973091707852, + "grad_norm": 0.4351346492767334, + "learning_rate": 1.389714904086113e-05, + "loss": 0.5053, + "step": 27202 + }, + { + "epoch": 0.7469247666117518, + "grad_norm": 0.37689408659935, + "learning_rate": 1.389675129123135e-05, + "loss": 0.4893, + "step": 27203 + }, + { + "epoch": 0.7469522240527183, + "grad_norm": 0.3708382844924927, + "learning_rate": 1.3896353534332882e-05, + "loss": 0.5026, + "step": 27204 + }, + { + "epoch": 0.7469796814936848, + "grad_norm": 0.3707188069820404, + "learning_rate": 1.3895955770166477e-05, + "loss": 0.4612, + "step": 27205 + }, + { + "epoch": 0.7470071389346513, + "grad_norm": 0.3584029972553253, + "learning_rate": 1.3895557998732867e-05, + "loss": 0.4551, + "step": 27206 + }, + { + "epoch": 0.7470345963756178, + "grad_norm": 0.40067678689956665, + "learning_rate": 1.3895160220032798e-05, + "loss": 0.5111, + "step": 27207 + }, + { + "epoch": 0.7470620538165843, + "grad_norm": 0.358562707901001, + "learning_rate": 1.3894762434067013e-05, + "loss": 0.4244, + "step": 27208 + }, + { + "epoch": 0.7470895112575507, + "grad_norm": 0.38155868649482727, + "learning_rate": 1.3894364640836251e-05, + "loss": 0.5691, + "step": 27209 + }, + { + "epoch": 0.7471169686985173, + "grad_norm": 0.3841138780117035, + "learning_rate": 1.3893966840341258e-05, + "loss": 0.482, + "step": 27210 + }, + { + "epoch": 0.7471444261394838, + "grad_norm": 0.3982864022254944, + "learning_rate": 1.3893569032582773e-05, + "loss": 0.4275, + "step": 27211 + }, + { + "epoch": 0.7471718835804503, + "grad_norm": 0.3701251447200775, + "learning_rate": 1.3893171217561539e-05, + "loss": 0.4292, + "step": 27212 + }, + { + "epoch": 0.7471993410214168, + "grad_norm": 0.37015044689178467, + "learning_rate": 1.3892773395278299e-05, + "loss": 0.5336, + "step": 27213 + }, + { + "epoch": 0.7472267984623833, + "grad_norm": 0.39199334383010864, + "learning_rate": 1.3892375565733793e-05, + "loss": 0.4395, + "step": 27214 + }, + { + "epoch": 0.7472542559033498, + "grad_norm": 0.3883916437625885, + "learning_rate": 1.3891977728928767e-05, + "loss": 0.4065, + "step": 27215 + }, + { + "epoch": 0.7472817133443163, + "grad_norm": 0.3810133635997772, + "learning_rate": 1.3891579884863958e-05, + "loss": 0.4793, + "step": 27216 + }, + { + "epoch": 0.7473091707852828, + "grad_norm": 0.3558897376060486, + "learning_rate": 1.3891182033540108e-05, + "loss": 0.4632, + "step": 27217 + }, + { + "epoch": 0.7473366282262494, + "grad_norm": 0.39683258533477783, + "learning_rate": 1.3890784174957968e-05, + "loss": 0.4542, + "step": 27218 + }, + { + "epoch": 0.7473640856672158, + "grad_norm": 0.37866291403770447, + "learning_rate": 1.3890386309118269e-05, + "loss": 0.6002, + "step": 27219 + }, + { + "epoch": 0.7473915431081823, + "grad_norm": 0.3932018578052521, + "learning_rate": 1.388998843602176e-05, + "loss": 0.5608, + "step": 27220 + }, + { + "epoch": 0.7474190005491488, + "grad_norm": 0.355747252702713, + "learning_rate": 1.388959055566918e-05, + "loss": 0.4884, + "step": 27221 + }, + { + "epoch": 0.7474464579901153, + "grad_norm": 0.41602379083633423, + "learning_rate": 1.3889192668061271e-05, + "loss": 0.4603, + "step": 27222 + }, + { + "epoch": 0.7474739154310818, + "grad_norm": 0.38681963086128235, + "learning_rate": 1.388879477319878e-05, + "loss": 0.4662, + "step": 27223 + }, + { + "epoch": 0.7475013728720483, + "grad_norm": 0.41736915707588196, + "learning_rate": 1.3888396871082442e-05, + "loss": 0.4665, + "step": 27224 + }, + { + "epoch": 0.7475288303130149, + "grad_norm": 0.3702283799648285, + "learning_rate": 1.3887998961713005e-05, + "loss": 0.4888, + "step": 27225 + }, + { + "epoch": 0.7475562877539813, + "grad_norm": 0.5274899005889893, + "learning_rate": 1.3887601045091208e-05, + "loss": 0.5581, + "step": 27226 + }, + { + "epoch": 0.7475837451949479, + "grad_norm": 0.33664631843566895, + "learning_rate": 1.3887203121217794e-05, + "loss": 0.4297, + "step": 27227 + }, + { + "epoch": 0.7476112026359143, + "grad_norm": 0.39843234419822693, + "learning_rate": 1.3886805190093507e-05, + "loss": 0.4826, + "step": 27228 + }, + { + "epoch": 0.7476386600768808, + "grad_norm": 0.3743366599082947, + "learning_rate": 1.3886407251719085e-05, + "loss": 0.4856, + "step": 27229 + }, + { + "epoch": 0.7476661175178473, + "grad_norm": 0.4379758834838867, + "learning_rate": 1.3886009306095276e-05, + "loss": 0.4451, + "step": 27230 + }, + { + "epoch": 0.7476935749588138, + "grad_norm": 0.3881988823413849, + "learning_rate": 1.3885611353222818e-05, + "loss": 0.451, + "step": 27231 + }, + { + "epoch": 0.7477210323997804, + "grad_norm": 0.3969597816467285, + "learning_rate": 1.3885213393102454e-05, + "loss": 0.5273, + "step": 27232 + }, + { + "epoch": 0.7477484898407468, + "grad_norm": 0.5058870911598206, + "learning_rate": 1.388481542573493e-05, + "loss": 0.4753, + "step": 27233 + }, + { + "epoch": 0.7477759472817134, + "grad_norm": 0.40488335490226746, + "learning_rate": 1.3884417451120981e-05, + "loss": 0.5586, + "step": 27234 + }, + { + "epoch": 0.7478034047226798, + "grad_norm": 0.3504822552204132, + "learning_rate": 1.3884019469261357e-05, + "loss": 0.4717, + "step": 27235 + }, + { + "epoch": 0.7478308621636464, + "grad_norm": 0.35097169876098633, + "learning_rate": 1.3883621480156797e-05, + "loss": 0.4724, + "step": 27236 + }, + { + "epoch": 0.7478583196046128, + "grad_norm": 0.3842412233352661, + "learning_rate": 1.3883223483808039e-05, + "loss": 0.4928, + "step": 27237 + }, + { + "epoch": 0.7478857770455793, + "grad_norm": 0.3985026180744171, + "learning_rate": 1.3882825480215836e-05, + "loss": 0.4874, + "step": 27238 + }, + { + "epoch": 0.7479132344865459, + "grad_norm": 0.42950519919395447, + "learning_rate": 1.388242746938092e-05, + "loss": 0.4894, + "step": 27239 + }, + { + "epoch": 0.7479406919275123, + "grad_norm": 0.37394410371780396, + "learning_rate": 1.388202945130404e-05, + "loss": 0.4429, + "step": 27240 + }, + { + "epoch": 0.7479681493684789, + "grad_norm": 0.3448483943939209, + "learning_rate": 1.3881631425985935e-05, + "loss": 0.3888, + "step": 27241 + }, + { + "epoch": 0.7479956068094453, + "grad_norm": 0.38049083948135376, + "learning_rate": 1.3881233393427346e-05, + "loss": 0.4762, + "step": 27242 + }, + { + "epoch": 0.7480230642504119, + "grad_norm": 0.7437814474105835, + "learning_rate": 1.3880835353629024e-05, + "loss": 0.4576, + "step": 27243 + }, + { + "epoch": 0.7480505216913783, + "grad_norm": 0.42212507128715515, + "learning_rate": 1.3880437306591703e-05, + "loss": 0.584, + "step": 27244 + }, + { + "epoch": 0.7480779791323449, + "grad_norm": 0.4118565618991852, + "learning_rate": 1.3880039252316127e-05, + "loss": 0.5228, + "step": 27245 + }, + { + "epoch": 0.7481054365733114, + "grad_norm": 0.36588066816329956, + "learning_rate": 1.3879641190803039e-05, + "loss": 0.4504, + "step": 27246 + }, + { + "epoch": 0.7481328940142778, + "grad_norm": 0.41977429389953613, + "learning_rate": 1.387924312205318e-05, + "loss": 0.5453, + "step": 27247 + }, + { + "epoch": 0.7481603514552444, + "grad_norm": 0.3767732083797455, + "learning_rate": 1.38788450460673e-05, + "loss": 0.4098, + "step": 27248 + }, + { + "epoch": 0.7481878088962108, + "grad_norm": 0.3668791651725769, + "learning_rate": 1.3878446962846132e-05, + "loss": 0.4774, + "step": 27249 + }, + { + "epoch": 0.7482152663371774, + "grad_norm": 0.3821262717247009, + "learning_rate": 1.3878048872390423e-05, + "loss": 0.4986, + "step": 27250 + }, + { + "epoch": 0.7482427237781438, + "grad_norm": 0.40216055512428284, + "learning_rate": 1.3877650774700917e-05, + "loss": 0.4615, + "step": 27251 + }, + { + "epoch": 0.7482701812191104, + "grad_norm": 0.33533957600593567, + "learning_rate": 1.3877252669778352e-05, + "loss": 0.5043, + "step": 27252 + }, + { + "epoch": 0.7482976386600769, + "grad_norm": 0.36502718925476074, + "learning_rate": 1.3876854557623473e-05, + "loss": 0.5033, + "step": 27253 + }, + { + "epoch": 0.7483250961010434, + "grad_norm": 0.590496838092804, + "learning_rate": 1.3876456438237027e-05, + "loss": 0.5091, + "step": 27254 + }, + { + "epoch": 0.7483525535420099, + "grad_norm": 0.415359765291214, + "learning_rate": 1.387605831161975e-05, + "loss": 0.5281, + "step": 27255 + }, + { + "epoch": 0.7483800109829764, + "grad_norm": 0.391146183013916, + "learning_rate": 1.3875660177772385e-05, + "loss": 0.4462, + "step": 27256 + }, + { + "epoch": 0.7484074684239429, + "grad_norm": 0.47389405965805054, + "learning_rate": 1.3875262036695683e-05, + "loss": 0.5884, + "step": 27257 + }, + { + "epoch": 0.7484349258649093, + "grad_norm": 0.36258751153945923, + "learning_rate": 1.3874863888390373e-05, + "loss": 0.3856, + "step": 27258 + }, + { + "epoch": 0.7484623833058759, + "grad_norm": 0.3667765259742737, + "learning_rate": 1.3874465732857209e-05, + "loss": 0.4116, + "step": 27259 + }, + { + "epoch": 0.7484898407468424, + "grad_norm": 0.3515108823776245, + "learning_rate": 1.3874067570096929e-05, + "loss": 0.4619, + "step": 27260 + }, + { + "epoch": 0.7485172981878089, + "grad_norm": 0.3529461920261383, + "learning_rate": 1.3873669400110278e-05, + "loss": 0.469, + "step": 27261 + }, + { + "epoch": 0.7485447556287754, + "grad_norm": 0.42865675687789917, + "learning_rate": 1.3873271222897998e-05, + "loss": 0.5093, + "step": 27262 + }, + { + "epoch": 0.7485722130697419, + "grad_norm": 0.46264100074768066, + "learning_rate": 1.3872873038460825e-05, + "loss": 0.4724, + "step": 27263 + }, + { + "epoch": 0.7485996705107084, + "grad_norm": 0.37311965227127075, + "learning_rate": 1.3872474846799514e-05, + "loss": 0.4561, + "step": 27264 + }, + { + "epoch": 0.7486271279516749, + "grad_norm": 0.37122562527656555, + "learning_rate": 1.3872076647914798e-05, + "loss": 0.5166, + "step": 27265 + }, + { + "epoch": 0.7486545853926414, + "grad_norm": 0.35183191299438477, + "learning_rate": 1.3871678441807426e-05, + "loss": 0.45, + "step": 27266 + }, + { + "epoch": 0.748682042833608, + "grad_norm": 0.4477759003639221, + "learning_rate": 1.3871280228478136e-05, + "loss": 0.4795, + "step": 27267 + }, + { + "epoch": 0.7487095002745744, + "grad_norm": 0.40349265933036804, + "learning_rate": 1.3870882007927673e-05, + "loss": 0.5608, + "step": 27268 + }, + { + "epoch": 0.7487369577155409, + "grad_norm": 0.394853413105011, + "learning_rate": 1.3870483780156781e-05, + "loss": 0.5217, + "step": 27269 + }, + { + "epoch": 0.7487644151565074, + "grad_norm": 0.41012370586395264, + "learning_rate": 1.38700855451662e-05, + "loss": 0.4976, + "step": 27270 + }, + { + "epoch": 0.7487918725974739, + "grad_norm": 0.43071407079696655, + "learning_rate": 1.3869687302956673e-05, + "loss": 0.5027, + "step": 27271 + }, + { + "epoch": 0.7488193300384404, + "grad_norm": 0.3650836944580078, + "learning_rate": 1.3869289053528948e-05, + "loss": 0.4555, + "step": 27272 + }, + { + "epoch": 0.7488467874794069, + "grad_norm": 0.3612661361694336, + "learning_rate": 1.386889079688376e-05, + "loss": 0.4749, + "step": 27273 + }, + { + "epoch": 0.7488742449203735, + "grad_norm": 0.36449575424194336, + "learning_rate": 1.3868492533021858e-05, + "loss": 0.4809, + "step": 27274 + }, + { + "epoch": 0.7489017023613399, + "grad_norm": 0.41477158665657043, + "learning_rate": 1.3868094261943982e-05, + "loss": 0.469, + "step": 27275 + }, + { + "epoch": 0.7489291598023065, + "grad_norm": 0.4054095447063446, + "learning_rate": 1.3867695983650875e-05, + "loss": 0.5849, + "step": 27276 + }, + { + "epoch": 0.7489566172432729, + "grad_norm": 0.520847737789154, + "learning_rate": 1.3867297698143283e-05, + "loss": 0.5741, + "step": 27277 + }, + { + "epoch": 0.7489840746842394, + "grad_norm": 0.40863898396492004, + "learning_rate": 1.3866899405421946e-05, + "loss": 0.5277, + "step": 27278 + }, + { + "epoch": 0.7490115321252059, + "grad_norm": 0.4149259626865387, + "learning_rate": 1.3866501105487606e-05, + "loss": 0.4874, + "step": 27279 + }, + { + "epoch": 0.7490389895661724, + "grad_norm": 0.42169731855392456, + "learning_rate": 1.3866102798341007e-05, + "loss": 0.5059, + "step": 27280 + }, + { + "epoch": 0.749066447007139, + "grad_norm": 0.39926689863204956, + "learning_rate": 1.3865704483982894e-05, + "loss": 0.4881, + "step": 27281 + }, + { + "epoch": 0.7490939044481054, + "grad_norm": 0.41997143626213074, + "learning_rate": 1.3865306162414008e-05, + "loss": 0.5304, + "step": 27282 + }, + { + "epoch": 0.749121361889072, + "grad_norm": 0.37914493680000305, + "learning_rate": 1.3864907833635091e-05, + "loss": 0.4346, + "step": 27283 + }, + { + "epoch": 0.7491488193300384, + "grad_norm": 0.3975631296634674, + "learning_rate": 1.3864509497646887e-05, + "loss": 0.4935, + "step": 27284 + }, + { + "epoch": 0.749176276771005, + "grad_norm": 0.39906707406044006, + "learning_rate": 1.3864111154450144e-05, + "loss": 0.4859, + "step": 27285 + }, + { + "epoch": 0.7492037342119714, + "grad_norm": 0.36917242407798767, + "learning_rate": 1.3863712804045595e-05, + "loss": 0.5077, + "step": 27286 + }, + { + "epoch": 0.749231191652938, + "grad_norm": 0.3992873728275299, + "learning_rate": 1.386331444643399e-05, + "loss": 0.5824, + "step": 27287 + }, + { + "epoch": 0.7492586490939045, + "grad_norm": 0.414122074842453, + "learning_rate": 1.3862916081616074e-05, + "loss": 0.4871, + "step": 27288 + }, + { + "epoch": 0.7492861065348709, + "grad_norm": 0.3591693937778473, + "learning_rate": 1.3862517709592583e-05, + "loss": 0.5037, + "step": 27289 + }, + { + "epoch": 0.7493135639758375, + "grad_norm": 0.3731274902820587, + "learning_rate": 1.3862119330364263e-05, + "loss": 0.5862, + "step": 27290 + }, + { + "epoch": 0.7493410214168039, + "grad_norm": 0.4219018816947937, + "learning_rate": 1.386172094393186e-05, + "loss": 0.4902, + "step": 27291 + }, + { + "epoch": 0.7493684788577705, + "grad_norm": 0.34861108660697937, + "learning_rate": 1.3861322550296112e-05, + "loss": 0.5196, + "step": 27292 + }, + { + "epoch": 0.7493959362987369, + "grad_norm": 0.3414864242076874, + "learning_rate": 1.3860924149457771e-05, + "loss": 0.4158, + "step": 27293 + }, + { + "epoch": 0.7494233937397035, + "grad_norm": 0.3952367603778839, + "learning_rate": 1.3860525741417568e-05, + "loss": 0.4989, + "step": 27294 + }, + { + "epoch": 0.74945085118067, + "grad_norm": 0.44005030393600464, + "learning_rate": 1.3860127326176256e-05, + "loss": 0.3815, + "step": 27295 + }, + { + "epoch": 0.7494783086216364, + "grad_norm": 0.40655073523521423, + "learning_rate": 1.385972890373457e-05, + "loss": 0.4548, + "step": 27296 + }, + { + "epoch": 0.749505766062603, + "grad_norm": 0.4122556447982788, + "learning_rate": 1.3859330474093265e-05, + "loss": 0.5193, + "step": 27297 + }, + { + "epoch": 0.7495332235035694, + "grad_norm": 0.4157485067844391, + "learning_rate": 1.3858932037253073e-05, + "loss": 0.5416, + "step": 27298 + }, + { + "epoch": 0.749560680944536, + "grad_norm": 0.37868767976760864, + "learning_rate": 1.3858533593214739e-05, + "loss": 0.4202, + "step": 27299 + }, + { + "epoch": 0.7495881383855024, + "grad_norm": 0.39639246463775635, + "learning_rate": 1.385813514197901e-05, + "loss": 0.4899, + "step": 27300 + }, + { + "epoch": 0.749615595826469, + "grad_norm": 0.3652653694152832, + "learning_rate": 1.385773668354663e-05, + "loss": 0.515, + "step": 27301 + }, + { + "epoch": 0.7496430532674355, + "grad_norm": 0.3428898751735687, + "learning_rate": 1.3857338217918334e-05, + "loss": 0.4462, + "step": 27302 + }, + { + "epoch": 0.749670510708402, + "grad_norm": 0.35772785544395447, + "learning_rate": 1.3856939745094878e-05, + "loss": 0.474, + "step": 27303 + }, + { + "epoch": 0.7496979681493685, + "grad_norm": 0.48533573746681213, + "learning_rate": 1.3856541265076993e-05, + "loss": 0.512, + "step": 27304 + }, + { + "epoch": 0.749725425590335, + "grad_norm": 0.442781537771225, + "learning_rate": 1.385614277786543e-05, + "loss": 0.4735, + "step": 27305 + }, + { + "epoch": 0.7497528830313015, + "grad_norm": 0.38486069440841675, + "learning_rate": 1.3855744283460932e-05, + "loss": 0.4823, + "step": 27306 + }, + { + "epoch": 0.7497803404722679, + "grad_norm": 0.45295071601867676, + "learning_rate": 1.3855345781864235e-05, + "loss": 0.5847, + "step": 27307 + }, + { + "epoch": 0.7498077979132345, + "grad_norm": 0.39545738697052, + "learning_rate": 1.3854947273076094e-05, + "loss": 0.4664, + "step": 27308 + }, + { + "epoch": 0.749835255354201, + "grad_norm": 0.3854566514492035, + "learning_rate": 1.3854548757097241e-05, + "loss": 0.4586, + "step": 27309 + }, + { + "epoch": 0.7498627127951675, + "grad_norm": 0.41221392154693604, + "learning_rate": 1.3854150233928427e-05, + "loss": 0.5205, + "step": 27310 + }, + { + "epoch": 0.749890170236134, + "grad_norm": 0.36695021390914917, + "learning_rate": 1.3853751703570393e-05, + "loss": 0.5047, + "step": 27311 + }, + { + "epoch": 0.7499176276771005, + "grad_norm": 0.37965846061706543, + "learning_rate": 1.385335316602388e-05, + "loss": 0.4602, + "step": 27312 + }, + { + "epoch": 0.749945085118067, + "grad_norm": 0.3498547375202179, + "learning_rate": 1.3852954621289634e-05, + "loss": 0.4908, + "step": 27313 + }, + { + "epoch": 0.7499725425590335, + "grad_norm": 0.438501238822937, + "learning_rate": 1.38525560693684e-05, + "loss": 0.446, + "step": 27314 + }, + { + "epoch": 0.75, + "grad_norm": 0.40804430842399597, + "learning_rate": 1.3852157510260918e-05, + "loss": 0.5047, + "step": 27315 + }, + { + "epoch": 0.7500274574409665, + "grad_norm": 0.3886106312274933, + "learning_rate": 1.3851758943967932e-05, + "loss": 0.4864, + "step": 27316 + }, + { + "epoch": 0.750054914881933, + "grad_norm": 0.3662477135658264, + "learning_rate": 1.3851360370490185e-05, + "loss": 0.4422, + "step": 27317 + }, + { + "epoch": 0.7500823723228995, + "grad_norm": 0.37884294986724854, + "learning_rate": 1.3850961789828423e-05, + "loss": 0.3827, + "step": 27318 + }, + { + "epoch": 0.750109829763866, + "grad_norm": 0.5056405067443848, + "learning_rate": 1.3850563201983392e-05, + "loss": 0.6113, + "step": 27319 + }, + { + "epoch": 0.7501372872048325, + "grad_norm": 0.34513652324676514, + "learning_rate": 1.3850164606955826e-05, + "loss": 0.4479, + "step": 27320 + }, + { + "epoch": 0.750164744645799, + "grad_norm": 0.4008210599422455, + "learning_rate": 1.3849766004746477e-05, + "loss": 0.5378, + "step": 27321 + }, + { + "epoch": 0.7501922020867655, + "grad_norm": 0.386120468378067, + "learning_rate": 1.3849367395356084e-05, + "loss": 0.4754, + "step": 27322 + }, + { + "epoch": 0.7502196595277321, + "grad_norm": 0.3881092071533203, + "learning_rate": 1.3848968778785395e-05, + "loss": 0.5055, + "step": 27323 + }, + { + "epoch": 0.7502471169686985, + "grad_norm": 0.3796946108341217, + "learning_rate": 1.3848570155035149e-05, + "loss": 0.5257, + "step": 27324 + }, + { + "epoch": 0.750274574409665, + "grad_norm": 0.38822445273399353, + "learning_rate": 1.3848171524106091e-05, + "loss": 0.485, + "step": 27325 + }, + { + "epoch": 0.7503020318506315, + "grad_norm": 0.5708234906196594, + "learning_rate": 1.3847772885998967e-05, + "loss": 0.4515, + "step": 27326 + }, + { + "epoch": 0.750329489291598, + "grad_norm": 0.34968551993370056, + "learning_rate": 1.3847374240714515e-05, + "loss": 0.4944, + "step": 27327 + }, + { + "epoch": 0.7503569467325645, + "grad_norm": 0.3837652802467346, + "learning_rate": 1.3846975588253484e-05, + "loss": 0.5144, + "step": 27328 + }, + { + "epoch": 0.750384404173531, + "grad_norm": 0.4440564513206482, + "learning_rate": 1.3846576928616618e-05, + "loss": 0.5365, + "step": 27329 + }, + { + "epoch": 0.7504118616144976, + "grad_norm": 0.38832637667655945, + "learning_rate": 1.3846178261804655e-05, + "loss": 0.5262, + "step": 27330 + }, + { + "epoch": 0.750439319055464, + "grad_norm": 0.3699714243412018, + "learning_rate": 1.3845779587818342e-05, + "loss": 0.4796, + "step": 27331 + }, + { + "epoch": 0.7504667764964306, + "grad_norm": 0.394499272108078, + "learning_rate": 1.3845380906658425e-05, + "loss": 0.542, + "step": 27332 + }, + { + "epoch": 0.750494233937397, + "grad_norm": 0.4773648679256439, + "learning_rate": 1.3844982218325642e-05, + "loss": 0.4834, + "step": 27333 + }, + { + "epoch": 0.7505216913783636, + "grad_norm": 0.42648574709892273, + "learning_rate": 1.3844583522820742e-05, + "loss": 0.4863, + "step": 27334 + }, + { + "epoch": 0.75054914881933, + "grad_norm": 0.47004643082618713, + "learning_rate": 1.3844184820144466e-05, + "loss": 0.5189, + "step": 27335 + }, + { + "epoch": 0.7505766062602965, + "grad_norm": 0.40205755829811096, + "learning_rate": 1.3843786110297562e-05, + "loss": 0.5569, + "step": 27336 + }, + { + "epoch": 0.7506040637012631, + "grad_norm": 0.49081623554229736, + "learning_rate": 1.3843387393280767e-05, + "loss": 0.5178, + "step": 27337 + }, + { + "epoch": 0.7506315211422295, + "grad_norm": 0.3894991874694824, + "learning_rate": 1.3842988669094828e-05, + "loss": 0.4543, + "step": 27338 + }, + { + "epoch": 0.7506589785831961, + "grad_norm": 0.3913445770740509, + "learning_rate": 1.3842589937740488e-05, + "loss": 0.5185, + "step": 27339 + }, + { + "epoch": 0.7506864360241625, + "grad_norm": 0.5093732476234436, + "learning_rate": 1.3842191199218491e-05, + "loss": 0.4766, + "step": 27340 + }, + { + "epoch": 0.7507138934651291, + "grad_norm": 0.35144326090812683, + "learning_rate": 1.3841792453529582e-05, + "loss": 0.4954, + "step": 27341 + }, + { + "epoch": 0.7507413509060955, + "grad_norm": 0.362045556306839, + "learning_rate": 1.3841393700674506e-05, + "loss": 0.4763, + "step": 27342 + }, + { + "epoch": 0.750768808347062, + "grad_norm": 0.37703239917755127, + "learning_rate": 1.3840994940654e-05, + "loss": 0.4955, + "step": 27343 + }, + { + "epoch": 0.7507962657880286, + "grad_norm": 0.382886677980423, + "learning_rate": 1.3840596173468817e-05, + "loss": 0.458, + "step": 27344 + }, + { + "epoch": 0.750823723228995, + "grad_norm": 0.3518584072589874, + "learning_rate": 1.3840197399119692e-05, + "loss": 0.4289, + "step": 27345 + }, + { + "epoch": 0.7508511806699616, + "grad_norm": 0.40720582008361816, + "learning_rate": 1.3839798617607379e-05, + "loss": 0.511, + "step": 27346 + }, + { + "epoch": 0.750878638110928, + "grad_norm": 0.36922648549079895, + "learning_rate": 1.3839399828932614e-05, + "loss": 0.4487, + "step": 27347 + }, + { + "epoch": 0.7509060955518946, + "grad_norm": 0.7264671921730042, + "learning_rate": 1.383900103309614e-05, + "loss": 0.4617, + "step": 27348 + }, + { + "epoch": 0.750933552992861, + "grad_norm": 0.479998916387558, + "learning_rate": 1.3838602230098706e-05, + "loss": 0.501, + "step": 27349 + }, + { + "epoch": 0.7509610104338276, + "grad_norm": 0.3706666827201843, + "learning_rate": 1.3838203419941055e-05, + "loss": 0.4693, + "step": 27350 + }, + { + "epoch": 0.7509884678747941, + "grad_norm": 0.37931665778160095, + "learning_rate": 1.3837804602623926e-05, + "loss": 0.4647, + "step": 27351 + }, + { + "epoch": 0.7510159253157606, + "grad_norm": 0.41075924038887024, + "learning_rate": 1.3837405778148067e-05, + "loss": 0.5135, + "step": 27352 + }, + { + "epoch": 0.7510433827567271, + "grad_norm": 0.35232123732566833, + "learning_rate": 1.3837006946514224e-05, + "loss": 0.4748, + "step": 27353 + }, + { + "epoch": 0.7510708401976935, + "grad_norm": 0.3913501799106598, + "learning_rate": 1.3836608107723137e-05, + "loss": 0.5794, + "step": 27354 + }, + { + "epoch": 0.7510982976386601, + "grad_norm": 0.4091120660305023, + "learning_rate": 1.3836209261775552e-05, + "loss": 0.5545, + "step": 27355 + }, + { + "epoch": 0.7511257550796265, + "grad_norm": 0.4256509244441986, + "learning_rate": 1.3835810408672212e-05, + "loss": 0.532, + "step": 27356 + }, + { + "epoch": 0.7511532125205931, + "grad_norm": 0.35041505098342896, + "learning_rate": 1.3835411548413862e-05, + "loss": 0.4737, + "step": 27357 + }, + { + "epoch": 0.7511806699615596, + "grad_norm": 0.42662370204925537, + "learning_rate": 1.3835012681001244e-05, + "loss": 0.5414, + "step": 27358 + }, + { + "epoch": 0.7512081274025261, + "grad_norm": 0.4302554428577423, + "learning_rate": 1.3834613806435104e-05, + "loss": 0.4963, + "step": 27359 + }, + { + "epoch": 0.7512355848434926, + "grad_norm": 0.4277259409427643, + "learning_rate": 1.3834214924716187e-05, + "loss": 0.5325, + "step": 27360 + }, + { + "epoch": 0.7512630422844591, + "grad_norm": 0.375774621963501, + "learning_rate": 1.3833816035845233e-05, + "loss": 0.4461, + "step": 27361 + }, + { + "epoch": 0.7512904997254256, + "grad_norm": 0.36699536442756653, + "learning_rate": 1.3833417139822988e-05, + "loss": 0.4731, + "step": 27362 + }, + { + "epoch": 0.751317957166392, + "grad_norm": 0.3365950584411621, + "learning_rate": 1.3833018236650198e-05, + "loss": 0.4517, + "step": 27363 + }, + { + "epoch": 0.7513454146073586, + "grad_norm": 0.33247217535972595, + "learning_rate": 1.3832619326327606e-05, + "loss": 0.4475, + "step": 27364 + }, + { + "epoch": 0.7513728720483251, + "grad_norm": 0.42874640226364136, + "learning_rate": 1.3832220408855957e-05, + "loss": 0.4884, + "step": 27365 + }, + { + "epoch": 0.7514003294892916, + "grad_norm": 0.38884368538856506, + "learning_rate": 1.383182148423599e-05, + "loss": 0.4474, + "step": 27366 + }, + { + "epoch": 0.7514277869302581, + "grad_norm": 0.343723326921463, + "learning_rate": 1.3831422552468456e-05, + "loss": 0.4138, + "step": 27367 + }, + { + "epoch": 0.7514552443712246, + "grad_norm": 0.41703569889068604, + "learning_rate": 1.3831023613554094e-05, + "loss": 0.5546, + "step": 27368 + }, + { + "epoch": 0.7514827018121911, + "grad_norm": 0.4082193970680237, + "learning_rate": 1.3830624667493653e-05, + "loss": 0.5022, + "step": 27369 + }, + { + "epoch": 0.7515101592531576, + "grad_norm": 0.4089539349079132, + "learning_rate": 1.3830225714287872e-05, + "loss": 0.4997, + "step": 27370 + }, + { + "epoch": 0.7515376166941241, + "grad_norm": 0.33666419982910156, + "learning_rate": 1.3829826753937498e-05, + "loss": 0.3632, + "step": 27371 + }, + { + "epoch": 0.7515650741350907, + "grad_norm": 0.44910866022109985, + "learning_rate": 1.3829427786443277e-05, + "loss": 0.5902, + "step": 27372 + }, + { + "epoch": 0.7515925315760571, + "grad_norm": 0.4061928391456604, + "learning_rate": 1.3829028811805947e-05, + "loss": 0.5335, + "step": 27373 + }, + { + "epoch": 0.7516199890170236, + "grad_norm": 0.4015025794506073, + "learning_rate": 1.382862983002626e-05, + "loss": 0.4857, + "step": 27374 + }, + { + "epoch": 0.7516474464579901, + "grad_norm": 0.3873145878314972, + "learning_rate": 1.3828230841104953e-05, + "loss": 0.5614, + "step": 27375 + }, + { + "epoch": 0.7516749038989566, + "grad_norm": 0.4389117956161499, + "learning_rate": 1.3827831845042775e-05, + "loss": 0.4316, + "step": 27376 + }, + { + "epoch": 0.7517023613399231, + "grad_norm": 0.4204513132572174, + "learning_rate": 1.3827432841840471e-05, + "loss": 0.5136, + "step": 27377 + }, + { + "epoch": 0.7517298187808896, + "grad_norm": 0.4550164043903351, + "learning_rate": 1.382703383149878e-05, + "loss": 0.5062, + "step": 27378 + }, + { + "epoch": 0.7517572762218562, + "grad_norm": 0.423313170671463, + "learning_rate": 1.3826634814018451e-05, + "loss": 0.5254, + "step": 27379 + }, + { + "epoch": 0.7517847336628226, + "grad_norm": 0.3931397795677185, + "learning_rate": 1.3826235789400226e-05, + "loss": 0.547, + "step": 27380 + }, + { + "epoch": 0.7518121911037892, + "grad_norm": 0.37100398540496826, + "learning_rate": 1.3825836757644853e-05, + "loss": 0.4762, + "step": 27381 + }, + { + "epoch": 0.7518396485447556, + "grad_norm": 0.34981009364128113, + "learning_rate": 1.382543771875307e-05, + "loss": 0.4222, + "step": 27382 + }, + { + "epoch": 0.7518671059857222, + "grad_norm": 0.36910736560821533, + "learning_rate": 1.3825038672725626e-05, + "loss": 0.467, + "step": 27383 + }, + { + "epoch": 0.7518945634266886, + "grad_norm": 0.44296619296073914, + "learning_rate": 1.3824639619563262e-05, + "loss": 0.485, + "step": 27384 + }, + { + "epoch": 0.7519220208676551, + "grad_norm": 0.38606807589530945, + "learning_rate": 1.3824240559266726e-05, + "loss": 0.4827, + "step": 27385 + }, + { + "epoch": 0.7519494783086217, + "grad_norm": 0.40094926953315735, + "learning_rate": 1.3823841491836762e-05, + "loss": 0.5463, + "step": 27386 + }, + { + "epoch": 0.7519769357495881, + "grad_norm": 0.41017380356788635, + "learning_rate": 1.382344241727411e-05, + "loss": 0.4977, + "step": 27387 + }, + { + "epoch": 0.7520043931905547, + "grad_norm": 0.47281554341316223, + "learning_rate": 1.3823043335579523e-05, + "loss": 0.5698, + "step": 27388 + }, + { + "epoch": 0.7520318506315211, + "grad_norm": 0.3336227536201477, + "learning_rate": 1.3822644246753738e-05, + "loss": 0.4565, + "step": 27389 + }, + { + "epoch": 0.7520593080724877, + "grad_norm": 0.5030280947685242, + "learning_rate": 1.3822245150797497e-05, + "loss": 0.4736, + "step": 27390 + }, + { + "epoch": 0.7520867655134541, + "grad_norm": 0.47680026292800903, + "learning_rate": 1.3821846047711553e-05, + "loss": 0.5012, + "step": 27391 + }, + { + "epoch": 0.7521142229544207, + "grad_norm": 0.49399617314338684, + "learning_rate": 1.3821446937496646e-05, + "loss": 0.6456, + "step": 27392 + }, + { + "epoch": 0.7521416803953872, + "grad_norm": 0.38945528864860535, + "learning_rate": 1.3821047820153521e-05, + "loss": 0.4638, + "step": 27393 + }, + { + "epoch": 0.7521691378363536, + "grad_norm": 0.36322858929634094, + "learning_rate": 1.3820648695682924e-05, + "loss": 0.5058, + "step": 27394 + }, + { + "epoch": 0.7521965952773202, + "grad_norm": 0.5027502775192261, + "learning_rate": 1.3820249564085592e-05, + "loss": 0.5272, + "step": 27395 + }, + { + "epoch": 0.7522240527182866, + "grad_norm": 0.3763788342475891, + "learning_rate": 1.381985042536228e-05, + "loss": 0.4819, + "step": 27396 + }, + { + "epoch": 0.7522515101592532, + "grad_norm": 0.4000832736492157, + "learning_rate": 1.3819451279513725e-05, + "loss": 0.5507, + "step": 27397 + }, + { + "epoch": 0.7522789676002196, + "grad_norm": 0.440574049949646, + "learning_rate": 1.3819052126540674e-05, + "loss": 0.5748, + "step": 27398 + }, + { + "epoch": 0.7523064250411862, + "grad_norm": 0.38016992807388306, + "learning_rate": 1.3818652966443877e-05, + "loss": 0.4941, + "step": 27399 + }, + { + "epoch": 0.7523338824821527, + "grad_norm": 0.40089645981788635, + "learning_rate": 1.3818253799224069e-05, + "loss": 0.5238, + "step": 27400 + }, + { + "epoch": 0.7523613399231192, + "grad_norm": 0.3849565088748932, + "learning_rate": 1.3817854624882002e-05, + "loss": 0.5341, + "step": 27401 + }, + { + "epoch": 0.7523887973640857, + "grad_norm": 0.34693220257759094, + "learning_rate": 1.3817455443418412e-05, + "loss": 0.4992, + "step": 27402 + }, + { + "epoch": 0.7524162548050521, + "grad_norm": 0.42022043466567993, + "learning_rate": 1.3817056254834057e-05, + "loss": 0.4595, + "step": 27403 + }, + { + "epoch": 0.7524437122460187, + "grad_norm": 0.360822468996048, + "learning_rate": 1.3816657059129668e-05, + "loss": 0.4415, + "step": 27404 + }, + { + "epoch": 0.7524711696869851, + "grad_norm": 0.3601246476173401, + "learning_rate": 1.3816257856305997e-05, + "loss": 0.4151, + "step": 27405 + }, + { + "epoch": 0.7524986271279517, + "grad_norm": 0.374319851398468, + "learning_rate": 1.3815858646363789e-05, + "loss": 0.5255, + "step": 27406 + }, + { + "epoch": 0.7525260845689182, + "grad_norm": 0.40697139501571655, + "learning_rate": 1.3815459429303784e-05, + "loss": 0.567, + "step": 27407 + }, + { + "epoch": 0.7525535420098847, + "grad_norm": 0.33135420083999634, + "learning_rate": 1.381506020512673e-05, + "loss": 0.3816, + "step": 27408 + }, + { + "epoch": 0.7525809994508512, + "grad_norm": 0.3707138001918793, + "learning_rate": 1.3814660973833372e-05, + "loss": 0.4718, + "step": 27409 + }, + { + "epoch": 0.7526084568918177, + "grad_norm": 0.3785172402858734, + "learning_rate": 1.3814261735424453e-05, + "loss": 0.5114, + "step": 27410 + }, + { + "epoch": 0.7526359143327842, + "grad_norm": 0.41617366671562195, + "learning_rate": 1.3813862489900719e-05, + "loss": 0.55, + "step": 27411 + }, + { + "epoch": 0.7526633717737506, + "grad_norm": 0.39219939708709717, + "learning_rate": 1.3813463237262913e-05, + "loss": 0.4659, + "step": 27412 + }, + { + "epoch": 0.7526908292147172, + "grad_norm": 0.3991316556930542, + "learning_rate": 1.3813063977511783e-05, + "loss": 0.4545, + "step": 27413 + }, + { + "epoch": 0.7527182866556837, + "grad_norm": 0.39374956488609314, + "learning_rate": 1.3812664710648072e-05, + "loss": 0.519, + "step": 27414 + }, + { + "epoch": 0.7527457440966502, + "grad_norm": 0.36585918068885803, + "learning_rate": 1.381226543667252e-05, + "loss": 0.4567, + "step": 27415 + }, + { + "epoch": 0.7527732015376167, + "grad_norm": 0.42882782220840454, + "learning_rate": 1.3811866155585883e-05, + "loss": 0.4442, + "step": 27416 + }, + { + "epoch": 0.7528006589785832, + "grad_norm": 0.41047266125679016, + "learning_rate": 1.3811466867388894e-05, + "loss": 0.5326, + "step": 27417 + }, + { + "epoch": 0.7528281164195497, + "grad_norm": 0.40959838032722473, + "learning_rate": 1.3811067572082304e-05, + "loss": 0.4822, + "step": 27418 + }, + { + "epoch": 0.7528555738605162, + "grad_norm": 0.365744948387146, + "learning_rate": 1.3810668269666856e-05, + "loss": 0.5346, + "step": 27419 + }, + { + "epoch": 0.7528830313014827, + "grad_norm": 0.3976588547229767, + "learning_rate": 1.3810268960143296e-05, + "loss": 0.4374, + "step": 27420 + }, + { + "epoch": 0.7529104887424493, + "grad_norm": 0.37935927510261536, + "learning_rate": 1.3809869643512368e-05, + "loss": 0.5137, + "step": 27421 + }, + { + "epoch": 0.7529379461834157, + "grad_norm": 0.42621850967407227, + "learning_rate": 1.3809470319774821e-05, + "loss": 0.5077, + "step": 27422 + }, + { + "epoch": 0.7529654036243822, + "grad_norm": 0.34526926279067993, + "learning_rate": 1.380907098893139e-05, + "loss": 0.4688, + "step": 27423 + }, + { + "epoch": 0.7529928610653487, + "grad_norm": 0.38526463508605957, + "learning_rate": 1.3808671650982831e-05, + "loss": 0.433, + "step": 27424 + }, + { + "epoch": 0.7530203185063152, + "grad_norm": 0.37267425656318665, + "learning_rate": 1.3808272305929879e-05, + "loss": 0.4488, + "step": 27425 + }, + { + "epoch": 0.7530477759472817, + "grad_norm": 0.34904181957244873, + "learning_rate": 1.3807872953773287e-05, + "loss": 0.4018, + "step": 27426 + }, + { + "epoch": 0.7530752333882482, + "grad_norm": 0.36720648407936096, + "learning_rate": 1.3807473594513797e-05, + "loss": 0.4553, + "step": 27427 + }, + { + "epoch": 0.7531026908292148, + "grad_norm": 0.3960861563682556, + "learning_rate": 1.380707422815215e-05, + "loss": 0.4306, + "step": 27428 + }, + { + "epoch": 0.7531301482701812, + "grad_norm": 0.37422430515289307, + "learning_rate": 1.3806674854689097e-05, + "loss": 0.4784, + "step": 27429 + }, + { + "epoch": 0.7531576057111478, + "grad_norm": 0.4131346046924591, + "learning_rate": 1.3806275474125381e-05, + "loss": 0.516, + "step": 27430 + }, + { + "epoch": 0.7531850631521142, + "grad_norm": 0.3767428994178772, + "learning_rate": 1.3805876086461744e-05, + "loss": 0.5444, + "step": 27431 + }, + { + "epoch": 0.7532125205930807, + "grad_norm": 0.38154444098472595, + "learning_rate": 1.3805476691698937e-05, + "loss": 0.4913, + "step": 27432 + }, + { + "epoch": 0.7532399780340472, + "grad_norm": 0.36562278866767883, + "learning_rate": 1.3805077289837698e-05, + "loss": 0.5133, + "step": 27433 + }, + { + "epoch": 0.7532674354750137, + "grad_norm": 0.410063773393631, + "learning_rate": 1.3804677880878778e-05, + "loss": 0.5191, + "step": 27434 + }, + { + "epoch": 0.7532948929159803, + "grad_norm": 0.39462798833847046, + "learning_rate": 1.380427846482292e-05, + "loss": 0.5038, + "step": 27435 + }, + { + "epoch": 0.7533223503569467, + "grad_norm": 0.3686285614967346, + "learning_rate": 1.3803879041670864e-05, + "loss": 0.5295, + "step": 27436 + }, + { + "epoch": 0.7533498077979133, + "grad_norm": 0.4308117926120758, + "learning_rate": 1.3803479611423366e-05, + "loss": 0.5169, + "step": 27437 + }, + { + "epoch": 0.7533772652388797, + "grad_norm": 0.3405895233154297, + "learning_rate": 1.3803080174081158e-05, + "loss": 0.475, + "step": 27438 + }, + { + "epoch": 0.7534047226798463, + "grad_norm": 0.3409496247768402, + "learning_rate": 1.3802680729644995e-05, + "loss": 0.3752, + "step": 27439 + }, + { + "epoch": 0.7534321801208127, + "grad_norm": 0.3883706331253052, + "learning_rate": 1.380228127811562e-05, + "loss": 0.4774, + "step": 27440 + }, + { + "epoch": 0.7534596375617792, + "grad_norm": 0.3556082248687744, + "learning_rate": 1.3801881819493772e-05, + "loss": 0.4838, + "step": 27441 + }, + { + "epoch": 0.7534870950027458, + "grad_norm": 0.4511399567127228, + "learning_rate": 1.3801482353780204e-05, + "loss": 0.595, + "step": 27442 + }, + { + "epoch": 0.7535145524437122, + "grad_norm": 0.41371944546699524, + "learning_rate": 1.380108288097566e-05, + "loss": 0.6038, + "step": 27443 + }, + { + "epoch": 0.7535420098846788, + "grad_norm": 0.34458720684051514, + "learning_rate": 1.3800683401080882e-05, + "loss": 0.4381, + "step": 27444 + }, + { + "epoch": 0.7535694673256452, + "grad_norm": 0.4064805805683136, + "learning_rate": 1.3800283914096616e-05, + "loss": 0.539, + "step": 27445 + }, + { + "epoch": 0.7535969247666118, + "grad_norm": 0.39985084533691406, + "learning_rate": 1.3799884420023606e-05, + "loss": 0.4702, + "step": 27446 + }, + { + "epoch": 0.7536243822075782, + "grad_norm": 0.3620544672012329, + "learning_rate": 1.37994849188626e-05, + "loss": 0.5339, + "step": 27447 + }, + { + "epoch": 0.7536518396485448, + "grad_norm": 0.5194942951202393, + "learning_rate": 1.3799085410614343e-05, + "loss": 0.5034, + "step": 27448 + }, + { + "epoch": 0.7536792970895112, + "grad_norm": 0.42927801609039307, + "learning_rate": 1.3798685895279578e-05, + "loss": 0.5396, + "step": 27449 + }, + { + "epoch": 0.7537067545304778, + "grad_norm": 0.4543072283267975, + "learning_rate": 1.3798286372859053e-05, + "loss": 0.5618, + "step": 27450 + }, + { + "epoch": 0.7537342119714443, + "grad_norm": 0.4229802191257477, + "learning_rate": 1.379788684335351e-05, + "loss": 0.5486, + "step": 27451 + }, + { + "epoch": 0.7537616694124107, + "grad_norm": 0.3706176280975342, + "learning_rate": 1.3797487306763698e-05, + "loss": 0.4763, + "step": 27452 + }, + { + "epoch": 0.7537891268533773, + "grad_norm": 0.4034881293773651, + "learning_rate": 1.3797087763090357e-05, + "loss": 0.5139, + "step": 27453 + }, + { + "epoch": 0.7538165842943437, + "grad_norm": 0.3683713674545288, + "learning_rate": 1.3796688212334238e-05, + "loss": 0.4798, + "step": 27454 + }, + { + "epoch": 0.7538440417353103, + "grad_norm": 0.43784400820732117, + "learning_rate": 1.3796288654496081e-05, + "loss": 0.5323, + "step": 27455 + }, + { + "epoch": 0.7538714991762767, + "grad_norm": 0.3527250587940216, + "learning_rate": 1.3795889089576636e-05, + "loss": 0.4427, + "step": 27456 + }, + { + "epoch": 0.7538989566172433, + "grad_norm": 0.3878675103187561, + "learning_rate": 1.3795489517576645e-05, + "loss": 0.5214, + "step": 27457 + }, + { + "epoch": 0.7539264140582098, + "grad_norm": 0.380063533782959, + "learning_rate": 1.3795089938496857e-05, + "loss": 0.5037, + "step": 27458 + }, + { + "epoch": 0.7539538714991763, + "grad_norm": 0.37094417214393616, + "learning_rate": 1.3794690352338014e-05, + "loss": 0.5636, + "step": 27459 + }, + { + "epoch": 0.7539813289401428, + "grad_norm": 0.5691012740135193, + "learning_rate": 1.3794290759100863e-05, + "loss": 0.5687, + "step": 27460 + }, + { + "epoch": 0.7540087863811092, + "grad_norm": 0.38244369626045227, + "learning_rate": 1.3793891158786149e-05, + "loss": 0.4663, + "step": 27461 + }, + { + "epoch": 0.7540362438220758, + "grad_norm": 0.4005008935928345, + "learning_rate": 1.3793491551394613e-05, + "loss": 0.5172, + "step": 27462 + }, + { + "epoch": 0.7540637012630422, + "grad_norm": 0.3730928599834442, + "learning_rate": 1.379309193692701e-05, + "loss": 0.4635, + "step": 27463 + }, + { + "epoch": 0.7540911587040088, + "grad_norm": 0.36966219544410706, + "learning_rate": 1.3792692315384076e-05, + "loss": 0.4395, + "step": 27464 + }, + { + "epoch": 0.7541186161449753, + "grad_norm": 0.49581119418144226, + "learning_rate": 1.3792292686766561e-05, + "loss": 0.5116, + "step": 27465 + }, + { + "epoch": 0.7541460735859418, + "grad_norm": 0.40539538860321045, + "learning_rate": 1.3791893051075214e-05, + "loss": 0.5251, + "step": 27466 + }, + { + "epoch": 0.7541735310269083, + "grad_norm": 0.37437543272972107, + "learning_rate": 1.379149340831077e-05, + "loss": 0.5035, + "step": 27467 + }, + { + "epoch": 0.7542009884678748, + "grad_norm": 0.3826046884059906, + "learning_rate": 1.3791093758473984e-05, + "loss": 0.3972, + "step": 27468 + }, + { + "epoch": 0.7542284459088413, + "grad_norm": 0.3767905533313751, + "learning_rate": 1.3790694101565598e-05, + "loss": 0.4711, + "step": 27469 + }, + { + "epoch": 0.7542559033498077, + "grad_norm": 0.36417174339294434, + "learning_rate": 1.3790294437586358e-05, + "loss": 0.4459, + "step": 27470 + }, + { + "epoch": 0.7542833607907743, + "grad_norm": 0.36076247692108154, + "learning_rate": 1.378989476653701e-05, + "loss": 0.499, + "step": 27471 + }, + { + "epoch": 0.7543108182317408, + "grad_norm": 0.9807671904563904, + "learning_rate": 1.3789495088418293e-05, + "loss": 0.6576, + "step": 27472 + }, + { + "epoch": 0.7543382756727073, + "grad_norm": 0.3835633099079132, + "learning_rate": 1.3789095403230962e-05, + "loss": 0.4201, + "step": 27473 + }, + { + "epoch": 0.7543657331136738, + "grad_norm": 0.38365963101387024, + "learning_rate": 1.3788695710975761e-05, + "loss": 0.4819, + "step": 27474 + }, + { + "epoch": 0.7543931905546403, + "grad_norm": 0.39149606227874756, + "learning_rate": 1.378829601165343e-05, + "loss": 0.4258, + "step": 27475 + }, + { + "epoch": 0.7544206479956068, + "grad_norm": 0.37589630484580994, + "learning_rate": 1.3787896305264721e-05, + "loss": 0.5428, + "step": 27476 + }, + { + "epoch": 0.7544481054365733, + "grad_norm": 0.38685938715934753, + "learning_rate": 1.3787496591810374e-05, + "loss": 0.5408, + "step": 27477 + }, + { + "epoch": 0.7544755628775398, + "grad_norm": 0.5689713954925537, + "learning_rate": 1.3787096871291139e-05, + "loss": 0.5971, + "step": 27478 + }, + { + "epoch": 0.7545030203185064, + "grad_norm": 0.4438979923725128, + "learning_rate": 1.3786697143707759e-05, + "loss": 0.5404, + "step": 27479 + }, + { + "epoch": 0.7545304777594728, + "grad_norm": 0.35911253094673157, + "learning_rate": 1.3786297409060978e-05, + "loss": 0.5055, + "step": 27480 + }, + { + "epoch": 0.7545579352004393, + "grad_norm": 0.39251431822776794, + "learning_rate": 1.3785897667351545e-05, + "loss": 0.5412, + "step": 27481 + }, + { + "epoch": 0.7545853926414058, + "grad_norm": 0.3521425724029541, + "learning_rate": 1.3785497918580205e-05, + "loss": 0.4863, + "step": 27482 + }, + { + "epoch": 0.7546128500823723, + "grad_norm": 0.43061575293540955, + "learning_rate": 1.3785098162747703e-05, + "loss": 0.5482, + "step": 27483 + }, + { + "epoch": 0.7546403075233388, + "grad_norm": 0.3611091077327728, + "learning_rate": 1.3784698399854786e-05, + "loss": 0.4043, + "step": 27484 + }, + { + "epoch": 0.7546677649643053, + "grad_norm": 0.38685232400894165, + "learning_rate": 1.3784298629902195e-05, + "loss": 0.4695, + "step": 27485 + }, + { + "epoch": 0.7546952224052719, + "grad_norm": 0.3795594573020935, + "learning_rate": 1.3783898852890683e-05, + "loss": 0.5184, + "step": 27486 + }, + { + "epoch": 0.7547226798462383, + "grad_norm": 0.4886377453804016, + "learning_rate": 1.378349906882099e-05, + "loss": 0.4802, + "step": 27487 + }, + { + "epoch": 0.7547501372872049, + "grad_norm": 0.3918628394603729, + "learning_rate": 1.3783099277693866e-05, + "loss": 0.4373, + "step": 27488 + }, + { + "epoch": 0.7547775947281713, + "grad_norm": 0.500649094581604, + "learning_rate": 1.3782699479510052e-05, + "loss": 0.528, + "step": 27489 + }, + { + "epoch": 0.7548050521691378, + "grad_norm": 0.3562527596950531, + "learning_rate": 1.3782299674270297e-05, + "loss": 0.4898, + "step": 27490 + }, + { + "epoch": 0.7548325096101043, + "grad_norm": 0.5865973830223083, + "learning_rate": 1.3781899861975347e-05, + "loss": 0.4915, + "step": 27491 + }, + { + "epoch": 0.7548599670510708, + "grad_norm": 0.3637229800224304, + "learning_rate": 1.3781500042625946e-05, + "loss": 0.4518, + "step": 27492 + }, + { + "epoch": 0.7548874244920374, + "grad_norm": 0.46118590235710144, + "learning_rate": 1.378110021622284e-05, + "loss": 0.5998, + "step": 27493 + }, + { + "epoch": 0.7549148819330038, + "grad_norm": 0.4428277015686035, + "learning_rate": 1.3780700382766776e-05, + "loss": 0.5025, + "step": 27494 + }, + { + "epoch": 0.7549423393739704, + "grad_norm": 0.4031149446964264, + "learning_rate": 1.3780300542258496e-05, + "loss": 0.4418, + "step": 27495 + }, + { + "epoch": 0.7549697968149368, + "grad_norm": 0.37004002928733826, + "learning_rate": 1.3779900694698754e-05, + "loss": 0.501, + "step": 27496 + }, + { + "epoch": 0.7549972542559034, + "grad_norm": 0.497615247964859, + "learning_rate": 1.377950084008829e-05, + "loss": 0.4866, + "step": 27497 + }, + { + "epoch": 0.7550247116968698, + "grad_norm": 0.3661970794200897, + "learning_rate": 1.3779100978427846e-05, + "loss": 0.5456, + "step": 27498 + }, + { + "epoch": 0.7550521691378363, + "grad_norm": 0.342549204826355, + "learning_rate": 1.3778701109718178e-05, + "loss": 0.4795, + "step": 27499 + }, + { + "epoch": 0.7550796265788029, + "grad_norm": 0.3651229441165924, + "learning_rate": 1.3778301233960022e-05, + "loss": 0.4366, + "step": 27500 + }, + { + "epoch": 0.7551070840197693, + "grad_norm": 0.38635557889938354, + "learning_rate": 1.377790135115413e-05, + "loss": 0.4826, + "step": 27501 + }, + { + "epoch": 0.7551345414607359, + "grad_norm": 0.5230139493942261, + "learning_rate": 1.3777501461301249e-05, + "loss": 0.4723, + "step": 27502 + }, + { + "epoch": 0.7551619989017023, + "grad_norm": 0.3606136739253998, + "learning_rate": 1.3777101564402117e-05, + "loss": 0.5097, + "step": 27503 + }, + { + "epoch": 0.7551894563426689, + "grad_norm": 0.3978869318962097, + "learning_rate": 1.377670166045749e-05, + "loss": 0.5297, + "step": 27504 + }, + { + "epoch": 0.7552169137836353, + "grad_norm": 0.3582232594490051, + "learning_rate": 1.3776301749468106e-05, + "loss": 0.5088, + "step": 27505 + }, + { + "epoch": 0.7552443712246019, + "grad_norm": 0.4276093542575836, + "learning_rate": 1.3775901831434713e-05, + "loss": 0.5309, + "step": 27506 + }, + { + "epoch": 0.7552718286655684, + "grad_norm": 0.3917903006076813, + "learning_rate": 1.3775501906358062e-05, + "loss": 0.5128, + "step": 27507 + }, + { + "epoch": 0.7552992861065349, + "grad_norm": 0.3940390646457672, + "learning_rate": 1.377510197423889e-05, + "loss": 0.5618, + "step": 27508 + }, + { + "epoch": 0.7553267435475014, + "grad_norm": 0.3739676773548126, + "learning_rate": 1.3774702035077951e-05, + "loss": 0.4741, + "step": 27509 + }, + { + "epoch": 0.7553542009884678, + "grad_norm": 0.40808942914009094, + "learning_rate": 1.3774302088875987e-05, + "loss": 0.5045, + "step": 27510 + }, + { + "epoch": 0.7553816584294344, + "grad_norm": 0.6243113279342651, + "learning_rate": 1.3773902135633747e-05, + "loss": 0.4722, + "step": 27511 + }, + { + "epoch": 0.7554091158704008, + "grad_norm": 0.37808215618133545, + "learning_rate": 1.3773502175351973e-05, + "loss": 0.4433, + "step": 27512 + }, + { + "epoch": 0.7554365733113674, + "grad_norm": 0.4219508767127991, + "learning_rate": 1.3773102208031413e-05, + "loss": 0.5362, + "step": 27513 + }, + { + "epoch": 0.7554640307523339, + "grad_norm": 0.4148716926574707, + "learning_rate": 1.3772702233672814e-05, + "loss": 0.513, + "step": 27514 + }, + { + "epoch": 0.7554914881933004, + "grad_norm": 0.40660879015922546, + "learning_rate": 1.3772302252276924e-05, + "loss": 0.4511, + "step": 27515 + }, + { + "epoch": 0.7555189456342669, + "grad_norm": 0.3832208812236786, + "learning_rate": 1.3771902263844481e-05, + "loss": 0.4975, + "step": 27516 + }, + { + "epoch": 0.7555464030752334, + "grad_norm": 0.41661015152931213, + "learning_rate": 1.3771502268376237e-05, + "loss": 0.5349, + "step": 27517 + }, + { + "epoch": 0.7555738605161999, + "grad_norm": 0.4264613091945648, + "learning_rate": 1.377110226587294e-05, + "loss": 0.4368, + "step": 27518 + }, + { + "epoch": 0.7556013179571663, + "grad_norm": 0.3765987753868103, + "learning_rate": 1.3770702256335333e-05, + "loss": 0.5371, + "step": 27519 + }, + { + "epoch": 0.7556287753981329, + "grad_norm": 0.3989420235157013, + "learning_rate": 1.3770302239764165e-05, + "loss": 0.4593, + "step": 27520 + }, + { + "epoch": 0.7556562328390994, + "grad_norm": 0.36543136835098267, + "learning_rate": 1.3769902216160176e-05, + "loss": 0.4634, + "step": 27521 + }, + { + "epoch": 0.7556836902800659, + "grad_norm": 0.3724704384803772, + "learning_rate": 1.376950218552412e-05, + "loss": 0.4284, + "step": 27522 + }, + { + "epoch": 0.7557111477210324, + "grad_norm": 0.3520794212818146, + "learning_rate": 1.3769102147856737e-05, + "loss": 0.4536, + "step": 27523 + }, + { + "epoch": 0.7557386051619989, + "grad_norm": 0.44091513752937317, + "learning_rate": 1.3768702103158775e-05, + "loss": 0.489, + "step": 27524 + }, + { + "epoch": 0.7557660626029654, + "grad_norm": 0.41204312443733215, + "learning_rate": 1.3768302051430982e-05, + "loss": 0.4473, + "step": 27525 + }, + { + "epoch": 0.7557935200439319, + "grad_norm": 0.4513415992259979, + "learning_rate": 1.37679019926741e-05, + "loss": 0.4873, + "step": 27526 + }, + { + "epoch": 0.7558209774848984, + "grad_norm": 0.40007126331329346, + "learning_rate": 1.3767501926888882e-05, + "loss": 0.4542, + "step": 27527 + }, + { + "epoch": 0.755848434925865, + "grad_norm": 0.38244155049324036, + "learning_rate": 1.376710185407607e-05, + "loss": 0.5178, + "step": 27528 + }, + { + "epoch": 0.7558758923668314, + "grad_norm": 0.38126128911972046, + "learning_rate": 1.3766701774236409e-05, + "loss": 0.4114, + "step": 27529 + }, + { + "epoch": 0.7559033498077979, + "grad_norm": 0.5126073956489563, + "learning_rate": 1.3766301687370649e-05, + "loss": 0.516, + "step": 27530 + }, + { + "epoch": 0.7559308072487644, + "grad_norm": 0.4202978312969208, + "learning_rate": 1.376590159347953e-05, + "loss": 0.4936, + "step": 27531 + }, + { + "epoch": 0.7559582646897309, + "grad_norm": 0.4048677980899811, + "learning_rate": 1.376550149256381e-05, + "loss": 0.5367, + "step": 27532 + }, + { + "epoch": 0.7559857221306974, + "grad_norm": 0.4019112288951874, + "learning_rate": 1.3765101384624221e-05, + "loss": 0.5163, + "step": 27533 + }, + { + "epoch": 0.7560131795716639, + "grad_norm": 0.4039137363433838, + "learning_rate": 1.376470126966152e-05, + "loss": 0.538, + "step": 27534 + }, + { + "epoch": 0.7560406370126305, + "grad_norm": 0.35743454098701477, + "learning_rate": 1.376430114767645e-05, + "loss": 0.4983, + "step": 27535 + }, + { + "epoch": 0.7560680944535969, + "grad_norm": 0.4129966199398041, + "learning_rate": 1.3763901018669756e-05, + "loss": 0.4663, + "step": 27536 + }, + { + "epoch": 0.7560955518945635, + "grad_norm": 0.3555627465248108, + "learning_rate": 1.3763500882642183e-05, + "loss": 0.457, + "step": 27537 + }, + { + "epoch": 0.7561230093355299, + "grad_norm": 0.4347292482852936, + "learning_rate": 1.3763100739594482e-05, + "loss": 0.4133, + "step": 27538 + }, + { + "epoch": 0.7561504667764964, + "grad_norm": 0.34317827224731445, + "learning_rate": 1.3762700589527396e-05, + "loss": 0.4884, + "step": 27539 + }, + { + "epoch": 0.7561779242174629, + "grad_norm": 0.41400739550590515, + "learning_rate": 1.3762300432441674e-05, + "loss": 0.6011, + "step": 27540 + }, + { + "epoch": 0.7562053816584294, + "grad_norm": 0.38131603598594666, + "learning_rate": 1.376190026833806e-05, + "loss": 0.3998, + "step": 27541 + }, + { + "epoch": 0.756232839099396, + "grad_norm": 0.3886866867542267, + "learning_rate": 1.3761500097217299e-05, + "loss": 0.4698, + "step": 27542 + }, + { + "epoch": 0.7562602965403624, + "grad_norm": 0.4297283887863159, + "learning_rate": 1.3761099919080143e-05, + "loss": 0.556, + "step": 27543 + }, + { + "epoch": 0.756287753981329, + "grad_norm": 0.5573281645774841, + "learning_rate": 1.3760699733927334e-05, + "loss": 0.5372, + "step": 27544 + }, + { + "epoch": 0.7563152114222954, + "grad_norm": 0.40695157647132874, + "learning_rate": 1.376029954175962e-05, + "loss": 0.447, + "step": 27545 + }, + { + "epoch": 0.756342668863262, + "grad_norm": 0.4201756715774536, + "learning_rate": 1.375989934257775e-05, + "loss": 0.475, + "step": 27546 + }, + { + "epoch": 0.7563701263042284, + "grad_norm": 0.34100592136383057, + "learning_rate": 1.3759499136382462e-05, + "loss": 0.4757, + "step": 27547 + }, + { + "epoch": 0.756397583745195, + "grad_norm": 0.4017561376094818, + "learning_rate": 1.3759098923174512e-05, + "loss": 0.5395, + "step": 27548 + }, + { + "epoch": 0.7564250411861615, + "grad_norm": 0.3643178641796112, + "learning_rate": 1.375869870295464e-05, + "loss": 0.5211, + "step": 27549 + }, + { + "epoch": 0.7564524986271279, + "grad_norm": 0.4174191653728485, + "learning_rate": 1.37582984757236e-05, + "loss": 0.5496, + "step": 27550 + }, + { + "epoch": 0.7564799560680945, + "grad_norm": 0.36081743240356445, + "learning_rate": 1.3757898241482128e-05, + "loss": 0.4641, + "step": 27551 + }, + { + "epoch": 0.7565074135090609, + "grad_norm": 0.4405480921268463, + "learning_rate": 1.375749800023098e-05, + "loss": 0.5081, + "step": 27552 + }, + { + "epoch": 0.7565348709500275, + "grad_norm": 0.3774234354496002, + "learning_rate": 1.3757097751970898e-05, + "loss": 0.5286, + "step": 27553 + }, + { + "epoch": 0.7565623283909939, + "grad_norm": 0.4056599736213684, + "learning_rate": 1.3756697496702631e-05, + "loss": 0.5921, + "step": 27554 + }, + { + "epoch": 0.7565897858319605, + "grad_norm": 0.36997726559638977, + "learning_rate": 1.3756297234426923e-05, + "loss": 0.5274, + "step": 27555 + }, + { + "epoch": 0.756617243272927, + "grad_norm": 0.40627744793891907, + "learning_rate": 1.3755896965144521e-05, + "loss": 0.4804, + "step": 27556 + }, + { + "epoch": 0.7566447007138934, + "grad_norm": 0.3889913856983185, + "learning_rate": 1.3755496688856173e-05, + "loss": 0.4592, + "step": 27557 + }, + { + "epoch": 0.75667215815486, + "grad_norm": 0.3482077121734619, + "learning_rate": 1.3755096405562627e-05, + "loss": 0.4783, + "step": 27558 + }, + { + "epoch": 0.7566996155958264, + "grad_norm": 0.3734759986400604, + "learning_rate": 1.3754696115264626e-05, + "loss": 0.4343, + "step": 27559 + }, + { + "epoch": 0.756727073036793, + "grad_norm": 0.3776051998138428, + "learning_rate": 1.3754295817962917e-05, + "loss": 0.4727, + "step": 27560 + }, + { + "epoch": 0.7567545304777594, + "grad_norm": 0.35889962315559387, + "learning_rate": 1.3753895513658249e-05, + "loss": 0.4344, + "step": 27561 + }, + { + "epoch": 0.756781987918726, + "grad_norm": 0.39201265573501587, + "learning_rate": 1.3753495202351368e-05, + "loss": 0.5141, + "step": 27562 + }, + { + "epoch": 0.7568094453596925, + "grad_norm": 0.39373087882995605, + "learning_rate": 1.3753094884043023e-05, + "loss": 0.4385, + "step": 27563 + }, + { + "epoch": 0.756836902800659, + "grad_norm": 0.3407222330570221, + "learning_rate": 1.3752694558733954e-05, + "loss": 0.4612, + "step": 27564 + }, + { + "epoch": 0.7568643602416255, + "grad_norm": 0.3993576765060425, + "learning_rate": 1.3752294226424916e-05, + "loss": 0.5612, + "step": 27565 + }, + { + "epoch": 0.756891817682592, + "grad_norm": 0.4099799692630768, + "learning_rate": 1.3751893887116647e-05, + "loss": 0.5849, + "step": 27566 + }, + { + "epoch": 0.7569192751235585, + "grad_norm": 0.4852117598056793, + "learning_rate": 1.3751493540809901e-05, + "loss": 0.4857, + "step": 27567 + }, + { + "epoch": 0.7569467325645249, + "grad_norm": 0.3952242434024811, + "learning_rate": 1.3751093187505422e-05, + "loss": 0.5127, + "step": 27568 + }, + { + "epoch": 0.7569741900054915, + "grad_norm": 0.399491548538208, + "learning_rate": 1.3750692827203957e-05, + "loss": 0.5351, + "step": 27569 + }, + { + "epoch": 0.757001647446458, + "grad_norm": 0.4401789605617523, + "learning_rate": 1.3750292459906253e-05, + "loss": 0.5556, + "step": 27570 + }, + { + "epoch": 0.7570291048874245, + "grad_norm": 0.5013060569763184, + "learning_rate": 1.3749892085613057e-05, + "loss": 0.542, + "step": 27571 + }, + { + "epoch": 0.757056562328391, + "grad_norm": 0.3615363538265228, + "learning_rate": 1.3749491704325117e-05, + "loss": 0.4643, + "step": 27572 + }, + { + "epoch": 0.7570840197693575, + "grad_norm": 0.39951735734939575, + "learning_rate": 1.3749091316043175e-05, + "loss": 0.4758, + "step": 27573 + }, + { + "epoch": 0.757111477210324, + "grad_norm": 0.3840891420841217, + "learning_rate": 1.3748690920767982e-05, + "loss": 0.5358, + "step": 27574 + }, + { + "epoch": 0.7571389346512905, + "grad_norm": 0.3357703983783722, + "learning_rate": 1.3748290518500285e-05, + "loss": 0.4821, + "step": 27575 + }, + { + "epoch": 0.757166392092257, + "grad_norm": 0.3693673312664032, + "learning_rate": 1.374789010924083e-05, + "loss": 0.5422, + "step": 27576 + }, + { + "epoch": 0.7571938495332236, + "grad_norm": 0.3842740058898926, + "learning_rate": 1.3747489692990361e-05, + "loss": 0.4698, + "step": 27577 + }, + { + "epoch": 0.75722130697419, + "grad_norm": 0.40051543712615967, + "learning_rate": 1.374708926974963e-05, + "loss": 0.5529, + "step": 27578 + }, + { + "epoch": 0.7572487644151565, + "grad_norm": 0.39086419343948364, + "learning_rate": 1.3746688839519383e-05, + "loss": 0.5049, + "step": 27579 + }, + { + "epoch": 0.757276221856123, + "grad_norm": 0.4792500436306, + "learning_rate": 1.3746288402300363e-05, + "loss": 0.539, + "step": 27580 + }, + { + "epoch": 0.7573036792970895, + "grad_norm": 0.3595951497554779, + "learning_rate": 1.374588795809332e-05, + "loss": 0.4222, + "step": 27581 + }, + { + "epoch": 0.757331136738056, + "grad_norm": 0.384039044380188, + "learning_rate": 1.3745487506899e-05, + "loss": 0.4607, + "step": 27582 + }, + { + "epoch": 0.7573585941790225, + "grad_norm": 0.3714715838432312, + "learning_rate": 1.3745087048718151e-05, + "loss": 0.463, + "step": 27583 + }, + { + "epoch": 0.7573860516199891, + "grad_norm": 0.3681863844394684, + "learning_rate": 1.374468658355152e-05, + "loss": 0.4647, + "step": 27584 + }, + { + "epoch": 0.7574135090609555, + "grad_norm": 0.34809988737106323, + "learning_rate": 1.3744286111399854e-05, + "loss": 0.4013, + "step": 27585 + }, + { + "epoch": 0.757440966501922, + "grad_norm": 0.37563320994377136, + "learning_rate": 1.3743885632263896e-05, + "loss": 0.4699, + "step": 27586 + }, + { + "epoch": 0.7574684239428885, + "grad_norm": 0.3870886564254761, + "learning_rate": 1.37434851461444e-05, + "loss": 0.5267, + "step": 27587 + }, + { + "epoch": 0.757495881383855, + "grad_norm": 0.48617035150527954, + "learning_rate": 1.3743084653042108e-05, + "loss": 0.5996, + "step": 27588 + }, + { + "epoch": 0.7575233388248215, + "grad_norm": 0.3436395823955536, + "learning_rate": 1.3742684152957768e-05, + "loss": 0.4447, + "step": 27589 + }, + { + "epoch": 0.757550796265788, + "grad_norm": 0.36828941106796265, + "learning_rate": 1.3742283645892127e-05, + "loss": 0.4467, + "step": 27590 + }, + { + "epoch": 0.7575782537067546, + "grad_norm": 0.4570577144622803, + "learning_rate": 1.3741883131845933e-05, + "loss": 0.489, + "step": 27591 + }, + { + "epoch": 0.757605711147721, + "grad_norm": 0.36666497588157654, + "learning_rate": 1.3741482610819934e-05, + "loss": 0.4416, + "step": 27592 + }, + { + "epoch": 0.7576331685886876, + "grad_norm": 0.40060052275657654, + "learning_rate": 1.3741082082814874e-05, + "loss": 0.5132, + "step": 27593 + }, + { + "epoch": 0.757660626029654, + "grad_norm": 0.4599471390247345, + "learning_rate": 1.3740681547831505e-05, + "loss": 0.4231, + "step": 27594 + }, + { + "epoch": 0.7576880834706206, + "grad_norm": 0.3865695595741272, + "learning_rate": 1.3740281005870568e-05, + "loss": 0.4361, + "step": 27595 + }, + { + "epoch": 0.757715540911587, + "grad_norm": 0.3990618586540222, + "learning_rate": 1.3739880456932815e-05, + "loss": 0.5831, + "step": 27596 + }, + { + "epoch": 0.7577429983525535, + "grad_norm": 0.39022350311279297, + "learning_rate": 1.373947990101899e-05, + "loss": 0.4345, + "step": 27597 + }, + { + "epoch": 0.7577704557935201, + "grad_norm": 0.35238388180732727, + "learning_rate": 1.3739079338129843e-05, + "loss": 0.4136, + "step": 27598 + }, + { + "epoch": 0.7577979132344865, + "grad_norm": 0.42477521300315857, + "learning_rate": 1.3738678768266116e-05, + "loss": 0.5458, + "step": 27599 + }, + { + "epoch": 0.7578253706754531, + "grad_norm": 0.4255114197731018, + "learning_rate": 1.3738278191428565e-05, + "loss": 0.54, + "step": 27600 + }, + { + "epoch": 0.7578528281164195, + "grad_norm": 0.4043692648410797, + "learning_rate": 1.3737877607617926e-05, + "loss": 0.5044, + "step": 27601 + }, + { + "epoch": 0.7578802855573861, + "grad_norm": 0.3934517800807953, + "learning_rate": 1.3737477016834957e-05, + "loss": 0.494, + "step": 27602 + }, + { + "epoch": 0.7579077429983525, + "grad_norm": 0.48621514439582825, + "learning_rate": 1.3737076419080401e-05, + "loss": 0.4319, + "step": 27603 + }, + { + "epoch": 0.7579352004393191, + "grad_norm": 0.3886386454105377, + "learning_rate": 1.3736675814355e-05, + "loss": 0.5077, + "step": 27604 + }, + { + "epoch": 0.7579626578802856, + "grad_norm": 0.43813198804855347, + "learning_rate": 1.3736275202659513e-05, + "loss": 0.4689, + "step": 27605 + }, + { + "epoch": 0.757990115321252, + "grad_norm": 0.43443629145622253, + "learning_rate": 1.3735874583994672e-05, + "loss": 0.589, + "step": 27606 + }, + { + "epoch": 0.7580175727622186, + "grad_norm": 0.40044865012168884, + "learning_rate": 1.3735473958361239e-05, + "loss": 0.5359, + "step": 27607 + }, + { + "epoch": 0.758045030203185, + "grad_norm": 0.43496739864349365, + "learning_rate": 1.3735073325759953e-05, + "loss": 0.5286, + "step": 27608 + }, + { + "epoch": 0.7580724876441516, + "grad_norm": 0.4170817732810974, + "learning_rate": 1.3734672686191561e-05, + "loss": 0.4732, + "step": 27609 + }, + { + "epoch": 0.758099945085118, + "grad_norm": 0.38607722520828247, + "learning_rate": 1.3734272039656815e-05, + "loss": 0.5543, + "step": 27610 + }, + { + "epoch": 0.7581274025260846, + "grad_norm": 0.37256914377212524, + "learning_rate": 1.3733871386156459e-05, + "loss": 0.4916, + "step": 27611 + }, + { + "epoch": 0.7581548599670511, + "grad_norm": 0.3554389178752899, + "learning_rate": 1.373347072569124e-05, + "loss": 0.4776, + "step": 27612 + }, + { + "epoch": 0.7581823174080176, + "grad_norm": 0.5816598534584045, + "learning_rate": 1.373307005826191e-05, + "loss": 0.5048, + "step": 27613 + }, + { + "epoch": 0.7582097748489841, + "grad_norm": 0.3698295056819916, + "learning_rate": 1.3732669383869208e-05, + "loss": 0.4519, + "step": 27614 + }, + { + "epoch": 0.7582372322899505, + "grad_norm": 0.33602607250213623, + "learning_rate": 1.3732268702513891e-05, + "loss": 0.4012, + "step": 27615 + }, + { + "epoch": 0.7582646897309171, + "grad_norm": 0.3773578703403473, + "learning_rate": 1.3731868014196701e-05, + "loss": 0.4549, + "step": 27616 + }, + { + "epoch": 0.7582921471718835, + "grad_norm": 0.4213685393333435, + "learning_rate": 1.3731467318918383e-05, + "loss": 0.5388, + "step": 27617 + }, + { + "epoch": 0.7583196046128501, + "grad_norm": 0.368636816740036, + "learning_rate": 1.373106661667969e-05, + "loss": 0.4665, + "step": 27618 + }, + { + "epoch": 0.7583470620538166, + "grad_norm": 0.38233712315559387, + "learning_rate": 1.3730665907481364e-05, + "loss": 0.4602, + "step": 27619 + }, + { + "epoch": 0.7583745194947831, + "grad_norm": 0.40753936767578125, + "learning_rate": 1.3730265191324159e-05, + "loss": 0.5478, + "step": 27620 + }, + { + "epoch": 0.7584019769357496, + "grad_norm": 0.3705849051475525, + "learning_rate": 1.3729864468208817e-05, + "loss": 0.5763, + "step": 27621 + }, + { + "epoch": 0.7584294343767161, + "grad_norm": 0.383811354637146, + "learning_rate": 1.3729463738136088e-05, + "loss": 0.404, + "step": 27622 + }, + { + "epoch": 0.7584568918176826, + "grad_norm": 0.40676844120025635, + "learning_rate": 1.3729063001106719e-05, + "loss": 0.4295, + "step": 27623 + }, + { + "epoch": 0.758484349258649, + "grad_norm": 0.4472912847995758, + "learning_rate": 1.3728662257121455e-05, + "loss": 0.5733, + "step": 27624 + }, + { + "epoch": 0.7585118066996156, + "grad_norm": 0.3761669397354126, + "learning_rate": 1.3728261506181048e-05, + "loss": 0.4398, + "step": 27625 + }, + { + "epoch": 0.7585392641405821, + "grad_norm": 0.35715365409851074, + "learning_rate": 1.3727860748286244e-05, + "loss": 0.4147, + "step": 27626 + }, + { + "epoch": 0.7585667215815486, + "grad_norm": 0.3594139814376831, + "learning_rate": 1.3727459983437788e-05, + "loss": 0.5431, + "step": 27627 + }, + { + "epoch": 0.7585941790225151, + "grad_norm": 0.48935559391975403, + "learning_rate": 1.372705921163643e-05, + "loss": 0.5208, + "step": 27628 + }, + { + "epoch": 0.7586216364634816, + "grad_norm": 0.3724452257156372, + "learning_rate": 1.3726658432882919e-05, + "loss": 0.4872, + "step": 27629 + }, + { + "epoch": 0.7586490939044481, + "grad_norm": 0.4394300580024719, + "learning_rate": 1.3726257647177998e-05, + "loss": 0.5234, + "step": 27630 + }, + { + "epoch": 0.7586765513454146, + "grad_norm": 0.4020772874355316, + "learning_rate": 1.3725856854522419e-05, + "loss": 0.4704, + "step": 27631 + }, + { + "epoch": 0.7587040087863811, + "grad_norm": 0.39618340134620667, + "learning_rate": 1.3725456054916926e-05, + "loss": 0.4931, + "step": 27632 + }, + { + "epoch": 0.7587314662273477, + "grad_norm": 0.37641340494155884, + "learning_rate": 1.372505524836227e-05, + "loss": 0.4451, + "step": 27633 + }, + { + "epoch": 0.7587589236683141, + "grad_norm": 0.41824963688850403, + "learning_rate": 1.3724654434859197e-05, + "loss": 0.491, + "step": 27634 + }, + { + "epoch": 0.7587863811092806, + "grad_norm": 0.3293622136116028, + "learning_rate": 1.3724253614408453e-05, + "loss": 0.5248, + "step": 27635 + }, + { + "epoch": 0.7588138385502471, + "grad_norm": 0.4438917934894562, + "learning_rate": 1.372385278701079e-05, + "loss": 0.5393, + "step": 27636 + }, + { + "epoch": 0.7588412959912136, + "grad_norm": 0.6015644073486328, + "learning_rate": 1.372345195266695e-05, + "loss": 0.4204, + "step": 27637 + }, + { + "epoch": 0.7588687534321801, + "grad_norm": 0.549423098564148, + "learning_rate": 1.3723051111377685e-05, + "loss": 0.5215, + "step": 27638 + }, + { + "epoch": 0.7588962108731466, + "grad_norm": 0.3952849507331848, + "learning_rate": 1.3722650263143744e-05, + "loss": 0.5059, + "step": 27639 + }, + { + "epoch": 0.7589236683141132, + "grad_norm": 0.41572609543800354, + "learning_rate": 1.3722249407965868e-05, + "loss": 0.5024, + "step": 27640 + }, + { + "epoch": 0.7589511257550796, + "grad_norm": 0.3816871643066406, + "learning_rate": 1.3721848545844812e-05, + "loss": 0.5686, + "step": 27641 + }, + { + "epoch": 0.7589785831960462, + "grad_norm": 0.3963963985443115, + "learning_rate": 1.372144767678132e-05, + "loss": 0.5382, + "step": 27642 + }, + { + "epoch": 0.7590060406370126, + "grad_norm": 0.4628923833370209, + "learning_rate": 1.372104680077614e-05, + "loss": 0.5055, + "step": 27643 + }, + { + "epoch": 0.7590334980779792, + "grad_norm": 0.38763415813446045, + "learning_rate": 1.3720645917830018e-05, + "loss": 0.4886, + "step": 27644 + }, + { + "epoch": 0.7590609555189456, + "grad_norm": 0.35843518376350403, + "learning_rate": 1.3720245027943704e-05, + "loss": 0.5096, + "step": 27645 + }, + { + "epoch": 0.7590884129599121, + "grad_norm": 0.4085257649421692, + "learning_rate": 1.371984413111795e-05, + "loss": 0.4666, + "step": 27646 + }, + { + "epoch": 0.7591158704008787, + "grad_norm": 6.698368549346924, + "learning_rate": 1.3719443227353498e-05, + "loss": 0.5266, + "step": 27647 + }, + { + "epoch": 0.7591433278418451, + "grad_norm": 0.407379150390625, + "learning_rate": 1.3719042316651092e-05, + "loss": 0.481, + "step": 27648 + }, + { + "epoch": 0.7591707852828117, + "grad_norm": 0.39034196734428406, + "learning_rate": 1.3718641399011492e-05, + "loss": 0.629, + "step": 27649 + }, + { + "epoch": 0.7591982427237781, + "grad_norm": 0.45856019854545593, + "learning_rate": 1.3718240474435434e-05, + "loss": 0.4874, + "step": 27650 + }, + { + "epoch": 0.7592257001647447, + "grad_norm": 0.38015010952949524, + "learning_rate": 1.3717839542923675e-05, + "loss": 0.4608, + "step": 27651 + }, + { + "epoch": 0.7592531576057111, + "grad_norm": 0.4262295961380005, + "learning_rate": 1.3717438604476956e-05, + "loss": 0.5186, + "step": 27652 + }, + { + "epoch": 0.7592806150466777, + "grad_norm": 0.33036792278289795, + "learning_rate": 1.3717037659096027e-05, + "loss": 0.4618, + "step": 27653 + }, + { + "epoch": 0.7593080724876442, + "grad_norm": 0.3805568516254425, + "learning_rate": 1.3716636706781639e-05, + "loss": 0.4691, + "step": 27654 + }, + { + "epoch": 0.7593355299286106, + "grad_norm": 0.37285491824150085, + "learning_rate": 1.3716235747534535e-05, + "loss": 0.4471, + "step": 27655 + }, + { + "epoch": 0.7593629873695772, + "grad_norm": 0.37135598063468933, + "learning_rate": 1.3715834781355466e-05, + "loss": 0.4486, + "step": 27656 + }, + { + "epoch": 0.7593904448105436, + "grad_norm": 0.3867364227771759, + "learning_rate": 1.371543380824518e-05, + "loss": 0.4761, + "step": 27657 + }, + { + "epoch": 0.7594179022515102, + "grad_norm": 0.3717508614063263, + "learning_rate": 1.3715032828204422e-05, + "loss": 0.453, + "step": 27658 + }, + { + "epoch": 0.7594453596924766, + "grad_norm": 0.3562491834163666, + "learning_rate": 1.3714631841233943e-05, + "loss": 0.5122, + "step": 27659 + }, + { + "epoch": 0.7594728171334432, + "grad_norm": 0.3627948462963104, + "learning_rate": 1.3714230847334493e-05, + "loss": 0.4744, + "step": 27660 + }, + { + "epoch": 0.7595002745744097, + "grad_norm": 0.3799888789653778, + "learning_rate": 1.3713829846506813e-05, + "loss": 0.5032, + "step": 27661 + }, + { + "epoch": 0.7595277320153762, + "grad_norm": 0.36911511421203613, + "learning_rate": 1.3713428838751657e-05, + "loss": 0.4891, + "step": 27662 + }, + { + "epoch": 0.7595551894563427, + "grad_norm": 0.42111217975616455, + "learning_rate": 1.3713027824069768e-05, + "loss": 0.4639, + "step": 27663 + }, + { + "epoch": 0.7595826468973091, + "grad_norm": 0.38927870988845825, + "learning_rate": 1.3712626802461902e-05, + "loss": 0.5313, + "step": 27664 + }, + { + "epoch": 0.7596101043382757, + "grad_norm": 0.36434662342071533, + "learning_rate": 1.37122257739288e-05, + "loss": 0.5053, + "step": 27665 + }, + { + "epoch": 0.7596375617792421, + "grad_norm": 0.36594638228416443, + "learning_rate": 1.3711824738471208e-05, + "loss": 0.482, + "step": 27666 + }, + { + "epoch": 0.7596650192202087, + "grad_norm": 0.3925136625766754, + "learning_rate": 1.3711423696089886e-05, + "loss": 0.5458, + "step": 27667 + }, + { + "epoch": 0.7596924766611752, + "grad_norm": 0.4328237771987915, + "learning_rate": 1.3711022646785568e-05, + "loss": 0.5651, + "step": 27668 + }, + { + "epoch": 0.7597199341021417, + "grad_norm": 0.3576619625091553, + "learning_rate": 1.3710621590559012e-05, + "loss": 0.4676, + "step": 27669 + }, + { + "epoch": 0.7597473915431082, + "grad_norm": 0.3592420518398285, + "learning_rate": 1.371022052741096e-05, + "loss": 0.4082, + "step": 27670 + }, + { + "epoch": 0.7597748489840747, + "grad_norm": 0.36904236674308777, + "learning_rate": 1.3709819457342164e-05, + "loss": 0.5112, + "step": 27671 + }, + { + "epoch": 0.7598023064250412, + "grad_norm": 0.3739687502384186, + "learning_rate": 1.370941838035337e-05, + "loss": 0.4932, + "step": 27672 + }, + { + "epoch": 0.7598297638660076, + "grad_norm": 0.3613985776901245, + "learning_rate": 1.3709017296445326e-05, + "loss": 0.5149, + "step": 27673 + }, + { + "epoch": 0.7598572213069742, + "grad_norm": 0.3639315366744995, + "learning_rate": 1.3708616205618783e-05, + "loss": 0.4257, + "step": 27674 + }, + { + "epoch": 0.7598846787479407, + "grad_norm": 0.3708198070526123, + "learning_rate": 1.3708215107874487e-05, + "loss": 0.4805, + "step": 27675 + }, + { + "epoch": 0.7599121361889072, + "grad_norm": 0.5333237648010254, + "learning_rate": 1.3707814003213185e-05, + "loss": 0.4746, + "step": 27676 + }, + { + "epoch": 0.7599395936298737, + "grad_norm": 0.3760264813899994, + "learning_rate": 1.3707412891635627e-05, + "loss": 0.4956, + "step": 27677 + }, + { + "epoch": 0.7599670510708402, + "grad_norm": 0.40180063247680664, + "learning_rate": 1.370701177314256e-05, + "loss": 0.482, + "step": 27678 + }, + { + "epoch": 0.7599945085118067, + "grad_norm": 0.37029796838760376, + "learning_rate": 1.3706610647734732e-05, + "loss": 0.4941, + "step": 27679 + }, + { + "epoch": 0.7600219659527732, + "grad_norm": 0.4901975095272064, + "learning_rate": 1.3706209515412896e-05, + "loss": 0.5295, + "step": 27680 + }, + { + "epoch": 0.7600494233937397, + "grad_norm": 0.37968742847442627, + "learning_rate": 1.3705808376177791e-05, + "loss": 0.4626, + "step": 27681 + }, + { + "epoch": 0.7600768808347063, + "grad_norm": 0.41979652643203735, + "learning_rate": 1.3705407230030176e-05, + "loss": 0.5293, + "step": 27682 + }, + { + "epoch": 0.7601043382756727, + "grad_norm": 0.37220993638038635, + "learning_rate": 1.3705006076970793e-05, + "loss": 0.5629, + "step": 27683 + }, + { + "epoch": 0.7601317957166392, + "grad_norm": 0.41969823837280273, + "learning_rate": 1.3704604917000388e-05, + "loss": 0.5171, + "step": 27684 + }, + { + "epoch": 0.7601592531576057, + "grad_norm": 0.3921274244785309, + "learning_rate": 1.3704203750119715e-05, + "loss": 0.4804, + "step": 27685 + }, + { + "epoch": 0.7601867105985722, + "grad_norm": 0.36152493953704834, + "learning_rate": 1.3703802576329516e-05, + "loss": 0.4257, + "step": 27686 + }, + { + "epoch": 0.7602141680395387, + "grad_norm": 0.4251555800437927, + "learning_rate": 1.3703401395630547e-05, + "loss": 0.5464, + "step": 27687 + }, + { + "epoch": 0.7602416254805052, + "grad_norm": 0.4464849531650543, + "learning_rate": 1.3703000208023554e-05, + "loss": 0.5189, + "step": 27688 + }, + { + "epoch": 0.7602690829214718, + "grad_norm": 0.4088986814022064, + "learning_rate": 1.370259901350928e-05, + "loss": 0.437, + "step": 27689 + }, + { + "epoch": 0.7602965403624382, + "grad_norm": 0.4370674192905426, + "learning_rate": 1.3702197812088478e-05, + "loss": 0.5175, + "step": 27690 + }, + { + "epoch": 0.7603239978034048, + "grad_norm": 0.353396475315094, + "learning_rate": 1.3701796603761897e-05, + "loss": 0.4336, + "step": 27691 + }, + { + "epoch": 0.7603514552443712, + "grad_norm": 0.385336309671402, + "learning_rate": 1.3701395388530281e-05, + "loss": 0.5093, + "step": 27692 + }, + { + "epoch": 0.7603789126853377, + "grad_norm": 0.4940180778503418, + "learning_rate": 1.3700994166394383e-05, + "loss": 0.4869, + "step": 27693 + }, + { + "epoch": 0.7604063701263042, + "grad_norm": 0.42163312435150146, + "learning_rate": 1.3700592937354949e-05, + "loss": 0.4807, + "step": 27694 + }, + { + "epoch": 0.7604338275672707, + "grad_norm": 0.4935605823993683, + "learning_rate": 1.3700191701412729e-05, + "loss": 0.5081, + "step": 27695 + }, + { + "epoch": 0.7604612850082373, + "grad_norm": 0.3801659643650055, + "learning_rate": 1.3699790458568472e-05, + "loss": 0.4846, + "step": 27696 + }, + { + "epoch": 0.7604887424492037, + "grad_norm": 0.3604607880115509, + "learning_rate": 1.369938920882292e-05, + "loss": 0.4756, + "step": 27697 + }, + { + "epoch": 0.7605161998901703, + "grad_norm": 0.37225109338760376, + "learning_rate": 1.3698987952176829e-05, + "loss": 0.5317, + "step": 27698 + }, + { + "epoch": 0.7605436573311367, + "grad_norm": 0.42815205454826355, + "learning_rate": 1.3698586688630947e-05, + "loss": 0.4964, + "step": 27699 + }, + { + "epoch": 0.7605711147721033, + "grad_norm": 0.39067530632019043, + "learning_rate": 1.3698185418186018e-05, + "loss": 0.5673, + "step": 27700 + }, + { + "epoch": 0.7605985722130697, + "grad_norm": 0.4094858169555664, + "learning_rate": 1.3697784140842796e-05, + "loss": 0.5377, + "step": 27701 + }, + { + "epoch": 0.7606260296540363, + "grad_norm": 0.41249608993530273, + "learning_rate": 1.3697382856602022e-05, + "loss": 0.5057, + "step": 27702 + }, + { + "epoch": 0.7606534870950028, + "grad_norm": 1.239504337310791, + "learning_rate": 1.3696981565464451e-05, + "loss": 0.4986, + "step": 27703 + }, + { + "epoch": 0.7606809445359692, + "grad_norm": 0.44319769740104675, + "learning_rate": 1.3696580267430828e-05, + "loss": 0.4508, + "step": 27704 + }, + { + "epoch": 0.7607084019769358, + "grad_norm": 0.4132560193538666, + "learning_rate": 1.3696178962501904e-05, + "loss": 0.5291, + "step": 27705 + }, + { + "epoch": 0.7607358594179022, + "grad_norm": 0.3599596619606018, + "learning_rate": 1.3695777650678427e-05, + "loss": 0.5193, + "step": 27706 + }, + { + "epoch": 0.7607633168588688, + "grad_norm": 0.8087995648384094, + "learning_rate": 1.3695376331961145e-05, + "loss": 0.5225, + "step": 27707 + }, + { + "epoch": 0.7607907742998352, + "grad_norm": 0.3895479738712311, + "learning_rate": 1.3694975006350808e-05, + "loss": 0.526, + "step": 27708 + }, + { + "epoch": 0.7608182317408018, + "grad_norm": 0.3646712303161621, + "learning_rate": 1.3694573673848161e-05, + "loss": 0.5728, + "step": 27709 + }, + { + "epoch": 0.7608456891817683, + "grad_norm": 0.3624202311038971, + "learning_rate": 1.3694172334453956e-05, + "loss": 0.4834, + "step": 27710 + }, + { + "epoch": 0.7608731466227348, + "grad_norm": 0.3842116892337799, + "learning_rate": 1.3693770988168942e-05, + "loss": 0.4549, + "step": 27711 + }, + { + "epoch": 0.7609006040637013, + "grad_norm": 0.323030024766922, + "learning_rate": 1.3693369634993863e-05, + "loss": 0.4067, + "step": 27712 + }, + { + "epoch": 0.7609280615046677, + "grad_norm": 0.40119877457618713, + "learning_rate": 1.3692968274929475e-05, + "loss": 0.5134, + "step": 27713 + }, + { + "epoch": 0.7609555189456343, + "grad_norm": 0.4033125042915344, + "learning_rate": 1.369256690797652e-05, + "loss": 0.5442, + "step": 27714 + }, + { + "epoch": 0.7609829763866007, + "grad_norm": 0.3812948763370514, + "learning_rate": 1.3692165534135748e-05, + "loss": 0.4346, + "step": 27715 + }, + { + "epoch": 0.7610104338275673, + "grad_norm": 0.39885058999061584, + "learning_rate": 1.3691764153407909e-05, + "loss": 0.5615, + "step": 27716 + }, + { + "epoch": 0.7610378912685337, + "grad_norm": 0.38529273867607117, + "learning_rate": 1.3691362765793751e-05, + "loss": 0.4414, + "step": 27717 + }, + { + "epoch": 0.7610653487095003, + "grad_norm": 0.3980371057987213, + "learning_rate": 1.3690961371294028e-05, + "loss": 0.4736, + "step": 27718 + }, + { + "epoch": 0.7610928061504668, + "grad_norm": 0.3693397045135498, + "learning_rate": 1.3690559969909481e-05, + "loss": 0.5072, + "step": 27719 + }, + { + "epoch": 0.7611202635914333, + "grad_norm": 0.3797854483127594, + "learning_rate": 1.369015856164086e-05, + "loss": 0.4186, + "step": 27720 + }, + { + "epoch": 0.7611477210323998, + "grad_norm": 0.4087902307510376, + "learning_rate": 1.3689757146488918e-05, + "loss": 0.4539, + "step": 27721 + }, + { + "epoch": 0.7611751784733662, + "grad_norm": 0.34327200055122375, + "learning_rate": 1.3689355724454401e-05, + "loss": 0.4785, + "step": 27722 + }, + { + "epoch": 0.7612026359143328, + "grad_norm": 0.3739432692527771, + "learning_rate": 1.3688954295538057e-05, + "loss": 0.4793, + "step": 27723 + }, + { + "epoch": 0.7612300933552992, + "grad_norm": 0.4621001183986664, + "learning_rate": 1.3688552859740639e-05, + "loss": 0.5151, + "step": 27724 + }, + { + "epoch": 0.7612575507962658, + "grad_norm": 0.447447806596756, + "learning_rate": 1.3688151417062887e-05, + "loss": 0.5961, + "step": 27725 + }, + { + "epoch": 0.7612850082372323, + "grad_norm": 0.37173202633857727, + "learning_rate": 1.368774996750556e-05, + "loss": 0.4832, + "step": 27726 + }, + { + "epoch": 0.7613124656781988, + "grad_norm": 0.3942643702030182, + "learning_rate": 1.36873485110694e-05, + "loss": 0.5347, + "step": 27727 + }, + { + "epoch": 0.7613399231191653, + "grad_norm": 0.3896092474460602, + "learning_rate": 1.368694704775516e-05, + "loss": 0.4582, + "step": 27728 + }, + { + "epoch": 0.7613673805601318, + "grad_norm": 0.38348066806793213, + "learning_rate": 1.3686545577563586e-05, + "loss": 0.4897, + "step": 27729 + }, + { + "epoch": 0.7613948380010983, + "grad_norm": 0.423664391040802, + "learning_rate": 1.3686144100495426e-05, + "loss": 0.5298, + "step": 27730 + }, + { + "epoch": 0.7614222954420647, + "grad_norm": 0.35986703634262085, + "learning_rate": 1.3685742616551433e-05, + "loss": 0.5188, + "step": 27731 + }, + { + "epoch": 0.7614497528830313, + "grad_norm": 0.47581714391708374, + "learning_rate": 1.3685341125732354e-05, + "loss": 0.4955, + "step": 27732 + }, + { + "epoch": 0.7614772103239978, + "grad_norm": 0.4043029546737671, + "learning_rate": 1.3684939628038938e-05, + "loss": 0.4792, + "step": 27733 + }, + { + "epoch": 0.7615046677649643, + "grad_norm": 0.3783077001571655, + "learning_rate": 1.3684538123471933e-05, + "loss": 0.5398, + "step": 27734 + }, + { + "epoch": 0.7615321252059308, + "grad_norm": 0.3784475028514862, + "learning_rate": 1.3684136612032086e-05, + "loss": 0.561, + "step": 27735 + }, + { + "epoch": 0.7615595826468973, + "grad_norm": 0.3712831735610962, + "learning_rate": 1.3683735093720152e-05, + "loss": 0.5013, + "step": 27736 + }, + { + "epoch": 0.7615870400878638, + "grad_norm": 0.39237672090530396, + "learning_rate": 1.3683333568536875e-05, + "loss": 0.5866, + "step": 27737 + }, + { + "epoch": 0.7616144975288303, + "grad_norm": 0.4448559284210205, + "learning_rate": 1.3682932036483005e-05, + "loss": 0.5092, + "step": 27738 + }, + { + "epoch": 0.7616419549697968, + "grad_norm": 0.4101945161819458, + "learning_rate": 1.3682530497559291e-05, + "loss": 0.5347, + "step": 27739 + }, + { + "epoch": 0.7616694124107634, + "grad_norm": 0.37881746888160706, + "learning_rate": 1.3682128951766483e-05, + "loss": 0.4735, + "step": 27740 + }, + { + "epoch": 0.7616968698517298, + "grad_norm": 0.3992409408092499, + "learning_rate": 1.3681727399105328e-05, + "loss": 0.4644, + "step": 27741 + }, + { + "epoch": 0.7617243272926963, + "grad_norm": 0.4056754410266876, + "learning_rate": 1.3681325839576576e-05, + "loss": 0.6068, + "step": 27742 + }, + { + "epoch": 0.7617517847336628, + "grad_norm": 0.5025073289871216, + "learning_rate": 1.3680924273180977e-05, + "loss": 0.6022, + "step": 27743 + }, + { + "epoch": 0.7617792421746293, + "grad_norm": 0.4117066562175751, + "learning_rate": 1.3680522699919283e-05, + "loss": 0.6169, + "step": 27744 + }, + { + "epoch": 0.7618066996155958, + "grad_norm": 0.4183805286884308, + "learning_rate": 1.3680121119792236e-05, + "loss": 0.4412, + "step": 27745 + }, + { + "epoch": 0.7618341570565623, + "grad_norm": 0.4059961438179016, + "learning_rate": 1.3679719532800589e-05, + "loss": 0.5059, + "step": 27746 + }, + { + "epoch": 0.7618616144975289, + "grad_norm": 0.3868643641471863, + "learning_rate": 1.367931793894509e-05, + "loss": 0.4896, + "step": 27747 + }, + { + "epoch": 0.7618890719384953, + "grad_norm": 0.4365611672401428, + "learning_rate": 1.367891633822649e-05, + "loss": 0.5321, + "step": 27748 + }, + { + "epoch": 0.7619165293794619, + "grad_norm": 0.36385923624038696, + "learning_rate": 1.3678514730645534e-05, + "loss": 0.5024, + "step": 27749 + }, + { + "epoch": 0.7619439868204283, + "grad_norm": 0.3380683362483978, + "learning_rate": 1.3678113116202977e-05, + "loss": 0.445, + "step": 27750 + }, + { + "epoch": 0.7619714442613948, + "grad_norm": 0.4052305519580841, + "learning_rate": 1.3677711494899567e-05, + "loss": 0.5241, + "step": 27751 + }, + { + "epoch": 0.7619989017023613, + "grad_norm": 0.40106073021888733, + "learning_rate": 1.3677309866736047e-05, + "loss": 0.4339, + "step": 27752 + }, + { + "epoch": 0.7620263591433278, + "grad_norm": 0.3528982102870941, + "learning_rate": 1.367690823171317e-05, + "loss": 0.4474, + "step": 27753 + }, + { + "epoch": 0.7620538165842944, + "grad_norm": 0.3793761432170868, + "learning_rate": 1.367650658983169e-05, + "loss": 0.4831, + "step": 27754 + }, + { + "epoch": 0.7620812740252608, + "grad_norm": 0.39393332600593567, + "learning_rate": 1.367610494109235e-05, + "loss": 0.4606, + "step": 27755 + }, + { + "epoch": 0.7621087314662274, + "grad_norm": 0.40860724449157715, + "learning_rate": 1.36757032854959e-05, + "loss": 0.5324, + "step": 27756 + }, + { + "epoch": 0.7621361889071938, + "grad_norm": 0.40888237953186035, + "learning_rate": 1.367530162304309e-05, + "loss": 0.5216, + "step": 27757 + }, + { + "epoch": 0.7621636463481604, + "grad_norm": 0.35342031717300415, + "learning_rate": 1.3674899953734671e-05, + "loss": 0.4481, + "step": 27758 + }, + { + "epoch": 0.7621911037891268, + "grad_norm": 0.4081381559371948, + "learning_rate": 1.3674498277571388e-05, + "loss": 0.4811, + "step": 27759 + }, + { + "epoch": 0.7622185612300933, + "grad_norm": 0.3280874490737915, + "learning_rate": 1.3674096594553995e-05, + "loss": 0.4215, + "step": 27760 + }, + { + "epoch": 0.7622460186710599, + "grad_norm": 0.4109775424003601, + "learning_rate": 1.367369490468324e-05, + "loss": 0.5525, + "step": 27761 + }, + { + "epoch": 0.7622734761120263, + "grad_norm": 0.40750330686569214, + "learning_rate": 1.3673293207959871e-05, + "loss": 0.4804, + "step": 27762 + }, + { + "epoch": 0.7623009335529929, + "grad_norm": 0.38180074095726013, + "learning_rate": 1.3672891504384638e-05, + "loss": 0.4522, + "step": 27763 + }, + { + "epoch": 0.7623283909939593, + "grad_norm": 0.39386266469955444, + "learning_rate": 1.3672489793958288e-05, + "loss": 0.5578, + "step": 27764 + }, + { + "epoch": 0.7623558484349259, + "grad_norm": 0.7645328640937805, + "learning_rate": 1.3672088076681574e-05, + "loss": 0.5599, + "step": 27765 + }, + { + "epoch": 0.7623833058758923, + "grad_norm": 0.3817692995071411, + "learning_rate": 1.3671686352555243e-05, + "loss": 0.5069, + "step": 27766 + }, + { + "epoch": 0.7624107633168589, + "grad_norm": 0.3971666693687439, + "learning_rate": 1.3671284621580045e-05, + "loss": 0.526, + "step": 27767 + }, + { + "epoch": 0.7624382207578254, + "grad_norm": 0.3990360200405121, + "learning_rate": 1.367088288375673e-05, + "loss": 0.4888, + "step": 27768 + }, + { + "epoch": 0.7624656781987919, + "grad_norm": 0.3851589858531952, + "learning_rate": 1.3670481139086048e-05, + "loss": 0.4942, + "step": 27769 + }, + { + "epoch": 0.7624931356397584, + "grad_norm": 0.47141632437705994, + "learning_rate": 1.3670079387568747e-05, + "loss": 0.5224, + "step": 27770 + }, + { + "epoch": 0.7625205930807248, + "grad_norm": 0.37925323843955994, + "learning_rate": 1.3669677629205575e-05, + "loss": 0.5386, + "step": 27771 + }, + { + "epoch": 0.7625480505216914, + "grad_norm": 0.4285934269428253, + "learning_rate": 1.3669275863997284e-05, + "loss": 0.3895, + "step": 27772 + }, + { + "epoch": 0.7625755079626578, + "grad_norm": 0.4272766411304474, + "learning_rate": 1.3668874091944621e-05, + "loss": 0.5325, + "step": 27773 + }, + { + "epoch": 0.7626029654036244, + "grad_norm": 0.34230008721351624, + "learning_rate": 1.3668472313048337e-05, + "loss": 0.4218, + "step": 27774 + }, + { + "epoch": 0.7626304228445909, + "grad_norm": 0.48643726110458374, + "learning_rate": 1.3668070527309186e-05, + "loss": 0.5306, + "step": 27775 + }, + { + "epoch": 0.7626578802855574, + "grad_norm": 0.36312153935432434, + "learning_rate": 1.3667668734727909e-05, + "loss": 0.5255, + "step": 27776 + }, + { + "epoch": 0.7626853377265239, + "grad_norm": 0.38644668459892273, + "learning_rate": 1.3667266935305257e-05, + "loss": 0.5082, + "step": 27777 + }, + { + "epoch": 0.7627127951674904, + "grad_norm": 0.3857123553752899, + "learning_rate": 1.3666865129041984e-05, + "loss": 0.5449, + "step": 27778 + }, + { + "epoch": 0.7627402526084569, + "grad_norm": 0.34857305884361267, + "learning_rate": 1.3666463315938836e-05, + "loss": 0.4433, + "step": 27779 + }, + { + "epoch": 0.7627677100494233, + "grad_norm": 0.3622816801071167, + "learning_rate": 1.3666061495996566e-05, + "loss": 0.4512, + "step": 27780 + }, + { + "epoch": 0.7627951674903899, + "grad_norm": 0.4157201051712036, + "learning_rate": 1.366565966921592e-05, + "loss": 0.5656, + "step": 27781 + }, + { + "epoch": 0.7628226249313564, + "grad_norm": 0.3683641850948334, + "learning_rate": 1.366525783559765e-05, + "loss": 0.4394, + "step": 27782 + }, + { + "epoch": 0.7628500823723229, + "grad_norm": 0.4074408710002899, + "learning_rate": 1.3664855995142503e-05, + "loss": 0.543, + "step": 27783 + }, + { + "epoch": 0.7628775398132894, + "grad_norm": 0.36161404848098755, + "learning_rate": 1.3664454147851229e-05, + "loss": 0.5714, + "step": 27784 + }, + { + "epoch": 0.7629049972542559, + "grad_norm": 0.41448935866355896, + "learning_rate": 1.3664052293724579e-05, + "loss": 0.5617, + "step": 27785 + }, + { + "epoch": 0.7629324546952224, + "grad_norm": 0.34676072001457214, + "learning_rate": 1.3663650432763304e-05, + "loss": 0.4908, + "step": 27786 + }, + { + "epoch": 0.7629599121361889, + "grad_norm": 4.789437294006348, + "learning_rate": 1.366324856496815e-05, + "loss": 0.632, + "step": 27787 + }, + { + "epoch": 0.7629873695771554, + "grad_norm": 0.40399935841560364, + "learning_rate": 1.3662846690339868e-05, + "loss": 0.4697, + "step": 27788 + }, + { + "epoch": 0.763014827018122, + "grad_norm": 0.46968382596969604, + "learning_rate": 1.3662444808879209e-05, + "loss": 0.5173, + "step": 27789 + }, + { + "epoch": 0.7630422844590884, + "grad_norm": 0.47655972838401794, + "learning_rate": 1.366204292058692e-05, + "loss": 0.4826, + "step": 27790 + }, + { + "epoch": 0.7630697419000549, + "grad_norm": 0.4338267147541046, + "learning_rate": 1.3661641025463754e-05, + "loss": 0.5357, + "step": 27791 + }, + { + "epoch": 0.7630971993410214, + "grad_norm": 0.41392213106155396, + "learning_rate": 1.3661239123510457e-05, + "loss": 0.4851, + "step": 27792 + }, + { + "epoch": 0.7631246567819879, + "grad_norm": 0.39809951186180115, + "learning_rate": 1.3660837214727783e-05, + "loss": 0.5303, + "step": 27793 + }, + { + "epoch": 0.7631521142229544, + "grad_norm": 0.3641553521156311, + "learning_rate": 1.3660435299116476e-05, + "loss": 0.4618, + "step": 27794 + }, + { + "epoch": 0.7631795716639209, + "grad_norm": 0.43978753685951233, + "learning_rate": 1.3660033376677291e-05, + "loss": 0.4598, + "step": 27795 + }, + { + "epoch": 0.7632070291048875, + "grad_norm": 0.44936907291412354, + "learning_rate": 1.3659631447410975e-05, + "loss": 0.4893, + "step": 27796 + }, + { + "epoch": 0.7632344865458539, + "grad_norm": 0.44615668058395386, + "learning_rate": 1.3659229511318276e-05, + "loss": 0.4832, + "step": 27797 + }, + { + "epoch": 0.7632619439868205, + "grad_norm": 0.3849855661392212, + "learning_rate": 1.365882756839995e-05, + "loss": 0.4667, + "step": 27798 + }, + { + "epoch": 0.7632894014277869, + "grad_norm": 0.37337446212768555, + "learning_rate": 1.3658425618656744e-05, + "loss": 0.4359, + "step": 27799 + }, + { + "epoch": 0.7633168588687534, + "grad_norm": 0.36242055892944336, + "learning_rate": 1.3658023662089402e-05, + "loss": 0.447, + "step": 27800 + }, + { + "epoch": 0.7633443163097199, + "grad_norm": 0.3944315016269684, + "learning_rate": 1.365762169869868e-05, + "loss": 0.4538, + "step": 27801 + }, + { + "epoch": 0.7633717737506864, + "grad_norm": 0.3870130181312561, + "learning_rate": 1.3657219728485327e-05, + "loss": 0.4648, + "step": 27802 + }, + { + "epoch": 0.763399231191653, + "grad_norm": 0.35821667313575745, + "learning_rate": 1.3656817751450091e-05, + "loss": 0.4736, + "step": 27803 + }, + { + "epoch": 0.7634266886326194, + "grad_norm": 0.36469095945358276, + "learning_rate": 1.3656415767593725e-05, + "loss": 0.5544, + "step": 27804 + }, + { + "epoch": 0.763454146073586, + "grad_norm": 0.4492700695991516, + "learning_rate": 1.3656013776916973e-05, + "loss": 0.4036, + "step": 27805 + }, + { + "epoch": 0.7634816035145524, + "grad_norm": 0.48716190457344055, + "learning_rate": 1.365561177942059e-05, + "loss": 0.4997, + "step": 27806 + }, + { + "epoch": 0.763509060955519, + "grad_norm": 0.34977415204048157, + "learning_rate": 1.3655209775105327e-05, + "loss": 0.4977, + "step": 27807 + }, + { + "epoch": 0.7635365183964854, + "grad_norm": 0.3628666400909424, + "learning_rate": 1.3654807763971927e-05, + "loss": 0.4763, + "step": 27808 + }, + { + "epoch": 0.763563975837452, + "grad_norm": 0.3645716607570648, + "learning_rate": 1.3654405746021144e-05, + "loss": 0.4932, + "step": 27809 + }, + { + "epoch": 0.7635914332784185, + "grad_norm": 0.37192651629447937, + "learning_rate": 1.365400372125373e-05, + "loss": 0.433, + "step": 27810 + }, + { + "epoch": 0.7636188907193849, + "grad_norm": 0.40480494499206543, + "learning_rate": 1.3653601689670433e-05, + "loss": 0.496, + "step": 27811 + }, + { + "epoch": 0.7636463481603515, + "grad_norm": 0.42466622591018677, + "learning_rate": 1.3653199651272002e-05, + "loss": 0.4869, + "step": 27812 + }, + { + "epoch": 0.7636738056013179, + "grad_norm": 0.453584223985672, + "learning_rate": 1.3652797606059186e-05, + "loss": 0.4967, + "step": 27813 + }, + { + "epoch": 0.7637012630422845, + "grad_norm": 0.3777746260166168, + "learning_rate": 1.3652395554032741e-05, + "loss": 0.4789, + "step": 27814 + }, + { + "epoch": 0.7637287204832509, + "grad_norm": 0.36479008197784424, + "learning_rate": 1.3651993495193408e-05, + "loss": 0.4473, + "step": 27815 + }, + { + "epoch": 0.7637561779242175, + "grad_norm": 0.48667091131210327, + "learning_rate": 1.3651591429541946e-05, + "loss": 0.4181, + "step": 27816 + }, + { + "epoch": 0.763783635365184, + "grad_norm": 0.3714837431907654, + "learning_rate": 1.3651189357079098e-05, + "loss": 0.5322, + "step": 27817 + }, + { + "epoch": 0.7638110928061504, + "grad_norm": 0.43368715047836304, + "learning_rate": 1.3650787277805615e-05, + "loss": 0.504, + "step": 27818 + }, + { + "epoch": 0.763838550247117, + "grad_norm": 0.469939261674881, + "learning_rate": 1.3650385191722251e-05, + "loss": 0.5097, + "step": 27819 + }, + { + "epoch": 0.7638660076880834, + "grad_norm": 0.4302958548069, + "learning_rate": 1.3649983098829753e-05, + "loss": 0.498, + "step": 27820 + }, + { + "epoch": 0.76389346512905, + "grad_norm": 0.6261295080184937, + "learning_rate": 1.3649580999128871e-05, + "loss": 0.4836, + "step": 27821 + }, + { + "epoch": 0.7639209225700164, + "grad_norm": 0.39858943223953247, + "learning_rate": 1.3649178892620358e-05, + "loss": 0.5152, + "step": 27822 + }, + { + "epoch": 0.763948380010983, + "grad_norm": 0.4095688760280609, + "learning_rate": 1.3648776779304957e-05, + "loss": 0.428, + "step": 27823 + }, + { + "epoch": 0.7639758374519495, + "grad_norm": 0.428072988986969, + "learning_rate": 1.3648374659183427e-05, + "loss": 0.5611, + "step": 27824 + }, + { + "epoch": 0.764003294892916, + "grad_norm": 0.34700849652290344, + "learning_rate": 1.3647972532256512e-05, + "loss": 0.4337, + "step": 27825 + }, + { + "epoch": 0.7640307523338825, + "grad_norm": 0.3652670085430145, + "learning_rate": 1.3647570398524962e-05, + "loss": 0.4701, + "step": 27826 + }, + { + "epoch": 0.764058209774849, + "grad_norm": 0.40153026580810547, + "learning_rate": 1.3647168257989533e-05, + "loss": 0.5293, + "step": 27827 + }, + { + "epoch": 0.7640856672158155, + "grad_norm": 0.3866989016532898, + "learning_rate": 1.3646766110650968e-05, + "loss": 0.43, + "step": 27828 + }, + { + "epoch": 0.7641131246567819, + "grad_norm": 0.4103657007217407, + "learning_rate": 1.3646363956510022e-05, + "loss": 0.5307, + "step": 27829 + }, + { + "epoch": 0.7641405820977485, + "grad_norm": 0.4224601984024048, + "learning_rate": 1.3645961795567443e-05, + "loss": 0.5025, + "step": 27830 + }, + { + "epoch": 0.764168039538715, + "grad_norm": 0.5194474458694458, + "learning_rate": 1.3645559627823983e-05, + "loss": 0.5152, + "step": 27831 + }, + { + "epoch": 0.7641954969796815, + "grad_norm": 0.4085952341556549, + "learning_rate": 1.3645157453280388e-05, + "loss": 0.4994, + "step": 27832 + }, + { + "epoch": 0.764222954420648, + "grad_norm": 0.3722376525402069, + "learning_rate": 1.3644755271937416e-05, + "loss": 0.5262, + "step": 27833 + }, + { + "epoch": 0.7642504118616145, + "grad_norm": 0.45494306087493896, + "learning_rate": 1.3644353083795806e-05, + "loss": 0.6307, + "step": 27834 + }, + { + "epoch": 0.764277869302581, + "grad_norm": 0.40302610397338867, + "learning_rate": 1.3643950888856319e-05, + "loss": 0.5534, + "step": 27835 + }, + { + "epoch": 0.7643053267435475, + "grad_norm": 0.4879723787307739, + "learning_rate": 1.3643548687119697e-05, + "loss": 0.5714, + "step": 27836 + }, + { + "epoch": 0.764332784184514, + "grad_norm": 0.4247101843357086, + "learning_rate": 1.3643146478586696e-05, + "loss": 0.4267, + "step": 27837 + }, + { + "epoch": 0.7643602416254806, + "grad_norm": 0.41916462779045105, + "learning_rate": 1.3642744263258063e-05, + "loss": 0.4716, + "step": 27838 + }, + { + "epoch": 0.764387699066447, + "grad_norm": 0.36569055914878845, + "learning_rate": 1.3642342041134549e-05, + "loss": 0.535, + "step": 27839 + }, + { + "epoch": 0.7644151565074135, + "grad_norm": 0.3312757909297943, + "learning_rate": 1.3641939812216907e-05, + "loss": 0.4397, + "step": 27840 + }, + { + "epoch": 0.76444261394838, + "grad_norm": 0.450429230928421, + "learning_rate": 1.3641537576505882e-05, + "loss": 0.5709, + "step": 27841 + }, + { + "epoch": 0.7644700713893465, + "grad_norm": 0.3767551779747009, + "learning_rate": 1.364113533400223e-05, + "loss": 0.4768, + "step": 27842 + }, + { + "epoch": 0.764497528830313, + "grad_norm": 0.39069774746894836, + "learning_rate": 1.3640733084706696e-05, + "loss": 0.5115, + "step": 27843 + }, + { + "epoch": 0.7645249862712795, + "grad_norm": 0.39298439025878906, + "learning_rate": 1.3640330828620032e-05, + "loss": 0.592, + "step": 27844 + }, + { + "epoch": 0.7645524437122461, + "grad_norm": 0.39082473516464233, + "learning_rate": 1.3639928565742992e-05, + "loss": 0.528, + "step": 27845 + }, + { + "epoch": 0.7645799011532125, + "grad_norm": 0.3673419952392578, + "learning_rate": 1.3639526296076321e-05, + "loss": 0.4901, + "step": 27846 + }, + { + "epoch": 0.764607358594179, + "grad_norm": 0.39938321709632874, + "learning_rate": 1.3639124019620775e-05, + "loss": 0.5564, + "step": 27847 + }, + { + "epoch": 0.7646348160351455, + "grad_norm": 0.3836069107055664, + "learning_rate": 1.3638721736377098e-05, + "loss": 0.553, + "step": 27848 + }, + { + "epoch": 0.764662273476112, + "grad_norm": 0.3791085183620453, + "learning_rate": 1.3638319446346044e-05, + "loss": 0.4804, + "step": 27849 + }, + { + "epoch": 0.7646897309170785, + "grad_norm": 0.3713722229003906, + "learning_rate": 1.3637917149528366e-05, + "loss": 0.5082, + "step": 27850 + }, + { + "epoch": 0.764717188358045, + "grad_norm": 0.5478196144104004, + "learning_rate": 1.363751484592481e-05, + "loss": 0.4651, + "step": 27851 + }, + { + "epoch": 0.7647446457990116, + "grad_norm": 0.4450196623802185, + "learning_rate": 1.3637112535536125e-05, + "loss": 0.5261, + "step": 27852 + }, + { + "epoch": 0.764772103239978, + "grad_norm": 0.41863352060317993, + "learning_rate": 1.3636710218363068e-05, + "loss": 0.4847, + "step": 27853 + }, + { + "epoch": 0.7647995606809446, + "grad_norm": 0.3868766725063324, + "learning_rate": 1.3636307894406385e-05, + "loss": 0.537, + "step": 27854 + }, + { + "epoch": 0.764827018121911, + "grad_norm": 0.35054004192352295, + "learning_rate": 1.3635905563666824e-05, + "loss": 0.4685, + "step": 27855 + }, + { + "epoch": 0.7648544755628776, + "grad_norm": 0.36963996291160583, + "learning_rate": 1.3635503226145144e-05, + "loss": 0.4572, + "step": 27856 + }, + { + "epoch": 0.764881933003844, + "grad_norm": 0.37679052352905273, + "learning_rate": 1.3635100881842086e-05, + "loss": 0.4269, + "step": 27857 + }, + { + "epoch": 0.7649093904448105, + "grad_norm": 0.4062311351299286, + "learning_rate": 1.3634698530758405e-05, + "loss": 0.458, + "step": 27858 + }, + { + "epoch": 0.7649368478857771, + "grad_norm": 0.38182783126831055, + "learning_rate": 1.3634296172894852e-05, + "loss": 0.4345, + "step": 27859 + }, + { + "epoch": 0.7649643053267435, + "grad_norm": 0.3910367488861084, + "learning_rate": 1.3633893808252178e-05, + "loss": 0.4874, + "step": 27860 + }, + { + "epoch": 0.7649917627677101, + "grad_norm": 0.38973838090896606, + "learning_rate": 1.3633491436831131e-05, + "loss": 0.4639, + "step": 27861 + }, + { + "epoch": 0.7650192202086765, + "grad_norm": 0.4802083373069763, + "learning_rate": 1.3633089058632462e-05, + "loss": 0.4929, + "step": 27862 + }, + { + "epoch": 0.7650466776496431, + "grad_norm": 0.3954799175262451, + "learning_rate": 1.3632686673656925e-05, + "loss": 0.4296, + "step": 27863 + }, + { + "epoch": 0.7650741350906095, + "grad_norm": 0.4408734440803528, + "learning_rate": 1.3632284281905266e-05, + "loss": 0.5159, + "step": 27864 + }, + { + "epoch": 0.7651015925315761, + "grad_norm": 0.4179886281490326, + "learning_rate": 1.3631881883378237e-05, + "loss": 0.5303, + "step": 27865 + }, + { + "epoch": 0.7651290499725426, + "grad_norm": 0.36080968379974365, + "learning_rate": 1.363147947807659e-05, + "loss": 0.4554, + "step": 27866 + }, + { + "epoch": 0.765156507413509, + "grad_norm": 0.37062546610832214, + "learning_rate": 1.3631077066001074e-05, + "loss": 0.5149, + "step": 27867 + }, + { + "epoch": 0.7651839648544756, + "grad_norm": 0.3982182443141937, + "learning_rate": 1.3630674647152442e-05, + "loss": 0.5745, + "step": 27868 + }, + { + "epoch": 0.765211422295442, + "grad_norm": 0.37394070625305176, + "learning_rate": 1.3630272221531443e-05, + "loss": 0.4851, + "step": 27869 + }, + { + "epoch": 0.7652388797364086, + "grad_norm": 0.3372376263141632, + "learning_rate": 1.3629869789138825e-05, + "loss": 0.5194, + "step": 27870 + }, + { + "epoch": 0.765266337177375, + "grad_norm": 0.41050970554351807, + "learning_rate": 1.3629467349975345e-05, + "loss": 0.5555, + "step": 27871 + }, + { + "epoch": 0.7652937946183416, + "grad_norm": 0.34749579429626465, + "learning_rate": 1.3629064904041744e-05, + "loss": 0.3409, + "step": 27872 + }, + { + "epoch": 0.7653212520593081, + "grad_norm": 0.35551580786705017, + "learning_rate": 1.3628662451338785e-05, + "loss": 0.5057, + "step": 27873 + }, + { + "epoch": 0.7653487095002746, + "grad_norm": 0.414498507976532, + "learning_rate": 1.362825999186721e-05, + "loss": 0.4484, + "step": 27874 + }, + { + "epoch": 0.7653761669412411, + "grad_norm": 0.42545145750045776, + "learning_rate": 1.3627857525627772e-05, + "loss": 0.4645, + "step": 27875 + }, + { + "epoch": 0.7654036243822075, + "grad_norm": 0.4027795195579529, + "learning_rate": 1.3627455052621224e-05, + "loss": 0.4525, + "step": 27876 + }, + { + "epoch": 0.7654310818231741, + "grad_norm": 0.4570791721343994, + "learning_rate": 1.362705257284831e-05, + "loss": 0.4574, + "step": 27877 + }, + { + "epoch": 0.7654585392641405, + "grad_norm": 0.42148464918136597, + "learning_rate": 1.3626650086309789e-05, + "loss": 0.5596, + "step": 27878 + }, + { + "epoch": 0.7654859967051071, + "grad_norm": 0.4957759380340576, + "learning_rate": 1.3626247593006408e-05, + "loss": 0.5682, + "step": 27879 + }, + { + "epoch": 0.7655134541460736, + "grad_norm": 0.42917799949645996, + "learning_rate": 1.3625845092938917e-05, + "loss": 0.4698, + "step": 27880 + }, + { + "epoch": 0.7655409115870401, + "grad_norm": 0.4097735285758972, + "learning_rate": 1.3625442586108066e-05, + "loss": 0.4451, + "step": 27881 + }, + { + "epoch": 0.7655683690280066, + "grad_norm": 0.4042794108390808, + "learning_rate": 1.3625040072514611e-05, + "loss": 0.5317, + "step": 27882 + }, + { + "epoch": 0.7655958264689731, + "grad_norm": 0.3711957037448883, + "learning_rate": 1.3624637552159294e-05, + "loss": 0.5942, + "step": 27883 + }, + { + "epoch": 0.7656232839099396, + "grad_norm": 0.3671351373195648, + "learning_rate": 1.3624235025042878e-05, + "loss": 0.4647, + "step": 27884 + }, + { + "epoch": 0.765650741350906, + "grad_norm": 0.4117841422557831, + "learning_rate": 1.36238324911661e-05, + "loss": 0.4995, + "step": 27885 + }, + { + "epoch": 0.7656781987918726, + "grad_norm": 0.38087204098701477, + "learning_rate": 1.3623429950529723e-05, + "loss": 0.4751, + "step": 27886 + }, + { + "epoch": 0.7657056562328391, + "grad_norm": 0.4012148082256317, + "learning_rate": 1.3623027403134493e-05, + "loss": 0.4452, + "step": 27887 + }, + { + "epoch": 0.7657331136738056, + "grad_norm": 0.3719598054885864, + "learning_rate": 1.3622624848981157e-05, + "loss": 0.5151, + "step": 27888 + }, + { + "epoch": 0.7657605711147721, + "grad_norm": 0.36409908533096313, + "learning_rate": 1.362222228807047e-05, + "loss": 0.5513, + "step": 27889 + }, + { + "epoch": 0.7657880285557386, + "grad_norm": 0.3858785629272461, + "learning_rate": 1.3621819720403183e-05, + "loss": 0.502, + "step": 27890 + }, + { + "epoch": 0.7658154859967051, + "grad_norm": 0.5726978778839111, + "learning_rate": 1.3621417145980046e-05, + "loss": 0.4998, + "step": 27891 + }, + { + "epoch": 0.7658429434376716, + "grad_norm": 0.37165966629981995, + "learning_rate": 1.3621014564801814e-05, + "loss": 0.4895, + "step": 27892 + }, + { + "epoch": 0.7658704008786381, + "grad_norm": 0.41381168365478516, + "learning_rate": 1.3620611976869227e-05, + "loss": 0.5176, + "step": 27893 + }, + { + "epoch": 0.7658978583196047, + "grad_norm": 0.4211967885494232, + "learning_rate": 1.3620209382183048e-05, + "loss": 0.5578, + "step": 27894 + }, + { + "epoch": 0.7659253157605711, + "grad_norm": 0.3661515712738037, + "learning_rate": 1.3619806780744023e-05, + "loss": 0.4934, + "step": 27895 + }, + { + "epoch": 0.7659527732015377, + "grad_norm": 0.3936666250228882, + "learning_rate": 1.3619404172552901e-05, + "loss": 0.4356, + "step": 27896 + }, + { + "epoch": 0.7659802306425041, + "grad_norm": 0.39520037174224854, + "learning_rate": 1.3619001557610436e-05, + "loss": 0.4805, + "step": 27897 + }, + { + "epoch": 0.7660076880834706, + "grad_norm": 0.35052451491355896, + "learning_rate": 1.3618598935917378e-05, + "loss": 0.4464, + "step": 27898 + }, + { + "epoch": 0.7660351455244371, + "grad_norm": 0.36251866817474365, + "learning_rate": 1.3618196307474478e-05, + "loss": 0.4218, + "step": 27899 + }, + { + "epoch": 0.7660626029654036, + "grad_norm": 0.4100438952445984, + "learning_rate": 1.3617793672282489e-05, + "loss": 0.467, + "step": 27900 + }, + { + "epoch": 0.7660900604063702, + "grad_norm": 0.44292888045310974, + "learning_rate": 1.3617391030342158e-05, + "loss": 0.5008, + "step": 27901 + }, + { + "epoch": 0.7661175178473366, + "grad_norm": 0.3937571942806244, + "learning_rate": 1.361698838165424e-05, + "loss": 0.4319, + "step": 27902 + }, + { + "epoch": 0.7661449752883032, + "grad_norm": 0.3771359622478485, + "learning_rate": 1.361658572621948e-05, + "loss": 0.4573, + "step": 27903 + }, + { + "epoch": 0.7661724327292696, + "grad_norm": 0.5890532732009888, + "learning_rate": 1.3616183064038637e-05, + "loss": 0.4313, + "step": 27904 + }, + { + "epoch": 0.7661998901702362, + "grad_norm": 0.42245644330978394, + "learning_rate": 1.3615780395112458e-05, + "loss": 0.5069, + "step": 27905 + }, + { + "epoch": 0.7662273476112026, + "grad_norm": 0.4315625727176666, + "learning_rate": 1.3615377719441693e-05, + "loss": 0.5236, + "step": 27906 + }, + { + "epoch": 0.7662548050521691, + "grad_norm": 0.3646075129508972, + "learning_rate": 1.3614975037027097e-05, + "loss": 0.4929, + "step": 27907 + }, + { + "epoch": 0.7662822624931357, + "grad_norm": 0.4007493257522583, + "learning_rate": 1.3614572347869416e-05, + "loss": 0.5033, + "step": 27908 + }, + { + "epoch": 0.7663097199341021, + "grad_norm": 0.41998612880706787, + "learning_rate": 1.3614169651969407e-05, + "loss": 0.5195, + "step": 27909 + }, + { + "epoch": 0.7663371773750687, + "grad_norm": 0.3904440402984619, + "learning_rate": 1.3613766949327817e-05, + "loss": 0.4579, + "step": 27910 + }, + { + "epoch": 0.7663646348160351, + "grad_norm": 0.42993488907814026, + "learning_rate": 1.3613364239945397e-05, + "loss": 0.5413, + "step": 27911 + }, + { + "epoch": 0.7663920922570017, + "grad_norm": 0.4059152603149414, + "learning_rate": 1.3612961523822901e-05, + "loss": 0.6045, + "step": 27912 + }, + { + "epoch": 0.7664195496979681, + "grad_norm": 0.4058482050895691, + "learning_rate": 1.3612558800961078e-05, + "loss": 0.5271, + "step": 27913 + }, + { + "epoch": 0.7664470071389347, + "grad_norm": 0.38170701265335083, + "learning_rate": 1.3612156071360679e-05, + "loss": 0.5155, + "step": 27914 + }, + { + "epoch": 0.7664744645799012, + "grad_norm": 0.36836135387420654, + "learning_rate": 1.3611753335022458e-05, + "loss": 0.4884, + "step": 27915 + }, + { + "epoch": 0.7665019220208676, + "grad_norm": 0.3811154365539551, + "learning_rate": 1.3611350591947164e-05, + "loss": 0.512, + "step": 27916 + }, + { + "epoch": 0.7665293794618342, + "grad_norm": 0.39510875940322876, + "learning_rate": 1.3610947842135547e-05, + "loss": 0.5332, + "step": 27917 + }, + { + "epoch": 0.7665568369028006, + "grad_norm": 0.4496965706348419, + "learning_rate": 1.3610545085588359e-05, + "loss": 0.5635, + "step": 27918 + }, + { + "epoch": 0.7665842943437672, + "grad_norm": 0.4198967218399048, + "learning_rate": 1.3610142322306355e-05, + "loss": 0.5638, + "step": 27919 + }, + { + "epoch": 0.7666117517847336, + "grad_norm": 0.4147225320339203, + "learning_rate": 1.3609739552290282e-05, + "loss": 0.5349, + "step": 27920 + }, + { + "epoch": 0.7666392092257002, + "grad_norm": 0.40342646837234497, + "learning_rate": 1.3609336775540892e-05, + "loss": 0.5758, + "step": 27921 + }, + { + "epoch": 0.7666666666666667, + "grad_norm": 0.35331979393959045, + "learning_rate": 1.3608933992058936e-05, + "loss": 0.5079, + "step": 27922 + }, + { + "epoch": 0.7666941241076332, + "grad_norm": 0.3548499643802643, + "learning_rate": 1.3608531201845168e-05, + "loss": 0.3963, + "step": 27923 + }, + { + "epoch": 0.7667215815485997, + "grad_norm": 0.3902825117111206, + "learning_rate": 1.3608128404900339e-05, + "loss": 0.5084, + "step": 27924 + }, + { + "epoch": 0.7667490389895661, + "grad_norm": 0.35879606008529663, + "learning_rate": 1.3607725601225197e-05, + "loss": 0.5078, + "step": 27925 + }, + { + "epoch": 0.7667764964305327, + "grad_norm": 0.3608178496360779, + "learning_rate": 1.3607322790820495e-05, + "loss": 0.4802, + "step": 27926 + }, + { + "epoch": 0.7668039538714991, + "grad_norm": 0.36149460077285767, + "learning_rate": 1.3606919973686986e-05, + "loss": 0.4512, + "step": 27927 + }, + { + "epoch": 0.7668314113124657, + "grad_norm": 0.33917367458343506, + "learning_rate": 1.3606517149825418e-05, + "loss": 0.4317, + "step": 27928 + }, + { + "epoch": 0.7668588687534322, + "grad_norm": 0.41974562406539917, + "learning_rate": 1.3606114319236547e-05, + "loss": 0.4625, + "step": 27929 + }, + { + "epoch": 0.7668863261943987, + "grad_norm": 0.3692377507686615, + "learning_rate": 1.3605711481921117e-05, + "loss": 0.459, + "step": 27930 + }, + { + "epoch": 0.7669137836353652, + "grad_norm": 0.4317486584186554, + "learning_rate": 1.3605308637879891e-05, + "loss": 0.596, + "step": 27931 + }, + { + "epoch": 0.7669412410763317, + "grad_norm": 0.38630765676498413, + "learning_rate": 1.360490578711361e-05, + "loss": 0.5325, + "step": 27932 + }, + { + "epoch": 0.7669686985172982, + "grad_norm": 0.34342890977859497, + "learning_rate": 1.3604502929623029e-05, + "loss": 0.3644, + "step": 27933 + }, + { + "epoch": 0.7669961559582646, + "grad_norm": 0.412077397108078, + "learning_rate": 1.3604100065408901e-05, + "loss": 0.4653, + "step": 27934 + }, + { + "epoch": 0.7670236133992312, + "grad_norm": 0.43031978607177734, + "learning_rate": 1.3603697194471975e-05, + "loss": 0.583, + "step": 27935 + }, + { + "epoch": 0.7670510708401977, + "grad_norm": 0.3807745575904846, + "learning_rate": 1.3603294316813005e-05, + "loss": 0.4786, + "step": 27936 + }, + { + "epoch": 0.7670785282811642, + "grad_norm": 0.38018473982810974, + "learning_rate": 1.360289143243274e-05, + "loss": 0.5018, + "step": 27937 + }, + { + "epoch": 0.7671059857221307, + "grad_norm": 0.37534019351005554, + "learning_rate": 1.3602488541331932e-05, + "loss": 0.5771, + "step": 27938 + }, + { + "epoch": 0.7671334431630972, + "grad_norm": 0.41659069061279297, + "learning_rate": 1.3602085643511335e-05, + "loss": 0.4876, + "step": 27939 + }, + { + "epoch": 0.7671609006040637, + "grad_norm": 0.43477585911750793, + "learning_rate": 1.3601682738971695e-05, + "loss": 0.5483, + "step": 27940 + }, + { + "epoch": 0.7671883580450302, + "grad_norm": 0.3925119638442993, + "learning_rate": 1.3601279827713771e-05, + "loss": 0.5112, + "step": 27941 + }, + { + "epoch": 0.7672158154859967, + "grad_norm": 0.39348024129867554, + "learning_rate": 1.360087690973831e-05, + "loss": 0.523, + "step": 27942 + }, + { + "epoch": 0.7672432729269633, + "grad_norm": 0.495700865983963, + "learning_rate": 1.3600473985046064e-05, + "loss": 0.4979, + "step": 27943 + }, + { + "epoch": 0.7672707303679297, + "grad_norm": 0.40965840220451355, + "learning_rate": 1.3600071053637785e-05, + "loss": 0.5468, + "step": 27944 + }, + { + "epoch": 0.7672981878088962, + "grad_norm": 0.5231394171714783, + "learning_rate": 1.3599668115514222e-05, + "loss": 0.6477, + "step": 27945 + }, + { + "epoch": 0.7673256452498627, + "grad_norm": 0.37318387627601624, + "learning_rate": 1.359926517067613e-05, + "loss": 0.4665, + "step": 27946 + }, + { + "epoch": 0.7673531026908292, + "grad_norm": 0.48445335030555725, + "learning_rate": 1.359886221912426e-05, + "loss": 0.5752, + "step": 27947 + }, + { + "epoch": 0.7673805601317957, + "grad_norm": 0.3662641644477844, + "learning_rate": 1.3598459260859364e-05, + "loss": 0.4461, + "step": 27948 + }, + { + "epoch": 0.7674080175727622, + "grad_norm": 0.431325227022171, + "learning_rate": 1.3598056295882196e-05, + "loss": 0.4546, + "step": 27949 + }, + { + "epoch": 0.7674354750137288, + "grad_norm": 0.3369337022304535, + "learning_rate": 1.35976533241935e-05, + "loss": 0.5441, + "step": 27950 + }, + { + "epoch": 0.7674629324546952, + "grad_norm": 0.362045556306839, + "learning_rate": 1.3597250345794036e-05, + "loss": 0.4736, + "step": 27951 + }, + { + "epoch": 0.7674903898956618, + "grad_norm": 0.3732687830924988, + "learning_rate": 1.3596847360684549e-05, + "loss": 0.4866, + "step": 27952 + }, + { + "epoch": 0.7675178473366282, + "grad_norm": 0.4180013835430145, + "learning_rate": 1.3596444368865794e-05, + "loss": 0.554, + "step": 27953 + }, + { + "epoch": 0.7675453047775948, + "grad_norm": 0.35502302646636963, + "learning_rate": 1.3596041370338524e-05, + "loss": 0.463, + "step": 27954 + }, + { + "epoch": 0.7675727622185612, + "grad_norm": 0.37081098556518555, + "learning_rate": 1.3595638365103487e-05, + "loss": 0.4448, + "step": 27955 + }, + { + "epoch": 0.7676002196595277, + "grad_norm": 0.3752250373363495, + "learning_rate": 1.3595235353161438e-05, + "loss": 0.456, + "step": 27956 + }, + { + "epoch": 0.7676276771004943, + "grad_norm": 0.42058265209198, + "learning_rate": 1.3594832334513131e-05, + "loss": 0.4908, + "step": 27957 + }, + { + "epoch": 0.7676551345414607, + "grad_norm": 0.42261481285095215, + "learning_rate": 1.3594429309159307e-05, + "loss": 0.6008, + "step": 27958 + }, + { + "epoch": 0.7676825919824273, + "grad_norm": 0.3996158540248871, + "learning_rate": 1.3594026277100732e-05, + "loss": 0.5171, + "step": 27959 + }, + { + "epoch": 0.7677100494233937, + "grad_norm": 0.4017082750797272, + "learning_rate": 1.3593623238338145e-05, + "loss": 0.5353, + "step": 27960 + }, + { + "epoch": 0.7677375068643603, + "grad_norm": 0.41361287236213684, + "learning_rate": 1.3593220192872308e-05, + "loss": 0.5507, + "step": 27961 + }, + { + "epoch": 0.7677649643053267, + "grad_norm": 0.4877259433269501, + "learning_rate": 1.359281714070397e-05, + "loss": 0.6388, + "step": 27962 + }, + { + "epoch": 0.7677924217462933, + "grad_norm": 0.37224888801574707, + "learning_rate": 1.3592414081833876e-05, + "loss": 0.4271, + "step": 27963 + }, + { + "epoch": 0.7678198791872598, + "grad_norm": 0.4030722975730896, + "learning_rate": 1.3592011016262787e-05, + "loss": 0.5039, + "step": 27964 + }, + { + "epoch": 0.7678473366282262, + "grad_norm": 0.3558710515499115, + "learning_rate": 1.359160794399145e-05, + "loss": 0.4475, + "step": 27965 + }, + { + "epoch": 0.7678747940691928, + "grad_norm": 0.5430089235305786, + "learning_rate": 1.3591204865020617e-05, + "loss": 0.5374, + "step": 27966 + }, + { + "epoch": 0.7679022515101592, + "grad_norm": 0.40361642837524414, + "learning_rate": 1.3590801779351042e-05, + "loss": 0.5239, + "step": 27967 + }, + { + "epoch": 0.7679297089511258, + "grad_norm": 0.3550936281681061, + "learning_rate": 1.3590398686983472e-05, + "loss": 0.4967, + "step": 27968 + }, + { + "epoch": 0.7679571663920922, + "grad_norm": 0.38995662331581116, + "learning_rate": 1.3589995587918669e-05, + "loss": 0.5761, + "step": 27969 + }, + { + "epoch": 0.7679846238330588, + "grad_norm": 0.3682675063610077, + "learning_rate": 1.3589592482157374e-05, + "loss": 0.5166, + "step": 27970 + }, + { + "epoch": 0.7680120812740253, + "grad_norm": 0.3949057459831238, + "learning_rate": 1.3589189369700344e-05, + "loss": 0.4866, + "step": 27971 + }, + { + "epoch": 0.7680395387149918, + "grad_norm": 0.39920124411582947, + "learning_rate": 1.3588786250548332e-05, + "loss": 0.4927, + "step": 27972 + }, + { + "epoch": 0.7680669961559583, + "grad_norm": 0.36707353591918945, + "learning_rate": 1.3588383124702085e-05, + "loss": 0.447, + "step": 27973 + }, + { + "epoch": 0.7680944535969247, + "grad_norm": 0.4270772933959961, + "learning_rate": 1.3587979992162361e-05, + "loss": 0.4578, + "step": 27974 + }, + { + "epoch": 0.7681219110378913, + "grad_norm": 0.3590148985385895, + "learning_rate": 1.3587576852929909e-05, + "loss": 0.4862, + "step": 27975 + }, + { + "epoch": 0.7681493684788577, + "grad_norm": 0.3854174315929413, + "learning_rate": 1.358717370700548e-05, + "loss": 0.5316, + "step": 27976 + }, + { + "epoch": 0.7681768259198243, + "grad_norm": 0.42962580919265747, + "learning_rate": 1.3586770554389828e-05, + "loss": 0.5247, + "step": 27977 + }, + { + "epoch": 0.7682042833607908, + "grad_norm": 0.4102726876735687, + "learning_rate": 1.3586367395083701e-05, + "loss": 0.5207, + "step": 27978 + }, + { + "epoch": 0.7682317408017573, + "grad_norm": 0.3867008686065674, + "learning_rate": 1.3585964229087858e-05, + "loss": 0.5074, + "step": 27979 + }, + { + "epoch": 0.7682591982427238, + "grad_norm": 0.3736969232559204, + "learning_rate": 1.3585561056403047e-05, + "loss": 0.4771, + "step": 27980 + }, + { + "epoch": 0.7682866556836903, + "grad_norm": 0.37903568148612976, + "learning_rate": 1.358515787703002e-05, + "loss": 0.5476, + "step": 27981 + }, + { + "epoch": 0.7683141131246568, + "grad_norm": 0.44269874691963196, + "learning_rate": 1.3584754690969529e-05, + "loss": 0.4457, + "step": 27982 + }, + { + "epoch": 0.7683415705656232, + "grad_norm": 0.37522202730178833, + "learning_rate": 1.3584351498222326e-05, + "loss": 0.4912, + "step": 27983 + }, + { + "epoch": 0.7683690280065898, + "grad_norm": 0.5374528169631958, + "learning_rate": 1.3583948298789163e-05, + "loss": 0.4484, + "step": 27984 + }, + { + "epoch": 0.7683964854475562, + "grad_norm": 0.4107024669647217, + "learning_rate": 1.3583545092670793e-05, + "loss": 0.5298, + "step": 27985 + }, + { + "epoch": 0.7684239428885228, + "grad_norm": 0.37669023871421814, + "learning_rate": 1.3583141879867967e-05, + "loss": 0.477, + "step": 27986 + }, + { + "epoch": 0.7684514003294893, + "grad_norm": 0.37141096591949463, + "learning_rate": 1.358273866038144e-05, + "loss": 0.4773, + "step": 27987 + }, + { + "epoch": 0.7684788577704558, + "grad_norm": 0.33972862362861633, + "learning_rate": 1.358233543421196e-05, + "loss": 0.4399, + "step": 27988 + }, + { + "epoch": 0.7685063152114223, + "grad_norm": 0.3660767674446106, + "learning_rate": 1.3581932201360281e-05, + "loss": 0.4506, + "step": 27989 + }, + { + "epoch": 0.7685337726523888, + "grad_norm": 0.39641717076301575, + "learning_rate": 1.3581528961827157e-05, + "loss": 0.5154, + "step": 27990 + }, + { + "epoch": 0.7685612300933553, + "grad_norm": 0.4479491710662842, + "learning_rate": 1.3581125715613335e-05, + "loss": 0.5023, + "step": 27991 + }, + { + "epoch": 0.7685886875343217, + "grad_norm": 0.36564165353775024, + "learning_rate": 1.3580722462719573e-05, + "loss": 0.5418, + "step": 27992 + }, + { + "epoch": 0.7686161449752883, + "grad_norm": 0.39860275387763977, + "learning_rate": 1.3580319203146622e-05, + "loss": 0.5126, + "step": 27993 + }, + { + "epoch": 0.7686436024162548, + "grad_norm": 0.40494057536125183, + "learning_rate": 1.3579915936895231e-05, + "loss": 0.4553, + "step": 27994 + }, + { + "epoch": 0.7686710598572213, + "grad_norm": 0.35501962900161743, + "learning_rate": 1.3579512663966155e-05, + "loss": 0.4635, + "step": 27995 + }, + { + "epoch": 0.7686985172981878, + "grad_norm": 0.386973112821579, + "learning_rate": 1.3579109384360143e-05, + "loss": 0.535, + "step": 27996 + }, + { + "epoch": 0.7687259747391543, + "grad_norm": 0.3680135905742645, + "learning_rate": 1.3578706098077952e-05, + "loss": 0.4969, + "step": 27997 + }, + { + "epoch": 0.7687534321801208, + "grad_norm": 0.4172013998031616, + "learning_rate": 1.3578302805120333e-05, + "loss": 0.5812, + "step": 27998 + }, + { + "epoch": 0.7687808896210873, + "grad_norm": 0.3964799642562866, + "learning_rate": 1.3577899505488034e-05, + "loss": 0.5512, + "step": 27999 + }, + { + "epoch": 0.7688083470620538, + "grad_norm": 0.40725332498550415, + "learning_rate": 1.3577496199181814e-05, + "loss": 0.4978, + "step": 28000 + }, + { + "epoch": 0.7688358045030204, + "grad_norm": 0.4264396131038666, + "learning_rate": 1.357709288620242e-05, + "loss": 0.5704, + "step": 28001 + }, + { + "epoch": 0.7688632619439868, + "grad_norm": 0.36075058579444885, + "learning_rate": 1.3576689566550607e-05, + "loss": 0.5104, + "step": 28002 + }, + { + "epoch": 0.7688907193849533, + "grad_norm": 0.41394850611686707, + "learning_rate": 1.3576286240227125e-05, + "loss": 0.4964, + "step": 28003 + }, + { + "epoch": 0.7689181768259198, + "grad_norm": 0.5065253376960754, + "learning_rate": 1.357588290723273e-05, + "loss": 0.4913, + "step": 28004 + }, + { + "epoch": 0.7689456342668863, + "grad_norm": 0.371906042098999, + "learning_rate": 1.3575479567568168e-05, + "loss": 0.5775, + "step": 28005 + }, + { + "epoch": 0.7689730917078528, + "grad_norm": 0.3753708600997925, + "learning_rate": 1.35750762212342e-05, + "loss": 0.4567, + "step": 28006 + }, + { + "epoch": 0.7690005491488193, + "grad_norm": 0.3868293762207031, + "learning_rate": 1.357467286823157e-05, + "loss": 0.5786, + "step": 28007 + }, + { + "epoch": 0.7690280065897859, + "grad_norm": 0.42944371700286865, + "learning_rate": 1.3574269508561038e-05, + "loss": 0.6175, + "step": 28008 + }, + { + "epoch": 0.7690554640307523, + "grad_norm": 0.3710891008377075, + "learning_rate": 1.3573866142223348e-05, + "loss": 0.538, + "step": 28009 + }, + { + "epoch": 0.7690829214717189, + "grad_norm": 0.3486689031124115, + "learning_rate": 1.3573462769219262e-05, + "loss": 0.4269, + "step": 28010 + }, + { + "epoch": 0.7691103789126853, + "grad_norm": 0.4652608335018158, + "learning_rate": 1.3573059389549526e-05, + "loss": 0.5074, + "step": 28011 + }, + { + "epoch": 0.7691378363536518, + "grad_norm": 0.3810223639011383, + "learning_rate": 1.357265600321489e-05, + "loss": 0.538, + "step": 28012 + }, + { + "epoch": 0.7691652937946183, + "grad_norm": 0.40334707498550415, + "learning_rate": 1.3572252610216114e-05, + "loss": 0.4449, + "step": 28013 + }, + { + "epoch": 0.7691927512355848, + "grad_norm": 0.34336867928504944, + "learning_rate": 1.3571849210553944e-05, + "loss": 0.4671, + "step": 28014 + }, + { + "epoch": 0.7692202086765514, + "grad_norm": 0.37650883197784424, + "learning_rate": 1.357144580422914e-05, + "loss": 0.4996, + "step": 28015 + }, + { + "epoch": 0.7692476661175178, + "grad_norm": 0.4217616319656372, + "learning_rate": 1.3571042391242446e-05, + "loss": 0.5442, + "step": 28016 + }, + { + "epoch": 0.7692751235584844, + "grad_norm": 0.4186553657054901, + "learning_rate": 1.3570638971594618e-05, + "loss": 0.4883, + "step": 28017 + }, + { + "epoch": 0.7693025809994508, + "grad_norm": 0.39594221115112305, + "learning_rate": 1.3570235545286411e-05, + "loss": 0.5056, + "step": 28018 + }, + { + "epoch": 0.7693300384404174, + "grad_norm": 0.3648996651172638, + "learning_rate": 1.3569832112318576e-05, + "loss": 0.4473, + "step": 28019 + }, + { + "epoch": 0.7693574958813838, + "grad_norm": 0.37187623977661133, + "learning_rate": 1.356942867269186e-05, + "loss": 0.4626, + "step": 28020 + }, + { + "epoch": 0.7693849533223504, + "grad_norm": 0.4035250246524811, + "learning_rate": 1.3569025226407025e-05, + "loss": 0.4229, + "step": 28021 + }, + { + "epoch": 0.7694124107633169, + "grad_norm": 0.48372161388397217, + "learning_rate": 1.3568621773464814e-05, + "loss": 0.5111, + "step": 28022 + }, + { + "epoch": 0.7694398682042833, + "grad_norm": 0.36847466230392456, + "learning_rate": 1.3568218313865988e-05, + "loss": 0.4493, + "step": 28023 + }, + { + "epoch": 0.7694673256452499, + "grad_norm": 0.7029227018356323, + "learning_rate": 1.3567814847611295e-05, + "loss": 0.466, + "step": 28024 + }, + { + "epoch": 0.7694947830862163, + "grad_norm": 0.3704756498336792, + "learning_rate": 1.3567411374701489e-05, + "loss": 0.5098, + "step": 28025 + }, + { + "epoch": 0.7695222405271829, + "grad_norm": 0.38176873326301575, + "learning_rate": 1.3567007895137324e-05, + "loss": 0.4569, + "step": 28026 + }, + { + "epoch": 0.7695496979681493, + "grad_norm": 0.3453659415245056, + "learning_rate": 1.3566604408919545e-05, + "loss": 0.4207, + "step": 28027 + }, + { + "epoch": 0.7695771554091159, + "grad_norm": 0.3721897006034851, + "learning_rate": 1.3566200916048918e-05, + "loss": 0.5097, + "step": 28028 + }, + { + "epoch": 0.7696046128500824, + "grad_norm": 0.41934019327163696, + "learning_rate": 1.3565797416526185e-05, + "loss": 0.5452, + "step": 28029 + }, + { + "epoch": 0.7696320702910489, + "grad_norm": 0.3950050473213196, + "learning_rate": 1.3565393910352099e-05, + "loss": 0.5415, + "step": 28030 + }, + { + "epoch": 0.7696595277320154, + "grad_norm": 0.3357141613960266, + "learning_rate": 1.3564990397527418e-05, + "loss": 0.4571, + "step": 28031 + }, + { + "epoch": 0.7696869851729818, + "grad_norm": 0.35553568601608276, + "learning_rate": 1.356458687805289e-05, + "loss": 0.483, + "step": 28032 + }, + { + "epoch": 0.7697144426139484, + "grad_norm": 0.3440077304840088, + "learning_rate": 1.3564183351929274e-05, + "loss": 0.5116, + "step": 28033 + }, + { + "epoch": 0.7697419000549148, + "grad_norm": 0.4196542501449585, + "learning_rate": 1.3563779819157316e-05, + "loss": 0.5028, + "step": 28034 + }, + { + "epoch": 0.7697693574958814, + "grad_norm": 0.3786267638206482, + "learning_rate": 1.356337627973777e-05, + "loss": 0.4279, + "step": 28035 + }, + { + "epoch": 0.7697968149368479, + "grad_norm": 0.33255940675735474, + "learning_rate": 1.3562972733671391e-05, + "loss": 0.4083, + "step": 28036 + }, + { + "epoch": 0.7698242723778144, + "grad_norm": 0.40417125821113586, + "learning_rate": 1.3562569180958933e-05, + "loss": 0.4504, + "step": 28037 + }, + { + "epoch": 0.7698517298187809, + "grad_norm": 0.36948099732398987, + "learning_rate": 1.3562165621601142e-05, + "loss": 0.5183, + "step": 28038 + }, + { + "epoch": 0.7698791872597474, + "grad_norm": 0.3877907991409302, + "learning_rate": 1.3561762055598777e-05, + "loss": 0.5111, + "step": 28039 + }, + { + "epoch": 0.7699066447007139, + "grad_norm": 0.39727193117141724, + "learning_rate": 1.356135848295259e-05, + "loss": 0.5755, + "step": 28040 + }, + { + "epoch": 0.7699341021416803, + "grad_norm": 0.4464273452758789, + "learning_rate": 1.3560954903663333e-05, + "loss": 0.4457, + "step": 28041 + }, + { + "epoch": 0.7699615595826469, + "grad_norm": 0.408321738243103, + "learning_rate": 1.3560551317731758e-05, + "loss": 0.5236, + "step": 28042 + }, + { + "epoch": 0.7699890170236134, + "grad_norm": 0.42154422402381897, + "learning_rate": 1.3560147725158617e-05, + "loss": 0.537, + "step": 28043 + }, + { + "epoch": 0.7700164744645799, + "grad_norm": 0.4169590473175049, + "learning_rate": 1.3559744125944665e-05, + "loss": 0.4786, + "step": 28044 + }, + { + "epoch": 0.7700439319055464, + "grad_norm": 0.3673803508281708, + "learning_rate": 1.3559340520090654e-05, + "loss": 0.3718, + "step": 28045 + }, + { + "epoch": 0.7700713893465129, + "grad_norm": 0.3822043836116791, + "learning_rate": 1.355893690759734e-05, + "loss": 0.459, + "step": 28046 + }, + { + "epoch": 0.7700988467874794, + "grad_norm": 0.4061557352542877, + "learning_rate": 1.355853328846547e-05, + "loss": 0.5408, + "step": 28047 + }, + { + "epoch": 0.7701263042284459, + "grad_norm": 0.3583306670188904, + "learning_rate": 1.3558129662695799e-05, + "loss": 0.5139, + "step": 28048 + }, + { + "epoch": 0.7701537616694124, + "grad_norm": 0.34545695781707764, + "learning_rate": 1.3557726030289082e-05, + "loss": 0.4417, + "step": 28049 + }, + { + "epoch": 0.770181219110379, + "grad_norm": 0.4461559057235718, + "learning_rate": 1.355732239124607e-05, + "loss": 0.5377, + "step": 28050 + }, + { + "epoch": 0.7702086765513454, + "grad_norm": 0.36533525586128235, + "learning_rate": 1.3556918745567516e-05, + "loss": 0.4401, + "step": 28051 + }, + { + "epoch": 0.7702361339923119, + "grad_norm": 0.4652427136898041, + "learning_rate": 1.3556515093254174e-05, + "loss": 0.4798, + "step": 28052 + }, + { + "epoch": 0.7702635914332784, + "grad_norm": 0.41947072744369507, + "learning_rate": 1.3556111434306793e-05, + "loss": 0.4792, + "step": 28053 + }, + { + "epoch": 0.7702910488742449, + "grad_norm": 0.3946281671524048, + "learning_rate": 1.3555707768726134e-05, + "loss": 0.4914, + "step": 28054 + }, + { + "epoch": 0.7703185063152114, + "grad_norm": 0.40286222100257874, + "learning_rate": 1.3555304096512943e-05, + "loss": 0.4723, + "step": 28055 + }, + { + "epoch": 0.7703459637561779, + "grad_norm": 0.36907759308815, + "learning_rate": 1.3554900417667973e-05, + "loss": 0.4693, + "step": 28056 + }, + { + "epoch": 0.7703734211971445, + "grad_norm": 0.3980594873428345, + "learning_rate": 1.3554496732191984e-05, + "loss": 0.4944, + "step": 28057 + }, + { + "epoch": 0.7704008786381109, + "grad_norm": 0.36845189332962036, + "learning_rate": 1.3554093040085721e-05, + "loss": 0.5463, + "step": 28058 + }, + { + "epoch": 0.7704283360790775, + "grad_norm": 0.3730827867984772, + "learning_rate": 1.3553689341349941e-05, + "loss": 0.4794, + "step": 28059 + }, + { + "epoch": 0.7704557935200439, + "grad_norm": 0.37356460094451904, + "learning_rate": 1.3553285635985397e-05, + "loss": 0.4539, + "step": 28060 + }, + { + "epoch": 0.7704832509610104, + "grad_norm": 0.38706114888191223, + "learning_rate": 1.3552881923992838e-05, + "loss": 0.4844, + "step": 28061 + }, + { + "epoch": 0.7705107084019769, + "grad_norm": 0.3801008462905884, + "learning_rate": 1.3552478205373024e-05, + "loss": 0.518, + "step": 28062 + }, + { + "epoch": 0.7705381658429434, + "grad_norm": 0.3878426253795624, + "learning_rate": 1.35520744801267e-05, + "loss": 0.4344, + "step": 28063 + }, + { + "epoch": 0.77056562328391, + "grad_norm": 0.40854930877685547, + "learning_rate": 1.3551670748254627e-05, + "loss": 0.5112, + "step": 28064 + }, + { + "epoch": 0.7705930807248764, + "grad_norm": 0.4653995633125305, + "learning_rate": 1.3551267009757553e-05, + "loss": 0.4868, + "step": 28065 + }, + { + "epoch": 0.770620538165843, + "grad_norm": 0.3766760230064392, + "learning_rate": 1.3550863264636233e-05, + "loss": 0.5131, + "step": 28066 + }, + { + "epoch": 0.7706479956068094, + "grad_norm": 0.3708924651145935, + "learning_rate": 1.355045951289142e-05, + "loss": 0.5087, + "step": 28067 + }, + { + "epoch": 0.770675453047776, + "grad_norm": 0.4046022593975067, + "learning_rate": 1.3550055754523867e-05, + "loss": 0.4658, + "step": 28068 + }, + { + "epoch": 0.7707029104887424, + "grad_norm": 0.41350144147872925, + "learning_rate": 1.3549651989534327e-05, + "loss": 0.4975, + "step": 28069 + }, + { + "epoch": 0.770730367929709, + "grad_norm": 0.3963812291622162, + "learning_rate": 1.3549248217923554e-05, + "loss": 0.5011, + "step": 28070 + }, + { + "epoch": 0.7707578253706755, + "grad_norm": 0.3329675495624542, + "learning_rate": 1.3548844439692298e-05, + "loss": 0.3957, + "step": 28071 + }, + { + "epoch": 0.7707852828116419, + "grad_norm": 0.4136483669281006, + "learning_rate": 1.3548440654841316e-05, + "loss": 0.601, + "step": 28072 + }, + { + "epoch": 0.7708127402526085, + "grad_norm": 0.4158021807670593, + "learning_rate": 1.3548036863371359e-05, + "loss": 0.5151, + "step": 28073 + }, + { + "epoch": 0.7708401976935749, + "grad_norm": 0.4483943581581116, + "learning_rate": 1.3547633065283181e-05, + "loss": 0.5004, + "step": 28074 + }, + { + "epoch": 0.7708676551345415, + "grad_norm": 0.401706337928772, + "learning_rate": 1.3547229260577535e-05, + "loss": 0.476, + "step": 28075 + }, + { + "epoch": 0.7708951125755079, + "grad_norm": 0.36227771639823914, + "learning_rate": 1.3546825449255173e-05, + "loss": 0.442, + "step": 28076 + }, + { + "epoch": 0.7709225700164745, + "grad_norm": 0.5395846962928772, + "learning_rate": 1.3546421631316853e-05, + "loss": 0.54, + "step": 28077 + }, + { + "epoch": 0.770950027457441, + "grad_norm": 0.4544094502925873, + "learning_rate": 1.3546017806763322e-05, + "loss": 0.5798, + "step": 28078 + }, + { + "epoch": 0.7709774848984075, + "grad_norm": 0.40414515137672424, + "learning_rate": 1.3545613975595335e-05, + "loss": 0.4753, + "step": 28079 + }, + { + "epoch": 0.771004942339374, + "grad_norm": 0.4329834580421448, + "learning_rate": 1.3545210137813649e-05, + "loss": 0.467, + "step": 28080 + }, + { + "epoch": 0.7710323997803404, + "grad_norm": 0.4618852734565735, + "learning_rate": 1.3544806293419016e-05, + "loss": 0.4782, + "step": 28081 + }, + { + "epoch": 0.771059857221307, + "grad_norm": 0.43277621269226074, + "learning_rate": 1.3544402442412184e-05, + "loss": 0.5309, + "step": 28082 + }, + { + "epoch": 0.7710873146622734, + "grad_norm": 0.5024019479751587, + "learning_rate": 1.3543998584793912e-05, + "loss": 0.5312, + "step": 28083 + }, + { + "epoch": 0.77111477210324, + "grad_norm": 0.39985841512680054, + "learning_rate": 1.354359472056495e-05, + "loss": 0.495, + "step": 28084 + }, + { + "epoch": 0.7711422295442065, + "grad_norm": 0.4453759491443634, + "learning_rate": 1.3543190849726053e-05, + "loss": 0.5328, + "step": 28085 + }, + { + "epoch": 0.771169686985173, + "grad_norm": 0.3726481795310974, + "learning_rate": 1.3542786972277977e-05, + "loss": 0.5139, + "step": 28086 + }, + { + "epoch": 0.7711971444261395, + "grad_norm": 0.3856644034385681, + "learning_rate": 1.354238308822147e-05, + "loss": 0.472, + "step": 28087 + }, + { + "epoch": 0.771224601867106, + "grad_norm": 0.3621286451816559, + "learning_rate": 1.3541979197557288e-05, + "loss": 0.5403, + "step": 28088 + }, + { + "epoch": 0.7712520593080725, + "grad_norm": 0.37487852573394775, + "learning_rate": 1.3541575300286184e-05, + "loss": 0.4453, + "step": 28089 + }, + { + "epoch": 0.7712795167490389, + "grad_norm": 0.35984012484550476, + "learning_rate": 1.3541171396408915e-05, + "loss": 0.4301, + "step": 28090 + }, + { + "epoch": 0.7713069741900055, + "grad_norm": 0.42794057726860046, + "learning_rate": 1.354076748592623e-05, + "loss": 0.6026, + "step": 28091 + }, + { + "epoch": 0.771334431630972, + "grad_norm": 0.36211958527565, + "learning_rate": 1.3540363568838881e-05, + "loss": 0.5188, + "step": 28092 + }, + { + "epoch": 0.7713618890719385, + "grad_norm": 0.3773888349533081, + "learning_rate": 1.3539959645147627e-05, + "loss": 0.5461, + "step": 28093 + }, + { + "epoch": 0.771389346512905, + "grad_norm": 0.4743172526359558, + "learning_rate": 1.3539555714853214e-05, + "loss": 0.5139, + "step": 28094 + }, + { + "epoch": 0.7714168039538715, + "grad_norm": 0.556021511554718, + "learning_rate": 1.3539151777956406e-05, + "loss": 0.518, + "step": 28095 + }, + { + "epoch": 0.771444261394838, + "grad_norm": 0.4232417047023773, + "learning_rate": 1.3538747834457945e-05, + "loss": 0.5706, + "step": 28096 + }, + { + "epoch": 0.7714717188358045, + "grad_norm": 0.3846933841705322, + "learning_rate": 1.353834388435859e-05, + "loss": 0.4586, + "step": 28097 + }, + { + "epoch": 0.771499176276771, + "grad_norm": 0.4053516983985901, + "learning_rate": 1.3537939927659101e-05, + "loss": 0.5699, + "step": 28098 + }, + { + "epoch": 0.7715266337177376, + "grad_norm": 0.44594940543174744, + "learning_rate": 1.3537535964360219e-05, + "loss": 0.507, + "step": 28099 + }, + { + "epoch": 0.771554091158704, + "grad_norm": 0.4442480504512787, + "learning_rate": 1.3537131994462704e-05, + "loss": 0.4994, + "step": 28100 + }, + { + "epoch": 0.7715815485996705, + "grad_norm": 0.8298069834709167, + "learning_rate": 1.353672801796731e-05, + "loss": 0.5591, + "step": 28101 + }, + { + "epoch": 0.771609006040637, + "grad_norm": 0.44732266664505005, + "learning_rate": 1.3536324034874788e-05, + "loss": 0.5281, + "step": 28102 + }, + { + "epoch": 0.7716364634816035, + "grad_norm": 0.30889490246772766, + "learning_rate": 1.3535920045185896e-05, + "loss": 0.4321, + "step": 28103 + }, + { + "epoch": 0.77166392092257, + "grad_norm": 0.35455989837646484, + "learning_rate": 1.353551604890138e-05, + "loss": 0.4705, + "step": 28104 + }, + { + "epoch": 0.7716913783635365, + "grad_norm": 0.38841819763183594, + "learning_rate": 1.3535112046022003e-05, + "loss": 0.4847, + "step": 28105 + }, + { + "epoch": 0.7717188358045031, + "grad_norm": 0.3943098485469818, + "learning_rate": 1.3534708036548512e-05, + "loss": 0.5507, + "step": 28106 + }, + { + "epoch": 0.7717462932454695, + "grad_norm": 0.39878880977630615, + "learning_rate": 1.353430402048166e-05, + "loss": 0.4441, + "step": 28107 + }, + { + "epoch": 0.771773750686436, + "grad_norm": 0.38307464122772217, + "learning_rate": 1.3533899997822206e-05, + "loss": 0.5053, + "step": 28108 + }, + { + "epoch": 0.7718012081274025, + "grad_norm": 0.39007630944252014, + "learning_rate": 1.3533495968570898e-05, + "loss": 0.4688, + "step": 28109 + }, + { + "epoch": 0.771828665568369, + "grad_norm": 0.5208595395088196, + "learning_rate": 1.3533091932728495e-05, + "loss": 0.5065, + "step": 28110 + }, + { + "epoch": 0.7718561230093355, + "grad_norm": 0.3612220287322998, + "learning_rate": 1.3532687890295746e-05, + "loss": 0.4168, + "step": 28111 + }, + { + "epoch": 0.771883580450302, + "grad_norm": 0.3601859509944916, + "learning_rate": 1.3532283841273409e-05, + "loss": 0.4512, + "step": 28112 + }, + { + "epoch": 0.7719110378912686, + "grad_norm": 0.5171055197715759, + "learning_rate": 1.3531879785662231e-05, + "loss": 0.6205, + "step": 28113 + }, + { + "epoch": 0.771938495332235, + "grad_norm": 0.360683411359787, + "learning_rate": 1.3531475723462972e-05, + "loss": 0.499, + "step": 28114 + }, + { + "epoch": 0.7719659527732016, + "grad_norm": 0.4912988841533661, + "learning_rate": 1.3531071654676384e-05, + "loss": 0.5138, + "step": 28115 + }, + { + "epoch": 0.771993410214168, + "grad_norm": 0.35627758502960205, + "learning_rate": 1.3530667579303221e-05, + "loss": 0.425, + "step": 28116 + }, + { + "epoch": 0.7720208676551346, + "grad_norm": 0.36523035168647766, + "learning_rate": 1.3530263497344237e-05, + "loss": 0.5413, + "step": 28117 + }, + { + "epoch": 0.772048325096101, + "grad_norm": 0.33623793721199036, + "learning_rate": 1.3529859408800183e-05, + "loss": 0.447, + "step": 28118 + }, + { + "epoch": 0.7720757825370675, + "grad_norm": 0.3778061866760254, + "learning_rate": 1.3529455313671813e-05, + "loss": 0.5479, + "step": 28119 + }, + { + "epoch": 0.7721032399780341, + "grad_norm": 0.42022591829299927, + "learning_rate": 1.3529051211959886e-05, + "loss": 0.5087, + "step": 28120 + }, + { + "epoch": 0.7721306974190005, + "grad_norm": 0.5833909511566162, + "learning_rate": 1.3528647103665149e-05, + "loss": 0.5479, + "step": 28121 + }, + { + "epoch": 0.7721581548599671, + "grad_norm": 0.424650639295578, + "learning_rate": 1.3528242988788363e-05, + "loss": 0.5811, + "step": 28122 + }, + { + "epoch": 0.7721856123009335, + "grad_norm": 0.44501209259033203, + "learning_rate": 1.3527838867330272e-05, + "loss": 0.466, + "step": 28123 + }, + { + "epoch": 0.7722130697419001, + "grad_norm": 0.380220890045166, + "learning_rate": 1.352743473929164e-05, + "loss": 0.4473, + "step": 28124 + }, + { + "epoch": 0.7722405271828665, + "grad_norm": 0.3745841681957245, + "learning_rate": 1.3527030604673217e-05, + "loss": 0.5228, + "step": 28125 + }, + { + "epoch": 0.7722679846238331, + "grad_norm": 0.37489089369773865, + "learning_rate": 1.3526626463475752e-05, + "loss": 0.5132, + "step": 28126 + }, + { + "epoch": 0.7722954420647996, + "grad_norm": 0.3623855710029602, + "learning_rate": 1.3526222315700006e-05, + "loss": 0.4989, + "step": 28127 + }, + { + "epoch": 0.772322899505766, + "grad_norm": 0.3706584572792053, + "learning_rate": 1.3525818161346728e-05, + "loss": 0.5058, + "step": 28128 + }, + { + "epoch": 0.7723503569467326, + "grad_norm": 0.35792964696884155, + "learning_rate": 1.3525414000416677e-05, + "loss": 0.4673, + "step": 28129 + }, + { + "epoch": 0.772377814387699, + "grad_norm": 0.39853012561798096, + "learning_rate": 1.3525009832910602e-05, + "loss": 0.4672, + "step": 28130 + }, + { + "epoch": 0.7724052718286656, + "grad_norm": 0.3813755214214325, + "learning_rate": 1.3524605658829258e-05, + "loss": 0.4898, + "step": 28131 + }, + { + "epoch": 0.772432729269632, + "grad_norm": 0.40334632992744446, + "learning_rate": 1.3524201478173402e-05, + "loss": 0.5035, + "step": 28132 + }, + { + "epoch": 0.7724601867105986, + "grad_norm": 0.4218386113643646, + "learning_rate": 1.352379729094378e-05, + "loss": 0.5235, + "step": 28133 + }, + { + "epoch": 0.7724876441515651, + "grad_norm": 0.4870929419994354, + "learning_rate": 1.3523393097141158e-05, + "loss": 0.5413, + "step": 28134 + }, + { + "epoch": 0.7725151015925316, + "grad_norm": 0.4026777744293213, + "learning_rate": 1.352298889676628e-05, + "loss": 0.533, + "step": 28135 + }, + { + "epoch": 0.7725425590334981, + "grad_norm": 0.5732431411743164, + "learning_rate": 1.3522584689819901e-05, + "loss": 0.4565, + "step": 28136 + }, + { + "epoch": 0.7725700164744645, + "grad_norm": 0.4227655827999115, + "learning_rate": 1.3522180476302782e-05, + "loss": 0.4378, + "step": 28137 + }, + { + "epoch": 0.7725974739154311, + "grad_norm": 0.4186539947986603, + "learning_rate": 1.3521776256215668e-05, + "loss": 0.5147, + "step": 28138 + }, + { + "epoch": 0.7726249313563975, + "grad_norm": 0.34855538606643677, + "learning_rate": 1.3521372029559321e-05, + "loss": 0.4124, + "step": 28139 + }, + { + "epoch": 0.7726523887973641, + "grad_norm": 0.4212324619293213, + "learning_rate": 1.352096779633449e-05, + "loss": 0.4771, + "step": 28140 + }, + { + "epoch": 0.7726798462383306, + "grad_norm": 0.3861721158027649, + "learning_rate": 1.352056355654193e-05, + "loss": 0.5069, + "step": 28141 + }, + { + "epoch": 0.7727073036792971, + "grad_norm": 0.4387202262878418, + "learning_rate": 1.3520159310182394e-05, + "loss": 0.5347, + "step": 28142 + }, + { + "epoch": 0.7727347611202636, + "grad_norm": 0.4261201024055481, + "learning_rate": 1.3519755057256638e-05, + "loss": 0.4822, + "step": 28143 + }, + { + "epoch": 0.7727622185612301, + "grad_norm": 0.38961097598075867, + "learning_rate": 1.3519350797765418e-05, + "loss": 0.534, + "step": 28144 + }, + { + "epoch": 0.7727896760021966, + "grad_norm": 0.38125547766685486, + "learning_rate": 1.3518946531709482e-05, + "loss": 0.5085, + "step": 28145 + }, + { + "epoch": 0.772817133443163, + "grad_norm": 0.3463003635406494, + "learning_rate": 1.351854225908959e-05, + "loss": 0.4389, + "step": 28146 + }, + { + "epoch": 0.7728445908841296, + "grad_norm": 0.3934618830680847, + "learning_rate": 1.3518137979906494e-05, + "loss": 0.4912, + "step": 28147 + }, + { + "epoch": 0.7728720483250962, + "grad_norm": 0.37373045086860657, + "learning_rate": 1.3517733694160949e-05, + "loss": 0.5068, + "step": 28148 + }, + { + "epoch": 0.7728995057660626, + "grad_norm": 0.4456433057785034, + "learning_rate": 1.3517329401853702e-05, + "loss": 0.4188, + "step": 28149 + }, + { + "epoch": 0.7729269632070291, + "grad_norm": 0.3595468997955322, + "learning_rate": 1.351692510298552e-05, + "loss": 0.5505, + "step": 28150 + }, + { + "epoch": 0.7729544206479956, + "grad_norm": 0.41618210077285767, + "learning_rate": 1.3516520797557144e-05, + "loss": 0.5439, + "step": 28151 + }, + { + "epoch": 0.7729818780889621, + "grad_norm": 0.5136029720306396, + "learning_rate": 1.3516116485569338e-05, + "loss": 0.436, + "step": 28152 + }, + { + "epoch": 0.7730093355299286, + "grad_norm": 0.33721694350242615, + "learning_rate": 1.3515712167022852e-05, + "loss": 0.4504, + "step": 28153 + }, + { + "epoch": 0.7730367929708951, + "grad_norm": 0.41195061802864075, + "learning_rate": 1.3515307841918439e-05, + "loss": 0.4946, + "step": 28154 + }, + { + "epoch": 0.7730642504118617, + "grad_norm": 0.4286714196205139, + "learning_rate": 1.3514903510256859e-05, + "loss": 0.5263, + "step": 28155 + }, + { + "epoch": 0.7730917078528281, + "grad_norm": 0.3944145143032074, + "learning_rate": 1.3514499172038858e-05, + "loss": 0.4069, + "step": 28156 + }, + { + "epoch": 0.7731191652937947, + "grad_norm": 0.3621658384799957, + "learning_rate": 1.3514094827265196e-05, + "loss": 0.5327, + "step": 28157 + }, + { + "epoch": 0.7731466227347611, + "grad_norm": 0.38595983386039734, + "learning_rate": 1.3513690475936625e-05, + "loss": 0.4784, + "step": 28158 + }, + { + "epoch": 0.7731740801757276, + "grad_norm": 0.38389724493026733, + "learning_rate": 1.35132861180539e-05, + "loss": 0.4853, + "step": 28159 + }, + { + "epoch": 0.7732015376166941, + "grad_norm": 0.4450489580631256, + "learning_rate": 1.3512881753617777e-05, + "loss": 0.5541, + "step": 28160 + }, + { + "epoch": 0.7732289950576606, + "grad_norm": 1.0994484424591064, + "learning_rate": 1.3512477382629008e-05, + "loss": 0.4884, + "step": 28161 + }, + { + "epoch": 0.7732564524986272, + "grad_norm": 0.437832772731781, + "learning_rate": 1.3512073005088344e-05, + "loss": 0.5154, + "step": 28162 + }, + { + "epoch": 0.7732839099395936, + "grad_norm": 0.34799882769584656, + "learning_rate": 1.3511668620996546e-05, + "loss": 0.4029, + "step": 28163 + }, + { + "epoch": 0.7733113673805602, + "grad_norm": 0.36387404799461365, + "learning_rate": 1.3511264230354364e-05, + "loss": 0.4298, + "step": 28164 + }, + { + "epoch": 0.7733388248215266, + "grad_norm": 0.38041022419929504, + "learning_rate": 1.3510859833162555e-05, + "loss": 0.519, + "step": 28165 + }, + { + "epoch": 0.7733662822624932, + "grad_norm": 0.3908531665802002, + "learning_rate": 1.3510455429421871e-05, + "loss": 0.4843, + "step": 28166 + }, + { + "epoch": 0.7733937397034596, + "grad_norm": 0.4574621915817261, + "learning_rate": 1.3510051019133065e-05, + "loss": 0.4701, + "step": 28167 + }, + { + "epoch": 0.7734211971444261, + "grad_norm": 0.4848713278770447, + "learning_rate": 1.3509646602296898e-05, + "loss": 0.59, + "step": 28168 + }, + { + "epoch": 0.7734486545853927, + "grad_norm": 0.39097860455513, + "learning_rate": 1.3509242178914117e-05, + "loss": 0.4952, + "step": 28169 + }, + { + "epoch": 0.7734761120263591, + "grad_norm": 0.4004112482070923, + "learning_rate": 1.350883774898548e-05, + "loss": 0.491, + "step": 28170 + }, + { + "epoch": 0.7735035694673257, + "grad_norm": 0.4104389548301697, + "learning_rate": 1.3508433312511742e-05, + "loss": 0.5012, + "step": 28171 + }, + { + "epoch": 0.7735310269082921, + "grad_norm": 0.3916216790676117, + "learning_rate": 1.3508028869493654e-05, + "loss": 0.5004, + "step": 28172 + }, + { + "epoch": 0.7735584843492587, + "grad_norm": 0.4049875736236572, + "learning_rate": 1.3507624419931972e-05, + "loss": 0.5859, + "step": 28173 + }, + { + "epoch": 0.7735859417902251, + "grad_norm": 0.40612444281578064, + "learning_rate": 1.3507219963827454e-05, + "loss": 0.5817, + "step": 28174 + }, + { + "epoch": 0.7736133992311917, + "grad_norm": 0.4054552912712097, + "learning_rate": 1.3506815501180849e-05, + "loss": 0.5329, + "step": 28175 + }, + { + "epoch": 0.7736408566721582, + "grad_norm": 0.3710959851741791, + "learning_rate": 1.3506411031992914e-05, + "loss": 0.515, + "step": 28176 + }, + { + "epoch": 0.7736683141131246, + "grad_norm": 0.3760150074958801, + "learning_rate": 1.3506006556264404e-05, + "loss": 0.4658, + "step": 28177 + }, + { + "epoch": 0.7736957715540912, + "grad_norm": 0.3800768554210663, + "learning_rate": 1.3505602073996074e-05, + "loss": 0.5069, + "step": 28178 + }, + { + "epoch": 0.7737232289950576, + "grad_norm": 0.38846415281295776, + "learning_rate": 1.3505197585188676e-05, + "loss": 0.5864, + "step": 28179 + }, + { + "epoch": 0.7737506864360242, + "grad_norm": 0.3637012243270874, + "learning_rate": 1.3504793089842964e-05, + "loss": 0.4614, + "step": 28180 + }, + { + "epoch": 0.7737781438769906, + "grad_norm": 0.46601203083992004, + "learning_rate": 1.3504388587959695e-05, + "loss": 0.609, + "step": 28181 + }, + { + "epoch": 0.7738056013179572, + "grad_norm": 0.3705854117870331, + "learning_rate": 1.3503984079539625e-05, + "loss": 0.3897, + "step": 28182 + }, + { + "epoch": 0.7738330587589237, + "grad_norm": 0.39473235607147217, + "learning_rate": 1.3503579564583507e-05, + "loss": 0.4804, + "step": 28183 + }, + { + "epoch": 0.7738605161998902, + "grad_norm": 0.36216500401496887, + "learning_rate": 1.3503175043092091e-05, + "loss": 0.5223, + "step": 28184 + }, + { + "epoch": 0.7738879736408567, + "grad_norm": 0.4263075292110443, + "learning_rate": 1.3502770515066138e-05, + "loss": 0.5312, + "step": 28185 + }, + { + "epoch": 0.7739154310818231, + "grad_norm": 0.45693960785865784, + "learning_rate": 1.35023659805064e-05, + "loss": 0.5474, + "step": 28186 + }, + { + "epoch": 0.7739428885227897, + "grad_norm": 0.41497522592544556, + "learning_rate": 1.3501961439413629e-05, + "loss": 0.4826, + "step": 28187 + }, + { + "epoch": 0.7739703459637561, + "grad_norm": 0.39683568477630615, + "learning_rate": 1.3501556891788585e-05, + "loss": 0.5516, + "step": 28188 + }, + { + "epoch": 0.7739978034047227, + "grad_norm": 0.3927954137325287, + "learning_rate": 1.3501152337632023e-05, + "loss": 0.4248, + "step": 28189 + }, + { + "epoch": 0.7740252608456892, + "grad_norm": 0.37162286043167114, + "learning_rate": 1.3500747776944687e-05, + "loss": 0.5258, + "step": 28190 + }, + { + "epoch": 0.7740527182866557, + "grad_norm": 0.362943559885025, + "learning_rate": 1.3500343209727344e-05, + "loss": 0.493, + "step": 28191 + }, + { + "epoch": 0.7740801757276222, + "grad_norm": 0.38695845007896423, + "learning_rate": 1.3499938635980743e-05, + "loss": 0.4953, + "step": 28192 + }, + { + "epoch": 0.7741076331685887, + "grad_norm": 0.3890756070613861, + "learning_rate": 1.3499534055705638e-05, + "loss": 0.5148, + "step": 28193 + }, + { + "epoch": 0.7741350906095552, + "grad_norm": 0.3818354606628418, + "learning_rate": 1.3499129468902788e-05, + "loss": 0.498, + "step": 28194 + }, + { + "epoch": 0.7741625480505216, + "grad_norm": 0.3655965328216553, + "learning_rate": 1.3498724875572939e-05, + "loss": 0.4643, + "step": 28195 + }, + { + "epoch": 0.7741900054914882, + "grad_norm": 0.5055716633796692, + "learning_rate": 1.3498320275716857e-05, + "loss": 0.3904, + "step": 28196 + }, + { + "epoch": 0.7742174629324547, + "grad_norm": 0.3510594367980957, + "learning_rate": 1.349791566933529e-05, + "loss": 0.4644, + "step": 28197 + }, + { + "epoch": 0.7742449203734212, + "grad_norm": 0.41722628474235535, + "learning_rate": 1.3497511056428992e-05, + "loss": 0.5151, + "step": 28198 + }, + { + "epoch": 0.7742723778143877, + "grad_norm": 0.37877893447875977, + "learning_rate": 1.3497106436998722e-05, + "loss": 0.4706, + "step": 28199 + }, + { + "epoch": 0.7742998352553542, + "grad_norm": 0.3950786292552948, + "learning_rate": 1.349670181104523e-05, + "loss": 0.4981, + "step": 28200 + }, + { + "epoch": 0.7743272926963207, + "grad_norm": 0.39173269271850586, + "learning_rate": 1.3496297178569275e-05, + "loss": 0.4826, + "step": 28201 + }, + { + "epoch": 0.7743547501372872, + "grad_norm": 0.396249383687973, + "learning_rate": 1.3495892539571609e-05, + "loss": 0.4395, + "step": 28202 + }, + { + "epoch": 0.7743822075782537, + "grad_norm": 0.313416451215744, + "learning_rate": 1.3495487894052985e-05, + "loss": 0.4041, + "step": 28203 + }, + { + "epoch": 0.7744096650192203, + "grad_norm": 0.45207086205482483, + "learning_rate": 1.3495083242014166e-05, + "loss": 0.441, + "step": 28204 + }, + { + "epoch": 0.7744371224601867, + "grad_norm": 0.4104832708835602, + "learning_rate": 1.3494678583455896e-05, + "loss": 0.5408, + "step": 28205 + }, + { + "epoch": 0.7744645799011532, + "grad_norm": 0.5234546661376953, + "learning_rate": 1.3494273918378939e-05, + "loss": 0.4711, + "step": 28206 + }, + { + "epoch": 0.7744920373421197, + "grad_norm": 0.371689110994339, + "learning_rate": 1.3493869246784045e-05, + "loss": 0.4957, + "step": 28207 + }, + { + "epoch": 0.7745194947830862, + "grad_norm": 0.3746110498905182, + "learning_rate": 1.3493464568671968e-05, + "loss": 0.3972, + "step": 28208 + }, + { + "epoch": 0.7745469522240527, + "grad_norm": 0.3617337644100189, + "learning_rate": 1.3493059884043467e-05, + "loss": 0.498, + "step": 28209 + }, + { + "epoch": 0.7745744096650192, + "grad_norm": 0.4012305736541748, + "learning_rate": 1.3492655192899293e-05, + "loss": 0.5314, + "step": 28210 + }, + { + "epoch": 0.7746018671059858, + "grad_norm": 0.4669276773929596, + "learning_rate": 1.3492250495240203e-05, + "loss": 0.5374, + "step": 28211 + }, + { + "epoch": 0.7746293245469522, + "grad_norm": 0.38123661279678345, + "learning_rate": 1.3491845791066951e-05, + "loss": 0.5324, + "step": 28212 + }, + { + "epoch": 0.7746567819879188, + "grad_norm": 0.3987761437892914, + "learning_rate": 1.3491441080380289e-05, + "loss": 0.5425, + "step": 28213 + }, + { + "epoch": 0.7746842394288852, + "grad_norm": 0.3654307425022125, + "learning_rate": 1.349103636318098e-05, + "loss": 0.4682, + "step": 28214 + }, + { + "epoch": 0.7747116968698518, + "grad_norm": 0.36978432536125183, + "learning_rate": 1.3490631639469774e-05, + "loss": 0.4801, + "step": 28215 + }, + { + "epoch": 0.7747391543108182, + "grad_norm": 0.3881734609603882, + "learning_rate": 1.3490226909247422e-05, + "loss": 0.5447, + "step": 28216 + }, + { + "epoch": 0.7747666117517847, + "grad_norm": 0.41216522455215454, + "learning_rate": 1.3489822172514683e-05, + "loss": 0.5168, + "step": 28217 + }, + { + "epoch": 0.7747940691927513, + "grad_norm": 0.3619668483734131, + "learning_rate": 1.3489417429272313e-05, + "loss": 0.5075, + "step": 28218 + }, + { + "epoch": 0.7748215266337177, + "grad_norm": 0.3952668309211731, + "learning_rate": 1.3489012679521067e-05, + "loss": 0.4691, + "step": 28219 + }, + { + "epoch": 0.7748489840746843, + "grad_norm": 0.40763992071151733, + "learning_rate": 1.34886079232617e-05, + "loss": 0.5518, + "step": 28220 + }, + { + "epoch": 0.7748764415156507, + "grad_norm": 0.37600114941596985, + "learning_rate": 1.3488203160494963e-05, + "loss": 0.4482, + "step": 28221 + }, + { + "epoch": 0.7749038989566173, + "grad_norm": 0.3524140417575836, + "learning_rate": 1.3487798391221616e-05, + "loss": 0.4347, + "step": 28222 + }, + { + "epoch": 0.7749313563975837, + "grad_norm": 0.3742995262145996, + "learning_rate": 1.348739361544241e-05, + "loss": 0.4969, + "step": 28223 + }, + { + "epoch": 0.7749588138385503, + "grad_norm": 0.4089823067188263, + "learning_rate": 1.34869888331581e-05, + "loss": 0.5506, + "step": 28224 + }, + { + "epoch": 0.7749862712795168, + "grad_norm": 0.4307990074157715, + "learning_rate": 1.3486584044369448e-05, + "loss": 0.5623, + "step": 28225 + }, + { + "epoch": 0.7750137287204832, + "grad_norm": 0.3851928412914276, + "learning_rate": 1.34861792490772e-05, + "loss": 0.4861, + "step": 28226 + }, + { + "epoch": 0.7750411861614498, + "grad_norm": 0.39445430040359497, + "learning_rate": 1.3485774447282117e-05, + "loss": 0.4747, + "step": 28227 + }, + { + "epoch": 0.7750686436024162, + "grad_norm": 0.38409045338630676, + "learning_rate": 1.348536963898495e-05, + "loss": 0.4476, + "step": 28228 + }, + { + "epoch": 0.7750961010433828, + "grad_norm": 0.3743935823440552, + "learning_rate": 1.348496482418646e-05, + "loss": 0.5099, + "step": 28229 + }, + { + "epoch": 0.7751235584843492, + "grad_norm": 0.3843429684638977, + "learning_rate": 1.3484560002887395e-05, + "loss": 0.5027, + "step": 28230 + }, + { + "epoch": 0.7751510159253158, + "grad_norm": 0.3835598826408386, + "learning_rate": 1.3484155175088514e-05, + "loss": 0.5537, + "step": 28231 + }, + { + "epoch": 0.7751784733662823, + "grad_norm": 0.3626602292060852, + "learning_rate": 1.3483750340790573e-05, + "loss": 0.4398, + "step": 28232 + }, + { + "epoch": 0.7752059308072488, + "grad_norm": 0.3957691490650177, + "learning_rate": 1.3483345499994324e-05, + "loss": 0.4528, + "step": 28233 + }, + { + "epoch": 0.7752333882482153, + "grad_norm": 0.412028968334198, + "learning_rate": 1.3482940652700525e-05, + "loss": 0.4431, + "step": 28234 + }, + { + "epoch": 0.7752608456891817, + "grad_norm": 0.3249298632144928, + "learning_rate": 1.348253579890993e-05, + "loss": 0.4293, + "step": 28235 + }, + { + "epoch": 0.7752883031301483, + "grad_norm": 0.37951064109802246, + "learning_rate": 1.3482130938623292e-05, + "loss": 0.4358, + "step": 28236 + }, + { + "epoch": 0.7753157605711147, + "grad_norm": 0.3515189588069916, + "learning_rate": 1.3481726071841372e-05, + "loss": 0.5023, + "step": 28237 + }, + { + "epoch": 0.7753432180120813, + "grad_norm": 0.39141878485679626, + "learning_rate": 1.3481321198564921e-05, + "loss": 0.5116, + "step": 28238 + }, + { + "epoch": 0.7753706754530478, + "grad_norm": 0.3415139615535736, + "learning_rate": 1.3480916318794692e-05, + "loss": 0.444, + "step": 28239 + }, + { + "epoch": 0.7753981328940143, + "grad_norm": 0.3789421021938324, + "learning_rate": 1.3480511432531444e-05, + "loss": 0.4952, + "step": 28240 + }, + { + "epoch": 0.7754255903349808, + "grad_norm": 0.3773335814476013, + "learning_rate": 1.3480106539775935e-05, + "loss": 0.5677, + "step": 28241 + }, + { + "epoch": 0.7754530477759473, + "grad_norm": 0.3818061053752899, + "learning_rate": 1.3479701640528914e-05, + "loss": 0.5257, + "step": 28242 + }, + { + "epoch": 0.7754805052169138, + "grad_norm": 0.4140602946281433, + "learning_rate": 1.3479296734791136e-05, + "loss": 0.4812, + "step": 28243 + }, + { + "epoch": 0.7755079626578802, + "grad_norm": 0.37105098366737366, + "learning_rate": 1.3478891822563362e-05, + "loss": 0.4931, + "step": 28244 + }, + { + "epoch": 0.7755354200988468, + "grad_norm": 0.3386287987232208, + "learning_rate": 1.3478486903846344e-05, + "loss": 0.4403, + "step": 28245 + }, + { + "epoch": 0.7755628775398133, + "grad_norm": 0.3610832691192627, + "learning_rate": 1.347808197864084e-05, + "loss": 0.4812, + "step": 28246 + }, + { + "epoch": 0.7755903349807798, + "grad_norm": 0.38271254301071167, + "learning_rate": 1.34776770469476e-05, + "loss": 0.5111, + "step": 28247 + }, + { + "epoch": 0.7756177924217463, + "grad_norm": 0.3500004708766937, + "learning_rate": 1.3477272108767383e-05, + "loss": 0.4625, + "step": 28248 + }, + { + "epoch": 0.7756452498627128, + "grad_norm": 0.38372674584388733, + "learning_rate": 1.3476867164100944e-05, + "loss": 0.5782, + "step": 28249 + }, + { + "epoch": 0.7756727073036793, + "grad_norm": 0.436383992433548, + "learning_rate": 1.347646221294904e-05, + "loss": 0.4971, + "step": 28250 + }, + { + "epoch": 0.7757001647446458, + "grad_norm": 0.4247516691684723, + "learning_rate": 1.3476057255312422e-05, + "loss": 0.4993, + "step": 28251 + }, + { + "epoch": 0.7757276221856123, + "grad_norm": 0.3895137310028076, + "learning_rate": 1.3475652291191847e-05, + "loss": 0.4898, + "step": 28252 + }, + { + "epoch": 0.7757550796265787, + "grad_norm": 0.38318535685539246, + "learning_rate": 1.3475247320588074e-05, + "loss": 0.549, + "step": 28253 + }, + { + "epoch": 0.7757825370675453, + "grad_norm": 0.743206799030304, + "learning_rate": 1.3474842343501855e-05, + "loss": 0.4576, + "step": 28254 + }, + { + "epoch": 0.7758099945085118, + "grad_norm": 0.4293211102485657, + "learning_rate": 1.3474437359933943e-05, + "loss": 0.4711, + "step": 28255 + }, + { + "epoch": 0.7758374519494783, + "grad_norm": 0.3956354856491089, + "learning_rate": 1.34740323698851e-05, + "loss": 0.4098, + "step": 28256 + }, + { + "epoch": 0.7758649093904448, + "grad_norm": 0.41182565689086914, + "learning_rate": 1.3473627373356075e-05, + "loss": 0.5273, + "step": 28257 + }, + { + "epoch": 0.7758923668314113, + "grad_norm": 0.49123942852020264, + "learning_rate": 1.3473222370347628e-05, + "loss": 0.5356, + "step": 28258 + }, + { + "epoch": 0.7759198242723778, + "grad_norm": 0.38723188638687134, + "learning_rate": 1.3472817360860512e-05, + "loss": 0.5146, + "step": 28259 + }, + { + "epoch": 0.7759472817133443, + "grad_norm": 0.37261393666267395, + "learning_rate": 1.3472412344895483e-05, + "loss": 0.4551, + "step": 28260 + }, + { + "epoch": 0.7759747391543108, + "grad_norm": 0.40633153915405273, + "learning_rate": 1.3472007322453298e-05, + "loss": 0.5805, + "step": 28261 + }, + { + "epoch": 0.7760021965952774, + "grad_norm": 0.4587074816226959, + "learning_rate": 1.347160229353471e-05, + "loss": 0.4478, + "step": 28262 + }, + { + "epoch": 0.7760296540362438, + "grad_norm": 0.5762916803359985, + "learning_rate": 1.3471197258140476e-05, + "loss": 0.4173, + "step": 28263 + }, + { + "epoch": 0.7760571114772103, + "grad_norm": 0.37404805421829224, + "learning_rate": 1.3470792216271353e-05, + "loss": 0.4257, + "step": 28264 + }, + { + "epoch": 0.7760845689181768, + "grad_norm": 0.37844759225845337, + "learning_rate": 1.3470387167928092e-05, + "loss": 0.4575, + "step": 28265 + }, + { + "epoch": 0.7761120263591433, + "grad_norm": 0.39577963948249817, + "learning_rate": 1.3469982113111452e-05, + "loss": 0.4269, + "step": 28266 + }, + { + "epoch": 0.7761394838001098, + "grad_norm": 0.35193756222724915, + "learning_rate": 1.346957705182219e-05, + "loss": 0.4114, + "step": 28267 + }, + { + "epoch": 0.7761669412410763, + "grad_norm": 0.4198407232761383, + "learning_rate": 1.3469171984061057e-05, + "loss": 0.514, + "step": 28268 + }, + { + "epoch": 0.7761943986820429, + "grad_norm": 0.3921268880367279, + "learning_rate": 1.3468766909828811e-05, + "loss": 0.4815, + "step": 28269 + }, + { + "epoch": 0.7762218561230093, + "grad_norm": 0.49732667207717896, + "learning_rate": 1.3468361829126208e-05, + "loss": 0.4579, + "step": 28270 + }, + { + "epoch": 0.7762493135639759, + "grad_norm": 0.47012007236480713, + "learning_rate": 1.3467956741954007e-05, + "loss": 0.4738, + "step": 28271 + }, + { + "epoch": 0.7762767710049423, + "grad_norm": 0.3518120348453522, + "learning_rate": 1.3467551648312955e-05, + "loss": 0.4468, + "step": 28272 + }, + { + "epoch": 0.7763042284459089, + "grad_norm": 0.3907483220100403, + "learning_rate": 1.3467146548203814e-05, + "loss": 0.4959, + "step": 28273 + }, + { + "epoch": 0.7763316858868753, + "grad_norm": 0.39827674627304077, + "learning_rate": 1.3466741441627337e-05, + "loss": 0.425, + "step": 28274 + }, + { + "epoch": 0.7763591433278418, + "grad_norm": 1.072135329246521, + "learning_rate": 1.3466336328584282e-05, + "loss": 0.6105, + "step": 28275 + }, + { + "epoch": 0.7763866007688084, + "grad_norm": 0.36219584941864014, + "learning_rate": 1.3465931209075405e-05, + "loss": 0.4848, + "step": 28276 + }, + { + "epoch": 0.7764140582097748, + "grad_norm": 0.4321988523006439, + "learning_rate": 1.3465526083101458e-05, + "loss": 0.5018, + "step": 28277 + }, + { + "epoch": 0.7764415156507414, + "grad_norm": 0.34359318017959595, + "learning_rate": 1.3465120950663197e-05, + "loss": 0.4894, + "step": 28278 + }, + { + "epoch": 0.7764689730917078, + "grad_norm": 0.4370637834072113, + "learning_rate": 1.3464715811761383e-05, + "loss": 0.4434, + "step": 28279 + }, + { + "epoch": 0.7764964305326744, + "grad_norm": 0.3910772204399109, + "learning_rate": 1.3464310666396768e-05, + "loss": 0.4469, + "step": 28280 + }, + { + "epoch": 0.7765238879736408, + "grad_norm": 0.4678737223148346, + "learning_rate": 1.3463905514570105e-05, + "loss": 0.504, + "step": 28281 + }, + { + "epoch": 0.7765513454146074, + "grad_norm": 0.4155193865299225, + "learning_rate": 1.3463500356282155e-05, + "loss": 0.4867, + "step": 28282 + }, + { + "epoch": 0.7765788028555739, + "grad_norm": 0.4001312553882599, + "learning_rate": 1.3463095191533671e-05, + "loss": 0.4984, + "step": 28283 + }, + { + "epoch": 0.7766062602965403, + "grad_norm": 0.4213049113750458, + "learning_rate": 1.3462690020325408e-05, + "loss": 0.5859, + "step": 28284 + }, + { + "epoch": 0.7766337177375069, + "grad_norm": 0.40920019149780273, + "learning_rate": 1.3462284842658125e-05, + "loss": 0.5104, + "step": 28285 + }, + { + "epoch": 0.7766611751784733, + "grad_norm": 0.36911576986312866, + "learning_rate": 1.3461879658532575e-05, + "loss": 0.4965, + "step": 28286 + }, + { + "epoch": 0.7766886326194399, + "grad_norm": 1.3963522911071777, + "learning_rate": 1.3461474467949513e-05, + "loss": 0.5098, + "step": 28287 + }, + { + "epoch": 0.7767160900604063, + "grad_norm": 0.3688260316848755, + "learning_rate": 1.34610692709097e-05, + "loss": 0.4606, + "step": 28288 + }, + { + "epoch": 0.7767435475013729, + "grad_norm": 0.4914516508579254, + "learning_rate": 1.3460664067413885e-05, + "loss": 0.4688, + "step": 28289 + }, + { + "epoch": 0.7767710049423394, + "grad_norm": 0.344197541475296, + "learning_rate": 1.3460258857462827e-05, + "loss": 0.4299, + "step": 28290 + }, + { + "epoch": 0.7767984623833059, + "grad_norm": 0.35952383279800415, + "learning_rate": 1.3459853641057284e-05, + "loss": 0.4898, + "step": 28291 + }, + { + "epoch": 0.7768259198242724, + "grad_norm": 0.39834654331207275, + "learning_rate": 1.3459448418198004e-05, + "loss": 0.5118, + "step": 28292 + }, + { + "epoch": 0.7768533772652388, + "grad_norm": 0.36288490891456604, + "learning_rate": 1.3459043188885756e-05, + "loss": 0.4582, + "step": 28293 + }, + { + "epoch": 0.7768808347062054, + "grad_norm": 0.3628290593624115, + "learning_rate": 1.3458637953121286e-05, + "loss": 0.5368, + "step": 28294 + }, + { + "epoch": 0.7769082921471718, + "grad_norm": 0.37976381182670593, + "learning_rate": 1.345823271090535e-05, + "loss": 0.4732, + "step": 28295 + }, + { + "epoch": 0.7769357495881384, + "grad_norm": 0.4036215543746948, + "learning_rate": 1.3457827462238709e-05, + "loss": 0.4958, + "step": 28296 + }, + { + "epoch": 0.7769632070291049, + "grad_norm": 0.4164693057537079, + "learning_rate": 1.3457422207122113e-05, + "loss": 0.5987, + "step": 28297 + }, + { + "epoch": 0.7769906644700714, + "grad_norm": 0.36895301938056946, + "learning_rate": 1.3457016945556325e-05, + "loss": 0.4748, + "step": 28298 + }, + { + "epoch": 0.7770181219110379, + "grad_norm": 0.4363538324832916, + "learning_rate": 1.3456611677542096e-05, + "loss": 0.4878, + "step": 28299 + }, + { + "epoch": 0.7770455793520044, + "grad_norm": 0.4383692443370819, + "learning_rate": 1.345620640308018e-05, + "loss": 0.5271, + "step": 28300 + }, + { + "epoch": 0.7770730367929709, + "grad_norm": 0.3817042112350464, + "learning_rate": 1.345580112217134e-05, + "loss": 0.4846, + "step": 28301 + }, + { + "epoch": 0.7771004942339373, + "grad_norm": 0.3546237051486969, + "learning_rate": 1.3455395834816324e-05, + "loss": 0.4466, + "step": 28302 + }, + { + "epoch": 0.7771279516749039, + "grad_norm": 0.3541264235973358, + "learning_rate": 1.3454990541015895e-05, + "loss": 0.5262, + "step": 28303 + }, + { + "epoch": 0.7771554091158704, + "grad_norm": 0.35832759737968445, + "learning_rate": 1.3454585240770803e-05, + "loss": 0.5382, + "step": 28304 + }, + { + "epoch": 0.7771828665568369, + "grad_norm": 0.3685150146484375, + "learning_rate": 1.345417993408181e-05, + "loss": 0.4883, + "step": 28305 + }, + { + "epoch": 0.7772103239978034, + "grad_norm": 0.3387349545955658, + "learning_rate": 1.345377462094967e-05, + "loss": 0.4123, + "step": 28306 + }, + { + "epoch": 0.7772377814387699, + "grad_norm": 0.42654523253440857, + "learning_rate": 1.3453369301375135e-05, + "loss": 0.5131, + "step": 28307 + }, + { + "epoch": 0.7772652388797364, + "grad_norm": 0.35354942083358765, + "learning_rate": 1.3452963975358966e-05, + "loss": 0.5078, + "step": 28308 + }, + { + "epoch": 0.7772926963207029, + "grad_norm": 0.4756500720977783, + "learning_rate": 1.3452558642901914e-05, + "loss": 0.5358, + "step": 28309 + }, + { + "epoch": 0.7773201537616694, + "grad_norm": 0.3659714162349701, + "learning_rate": 1.3452153304004742e-05, + "loss": 0.4581, + "step": 28310 + }, + { + "epoch": 0.777347611202636, + "grad_norm": 0.3754408657550812, + "learning_rate": 1.3451747958668202e-05, + "loss": 0.4697, + "step": 28311 + }, + { + "epoch": 0.7773750686436024, + "grad_norm": 0.34801986813545227, + "learning_rate": 1.3451342606893048e-05, + "loss": 0.482, + "step": 28312 + }, + { + "epoch": 0.777402526084569, + "grad_norm": 0.3841097950935364, + "learning_rate": 1.3450937248680039e-05, + "loss": 0.4947, + "step": 28313 + }, + { + "epoch": 0.7774299835255354, + "grad_norm": 0.3857743442058563, + "learning_rate": 1.345053188402993e-05, + "loss": 0.5047, + "step": 28314 + }, + { + "epoch": 0.7774574409665019, + "grad_norm": 0.3777301013469696, + "learning_rate": 1.345012651294348e-05, + "loss": 0.4525, + "step": 28315 + }, + { + "epoch": 0.7774848984074684, + "grad_norm": 0.4195750653743744, + "learning_rate": 1.3449721135421442e-05, + "loss": 0.5852, + "step": 28316 + }, + { + "epoch": 0.7775123558484349, + "grad_norm": 0.38235345482826233, + "learning_rate": 1.344931575146457e-05, + "loss": 0.5092, + "step": 28317 + }, + { + "epoch": 0.7775398132894015, + "grad_norm": 0.44984009861946106, + "learning_rate": 1.3448910361073628e-05, + "loss": 0.5041, + "step": 28318 + }, + { + "epoch": 0.7775672707303679, + "grad_norm": 0.3625917136669159, + "learning_rate": 1.3448504964249366e-05, + "loss": 0.5533, + "step": 28319 + }, + { + "epoch": 0.7775947281713345, + "grad_norm": 0.44959181547164917, + "learning_rate": 1.344809956099254e-05, + "loss": 0.5733, + "step": 28320 + }, + { + "epoch": 0.7776221856123009, + "grad_norm": 0.41790828108787537, + "learning_rate": 1.344769415130391e-05, + "loss": 0.4825, + "step": 28321 + }, + { + "epoch": 0.7776496430532674, + "grad_norm": 0.4692777991294861, + "learning_rate": 1.344728873518423e-05, + "loss": 0.5568, + "step": 28322 + }, + { + "epoch": 0.7776771004942339, + "grad_norm": 0.9842053055763245, + "learning_rate": 1.3446883312634254e-05, + "loss": 0.4653, + "step": 28323 + }, + { + "epoch": 0.7777045579352004, + "grad_norm": 0.5059345364570618, + "learning_rate": 1.344647788365474e-05, + "loss": 0.5785, + "step": 28324 + }, + { + "epoch": 0.777732015376167, + "grad_norm": 0.3677705228328705, + "learning_rate": 1.3446072448246448e-05, + "loss": 0.4852, + "step": 28325 + }, + { + "epoch": 0.7777594728171334, + "grad_norm": 0.37064483761787415, + "learning_rate": 1.344566700641013e-05, + "loss": 0.4767, + "step": 28326 + }, + { + "epoch": 0.7777869302581, + "grad_norm": 0.3325006067752838, + "learning_rate": 1.3445261558146543e-05, + "loss": 0.3919, + "step": 28327 + }, + { + "epoch": 0.7778143876990664, + "grad_norm": 0.406099408864975, + "learning_rate": 1.3444856103456444e-05, + "loss": 0.5035, + "step": 28328 + }, + { + "epoch": 0.777841845140033, + "grad_norm": 0.40474891662597656, + "learning_rate": 1.3444450642340584e-05, + "loss": 0.5211, + "step": 28329 + }, + { + "epoch": 0.7778693025809994, + "grad_norm": 0.37044721841812134, + "learning_rate": 1.3444045174799731e-05, + "loss": 0.5017, + "step": 28330 + }, + { + "epoch": 0.777896760021966, + "grad_norm": 0.40171030163764954, + "learning_rate": 1.3443639700834635e-05, + "loss": 0.521, + "step": 28331 + }, + { + "epoch": 0.7779242174629325, + "grad_norm": 0.38076308369636536, + "learning_rate": 1.3443234220446047e-05, + "loss": 0.4676, + "step": 28332 + }, + { + "epoch": 0.7779516749038989, + "grad_norm": 0.3626200556755066, + "learning_rate": 1.344282873363473e-05, + "loss": 0.4917, + "step": 28333 + }, + { + "epoch": 0.7779791323448655, + "grad_norm": 0.3405854105949402, + "learning_rate": 1.3442423240401442e-05, + "loss": 0.4701, + "step": 28334 + }, + { + "epoch": 0.7780065897858319, + "grad_norm": 0.42208755016326904, + "learning_rate": 1.3442017740746929e-05, + "loss": 0.508, + "step": 28335 + }, + { + "epoch": 0.7780340472267985, + "grad_norm": 0.4467397928237915, + "learning_rate": 1.3441612234671958e-05, + "loss": 0.5731, + "step": 28336 + }, + { + "epoch": 0.7780615046677649, + "grad_norm": 0.3980191648006439, + "learning_rate": 1.3441206722177283e-05, + "loss": 0.5548, + "step": 28337 + }, + { + "epoch": 0.7780889621087315, + "grad_norm": 0.3671995997428894, + "learning_rate": 1.3440801203263658e-05, + "loss": 0.5417, + "step": 28338 + }, + { + "epoch": 0.778116419549698, + "grad_norm": 0.36819443106651306, + "learning_rate": 1.344039567793184e-05, + "loss": 0.5284, + "step": 28339 + }, + { + "epoch": 0.7781438769906645, + "grad_norm": 0.38161659240722656, + "learning_rate": 1.3439990146182587e-05, + "loss": 0.5039, + "step": 28340 + }, + { + "epoch": 0.778171334431631, + "grad_norm": 0.44572076201438904, + "learning_rate": 1.3439584608016654e-05, + "loss": 0.543, + "step": 28341 + }, + { + "epoch": 0.7781987918725974, + "grad_norm": 0.4220609664916992, + "learning_rate": 1.3439179063434798e-05, + "loss": 0.4882, + "step": 28342 + }, + { + "epoch": 0.778226249313564, + "grad_norm": 0.42330318689346313, + "learning_rate": 1.3438773512437774e-05, + "loss": 0.511, + "step": 28343 + }, + { + "epoch": 0.7782537067545304, + "grad_norm": 0.3726212680339813, + "learning_rate": 1.3438367955026344e-05, + "loss": 0.5027, + "step": 28344 + }, + { + "epoch": 0.778281164195497, + "grad_norm": 0.40305978059768677, + "learning_rate": 1.3437962391201255e-05, + "loss": 0.4669, + "step": 28345 + }, + { + "epoch": 0.7783086216364635, + "grad_norm": 0.41632702946662903, + "learning_rate": 1.3437556820963273e-05, + "loss": 0.4697, + "step": 28346 + }, + { + "epoch": 0.77833607907743, + "grad_norm": 0.3862800896167755, + "learning_rate": 1.3437151244313149e-05, + "loss": 0.4842, + "step": 28347 + }, + { + "epoch": 0.7783635365183965, + "grad_norm": 0.42688703536987305, + "learning_rate": 1.343674566125164e-05, + "loss": 0.5779, + "step": 28348 + }, + { + "epoch": 0.778390993959363, + "grad_norm": 0.3537035286426544, + "learning_rate": 1.3436340071779506e-05, + "loss": 0.4378, + "step": 28349 + }, + { + "epoch": 0.7784184514003295, + "grad_norm": 0.4033121168613434, + "learning_rate": 1.3435934475897497e-05, + "loss": 0.5151, + "step": 28350 + }, + { + "epoch": 0.7784459088412959, + "grad_norm": 0.39423590898513794, + "learning_rate": 1.3435528873606377e-05, + "loss": 0.5054, + "step": 28351 + }, + { + "epoch": 0.7784733662822625, + "grad_norm": 0.4398956000804901, + "learning_rate": 1.3435123264906897e-05, + "loss": 0.4635, + "step": 28352 + }, + { + "epoch": 0.778500823723229, + "grad_norm": 0.3882869482040405, + "learning_rate": 1.3434717649799816e-05, + "loss": 0.423, + "step": 28353 + }, + { + "epoch": 0.7785282811641955, + "grad_norm": 0.38836196064949036, + "learning_rate": 1.3434312028285892e-05, + "loss": 0.5456, + "step": 28354 + }, + { + "epoch": 0.778555738605162, + "grad_norm": 0.40438419580459595, + "learning_rate": 1.3433906400365877e-05, + "loss": 0.4391, + "step": 28355 + }, + { + "epoch": 0.7785831960461285, + "grad_norm": 0.3405722677707672, + "learning_rate": 1.3433500766040535e-05, + "loss": 0.4018, + "step": 28356 + }, + { + "epoch": 0.778610653487095, + "grad_norm": 0.4466783106327057, + "learning_rate": 1.3433095125310616e-05, + "loss": 0.4735, + "step": 28357 + }, + { + "epoch": 0.7786381109280615, + "grad_norm": 0.4132806956768036, + "learning_rate": 1.3432689478176876e-05, + "loss": 0.4767, + "step": 28358 + }, + { + "epoch": 0.778665568369028, + "grad_norm": 0.41407015919685364, + "learning_rate": 1.343228382464008e-05, + "loss": 0.5538, + "step": 28359 + }, + { + "epoch": 0.7786930258099946, + "grad_norm": 0.40532186627388, + "learning_rate": 1.3431878164700973e-05, + "loss": 0.4843, + "step": 28360 + }, + { + "epoch": 0.778720483250961, + "grad_norm": 0.3900507092475891, + "learning_rate": 1.3431472498360325e-05, + "loss": 0.4686, + "step": 28361 + }, + { + "epoch": 0.7787479406919275, + "grad_norm": 0.44119203090667725, + "learning_rate": 1.3431066825618882e-05, + "loss": 0.4779, + "step": 28362 + }, + { + "epoch": 0.778775398132894, + "grad_norm": 0.5149109959602356, + "learning_rate": 1.3430661146477402e-05, + "loss": 0.5022, + "step": 28363 + }, + { + "epoch": 0.7788028555738605, + "grad_norm": 0.3562660813331604, + "learning_rate": 1.3430255460936647e-05, + "loss": 0.4887, + "step": 28364 + }, + { + "epoch": 0.778830313014827, + "grad_norm": 0.3927086293697357, + "learning_rate": 1.3429849768997371e-05, + "loss": 0.5395, + "step": 28365 + }, + { + "epoch": 0.7788577704557935, + "grad_norm": 0.4236011505126953, + "learning_rate": 1.342944407066033e-05, + "loss": 0.4563, + "step": 28366 + }, + { + "epoch": 0.7788852278967601, + "grad_norm": 0.34525221586227417, + "learning_rate": 1.3429038365926279e-05, + "loss": 0.4917, + "step": 28367 + }, + { + "epoch": 0.7789126853377265, + "grad_norm": 0.48055121302604675, + "learning_rate": 1.3428632654795979e-05, + "loss": 0.4519, + "step": 28368 + }, + { + "epoch": 0.7789401427786931, + "grad_norm": 0.38502568006515503, + "learning_rate": 1.3428226937270185e-05, + "loss": 0.5079, + "step": 28369 + }, + { + "epoch": 0.7789676002196595, + "grad_norm": 0.39114829897880554, + "learning_rate": 1.3427821213349654e-05, + "loss": 0.4694, + "step": 28370 + }, + { + "epoch": 0.778995057660626, + "grad_norm": 0.4049071669578552, + "learning_rate": 1.342741548303514e-05, + "loss": 0.4661, + "step": 28371 + }, + { + "epoch": 0.7790225151015925, + "grad_norm": 0.3710751235485077, + "learning_rate": 1.3427009746327406e-05, + "loss": 0.5355, + "step": 28372 + }, + { + "epoch": 0.779049972542559, + "grad_norm": 0.48356571793556213, + "learning_rate": 1.34266040032272e-05, + "loss": 0.5492, + "step": 28373 + }, + { + "epoch": 0.7790774299835256, + "grad_norm": 0.3668820858001709, + "learning_rate": 1.3426198253735287e-05, + "loss": 0.4466, + "step": 28374 + }, + { + "epoch": 0.779104887424492, + "grad_norm": 0.3794287443161011, + "learning_rate": 1.3425792497852421e-05, + "loss": 0.5443, + "step": 28375 + }, + { + "epoch": 0.7791323448654586, + "grad_norm": 0.39222216606140137, + "learning_rate": 1.3425386735579358e-05, + "loss": 0.4468, + "step": 28376 + }, + { + "epoch": 0.779159802306425, + "grad_norm": 0.3606302738189697, + "learning_rate": 1.3424980966916856e-05, + "loss": 0.4598, + "step": 28377 + }, + { + "epoch": 0.7791872597473916, + "grad_norm": 0.4046744108200073, + "learning_rate": 1.3424575191865672e-05, + "loss": 0.4843, + "step": 28378 + }, + { + "epoch": 0.779214717188358, + "grad_norm": 0.4486985504627228, + "learning_rate": 1.342416941042656e-05, + "loss": 0.5368, + "step": 28379 + }, + { + "epoch": 0.7792421746293245, + "grad_norm": 0.382944792509079, + "learning_rate": 1.3423763622600281e-05, + "loss": 0.5517, + "step": 28380 + }, + { + "epoch": 0.7792696320702911, + "grad_norm": 0.4992680549621582, + "learning_rate": 1.3423357828387589e-05, + "loss": 0.5475, + "step": 28381 + }, + { + "epoch": 0.7792970895112575, + "grad_norm": 0.38581135869026184, + "learning_rate": 1.3422952027789244e-05, + "loss": 0.5106, + "step": 28382 + }, + { + "epoch": 0.7793245469522241, + "grad_norm": 0.3689609467983246, + "learning_rate": 1.3422546220806e-05, + "loss": 0.4851, + "step": 28383 + }, + { + "epoch": 0.7793520043931905, + "grad_norm": 0.417877733707428, + "learning_rate": 1.3422140407438613e-05, + "loss": 0.5482, + "step": 28384 + }, + { + "epoch": 0.7793794618341571, + "grad_norm": 0.6721457242965698, + "learning_rate": 1.3421734587687844e-05, + "loss": 0.5559, + "step": 28385 + }, + { + "epoch": 0.7794069192751235, + "grad_norm": 0.8405423164367676, + "learning_rate": 1.3421328761554446e-05, + "loss": 0.524, + "step": 28386 + }, + { + "epoch": 0.7794343767160901, + "grad_norm": 0.3702731728553772, + "learning_rate": 1.342092292903918e-05, + "loss": 0.3974, + "step": 28387 + }, + { + "epoch": 0.7794618341570566, + "grad_norm": 0.36919865012168884, + "learning_rate": 1.3420517090142801e-05, + "loss": 0.5025, + "step": 28388 + }, + { + "epoch": 0.779489291598023, + "grad_norm": 0.39232879877090454, + "learning_rate": 1.3420111244866063e-05, + "loss": 0.505, + "step": 28389 + }, + { + "epoch": 0.7795167490389896, + "grad_norm": 0.3390423059463501, + "learning_rate": 1.341970539320973e-05, + "loss": 0.3845, + "step": 28390 + }, + { + "epoch": 0.779544206479956, + "grad_norm": 0.4078536927700043, + "learning_rate": 1.3419299535174553e-05, + "loss": 0.5891, + "step": 28391 + }, + { + "epoch": 0.7795716639209226, + "grad_norm": 0.47244563698768616, + "learning_rate": 1.341889367076129e-05, + "loss": 0.553, + "step": 28392 + }, + { + "epoch": 0.779599121361889, + "grad_norm": 0.3809697926044464, + "learning_rate": 1.34184877999707e-05, + "loss": 0.4621, + "step": 28393 + }, + { + "epoch": 0.7796265788028556, + "grad_norm": 0.34132733941078186, + "learning_rate": 1.3418081922803538e-05, + "loss": 0.4932, + "step": 28394 + }, + { + "epoch": 0.7796540362438221, + "grad_norm": 0.39506322145462036, + "learning_rate": 1.3417676039260565e-05, + "loss": 0.6013, + "step": 28395 + }, + { + "epoch": 0.7796814936847886, + "grad_norm": 0.37229278683662415, + "learning_rate": 1.3417270149342536e-05, + "loss": 0.4564, + "step": 28396 + }, + { + "epoch": 0.7797089511257551, + "grad_norm": 0.4023679792881012, + "learning_rate": 1.3416864253050204e-05, + "loss": 0.4923, + "step": 28397 + }, + { + "epoch": 0.7797364085667216, + "grad_norm": 0.45694610476493835, + "learning_rate": 1.3416458350384331e-05, + "loss": 0.4729, + "step": 28398 + }, + { + "epoch": 0.7797638660076881, + "grad_norm": 0.36701759696006775, + "learning_rate": 1.3416052441345672e-05, + "loss": 0.5267, + "step": 28399 + }, + { + "epoch": 0.7797913234486545, + "grad_norm": 0.5573608875274658, + "learning_rate": 1.3415646525934986e-05, + "loss": 0.5149, + "step": 28400 + }, + { + "epoch": 0.7798187808896211, + "grad_norm": 0.3773966133594513, + "learning_rate": 1.341524060415303e-05, + "loss": 0.4482, + "step": 28401 + }, + { + "epoch": 0.7798462383305876, + "grad_norm": 0.3912425935268402, + "learning_rate": 1.3414834676000558e-05, + "loss": 0.5603, + "step": 28402 + }, + { + "epoch": 0.7798736957715541, + "grad_norm": 0.4053973853588104, + "learning_rate": 1.3414428741478332e-05, + "loss": 0.5301, + "step": 28403 + }, + { + "epoch": 0.7799011532125206, + "grad_norm": 0.4020237624645233, + "learning_rate": 1.3414022800587106e-05, + "loss": 0.5398, + "step": 28404 + }, + { + "epoch": 0.7799286106534871, + "grad_norm": 0.426035076379776, + "learning_rate": 1.3413616853327636e-05, + "loss": 0.5568, + "step": 28405 + }, + { + "epoch": 0.7799560680944536, + "grad_norm": 0.4072171449661255, + "learning_rate": 1.3413210899700683e-05, + "loss": 0.4506, + "step": 28406 + }, + { + "epoch": 0.77998352553542, + "grad_norm": 0.39409300684928894, + "learning_rate": 1.3412804939707e-05, + "loss": 0.5237, + "step": 28407 + }, + { + "epoch": 0.7800109829763866, + "grad_norm": 0.40994518995285034, + "learning_rate": 1.3412398973347349e-05, + "loss": 0.4391, + "step": 28408 + }, + { + "epoch": 0.7800384404173532, + "grad_norm": 0.34082934260368347, + "learning_rate": 1.3411993000622485e-05, + "loss": 0.4661, + "step": 28409 + }, + { + "epoch": 0.7800658978583196, + "grad_norm": 0.369512677192688, + "learning_rate": 1.3411587021533163e-05, + "loss": 0.505, + "step": 28410 + }, + { + "epoch": 0.7800933552992861, + "grad_norm": 0.38385850191116333, + "learning_rate": 1.3411181036080144e-05, + "loss": 0.5259, + "step": 28411 + }, + { + "epoch": 0.7801208127402526, + "grad_norm": 0.4414614737033844, + "learning_rate": 1.3410775044264182e-05, + "loss": 0.5526, + "step": 28412 + }, + { + "epoch": 0.7801482701812191, + "grad_norm": 0.36264342069625854, + "learning_rate": 1.3410369046086036e-05, + "loss": 0.5188, + "step": 28413 + }, + { + "epoch": 0.7801757276221856, + "grad_norm": 0.38743430376052856, + "learning_rate": 1.3409963041546465e-05, + "loss": 0.5006, + "step": 28414 + }, + { + "epoch": 0.7802031850631521, + "grad_norm": 0.4299183487892151, + "learning_rate": 1.3409557030646223e-05, + "loss": 0.4998, + "step": 28415 + }, + { + "epoch": 0.7802306425041187, + "grad_norm": 0.37382620573043823, + "learning_rate": 1.340915101338607e-05, + "loss": 0.5737, + "step": 28416 + }, + { + "epoch": 0.7802580999450851, + "grad_norm": 0.3595377504825592, + "learning_rate": 1.3408744989766762e-05, + "loss": 0.494, + "step": 28417 + }, + { + "epoch": 0.7802855573860517, + "grad_norm": 0.46890145540237427, + "learning_rate": 1.3408338959789057e-05, + "loss": 0.5098, + "step": 28418 + }, + { + "epoch": 0.7803130148270181, + "grad_norm": 0.3638168275356293, + "learning_rate": 1.3407932923453713e-05, + "loss": 0.5277, + "step": 28419 + }, + { + "epoch": 0.7803404722679846, + "grad_norm": 0.447844535112381, + "learning_rate": 1.3407526880761484e-05, + "loss": 0.5471, + "step": 28420 + }, + { + "epoch": 0.7803679297089511, + "grad_norm": 0.3813786506652832, + "learning_rate": 1.340712083171313e-05, + "loss": 0.5143, + "step": 28421 + }, + { + "epoch": 0.7803953871499176, + "grad_norm": 0.36296844482421875, + "learning_rate": 1.3406714776309408e-05, + "loss": 0.5241, + "step": 28422 + }, + { + "epoch": 0.7804228445908842, + "grad_norm": 0.3996497690677643, + "learning_rate": 1.3406308714551078e-05, + "loss": 0.4806, + "step": 28423 + }, + { + "epoch": 0.7804503020318506, + "grad_norm": 0.369353711605072, + "learning_rate": 1.3405902646438895e-05, + "loss": 0.4353, + "step": 28424 + }, + { + "epoch": 0.7804777594728172, + "grad_norm": 0.3882175087928772, + "learning_rate": 1.3405496571973614e-05, + "loss": 0.443, + "step": 28425 + }, + { + "epoch": 0.7805052169137836, + "grad_norm": 0.44214141368865967, + "learning_rate": 1.3405090491155996e-05, + "loss": 0.4843, + "step": 28426 + }, + { + "epoch": 0.7805326743547502, + "grad_norm": 0.3591752350330353, + "learning_rate": 1.3404684403986798e-05, + "loss": 0.4657, + "step": 28427 + }, + { + "epoch": 0.7805601317957166, + "grad_norm": 0.3726717233657837, + "learning_rate": 1.3404278310466777e-05, + "loss": 0.3781, + "step": 28428 + }, + { + "epoch": 0.7805875892366831, + "grad_norm": 0.43287816643714905, + "learning_rate": 1.3403872210596691e-05, + "loss": 0.4886, + "step": 28429 + }, + { + "epoch": 0.7806150466776497, + "grad_norm": 0.3651362657546997, + "learning_rate": 1.3403466104377293e-05, + "loss": 0.462, + "step": 28430 + }, + { + "epoch": 0.7806425041186161, + "grad_norm": 0.3935036361217499, + "learning_rate": 1.340305999180935e-05, + "loss": 0.5221, + "step": 28431 + }, + { + "epoch": 0.7806699615595827, + "grad_norm": 0.4111352860927582, + "learning_rate": 1.3402653872893611e-05, + "loss": 0.5658, + "step": 28432 + }, + { + "epoch": 0.7806974190005491, + "grad_norm": 0.37481313943862915, + "learning_rate": 1.3402247747630836e-05, + "loss": 0.5034, + "step": 28433 + }, + { + "epoch": 0.7807248764415157, + "grad_norm": 0.36274436116218567, + "learning_rate": 1.3401841616021787e-05, + "loss": 0.4884, + "step": 28434 + }, + { + "epoch": 0.7807523338824821, + "grad_norm": 0.37172359228134155, + "learning_rate": 1.3401435478067212e-05, + "loss": 0.3868, + "step": 28435 + }, + { + "epoch": 0.7807797913234487, + "grad_norm": 0.40933361649513245, + "learning_rate": 1.3401029333767878e-05, + "loss": 0.613, + "step": 28436 + }, + { + "epoch": 0.7808072487644152, + "grad_norm": 0.42853212356567383, + "learning_rate": 1.3400623183124538e-05, + "loss": 0.5748, + "step": 28437 + }, + { + "epoch": 0.7808347062053816, + "grad_norm": 0.3979892432689667, + "learning_rate": 1.340021702613795e-05, + "loss": 0.5257, + "step": 28438 + }, + { + "epoch": 0.7808621636463482, + "grad_norm": 0.38719895482063293, + "learning_rate": 1.3399810862808872e-05, + "loss": 0.4777, + "step": 28439 + }, + { + "epoch": 0.7808896210873146, + "grad_norm": 0.40805765986442566, + "learning_rate": 1.3399404693138062e-05, + "loss": 0.4353, + "step": 28440 + }, + { + "epoch": 0.7809170785282812, + "grad_norm": 0.3788049817085266, + "learning_rate": 1.3398998517126275e-05, + "loss": 0.4762, + "step": 28441 + }, + { + "epoch": 0.7809445359692476, + "grad_norm": 0.3962560296058655, + "learning_rate": 1.3398592334774276e-05, + "loss": 0.4862, + "step": 28442 + }, + { + "epoch": 0.7809719934102142, + "grad_norm": 0.36837902665138245, + "learning_rate": 1.3398186146082814e-05, + "loss": 0.4642, + "step": 28443 + }, + { + "epoch": 0.7809994508511807, + "grad_norm": 0.387105256319046, + "learning_rate": 1.3397779951052652e-05, + "loss": 0.4513, + "step": 28444 + }, + { + "epoch": 0.7810269082921472, + "grad_norm": 0.42085781693458557, + "learning_rate": 1.3397373749684546e-05, + "loss": 0.4917, + "step": 28445 + }, + { + "epoch": 0.7810543657331137, + "grad_norm": 0.42858198285102844, + "learning_rate": 1.3396967541979251e-05, + "loss": 0.5656, + "step": 28446 + }, + { + "epoch": 0.7810818231740801, + "grad_norm": 0.34804633259773254, + "learning_rate": 1.339656132793753e-05, + "loss": 0.5027, + "step": 28447 + }, + { + "epoch": 0.7811092806150467, + "grad_norm": 0.3719485104084015, + "learning_rate": 1.3396155107560134e-05, + "loss": 0.5196, + "step": 28448 + }, + { + "epoch": 0.7811367380560131, + "grad_norm": 0.44015923142433167, + "learning_rate": 1.339574888084783e-05, + "loss": 0.4639, + "step": 28449 + }, + { + "epoch": 0.7811641954969797, + "grad_norm": 0.3587462604045868, + "learning_rate": 1.3395342647801367e-05, + "loss": 0.468, + "step": 28450 + }, + { + "epoch": 0.7811916529379462, + "grad_norm": 0.40826842188835144, + "learning_rate": 1.3394936408421506e-05, + "loss": 0.498, + "step": 28451 + }, + { + "epoch": 0.7812191103789127, + "grad_norm": 0.36605533957481384, + "learning_rate": 1.3394530162709009e-05, + "loss": 0.5031, + "step": 28452 + }, + { + "epoch": 0.7812465678198792, + "grad_norm": 0.4033891558647156, + "learning_rate": 1.3394123910664628e-05, + "loss": 0.524, + "step": 28453 + }, + { + "epoch": 0.7812740252608457, + "grad_norm": 0.4219375252723694, + "learning_rate": 1.3393717652289122e-05, + "loss": 0.5339, + "step": 28454 + }, + { + "epoch": 0.7813014827018122, + "grad_norm": 0.4926528036594391, + "learning_rate": 1.339331138758325e-05, + "loss": 0.5984, + "step": 28455 + }, + { + "epoch": 0.7813289401427786, + "grad_norm": 0.4163082540035248, + "learning_rate": 1.3392905116547768e-05, + "loss": 0.4724, + "step": 28456 + }, + { + "epoch": 0.7813563975837452, + "grad_norm": 0.3724246919155121, + "learning_rate": 1.3392498839183436e-05, + "loss": 0.4732, + "step": 28457 + }, + { + "epoch": 0.7813838550247117, + "grad_norm": 0.34673550724983215, + "learning_rate": 1.339209255549101e-05, + "loss": 0.4608, + "step": 28458 + }, + { + "epoch": 0.7814113124656782, + "grad_norm": 0.3661571741104126, + "learning_rate": 1.3391686265471248e-05, + "loss": 0.5043, + "step": 28459 + }, + { + "epoch": 0.7814387699066447, + "grad_norm": 0.3749069273471832, + "learning_rate": 1.3391279969124912e-05, + "loss": 0.4922, + "step": 28460 + }, + { + "epoch": 0.7814662273476112, + "grad_norm": 0.34214121103286743, + "learning_rate": 1.3390873666452753e-05, + "loss": 0.4741, + "step": 28461 + }, + { + "epoch": 0.7814936847885777, + "grad_norm": 0.3599623143672943, + "learning_rate": 1.3390467357455537e-05, + "loss": 0.4604, + "step": 28462 + }, + { + "epoch": 0.7815211422295442, + "grad_norm": 0.4431513249874115, + "learning_rate": 1.3390061042134013e-05, + "loss": 0.5022, + "step": 28463 + }, + { + "epoch": 0.7815485996705107, + "grad_norm": 0.37014004588127136, + "learning_rate": 1.3389654720488943e-05, + "loss": 0.4621, + "step": 28464 + }, + { + "epoch": 0.7815760571114773, + "grad_norm": 0.3937191963195801, + "learning_rate": 1.3389248392521089e-05, + "loss": 0.472, + "step": 28465 + }, + { + "epoch": 0.7816035145524437, + "grad_norm": 0.3921925127506256, + "learning_rate": 1.3388842058231199e-05, + "loss": 0.492, + "step": 28466 + }, + { + "epoch": 0.7816309719934103, + "grad_norm": 0.39209091663360596, + "learning_rate": 1.3388435717620043e-05, + "loss": 0.4674, + "step": 28467 + }, + { + "epoch": 0.7816584294343767, + "grad_norm": 0.3981027901172638, + "learning_rate": 1.338802937068837e-05, + "loss": 0.5204, + "step": 28468 + }, + { + "epoch": 0.7816858868753432, + "grad_norm": 0.4413098692893982, + "learning_rate": 1.3387623017436942e-05, + "loss": 0.563, + "step": 28469 + }, + { + "epoch": 0.7817133443163097, + "grad_norm": 0.4021369218826294, + "learning_rate": 1.3387216657866516e-05, + "loss": 0.5333, + "step": 28470 + }, + { + "epoch": 0.7817408017572762, + "grad_norm": 0.38715115189552307, + "learning_rate": 1.3386810291977845e-05, + "loss": 0.4775, + "step": 28471 + }, + { + "epoch": 0.7817682591982428, + "grad_norm": 0.4343203902244568, + "learning_rate": 1.3386403919771698e-05, + "loss": 0.4695, + "step": 28472 + }, + { + "epoch": 0.7817957166392092, + "grad_norm": 0.4320381283760071, + "learning_rate": 1.3385997541248822e-05, + "loss": 0.51, + "step": 28473 + }, + { + "epoch": 0.7818231740801758, + "grad_norm": 0.3581677973270416, + "learning_rate": 1.3385591156409985e-05, + "loss": 0.489, + "step": 28474 + }, + { + "epoch": 0.7818506315211422, + "grad_norm": 0.33903494477272034, + "learning_rate": 1.338518476525594e-05, + "loss": 0.4474, + "step": 28475 + }, + { + "epoch": 0.7818780889621088, + "grad_norm": 0.37225276231765747, + "learning_rate": 1.338477836778744e-05, + "loss": 0.506, + "step": 28476 + }, + { + "epoch": 0.7819055464030752, + "grad_norm": 0.3785501718521118, + "learning_rate": 1.3384371964005252e-05, + "loss": 0.5032, + "step": 28477 + }, + { + "epoch": 0.7819330038440417, + "grad_norm": 0.3985570967197418, + "learning_rate": 1.3383965553910128e-05, + "loss": 0.4529, + "step": 28478 + }, + { + "epoch": 0.7819604612850083, + "grad_norm": 0.42770519852638245, + "learning_rate": 1.3383559137502829e-05, + "loss": 0.4902, + "step": 28479 + }, + { + "epoch": 0.7819879187259747, + "grad_norm": 0.38585570454597473, + "learning_rate": 1.338315271478411e-05, + "loss": 0.4176, + "step": 28480 + }, + { + "epoch": 0.7820153761669413, + "grad_norm": 0.39199960231781006, + "learning_rate": 1.3382746285754736e-05, + "loss": 0.4958, + "step": 28481 + }, + { + "epoch": 0.7820428336079077, + "grad_norm": 0.4406903088092804, + "learning_rate": 1.3382339850415457e-05, + "loss": 0.4479, + "step": 28482 + }, + { + "epoch": 0.7820702910488743, + "grad_norm": 0.3840814232826233, + "learning_rate": 1.3381933408767036e-05, + "loss": 0.4917, + "step": 28483 + }, + { + "epoch": 0.7820977484898407, + "grad_norm": 0.431279718875885, + "learning_rate": 1.338152696081023e-05, + "loss": 0.5117, + "step": 28484 + }, + { + "epoch": 0.7821252059308073, + "grad_norm": 0.371452271938324, + "learning_rate": 1.3381120506545795e-05, + "loss": 0.4996, + "step": 28485 + }, + { + "epoch": 0.7821526633717738, + "grad_norm": 0.4259665608406067, + "learning_rate": 1.3380714045974494e-05, + "loss": 0.6252, + "step": 28486 + }, + { + "epoch": 0.7821801208127402, + "grad_norm": 0.3834419846534729, + "learning_rate": 1.3380307579097082e-05, + "loss": 0.466, + "step": 28487 + }, + { + "epoch": 0.7822075782537068, + "grad_norm": 0.4216686487197876, + "learning_rate": 1.3379901105914314e-05, + "loss": 0.5586, + "step": 28488 + }, + { + "epoch": 0.7822350356946732, + "grad_norm": 0.42965811491012573, + "learning_rate": 1.3379494626426956e-05, + "loss": 0.5019, + "step": 28489 + }, + { + "epoch": 0.7822624931356398, + "grad_norm": 0.391330748796463, + "learning_rate": 1.337908814063576e-05, + "loss": 0.4852, + "step": 28490 + }, + { + "epoch": 0.7822899505766062, + "grad_norm": 0.35641250014305115, + "learning_rate": 1.3378681648541485e-05, + "loss": 0.5032, + "step": 28491 + }, + { + "epoch": 0.7823174080175728, + "grad_norm": 0.4122146964073181, + "learning_rate": 1.3378275150144893e-05, + "loss": 0.4962, + "step": 28492 + }, + { + "epoch": 0.7823448654585393, + "grad_norm": 0.4047633707523346, + "learning_rate": 1.3377868645446736e-05, + "loss": 0.6312, + "step": 28493 + }, + { + "epoch": 0.7823723228995058, + "grad_norm": 0.3865005671977997, + "learning_rate": 1.3377462134447781e-05, + "loss": 0.4904, + "step": 28494 + }, + { + "epoch": 0.7823997803404723, + "grad_norm": 0.43229925632476807, + "learning_rate": 1.3377055617148778e-05, + "loss": 0.492, + "step": 28495 + }, + { + "epoch": 0.7824272377814387, + "grad_norm": 0.4041188657283783, + "learning_rate": 1.3376649093550488e-05, + "loss": 0.5045, + "step": 28496 + }, + { + "epoch": 0.7824546952224053, + "grad_norm": 0.3728707730770111, + "learning_rate": 1.3376242563653673e-05, + "loss": 0.5072, + "step": 28497 + }, + { + "epoch": 0.7824821526633717, + "grad_norm": 0.3549196124076843, + "learning_rate": 1.3375836027459084e-05, + "loss": 0.4124, + "step": 28498 + }, + { + "epoch": 0.7825096101043383, + "grad_norm": 0.44475609064102173, + "learning_rate": 1.3375429484967484e-05, + "loss": 0.4649, + "step": 28499 + }, + { + "epoch": 0.7825370675453048, + "grad_norm": 0.3876137137413025, + "learning_rate": 1.3375022936179631e-05, + "loss": 0.5223, + "step": 28500 + }, + { + "epoch": 0.7825645249862713, + "grad_norm": 0.37685900926589966, + "learning_rate": 1.3374616381096285e-05, + "loss": 0.5476, + "step": 28501 + }, + { + "epoch": 0.7825919824272378, + "grad_norm": 0.3867493271827698, + "learning_rate": 1.3374209819718201e-05, + "loss": 0.5314, + "step": 28502 + }, + { + "epoch": 0.7826194398682043, + "grad_norm": 0.727252185344696, + "learning_rate": 1.3373803252046138e-05, + "loss": 0.4495, + "step": 28503 + }, + { + "epoch": 0.7826468973091708, + "grad_norm": 0.4604683220386505, + "learning_rate": 1.3373396678080856e-05, + "loss": 0.5372, + "step": 28504 + }, + { + "epoch": 0.7826743547501372, + "grad_norm": 0.3539043962955475, + "learning_rate": 1.3372990097823113e-05, + "loss": 0.4584, + "step": 28505 + }, + { + "epoch": 0.7827018121911038, + "grad_norm": 0.49406298995018005, + "learning_rate": 1.3372583511273668e-05, + "loss": 0.5424, + "step": 28506 + }, + { + "epoch": 0.7827292696320703, + "grad_norm": 0.43275701999664307, + "learning_rate": 1.3372176918433274e-05, + "loss": 0.5083, + "step": 28507 + }, + { + "epoch": 0.7827567270730368, + "grad_norm": 0.3733243942260742, + "learning_rate": 1.3371770319302697e-05, + "loss": 0.439, + "step": 28508 + }, + { + "epoch": 0.7827841845140033, + "grad_norm": 0.35461002588272095, + "learning_rate": 1.3371363713882693e-05, + "loss": 0.4323, + "step": 28509 + }, + { + "epoch": 0.7828116419549698, + "grad_norm": 0.34013083577156067, + "learning_rate": 1.3370957102174019e-05, + "loss": 0.4917, + "step": 28510 + }, + { + "epoch": 0.7828390993959363, + "grad_norm": 0.427473247051239, + "learning_rate": 1.3370550484177431e-05, + "loss": 0.608, + "step": 28511 + }, + { + "epoch": 0.7828665568369028, + "grad_norm": 0.39536038041114807, + "learning_rate": 1.3370143859893696e-05, + "loss": 0.5085, + "step": 28512 + }, + { + "epoch": 0.7828940142778693, + "grad_norm": 0.4015469551086426, + "learning_rate": 1.3369737229323564e-05, + "loss": 0.4803, + "step": 28513 + }, + { + "epoch": 0.7829214717188359, + "grad_norm": 0.4073996841907501, + "learning_rate": 1.3369330592467798e-05, + "loss": 0.548, + "step": 28514 + }, + { + "epoch": 0.7829489291598023, + "grad_norm": 0.4034525156021118, + "learning_rate": 1.336892394932715e-05, + "loss": 0.5399, + "step": 28515 + }, + { + "epoch": 0.7829763866007688, + "grad_norm": 0.43523699045181274, + "learning_rate": 1.3368517299902388e-05, + "loss": 0.5836, + "step": 28516 + }, + { + "epoch": 0.7830038440417353, + "grad_norm": 0.349631130695343, + "learning_rate": 1.3368110644194268e-05, + "loss": 0.4309, + "step": 28517 + }, + { + "epoch": 0.7830313014827018, + "grad_norm": 0.4010617733001709, + "learning_rate": 1.3367703982203541e-05, + "loss": 0.4765, + "step": 28518 + }, + { + "epoch": 0.7830587589236683, + "grad_norm": 0.44537878036499023, + "learning_rate": 1.3367297313930978e-05, + "loss": 0.5687, + "step": 28519 + }, + { + "epoch": 0.7830862163646348, + "grad_norm": 0.4076749086380005, + "learning_rate": 1.3366890639377328e-05, + "loss": 0.4945, + "step": 28520 + }, + { + "epoch": 0.7831136738056013, + "grad_norm": 0.42865896224975586, + "learning_rate": 1.336648395854335e-05, + "loss": 0.5827, + "step": 28521 + }, + { + "epoch": 0.7831411312465678, + "grad_norm": 0.4331841766834259, + "learning_rate": 1.3366077271429807e-05, + "loss": 0.4966, + "step": 28522 + }, + { + "epoch": 0.7831685886875344, + "grad_norm": 0.41074928641319275, + "learning_rate": 1.3365670578037455e-05, + "loss": 0.5129, + "step": 28523 + }, + { + "epoch": 0.7831960461285008, + "grad_norm": 0.3877605199813843, + "learning_rate": 1.3365263878367052e-05, + "loss": 0.5476, + "step": 28524 + }, + { + "epoch": 0.7832235035694673, + "grad_norm": 0.4297112226486206, + "learning_rate": 1.336485717241936e-05, + "loss": 0.5284, + "step": 28525 + }, + { + "epoch": 0.7832509610104338, + "grad_norm": 0.39266401529312134, + "learning_rate": 1.3364450460195135e-05, + "loss": 0.5369, + "step": 28526 + }, + { + "epoch": 0.7832784184514003, + "grad_norm": 0.42589110136032104, + "learning_rate": 1.3364043741695136e-05, + "loss": 0.5277, + "step": 28527 + }, + { + "epoch": 0.7833058758923668, + "grad_norm": 0.401692271232605, + "learning_rate": 1.3363637016920122e-05, + "loss": 0.4799, + "step": 28528 + }, + { + "epoch": 0.7833333333333333, + "grad_norm": 0.6176092624664307, + "learning_rate": 1.336323028587085e-05, + "loss": 0.4562, + "step": 28529 + }, + { + "epoch": 0.7833607907742999, + "grad_norm": 0.389681339263916, + "learning_rate": 1.336282354854808e-05, + "loss": 0.5766, + "step": 28530 + }, + { + "epoch": 0.7833882482152663, + "grad_norm": 0.3871902525424957, + "learning_rate": 1.3362416804952572e-05, + "loss": 0.4636, + "step": 28531 + }, + { + "epoch": 0.7834157056562329, + "grad_norm": 0.3995181918144226, + "learning_rate": 1.3362010055085084e-05, + "loss": 0.5076, + "step": 28532 + }, + { + "epoch": 0.7834431630971993, + "grad_norm": 0.39414969086647034, + "learning_rate": 1.3361603298946373e-05, + "loss": 0.4686, + "step": 28533 + }, + { + "epoch": 0.7834706205381659, + "grad_norm": 0.5603223443031311, + "learning_rate": 1.33611965365372e-05, + "loss": 0.5378, + "step": 28534 + }, + { + "epoch": 0.7834980779791323, + "grad_norm": 0.3895227611064911, + "learning_rate": 1.3360789767858322e-05, + "loss": 0.4158, + "step": 28535 + }, + { + "epoch": 0.7835255354200988, + "grad_norm": 0.3635835647583008, + "learning_rate": 1.3360382992910497e-05, + "loss": 0.5075, + "step": 28536 + }, + { + "epoch": 0.7835529928610654, + "grad_norm": 0.3528456389904022, + "learning_rate": 1.3359976211694488e-05, + "loss": 0.4137, + "step": 28537 + }, + { + "epoch": 0.7835804503020318, + "grad_norm": 0.3949505686759949, + "learning_rate": 1.3359569424211053e-05, + "loss": 0.4349, + "step": 28538 + }, + { + "epoch": 0.7836079077429984, + "grad_norm": 0.43972334265708923, + "learning_rate": 1.3359162630460943e-05, + "loss": 0.5766, + "step": 28539 + }, + { + "epoch": 0.7836353651839648, + "grad_norm": 0.42622682452201843, + "learning_rate": 1.3358755830444925e-05, + "loss": 0.4968, + "step": 28540 + }, + { + "epoch": 0.7836628226249314, + "grad_norm": 0.375531405210495, + "learning_rate": 1.3358349024163754e-05, + "loss": 0.5039, + "step": 28541 + }, + { + "epoch": 0.7836902800658978, + "grad_norm": 0.34669139981269836, + "learning_rate": 1.3357942211618193e-05, + "loss": 0.5259, + "step": 28542 + }, + { + "epoch": 0.7837177375068644, + "grad_norm": 0.347439706325531, + "learning_rate": 1.3357535392808998e-05, + "loss": 0.3919, + "step": 28543 + }, + { + "epoch": 0.7837451949478309, + "grad_norm": 0.403978168964386, + "learning_rate": 1.3357128567736926e-05, + "loss": 0.4659, + "step": 28544 + }, + { + "epoch": 0.7837726523887973, + "grad_norm": 0.3821006417274475, + "learning_rate": 1.3356721736402738e-05, + "loss": 0.4834, + "step": 28545 + }, + { + "epoch": 0.7838001098297639, + "grad_norm": 0.35240307450294495, + "learning_rate": 1.3356314898807192e-05, + "loss": 0.4767, + "step": 28546 + }, + { + "epoch": 0.7838275672707303, + "grad_norm": 0.4064358174800873, + "learning_rate": 1.3355908054951048e-05, + "loss": 0.4636, + "step": 28547 + }, + { + "epoch": 0.7838550247116969, + "grad_norm": 0.35197290778160095, + "learning_rate": 1.3355501204835066e-05, + "loss": 0.5124, + "step": 28548 + }, + { + "epoch": 0.7838824821526633, + "grad_norm": 0.37883713841438293, + "learning_rate": 1.3355094348460002e-05, + "loss": 0.4969, + "step": 28549 + }, + { + "epoch": 0.7839099395936299, + "grad_norm": 0.4068754315376282, + "learning_rate": 1.3354687485826617e-05, + "loss": 0.6057, + "step": 28550 + }, + { + "epoch": 0.7839373970345964, + "grad_norm": 0.3308519423007965, + "learning_rate": 1.3354280616935668e-05, + "loss": 0.4487, + "step": 28551 + }, + { + "epoch": 0.7839648544755629, + "grad_norm": 0.4298515319824219, + "learning_rate": 1.3353873741787915e-05, + "loss": 0.522, + "step": 28552 + }, + { + "epoch": 0.7839923119165294, + "grad_norm": 0.3906472623348236, + "learning_rate": 1.3353466860384118e-05, + "loss": 0.4407, + "step": 28553 + }, + { + "epoch": 0.7840197693574958, + "grad_norm": 0.3851187527179718, + "learning_rate": 1.3353059972725032e-05, + "loss": 0.5427, + "step": 28554 + }, + { + "epoch": 0.7840472267984624, + "grad_norm": 0.39142030477523804, + "learning_rate": 1.3352653078811427e-05, + "loss": 0.5822, + "step": 28555 + }, + { + "epoch": 0.7840746842394288, + "grad_norm": 0.3507900834083557, + "learning_rate": 1.3352246178644048e-05, + "loss": 0.4929, + "step": 28556 + }, + { + "epoch": 0.7841021416803954, + "grad_norm": 0.40149107575416565, + "learning_rate": 1.3351839272223658e-05, + "loss": 0.5484, + "step": 28557 + }, + { + "epoch": 0.7841295991213619, + "grad_norm": 0.3942813277244568, + "learning_rate": 1.3351432359551022e-05, + "loss": 0.5279, + "step": 28558 + }, + { + "epoch": 0.7841570565623284, + "grad_norm": 0.5833288431167603, + "learning_rate": 1.3351025440626893e-05, + "loss": 0.4546, + "step": 28559 + }, + { + "epoch": 0.7841845140032949, + "grad_norm": 0.3930203914642334, + "learning_rate": 1.3350618515452034e-05, + "loss": 0.4915, + "step": 28560 + }, + { + "epoch": 0.7842119714442614, + "grad_norm": 0.3552148640155792, + "learning_rate": 1.33502115840272e-05, + "loss": 0.4739, + "step": 28561 + }, + { + "epoch": 0.7842394288852279, + "grad_norm": 0.3673396110534668, + "learning_rate": 1.334980464635315e-05, + "loss": 0.474, + "step": 28562 + }, + { + "epoch": 0.7842668863261943, + "grad_norm": 0.4045673906803131, + "learning_rate": 1.334939770243065e-05, + "loss": 0.5394, + "step": 28563 + }, + { + "epoch": 0.7842943437671609, + "grad_norm": 0.4181194603443146, + "learning_rate": 1.3348990752260452e-05, + "loss": 0.3989, + "step": 28564 + }, + { + "epoch": 0.7843218012081274, + "grad_norm": 0.4101807475090027, + "learning_rate": 1.3348583795843317e-05, + "loss": 0.5123, + "step": 28565 + }, + { + "epoch": 0.7843492586490939, + "grad_norm": 0.3553203344345093, + "learning_rate": 1.3348176833180006e-05, + "loss": 0.5158, + "step": 28566 + }, + { + "epoch": 0.7843767160900604, + "grad_norm": 0.36090731620788574, + "learning_rate": 1.3347769864271275e-05, + "loss": 0.5112, + "step": 28567 + }, + { + "epoch": 0.7844041735310269, + "grad_norm": 0.40829378366470337, + "learning_rate": 1.3347362889117887e-05, + "loss": 0.5432, + "step": 28568 + }, + { + "epoch": 0.7844316309719934, + "grad_norm": 0.3922879993915558, + "learning_rate": 1.3346955907720598e-05, + "loss": 0.5055, + "step": 28569 + }, + { + "epoch": 0.7844590884129599, + "grad_norm": 0.38471484184265137, + "learning_rate": 1.3346548920080165e-05, + "loss": 0.4545, + "step": 28570 + }, + { + "epoch": 0.7844865458539264, + "grad_norm": 0.3782680332660675, + "learning_rate": 1.3346141926197354e-05, + "loss": 0.4931, + "step": 28571 + }, + { + "epoch": 0.784514003294893, + "grad_norm": 0.42047634720802307, + "learning_rate": 1.3345734926072917e-05, + "loss": 0.4747, + "step": 28572 + }, + { + "epoch": 0.7845414607358594, + "grad_norm": 0.3679380714893341, + "learning_rate": 1.334532791970762e-05, + "loss": 0.4495, + "step": 28573 + }, + { + "epoch": 0.784568918176826, + "grad_norm": 0.34807541966438293, + "learning_rate": 1.3344920907102216e-05, + "loss": 0.4144, + "step": 28574 + }, + { + "epoch": 0.7845963756177924, + "grad_norm": 0.3892156779766083, + "learning_rate": 1.334451388825747e-05, + "loss": 0.5014, + "step": 28575 + }, + { + "epoch": 0.7846238330587589, + "grad_norm": 0.37741124629974365, + "learning_rate": 1.3344106863174135e-05, + "loss": 0.4683, + "step": 28576 + }, + { + "epoch": 0.7846512904997254, + "grad_norm": 0.5001950860023499, + "learning_rate": 1.3343699831852975e-05, + "loss": 0.5324, + "step": 28577 + }, + { + "epoch": 0.7846787479406919, + "grad_norm": 0.36884984374046326, + "learning_rate": 1.3343292794294746e-05, + "loss": 0.4935, + "step": 28578 + }, + { + "epoch": 0.7847062053816585, + "grad_norm": 0.3299672305583954, + "learning_rate": 1.3342885750500212e-05, + "loss": 0.4651, + "step": 28579 + }, + { + "epoch": 0.7847336628226249, + "grad_norm": 0.6071812510490417, + "learning_rate": 1.3342478700470124e-05, + "loss": 0.4636, + "step": 28580 + }, + { + "epoch": 0.7847611202635915, + "grad_norm": 0.3639971911907196, + "learning_rate": 1.3342071644205253e-05, + "loss": 0.4514, + "step": 28581 + }, + { + "epoch": 0.7847885777045579, + "grad_norm": 0.3829035758972168, + "learning_rate": 1.3341664581706349e-05, + "loss": 0.4279, + "step": 28582 + }, + { + "epoch": 0.7848160351455244, + "grad_norm": 0.39494678378105164, + "learning_rate": 1.334125751297417e-05, + "loss": 0.4981, + "step": 28583 + }, + { + "epoch": 0.7848434925864909, + "grad_norm": 0.4093272387981415, + "learning_rate": 1.3340850438009485e-05, + "loss": 0.5351, + "step": 28584 + }, + { + "epoch": 0.7848709500274574, + "grad_norm": 0.42278704047203064, + "learning_rate": 1.3340443356813044e-05, + "loss": 0.6067, + "step": 28585 + }, + { + "epoch": 0.784898407468424, + "grad_norm": 0.37400755286216736, + "learning_rate": 1.3340036269385613e-05, + "loss": 0.5387, + "step": 28586 + }, + { + "epoch": 0.7849258649093904, + "grad_norm": 0.3952639400959015, + "learning_rate": 1.3339629175727947e-05, + "loss": 0.6254, + "step": 28587 + }, + { + "epoch": 0.784953322350357, + "grad_norm": 0.4326179027557373, + "learning_rate": 1.3339222075840805e-05, + "loss": 0.5481, + "step": 28588 + }, + { + "epoch": 0.7849807797913234, + "grad_norm": 0.48301681876182556, + "learning_rate": 1.333881496972495e-05, + "loss": 0.6137, + "step": 28589 + }, + { + "epoch": 0.78500823723229, + "grad_norm": 0.40100082755088806, + "learning_rate": 1.3338407857381137e-05, + "loss": 0.5249, + "step": 28590 + }, + { + "epoch": 0.7850356946732564, + "grad_norm": 0.412720650434494, + "learning_rate": 1.3338000738810131e-05, + "loss": 0.5367, + "step": 28591 + }, + { + "epoch": 0.785063152114223, + "grad_norm": 0.43882298469543457, + "learning_rate": 1.3337593614012688e-05, + "loss": 0.5046, + "step": 28592 + }, + { + "epoch": 0.7850906095551895, + "grad_norm": 0.3544243276119232, + "learning_rate": 1.3337186482989566e-05, + "loss": 0.5023, + "step": 28593 + }, + { + "epoch": 0.7851180669961559, + "grad_norm": 0.3966607451438904, + "learning_rate": 1.3336779345741526e-05, + "loss": 0.4714, + "step": 28594 + }, + { + "epoch": 0.7851455244371225, + "grad_norm": 0.3811722695827484, + "learning_rate": 1.3336372202269328e-05, + "loss": 0.4595, + "step": 28595 + }, + { + "epoch": 0.7851729818780889, + "grad_norm": 0.38858118653297424, + "learning_rate": 1.3335965052573732e-05, + "loss": 0.5003, + "step": 28596 + }, + { + "epoch": 0.7852004393190555, + "grad_norm": 0.3597685396671295, + "learning_rate": 1.3335557896655497e-05, + "loss": 0.5508, + "step": 28597 + }, + { + "epoch": 0.7852278967600219, + "grad_norm": 0.4281710684299469, + "learning_rate": 1.3335150734515381e-05, + "loss": 0.5099, + "step": 28598 + }, + { + "epoch": 0.7852553542009885, + "grad_norm": 0.36340469121932983, + "learning_rate": 1.3334743566154144e-05, + "loss": 0.4559, + "step": 28599 + }, + { + "epoch": 0.785282811641955, + "grad_norm": 0.44517242908477783, + "learning_rate": 1.3334336391572546e-05, + "loss": 0.5045, + "step": 28600 + }, + { + "epoch": 0.7853102690829215, + "grad_norm": 0.35935983061790466, + "learning_rate": 1.3333929210771346e-05, + "loss": 0.471, + "step": 28601 + }, + { + "epoch": 0.785337726523888, + "grad_norm": 0.3856869339942932, + "learning_rate": 1.3333522023751304e-05, + "loss": 0.5632, + "step": 28602 + }, + { + "epoch": 0.7853651839648544, + "grad_norm": 0.3806551694869995, + "learning_rate": 1.3333114830513178e-05, + "loss": 0.511, + "step": 28603 + }, + { + "epoch": 0.785392641405821, + "grad_norm": 0.42841389775276184, + "learning_rate": 1.3332707631057733e-05, + "loss": 0.5678, + "step": 28604 + }, + { + "epoch": 0.7854200988467874, + "grad_norm": 0.37921497225761414, + "learning_rate": 1.3332300425385722e-05, + "loss": 0.4011, + "step": 28605 + }, + { + "epoch": 0.785447556287754, + "grad_norm": 0.38186606764793396, + "learning_rate": 1.3331893213497908e-05, + "loss": 0.4915, + "step": 28606 + }, + { + "epoch": 0.7854750137287205, + "grad_norm": 0.403382271528244, + "learning_rate": 1.333148599539505e-05, + "loss": 0.4344, + "step": 28607 + }, + { + "epoch": 0.785502471169687, + "grad_norm": 0.44847145676612854, + "learning_rate": 1.3331078771077905e-05, + "loss": 0.5397, + "step": 28608 + }, + { + "epoch": 0.7855299286106535, + "grad_norm": 0.37197309732437134, + "learning_rate": 1.333067154054724e-05, + "loss": 0.5171, + "step": 28609 + }, + { + "epoch": 0.78555738605162, + "grad_norm": 0.4694949984550476, + "learning_rate": 1.3330264303803806e-05, + "loss": 0.5071, + "step": 28610 + }, + { + "epoch": 0.7855848434925865, + "grad_norm": 0.43054571747779846, + "learning_rate": 1.3329857060848365e-05, + "loss": 0.4501, + "step": 28611 + }, + { + "epoch": 0.7856123009335529, + "grad_norm": 0.38067638874053955, + "learning_rate": 1.332944981168168e-05, + "loss": 0.5018, + "step": 28612 + }, + { + "epoch": 0.7856397583745195, + "grad_norm": 0.40148431062698364, + "learning_rate": 1.3329042556304508e-05, + "loss": 0.4653, + "step": 28613 + }, + { + "epoch": 0.785667215815486, + "grad_norm": 0.4487994909286499, + "learning_rate": 1.332863529471761e-05, + "loss": 0.4561, + "step": 28614 + }, + { + "epoch": 0.7856946732564525, + "grad_norm": 0.42154935002326965, + "learning_rate": 1.3328228026921743e-05, + "loss": 0.5322, + "step": 28615 + }, + { + "epoch": 0.785722130697419, + "grad_norm": 0.393182635307312, + "learning_rate": 1.3327820752917669e-05, + "loss": 0.4948, + "step": 28616 + }, + { + "epoch": 0.7857495881383855, + "grad_norm": 0.38314366340637207, + "learning_rate": 1.3327413472706148e-05, + "loss": 0.4856, + "step": 28617 + }, + { + "epoch": 0.785777045579352, + "grad_norm": 0.4314896762371063, + "learning_rate": 1.3327006186287939e-05, + "loss": 0.5573, + "step": 28618 + }, + { + "epoch": 0.7858045030203185, + "grad_norm": 0.3996351361274719, + "learning_rate": 1.3326598893663798e-05, + "loss": 0.4918, + "step": 28619 + }, + { + "epoch": 0.785831960461285, + "grad_norm": 0.4297027885913849, + "learning_rate": 1.3326191594834493e-05, + "loss": 0.5724, + "step": 28620 + }, + { + "epoch": 0.7858594179022516, + "grad_norm": 0.4000239074230194, + "learning_rate": 1.3325784289800776e-05, + "loss": 0.4838, + "step": 28621 + }, + { + "epoch": 0.785886875343218, + "grad_norm": 0.4019842743873596, + "learning_rate": 1.3325376978563412e-05, + "loss": 0.5253, + "step": 28622 + }, + { + "epoch": 0.7859143327841845, + "grad_norm": 0.3807014226913452, + "learning_rate": 1.332496966112316e-05, + "loss": 0.4316, + "step": 28623 + }, + { + "epoch": 0.785941790225151, + "grad_norm": 0.38970303535461426, + "learning_rate": 1.3324562337480773e-05, + "loss": 0.5642, + "step": 28624 + }, + { + "epoch": 0.7859692476661175, + "grad_norm": 0.4067208170890808, + "learning_rate": 1.3324155007637019e-05, + "loss": 0.4722, + "step": 28625 + }, + { + "epoch": 0.785996705107084, + "grad_norm": 0.38186755776405334, + "learning_rate": 1.3323747671592655e-05, + "loss": 0.5537, + "step": 28626 + }, + { + "epoch": 0.7860241625480505, + "grad_norm": 0.35769546031951904, + "learning_rate": 1.3323340329348442e-05, + "loss": 0.4806, + "step": 28627 + }, + { + "epoch": 0.7860516199890171, + "grad_norm": 0.37352874875068665, + "learning_rate": 1.3322932980905137e-05, + "loss": 0.483, + "step": 28628 + }, + { + "epoch": 0.7860790774299835, + "grad_norm": 0.3881298899650574, + "learning_rate": 1.3322525626263501e-05, + "loss": 0.4706, + "step": 28629 + }, + { + "epoch": 0.7861065348709501, + "grad_norm": 0.37725841999053955, + "learning_rate": 1.3322118265424298e-05, + "loss": 0.4764, + "step": 28630 + }, + { + "epoch": 0.7861339923119165, + "grad_norm": 0.3734586536884308, + "learning_rate": 1.3321710898388281e-05, + "loss": 0.4533, + "step": 28631 + }, + { + "epoch": 0.786161449752883, + "grad_norm": 0.38989877700805664, + "learning_rate": 1.3321303525156213e-05, + "loss": 0.501, + "step": 28632 + }, + { + "epoch": 0.7861889071938495, + "grad_norm": 0.36490899324417114, + "learning_rate": 1.3320896145728855e-05, + "loss": 0.5211, + "step": 28633 + }, + { + "epoch": 0.786216364634816, + "grad_norm": 0.4901975393295288, + "learning_rate": 1.3320488760106965e-05, + "loss": 0.549, + "step": 28634 + }, + { + "epoch": 0.7862438220757826, + "grad_norm": 0.3965071439743042, + "learning_rate": 1.3320081368291302e-05, + "loss": 0.496, + "step": 28635 + }, + { + "epoch": 0.786271279516749, + "grad_norm": 0.3885347545146942, + "learning_rate": 1.3319673970282631e-05, + "loss": 0.4955, + "step": 28636 + }, + { + "epoch": 0.7862987369577156, + "grad_norm": 0.45938044786453247, + "learning_rate": 1.3319266566081705e-05, + "loss": 0.4793, + "step": 28637 + }, + { + "epoch": 0.786326194398682, + "grad_norm": 0.3902451992034912, + "learning_rate": 1.331885915568929e-05, + "loss": 0.4388, + "step": 28638 + }, + { + "epoch": 0.7863536518396486, + "grad_norm": 0.3827420771121979, + "learning_rate": 1.3318451739106143e-05, + "loss": 0.4703, + "step": 28639 + }, + { + "epoch": 0.786381109280615, + "grad_norm": 0.3847358226776123, + "learning_rate": 1.3318044316333025e-05, + "loss": 0.4063, + "step": 28640 + }, + { + "epoch": 0.7864085667215815, + "grad_norm": 0.39052614569664, + "learning_rate": 1.3317636887370695e-05, + "loss": 0.4618, + "step": 28641 + }, + { + "epoch": 0.7864360241625481, + "grad_norm": 0.3938121199607849, + "learning_rate": 1.331722945221991e-05, + "loss": 0.5058, + "step": 28642 + }, + { + "epoch": 0.7864634816035145, + "grad_norm": 0.3729912042617798, + "learning_rate": 1.3316822010881439e-05, + "loss": 0.5285, + "step": 28643 + }, + { + "epoch": 0.7864909390444811, + "grad_norm": 0.45940956473350525, + "learning_rate": 1.3316414563356035e-05, + "loss": 0.5992, + "step": 28644 + }, + { + "epoch": 0.7865183964854475, + "grad_norm": 0.43041175603866577, + "learning_rate": 1.3316007109644456e-05, + "loss": 0.5529, + "step": 28645 + }, + { + "epoch": 0.7865458539264141, + "grad_norm": 0.3898368775844574, + "learning_rate": 1.3315599649747468e-05, + "loss": 0.465, + "step": 28646 + }, + { + "epoch": 0.7865733113673805, + "grad_norm": 0.4616767168045044, + "learning_rate": 1.3315192183665826e-05, + "loss": 0.456, + "step": 28647 + }, + { + "epoch": 0.7866007688083471, + "grad_norm": 0.7247854471206665, + "learning_rate": 1.3314784711400295e-05, + "loss": 0.5186, + "step": 28648 + }, + { + "epoch": 0.7866282262493136, + "grad_norm": 0.3668421506881714, + "learning_rate": 1.331437723295163e-05, + "loss": 0.435, + "step": 28649 + }, + { + "epoch": 0.78665568369028, + "grad_norm": 0.41485854983329773, + "learning_rate": 1.3313969748320597e-05, + "loss": 0.4813, + "step": 28650 + }, + { + "epoch": 0.7866831411312466, + "grad_norm": 0.3943430185317993, + "learning_rate": 1.331356225750795e-05, + "loss": 0.4827, + "step": 28651 + }, + { + "epoch": 0.786710598572213, + "grad_norm": 0.4185878336429596, + "learning_rate": 1.3313154760514453e-05, + "loss": 0.5243, + "step": 28652 + }, + { + "epoch": 0.7867380560131796, + "grad_norm": 0.45483964681625366, + "learning_rate": 1.3312747257340865e-05, + "loss": 0.528, + "step": 28653 + }, + { + "epoch": 0.786765513454146, + "grad_norm": 0.343625545501709, + "learning_rate": 1.3312339747987946e-05, + "loss": 0.4616, + "step": 28654 + }, + { + "epoch": 0.7867929708951126, + "grad_norm": 0.3839671313762665, + "learning_rate": 1.3311932232456457e-05, + "loss": 0.5358, + "step": 28655 + }, + { + "epoch": 0.7868204283360791, + "grad_norm": 0.389047771692276, + "learning_rate": 1.3311524710747157e-05, + "loss": 0.5173, + "step": 28656 + }, + { + "epoch": 0.7868478857770456, + "grad_norm": 0.46810394525527954, + "learning_rate": 1.3311117182860805e-05, + "loss": 0.3487, + "step": 28657 + }, + { + "epoch": 0.7868753432180121, + "grad_norm": 0.3342956304550171, + "learning_rate": 1.3310709648798163e-05, + "loss": 0.4087, + "step": 28658 + }, + { + "epoch": 0.7869028006589786, + "grad_norm": 0.40955907106399536, + "learning_rate": 1.3310302108559992e-05, + "loss": 0.4568, + "step": 28659 + }, + { + "epoch": 0.7869302580999451, + "grad_norm": 0.41724738478660583, + "learning_rate": 1.330989456214705e-05, + "loss": 0.5074, + "step": 28660 + }, + { + "epoch": 0.7869577155409115, + "grad_norm": 0.4353211224079132, + "learning_rate": 1.33094870095601e-05, + "loss": 0.5222, + "step": 28661 + }, + { + "epoch": 0.7869851729818781, + "grad_norm": 0.4935241639614105, + "learning_rate": 1.3309079450799898e-05, + "loss": 0.5429, + "step": 28662 + }, + { + "epoch": 0.7870126304228446, + "grad_norm": 0.4105493128299713, + "learning_rate": 1.330867188586721e-05, + "loss": 0.4969, + "step": 28663 + }, + { + "epoch": 0.7870400878638111, + "grad_norm": 0.38226690888404846, + "learning_rate": 1.3308264314762789e-05, + "loss": 0.5021, + "step": 28664 + }, + { + "epoch": 0.7870675453047776, + "grad_norm": 0.43444204330444336, + "learning_rate": 1.33078567374874e-05, + "loss": 0.5607, + "step": 28665 + }, + { + "epoch": 0.7870950027457441, + "grad_norm": 0.4139217734336853, + "learning_rate": 1.3307449154041803e-05, + "loss": 0.4535, + "step": 28666 + }, + { + "epoch": 0.7871224601867106, + "grad_norm": 0.39236581325531006, + "learning_rate": 1.330704156442676e-05, + "loss": 0.5401, + "step": 28667 + }, + { + "epoch": 0.787149917627677, + "grad_norm": 0.39120668172836304, + "learning_rate": 1.3306633968643029e-05, + "loss": 0.4666, + "step": 28668 + }, + { + "epoch": 0.7871773750686436, + "grad_norm": 0.39181020855903625, + "learning_rate": 1.3306226366691367e-05, + "loss": 0.5086, + "step": 28669 + }, + { + "epoch": 0.7872048325096102, + "grad_norm": 0.39238253235816956, + "learning_rate": 1.330581875857254e-05, + "loss": 0.4304, + "step": 28670 + }, + { + "epoch": 0.7872322899505766, + "grad_norm": 0.3857230246067047, + "learning_rate": 1.3305411144287304e-05, + "loss": 0.5496, + "step": 28671 + }, + { + "epoch": 0.7872597473915431, + "grad_norm": 0.3652074933052063, + "learning_rate": 1.3305003523836422e-05, + "loss": 0.4587, + "step": 28672 + }, + { + "epoch": 0.7872872048325096, + "grad_norm": 0.3866010010242462, + "learning_rate": 1.3304595897220653e-05, + "loss": 0.4318, + "step": 28673 + }, + { + "epoch": 0.7873146622734761, + "grad_norm": 0.4507792890071869, + "learning_rate": 1.3304188264440758e-05, + "loss": 0.5441, + "step": 28674 + }, + { + "epoch": 0.7873421197144426, + "grad_norm": 0.37840813398361206, + "learning_rate": 1.33037806254975e-05, + "loss": 0.5038, + "step": 28675 + }, + { + "epoch": 0.7873695771554091, + "grad_norm": 0.38228511810302734, + "learning_rate": 1.3303372980391633e-05, + "loss": 0.4543, + "step": 28676 + }, + { + "epoch": 0.7873970345963757, + "grad_norm": 0.40321415662765503, + "learning_rate": 1.3302965329123925e-05, + "loss": 0.5197, + "step": 28677 + }, + { + "epoch": 0.7874244920373421, + "grad_norm": 0.3747674822807312, + "learning_rate": 1.3302557671695131e-05, + "loss": 0.5046, + "step": 28678 + }, + { + "epoch": 0.7874519494783087, + "grad_norm": 0.5481221079826355, + "learning_rate": 1.330215000810601e-05, + "loss": 0.5062, + "step": 28679 + }, + { + "epoch": 0.7874794069192751, + "grad_norm": 0.37306562066078186, + "learning_rate": 1.3301742338357328e-05, + "loss": 0.4624, + "step": 28680 + }, + { + "epoch": 0.7875068643602416, + "grad_norm": 0.4052174985408783, + "learning_rate": 1.330133466244984e-05, + "loss": 0.5054, + "step": 28681 + }, + { + "epoch": 0.7875343218012081, + "grad_norm": 0.4206315279006958, + "learning_rate": 1.3300926980384315e-05, + "loss": 0.4876, + "step": 28682 + }, + { + "epoch": 0.7875617792421746, + "grad_norm": 0.42313152551651, + "learning_rate": 1.3300519292161502e-05, + "loss": 0.4331, + "step": 28683 + }, + { + "epoch": 0.7875892366831412, + "grad_norm": 0.4104222059249878, + "learning_rate": 1.3300111597782171e-05, + "loss": 0.554, + "step": 28684 + }, + { + "epoch": 0.7876166941241076, + "grad_norm": 0.38474753499031067, + "learning_rate": 1.3299703897247075e-05, + "loss": 0.5117, + "step": 28685 + }, + { + "epoch": 0.7876441515650742, + "grad_norm": 0.6084360480308533, + "learning_rate": 1.329929619055698e-05, + "loss": 0.6342, + "step": 28686 + }, + { + "epoch": 0.7876716090060406, + "grad_norm": 0.4092370867729187, + "learning_rate": 1.3298888477712646e-05, + "loss": 0.5296, + "step": 28687 + }, + { + "epoch": 0.7876990664470072, + "grad_norm": 0.38154953718185425, + "learning_rate": 1.329848075871483e-05, + "loss": 0.5152, + "step": 28688 + }, + { + "epoch": 0.7877265238879736, + "grad_norm": 0.38066861033439636, + "learning_rate": 1.3298073033564296e-05, + "loss": 0.4978, + "step": 28689 + }, + { + "epoch": 0.7877539813289401, + "grad_norm": 0.4489380121231079, + "learning_rate": 1.3297665302261802e-05, + "loss": 0.5354, + "step": 28690 + }, + { + "epoch": 0.7877814387699067, + "grad_norm": 0.3307829797267914, + "learning_rate": 1.3297257564808108e-05, + "loss": 0.3817, + "step": 28691 + }, + { + "epoch": 0.7878088962108731, + "grad_norm": 0.39020416140556335, + "learning_rate": 1.329684982120398e-05, + "loss": 0.4815, + "step": 28692 + }, + { + "epoch": 0.7878363536518397, + "grad_norm": 0.39014992117881775, + "learning_rate": 1.3296442071450175e-05, + "loss": 0.3909, + "step": 28693 + }, + { + "epoch": 0.7878638110928061, + "grad_norm": 0.37285593152046204, + "learning_rate": 1.329603431554745e-05, + "loss": 0.4063, + "step": 28694 + }, + { + "epoch": 0.7878912685337727, + "grad_norm": 0.441509485244751, + "learning_rate": 1.3295626553496573e-05, + "loss": 0.5577, + "step": 28695 + }, + { + "epoch": 0.7879187259747391, + "grad_norm": 0.35273265838623047, + "learning_rate": 1.3295218785298296e-05, + "loss": 0.4714, + "step": 28696 + }, + { + "epoch": 0.7879461834157057, + "grad_norm": 0.696442186832428, + "learning_rate": 1.3294811010953388e-05, + "loss": 0.5854, + "step": 28697 + }, + { + "epoch": 0.7879736408566722, + "grad_norm": 0.374620646238327, + "learning_rate": 1.3294403230462606e-05, + "loss": 0.4804, + "step": 28698 + }, + { + "epoch": 0.7880010982976386, + "grad_norm": 0.4309881329536438, + "learning_rate": 1.3293995443826706e-05, + "loss": 0.5358, + "step": 28699 + }, + { + "epoch": 0.7880285557386052, + "grad_norm": 0.3447522521018982, + "learning_rate": 1.3293587651046458e-05, + "loss": 0.3828, + "step": 28700 + }, + { + "epoch": 0.7880560131795716, + "grad_norm": 0.4072777330875397, + "learning_rate": 1.3293179852122613e-05, + "loss": 0.4494, + "step": 28701 + }, + { + "epoch": 0.7880834706205382, + "grad_norm": 0.3874247074127197, + "learning_rate": 1.3292772047055942e-05, + "loss": 0.4562, + "step": 28702 + }, + { + "epoch": 0.7881109280615046, + "grad_norm": 0.3823643922805786, + "learning_rate": 1.3292364235847196e-05, + "loss": 0.3909, + "step": 28703 + }, + { + "epoch": 0.7881383855024712, + "grad_norm": 0.3705933392047882, + "learning_rate": 1.3291956418497143e-05, + "loss": 0.5745, + "step": 28704 + }, + { + "epoch": 0.7881658429434377, + "grad_norm": 0.4128609001636505, + "learning_rate": 1.329154859500654e-05, + "loss": 0.4475, + "step": 28705 + }, + { + "epoch": 0.7881933003844042, + "grad_norm": 0.3666200637817383, + "learning_rate": 1.3291140765376147e-05, + "loss": 0.4679, + "step": 28706 + }, + { + "epoch": 0.7882207578253707, + "grad_norm": 0.3920372724533081, + "learning_rate": 1.3290732929606727e-05, + "loss": 0.5515, + "step": 28707 + }, + { + "epoch": 0.7882482152663371, + "grad_norm": 0.3892602324485779, + "learning_rate": 1.3290325087699039e-05, + "loss": 0.6272, + "step": 28708 + }, + { + "epoch": 0.7882756727073037, + "grad_norm": 0.40660855174064636, + "learning_rate": 1.3289917239653845e-05, + "loss": 0.5492, + "step": 28709 + }, + { + "epoch": 0.7883031301482701, + "grad_norm": 0.3779171407222748, + "learning_rate": 1.3289509385471906e-05, + "loss": 0.4956, + "step": 28710 + }, + { + "epoch": 0.7883305875892367, + "grad_norm": 0.7255706787109375, + "learning_rate": 1.3289101525153982e-05, + "loss": 0.4457, + "step": 28711 + }, + { + "epoch": 0.7883580450302032, + "grad_norm": 0.540687084197998, + "learning_rate": 1.3288693658700833e-05, + "loss": 0.5702, + "step": 28712 + }, + { + "epoch": 0.7883855024711697, + "grad_norm": 0.40479639172554016, + "learning_rate": 1.3288285786113221e-05, + "loss": 0.5408, + "step": 28713 + }, + { + "epoch": 0.7884129599121362, + "grad_norm": 0.3845753073692322, + "learning_rate": 1.3287877907391906e-05, + "loss": 0.444, + "step": 28714 + }, + { + "epoch": 0.7884404173531027, + "grad_norm": 0.3765038549900055, + "learning_rate": 1.328747002253765e-05, + "loss": 0.4872, + "step": 28715 + }, + { + "epoch": 0.7884678747940692, + "grad_norm": 0.3913820683956146, + "learning_rate": 1.3287062131551214e-05, + "loss": 0.5165, + "step": 28716 + }, + { + "epoch": 0.7884953322350357, + "grad_norm": 0.34332478046417236, + "learning_rate": 1.3286654234433356e-05, + "loss": 0.4469, + "step": 28717 + }, + { + "epoch": 0.7885227896760022, + "grad_norm": 0.4851280152797699, + "learning_rate": 1.328624633118484e-05, + "loss": 0.5554, + "step": 28718 + }, + { + "epoch": 0.7885502471169687, + "grad_norm": 0.5946874618530273, + "learning_rate": 1.3285838421806426e-05, + "loss": 0.4878, + "step": 28719 + }, + { + "epoch": 0.7885777045579352, + "grad_norm": 0.35871437191963196, + "learning_rate": 1.3285430506298876e-05, + "loss": 0.4648, + "step": 28720 + }, + { + "epoch": 0.7886051619989017, + "grad_norm": 0.4237557053565979, + "learning_rate": 1.3285022584662948e-05, + "loss": 0.5119, + "step": 28721 + }, + { + "epoch": 0.7886326194398682, + "grad_norm": 0.4222288131713867, + "learning_rate": 1.3284614656899402e-05, + "loss": 0.5272, + "step": 28722 + }, + { + "epoch": 0.7886600768808347, + "grad_norm": 0.34645676612854004, + "learning_rate": 1.3284206723009006e-05, + "loss": 0.4642, + "step": 28723 + }, + { + "epoch": 0.7886875343218012, + "grad_norm": 0.36718112230300903, + "learning_rate": 1.3283798782992514e-05, + "loss": 0.522, + "step": 28724 + }, + { + "epoch": 0.7887149917627677, + "grad_norm": 0.39704757928848267, + "learning_rate": 1.3283390836850686e-05, + "loss": 0.5583, + "step": 28725 + }, + { + "epoch": 0.7887424492037343, + "grad_norm": 0.37737664580345154, + "learning_rate": 1.328298288458429e-05, + "loss": 0.4623, + "step": 28726 + }, + { + "epoch": 0.7887699066447007, + "grad_norm": 0.4209991693496704, + "learning_rate": 1.3282574926194083e-05, + "loss": 0.5249, + "step": 28727 + }, + { + "epoch": 0.7887973640856673, + "grad_norm": 0.34382298588752747, + "learning_rate": 1.3282166961680827e-05, + "loss": 0.4697, + "step": 28728 + }, + { + "epoch": 0.7888248215266337, + "grad_norm": 0.3796181380748749, + "learning_rate": 1.328175899104528e-05, + "loss": 0.4721, + "step": 28729 + }, + { + "epoch": 0.7888522789676002, + "grad_norm": 0.364887535572052, + "learning_rate": 1.3281351014288205e-05, + "loss": 0.3642, + "step": 28730 + }, + { + "epoch": 0.7888797364085667, + "grad_norm": 0.347625732421875, + "learning_rate": 1.3280943031410365e-05, + "loss": 0.4582, + "step": 28731 + }, + { + "epoch": 0.7889071938495332, + "grad_norm": 0.3529820144176483, + "learning_rate": 1.3280535042412515e-05, + "loss": 0.3773, + "step": 28732 + }, + { + "epoch": 0.7889346512904998, + "grad_norm": 0.35188260674476624, + "learning_rate": 1.3280127047295424e-05, + "loss": 0.4708, + "step": 28733 + }, + { + "epoch": 0.7889621087314662, + "grad_norm": 0.38121113181114197, + "learning_rate": 1.3279719046059849e-05, + "loss": 0.531, + "step": 28734 + }, + { + "epoch": 0.7889895661724328, + "grad_norm": 0.4306797981262207, + "learning_rate": 1.3279311038706548e-05, + "loss": 0.4604, + "step": 28735 + }, + { + "epoch": 0.7890170236133992, + "grad_norm": 0.3811364471912384, + "learning_rate": 1.3278903025236289e-05, + "loss": 0.3897, + "step": 28736 + }, + { + "epoch": 0.7890444810543658, + "grad_norm": 0.3542564809322357, + "learning_rate": 1.327849500564983e-05, + "loss": 0.4293, + "step": 28737 + }, + { + "epoch": 0.7890719384953322, + "grad_norm": 0.3825948238372803, + "learning_rate": 1.3278086979947927e-05, + "loss": 0.4534, + "step": 28738 + }, + { + "epoch": 0.7890993959362987, + "grad_norm": 0.3584884703159332, + "learning_rate": 1.3277678948131349e-05, + "loss": 0.5225, + "step": 28739 + }, + { + "epoch": 0.7891268533772653, + "grad_norm": 0.3880850672721863, + "learning_rate": 1.327727091020085e-05, + "loss": 0.4346, + "step": 28740 + }, + { + "epoch": 0.7891543108182317, + "grad_norm": 0.3914419114589691, + "learning_rate": 1.3276862866157199e-05, + "loss": 0.4989, + "step": 28741 + }, + { + "epoch": 0.7891817682591983, + "grad_norm": 0.40792304277420044, + "learning_rate": 1.3276454816001151e-05, + "loss": 0.5285, + "step": 28742 + }, + { + "epoch": 0.7892092257001647, + "grad_norm": 0.38914453983306885, + "learning_rate": 1.3276046759733468e-05, + "loss": 0.4984, + "step": 28743 + }, + { + "epoch": 0.7892366831411313, + "grad_norm": 0.4196838140487671, + "learning_rate": 1.3275638697354915e-05, + "loss": 0.4622, + "step": 28744 + }, + { + "epoch": 0.7892641405820977, + "grad_norm": 0.4279472827911377, + "learning_rate": 1.3275230628866247e-05, + "loss": 0.4798, + "step": 28745 + }, + { + "epoch": 0.7892915980230643, + "grad_norm": 0.3705406188964844, + "learning_rate": 1.3274822554268232e-05, + "loss": 0.4879, + "step": 28746 + }, + { + "epoch": 0.7893190554640308, + "grad_norm": 0.38727065920829773, + "learning_rate": 1.3274414473561628e-05, + "loss": 0.4768, + "step": 28747 + }, + { + "epoch": 0.7893465129049972, + "grad_norm": 0.34978559613227844, + "learning_rate": 1.3274006386747192e-05, + "loss": 0.471, + "step": 28748 + }, + { + "epoch": 0.7893739703459638, + "grad_norm": 0.41685059666633606, + "learning_rate": 1.3273598293825691e-05, + "loss": 0.5586, + "step": 28749 + }, + { + "epoch": 0.7894014277869302, + "grad_norm": 0.4733380675315857, + "learning_rate": 1.3273190194797882e-05, + "loss": 0.5458, + "step": 28750 + }, + { + "epoch": 0.7894288852278968, + "grad_norm": 0.40309932827949524, + "learning_rate": 1.3272782089664533e-05, + "loss": 0.5719, + "step": 28751 + }, + { + "epoch": 0.7894563426688632, + "grad_norm": 0.45125052332878113, + "learning_rate": 1.32723739784264e-05, + "loss": 0.5269, + "step": 28752 + }, + { + "epoch": 0.7894838001098298, + "grad_norm": 0.3974987864494324, + "learning_rate": 1.3271965861084243e-05, + "loss": 0.5641, + "step": 28753 + }, + { + "epoch": 0.7895112575507963, + "grad_norm": 0.41609787940979004, + "learning_rate": 1.3271557737638828e-05, + "loss": 0.4734, + "step": 28754 + }, + { + "epoch": 0.7895387149917628, + "grad_norm": 0.3417462408542633, + "learning_rate": 1.3271149608090912e-05, + "loss": 0.4477, + "step": 28755 + }, + { + "epoch": 0.7895661724327293, + "grad_norm": 0.3700108826160431, + "learning_rate": 1.3270741472441258e-05, + "loss": 0.5435, + "step": 28756 + }, + { + "epoch": 0.7895936298736957, + "grad_norm": 0.41643938422203064, + "learning_rate": 1.3270333330690629e-05, + "loss": 0.4562, + "step": 28757 + }, + { + "epoch": 0.7896210873146623, + "grad_norm": 0.3777387738227844, + "learning_rate": 1.326992518283978e-05, + "loss": 0.4687, + "step": 28758 + }, + { + "epoch": 0.7896485447556287, + "grad_norm": 0.4808649718761444, + "learning_rate": 1.326951702888948e-05, + "loss": 0.5712, + "step": 28759 + }, + { + "epoch": 0.7896760021965953, + "grad_norm": 0.37809059023857117, + "learning_rate": 1.3269108868840488e-05, + "loss": 0.5197, + "step": 28760 + }, + { + "epoch": 0.7897034596375618, + "grad_norm": 0.6567462086677551, + "learning_rate": 1.3268700702693561e-05, + "loss": 0.5087, + "step": 28761 + }, + { + "epoch": 0.7897309170785283, + "grad_norm": 0.37763139605522156, + "learning_rate": 1.326829253044947e-05, + "loss": 0.5108, + "step": 28762 + }, + { + "epoch": 0.7897583745194948, + "grad_norm": 0.36527568101882935, + "learning_rate": 1.3267884352108964e-05, + "loss": 0.4703, + "step": 28763 + }, + { + "epoch": 0.7897858319604613, + "grad_norm": 0.414664089679718, + "learning_rate": 1.3267476167672815e-05, + "loss": 0.5171, + "step": 28764 + }, + { + "epoch": 0.7898132894014278, + "grad_norm": 0.3772841989994049, + "learning_rate": 1.326706797714178e-05, + "loss": 0.4724, + "step": 28765 + }, + { + "epoch": 0.7898407468423942, + "grad_norm": 0.44333040714263916, + "learning_rate": 1.3266659780516618e-05, + "loss": 0.5378, + "step": 28766 + }, + { + "epoch": 0.7898682042833608, + "grad_norm": 0.3829750716686249, + "learning_rate": 1.3266251577798095e-05, + "loss": 0.5229, + "step": 28767 + }, + { + "epoch": 0.7898956617243273, + "grad_norm": 0.4057861864566803, + "learning_rate": 1.3265843368986971e-05, + "loss": 0.4957, + "step": 28768 + }, + { + "epoch": 0.7899231191652938, + "grad_norm": 0.35993650555610657, + "learning_rate": 1.3265435154084003e-05, + "loss": 0.4533, + "step": 28769 + }, + { + "epoch": 0.7899505766062603, + "grad_norm": 0.3637941777706146, + "learning_rate": 1.326502693308996e-05, + "loss": 0.561, + "step": 28770 + }, + { + "epoch": 0.7899780340472268, + "grad_norm": 0.37555134296417236, + "learning_rate": 1.3264618706005595e-05, + "loss": 0.5442, + "step": 28771 + }, + { + "epoch": 0.7900054914881933, + "grad_norm": 0.38919374346733093, + "learning_rate": 1.3264210472831679e-05, + "loss": 0.5717, + "step": 28772 + }, + { + "epoch": 0.7900329489291598, + "grad_norm": 0.4083525240421295, + "learning_rate": 1.3263802233568968e-05, + "loss": 0.4594, + "step": 28773 + }, + { + "epoch": 0.7900604063701263, + "grad_norm": 0.3497200310230255, + "learning_rate": 1.326339398821822e-05, + "loss": 0.5545, + "step": 28774 + }, + { + "epoch": 0.7900878638110929, + "grad_norm": 0.440510094165802, + "learning_rate": 1.3262985736780205e-05, + "loss": 0.5471, + "step": 28775 + }, + { + "epoch": 0.7901153212520593, + "grad_norm": 0.41741400957107544, + "learning_rate": 1.3262577479255678e-05, + "loss": 0.5417, + "step": 28776 + }, + { + "epoch": 0.7901427786930258, + "grad_norm": 0.38766446709632874, + "learning_rate": 1.3262169215645405e-05, + "loss": 0.4905, + "step": 28777 + }, + { + "epoch": 0.7901702361339923, + "grad_norm": 0.36024513840675354, + "learning_rate": 1.3261760945950144e-05, + "loss": 0.386, + "step": 28778 + }, + { + "epoch": 0.7901976935749588, + "grad_norm": 0.47505369782447815, + "learning_rate": 1.3261352670170657e-05, + "loss": 0.5368, + "step": 28779 + }, + { + "epoch": 0.7902251510159253, + "grad_norm": 0.4676710367202759, + "learning_rate": 1.3260944388307708e-05, + "loss": 0.6109, + "step": 28780 + }, + { + "epoch": 0.7902526084568918, + "grad_norm": 0.37154754996299744, + "learning_rate": 1.3260536100362055e-05, + "loss": 0.4531, + "step": 28781 + }, + { + "epoch": 0.7902800658978584, + "grad_norm": 0.34991809725761414, + "learning_rate": 1.3260127806334463e-05, + "loss": 0.4542, + "step": 28782 + }, + { + "epoch": 0.7903075233388248, + "grad_norm": 0.3522988557815552, + "learning_rate": 1.3259719506225692e-05, + "loss": 0.479, + "step": 28783 + }, + { + "epoch": 0.7903349807797914, + "grad_norm": 0.3739938735961914, + "learning_rate": 1.3259311200036502e-05, + "loss": 0.4743, + "step": 28784 + }, + { + "epoch": 0.7903624382207578, + "grad_norm": 0.362521767616272, + "learning_rate": 1.325890288776766e-05, + "loss": 0.4635, + "step": 28785 + }, + { + "epoch": 0.7903898956617244, + "grad_norm": 0.33983349800109863, + "learning_rate": 1.3258494569419922e-05, + "loss": 0.4133, + "step": 28786 + }, + { + "epoch": 0.7904173531026908, + "grad_norm": 0.35386502742767334, + "learning_rate": 1.325808624499405e-05, + "loss": 0.4941, + "step": 28787 + }, + { + "epoch": 0.7904448105436573, + "grad_norm": 0.4481097161769867, + "learning_rate": 1.325767791449081e-05, + "loss": 0.5186, + "step": 28788 + }, + { + "epoch": 0.7904722679846238, + "grad_norm": 0.4191543459892273, + "learning_rate": 1.3257269577910959e-05, + "loss": 0.4725, + "step": 28789 + }, + { + "epoch": 0.7904997254255903, + "grad_norm": 0.3973647654056549, + "learning_rate": 1.3256861235255261e-05, + "loss": 0.4125, + "step": 28790 + }, + { + "epoch": 0.7905271828665569, + "grad_norm": 0.3925064206123352, + "learning_rate": 1.3256452886524477e-05, + "loss": 0.4629, + "step": 28791 + }, + { + "epoch": 0.7905546403075233, + "grad_norm": 0.33964601159095764, + "learning_rate": 1.325604453171937e-05, + "loss": 0.4058, + "step": 28792 + }, + { + "epoch": 0.7905820977484899, + "grad_norm": 0.3849335014820099, + "learning_rate": 1.32556361708407e-05, + "loss": 0.5544, + "step": 28793 + }, + { + "epoch": 0.7906095551894563, + "grad_norm": 0.37691089510917664, + "learning_rate": 1.3255227803889228e-05, + "loss": 0.4062, + "step": 28794 + }, + { + "epoch": 0.7906370126304229, + "grad_norm": 0.4426472783088684, + "learning_rate": 1.325481943086572e-05, + "loss": 0.5622, + "step": 28795 + }, + { + "epoch": 0.7906644700713893, + "grad_norm": 0.4006551504135132, + "learning_rate": 1.3254411051770935e-05, + "loss": 0.5441, + "step": 28796 + }, + { + "epoch": 0.7906919275123558, + "grad_norm": 0.42031607031822205, + "learning_rate": 1.3254002666605632e-05, + "loss": 0.4502, + "step": 28797 + }, + { + "epoch": 0.7907193849533224, + "grad_norm": 0.3763772249221802, + "learning_rate": 1.3253594275370579e-05, + "loss": 0.5356, + "step": 28798 + }, + { + "epoch": 0.7907468423942888, + "grad_norm": 0.4517800211906433, + "learning_rate": 1.3253185878066533e-05, + "loss": 0.5563, + "step": 28799 + }, + { + "epoch": 0.7907742998352554, + "grad_norm": 0.383159875869751, + "learning_rate": 1.3252777474694253e-05, + "loss": 0.4516, + "step": 28800 + }, + { + "epoch": 0.7908017572762218, + "grad_norm": 0.3485627770423889, + "learning_rate": 1.325236906525451e-05, + "loss": 0.4104, + "step": 28801 + }, + { + "epoch": 0.7908292147171884, + "grad_norm": 0.40728214383125305, + "learning_rate": 1.3251960649748059e-05, + "loss": 0.4653, + "step": 28802 + }, + { + "epoch": 0.7908566721581548, + "grad_norm": 0.393544465303421, + "learning_rate": 1.3251552228175664e-05, + "loss": 0.5353, + "step": 28803 + }, + { + "epoch": 0.7908841295991214, + "grad_norm": 0.3621717691421509, + "learning_rate": 1.3251143800538086e-05, + "loss": 0.4772, + "step": 28804 + }, + { + "epoch": 0.7909115870400879, + "grad_norm": 0.41948407888412476, + "learning_rate": 1.3250735366836086e-05, + "loss": 0.5562, + "step": 28805 + }, + { + "epoch": 0.7909390444810543, + "grad_norm": 0.40144652128219604, + "learning_rate": 1.3250326927070429e-05, + "loss": 0.4751, + "step": 28806 + }, + { + "epoch": 0.7909665019220209, + "grad_norm": 0.3527287542819977, + "learning_rate": 1.3249918481241873e-05, + "loss": 0.5219, + "step": 28807 + }, + { + "epoch": 0.7909939593629873, + "grad_norm": 0.346693217754364, + "learning_rate": 1.3249510029351182e-05, + "loss": 0.4092, + "step": 28808 + }, + { + "epoch": 0.7910214168039539, + "grad_norm": 0.3968033790588379, + "learning_rate": 1.3249101571399118e-05, + "loss": 0.4977, + "step": 28809 + }, + { + "epoch": 0.7910488742449203, + "grad_norm": 0.4277237355709076, + "learning_rate": 1.3248693107386444e-05, + "loss": 0.5526, + "step": 28810 + }, + { + "epoch": 0.7910763316858869, + "grad_norm": 0.42285820841789246, + "learning_rate": 1.324828463731392e-05, + "loss": 0.5252, + "step": 28811 + }, + { + "epoch": 0.7911037891268534, + "grad_norm": 0.3829896152019501, + "learning_rate": 1.3247876161182308e-05, + "loss": 0.4626, + "step": 28812 + }, + { + "epoch": 0.7911312465678199, + "grad_norm": 0.35425707697868347, + "learning_rate": 1.3247467678992369e-05, + "loss": 0.479, + "step": 28813 + }, + { + "epoch": 0.7911587040087864, + "grad_norm": 0.4092322587966919, + "learning_rate": 1.3247059190744869e-05, + "loss": 0.5027, + "step": 28814 + }, + { + "epoch": 0.7911861614497528, + "grad_norm": 0.4137939512729645, + "learning_rate": 1.3246650696440563e-05, + "loss": 0.5203, + "step": 28815 + }, + { + "epoch": 0.7912136188907194, + "grad_norm": 0.5959417819976807, + "learning_rate": 1.3246242196080221e-05, + "loss": 0.4806, + "step": 28816 + }, + { + "epoch": 0.7912410763316858, + "grad_norm": 0.38896676898002625, + "learning_rate": 1.3245833689664602e-05, + "loss": 0.4062, + "step": 28817 + }, + { + "epoch": 0.7912685337726524, + "grad_norm": 0.4118989109992981, + "learning_rate": 1.3245425177194465e-05, + "loss": 0.4699, + "step": 28818 + }, + { + "epoch": 0.7912959912136189, + "grad_norm": 0.351879358291626, + "learning_rate": 1.3245016658670573e-05, + "loss": 0.4054, + "step": 28819 + }, + { + "epoch": 0.7913234486545854, + "grad_norm": 0.3627587854862213, + "learning_rate": 1.324460813409369e-05, + "loss": 0.4361, + "step": 28820 + }, + { + "epoch": 0.7913509060955519, + "grad_norm": 0.38015952706336975, + "learning_rate": 1.3244199603464581e-05, + "loss": 0.5092, + "step": 28821 + }, + { + "epoch": 0.7913783635365184, + "grad_norm": 0.4279283881187439, + "learning_rate": 1.3243791066784e-05, + "loss": 0.4967, + "step": 28822 + }, + { + "epoch": 0.7914058209774849, + "grad_norm": 0.3671358823776245, + "learning_rate": 1.3243382524052715e-05, + "loss": 0.5393, + "step": 28823 + }, + { + "epoch": 0.7914332784184513, + "grad_norm": 0.39224401116371155, + "learning_rate": 1.3242973975271487e-05, + "loss": 0.5066, + "step": 28824 + }, + { + "epoch": 0.7914607358594179, + "grad_norm": 0.40928715467453003, + "learning_rate": 1.3242565420441075e-05, + "loss": 0.6175, + "step": 28825 + }, + { + "epoch": 0.7914881933003844, + "grad_norm": 0.41364970803260803, + "learning_rate": 1.3242156859562245e-05, + "loss": 0.5509, + "step": 28826 + }, + { + "epoch": 0.7915156507413509, + "grad_norm": 0.4603220522403717, + "learning_rate": 1.3241748292635758e-05, + "loss": 0.5254, + "step": 28827 + }, + { + "epoch": 0.7915431081823174, + "grad_norm": 0.402834415435791, + "learning_rate": 1.3241339719662377e-05, + "loss": 0.5306, + "step": 28828 + }, + { + "epoch": 0.7915705656232839, + "grad_norm": 0.37796106934547424, + "learning_rate": 1.324093114064286e-05, + "loss": 0.4772, + "step": 28829 + }, + { + "epoch": 0.7915980230642504, + "grad_norm": 0.44534188508987427, + "learning_rate": 1.3240522555577975e-05, + "loss": 0.5066, + "step": 28830 + }, + { + "epoch": 0.7916254805052169, + "grad_norm": 0.3886604905128479, + "learning_rate": 1.324011396446848e-05, + "loss": 0.4695, + "step": 28831 + }, + { + "epoch": 0.7916529379461834, + "grad_norm": 0.3656606674194336, + "learning_rate": 1.323970536731514e-05, + "loss": 0.4217, + "step": 28832 + }, + { + "epoch": 0.79168039538715, + "grad_norm": 0.414870947599411, + "learning_rate": 1.3239296764118713e-05, + "loss": 0.5656, + "step": 28833 + }, + { + "epoch": 0.7917078528281164, + "grad_norm": 0.49546369910240173, + "learning_rate": 1.3238888154879963e-05, + "loss": 0.5589, + "step": 28834 + }, + { + "epoch": 0.791735310269083, + "grad_norm": 0.3685397505760193, + "learning_rate": 1.3238479539599654e-05, + "loss": 0.4269, + "step": 28835 + }, + { + "epoch": 0.7917627677100494, + "grad_norm": 0.39196398854255676, + "learning_rate": 1.3238070918278548e-05, + "loss": 0.6068, + "step": 28836 + }, + { + "epoch": 0.7917902251510159, + "grad_norm": 0.3487417995929718, + "learning_rate": 1.3237662290917406e-05, + "loss": 0.4564, + "step": 28837 + }, + { + "epoch": 0.7918176825919824, + "grad_norm": 0.4145239591598511, + "learning_rate": 1.3237253657516988e-05, + "loss": 0.5148, + "step": 28838 + }, + { + "epoch": 0.7918451400329489, + "grad_norm": 0.36039966344833374, + "learning_rate": 1.3236845018078061e-05, + "loss": 0.5154, + "step": 28839 + }, + { + "epoch": 0.7918725974739155, + "grad_norm": 0.3527248799800873, + "learning_rate": 1.3236436372601385e-05, + "loss": 0.534, + "step": 28840 + }, + { + "epoch": 0.7919000549148819, + "grad_norm": 0.38374635577201843, + "learning_rate": 1.3236027721087724e-05, + "loss": 0.4675, + "step": 28841 + }, + { + "epoch": 0.7919275123558485, + "grad_norm": 0.374239057302475, + "learning_rate": 1.3235619063537835e-05, + "loss": 0.547, + "step": 28842 + }, + { + "epoch": 0.7919549697968149, + "grad_norm": 0.35949474573135376, + "learning_rate": 1.3235210399952485e-05, + "loss": 0.5014, + "step": 28843 + }, + { + "epoch": 0.7919824272377815, + "grad_norm": 0.4086843430995941, + "learning_rate": 1.3234801730332436e-05, + "loss": 0.5773, + "step": 28844 + }, + { + "epoch": 0.7920098846787479, + "grad_norm": 0.366864413022995, + "learning_rate": 1.3234393054678447e-05, + "loss": 0.4851, + "step": 28845 + }, + { + "epoch": 0.7920373421197144, + "grad_norm": 0.37816229462623596, + "learning_rate": 1.3233984372991284e-05, + "loss": 0.4902, + "step": 28846 + }, + { + "epoch": 0.792064799560681, + "grad_norm": 0.38824498653411865, + "learning_rate": 1.3233575685271709e-05, + "loss": 0.5251, + "step": 28847 + }, + { + "epoch": 0.7920922570016474, + "grad_norm": 0.41435888409614563, + "learning_rate": 1.3233166991520482e-05, + "loss": 0.6081, + "step": 28848 + }, + { + "epoch": 0.792119714442614, + "grad_norm": 0.39882898330688477, + "learning_rate": 1.3232758291738367e-05, + "loss": 0.4776, + "step": 28849 + }, + { + "epoch": 0.7921471718835804, + "grad_norm": 0.3463805615901947, + "learning_rate": 1.3232349585926128e-05, + "loss": 0.4811, + "step": 28850 + }, + { + "epoch": 0.792174629324547, + "grad_norm": 0.33547157049179077, + "learning_rate": 1.323194087408452e-05, + "loss": 0.3953, + "step": 28851 + }, + { + "epoch": 0.7922020867655134, + "grad_norm": 0.4081627130508423, + "learning_rate": 1.3231532156214315e-05, + "loss": 0.4782, + "step": 28852 + }, + { + "epoch": 0.79222954420648, + "grad_norm": 0.3658817708492279, + "learning_rate": 1.3231123432316272e-05, + "loss": 0.4957, + "step": 28853 + }, + { + "epoch": 0.7922570016474465, + "grad_norm": 0.3836922347545624, + "learning_rate": 1.3230714702391152e-05, + "loss": 0.5295, + "step": 28854 + }, + { + "epoch": 0.7922844590884129, + "grad_norm": 0.37357601523399353, + "learning_rate": 1.3230305966439715e-05, + "loss": 0.4502, + "step": 28855 + }, + { + "epoch": 0.7923119165293795, + "grad_norm": 0.36162295937538147, + "learning_rate": 1.3229897224462728e-05, + "loss": 0.4605, + "step": 28856 + }, + { + "epoch": 0.7923393739703459, + "grad_norm": 0.40765616297721863, + "learning_rate": 1.3229488476460952e-05, + "loss": 0.497, + "step": 28857 + }, + { + "epoch": 0.7923668314113125, + "grad_norm": 0.3485613167285919, + "learning_rate": 1.322907972243515e-05, + "loss": 0.5149, + "step": 28858 + }, + { + "epoch": 0.7923942888522789, + "grad_norm": 0.42321762442588806, + "learning_rate": 1.3228670962386084e-05, + "loss": 0.4979, + "step": 28859 + }, + { + "epoch": 0.7924217462932455, + "grad_norm": 0.3583534061908722, + "learning_rate": 1.3228262196314515e-05, + "loss": 0.4873, + "step": 28860 + }, + { + "epoch": 0.792449203734212, + "grad_norm": 0.42862364649772644, + "learning_rate": 1.3227853424221206e-05, + "loss": 0.4781, + "step": 28861 + }, + { + "epoch": 0.7924766611751785, + "grad_norm": 0.4304666817188263, + "learning_rate": 1.3227444646106922e-05, + "loss": 0.5133, + "step": 28862 + }, + { + "epoch": 0.792504118616145, + "grad_norm": 0.38164833188056946, + "learning_rate": 1.3227035861972421e-05, + "loss": 0.489, + "step": 28863 + }, + { + "epoch": 0.7925315760571114, + "grad_norm": 0.33918261528015137, + "learning_rate": 1.322662707181847e-05, + "loss": 0.4562, + "step": 28864 + }, + { + "epoch": 0.792559033498078, + "grad_norm": 0.38227182626724243, + "learning_rate": 1.322621827564583e-05, + "loss": 0.4261, + "step": 28865 + }, + { + "epoch": 0.7925864909390444, + "grad_norm": 0.39893192052841187, + "learning_rate": 1.3225809473455264e-05, + "loss": 0.542, + "step": 28866 + }, + { + "epoch": 0.792613948380011, + "grad_norm": 0.4717659056186676, + "learning_rate": 1.3225400665247532e-05, + "loss": 0.506, + "step": 28867 + }, + { + "epoch": 0.7926414058209775, + "grad_norm": 0.37798482179641724, + "learning_rate": 1.3224991851023399e-05, + "loss": 0.4592, + "step": 28868 + }, + { + "epoch": 0.792668863261944, + "grad_norm": 0.36725515127182007, + "learning_rate": 1.3224583030783626e-05, + "loss": 0.4341, + "step": 28869 + }, + { + "epoch": 0.7926963207029105, + "grad_norm": 0.4269859492778778, + "learning_rate": 1.3224174204528978e-05, + "loss": 0.48, + "step": 28870 + }, + { + "epoch": 0.792723778143877, + "grad_norm": 0.4400424361228943, + "learning_rate": 1.3223765372260216e-05, + "loss": 0.4848, + "step": 28871 + }, + { + "epoch": 0.7927512355848435, + "grad_norm": 0.3910837173461914, + "learning_rate": 1.32233565339781e-05, + "loss": 0.4968, + "step": 28872 + }, + { + "epoch": 0.7927786930258099, + "grad_norm": 0.39259791374206543, + "learning_rate": 1.3222947689683398e-05, + "loss": 0.5624, + "step": 28873 + }, + { + "epoch": 0.7928061504667765, + "grad_norm": 0.35314613580703735, + "learning_rate": 1.322253883937687e-05, + "loss": 0.3769, + "step": 28874 + }, + { + "epoch": 0.792833607907743, + "grad_norm": 0.4707760810852051, + "learning_rate": 1.3222129983059277e-05, + "loss": 0.5189, + "step": 28875 + }, + { + "epoch": 0.7928610653487095, + "grad_norm": 0.456609845161438, + "learning_rate": 1.3221721120731385e-05, + "loss": 0.5191, + "step": 28876 + }, + { + "epoch": 0.792888522789676, + "grad_norm": 0.44897323846817017, + "learning_rate": 1.3221312252393954e-05, + "loss": 0.5007, + "step": 28877 + }, + { + "epoch": 0.7929159802306425, + "grad_norm": 0.3793288767337799, + "learning_rate": 1.3220903378047747e-05, + "loss": 0.4678, + "step": 28878 + }, + { + "epoch": 0.792943437671609, + "grad_norm": 0.37855416536331177, + "learning_rate": 1.3220494497693527e-05, + "loss": 0.4795, + "step": 28879 + }, + { + "epoch": 0.7929708951125755, + "grad_norm": 0.3419977128505707, + "learning_rate": 1.3220085611332058e-05, + "loss": 0.4603, + "step": 28880 + }, + { + "epoch": 0.792998352553542, + "grad_norm": 0.37966781854629517, + "learning_rate": 1.3219676718964103e-05, + "loss": 0.4587, + "step": 28881 + }, + { + "epoch": 0.7930258099945086, + "grad_norm": 0.4326576888561249, + "learning_rate": 1.321926782059042e-05, + "loss": 0.4851, + "step": 28882 + }, + { + "epoch": 0.793053267435475, + "grad_norm": 0.39535120129585266, + "learning_rate": 1.3218858916211776e-05, + "loss": 0.4479, + "step": 28883 + }, + { + "epoch": 0.7930807248764415, + "grad_norm": 0.39760252833366394, + "learning_rate": 1.3218450005828936e-05, + "loss": 0.4515, + "step": 28884 + }, + { + "epoch": 0.793108182317408, + "grad_norm": 0.47074970602989197, + "learning_rate": 1.3218041089442654e-05, + "loss": 0.5293, + "step": 28885 + }, + { + "epoch": 0.7931356397583745, + "grad_norm": 0.37029096484184265, + "learning_rate": 1.3217632167053703e-05, + "loss": 0.4433, + "step": 28886 + }, + { + "epoch": 0.793163097199341, + "grad_norm": 0.3818627893924713, + "learning_rate": 1.3217223238662838e-05, + "loss": 0.4652, + "step": 28887 + }, + { + "epoch": 0.7931905546403075, + "grad_norm": 0.3991394639015198, + "learning_rate": 1.3216814304270827e-05, + "loss": 0.4352, + "step": 28888 + }, + { + "epoch": 0.7932180120812741, + "grad_norm": 0.3925267159938812, + "learning_rate": 1.3216405363878429e-05, + "loss": 0.4879, + "step": 28889 + }, + { + "epoch": 0.7932454695222405, + "grad_norm": 0.41844043135643005, + "learning_rate": 1.3215996417486408e-05, + "loss": 0.4964, + "step": 28890 + }, + { + "epoch": 0.7932729269632071, + "grad_norm": 0.384756475687027, + "learning_rate": 1.3215587465095529e-05, + "loss": 0.6204, + "step": 28891 + }, + { + "epoch": 0.7933003844041735, + "grad_norm": 0.4123842716217041, + "learning_rate": 1.3215178506706552e-05, + "loss": 0.4751, + "step": 28892 + }, + { + "epoch": 0.79332784184514, + "grad_norm": 0.39646777510643005, + "learning_rate": 1.321476954232024e-05, + "loss": 0.5301, + "step": 28893 + }, + { + "epoch": 0.7933552992861065, + "grad_norm": 0.390313982963562, + "learning_rate": 1.3214360571937358e-05, + "loss": 0.5025, + "step": 28894 + }, + { + "epoch": 0.793382756727073, + "grad_norm": 0.4119836091995239, + "learning_rate": 1.3213951595558667e-05, + "loss": 0.5073, + "step": 28895 + }, + { + "epoch": 0.7934102141680396, + "grad_norm": 0.3242810070514679, + "learning_rate": 1.3213542613184933e-05, + "loss": 0.4325, + "step": 28896 + }, + { + "epoch": 0.793437671609006, + "grad_norm": 0.38877540826797485, + "learning_rate": 1.3213133624816916e-05, + "loss": 0.4542, + "step": 28897 + }, + { + "epoch": 0.7934651290499726, + "grad_norm": 0.4285781681537628, + "learning_rate": 1.3212724630455376e-05, + "loss": 0.4436, + "step": 28898 + }, + { + "epoch": 0.793492586490939, + "grad_norm": 0.3953193724155426, + "learning_rate": 1.3212315630101082e-05, + "loss": 0.4951, + "step": 28899 + }, + { + "epoch": 0.7935200439319056, + "grad_norm": 0.4985332190990448, + "learning_rate": 1.321190662375479e-05, + "loss": 0.4928, + "step": 28900 + }, + { + "epoch": 0.793547501372872, + "grad_norm": 0.43795469403266907, + "learning_rate": 1.3211497611417273e-05, + "loss": 0.4747, + "step": 28901 + }, + { + "epoch": 0.7935749588138385, + "grad_norm": 0.4009590446949005, + "learning_rate": 1.3211088593089287e-05, + "loss": 0.4691, + "step": 28902 + }, + { + "epoch": 0.7936024162548051, + "grad_norm": 0.38843539357185364, + "learning_rate": 1.3210679568771592e-05, + "loss": 0.5055, + "step": 28903 + }, + { + "epoch": 0.7936298736957715, + "grad_norm": 0.4087790846824646, + "learning_rate": 1.3210270538464958e-05, + "loss": 0.5421, + "step": 28904 + }, + { + "epoch": 0.7936573311367381, + "grad_norm": 0.38917821645736694, + "learning_rate": 1.3209861502170142e-05, + "loss": 0.4981, + "step": 28905 + }, + { + "epoch": 0.7936847885777045, + "grad_norm": 0.3837707042694092, + "learning_rate": 1.3209452459887912e-05, + "loss": 0.4955, + "step": 28906 + }, + { + "epoch": 0.7937122460186711, + "grad_norm": 0.4079829156398773, + "learning_rate": 1.320904341161903e-05, + "loss": 0.5346, + "step": 28907 + }, + { + "epoch": 0.7937397034596375, + "grad_norm": 0.35063624382019043, + "learning_rate": 1.3208634357364255e-05, + "loss": 0.4962, + "step": 28908 + }, + { + "epoch": 0.7937671609006041, + "grad_norm": 0.39185482263565063, + "learning_rate": 1.3208225297124355e-05, + "loss": 0.5217, + "step": 28909 + }, + { + "epoch": 0.7937946183415706, + "grad_norm": 0.39969444274902344, + "learning_rate": 1.320781623090009e-05, + "loss": 0.5255, + "step": 28910 + }, + { + "epoch": 0.793822075782537, + "grad_norm": 0.37885648012161255, + "learning_rate": 1.3207407158692223e-05, + "loss": 0.4382, + "step": 28911 + }, + { + "epoch": 0.7938495332235036, + "grad_norm": 0.5401685833930969, + "learning_rate": 1.320699808050152e-05, + "loss": 0.5728, + "step": 28912 + }, + { + "epoch": 0.79387699066447, + "grad_norm": 0.3693428039550781, + "learning_rate": 1.320658899632874e-05, + "loss": 0.4898, + "step": 28913 + }, + { + "epoch": 0.7939044481054366, + "grad_norm": 0.37648046016693115, + "learning_rate": 1.320617990617465e-05, + "loss": 0.4873, + "step": 28914 + }, + { + "epoch": 0.793931905546403, + "grad_norm": 0.3708253502845764, + "learning_rate": 1.3205770810040011e-05, + "loss": 0.4507, + "step": 28915 + }, + { + "epoch": 0.7939593629873696, + "grad_norm": 0.4348164200782776, + "learning_rate": 1.3205361707925584e-05, + "loss": 0.4804, + "step": 28916 + }, + { + "epoch": 0.7939868204283361, + "grad_norm": 0.3872295022010803, + "learning_rate": 1.3204952599832135e-05, + "loss": 0.496, + "step": 28917 + }, + { + "epoch": 0.7940142778693026, + "grad_norm": 0.3441913425922394, + "learning_rate": 1.3204543485760427e-05, + "loss": 0.428, + "step": 28918 + }, + { + "epoch": 0.7940417353102691, + "grad_norm": 0.4735545516014099, + "learning_rate": 1.3204134365711223e-05, + "loss": 0.5471, + "step": 28919 + }, + { + "epoch": 0.7940691927512356, + "grad_norm": 0.41436660289764404, + "learning_rate": 1.3203725239685288e-05, + "loss": 0.5609, + "step": 28920 + }, + { + "epoch": 0.7940966501922021, + "grad_norm": 0.3847878873348236, + "learning_rate": 1.3203316107683376e-05, + "loss": 0.4504, + "step": 28921 + }, + { + "epoch": 0.7941241076331685, + "grad_norm": 0.39955657720565796, + "learning_rate": 1.3202906969706266e-05, + "loss": 0.4405, + "step": 28922 + }, + { + "epoch": 0.7941515650741351, + "grad_norm": 0.3667219877243042, + "learning_rate": 1.3202497825754705e-05, + "loss": 0.3947, + "step": 28923 + }, + { + "epoch": 0.7941790225151016, + "grad_norm": 0.33845439553260803, + "learning_rate": 1.3202088675829468e-05, + "loss": 0.4869, + "step": 28924 + }, + { + "epoch": 0.7942064799560681, + "grad_norm": 0.39948487281799316, + "learning_rate": 1.3201679519931312e-05, + "loss": 0.494, + "step": 28925 + }, + { + "epoch": 0.7942339373970346, + "grad_norm": 0.3847973048686981, + "learning_rate": 1.3201270358061003e-05, + "loss": 0.565, + "step": 28926 + }, + { + "epoch": 0.7942613948380011, + "grad_norm": 0.45412498712539673, + "learning_rate": 1.3200861190219301e-05, + "loss": 0.5211, + "step": 28927 + }, + { + "epoch": 0.7942888522789676, + "grad_norm": 0.3907316327095032, + "learning_rate": 1.3200452016406973e-05, + "loss": 0.4964, + "step": 28928 + }, + { + "epoch": 0.794316309719934, + "grad_norm": 0.38353538513183594, + "learning_rate": 1.3200042836624778e-05, + "loss": 0.4768, + "step": 28929 + }, + { + "epoch": 0.7943437671609006, + "grad_norm": 0.5252509117126465, + "learning_rate": 1.3199633650873485e-05, + "loss": 0.517, + "step": 28930 + }, + { + "epoch": 0.7943712246018672, + "grad_norm": 0.40649229288101196, + "learning_rate": 1.3199224459153851e-05, + "loss": 0.5259, + "step": 28931 + }, + { + "epoch": 0.7943986820428336, + "grad_norm": 0.42858976125717163, + "learning_rate": 1.3198815261466644e-05, + "loss": 0.4701, + "step": 28932 + }, + { + "epoch": 0.7944261394838001, + "grad_norm": 0.35216280817985535, + "learning_rate": 1.3198406057812626e-05, + "loss": 0.4922, + "step": 28933 + }, + { + "epoch": 0.7944535969247666, + "grad_norm": 0.3455829918384552, + "learning_rate": 1.3197996848192558e-05, + "loss": 0.4383, + "step": 28934 + }, + { + "epoch": 0.7944810543657331, + "grad_norm": 0.36359497904777527, + "learning_rate": 1.3197587632607207e-05, + "loss": 0.4838, + "step": 28935 + }, + { + "epoch": 0.7945085118066996, + "grad_norm": 0.3743777573108673, + "learning_rate": 1.3197178411057335e-05, + "loss": 0.5327, + "step": 28936 + }, + { + "epoch": 0.7945359692476661, + "grad_norm": 0.3947320878505707, + "learning_rate": 1.3196769183543702e-05, + "loss": 0.3855, + "step": 28937 + }, + { + "epoch": 0.7945634266886327, + "grad_norm": 0.3827601969242096, + "learning_rate": 1.319635995006708e-05, + "loss": 0.453, + "step": 28938 + }, + { + "epoch": 0.7945908841295991, + "grad_norm": 0.35645008087158203, + "learning_rate": 1.319595071062822e-05, + "loss": 0.5089, + "step": 28939 + }, + { + "epoch": 0.7946183415705657, + "grad_norm": 0.43448230624198914, + "learning_rate": 1.3195541465227894e-05, + "loss": 0.5048, + "step": 28940 + }, + { + "epoch": 0.7946457990115321, + "grad_norm": 0.39545026421546936, + "learning_rate": 1.3195132213866865e-05, + "loss": 0.4999, + "step": 28941 + }, + { + "epoch": 0.7946732564524986, + "grad_norm": 0.40067028999328613, + "learning_rate": 1.3194722956545894e-05, + "loss": 0.5783, + "step": 28942 + }, + { + "epoch": 0.7947007138934651, + "grad_norm": 0.34980469942092896, + "learning_rate": 1.3194313693265743e-05, + "loss": 0.4871, + "step": 28943 + }, + { + "epoch": 0.7947281713344316, + "grad_norm": 0.3777106702327728, + "learning_rate": 1.319390442402718e-05, + "loss": 0.4947, + "step": 28944 + }, + { + "epoch": 0.7947556287753982, + "grad_norm": 0.37771689891815186, + "learning_rate": 1.3193495148830964e-05, + "loss": 0.4435, + "step": 28945 + }, + { + "epoch": 0.7947830862163646, + "grad_norm": 0.41709640622138977, + "learning_rate": 1.3193085867677862e-05, + "loss": 0.4871, + "step": 28946 + }, + { + "epoch": 0.7948105436573312, + "grad_norm": 0.6367322206497192, + "learning_rate": 1.3192676580568633e-05, + "loss": 0.5517, + "step": 28947 + }, + { + "epoch": 0.7948380010982976, + "grad_norm": 0.3614189028739929, + "learning_rate": 1.3192267287504045e-05, + "loss": 0.4476, + "step": 28948 + }, + { + "epoch": 0.7948654585392642, + "grad_norm": 0.39594003558158875, + "learning_rate": 1.319185798848486e-05, + "loss": 0.5804, + "step": 28949 + }, + { + "epoch": 0.7948929159802306, + "grad_norm": 0.3705328404903412, + "learning_rate": 1.3191448683511841e-05, + "loss": 0.4203, + "step": 28950 + }, + { + "epoch": 0.7949203734211971, + "grad_norm": 0.4032622277736664, + "learning_rate": 1.3191039372585753e-05, + "loss": 0.5207, + "step": 28951 + }, + { + "epoch": 0.7949478308621637, + "grad_norm": 0.4157354533672333, + "learning_rate": 1.3190630055707354e-05, + "loss": 0.4702, + "step": 28952 + }, + { + "epoch": 0.7949752883031301, + "grad_norm": 0.39681676030158997, + "learning_rate": 1.3190220732877417e-05, + "loss": 0.5039, + "step": 28953 + }, + { + "epoch": 0.7950027457440967, + "grad_norm": 0.3669542372226715, + "learning_rate": 1.3189811404096695e-05, + "loss": 0.567, + "step": 28954 + }, + { + "epoch": 0.7950302031850631, + "grad_norm": 0.3854005038738251, + "learning_rate": 1.3189402069365959e-05, + "loss": 0.4781, + "step": 28955 + }, + { + "epoch": 0.7950576606260297, + "grad_norm": 0.46714159846305847, + "learning_rate": 1.3188992728685971e-05, + "loss": 0.4627, + "step": 28956 + }, + { + "epoch": 0.7950851180669961, + "grad_norm": 0.41735193133354187, + "learning_rate": 1.3188583382057492e-05, + "loss": 0.5532, + "step": 28957 + }, + { + "epoch": 0.7951125755079627, + "grad_norm": 0.42548415064811707, + "learning_rate": 1.3188174029481289e-05, + "loss": 0.483, + "step": 28958 + }, + { + "epoch": 0.7951400329489292, + "grad_norm": 0.4392024576663971, + "learning_rate": 1.3187764670958126e-05, + "loss": 0.5085, + "step": 28959 + }, + { + "epoch": 0.7951674903898956, + "grad_norm": 0.4262329339981079, + "learning_rate": 1.3187355306488759e-05, + "loss": 0.5612, + "step": 28960 + }, + { + "epoch": 0.7951949478308622, + "grad_norm": 0.35186007618904114, + "learning_rate": 1.3186945936073961e-05, + "loss": 0.4993, + "step": 28961 + }, + { + "epoch": 0.7952224052718286, + "grad_norm": 0.6515346169471741, + "learning_rate": 1.3186536559714488e-05, + "loss": 0.4742, + "step": 28962 + }, + { + "epoch": 0.7952498627127952, + "grad_norm": 0.5186629891395569, + "learning_rate": 1.318612717741111e-05, + "loss": 0.5616, + "step": 28963 + }, + { + "epoch": 0.7952773201537616, + "grad_norm": 0.4028205871582031, + "learning_rate": 1.318571778916459e-05, + "loss": 0.5053, + "step": 28964 + }, + { + "epoch": 0.7953047775947282, + "grad_norm": 0.3469853401184082, + "learning_rate": 1.3185308394975684e-05, + "loss": 0.4505, + "step": 28965 + }, + { + "epoch": 0.7953322350356947, + "grad_norm": 0.4933312237262726, + "learning_rate": 1.3184898994845166e-05, + "loss": 0.4927, + "step": 28966 + }, + { + "epoch": 0.7953596924766612, + "grad_norm": 0.36995700001716614, + "learning_rate": 1.3184489588773793e-05, + "loss": 0.4944, + "step": 28967 + }, + { + "epoch": 0.7953871499176277, + "grad_norm": 0.47748467326164246, + "learning_rate": 1.318408017676233e-05, + "loss": 0.4968, + "step": 28968 + }, + { + "epoch": 0.7954146073585942, + "grad_norm": 0.40114104747772217, + "learning_rate": 1.3183670758811542e-05, + "loss": 0.5484, + "step": 28969 + }, + { + "epoch": 0.7954420647995607, + "grad_norm": 0.3817068934440613, + "learning_rate": 1.318326133492219e-05, + "loss": 0.5265, + "step": 28970 + }, + { + "epoch": 0.7954695222405271, + "grad_norm": 0.3893270790576935, + "learning_rate": 1.3182851905095045e-05, + "loss": 0.5267, + "step": 28971 + }, + { + "epoch": 0.7954969796814937, + "grad_norm": 0.3444291651248932, + "learning_rate": 1.318244246933086e-05, + "loss": 0.4493, + "step": 28972 + }, + { + "epoch": 0.7955244371224602, + "grad_norm": 0.3642902374267578, + "learning_rate": 1.3182033027630405e-05, + "loss": 0.5724, + "step": 28973 + }, + { + "epoch": 0.7955518945634267, + "grad_norm": 0.3791426718235016, + "learning_rate": 1.3181623579994444e-05, + "loss": 0.3808, + "step": 28974 + }, + { + "epoch": 0.7955793520043932, + "grad_norm": 0.38107529282569885, + "learning_rate": 1.3181214126423738e-05, + "loss": 0.4558, + "step": 28975 + }, + { + "epoch": 0.7956068094453597, + "grad_norm": 0.3459852635860443, + "learning_rate": 1.3180804666919055e-05, + "loss": 0.435, + "step": 28976 + }, + { + "epoch": 0.7956342668863262, + "grad_norm": 0.45314517617225647, + "learning_rate": 1.3180395201481155e-05, + "loss": 0.5414, + "step": 28977 + }, + { + "epoch": 0.7956617243272927, + "grad_norm": 0.39795181155204773, + "learning_rate": 1.3179985730110803e-05, + "loss": 0.4731, + "step": 28978 + }, + { + "epoch": 0.7956891817682592, + "grad_norm": 0.3769240379333496, + "learning_rate": 1.3179576252808763e-05, + "loss": 0.5555, + "step": 28979 + }, + { + "epoch": 0.7957166392092258, + "grad_norm": 0.37245845794677734, + "learning_rate": 1.3179166769575797e-05, + "loss": 0.4356, + "step": 28980 + }, + { + "epoch": 0.7957440966501922, + "grad_norm": 0.7072638273239136, + "learning_rate": 1.317875728041267e-05, + "loss": 0.4678, + "step": 28981 + }, + { + "epoch": 0.7957715540911587, + "grad_norm": 0.525495171546936, + "learning_rate": 1.3178347785320149e-05, + "loss": 0.4787, + "step": 28982 + }, + { + "epoch": 0.7957990115321252, + "grad_norm": 0.5341266989707947, + "learning_rate": 1.3177938284298992e-05, + "loss": 0.4895, + "step": 28983 + }, + { + "epoch": 0.7958264689730917, + "grad_norm": 0.36437392234802246, + "learning_rate": 1.3177528777349968e-05, + "loss": 0.494, + "step": 28984 + }, + { + "epoch": 0.7958539264140582, + "grad_norm": 0.3832783102989197, + "learning_rate": 1.3177119264473837e-05, + "loss": 0.4945, + "step": 28985 + }, + { + "epoch": 0.7958813838550247, + "grad_norm": 0.47551196813583374, + "learning_rate": 1.3176709745671366e-05, + "loss": 0.4647, + "step": 28986 + }, + { + "epoch": 0.7959088412959913, + "grad_norm": 0.4040127992630005, + "learning_rate": 1.3176300220943316e-05, + "loss": 0.4945, + "step": 28987 + }, + { + "epoch": 0.7959362987369577, + "grad_norm": 0.44782355427742004, + "learning_rate": 1.3175890690290452e-05, + "loss": 0.5005, + "step": 28988 + }, + { + "epoch": 0.7959637561779243, + "grad_norm": 0.3713240921497345, + "learning_rate": 1.3175481153713541e-05, + "loss": 0.4582, + "step": 28989 + }, + { + "epoch": 0.7959912136188907, + "grad_norm": 0.5358617901802063, + "learning_rate": 1.3175071611213343e-05, + "loss": 0.468, + "step": 28990 + }, + { + "epoch": 0.7960186710598572, + "grad_norm": 0.3746187090873718, + "learning_rate": 1.3174662062790622e-05, + "loss": 0.5507, + "step": 28991 + }, + { + "epoch": 0.7960461285008237, + "grad_norm": 0.4287177622318268, + "learning_rate": 1.3174252508446144e-05, + "loss": 0.518, + "step": 28992 + }, + { + "epoch": 0.7960735859417902, + "grad_norm": 0.5034673810005188, + "learning_rate": 1.317384294818067e-05, + "loss": 0.4232, + "step": 28993 + }, + { + "epoch": 0.7961010433827568, + "grad_norm": 0.4008547067642212, + "learning_rate": 1.3173433381994966e-05, + "loss": 0.5677, + "step": 28994 + }, + { + "epoch": 0.7961285008237232, + "grad_norm": 0.3953173756599426, + "learning_rate": 1.31730238098898e-05, + "loss": 0.595, + "step": 28995 + }, + { + "epoch": 0.7961559582646898, + "grad_norm": 0.42454051971435547, + "learning_rate": 1.3172614231865928e-05, + "loss": 0.501, + "step": 28996 + }, + { + "epoch": 0.7961834157056562, + "grad_norm": 0.3601769804954529, + "learning_rate": 1.3172204647924121e-05, + "loss": 0.4904, + "step": 28997 + }, + { + "epoch": 0.7962108731466228, + "grad_norm": 0.40780165791511536, + "learning_rate": 1.3171795058065136e-05, + "loss": 0.5028, + "step": 28998 + }, + { + "epoch": 0.7962383305875892, + "grad_norm": 0.39404717087745667, + "learning_rate": 1.3171385462289743e-05, + "loss": 0.4742, + "step": 28999 + }, + { + "epoch": 0.7962657880285557, + "grad_norm": 0.37312766909599304, + "learning_rate": 1.3170975860598705e-05, + "loss": 0.4567, + "step": 29000 + }, + { + "epoch": 0.7962932454695223, + "grad_norm": 0.5204466581344604, + "learning_rate": 1.3170566252992782e-05, + "loss": 0.5156, + "step": 29001 + }, + { + "epoch": 0.7963207029104887, + "grad_norm": 0.415997177362442, + "learning_rate": 1.3170156639472744e-05, + "loss": 0.5595, + "step": 29002 + }, + { + "epoch": 0.7963481603514553, + "grad_norm": 0.42201319336891174, + "learning_rate": 1.3169747020039352e-05, + "loss": 0.6014, + "step": 29003 + }, + { + "epoch": 0.7963756177924217, + "grad_norm": 0.3771829307079315, + "learning_rate": 1.3169337394693366e-05, + "loss": 0.4484, + "step": 29004 + }, + { + "epoch": 0.7964030752333883, + "grad_norm": 0.3933040201663971, + "learning_rate": 1.316892776343556e-05, + "loss": 0.5345, + "step": 29005 + }, + { + "epoch": 0.7964305326743547, + "grad_norm": 0.42519697546958923, + "learning_rate": 1.3168518126266687e-05, + "loss": 0.513, + "step": 29006 + }, + { + "epoch": 0.7964579901153213, + "grad_norm": 0.4067384898662567, + "learning_rate": 1.3168108483187521e-05, + "loss": 0.5442, + "step": 29007 + }, + { + "epoch": 0.7964854475562878, + "grad_norm": 0.37784504890441895, + "learning_rate": 1.3167698834198818e-05, + "loss": 0.5166, + "step": 29008 + }, + { + "epoch": 0.7965129049972542, + "grad_norm": 0.3664315640926361, + "learning_rate": 1.3167289179301345e-05, + "loss": 0.4835, + "step": 29009 + }, + { + "epoch": 0.7965403624382208, + "grad_norm": 0.46559232473373413, + "learning_rate": 1.3166879518495872e-05, + "loss": 0.4406, + "step": 29010 + }, + { + "epoch": 0.7965678198791872, + "grad_norm": 0.3529190421104431, + "learning_rate": 1.3166469851783152e-05, + "loss": 0.4549, + "step": 29011 + }, + { + "epoch": 0.7965952773201538, + "grad_norm": 0.3821374475955963, + "learning_rate": 1.316606017916396e-05, + "loss": 0.5085, + "step": 29012 + }, + { + "epoch": 0.7966227347611202, + "grad_norm": 0.3859216272830963, + "learning_rate": 1.3165650500639054e-05, + "loss": 0.4978, + "step": 29013 + }, + { + "epoch": 0.7966501922020868, + "grad_norm": 0.5575281977653503, + "learning_rate": 1.3165240816209196e-05, + "loss": 0.5499, + "step": 29014 + }, + { + "epoch": 0.7966776496430533, + "grad_norm": 0.4493682086467743, + "learning_rate": 1.3164831125875157e-05, + "loss": 0.4845, + "step": 29015 + }, + { + "epoch": 0.7967051070840198, + "grad_norm": 0.3335815370082855, + "learning_rate": 1.3164421429637697e-05, + "loss": 0.4792, + "step": 29016 + }, + { + "epoch": 0.7967325645249863, + "grad_norm": 0.3945569396018982, + "learning_rate": 1.3164011727497584e-05, + "loss": 0.5052, + "step": 29017 + }, + { + "epoch": 0.7967600219659527, + "grad_norm": 0.3899145722389221, + "learning_rate": 1.3163602019455577e-05, + "loss": 0.5455, + "step": 29018 + }, + { + "epoch": 0.7967874794069193, + "grad_norm": 0.5879490375518799, + "learning_rate": 1.316319230551244e-05, + "loss": 0.5277, + "step": 29019 + }, + { + "epoch": 0.7968149368478857, + "grad_norm": 0.41111961007118225, + "learning_rate": 1.3162782585668944e-05, + "loss": 0.4302, + "step": 29020 + }, + { + "epoch": 0.7968423942888523, + "grad_norm": 0.4166162610054016, + "learning_rate": 1.3162372859925845e-05, + "loss": 0.5109, + "step": 29021 + }, + { + "epoch": 0.7968698517298188, + "grad_norm": 0.4510578513145447, + "learning_rate": 1.3161963128283911e-05, + "loss": 0.53, + "step": 29022 + }, + { + "epoch": 0.7968973091707853, + "grad_norm": 0.3990943729877472, + "learning_rate": 1.316155339074391e-05, + "loss": 0.389, + "step": 29023 + }, + { + "epoch": 0.7969247666117518, + "grad_norm": 0.4815865457057953, + "learning_rate": 1.3161143647306603e-05, + "loss": 0.4574, + "step": 29024 + }, + { + "epoch": 0.7969522240527183, + "grad_norm": 0.36859461665153503, + "learning_rate": 1.3160733897972753e-05, + "loss": 0.5121, + "step": 29025 + }, + { + "epoch": 0.7969796814936848, + "grad_norm": 0.6260314583778381, + "learning_rate": 1.3160324142743125e-05, + "loss": 0.4624, + "step": 29026 + }, + { + "epoch": 0.7970071389346512, + "grad_norm": 0.40281176567077637, + "learning_rate": 1.3159914381618484e-05, + "loss": 0.4671, + "step": 29027 + }, + { + "epoch": 0.7970345963756178, + "grad_norm": 0.469783753156662, + "learning_rate": 1.3159504614599593e-05, + "loss": 0.5148, + "step": 29028 + }, + { + "epoch": 0.7970620538165843, + "grad_norm": 0.3544091284275055, + "learning_rate": 1.3159094841687218e-05, + "loss": 0.4611, + "step": 29029 + }, + { + "epoch": 0.7970895112575508, + "grad_norm": 0.3897426128387451, + "learning_rate": 1.3158685062882126e-05, + "loss": 0.523, + "step": 29030 + }, + { + "epoch": 0.7971169686985173, + "grad_norm": 0.4406341016292572, + "learning_rate": 1.3158275278185073e-05, + "loss": 0.4712, + "step": 29031 + }, + { + "epoch": 0.7971444261394838, + "grad_norm": 0.4181170165538788, + "learning_rate": 1.3157865487596833e-05, + "loss": 0.5434, + "step": 29032 + }, + { + "epoch": 0.7971718835804503, + "grad_norm": 0.39553216099739075, + "learning_rate": 1.3157455691118164e-05, + "loss": 0.53, + "step": 29033 + }, + { + "epoch": 0.7971993410214168, + "grad_norm": 0.48474088311195374, + "learning_rate": 1.3157045888749833e-05, + "loss": 0.5429, + "step": 29034 + }, + { + "epoch": 0.7972267984623833, + "grad_norm": 0.37144753336906433, + "learning_rate": 1.3156636080492605e-05, + "loss": 0.4854, + "step": 29035 + }, + { + "epoch": 0.7972542559033499, + "grad_norm": 0.3819325268268585, + "learning_rate": 1.3156226266347241e-05, + "loss": 0.4149, + "step": 29036 + }, + { + "epoch": 0.7972817133443163, + "grad_norm": 0.3960030674934387, + "learning_rate": 1.3155816446314508e-05, + "loss": 0.4779, + "step": 29037 + }, + { + "epoch": 0.7973091707852829, + "grad_norm": 0.3991811275482178, + "learning_rate": 1.315540662039517e-05, + "loss": 0.4801, + "step": 29038 + }, + { + "epoch": 0.7973366282262493, + "grad_norm": 0.37946075201034546, + "learning_rate": 1.3154996788589992e-05, + "loss": 0.5505, + "step": 29039 + }, + { + "epoch": 0.7973640856672158, + "grad_norm": 0.37354370951652527, + "learning_rate": 1.3154586950899737e-05, + "loss": 0.5421, + "step": 29040 + }, + { + "epoch": 0.7973915431081823, + "grad_norm": 0.38070148229599, + "learning_rate": 1.3154177107325174e-05, + "loss": 0.6137, + "step": 29041 + }, + { + "epoch": 0.7974190005491488, + "grad_norm": 0.35067659616470337, + "learning_rate": 1.3153767257867062e-05, + "loss": 0.4519, + "step": 29042 + }, + { + "epoch": 0.7974464579901154, + "grad_norm": 0.3871609568595886, + "learning_rate": 1.3153357402526166e-05, + "loss": 0.5573, + "step": 29043 + }, + { + "epoch": 0.7974739154310818, + "grad_norm": 0.4399052560329437, + "learning_rate": 1.3152947541303253e-05, + "loss": 0.5264, + "step": 29044 + }, + { + "epoch": 0.7975013728720484, + "grad_norm": 0.42424777150154114, + "learning_rate": 1.3152537674199086e-05, + "loss": 0.5176, + "step": 29045 + }, + { + "epoch": 0.7975288303130148, + "grad_norm": 0.3781950771808624, + "learning_rate": 1.315212780121443e-05, + "loss": 0.5501, + "step": 29046 + }, + { + "epoch": 0.7975562877539814, + "grad_norm": 0.38911280035972595, + "learning_rate": 1.3151717922350053e-05, + "loss": 0.5771, + "step": 29047 + }, + { + "epoch": 0.7975837451949478, + "grad_norm": 0.4925481975078583, + "learning_rate": 1.3151308037606714e-05, + "loss": 0.5295, + "step": 29048 + }, + { + "epoch": 0.7976112026359143, + "grad_norm": 0.45661115646362305, + "learning_rate": 1.3150898146985181e-05, + "loss": 0.5157, + "step": 29049 + }, + { + "epoch": 0.7976386600768809, + "grad_norm": 0.4560356140136719, + "learning_rate": 1.3150488250486212e-05, + "loss": 0.5802, + "step": 29050 + }, + { + "epoch": 0.7976661175178473, + "grad_norm": 0.3228946924209595, + "learning_rate": 1.3150078348110581e-05, + "loss": 0.4096, + "step": 29051 + }, + { + "epoch": 0.7976935749588139, + "grad_norm": 3.9901256561279297, + "learning_rate": 1.3149668439859049e-05, + "loss": 0.6683, + "step": 29052 + }, + { + "epoch": 0.7977210323997803, + "grad_norm": 0.3776914179325104, + "learning_rate": 1.3149258525732378e-05, + "loss": 0.4878, + "step": 29053 + }, + { + "epoch": 0.7977484898407469, + "grad_norm": 0.4084141254425049, + "learning_rate": 1.3148848605731336e-05, + "loss": 0.478, + "step": 29054 + }, + { + "epoch": 0.7977759472817133, + "grad_norm": 0.37120872735977173, + "learning_rate": 1.3148438679856685e-05, + "loss": 0.3956, + "step": 29055 + }, + { + "epoch": 0.7978034047226799, + "grad_norm": 0.4057466685771942, + "learning_rate": 1.3148028748109194e-05, + "loss": 0.5313, + "step": 29056 + }, + { + "epoch": 0.7978308621636463, + "grad_norm": 0.36052778363227844, + "learning_rate": 1.3147618810489624e-05, + "loss": 0.4306, + "step": 29057 + }, + { + "epoch": 0.7978583196046128, + "grad_norm": 0.3783266842365265, + "learning_rate": 1.3147208866998737e-05, + "loss": 0.4892, + "step": 29058 + }, + { + "epoch": 0.7978857770455794, + "grad_norm": 0.45477592945098877, + "learning_rate": 1.3146798917637304e-05, + "loss": 0.4776, + "step": 29059 + }, + { + "epoch": 0.7979132344865458, + "grad_norm": 0.36877313256263733, + "learning_rate": 1.3146388962406084e-05, + "loss": 0.4209, + "step": 29060 + }, + { + "epoch": 0.7979406919275124, + "grad_norm": 0.43184465169906616, + "learning_rate": 1.3145979001305849e-05, + "loss": 0.5766, + "step": 29061 + }, + { + "epoch": 0.7979681493684788, + "grad_norm": 0.6014851331710815, + "learning_rate": 1.3145569034337356e-05, + "loss": 0.4418, + "step": 29062 + }, + { + "epoch": 0.7979956068094454, + "grad_norm": 0.3952651023864746, + "learning_rate": 1.3145159061501374e-05, + "loss": 0.5244, + "step": 29063 + }, + { + "epoch": 0.7980230642504118, + "grad_norm": 0.3939405381679535, + "learning_rate": 1.3144749082798665e-05, + "loss": 0.5022, + "step": 29064 + }, + { + "epoch": 0.7980505216913784, + "grad_norm": 0.4110875427722931, + "learning_rate": 1.3144339098229996e-05, + "loss": 0.4944, + "step": 29065 + }, + { + "epoch": 0.7980779791323449, + "grad_norm": 0.36536529660224915, + "learning_rate": 1.3143929107796129e-05, + "loss": 0.54, + "step": 29066 + }, + { + "epoch": 0.7981054365733113, + "grad_norm": 0.4180467426776886, + "learning_rate": 1.3143519111497836e-05, + "loss": 0.4755, + "step": 29067 + }, + { + "epoch": 0.7981328940142779, + "grad_norm": 0.3996206820011139, + "learning_rate": 1.3143109109335873e-05, + "loss": 0.5248, + "step": 29068 + }, + { + "epoch": 0.7981603514552443, + "grad_norm": 0.45701864361763, + "learning_rate": 1.3142699101311008e-05, + "loss": 0.5638, + "step": 29069 + }, + { + "epoch": 0.7981878088962109, + "grad_norm": 0.4195692539215088, + "learning_rate": 1.3142289087424008e-05, + "loss": 0.5729, + "step": 29070 + }, + { + "epoch": 0.7982152663371773, + "grad_norm": 0.4224366545677185, + "learning_rate": 1.3141879067675633e-05, + "loss": 0.5256, + "step": 29071 + }, + { + "epoch": 0.7982427237781439, + "grad_norm": 0.39007267355918884, + "learning_rate": 1.3141469042066653e-05, + "loss": 0.4774, + "step": 29072 + }, + { + "epoch": 0.7982701812191104, + "grad_norm": 0.4094315767288208, + "learning_rate": 1.314105901059783e-05, + "loss": 0.4916, + "step": 29073 + }, + { + "epoch": 0.7982976386600769, + "grad_norm": 0.3510702848434448, + "learning_rate": 1.3140648973269929e-05, + "loss": 0.4631, + "step": 29074 + }, + { + "epoch": 0.7983250961010434, + "grad_norm": 0.34776371717453003, + "learning_rate": 1.3140238930083715e-05, + "loss": 0.4445, + "step": 29075 + }, + { + "epoch": 0.7983525535420098, + "grad_norm": 0.3266529440879822, + "learning_rate": 1.3139828881039955e-05, + "loss": 0.5192, + "step": 29076 + }, + { + "epoch": 0.7983800109829764, + "grad_norm": 0.3539287745952606, + "learning_rate": 1.3139418826139412e-05, + "loss": 0.3711, + "step": 29077 + }, + { + "epoch": 0.7984074684239428, + "grad_norm": 0.37110257148742676, + "learning_rate": 1.3139008765382848e-05, + "loss": 0.6131, + "step": 29078 + }, + { + "epoch": 0.7984349258649094, + "grad_norm": 0.36466532945632935, + "learning_rate": 1.3138598698771032e-05, + "loss": 0.4763, + "step": 29079 + }, + { + "epoch": 0.7984623833058759, + "grad_norm": 0.36504364013671875, + "learning_rate": 1.3138188626304731e-05, + "loss": 0.4301, + "step": 29080 + }, + { + "epoch": 0.7984898407468424, + "grad_norm": 0.35784369707107544, + "learning_rate": 1.3137778547984704e-05, + "loss": 0.4448, + "step": 29081 + }, + { + "epoch": 0.7985172981878089, + "grad_norm": 0.43151602149009705, + "learning_rate": 1.3137368463811718e-05, + "loss": 0.4895, + "step": 29082 + }, + { + "epoch": 0.7985447556287754, + "grad_norm": 0.40974509716033936, + "learning_rate": 1.3136958373786539e-05, + "loss": 0.4993, + "step": 29083 + }, + { + "epoch": 0.7985722130697419, + "grad_norm": 0.35487547516822815, + "learning_rate": 1.3136548277909931e-05, + "loss": 0.4106, + "step": 29084 + }, + { + "epoch": 0.7985996705107083, + "grad_norm": 0.4229618012905121, + "learning_rate": 1.313613817618266e-05, + "loss": 0.4984, + "step": 29085 + }, + { + "epoch": 0.7986271279516749, + "grad_norm": 0.3697095215320587, + "learning_rate": 1.313572806860549e-05, + "loss": 0.4999, + "step": 29086 + }, + { + "epoch": 0.7986545853926414, + "grad_norm": 0.4121082127094269, + "learning_rate": 1.3135317955179186e-05, + "loss": 0.4781, + "step": 29087 + }, + { + "epoch": 0.7986820428336079, + "grad_norm": 0.430002361536026, + "learning_rate": 1.3134907835904516e-05, + "loss": 0.4813, + "step": 29088 + }, + { + "epoch": 0.7987095002745744, + "grad_norm": 0.4083607792854309, + "learning_rate": 1.3134497710782239e-05, + "loss": 0.4761, + "step": 29089 + }, + { + "epoch": 0.7987369577155409, + "grad_norm": 0.3601089119911194, + "learning_rate": 1.3134087579813124e-05, + "loss": 0.478, + "step": 29090 + }, + { + "epoch": 0.7987644151565074, + "grad_norm": 0.4273609220981598, + "learning_rate": 1.3133677442997935e-05, + "loss": 0.4971, + "step": 29091 + }, + { + "epoch": 0.7987918725974739, + "grad_norm": 0.4148862957954407, + "learning_rate": 1.313326730033744e-05, + "loss": 0.6158, + "step": 29092 + }, + { + "epoch": 0.7988193300384404, + "grad_norm": 0.34946319460868835, + "learning_rate": 1.3132857151832399e-05, + "loss": 0.4901, + "step": 29093 + }, + { + "epoch": 0.798846787479407, + "grad_norm": 0.4432167410850525, + "learning_rate": 1.313244699748358e-05, + "loss": 0.6193, + "step": 29094 + }, + { + "epoch": 0.7988742449203734, + "grad_norm": 0.44857048988342285, + "learning_rate": 1.3132036837291749e-05, + "loss": 0.4659, + "step": 29095 + }, + { + "epoch": 0.79890170236134, + "grad_norm": 0.3773891031742096, + "learning_rate": 1.3131626671257669e-05, + "loss": 0.5581, + "step": 29096 + }, + { + "epoch": 0.7989291598023064, + "grad_norm": 0.37992119789123535, + "learning_rate": 1.3131216499382104e-05, + "loss": 0.4289, + "step": 29097 + }, + { + "epoch": 0.7989566172432729, + "grad_norm": 0.3994618058204651, + "learning_rate": 1.3130806321665823e-05, + "loss": 0.5594, + "step": 29098 + }, + { + "epoch": 0.7989840746842394, + "grad_norm": 0.4410887360572815, + "learning_rate": 1.3130396138109587e-05, + "loss": 0.5108, + "step": 29099 + }, + { + "epoch": 0.7990115321252059, + "grad_norm": 0.5912794470787048, + "learning_rate": 1.3129985948714166e-05, + "loss": 0.5296, + "step": 29100 + }, + { + "epoch": 0.7990389895661725, + "grad_norm": 0.40886834263801575, + "learning_rate": 1.3129575753480322e-05, + "loss": 0.4659, + "step": 29101 + }, + { + "epoch": 0.7990664470071389, + "grad_norm": 0.41191229224205017, + "learning_rate": 1.3129165552408819e-05, + "loss": 0.4523, + "step": 29102 + }, + { + "epoch": 0.7990939044481055, + "grad_norm": 0.38387927412986755, + "learning_rate": 1.3128755345500422e-05, + "loss": 0.4508, + "step": 29103 + }, + { + "epoch": 0.7991213618890719, + "grad_norm": 0.43947702646255493, + "learning_rate": 1.3128345132755898e-05, + "loss": 0.4797, + "step": 29104 + }, + { + "epoch": 0.7991488193300385, + "grad_norm": 0.36185574531555176, + "learning_rate": 1.3127934914176015e-05, + "loss": 0.4915, + "step": 29105 + }, + { + "epoch": 0.7991762767710049, + "grad_norm": 0.4595886766910553, + "learning_rate": 1.3127524689761533e-05, + "loss": 0.4743, + "step": 29106 + }, + { + "epoch": 0.7992037342119714, + "grad_norm": 0.4020952582359314, + "learning_rate": 1.3127114459513217e-05, + "loss": 0.5889, + "step": 29107 + }, + { + "epoch": 0.799231191652938, + "grad_norm": 0.3786865770816803, + "learning_rate": 1.3126704223431838e-05, + "loss": 0.5102, + "step": 29108 + }, + { + "epoch": 0.7992586490939044, + "grad_norm": 0.39134594798088074, + "learning_rate": 1.3126293981518155e-05, + "loss": 0.5629, + "step": 29109 + }, + { + "epoch": 0.799286106534871, + "grad_norm": 0.3818022906780243, + "learning_rate": 1.3125883733772939e-05, + "loss": 0.599, + "step": 29110 + }, + { + "epoch": 0.7993135639758374, + "grad_norm": 0.474028080701828, + "learning_rate": 1.3125473480196952e-05, + "loss": 0.4338, + "step": 29111 + }, + { + "epoch": 0.799341021416804, + "grad_norm": 0.4080114960670471, + "learning_rate": 1.3125063220790957e-05, + "loss": 0.5286, + "step": 29112 + }, + { + "epoch": 0.7993684788577704, + "grad_norm": 0.3642703890800476, + "learning_rate": 1.3124652955555724e-05, + "loss": 0.5151, + "step": 29113 + }, + { + "epoch": 0.799395936298737, + "grad_norm": 0.41107290983200073, + "learning_rate": 1.3124242684492014e-05, + "loss": 0.4436, + "step": 29114 + }, + { + "epoch": 0.7994233937397035, + "grad_norm": 0.3984230160713196, + "learning_rate": 1.3123832407600595e-05, + "loss": 0.5039, + "step": 29115 + }, + { + "epoch": 0.7994508511806699, + "grad_norm": 0.39857614040374756, + "learning_rate": 1.3123422124882232e-05, + "loss": 0.4713, + "step": 29116 + }, + { + "epoch": 0.7994783086216365, + "grad_norm": 0.36511629819869995, + "learning_rate": 1.3123011836337687e-05, + "loss": 0.489, + "step": 29117 + }, + { + "epoch": 0.7995057660626029, + "grad_norm": 0.4279373288154602, + "learning_rate": 1.3122601541967733e-05, + "loss": 0.4902, + "step": 29118 + }, + { + "epoch": 0.7995332235035695, + "grad_norm": 0.35879722237586975, + "learning_rate": 1.3122191241773129e-05, + "loss": 0.4692, + "step": 29119 + }, + { + "epoch": 0.7995606809445359, + "grad_norm": 0.4871772527694702, + "learning_rate": 1.3121780935754638e-05, + "loss": 0.4902, + "step": 29120 + }, + { + "epoch": 0.7995881383855025, + "grad_norm": 0.35764777660369873, + "learning_rate": 1.3121370623913033e-05, + "loss": 0.5113, + "step": 29121 + }, + { + "epoch": 0.799615595826469, + "grad_norm": 0.4404439926147461, + "learning_rate": 1.3120960306249073e-05, + "loss": 0.5129, + "step": 29122 + }, + { + "epoch": 0.7996430532674355, + "grad_norm": 0.34557315707206726, + "learning_rate": 1.312054998276353e-05, + "loss": 0.4338, + "step": 29123 + }, + { + "epoch": 0.799670510708402, + "grad_norm": 0.34522420167922974, + "learning_rate": 1.3120139653457162e-05, + "loss": 0.3949, + "step": 29124 + }, + { + "epoch": 0.7996979681493684, + "grad_norm": 1.368660807609558, + "learning_rate": 1.3119729318330738e-05, + "loss": 0.5649, + "step": 29125 + }, + { + "epoch": 0.799725425590335, + "grad_norm": 0.3706548511981964, + "learning_rate": 1.3119318977385025e-05, + "loss": 0.4643, + "step": 29126 + }, + { + "epoch": 0.7997528830313014, + "grad_norm": 0.4455910623073578, + "learning_rate": 1.3118908630620785e-05, + "loss": 0.486, + "step": 29127 + }, + { + "epoch": 0.799780340472268, + "grad_norm": 0.4087054133415222, + "learning_rate": 1.3118498278038785e-05, + "loss": 0.4282, + "step": 29128 + }, + { + "epoch": 0.7998077979132345, + "grad_norm": 0.3901224434375763, + "learning_rate": 1.311808791963979e-05, + "loss": 0.4786, + "step": 29129 + }, + { + "epoch": 0.799835255354201, + "grad_norm": 0.32245543599128723, + "learning_rate": 1.3117677555424566e-05, + "loss": 0.4286, + "step": 29130 + }, + { + "epoch": 0.7998627127951675, + "grad_norm": 0.3785116970539093, + "learning_rate": 1.3117267185393878e-05, + "loss": 0.4781, + "step": 29131 + }, + { + "epoch": 0.799890170236134, + "grad_norm": 0.414079874753952, + "learning_rate": 1.3116856809548495e-05, + "loss": 0.4662, + "step": 29132 + }, + { + "epoch": 0.7999176276771005, + "grad_norm": 0.4767438769340515, + "learning_rate": 1.3116446427889176e-05, + "loss": 0.5405, + "step": 29133 + }, + { + "epoch": 0.799945085118067, + "grad_norm": 0.5000969767570496, + "learning_rate": 1.3116036040416692e-05, + "loss": 0.4739, + "step": 29134 + }, + { + "epoch": 0.7999725425590335, + "grad_norm": 0.42922744154930115, + "learning_rate": 1.3115625647131802e-05, + "loss": 0.4968, + "step": 29135 + }, + { + "epoch": 0.8, + "grad_norm": 0.44298577308654785, + "learning_rate": 1.311521524803528e-05, + "loss": 0.5319, + "step": 29136 + }, + { + "epoch": 0.8000274574409665, + "grad_norm": 0.40506407618522644, + "learning_rate": 1.3114804843127886e-05, + "loss": 0.4823, + "step": 29137 + }, + { + "epoch": 0.800054914881933, + "grad_norm": 0.3785078823566437, + "learning_rate": 1.3114394432410387e-05, + "loss": 0.4462, + "step": 29138 + }, + { + "epoch": 0.8000823723228995, + "grad_norm": 0.3727637827396393, + "learning_rate": 1.3113984015883548e-05, + "loss": 0.4357, + "step": 29139 + }, + { + "epoch": 0.800109829763866, + "grad_norm": 0.3969329595565796, + "learning_rate": 1.3113573593548135e-05, + "loss": 0.5433, + "step": 29140 + }, + { + "epoch": 0.8001372872048325, + "grad_norm": 0.569121241569519, + "learning_rate": 1.3113163165404913e-05, + "loss": 0.5526, + "step": 29141 + }, + { + "epoch": 0.800164744645799, + "grad_norm": 0.44490399956703186, + "learning_rate": 1.311275273145465e-05, + "loss": 0.4389, + "step": 29142 + }, + { + "epoch": 0.8001922020867656, + "grad_norm": 0.42447057366371155, + "learning_rate": 1.3112342291698106e-05, + "loss": 0.3933, + "step": 29143 + }, + { + "epoch": 0.800219659527732, + "grad_norm": 0.40848058462142944, + "learning_rate": 1.3111931846136055e-05, + "loss": 0.4735, + "step": 29144 + }, + { + "epoch": 0.8002471169686985, + "grad_norm": 0.5512576103210449, + "learning_rate": 1.3111521394769255e-05, + "loss": 0.6299, + "step": 29145 + }, + { + "epoch": 0.800274574409665, + "grad_norm": 0.4880638122558594, + "learning_rate": 1.3111110937598475e-05, + "loss": 0.5181, + "step": 29146 + }, + { + "epoch": 0.8003020318506315, + "grad_norm": 0.35670068860054016, + "learning_rate": 1.311070047462448e-05, + "loss": 0.4629, + "step": 29147 + }, + { + "epoch": 0.800329489291598, + "grad_norm": 0.39041730761528015, + "learning_rate": 1.3110290005848034e-05, + "loss": 0.5705, + "step": 29148 + }, + { + "epoch": 0.8003569467325645, + "grad_norm": 0.37231674790382385, + "learning_rate": 1.3109879531269909e-05, + "loss": 0.54, + "step": 29149 + }, + { + "epoch": 0.8003844041735311, + "grad_norm": 0.4168016016483307, + "learning_rate": 1.3109469050890863e-05, + "loss": 0.478, + "step": 29150 + }, + { + "epoch": 0.8004118616144975, + "grad_norm": 0.46938660740852356, + "learning_rate": 1.3109058564711665e-05, + "loss": 0.5784, + "step": 29151 + }, + { + "epoch": 0.8004393190554641, + "grad_norm": 0.3670569360256195, + "learning_rate": 1.310864807273308e-05, + "loss": 0.5504, + "step": 29152 + }, + { + "epoch": 0.8004667764964305, + "grad_norm": 0.37420788407325745, + "learning_rate": 1.3108237574955875e-05, + "loss": 0.4928, + "step": 29153 + }, + { + "epoch": 0.800494233937397, + "grad_norm": 0.6301101446151733, + "learning_rate": 1.3107827071380817e-05, + "loss": 0.4848, + "step": 29154 + }, + { + "epoch": 0.8005216913783635, + "grad_norm": 0.4114154875278473, + "learning_rate": 1.3107416562008667e-05, + "loss": 0.5103, + "step": 29155 + }, + { + "epoch": 0.80054914881933, + "grad_norm": 0.4152678847312927, + "learning_rate": 1.3107006046840193e-05, + "loss": 0.4356, + "step": 29156 + }, + { + "epoch": 0.8005766062602966, + "grad_norm": 0.42905333638191223, + "learning_rate": 1.3106595525876162e-05, + "loss": 0.539, + "step": 29157 + }, + { + "epoch": 0.800604063701263, + "grad_norm": 0.44179752469062805, + "learning_rate": 1.3106184999117341e-05, + "loss": 0.5176, + "step": 29158 + }, + { + "epoch": 0.8006315211422296, + "grad_norm": 0.4532783627510071, + "learning_rate": 1.3105774466564488e-05, + "loss": 0.4806, + "step": 29159 + }, + { + "epoch": 0.800658978583196, + "grad_norm": 0.43077999353408813, + "learning_rate": 1.3105363928218379e-05, + "loss": 0.481, + "step": 29160 + }, + { + "epoch": 0.8006864360241626, + "grad_norm": 0.4026997685432434, + "learning_rate": 1.3104953384079772e-05, + "loss": 0.4536, + "step": 29161 + }, + { + "epoch": 0.800713893465129, + "grad_norm": 0.3785710334777832, + "learning_rate": 1.310454283414944e-05, + "loss": 0.5086, + "step": 29162 + }, + { + "epoch": 0.8007413509060956, + "grad_norm": 0.48225560784339905, + "learning_rate": 1.3104132278428146e-05, + "loss": 0.5338, + "step": 29163 + }, + { + "epoch": 0.8007688083470621, + "grad_norm": 0.3822985887527466, + "learning_rate": 1.3103721716916649e-05, + "loss": 0.4595, + "step": 29164 + }, + { + "epoch": 0.8007962657880285, + "grad_norm": 0.42490342259407043, + "learning_rate": 1.3103311149615723e-05, + "loss": 0.6067, + "step": 29165 + }, + { + "epoch": 0.8008237232289951, + "grad_norm": 0.5072552561759949, + "learning_rate": 1.310290057652613e-05, + "loss": 0.517, + "step": 29166 + }, + { + "epoch": 0.8008511806699615, + "grad_norm": 0.3773775100708008, + "learning_rate": 1.3102489997648638e-05, + "loss": 0.446, + "step": 29167 + }, + { + "epoch": 0.8008786381109281, + "grad_norm": 0.4199569821357727, + "learning_rate": 1.3102079412984012e-05, + "loss": 0.4436, + "step": 29168 + }, + { + "epoch": 0.8009060955518945, + "grad_norm": 0.43325966596603394, + "learning_rate": 1.3101668822533018e-05, + "loss": 0.5489, + "step": 29169 + }, + { + "epoch": 0.8009335529928611, + "grad_norm": 0.44219517707824707, + "learning_rate": 1.310125822629642e-05, + "loss": 0.446, + "step": 29170 + }, + { + "epoch": 0.8009610104338276, + "grad_norm": 0.39357301592826843, + "learning_rate": 1.3100847624274988e-05, + "loss": 0.4528, + "step": 29171 + }, + { + "epoch": 0.800988467874794, + "grad_norm": 0.37682464718818665, + "learning_rate": 1.3100437016469485e-05, + "loss": 0.5273, + "step": 29172 + }, + { + "epoch": 0.8010159253157606, + "grad_norm": 0.40211084485054016, + "learning_rate": 1.3100026402880677e-05, + "loss": 0.4865, + "step": 29173 + }, + { + "epoch": 0.801043382756727, + "grad_norm": 0.5158193111419678, + "learning_rate": 1.309961578350933e-05, + "loss": 0.4707, + "step": 29174 + }, + { + "epoch": 0.8010708401976936, + "grad_norm": 0.40495765209198, + "learning_rate": 1.309920515835621e-05, + "loss": 0.3965, + "step": 29175 + }, + { + "epoch": 0.80109829763866, + "grad_norm": 0.5107333660125732, + "learning_rate": 1.3098794527422086e-05, + "loss": 0.4284, + "step": 29176 + }, + { + "epoch": 0.8011257550796266, + "grad_norm": 0.3299923837184906, + "learning_rate": 1.3098383890707718e-05, + "loss": 0.4153, + "step": 29177 + }, + { + "epoch": 0.8011532125205931, + "grad_norm": 0.36808720231056213, + "learning_rate": 1.3097973248213876e-05, + "loss": 0.5153, + "step": 29178 + }, + { + "epoch": 0.8011806699615596, + "grad_norm": 0.4672467112541199, + "learning_rate": 1.3097562599941323e-05, + "loss": 0.4629, + "step": 29179 + }, + { + "epoch": 0.8012081274025261, + "grad_norm": 0.4379623532295227, + "learning_rate": 1.3097151945890832e-05, + "loss": 0.4771, + "step": 29180 + }, + { + "epoch": 0.8012355848434926, + "grad_norm": 0.5127833485603333, + "learning_rate": 1.3096741286063162e-05, + "loss": 0.5206, + "step": 29181 + }, + { + "epoch": 0.8012630422844591, + "grad_norm": 0.36670196056365967, + "learning_rate": 1.3096330620459078e-05, + "loss": 0.5611, + "step": 29182 + }, + { + "epoch": 0.8012904997254255, + "grad_norm": 0.4481138288974762, + "learning_rate": 1.3095919949079355e-05, + "loss": 0.5247, + "step": 29183 + }, + { + "epoch": 0.8013179571663921, + "grad_norm": 0.3792518973350525, + "learning_rate": 1.3095509271924747e-05, + "loss": 0.4895, + "step": 29184 + }, + { + "epoch": 0.8013454146073586, + "grad_norm": 0.3921165466308594, + "learning_rate": 1.309509858899603e-05, + "loss": 0.5042, + "step": 29185 + }, + { + "epoch": 0.8013728720483251, + "grad_norm": 0.40312889218330383, + "learning_rate": 1.3094687900293965e-05, + "loss": 0.4476, + "step": 29186 + }, + { + "epoch": 0.8014003294892916, + "grad_norm": 0.43470439314842224, + "learning_rate": 1.309427720581932e-05, + "loss": 0.5179, + "step": 29187 + }, + { + "epoch": 0.8014277869302581, + "grad_norm": 0.35027629137039185, + "learning_rate": 1.309386650557286e-05, + "loss": 0.4767, + "step": 29188 + }, + { + "epoch": 0.8014552443712246, + "grad_norm": 0.39299920201301575, + "learning_rate": 1.3093455799555352e-05, + "loss": 0.4734, + "step": 29189 + }, + { + "epoch": 0.8014827018121911, + "grad_norm": 0.9287964701652527, + "learning_rate": 1.3093045087767561e-05, + "loss": 0.5444, + "step": 29190 + }, + { + "epoch": 0.8015101592531576, + "grad_norm": 0.37981459498405457, + "learning_rate": 1.3092634370210258e-05, + "loss": 0.5016, + "step": 29191 + }, + { + "epoch": 0.8015376166941242, + "grad_norm": 0.37807193398475647, + "learning_rate": 1.3092223646884198e-05, + "loss": 0.4492, + "step": 29192 + }, + { + "epoch": 0.8015650741350906, + "grad_norm": 0.41267430782318115, + "learning_rate": 1.3091812917790158e-05, + "loss": 0.5157, + "step": 29193 + }, + { + "epoch": 0.8015925315760571, + "grad_norm": 0.7747998833656311, + "learning_rate": 1.3091402182928901e-05, + "loss": 0.4798, + "step": 29194 + }, + { + "epoch": 0.8016199890170236, + "grad_norm": 0.3421415090560913, + "learning_rate": 1.3090991442301189e-05, + "loss": 0.4514, + "step": 29195 + }, + { + "epoch": 0.8016474464579901, + "grad_norm": 0.4311414659023285, + "learning_rate": 1.3090580695907794e-05, + "loss": 0.5359, + "step": 29196 + }, + { + "epoch": 0.8016749038989566, + "grad_norm": 0.3741026818752289, + "learning_rate": 1.3090169943749475e-05, + "loss": 0.4867, + "step": 29197 + }, + { + "epoch": 0.8017023613399231, + "grad_norm": 0.3668615221977234, + "learning_rate": 1.3089759185827008e-05, + "loss": 0.4267, + "step": 29198 + }, + { + "epoch": 0.8017298187808897, + "grad_norm": 0.4216841459274292, + "learning_rate": 1.3089348422141153e-05, + "loss": 0.615, + "step": 29199 + }, + { + "epoch": 0.8017572762218561, + "grad_norm": 0.578728437423706, + "learning_rate": 1.3088937652692675e-05, + "loss": 0.4749, + "step": 29200 + }, + { + "epoch": 0.8017847336628227, + "grad_norm": 0.3730204403400421, + "learning_rate": 1.3088526877482343e-05, + "loss": 0.4474, + "step": 29201 + }, + { + "epoch": 0.8018121911037891, + "grad_norm": 0.3595421314239502, + "learning_rate": 1.3088116096510924e-05, + "loss": 0.5449, + "step": 29202 + }, + { + "epoch": 0.8018396485447556, + "grad_norm": 0.3826541602611542, + "learning_rate": 1.3087705309779182e-05, + "loss": 0.5341, + "step": 29203 + }, + { + "epoch": 0.8018671059857221, + "grad_norm": 0.42042264342308044, + "learning_rate": 1.3087294517287882e-05, + "loss": 0.5808, + "step": 29204 + }, + { + "epoch": 0.8018945634266886, + "grad_norm": 0.4367527663707733, + "learning_rate": 1.3086883719037797e-05, + "loss": 0.5858, + "step": 29205 + }, + { + "epoch": 0.8019220208676552, + "grad_norm": 0.3621957302093506, + "learning_rate": 1.3086472915029687e-05, + "loss": 0.4595, + "step": 29206 + }, + { + "epoch": 0.8019494783086216, + "grad_norm": 0.35541832447052, + "learning_rate": 1.308606210526432e-05, + "loss": 0.5087, + "step": 29207 + }, + { + "epoch": 0.8019769357495882, + "grad_norm": 0.5034292936325073, + "learning_rate": 1.3085651289742463e-05, + "loss": 0.6516, + "step": 29208 + }, + { + "epoch": 0.8020043931905546, + "grad_norm": 0.3400304317474365, + "learning_rate": 1.3085240468464877e-05, + "loss": 0.4546, + "step": 29209 + }, + { + "epoch": 0.8020318506315212, + "grad_norm": 0.36553654074668884, + "learning_rate": 1.3084829641432337e-05, + "loss": 0.476, + "step": 29210 + }, + { + "epoch": 0.8020593080724876, + "grad_norm": 0.3430170714855194, + "learning_rate": 1.3084418808645604e-05, + "loss": 0.5083, + "step": 29211 + }, + { + "epoch": 0.8020867655134541, + "grad_norm": 0.3544014096260071, + "learning_rate": 1.3084007970105445e-05, + "loss": 0.5695, + "step": 29212 + }, + { + "epoch": 0.8021142229544207, + "grad_norm": 0.4599895477294922, + "learning_rate": 1.3083597125812629e-05, + "loss": 0.4948, + "step": 29213 + }, + { + "epoch": 0.8021416803953871, + "grad_norm": 0.4548998773097992, + "learning_rate": 1.3083186275767916e-05, + "loss": 0.5179, + "step": 29214 + }, + { + "epoch": 0.8021691378363537, + "grad_norm": 0.41896748542785645, + "learning_rate": 1.308277541997208e-05, + "loss": 0.5401, + "step": 29215 + }, + { + "epoch": 0.8021965952773201, + "grad_norm": 0.3558630645275116, + "learning_rate": 1.3082364558425885e-05, + "loss": 0.4145, + "step": 29216 + }, + { + "epoch": 0.8022240527182867, + "grad_norm": 0.3833305537700653, + "learning_rate": 1.3081953691130092e-05, + "loss": 0.4613, + "step": 29217 + }, + { + "epoch": 0.8022515101592531, + "grad_norm": 0.3405984938144684, + "learning_rate": 1.3081542818085476e-05, + "loss": 0.3917, + "step": 29218 + }, + { + "epoch": 0.8022789676002197, + "grad_norm": 0.3519830107688904, + "learning_rate": 1.3081131939292795e-05, + "loss": 0.4249, + "step": 29219 + }, + { + "epoch": 0.8023064250411862, + "grad_norm": 0.4106930196285248, + "learning_rate": 1.3080721054752824e-05, + "loss": 0.5247, + "step": 29220 + }, + { + "epoch": 0.8023338824821526, + "grad_norm": 0.368289589881897, + "learning_rate": 1.308031016446632e-05, + "loss": 0.5197, + "step": 29221 + }, + { + "epoch": 0.8023613399231192, + "grad_norm": 0.4990631341934204, + "learning_rate": 1.3079899268434057e-05, + "loss": 0.5068, + "step": 29222 + }, + { + "epoch": 0.8023887973640856, + "grad_norm": 0.4203818440437317, + "learning_rate": 1.3079488366656801e-05, + "loss": 0.5577, + "step": 29223 + }, + { + "epoch": 0.8024162548050522, + "grad_norm": 0.3579583764076233, + "learning_rate": 1.3079077459135314e-05, + "loss": 0.5357, + "step": 29224 + }, + { + "epoch": 0.8024437122460186, + "grad_norm": 0.3787112832069397, + "learning_rate": 1.3078666545870367e-05, + "loss": 0.5438, + "step": 29225 + }, + { + "epoch": 0.8024711696869852, + "grad_norm": 0.35144075751304626, + "learning_rate": 1.307825562686272e-05, + "loss": 0.4945, + "step": 29226 + }, + { + "epoch": 0.8024986271279517, + "grad_norm": 0.38969871401786804, + "learning_rate": 1.3077844702113148e-05, + "loss": 0.5285, + "step": 29227 + }, + { + "epoch": 0.8025260845689182, + "grad_norm": 0.3513358235359192, + "learning_rate": 1.3077433771622412e-05, + "loss": 0.4304, + "step": 29228 + }, + { + "epoch": 0.8025535420098847, + "grad_norm": 0.3572852611541748, + "learning_rate": 1.3077022835391278e-05, + "loss": 0.5002, + "step": 29229 + }, + { + "epoch": 0.8025809994508512, + "grad_norm": 0.42672204971313477, + "learning_rate": 1.3076611893420516e-05, + "loss": 0.5599, + "step": 29230 + }, + { + "epoch": 0.8026084568918177, + "grad_norm": 0.36939480900764465, + "learning_rate": 1.3076200945710888e-05, + "loss": 0.5116, + "step": 29231 + }, + { + "epoch": 0.8026359143327841, + "grad_norm": 0.4221998453140259, + "learning_rate": 1.3075789992263168e-05, + "loss": 0.5114, + "step": 29232 + }, + { + "epoch": 0.8026633717737507, + "grad_norm": 0.40252476930618286, + "learning_rate": 1.3075379033078117e-05, + "loss": 0.5017, + "step": 29233 + }, + { + "epoch": 0.8026908292147172, + "grad_norm": 0.4087562561035156, + "learning_rate": 1.30749680681565e-05, + "loss": 0.534, + "step": 29234 + }, + { + "epoch": 0.8027182866556837, + "grad_norm": 0.4063475728034973, + "learning_rate": 1.307455709749909e-05, + "loss": 0.5283, + "step": 29235 + }, + { + "epoch": 0.8027457440966502, + "grad_norm": 0.4030974209308624, + "learning_rate": 1.3074146121106646e-05, + "loss": 0.5429, + "step": 29236 + }, + { + "epoch": 0.8027732015376167, + "grad_norm": 0.37640929222106934, + "learning_rate": 1.307373513897994e-05, + "loss": 0.4406, + "step": 29237 + }, + { + "epoch": 0.8028006589785832, + "grad_norm": 0.3428286612033844, + "learning_rate": 1.3073324151119737e-05, + "loss": 0.4398, + "step": 29238 + }, + { + "epoch": 0.8028281164195497, + "grad_norm": 0.3896891176700592, + "learning_rate": 1.3072913157526804e-05, + "loss": 0.4859, + "step": 29239 + }, + { + "epoch": 0.8028555738605162, + "grad_norm": 0.398946613073349, + "learning_rate": 1.3072502158201905e-05, + "loss": 0.4983, + "step": 29240 + }, + { + "epoch": 0.8028830313014828, + "grad_norm": 0.4389644265174866, + "learning_rate": 1.307209115314581e-05, + "loss": 0.4895, + "step": 29241 + }, + { + "epoch": 0.8029104887424492, + "grad_norm": 0.36754414439201355, + "learning_rate": 1.3071680142359287e-05, + "loss": 0.5013, + "step": 29242 + }, + { + "epoch": 0.8029379461834157, + "grad_norm": 0.3834399878978729, + "learning_rate": 1.3071269125843099e-05, + "loss": 0.4652, + "step": 29243 + }, + { + "epoch": 0.8029654036243822, + "grad_norm": 0.40663254261016846, + "learning_rate": 1.307085810359801e-05, + "loss": 0.4608, + "step": 29244 + }, + { + "epoch": 0.8029928610653487, + "grad_norm": 0.4026694595813751, + "learning_rate": 1.3070447075624795e-05, + "loss": 0.5746, + "step": 29245 + }, + { + "epoch": 0.8030203185063152, + "grad_norm": 0.4021170139312744, + "learning_rate": 1.3070036041924213e-05, + "loss": 0.5076, + "step": 29246 + }, + { + "epoch": 0.8030477759472817, + "grad_norm": 0.3857075273990631, + "learning_rate": 1.3069625002497037e-05, + "loss": 0.4789, + "step": 29247 + }, + { + "epoch": 0.8030752333882483, + "grad_norm": 0.40717148780822754, + "learning_rate": 1.3069213957344029e-05, + "loss": 0.5332, + "step": 29248 + }, + { + "epoch": 0.8031026908292147, + "grad_norm": 0.5499364733695984, + "learning_rate": 1.3068802906465956e-05, + "loss": 0.4673, + "step": 29249 + }, + { + "epoch": 0.8031301482701813, + "grad_norm": 0.416138619184494, + "learning_rate": 1.3068391849863588e-05, + "loss": 0.4311, + "step": 29250 + }, + { + "epoch": 0.8031576057111477, + "grad_norm": 0.44073930382728577, + "learning_rate": 1.3067980787537691e-05, + "loss": 0.491, + "step": 29251 + }, + { + "epoch": 0.8031850631521142, + "grad_norm": 0.40688058733940125, + "learning_rate": 1.3067569719489029e-05, + "loss": 0.5427, + "step": 29252 + }, + { + "epoch": 0.8032125205930807, + "grad_norm": 0.36823633313179016, + "learning_rate": 1.3067158645718372e-05, + "loss": 0.4854, + "step": 29253 + }, + { + "epoch": 0.8032399780340472, + "grad_norm": 0.3807859420776367, + "learning_rate": 1.3066747566226484e-05, + "loss": 0.4411, + "step": 29254 + }, + { + "epoch": 0.8032674354750138, + "grad_norm": 0.3783659338951111, + "learning_rate": 1.3066336481014133e-05, + "loss": 0.5111, + "step": 29255 + }, + { + "epoch": 0.8032948929159802, + "grad_norm": 0.39556705951690674, + "learning_rate": 1.3065925390082086e-05, + "loss": 0.5307, + "step": 29256 + }, + { + "epoch": 0.8033223503569468, + "grad_norm": 0.4396951198577881, + "learning_rate": 1.3065514293431107e-05, + "loss": 0.4986, + "step": 29257 + }, + { + "epoch": 0.8033498077979132, + "grad_norm": 2.4530866146087646, + "learning_rate": 1.306510319106197e-05, + "loss": 0.5037, + "step": 29258 + }, + { + "epoch": 0.8033772652388798, + "grad_norm": 0.3830241560935974, + "learning_rate": 1.3064692082975432e-05, + "loss": 0.5047, + "step": 29259 + }, + { + "epoch": 0.8034047226798462, + "grad_norm": 0.37599197030067444, + "learning_rate": 1.3064280969172269e-05, + "loss": 0.5248, + "step": 29260 + }, + { + "epoch": 0.8034321801208127, + "grad_norm": 0.40679746866226196, + "learning_rate": 1.3063869849653244e-05, + "loss": 0.4448, + "step": 29261 + }, + { + "epoch": 0.8034596375617793, + "grad_norm": 0.42717593908309937, + "learning_rate": 1.3063458724419122e-05, + "loss": 0.563, + "step": 29262 + }, + { + "epoch": 0.8034870950027457, + "grad_norm": 0.3784163296222687, + "learning_rate": 1.3063047593470675e-05, + "loss": 0.5172, + "step": 29263 + }, + { + "epoch": 0.8035145524437123, + "grad_norm": 0.43950018286705017, + "learning_rate": 1.3062636456808661e-05, + "loss": 0.5266, + "step": 29264 + }, + { + "epoch": 0.8035420098846787, + "grad_norm": 0.39761871099472046, + "learning_rate": 1.3062225314433858e-05, + "loss": 0.4879, + "step": 29265 + }, + { + "epoch": 0.8035694673256453, + "grad_norm": 0.3379712998867035, + "learning_rate": 1.3061814166347026e-05, + "loss": 0.3819, + "step": 29266 + }, + { + "epoch": 0.8035969247666117, + "grad_norm": 0.35254883766174316, + "learning_rate": 1.3061403012548932e-05, + "loss": 0.4624, + "step": 29267 + }, + { + "epoch": 0.8036243822075783, + "grad_norm": 0.3758380711078644, + "learning_rate": 1.3060991853040345e-05, + "loss": 0.5109, + "step": 29268 + }, + { + "epoch": 0.8036518396485448, + "grad_norm": 0.36637696623802185, + "learning_rate": 1.3060580687822033e-05, + "loss": 0.5015, + "step": 29269 + }, + { + "epoch": 0.8036792970895112, + "grad_norm": 0.3569898307323456, + "learning_rate": 1.3060169516894759e-05, + "loss": 0.4998, + "step": 29270 + }, + { + "epoch": 0.8037067545304778, + "grad_norm": 0.3481728136539459, + "learning_rate": 1.3059758340259293e-05, + "loss": 0.5021, + "step": 29271 + }, + { + "epoch": 0.8037342119714442, + "grad_norm": 0.4171615540981293, + "learning_rate": 1.30593471579164e-05, + "loss": 0.4763, + "step": 29272 + }, + { + "epoch": 0.8037616694124108, + "grad_norm": 0.3932040333747864, + "learning_rate": 1.3058935969866848e-05, + "loss": 0.5428, + "step": 29273 + }, + { + "epoch": 0.8037891268533772, + "grad_norm": 0.35196009278297424, + "learning_rate": 1.3058524776111405e-05, + "loss": 0.4913, + "step": 29274 + }, + { + "epoch": 0.8038165842943438, + "grad_norm": 0.3961043655872345, + "learning_rate": 1.3058113576650837e-05, + "loss": 0.402, + "step": 29275 + }, + { + "epoch": 0.8038440417353103, + "grad_norm": 0.3783618211746216, + "learning_rate": 1.3057702371485912e-05, + "loss": 0.532, + "step": 29276 + }, + { + "epoch": 0.8038714991762768, + "grad_norm": 0.4880228340625763, + "learning_rate": 1.3057291160617394e-05, + "loss": 0.5304, + "step": 29277 + }, + { + "epoch": 0.8038989566172433, + "grad_norm": 0.3352951407432556, + "learning_rate": 1.3056879944046054e-05, + "loss": 0.4647, + "step": 29278 + }, + { + "epoch": 0.8039264140582097, + "grad_norm": 0.41419297456741333, + "learning_rate": 1.305646872177266e-05, + "loss": 0.5851, + "step": 29279 + }, + { + "epoch": 0.8039538714991763, + "grad_norm": 0.3965702950954437, + "learning_rate": 1.305605749379797e-05, + "loss": 0.4502, + "step": 29280 + }, + { + "epoch": 0.8039813289401427, + "grad_norm": 0.42804551124572754, + "learning_rate": 1.3055646260122763e-05, + "loss": 0.5886, + "step": 29281 + }, + { + "epoch": 0.8040087863811093, + "grad_norm": 0.3650549650192261, + "learning_rate": 1.3055235020747797e-05, + "loss": 0.4509, + "step": 29282 + }, + { + "epoch": 0.8040362438220758, + "grad_norm": 0.34479251503944397, + "learning_rate": 1.3054823775673844e-05, + "loss": 0.4279, + "step": 29283 + }, + { + "epoch": 0.8040637012630423, + "grad_norm": 0.4186252951622009, + "learning_rate": 1.305441252490167e-05, + "loss": 0.3871, + "step": 29284 + }, + { + "epoch": 0.8040911587040088, + "grad_norm": 0.3462826609611511, + "learning_rate": 1.3054001268432039e-05, + "loss": 0.4502, + "step": 29285 + }, + { + "epoch": 0.8041186161449753, + "grad_norm": 0.44076478481292725, + "learning_rate": 1.3053590006265724e-05, + "loss": 0.5235, + "step": 29286 + }, + { + "epoch": 0.8041460735859418, + "grad_norm": 0.3502502143383026, + "learning_rate": 1.305317873840349e-05, + "loss": 0.4168, + "step": 29287 + }, + { + "epoch": 0.8041735310269083, + "grad_norm": 0.38856756687164307, + "learning_rate": 1.3052767464846099e-05, + "loss": 0.4506, + "step": 29288 + }, + { + "epoch": 0.8042009884678748, + "grad_norm": 0.41032955050468445, + "learning_rate": 1.3052356185594326e-05, + "loss": 0.5799, + "step": 29289 + }, + { + "epoch": 0.8042284459088413, + "grad_norm": 0.3966670632362366, + "learning_rate": 1.305194490064893e-05, + "loss": 0.5255, + "step": 29290 + }, + { + "epoch": 0.8042559033498078, + "grad_norm": 0.40781712532043457, + "learning_rate": 1.3051533610010688e-05, + "loss": 0.5297, + "step": 29291 + }, + { + "epoch": 0.8042833607907743, + "grad_norm": 0.3736419379711151, + "learning_rate": 1.305112231368036e-05, + "loss": 0.4154, + "step": 29292 + }, + { + "epoch": 0.8043108182317408, + "grad_norm": 0.36889439821243286, + "learning_rate": 1.3050711011658714e-05, + "loss": 0.5015, + "step": 29293 + }, + { + "epoch": 0.8043382756727073, + "grad_norm": 0.39999228715896606, + "learning_rate": 1.3050299703946521e-05, + "loss": 0.5293, + "step": 29294 + }, + { + "epoch": 0.8043657331136738, + "grad_norm": 0.38545048236846924, + "learning_rate": 1.3049888390544541e-05, + "loss": 0.5369, + "step": 29295 + }, + { + "epoch": 0.8043931905546403, + "grad_norm": 0.4583372473716736, + "learning_rate": 1.304947707145355e-05, + "loss": 0.5271, + "step": 29296 + }, + { + "epoch": 0.8044206479956069, + "grad_norm": 0.36652520298957825, + "learning_rate": 1.304906574667431e-05, + "loss": 0.4821, + "step": 29297 + }, + { + "epoch": 0.8044481054365733, + "grad_norm": 0.3696240186691284, + "learning_rate": 1.3048654416207587e-05, + "loss": 0.4932, + "step": 29298 + }, + { + "epoch": 0.8044755628775399, + "grad_norm": 0.3374003469944, + "learning_rate": 1.3048243080054152e-05, + "loss": 0.4159, + "step": 29299 + }, + { + "epoch": 0.8045030203185063, + "grad_norm": 0.42727285623550415, + "learning_rate": 1.304783173821477e-05, + "loss": 0.5362, + "step": 29300 + }, + { + "epoch": 0.8045304777594728, + "grad_norm": 0.3621295988559723, + "learning_rate": 1.3047420390690211e-05, + "loss": 0.4568, + "step": 29301 + }, + { + "epoch": 0.8045579352004393, + "grad_norm": 0.3824690878391266, + "learning_rate": 1.304700903748124e-05, + "loss": 0.4907, + "step": 29302 + }, + { + "epoch": 0.8045853926414058, + "grad_norm": 0.42043012380599976, + "learning_rate": 1.3046597678588621e-05, + "loss": 0.5117, + "step": 29303 + }, + { + "epoch": 0.8046128500823724, + "grad_norm": 0.3988417387008667, + "learning_rate": 1.3046186314013129e-05, + "loss": 0.4722, + "step": 29304 + }, + { + "epoch": 0.8046403075233388, + "grad_norm": 0.45411643385887146, + "learning_rate": 1.3045774943755526e-05, + "loss": 0.4466, + "step": 29305 + }, + { + "epoch": 0.8046677649643054, + "grad_norm": 0.37517988681793213, + "learning_rate": 1.304536356781658e-05, + "loss": 0.4144, + "step": 29306 + }, + { + "epoch": 0.8046952224052718, + "grad_norm": 0.3832337260246277, + "learning_rate": 1.3044952186197063e-05, + "loss": 0.4692, + "step": 29307 + }, + { + "epoch": 0.8047226798462384, + "grad_norm": 0.3998803496360779, + "learning_rate": 1.3044540798897733e-05, + "loss": 0.4552, + "step": 29308 + }, + { + "epoch": 0.8047501372872048, + "grad_norm": 0.3992183804512024, + "learning_rate": 1.3044129405919366e-05, + "loss": 0.4342, + "step": 29309 + }, + { + "epoch": 0.8047775947281713, + "grad_norm": 0.3600234389305115, + "learning_rate": 1.3043718007262724e-05, + "loss": 0.4406, + "step": 29310 + }, + { + "epoch": 0.8048050521691379, + "grad_norm": 0.3486102223396301, + "learning_rate": 1.3043306602928576e-05, + "loss": 0.5301, + "step": 29311 + }, + { + "epoch": 0.8048325096101043, + "grad_norm": 0.4384933114051819, + "learning_rate": 1.3042895192917693e-05, + "loss": 0.5678, + "step": 29312 + }, + { + "epoch": 0.8048599670510709, + "grad_norm": 0.4090483784675598, + "learning_rate": 1.3042483777230837e-05, + "loss": 0.4957, + "step": 29313 + }, + { + "epoch": 0.8048874244920373, + "grad_norm": 0.3703594505786896, + "learning_rate": 1.3042072355868779e-05, + "loss": 0.4884, + "step": 29314 + }, + { + "epoch": 0.8049148819330039, + "grad_norm": 0.4462020993232727, + "learning_rate": 1.3041660928832282e-05, + "loss": 0.4828, + "step": 29315 + }, + { + "epoch": 0.8049423393739703, + "grad_norm": 0.37689974904060364, + "learning_rate": 1.3041249496122119e-05, + "loss": 0.617, + "step": 29316 + }, + { + "epoch": 0.8049697968149369, + "grad_norm": 0.36636969447135925, + "learning_rate": 1.3040838057739056e-05, + "loss": 0.4816, + "step": 29317 + }, + { + "epoch": 0.8049972542559034, + "grad_norm": 0.3613339066505432, + "learning_rate": 1.3040426613683858e-05, + "loss": 0.4588, + "step": 29318 + }, + { + "epoch": 0.8050247116968698, + "grad_norm": 0.38181352615356445, + "learning_rate": 1.3040015163957294e-05, + "loss": 0.5458, + "step": 29319 + }, + { + "epoch": 0.8050521691378364, + "grad_norm": 0.41048315167427063, + "learning_rate": 1.3039603708560133e-05, + "loss": 0.5633, + "step": 29320 + }, + { + "epoch": 0.8050796265788028, + "grad_norm": 0.3927260637283325, + "learning_rate": 1.303919224749314e-05, + "loss": 0.5098, + "step": 29321 + }, + { + "epoch": 0.8051070840197694, + "grad_norm": 0.3640190064907074, + "learning_rate": 1.3038780780757083e-05, + "loss": 0.5287, + "step": 29322 + }, + { + "epoch": 0.8051345414607358, + "grad_norm": 0.4234021008014679, + "learning_rate": 1.3038369308352731e-05, + "loss": 0.4428, + "step": 29323 + }, + { + "epoch": 0.8051619989017024, + "grad_norm": 0.3672170042991638, + "learning_rate": 1.303795783028085e-05, + "loss": 0.4467, + "step": 29324 + }, + { + "epoch": 0.8051894563426688, + "grad_norm": 0.3756997883319855, + "learning_rate": 1.3037546346542206e-05, + "loss": 0.5332, + "step": 29325 + }, + { + "epoch": 0.8052169137836354, + "grad_norm": 0.4669555723667145, + "learning_rate": 1.303713485713757e-05, + "loss": 0.5059, + "step": 29326 + }, + { + "epoch": 0.8052443712246019, + "grad_norm": 0.4370321035385132, + "learning_rate": 1.303672336206771e-05, + "loss": 0.4102, + "step": 29327 + }, + { + "epoch": 0.8052718286655683, + "grad_norm": 0.41028234362602234, + "learning_rate": 1.303631186133339e-05, + "loss": 0.5214, + "step": 29328 + }, + { + "epoch": 0.8052992861065349, + "grad_norm": 0.42239952087402344, + "learning_rate": 1.3035900354935379e-05, + "loss": 0.5181, + "step": 29329 + }, + { + "epoch": 0.8053267435475013, + "grad_norm": 0.3377436101436615, + "learning_rate": 1.3035488842874447e-05, + "loss": 0.4914, + "step": 29330 + }, + { + "epoch": 0.8053542009884679, + "grad_norm": 0.4023153483867645, + "learning_rate": 1.303507732515136e-05, + "loss": 0.4867, + "step": 29331 + }, + { + "epoch": 0.8053816584294343, + "grad_norm": 0.4480002820491791, + "learning_rate": 1.303466580176688e-05, + "loss": 0.5036, + "step": 29332 + }, + { + "epoch": 0.8054091158704009, + "grad_norm": 0.3864193260669708, + "learning_rate": 1.3034254272721784e-05, + "loss": 0.5176, + "step": 29333 + }, + { + "epoch": 0.8054365733113674, + "grad_norm": 0.44890260696411133, + "learning_rate": 1.3033842738016833e-05, + "loss": 0.5265, + "step": 29334 + }, + { + "epoch": 0.8054640307523339, + "grad_norm": 0.4213705360889435, + "learning_rate": 1.30334311976528e-05, + "loss": 0.4608, + "step": 29335 + }, + { + "epoch": 0.8054914881933004, + "grad_norm": 0.3722233176231384, + "learning_rate": 1.3033019651630448e-05, + "loss": 0.498, + "step": 29336 + }, + { + "epoch": 0.8055189456342668, + "grad_norm": 0.49473217129707336, + "learning_rate": 1.3032608099950545e-05, + "loss": 0.4858, + "step": 29337 + }, + { + "epoch": 0.8055464030752334, + "grad_norm": 0.35101282596588135, + "learning_rate": 1.3032196542613862e-05, + "loss": 0.4626, + "step": 29338 + }, + { + "epoch": 0.8055738605161998, + "grad_norm": 0.37192296981811523, + "learning_rate": 1.303178497962116e-05, + "loss": 0.5281, + "step": 29339 + }, + { + "epoch": 0.8056013179571664, + "grad_norm": 0.38906019926071167, + "learning_rate": 1.3031373410973218e-05, + "loss": 0.5101, + "step": 29340 + }, + { + "epoch": 0.8056287753981329, + "grad_norm": 0.41012558341026306, + "learning_rate": 1.3030961836670794e-05, + "loss": 0.5216, + "step": 29341 + }, + { + "epoch": 0.8056562328390994, + "grad_norm": 0.4278920292854309, + "learning_rate": 1.3030550256714658e-05, + "loss": 0.601, + "step": 29342 + }, + { + "epoch": 0.8056836902800659, + "grad_norm": 0.42188629508018494, + "learning_rate": 1.3030138671105581e-05, + "loss": 0.539, + "step": 29343 + }, + { + "epoch": 0.8057111477210324, + "grad_norm": 0.36156055331230164, + "learning_rate": 1.3029727079844325e-05, + "loss": 0.5271, + "step": 29344 + }, + { + "epoch": 0.8057386051619989, + "grad_norm": 0.3534310758113861, + "learning_rate": 1.3029315482931664e-05, + "loss": 0.4318, + "step": 29345 + }, + { + "epoch": 0.8057660626029653, + "grad_norm": 0.3930320143699646, + "learning_rate": 1.3028903880368362e-05, + "loss": 0.4766, + "step": 29346 + }, + { + "epoch": 0.8057935200439319, + "grad_norm": 0.39342641830444336, + "learning_rate": 1.3028492272155186e-05, + "loss": 0.4411, + "step": 29347 + }, + { + "epoch": 0.8058209774848984, + "grad_norm": 0.4272310137748718, + "learning_rate": 1.3028080658292907e-05, + "loss": 0.5005, + "step": 29348 + }, + { + "epoch": 0.8058484349258649, + "grad_norm": 0.4306212067604065, + "learning_rate": 1.3027669038782294e-05, + "loss": 0.4856, + "step": 29349 + }, + { + "epoch": 0.8058758923668314, + "grad_norm": 0.4168049097061157, + "learning_rate": 1.3027257413624105e-05, + "loss": 0.5051, + "step": 29350 + }, + { + "epoch": 0.8059033498077979, + "grad_norm": 0.48116442561149597, + "learning_rate": 1.302684578281912e-05, + "loss": 0.542, + "step": 29351 + }, + { + "epoch": 0.8059308072487644, + "grad_norm": 0.36561664938926697, + "learning_rate": 1.3026434146368099e-05, + "loss": 0.5119, + "step": 29352 + }, + { + "epoch": 0.8059582646897309, + "grad_norm": 0.43488359451293945, + "learning_rate": 1.3026022504271814e-05, + "loss": 0.431, + "step": 29353 + }, + { + "epoch": 0.8059857221306974, + "grad_norm": 0.3575337827205658, + "learning_rate": 1.3025610856531033e-05, + "loss": 0.5065, + "step": 29354 + }, + { + "epoch": 0.806013179571664, + "grad_norm": 0.3681592345237732, + "learning_rate": 1.3025199203146516e-05, + "loss": 0.4955, + "step": 29355 + }, + { + "epoch": 0.8060406370126304, + "grad_norm": 0.3524419367313385, + "learning_rate": 1.3024787544119041e-05, + "loss": 0.4764, + "step": 29356 + }, + { + "epoch": 0.806068094453597, + "grad_norm": 0.4165794551372528, + "learning_rate": 1.3024375879449371e-05, + "loss": 0.6015, + "step": 29357 + }, + { + "epoch": 0.8060955518945634, + "grad_norm": 0.3568027913570404, + "learning_rate": 1.3023964209138275e-05, + "loss": 0.4648, + "step": 29358 + }, + { + "epoch": 0.8061230093355299, + "grad_norm": 0.37588179111480713, + "learning_rate": 1.3023552533186522e-05, + "loss": 0.618, + "step": 29359 + }, + { + "epoch": 0.8061504667764964, + "grad_norm": 0.400118887424469, + "learning_rate": 1.3023140851594877e-05, + "loss": 0.5167, + "step": 29360 + }, + { + "epoch": 0.8061779242174629, + "grad_norm": 0.4007990062236786, + "learning_rate": 1.302272916436411e-05, + "loss": 0.4874, + "step": 29361 + }, + { + "epoch": 0.8062053816584295, + "grad_norm": 0.3932684063911438, + "learning_rate": 1.302231747149499e-05, + "loss": 0.5122, + "step": 29362 + }, + { + "epoch": 0.8062328390993959, + "grad_norm": 0.3700112998485565, + "learning_rate": 1.302190577298828e-05, + "loss": 0.4661, + "step": 29363 + }, + { + "epoch": 0.8062602965403625, + "grad_norm": 0.3656933307647705, + "learning_rate": 1.3021494068844753e-05, + "loss": 0.4572, + "step": 29364 + }, + { + "epoch": 0.8062877539813289, + "grad_norm": 0.36733341217041016, + "learning_rate": 1.3021082359065173e-05, + "loss": 0.5285, + "step": 29365 + }, + { + "epoch": 0.8063152114222955, + "grad_norm": 0.34713247418403625, + "learning_rate": 1.3020670643650313e-05, + "loss": 0.4932, + "step": 29366 + }, + { + "epoch": 0.8063426688632619, + "grad_norm": 0.39993149042129517, + "learning_rate": 1.3020258922600939e-05, + "loss": 0.4571, + "step": 29367 + }, + { + "epoch": 0.8063701263042284, + "grad_norm": 0.3778378367424011, + "learning_rate": 1.3019847195917815e-05, + "loss": 0.5697, + "step": 29368 + }, + { + "epoch": 0.806397583745195, + "grad_norm": 0.3687410056591034, + "learning_rate": 1.3019435463601713e-05, + "loss": 0.4914, + "step": 29369 + }, + { + "epoch": 0.8064250411861614, + "grad_norm": 0.38451018929481506, + "learning_rate": 1.3019023725653398e-05, + "loss": 0.4369, + "step": 29370 + }, + { + "epoch": 0.806452498627128, + "grad_norm": 0.3888314962387085, + "learning_rate": 1.3018611982073646e-05, + "loss": 0.5161, + "step": 29371 + }, + { + "epoch": 0.8064799560680944, + "grad_norm": 0.40148288011550903, + "learning_rate": 1.3018200232863216e-05, + "loss": 0.3646, + "step": 29372 + }, + { + "epoch": 0.806507413509061, + "grad_norm": 0.3756755590438843, + "learning_rate": 1.301778847802288e-05, + "loss": 0.4443, + "step": 29373 + }, + { + "epoch": 0.8065348709500274, + "grad_norm": 0.3927207589149475, + "learning_rate": 1.3017376717553405e-05, + "loss": 0.5686, + "step": 29374 + }, + { + "epoch": 0.806562328390994, + "grad_norm": 0.40501537919044495, + "learning_rate": 1.3016964951455558e-05, + "loss": 0.5245, + "step": 29375 + }, + { + "epoch": 0.8065897858319605, + "grad_norm": 0.38609999418258667, + "learning_rate": 1.301655317973011e-05, + "loss": 0.4583, + "step": 29376 + }, + { + "epoch": 0.8066172432729269, + "grad_norm": 0.3541078269481659, + "learning_rate": 1.3016141402377827e-05, + "loss": 0.4032, + "step": 29377 + }, + { + "epoch": 0.8066447007138935, + "grad_norm": 0.43155819177627563, + "learning_rate": 1.3015729619399476e-05, + "loss": 0.4076, + "step": 29378 + }, + { + "epoch": 0.8066721581548599, + "grad_norm": 0.35251927375793457, + "learning_rate": 1.3015317830795827e-05, + "loss": 0.396, + "step": 29379 + }, + { + "epoch": 0.8066996155958265, + "grad_norm": 0.3832796812057495, + "learning_rate": 1.3014906036567654e-05, + "loss": 0.4579, + "step": 29380 + }, + { + "epoch": 0.8067270730367929, + "grad_norm": 0.35901930928230286, + "learning_rate": 1.3014494236715711e-05, + "loss": 0.468, + "step": 29381 + }, + { + "epoch": 0.8067545304777595, + "grad_norm": 0.36127135157585144, + "learning_rate": 1.301408243124078e-05, + "loss": 0.3762, + "step": 29382 + }, + { + "epoch": 0.806781987918726, + "grad_norm": 0.3636269271373749, + "learning_rate": 1.3013670620143619e-05, + "loss": 0.456, + "step": 29383 + }, + { + "epoch": 0.8068094453596925, + "grad_norm": 0.38814401626586914, + "learning_rate": 1.3013258803425002e-05, + "loss": 0.5559, + "step": 29384 + }, + { + "epoch": 0.806836902800659, + "grad_norm": 0.4110961854457855, + "learning_rate": 1.3012846981085695e-05, + "loss": 0.469, + "step": 29385 + }, + { + "epoch": 0.8068643602416254, + "grad_norm": 0.3730020225048065, + "learning_rate": 1.3012435153126469e-05, + "loss": 0.4556, + "step": 29386 + }, + { + "epoch": 0.806891817682592, + "grad_norm": 0.4088495671749115, + "learning_rate": 1.3012023319548088e-05, + "loss": 0.534, + "step": 29387 + }, + { + "epoch": 0.8069192751235584, + "grad_norm": 0.42812466621398926, + "learning_rate": 1.3011611480351323e-05, + "loss": 0.4689, + "step": 29388 + }, + { + "epoch": 0.806946732564525, + "grad_norm": 0.39296087622642517, + "learning_rate": 1.3011199635536942e-05, + "loss": 0.4693, + "step": 29389 + }, + { + "epoch": 0.8069741900054915, + "grad_norm": 0.3873558044433594, + "learning_rate": 1.301078778510571e-05, + "loss": 0.5128, + "step": 29390 + }, + { + "epoch": 0.807001647446458, + "grad_norm": 0.3912961483001709, + "learning_rate": 1.3010375929058401e-05, + "loss": 0.4191, + "step": 29391 + }, + { + "epoch": 0.8070291048874245, + "grad_norm": 0.47561711072921753, + "learning_rate": 1.300996406739578e-05, + "loss": 0.5744, + "step": 29392 + }, + { + "epoch": 0.807056562328391, + "grad_norm": 0.3955985903739929, + "learning_rate": 1.3009552200118612e-05, + "loss": 0.431, + "step": 29393 + }, + { + "epoch": 0.8070840197693575, + "grad_norm": 0.37683477997779846, + "learning_rate": 1.3009140327227669e-05, + "loss": 0.4321, + "step": 29394 + }, + { + "epoch": 0.807111477210324, + "grad_norm": 0.3950742185115814, + "learning_rate": 1.3008728448723721e-05, + "loss": 0.5412, + "step": 29395 + }, + { + "epoch": 0.8071389346512905, + "grad_norm": 0.5140008926391602, + "learning_rate": 1.3008316564607535e-05, + "loss": 0.4297, + "step": 29396 + }, + { + "epoch": 0.807166392092257, + "grad_norm": 0.4415270984172821, + "learning_rate": 1.3007904674879876e-05, + "loss": 0.5275, + "step": 29397 + }, + { + "epoch": 0.8071938495332235, + "grad_norm": 0.3497909605503082, + "learning_rate": 1.3007492779541515e-05, + "loss": 0.5807, + "step": 29398 + }, + { + "epoch": 0.80722130697419, + "grad_norm": 0.3741939067840576, + "learning_rate": 1.3007080878593224e-05, + "loss": 0.5668, + "step": 29399 + }, + { + "epoch": 0.8072487644151565, + "grad_norm": 0.3608841598033905, + "learning_rate": 1.3006668972035763e-05, + "loss": 0.4769, + "step": 29400 + }, + { + "epoch": 0.807276221856123, + "grad_norm": 0.39583635330200195, + "learning_rate": 1.3006257059869907e-05, + "loss": 0.4831, + "step": 29401 + }, + { + "epoch": 0.8073036792970895, + "grad_norm": 0.34256693720817566, + "learning_rate": 1.3005845142096417e-05, + "loss": 0.4392, + "step": 29402 + }, + { + "epoch": 0.807331136738056, + "grad_norm": 0.4751836657524109, + "learning_rate": 1.3005433218716075e-05, + "loss": 0.5432, + "step": 29403 + }, + { + "epoch": 0.8073585941790226, + "grad_norm": 0.42635682225227356, + "learning_rate": 1.3005021289729636e-05, + "loss": 0.4972, + "step": 29404 + }, + { + "epoch": 0.807386051619989, + "grad_norm": 0.4164731502532959, + "learning_rate": 1.300460935513787e-05, + "loss": 0.4701, + "step": 29405 + }, + { + "epoch": 0.8074135090609555, + "grad_norm": 0.41399696469306946, + "learning_rate": 1.3004197414941554e-05, + "loss": 0.4718, + "step": 29406 + }, + { + "epoch": 0.807440966501922, + "grad_norm": 0.3999139666557312, + "learning_rate": 1.300378546914145e-05, + "loss": 0.4936, + "step": 29407 + }, + { + "epoch": 0.8074684239428885, + "grad_norm": 0.4684426784515381, + "learning_rate": 1.3003373517738328e-05, + "loss": 0.5146, + "step": 29408 + }, + { + "epoch": 0.807495881383855, + "grad_norm": 0.42809706926345825, + "learning_rate": 1.3002961560732956e-05, + "loss": 0.4895, + "step": 29409 + }, + { + "epoch": 0.8075233388248215, + "grad_norm": 0.38195574283599854, + "learning_rate": 1.3002549598126097e-05, + "loss": 0.5228, + "step": 29410 + }, + { + "epoch": 0.8075507962657881, + "grad_norm": 0.3730248510837555, + "learning_rate": 1.3002137629918531e-05, + "loss": 0.4755, + "step": 29411 + }, + { + "epoch": 0.8075782537067545, + "grad_norm": 0.3368849754333496, + "learning_rate": 1.3001725656111016e-05, + "loss": 0.4233, + "step": 29412 + }, + { + "epoch": 0.8076057111477211, + "grad_norm": 0.3756151497364044, + "learning_rate": 1.3001313676704328e-05, + "loss": 0.4762, + "step": 29413 + }, + { + "epoch": 0.8076331685886875, + "grad_norm": 0.41606852412223816, + "learning_rate": 1.300090169169923e-05, + "loss": 0.5328, + "step": 29414 + }, + { + "epoch": 0.807660626029654, + "grad_norm": 0.38197484612464905, + "learning_rate": 1.3000489701096492e-05, + "loss": 0.4489, + "step": 29415 + }, + { + "epoch": 0.8076880834706205, + "grad_norm": 0.46300119161605835, + "learning_rate": 1.3000077704896885e-05, + "loss": 0.5031, + "step": 29416 + }, + { + "epoch": 0.807715540911587, + "grad_norm": 0.36390382051467896, + "learning_rate": 1.2999665703101175e-05, + "loss": 0.4843, + "step": 29417 + }, + { + "epoch": 0.8077429983525536, + "grad_norm": 0.4122953712940216, + "learning_rate": 1.2999253695710131e-05, + "loss": 0.4931, + "step": 29418 + }, + { + "epoch": 0.80777045579352, + "grad_norm": 0.4027218818664551, + "learning_rate": 1.2998841682724519e-05, + "loss": 0.5061, + "step": 29419 + }, + { + "epoch": 0.8077979132344866, + "grad_norm": 0.4687792956829071, + "learning_rate": 1.2998429664145114e-05, + "loss": 0.5133, + "step": 29420 + }, + { + "epoch": 0.807825370675453, + "grad_norm": 0.4201371371746063, + "learning_rate": 1.2998017639972678e-05, + "loss": 0.4439, + "step": 29421 + }, + { + "epoch": 0.8078528281164196, + "grad_norm": 0.3712780177593231, + "learning_rate": 1.2997605610207984e-05, + "loss": 0.4889, + "step": 29422 + }, + { + "epoch": 0.807880285557386, + "grad_norm": 0.3486365079879761, + "learning_rate": 1.2997193574851799e-05, + "loss": 0.481, + "step": 29423 + }, + { + "epoch": 0.8079077429983526, + "grad_norm": 0.3600008487701416, + "learning_rate": 1.299678153390489e-05, + "loss": 0.5144, + "step": 29424 + }, + { + "epoch": 0.8079352004393191, + "grad_norm": 0.427494078874588, + "learning_rate": 1.2996369487368026e-05, + "loss": 0.5239, + "step": 29425 + }, + { + "epoch": 0.8079626578802855, + "grad_norm": 0.4252254068851471, + "learning_rate": 1.2995957435241979e-05, + "loss": 0.4741, + "step": 29426 + }, + { + "epoch": 0.8079901153212521, + "grad_norm": 0.34764155745506287, + "learning_rate": 1.2995545377527514e-05, + "loss": 0.4178, + "step": 29427 + }, + { + "epoch": 0.8080175727622185, + "grad_norm": 0.39059314131736755, + "learning_rate": 1.29951333142254e-05, + "loss": 0.4194, + "step": 29428 + }, + { + "epoch": 0.8080450302031851, + "grad_norm": 0.3681449890136719, + "learning_rate": 1.299472124533641e-05, + "loss": 0.4661, + "step": 29429 + }, + { + "epoch": 0.8080724876441515, + "grad_norm": 0.40060120820999146, + "learning_rate": 1.2994309170861306e-05, + "loss": 0.4448, + "step": 29430 + }, + { + "epoch": 0.8080999450851181, + "grad_norm": 0.40241697430610657, + "learning_rate": 1.2993897090800859e-05, + "loss": 0.4768, + "step": 29431 + }, + { + "epoch": 0.8081274025260846, + "grad_norm": 0.3991473913192749, + "learning_rate": 1.299348500515584e-05, + "loss": 0.5595, + "step": 29432 + }, + { + "epoch": 0.808154859967051, + "grad_norm": 0.44829243421554565, + "learning_rate": 1.2993072913927016e-05, + "loss": 0.5852, + "step": 29433 + }, + { + "epoch": 0.8081823174080176, + "grad_norm": 0.39060178399086, + "learning_rate": 1.2992660817115156e-05, + "loss": 0.4811, + "step": 29434 + }, + { + "epoch": 0.808209774848984, + "grad_norm": 0.3648832440376282, + "learning_rate": 1.2992248714721028e-05, + "loss": 0.4753, + "step": 29435 + }, + { + "epoch": 0.8082372322899506, + "grad_norm": 0.43978139758110046, + "learning_rate": 1.29918366067454e-05, + "loss": 0.5131, + "step": 29436 + }, + { + "epoch": 0.808264689730917, + "grad_norm": 0.40225088596343994, + "learning_rate": 1.2991424493189041e-05, + "loss": 0.4803, + "step": 29437 + }, + { + "epoch": 0.8082921471718836, + "grad_norm": 0.3790709972381592, + "learning_rate": 1.2991012374052725e-05, + "loss": 0.4879, + "step": 29438 + }, + { + "epoch": 0.8083196046128501, + "grad_norm": 0.3985547423362732, + "learning_rate": 1.2990600249337213e-05, + "loss": 0.485, + "step": 29439 + }, + { + "epoch": 0.8083470620538166, + "grad_norm": 0.3995741307735443, + "learning_rate": 1.2990188119043276e-05, + "loss": 0.5021, + "step": 29440 + }, + { + "epoch": 0.8083745194947831, + "grad_norm": 0.37435781955718994, + "learning_rate": 1.2989775983171688e-05, + "loss": 0.4741, + "step": 29441 + }, + { + "epoch": 0.8084019769357496, + "grad_norm": 0.5141257047653198, + "learning_rate": 1.2989363841723213e-05, + "loss": 0.5813, + "step": 29442 + }, + { + "epoch": 0.8084294343767161, + "grad_norm": 0.35613977909088135, + "learning_rate": 1.2988951694698617e-05, + "loss": 0.4765, + "step": 29443 + }, + { + "epoch": 0.8084568918176825, + "grad_norm": 0.37130415439605713, + "learning_rate": 1.2988539542098675e-05, + "loss": 0.4947, + "step": 29444 + }, + { + "epoch": 0.8084843492586491, + "grad_norm": 0.4218887388706207, + "learning_rate": 1.298812738392415e-05, + "loss": 0.4629, + "step": 29445 + }, + { + "epoch": 0.8085118066996156, + "grad_norm": 0.4021981656551361, + "learning_rate": 1.2987715220175817e-05, + "loss": 0.6318, + "step": 29446 + }, + { + "epoch": 0.8085392641405821, + "grad_norm": 0.44219136238098145, + "learning_rate": 1.2987303050854439e-05, + "loss": 0.4955, + "step": 29447 + }, + { + "epoch": 0.8085667215815486, + "grad_norm": 0.39716288447380066, + "learning_rate": 1.2986890875960786e-05, + "loss": 0.511, + "step": 29448 + }, + { + "epoch": 0.8085941790225151, + "grad_norm": 0.38068515062332153, + "learning_rate": 1.2986478695495632e-05, + "loss": 0.5056, + "step": 29449 + }, + { + "epoch": 0.8086216364634816, + "grad_norm": 0.35609909892082214, + "learning_rate": 1.2986066509459741e-05, + "loss": 0.4288, + "step": 29450 + }, + { + "epoch": 0.8086490939044481, + "grad_norm": 0.3484458029270172, + "learning_rate": 1.2985654317853881e-05, + "loss": 0.4781, + "step": 29451 + }, + { + "epoch": 0.8086765513454146, + "grad_norm": 0.41526639461517334, + "learning_rate": 1.2985242120678824e-05, + "loss": 0.469, + "step": 29452 + }, + { + "epoch": 0.8087040087863812, + "grad_norm": 0.3589704930782318, + "learning_rate": 1.2984829917935338e-05, + "loss": 0.4729, + "step": 29453 + }, + { + "epoch": 0.8087314662273476, + "grad_norm": 0.42792582511901855, + "learning_rate": 1.2984417709624193e-05, + "loss": 0.5534, + "step": 29454 + }, + { + "epoch": 0.8087589236683141, + "grad_norm": 0.48579084873199463, + "learning_rate": 1.2984005495746155e-05, + "loss": 0.6071, + "step": 29455 + }, + { + "epoch": 0.8087863811092806, + "grad_norm": 0.4047023355960846, + "learning_rate": 1.2983593276301993e-05, + "loss": 0.4914, + "step": 29456 + }, + { + "epoch": 0.8088138385502471, + "grad_norm": 0.4741160571575165, + "learning_rate": 1.298318105129248e-05, + "loss": 0.5403, + "step": 29457 + }, + { + "epoch": 0.8088412959912136, + "grad_norm": 0.39354386925697327, + "learning_rate": 1.298276882071838e-05, + "loss": 0.4349, + "step": 29458 + }, + { + "epoch": 0.8088687534321801, + "grad_norm": 0.3863707482814789, + "learning_rate": 1.2982356584580464e-05, + "loss": 0.4427, + "step": 29459 + }, + { + "epoch": 0.8088962108731467, + "grad_norm": 0.3832639157772064, + "learning_rate": 1.2981944342879502e-05, + "loss": 0.417, + "step": 29460 + }, + { + "epoch": 0.8089236683141131, + "grad_norm": 0.4353289008140564, + "learning_rate": 1.2981532095616262e-05, + "loss": 0.4539, + "step": 29461 + }, + { + "epoch": 0.8089511257550797, + "grad_norm": 0.39595019817352295, + "learning_rate": 1.2981119842791515e-05, + "loss": 0.4936, + "step": 29462 + }, + { + "epoch": 0.8089785831960461, + "grad_norm": 0.41224920749664307, + "learning_rate": 1.2980707584406025e-05, + "loss": 0.5325, + "step": 29463 + }, + { + "epoch": 0.8090060406370126, + "grad_norm": 0.42082679271698, + "learning_rate": 1.2980295320460565e-05, + "loss": 0.4374, + "step": 29464 + }, + { + "epoch": 0.8090334980779791, + "grad_norm": 0.3755422830581665, + "learning_rate": 1.2979883050955904e-05, + "loss": 0.506, + "step": 29465 + }, + { + "epoch": 0.8090609555189456, + "grad_norm": 0.5387374758720398, + "learning_rate": 1.2979470775892808e-05, + "loss": 0.5295, + "step": 29466 + }, + { + "epoch": 0.8090884129599122, + "grad_norm": 0.40527233481407166, + "learning_rate": 1.297905849527205e-05, + "loss": 0.4798, + "step": 29467 + }, + { + "epoch": 0.8091158704008786, + "grad_norm": 0.626909613609314, + "learning_rate": 1.2978646209094397e-05, + "loss": 0.5444, + "step": 29468 + }, + { + "epoch": 0.8091433278418452, + "grad_norm": 0.36374878883361816, + "learning_rate": 1.2978233917360616e-05, + "loss": 0.3678, + "step": 29469 + }, + { + "epoch": 0.8091707852828116, + "grad_norm": 0.41649457812309265, + "learning_rate": 1.297782162007148e-05, + "loss": 0.5762, + "step": 29470 + }, + { + "epoch": 0.8091982427237782, + "grad_norm": 0.43338820338249207, + "learning_rate": 1.2977409317227756e-05, + "loss": 0.5431, + "step": 29471 + }, + { + "epoch": 0.8092257001647446, + "grad_norm": 0.41102635860443115, + "learning_rate": 1.2976997008830215e-05, + "loss": 0.5521, + "step": 29472 + }, + { + "epoch": 0.8092531576057111, + "grad_norm": 0.3560909926891327, + "learning_rate": 1.2976584694879624e-05, + "loss": 0.4622, + "step": 29473 + }, + { + "epoch": 0.8092806150466777, + "grad_norm": 0.34616658091545105, + "learning_rate": 1.2976172375376748e-05, + "loss": 0.4882, + "step": 29474 + }, + { + "epoch": 0.8093080724876441, + "grad_norm": 0.40120819211006165, + "learning_rate": 1.2975760050322366e-05, + "loss": 0.4568, + "step": 29475 + }, + { + "epoch": 0.8093355299286107, + "grad_norm": 0.3529524803161621, + "learning_rate": 1.2975347719717237e-05, + "loss": 0.4533, + "step": 29476 + }, + { + "epoch": 0.8093629873695771, + "grad_norm": 0.39462703466415405, + "learning_rate": 1.2974935383562139e-05, + "loss": 0.4806, + "step": 29477 + }, + { + "epoch": 0.8093904448105437, + "grad_norm": 0.4188230335712433, + "learning_rate": 1.2974523041857836e-05, + "loss": 0.4929, + "step": 29478 + }, + { + "epoch": 0.8094179022515101, + "grad_norm": 0.335833877325058, + "learning_rate": 1.2974110694605098e-05, + "loss": 0.4372, + "step": 29479 + }, + { + "epoch": 0.8094453596924767, + "grad_norm": 0.3695518672466278, + "learning_rate": 1.2973698341804695e-05, + "loss": 0.4518, + "step": 29480 + }, + { + "epoch": 0.8094728171334432, + "grad_norm": 0.4042917490005493, + "learning_rate": 1.2973285983457394e-05, + "loss": 0.4946, + "step": 29481 + }, + { + "epoch": 0.8095002745744097, + "grad_norm": 0.3929165303707123, + "learning_rate": 1.2972873619563966e-05, + "loss": 0.5122, + "step": 29482 + }, + { + "epoch": 0.8095277320153762, + "grad_norm": 0.3701874017715454, + "learning_rate": 1.2972461250125182e-05, + "loss": 0.5325, + "step": 29483 + }, + { + "epoch": 0.8095551894563426, + "grad_norm": 0.37275320291519165, + "learning_rate": 1.2972048875141805e-05, + "loss": 0.524, + "step": 29484 + }, + { + "epoch": 0.8095826468973092, + "grad_norm": 0.3935512900352478, + "learning_rate": 1.297163649461461e-05, + "loss": 0.5924, + "step": 29485 + }, + { + "epoch": 0.8096101043382756, + "grad_norm": 0.40529245138168335, + "learning_rate": 1.2971224108544366e-05, + "loss": 0.4702, + "step": 29486 + }, + { + "epoch": 0.8096375617792422, + "grad_norm": 0.4017189145088196, + "learning_rate": 1.2970811716931839e-05, + "loss": 0.4271, + "step": 29487 + }, + { + "epoch": 0.8096650192202087, + "grad_norm": 0.5570821762084961, + "learning_rate": 1.2970399319777802e-05, + "loss": 0.5466, + "step": 29488 + }, + { + "epoch": 0.8096924766611752, + "grad_norm": 0.42345738410949707, + "learning_rate": 1.2969986917083019e-05, + "loss": 0.5067, + "step": 29489 + }, + { + "epoch": 0.8097199341021417, + "grad_norm": 0.3816317021846771, + "learning_rate": 1.2969574508848266e-05, + "loss": 0.4863, + "step": 29490 + }, + { + "epoch": 0.8097473915431082, + "grad_norm": 0.4255378544330597, + "learning_rate": 1.2969162095074307e-05, + "loss": 0.5691, + "step": 29491 + }, + { + "epoch": 0.8097748489840747, + "grad_norm": 1.0249608755111694, + "learning_rate": 1.2968749675761914e-05, + "loss": 0.5383, + "step": 29492 + }, + { + "epoch": 0.8098023064250411, + "grad_norm": 0.41814616322517395, + "learning_rate": 1.2968337250911856e-05, + "loss": 0.5307, + "step": 29493 + }, + { + "epoch": 0.8098297638660077, + "grad_norm": 0.3621957302093506, + "learning_rate": 1.2967924820524898e-05, + "loss": 0.5893, + "step": 29494 + }, + { + "epoch": 0.8098572213069742, + "grad_norm": 0.35061565041542053, + "learning_rate": 1.2967512384601815e-05, + "loss": 0.4123, + "step": 29495 + }, + { + "epoch": 0.8098846787479407, + "grad_norm": 0.3685160279273987, + "learning_rate": 1.2967099943143376e-05, + "loss": 0.5488, + "step": 29496 + }, + { + "epoch": 0.8099121361889072, + "grad_norm": 0.4127882719039917, + "learning_rate": 1.2966687496150345e-05, + "loss": 0.5565, + "step": 29497 + }, + { + "epoch": 0.8099395936298737, + "grad_norm": 0.41425564885139465, + "learning_rate": 1.2966275043623498e-05, + "loss": 0.5224, + "step": 29498 + }, + { + "epoch": 0.8099670510708402, + "grad_norm": 0.4093611538410187, + "learning_rate": 1.29658625855636e-05, + "loss": 0.5095, + "step": 29499 + }, + { + "epoch": 0.8099945085118067, + "grad_norm": 0.36601340770721436, + "learning_rate": 1.2965450121971423e-05, + "loss": 0.5153, + "step": 29500 + }, + { + "epoch": 0.8100219659527732, + "grad_norm": 0.4663650095462799, + "learning_rate": 1.2965037652847734e-05, + "loss": 0.5076, + "step": 29501 + }, + { + "epoch": 0.8100494233937398, + "grad_norm": 0.3334299325942993, + "learning_rate": 1.2964625178193302e-05, + "loss": 0.4744, + "step": 29502 + }, + { + "epoch": 0.8100768808347062, + "grad_norm": 0.40422627329826355, + "learning_rate": 1.2964212698008903e-05, + "loss": 0.4501, + "step": 29503 + }, + { + "epoch": 0.8101043382756727, + "grad_norm": 0.41122275590896606, + "learning_rate": 1.2963800212295296e-05, + "loss": 0.5328, + "step": 29504 + }, + { + "epoch": 0.8101317957166392, + "grad_norm": 0.3514132499694824, + "learning_rate": 1.2963387721053256e-05, + "loss": 0.5073, + "step": 29505 + }, + { + "epoch": 0.8101592531576057, + "grad_norm": 0.43318405747413635, + "learning_rate": 1.2962975224283555e-05, + "loss": 0.536, + "step": 29506 + }, + { + "epoch": 0.8101867105985722, + "grad_norm": 0.3973047435283661, + "learning_rate": 1.2962562721986957e-05, + "loss": 0.4557, + "step": 29507 + }, + { + "epoch": 0.8102141680395387, + "grad_norm": 0.377435564994812, + "learning_rate": 1.2962150214164235e-05, + "loss": 0.4197, + "step": 29508 + }, + { + "epoch": 0.8102416254805053, + "grad_norm": 0.3853634297847748, + "learning_rate": 1.296173770081616e-05, + "loss": 0.5508, + "step": 29509 + }, + { + "epoch": 0.8102690829214717, + "grad_norm": 0.3784346580505371, + "learning_rate": 1.2961325181943494e-05, + "loss": 0.4869, + "step": 29510 + }, + { + "epoch": 0.8102965403624383, + "grad_norm": 0.41430482268333435, + "learning_rate": 1.2960912657547014e-05, + "loss": 0.4436, + "step": 29511 + }, + { + "epoch": 0.8103239978034047, + "grad_norm": 0.43021053075790405, + "learning_rate": 1.2960500127627485e-05, + "loss": 0.54, + "step": 29512 + }, + { + "epoch": 0.8103514552443712, + "grad_norm": 1.4004594087600708, + "learning_rate": 1.2960087592185681e-05, + "loss": 0.5657, + "step": 29513 + }, + { + "epoch": 0.8103789126853377, + "grad_norm": 0.3727751076221466, + "learning_rate": 1.2959675051222368e-05, + "loss": 0.4319, + "step": 29514 + }, + { + "epoch": 0.8104063701263042, + "grad_norm": 0.3506917655467987, + "learning_rate": 1.2959262504738316e-05, + "loss": 0.448, + "step": 29515 + }, + { + "epoch": 0.8104338275672708, + "grad_norm": 0.3836301267147064, + "learning_rate": 1.2958849952734297e-05, + "loss": 0.4563, + "step": 29516 + }, + { + "epoch": 0.8104612850082372, + "grad_norm": 0.394192099571228, + "learning_rate": 1.2958437395211076e-05, + "loss": 0.4563, + "step": 29517 + }, + { + "epoch": 0.8104887424492038, + "grad_norm": 0.372183233499527, + "learning_rate": 1.2958024832169424e-05, + "loss": 0.4386, + "step": 29518 + }, + { + "epoch": 0.8105161998901702, + "grad_norm": 0.39903146028518677, + "learning_rate": 1.2957612263610115e-05, + "loss": 0.5004, + "step": 29519 + }, + { + "epoch": 0.8105436573311368, + "grad_norm": 0.3910328149795532, + "learning_rate": 1.2957199689533912e-05, + "loss": 0.5197, + "step": 29520 + }, + { + "epoch": 0.8105711147721032, + "grad_norm": 0.4052291810512543, + "learning_rate": 1.295678710994159e-05, + "loss": 0.4389, + "step": 29521 + }, + { + "epoch": 0.8105985722130697, + "grad_norm": 0.3781130909919739, + "learning_rate": 1.2956374524833917e-05, + "loss": 0.5169, + "step": 29522 + }, + { + "epoch": 0.8106260296540363, + "grad_norm": 0.4098453223705292, + "learning_rate": 1.2955961934211658e-05, + "loss": 0.517, + "step": 29523 + }, + { + "epoch": 0.8106534870950027, + "grad_norm": 0.38490021228790283, + "learning_rate": 1.295554933807559e-05, + "loss": 0.4899, + "step": 29524 + }, + { + "epoch": 0.8106809445359693, + "grad_norm": 0.4297489523887634, + "learning_rate": 1.2955136736426476e-05, + "loss": 0.4903, + "step": 29525 + }, + { + "epoch": 0.8107084019769357, + "grad_norm": 0.37305212020874023, + "learning_rate": 1.2954724129265092e-05, + "loss": 0.4387, + "step": 29526 + }, + { + "epoch": 0.8107358594179023, + "grad_norm": 0.4005805253982544, + "learning_rate": 1.2954311516592207e-05, + "loss": 0.4855, + "step": 29527 + }, + { + "epoch": 0.8107633168588687, + "grad_norm": 0.36399951577186584, + "learning_rate": 1.2953898898408583e-05, + "loss": 0.517, + "step": 29528 + }, + { + "epoch": 0.8107907742998353, + "grad_norm": 0.4083423912525177, + "learning_rate": 1.2953486274714996e-05, + "loss": 0.4704, + "step": 29529 + }, + { + "epoch": 0.8108182317408018, + "grad_norm": 0.4460705518722534, + "learning_rate": 1.2953073645512216e-05, + "loss": 0.5362, + "step": 29530 + }, + { + "epoch": 0.8108456891817682, + "grad_norm": 0.38192039728164673, + "learning_rate": 1.2952661010801009e-05, + "loss": 0.4575, + "step": 29531 + }, + { + "epoch": 0.8108731466227348, + "grad_norm": 0.4847450256347656, + "learning_rate": 1.295224837058215e-05, + "loss": 0.52, + "step": 29532 + }, + { + "epoch": 0.8109006040637012, + "grad_norm": 0.3990357220172882, + "learning_rate": 1.2951835724856401e-05, + "loss": 0.4893, + "step": 29533 + }, + { + "epoch": 0.8109280615046678, + "grad_norm": 0.4013604521751404, + "learning_rate": 1.295142307362454e-05, + "loss": 0.491, + "step": 29534 + }, + { + "epoch": 0.8109555189456342, + "grad_norm": 0.3828431963920593, + "learning_rate": 1.2951010416887334e-05, + "loss": 0.5288, + "step": 29535 + }, + { + "epoch": 0.8109829763866008, + "grad_norm": 0.4829552471637726, + "learning_rate": 1.2950597754645547e-05, + "loss": 0.5561, + "step": 29536 + }, + { + "epoch": 0.8110104338275673, + "grad_norm": 0.4025965929031372, + "learning_rate": 1.2950185086899955e-05, + "loss": 0.4747, + "step": 29537 + }, + { + "epoch": 0.8110378912685338, + "grad_norm": 0.3385898768901825, + "learning_rate": 1.2949772413651328e-05, + "loss": 0.451, + "step": 29538 + }, + { + "epoch": 0.8110653487095003, + "grad_norm": 0.9679966568946838, + "learning_rate": 1.2949359734900433e-05, + "loss": 0.4322, + "step": 29539 + }, + { + "epoch": 0.8110928061504667, + "grad_norm": 0.38145795464515686, + "learning_rate": 1.2948947050648041e-05, + "loss": 0.5252, + "step": 29540 + }, + { + "epoch": 0.8111202635914333, + "grad_norm": 0.4032808542251587, + "learning_rate": 1.294853436089492e-05, + "loss": 0.5057, + "step": 29541 + }, + { + "epoch": 0.8111477210323997, + "grad_norm": 0.3743438124656677, + "learning_rate": 1.2948121665641844e-05, + "loss": 0.4878, + "step": 29542 + }, + { + "epoch": 0.8111751784733663, + "grad_norm": 0.35850146412849426, + "learning_rate": 1.2947708964889576e-05, + "loss": 0.4997, + "step": 29543 + }, + { + "epoch": 0.8112026359143328, + "grad_norm": 0.39038166403770447, + "learning_rate": 1.2947296258638894e-05, + "loss": 0.4851, + "step": 29544 + }, + { + "epoch": 0.8112300933552993, + "grad_norm": 0.37322530150413513, + "learning_rate": 1.2946883546890564e-05, + "loss": 0.4637, + "step": 29545 + }, + { + "epoch": 0.8112575507962658, + "grad_norm": 0.41893863677978516, + "learning_rate": 1.2946470829645351e-05, + "loss": 0.5578, + "step": 29546 + }, + { + "epoch": 0.8112850082372323, + "grad_norm": 0.41660282015800476, + "learning_rate": 1.2946058106904036e-05, + "loss": 0.4165, + "step": 29547 + }, + { + "epoch": 0.8113124656781988, + "grad_norm": 0.3646438717842102, + "learning_rate": 1.2945645378667377e-05, + "loss": 0.5101, + "step": 29548 + }, + { + "epoch": 0.8113399231191653, + "grad_norm": 0.3531654477119446, + "learning_rate": 1.2945232644936155e-05, + "loss": 0.3994, + "step": 29549 + }, + { + "epoch": 0.8113673805601318, + "grad_norm": 0.3620224893093109, + "learning_rate": 1.294481990571113e-05, + "loss": 0.4928, + "step": 29550 + }, + { + "epoch": 0.8113948380010984, + "grad_norm": 0.31709370017051697, + "learning_rate": 1.2944407160993076e-05, + "loss": 0.4365, + "step": 29551 + }, + { + "epoch": 0.8114222954420648, + "grad_norm": 0.40850868821144104, + "learning_rate": 1.2943994410782763e-05, + "loss": 0.5077, + "step": 29552 + }, + { + "epoch": 0.8114497528830313, + "grad_norm": 0.4181002080440521, + "learning_rate": 1.2943581655080962e-05, + "loss": 0.5369, + "step": 29553 + }, + { + "epoch": 0.8114772103239978, + "grad_norm": 0.3830395042896271, + "learning_rate": 1.294316889388844e-05, + "loss": 0.4238, + "step": 29554 + }, + { + "epoch": 0.8115046677649643, + "grad_norm": 0.40456250309944153, + "learning_rate": 1.294275612720597e-05, + "loss": 0.4366, + "step": 29555 + }, + { + "epoch": 0.8115321252059308, + "grad_norm": 0.4017515182495117, + "learning_rate": 1.294234335503432e-05, + "loss": 0.534, + "step": 29556 + }, + { + "epoch": 0.8115595826468973, + "grad_norm": 0.4970342516899109, + "learning_rate": 1.294193057737426e-05, + "loss": 0.4478, + "step": 29557 + }, + { + "epoch": 0.8115870400878639, + "grad_norm": 0.39134839177131653, + "learning_rate": 1.2941517794226563e-05, + "loss": 0.5249, + "step": 29558 + }, + { + "epoch": 0.8116144975288303, + "grad_norm": 0.4332135319709778, + "learning_rate": 1.2941105005591994e-05, + "loss": 0.5172, + "step": 29559 + }, + { + "epoch": 0.8116419549697969, + "grad_norm": 0.4169181287288666, + "learning_rate": 1.2940692211471326e-05, + "loss": 0.4596, + "step": 29560 + }, + { + "epoch": 0.8116694124107633, + "grad_norm": 0.395397424697876, + "learning_rate": 1.2940279411865328e-05, + "loss": 0.4681, + "step": 29561 + }, + { + "epoch": 0.8116968698517298, + "grad_norm": 0.40580812096595764, + "learning_rate": 1.293986660677477e-05, + "loss": 0.412, + "step": 29562 + }, + { + "epoch": 0.8117243272926963, + "grad_norm": 0.44008082151412964, + "learning_rate": 1.2939453796200425e-05, + "loss": 0.5563, + "step": 29563 + }, + { + "epoch": 0.8117517847336628, + "grad_norm": 0.3815366327762604, + "learning_rate": 1.293904098014306e-05, + "loss": 0.4923, + "step": 29564 + }, + { + "epoch": 0.8117792421746294, + "grad_norm": 0.3866012394428253, + "learning_rate": 1.2938628158603445e-05, + "loss": 0.5149, + "step": 29565 + }, + { + "epoch": 0.8118066996155958, + "grad_norm": 0.37853699922561646, + "learning_rate": 1.2938215331582349e-05, + "loss": 0.4605, + "step": 29566 + }, + { + "epoch": 0.8118341570565624, + "grad_norm": 0.521323561668396, + "learning_rate": 1.2937802499080544e-05, + "loss": 0.5136, + "step": 29567 + }, + { + "epoch": 0.8118616144975288, + "grad_norm": 0.3569294214248657, + "learning_rate": 1.2937389661098803e-05, + "loss": 0.4601, + "step": 29568 + }, + { + "epoch": 0.8118890719384954, + "grad_norm": 0.4325162172317505, + "learning_rate": 1.293697681763789e-05, + "loss": 0.5423, + "step": 29569 + }, + { + "epoch": 0.8119165293794618, + "grad_norm": 0.39335185289382935, + "learning_rate": 1.2936563968698579e-05, + "loss": 0.4392, + "step": 29570 + }, + { + "epoch": 0.8119439868204283, + "grad_norm": 0.4136277735233307, + "learning_rate": 1.2936151114281637e-05, + "loss": 0.524, + "step": 29571 + }, + { + "epoch": 0.8119714442613949, + "grad_norm": 0.39998939633369446, + "learning_rate": 1.2935738254387837e-05, + "loss": 0.5334, + "step": 29572 + }, + { + "epoch": 0.8119989017023613, + "grad_norm": 0.3581618368625641, + "learning_rate": 1.293532538901795e-05, + "loss": 0.5298, + "step": 29573 + }, + { + "epoch": 0.8120263591433279, + "grad_norm": 0.41104552149772644, + "learning_rate": 1.2934912518172743e-05, + "loss": 0.5137, + "step": 29574 + }, + { + "epoch": 0.8120538165842943, + "grad_norm": 0.3504338264465332, + "learning_rate": 1.2934499641852987e-05, + "loss": 0.4433, + "step": 29575 + }, + { + "epoch": 0.8120812740252609, + "grad_norm": 0.4092327356338501, + "learning_rate": 1.293408676005945e-05, + "loss": 0.6025, + "step": 29576 + }, + { + "epoch": 0.8121087314662273, + "grad_norm": 0.41085711121559143, + "learning_rate": 1.2933673872792909e-05, + "loss": 0.4566, + "step": 29577 + }, + { + "epoch": 0.8121361889071939, + "grad_norm": 0.39957955479621887, + "learning_rate": 1.293326098005413e-05, + "loss": 0.4571, + "step": 29578 + }, + { + "epoch": 0.8121636463481604, + "grad_norm": 0.4089006781578064, + "learning_rate": 1.2932848081843878e-05, + "loss": 0.4728, + "step": 29579 + }, + { + "epoch": 0.8121911037891268, + "grad_norm": 0.3994095027446747, + "learning_rate": 1.2932435178162934e-05, + "loss": 0.5446, + "step": 29580 + }, + { + "epoch": 0.8122185612300934, + "grad_norm": 0.4758124351501465, + "learning_rate": 1.293202226901206e-05, + "loss": 0.5161, + "step": 29581 + }, + { + "epoch": 0.8122460186710598, + "grad_norm": 0.37188291549682617, + "learning_rate": 1.293160935439203e-05, + "loss": 0.4089, + "step": 29582 + }, + { + "epoch": 0.8122734761120264, + "grad_norm": 0.4123263657093048, + "learning_rate": 1.2931196434303611e-05, + "loss": 0.4846, + "step": 29583 + }, + { + "epoch": 0.8123009335529928, + "grad_norm": 0.39721983671188354, + "learning_rate": 1.2930783508747575e-05, + "loss": 0.5171, + "step": 29584 + }, + { + "epoch": 0.8123283909939594, + "grad_norm": 0.35082757472991943, + "learning_rate": 1.2930370577724695e-05, + "loss": 0.5082, + "step": 29585 + }, + { + "epoch": 0.8123558484349259, + "grad_norm": 0.4238072335720062, + "learning_rate": 1.2929957641235736e-05, + "loss": 0.441, + "step": 29586 + }, + { + "epoch": 0.8123833058758924, + "grad_norm": 0.4310474097728729, + "learning_rate": 1.292954469928147e-05, + "loss": 0.4773, + "step": 29587 + }, + { + "epoch": 0.8124107633168589, + "grad_norm": 0.34800177812576294, + "learning_rate": 1.292913175186267e-05, + "loss": 0.4825, + "step": 29588 + }, + { + "epoch": 0.8124382207578253, + "grad_norm": 0.37402093410491943, + "learning_rate": 1.2928718798980104e-05, + "loss": 0.4686, + "step": 29589 + }, + { + "epoch": 0.8124656781987919, + "grad_norm": 0.3755300045013428, + "learning_rate": 1.2928305840634545e-05, + "loss": 0.4535, + "step": 29590 + }, + { + "epoch": 0.8124931356397583, + "grad_norm": 0.340661883354187, + "learning_rate": 1.2927892876826757e-05, + "loss": 0.4648, + "step": 29591 + }, + { + "epoch": 0.8125205930807249, + "grad_norm": 0.41110870242118835, + "learning_rate": 1.292747990755752e-05, + "loss": 0.5171, + "step": 29592 + }, + { + "epoch": 0.8125480505216913, + "grad_norm": 0.49654239416122437, + "learning_rate": 1.2927066932827592e-05, + "loss": 0.5008, + "step": 29593 + }, + { + "epoch": 0.8125755079626579, + "grad_norm": 0.3608541190624237, + "learning_rate": 1.2926653952637753e-05, + "loss": 0.4807, + "step": 29594 + }, + { + "epoch": 0.8126029654036244, + "grad_norm": 0.401468962430954, + "learning_rate": 1.292624096698877e-05, + "loss": 0.5339, + "step": 29595 + }, + { + "epoch": 0.8126304228445909, + "grad_norm": 0.4100181758403778, + "learning_rate": 1.2925827975881414e-05, + "loss": 0.5086, + "step": 29596 + }, + { + "epoch": 0.8126578802855574, + "grad_norm": 0.4900486171245575, + "learning_rate": 1.2925414979316458e-05, + "loss": 0.5329, + "step": 29597 + }, + { + "epoch": 0.8126853377265238, + "grad_norm": 0.4653148651123047, + "learning_rate": 1.2925001977294661e-05, + "loss": 0.5901, + "step": 29598 + }, + { + "epoch": 0.8127127951674904, + "grad_norm": 0.3838897943496704, + "learning_rate": 1.2924588969816809e-05, + "loss": 0.4765, + "step": 29599 + }, + { + "epoch": 0.8127402526084568, + "grad_norm": 0.6647209525108337, + "learning_rate": 1.2924175956883661e-05, + "loss": 0.5773, + "step": 29600 + }, + { + "epoch": 0.8127677100494234, + "grad_norm": 0.4066369831562042, + "learning_rate": 1.2923762938495996e-05, + "loss": 0.4626, + "step": 29601 + }, + { + "epoch": 0.8127951674903899, + "grad_norm": 0.34017422795295715, + "learning_rate": 1.2923349914654579e-05, + "loss": 0.4288, + "step": 29602 + }, + { + "epoch": 0.8128226249313564, + "grad_norm": 0.3408830165863037, + "learning_rate": 1.292293688536018e-05, + "loss": 0.3954, + "step": 29603 + }, + { + "epoch": 0.8128500823723229, + "grad_norm": 0.4165657162666321, + "learning_rate": 1.292252385061357e-05, + "loss": 0.4478, + "step": 29604 + }, + { + "epoch": 0.8128775398132894, + "grad_norm": 0.46480241417884827, + "learning_rate": 1.292211081041552e-05, + "loss": 0.4708, + "step": 29605 + }, + { + "epoch": 0.8129049972542559, + "grad_norm": 0.4057855010032654, + "learning_rate": 1.2921697764766802e-05, + "loss": 0.4627, + "step": 29606 + }, + { + "epoch": 0.8129324546952224, + "grad_norm": 0.4088543653488159, + "learning_rate": 1.2921284713668188e-05, + "loss": 0.4845, + "step": 29607 + }, + { + "epoch": 0.8129599121361889, + "grad_norm": 0.4309626519680023, + "learning_rate": 1.292087165712044e-05, + "loss": 0.4736, + "step": 29608 + }, + { + "epoch": 0.8129873695771554, + "grad_norm": 0.3678014874458313, + "learning_rate": 1.2920458595124337e-05, + "loss": 0.5028, + "step": 29609 + }, + { + "epoch": 0.8130148270181219, + "grad_norm": 0.36970940232276917, + "learning_rate": 1.2920045527680647e-05, + "loss": 0.4775, + "step": 29610 + }, + { + "epoch": 0.8130422844590884, + "grad_norm": 0.4626849591732025, + "learning_rate": 1.2919632454790138e-05, + "loss": 0.6166, + "step": 29611 + }, + { + "epoch": 0.8130697419000549, + "grad_norm": 0.43095266819000244, + "learning_rate": 1.2919219376453587e-05, + "loss": 0.5316, + "step": 29612 + }, + { + "epoch": 0.8130971993410214, + "grad_norm": 0.4321107566356659, + "learning_rate": 1.2918806292671756e-05, + "loss": 0.5078, + "step": 29613 + }, + { + "epoch": 0.8131246567819879, + "grad_norm": 0.3618690073490143, + "learning_rate": 1.291839320344542e-05, + "loss": 0.4716, + "step": 29614 + }, + { + "epoch": 0.8131521142229544, + "grad_norm": 0.4261356294155121, + "learning_rate": 1.2917980108775352e-05, + "loss": 0.5211, + "step": 29615 + }, + { + "epoch": 0.813179571663921, + "grad_norm": 0.38632476329803467, + "learning_rate": 1.2917567008662317e-05, + "loss": 0.5377, + "step": 29616 + }, + { + "epoch": 0.8132070291048874, + "grad_norm": 0.40701478719711304, + "learning_rate": 1.2917153903107091e-05, + "loss": 0.4577, + "step": 29617 + }, + { + "epoch": 0.813234486545854, + "grad_norm": 0.4208388328552246, + "learning_rate": 1.2916740792110439e-05, + "loss": 0.5512, + "step": 29618 + }, + { + "epoch": 0.8132619439868204, + "grad_norm": 0.39313143491744995, + "learning_rate": 1.2916327675673136e-05, + "loss": 0.4173, + "step": 29619 + }, + { + "epoch": 0.8132894014277869, + "grad_norm": 0.3423662781715393, + "learning_rate": 1.2915914553795953e-05, + "loss": 0.4318, + "step": 29620 + }, + { + "epoch": 0.8133168588687534, + "grad_norm": 0.3884488046169281, + "learning_rate": 1.2915501426479655e-05, + "loss": 0.4207, + "step": 29621 + }, + { + "epoch": 0.8133443163097199, + "grad_norm": 0.5153118968009949, + "learning_rate": 1.2915088293725017e-05, + "loss": 0.4444, + "step": 29622 + }, + { + "epoch": 0.8133717737506865, + "grad_norm": 0.3892473578453064, + "learning_rate": 1.291467515553281e-05, + "loss": 0.3661, + "step": 29623 + }, + { + "epoch": 0.8133992311916529, + "grad_norm": 0.4076761305332184, + "learning_rate": 1.2914262011903805e-05, + "loss": 0.4431, + "step": 29624 + }, + { + "epoch": 0.8134266886326195, + "grad_norm": 0.35966745018959045, + "learning_rate": 1.291384886283877e-05, + "loss": 0.5608, + "step": 29625 + }, + { + "epoch": 0.8134541460735859, + "grad_norm": 0.34544119238853455, + "learning_rate": 1.2913435708338473e-05, + "loss": 0.44, + "step": 29626 + }, + { + "epoch": 0.8134816035145525, + "grad_norm": 0.37218162417411804, + "learning_rate": 1.2913022548403692e-05, + "loss": 0.5809, + "step": 29627 + }, + { + "epoch": 0.8135090609555189, + "grad_norm": 0.34327733516693115, + "learning_rate": 1.2912609383035197e-05, + "loss": 0.4563, + "step": 29628 + }, + { + "epoch": 0.8135365183964854, + "grad_norm": 0.36169132590293884, + "learning_rate": 1.2912196212233751e-05, + "loss": 0.4286, + "step": 29629 + }, + { + "epoch": 0.813563975837452, + "grad_norm": 0.35497453808784485, + "learning_rate": 1.2911783036000134e-05, + "loss": 0.4525, + "step": 29630 + }, + { + "epoch": 0.8135914332784184, + "grad_norm": 0.43474435806274414, + "learning_rate": 1.291136985433511e-05, + "loss": 0.473, + "step": 29631 + }, + { + "epoch": 0.813618890719385, + "grad_norm": 0.3872703015804291, + "learning_rate": 1.2910956667239452e-05, + "loss": 0.4924, + "step": 29632 + }, + { + "epoch": 0.8136463481603514, + "grad_norm": 0.4053436517715454, + "learning_rate": 1.2910543474713932e-05, + "loss": 0.5807, + "step": 29633 + }, + { + "epoch": 0.813673805601318, + "grad_norm": 0.36375686526298523, + "learning_rate": 1.2910130276759315e-05, + "loss": 0.4646, + "step": 29634 + }, + { + "epoch": 0.8137012630422844, + "grad_norm": 0.3685169816017151, + "learning_rate": 1.2909717073376381e-05, + "loss": 0.4141, + "step": 29635 + }, + { + "epoch": 0.813728720483251, + "grad_norm": 0.3700298070907593, + "learning_rate": 1.2909303864565894e-05, + "loss": 0.4508, + "step": 29636 + }, + { + "epoch": 0.8137561779242175, + "grad_norm": 0.4163369834423065, + "learning_rate": 1.290889065032863e-05, + "loss": 0.4414, + "step": 29637 + }, + { + "epoch": 0.8137836353651839, + "grad_norm": 0.4599630534648895, + "learning_rate": 1.2908477430665354e-05, + "loss": 0.5072, + "step": 29638 + }, + { + "epoch": 0.8138110928061505, + "grad_norm": 0.4431251585483551, + "learning_rate": 1.2908064205576839e-05, + "loss": 0.5115, + "step": 29639 + }, + { + "epoch": 0.8138385502471169, + "grad_norm": 0.3723028004169464, + "learning_rate": 1.2907650975063856e-05, + "loss": 0.5277, + "step": 29640 + }, + { + "epoch": 0.8138660076880835, + "grad_norm": 0.4782600700855255, + "learning_rate": 1.2907237739127174e-05, + "loss": 0.5177, + "step": 29641 + }, + { + "epoch": 0.8138934651290499, + "grad_norm": 0.5230302810668945, + "learning_rate": 1.2906824497767569e-05, + "loss": 0.4926, + "step": 29642 + }, + { + "epoch": 0.8139209225700165, + "grad_norm": 3.103111743927002, + "learning_rate": 1.2906411250985809e-05, + "loss": 0.525, + "step": 29643 + }, + { + "epoch": 0.813948380010983, + "grad_norm": 0.3813362717628479, + "learning_rate": 1.2905997998782662e-05, + "loss": 0.4911, + "step": 29644 + }, + { + "epoch": 0.8139758374519495, + "grad_norm": 0.411438524723053, + "learning_rate": 1.2905584741158901e-05, + "loss": 0.4834, + "step": 29645 + }, + { + "epoch": 0.814003294892916, + "grad_norm": 0.40232861042022705, + "learning_rate": 1.2905171478115301e-05, + "loss": 0.6135, + "step": 29646 + }, + { + "epoch": 0.8140307523338824, + "grad_norm": 0.38741716742515564, + "learning_rate": 1.2904758209652624e-05, + "loss": 0.4616, + "step": 29647 + }, + { + "epoch": 0.814058209774849, + "grad_norm": 0.34849390387535095, + "learning_rate": 1.2904344935771649e-05, + "loss": 0.5621, + "step": 29648 + }, + { + "epoch": 0.8140856672158154, + "grad_norm": 0.42317554354667664, + "learning_rate": 1.2903931656473141e-05, + "loss": 0.5115, + "step": 29649 + }, + { + "epoch": 0.814113124656782, + "grad_norm": 0.4695170819759369, + "learning_rate": 1.2903518371757877e-05, + "loss": 0.502, + "step": 29650 + }, + { + "epoch": 0.8141405820977485, + "grad_norm": 0.40875184535980225, + "learning_rate": 1.2903105081626624e-05, + "loss": 0.542, + "step": 29651 + }, + { + "epoch": 0.814168039538715, + "grad_norm": 0.3518095314502716, + "learning_rate": 1.2902691786080151e-05, + "loss": 0.4491, + "step": 29652 + }, + { + "epoch": 0.8141954969796815, + "grad_norm": 0.35969388484954834, + "learning_rate": 1.2902278485119233e-05, + "loss": 0.4856, + "step": 29653 + }, + { + "epoch": 0.814222954420648, + "grad_norm": 0.33749881386756897, + "learning_rate": 1.2901865178744637e-05, + "loss": 0.4106, + "step": 29654 + }, + { + "epoch": 0.8142504118616145, + "grad_norm": 0.434109091758728, + "learning_rate": 1.290145186695714e-05, + "loss": 0.5132, + "step": 29655 + }, + { + "epoch": 0.814277869302581, + "grad_norm": 0.3891954720020294, + "learning_rate": 1.290103854975751e-05, + "loss": 0.4817, + "step": 29656 + }, + { + "epoch": 0.8143053267435475, + "grad_norm": 0.3787616789340973, + "learning_rate": 1.2900625227146513e-05, + "loss": 0.4944, + "step": 29657 + }, + { + "epoch": 0.814332784184514, + "grad_norm": 0.49212613701820374, + "learning_rate": 1.2900211899124925e-05, + "loss": 0.4822, + "step": 29658 + }, + { + "epoch": 0.8143602416254805, + "grad_norm": 0.3635871410369873, + "learning_rate": 1.289979856569352e-05, + "loss": 0.5128, + "step": 29659 + }, + { + "epoch": 0.814387699066447, + "grad_norm": 0.4330967664718628, + "learning_rate": 1.2899385226853061e-05, + "loss": 0.4993, + "step": 29660 + }, + { + "epoch": 0.8144151565074135, + "grad_norm": 0.4087710976600647, + "learning_rate": 1.2898971882604325e-05, + "loss": 0.5533, + "step": 29661 + }, + { + "epoch": 0.81444261394838, + "grad_norm": 0.3568320870399475, + "learning_rate": 1.289855853294808e-05, + "loss": 0.4173, + "step": 29662 + }, + { + "epoch": 0.8144700713893465, + "grad_norm": 0.3942646086215973, + "learning_rate": 1.28981451778851e-05, + "loss": 0.5365, + "step": 29663 + }, + { + "epoch": 0.814497528830313, + "grad_norm": 0.40745624899864197, + "learning_rate": 1.2897731817416152e-05, + "loss": 0.5747, + "step": 29664 + }, + { + "epoch": 0.8145249862712796, + "grad_norm": 0.3683948814868927, + "learning_rate": 1.289731845154201e-05, + "loss": 0.5188, + "step": 29665 + }, + { + "epoch": 0.814552443712246, + "grad_norm": 0.35330629348754883, + "learning_rate": 1.2896905080263447e-05, + "loss": 0.431, + "step": 29666 + }, + { + "epoch": 0.8145799011532125, + "grad_norm": 0.44796663522720337, + "learning_rate": 1.2896491703581226e-05, + "loss": 0.4751, + "step": 29667 + }, + { + "epoch": 0.814607358594179, + "grad_norm": 0.3655795454978943, + "learning_rate": 1.2896078321496129e-05, + "loss": 0.4808, + "step": 29668 + }, + { + "epoch": 0.8146348160351455, + "grad_norm": 0.415217787027359, + "learning_rate": 1.289566493400892e-05, + "loss": 0.4654, + "step": 29669 + }, + { + "epoch": 0.814662273476112, + "grad_norm": 0.42492350935935974, + "learning_rate": 1.289525154112037e-05, + "loss": 0.463, + "step": 29670 + }, + { + "epoch": 0.8146897309170785, + "grad_norm": 0.38228297233581543, + "learning_rate": 1.2894838142831254e-05, + "loss": 0.4751, + "step": 29671 + }, + { + "epoch": 0.8147171883580451, + "grad_norm": 0.38600119948387146, + "learning_rate": 1.289442473914234e-05, + "loss": 0.4729, + "step": 29672 + }, + { + "epoch": 0.8147446457990115, + "grad_norm": 0.49936333298683167, + "learning_rate": 1.28940113300544e-05, + "loss": 0.5595, + "step": 29673 + }, + { + "epoch": 0.8147721032399781, + "grad_norm": 0.35432907938957214, + "learning_rate": 1.2893597915568206e-05, + "loss": 0.4591, + "step": 29674 + }, + { + "epoch": 0.8147995606809445, + "grad_norm": 0.4329057037830353, + "learning_rate": 1.2893184495684526e-05, + "loss": 0.5276, + "step": 29675 + }, + { + "epoch": 0.814827018121911, + "grad_norm": 0.3661741018295288, + "learning_rate": 1.2892771070404135e-05, + "loss": 0.5129, + "step": 29676 + }, + { + "epoch": 0.8148544755628775, + "grad_norm": 0.386715829372406, + "learning_rate": 1.2892357639727804e-05, + "loss": 0.4963, + "step": 29677 + }, + { + "epoch": 0.814881933003844, + "grad_norm": 0.36390063166618347, + "learning_rate": 1.2891944203656302e-05, + "loss": 0.4332, + "step": 29678 + }, + { + "epoch": 0.8149093904448106, + "grad_norm": 0.41562891006469727, + "learning_rate": 1.2891530762190401e-05, + "loss": 0.4659, + "step": 29679 + }, + { + "epoch": 0.814936847885777, + "grad_norm": 0.39317476749420166, + "learning_rate": 1.289111731533087e-05, + "loss": 0.5087, + "step": 29680 + }, + { + "epoch": 0.8149643053267436, + "grad_norm": 0.36388128995895386, + "learning_rate": 1.2890703863078487e-05, + "loss": 0.4506, + "step": 29681 + }, + { + "epoch": 0.81499176276771, + "grad_norm": 0.3606870770454407, + "learning_rate": 1.2890290405434016e-05, + "loss": 0.4537, + "step": 29682 + }, + { + "epoch": 0.8150192202086766, + "grad_norm": 0.39668333530426025, + "learning_rate": 1.288987694239823e-05, + "loss": 0.5569, + "step": 29683 + }, + { + "epoch": 0.815046677649643, + "grad_norm": 0.3754545748233795, + "learning_rate": 1.2889463473971902e-05, + "loss": 0.5015, + "step": 29684 + }, + { + "epoch": 0.8150741350906096, + "grad_norm": 0.3048594892024994, + "learning_rate": 1.2889050000155803e-05, + "loss": 0.4042, + "step": 29685 + }, + { + "epoch": 0.8151015925315761, + "grad_norm": 0.37362435460090637, + "learning_rate": 1.2888636520950701e-05, + "loss": 0.4708, + "step": 29686 + }, + { + "epoch": 0.8151290499725425, + "grad_norm": 0.4279981553554535, + "learning_rate": 1.2888223036357374e-05, + "loss": 0.5575, + "step": 29687 + }, + { + "epoch": 0.8151565074135091, + "grad_norm": 0.503505527973175, + "learning_rate": 1.2887809546376587e-05, + "loss": 0.5745, + "step": 29688 + }, + { + "epoch": 0.8151839648544755, + "grad_norm": 0.38288772106170654, + "learning_rate": 1.2887396051009114e-05, + "loss": 0.5058, + "step": 29689 + }, + { + "epoch": 0.8152114222954421, + "grad_norm": 0.37281230092048645, + "learning_rate": 1.2886982550255726e-05, + "loss": 0.4524, + "step": 29690 + }, + { + "epoch": 0.8152388797364085, + "grad_norm": 0.36497363448143005, + "learning_rate": 1.2886569044117194e-05, + "loss": 0.4844, + "step": 29691 + }, + { + "epoch": 0.8152663371773751, + "grad_norm": 0.44114360213279724, + "learning_rate": 1.288615553259429e-05, + "loss": 0.5717, + "step": 29692 + }, + { + "epoch": 0.8152937946183416, + "grad_norm": 0.3874850273132324, + "learning_rate": 1.2885742015687783e-05, + "loss": 0.4447, + "step": 29693 + }, + { + "epoch": 0.815321252059308, + "grad_norm": 0.40775415301322937, + "learning_rate": 1.2885328493398449e-05, + "loss": 0.5454, + "step": 29694 + }, + { + "epoch": 0.8153487095002746, + "grad_norm": 0.5019264221191406, + "learning_rate": 1.2884914965727053e-05, + "loss": 0.5763, + "step": 29695 + }, + { + "epoch": 0.815376166941241, + "grad_norm": 0.35922524333000183, + "learning_rate": 1.2884501432674373e-05, + "loss": 0.4646, + "step": 29696 + }, + { + "epoch": 0.8154036243822076, + "grad_norm": 0.44227585196495056, + "learning_rate": 1.2884087894241175e-05, + "loss": 0.5414, + "step": 29697 + }, + { + "epoch": 0.815431081823174, + "grad_norm": 0.42100703716278076, + "learning_rate": 1.2883674350428232e-05, + "loss": 0.4557, + "step": 29698 + }, + { + "epoch": 0.8154585392641406, + "grad_norm": 0.3757691979408264, + "learning_rate": 1.2883260801236318e-05, + "loss": 0.5158, + "step": 29699 + }, + { + "epoch": 0.8154859967051071, + "grad_norm": 0.37019652128219604, + "learning_rate": 1.2882847246666204e-05, + "loss": 0.5474, + "step": 29700 + }, + { + "epoch": 0.8155134541460736, + "grad_norm": 0.3662957549095154, + "learning_rate": 1.2882433686718656e-05, + "loss": 0.4927, + "step": 29701 + }, + { + "epoch": 0.8155409115870401, + "grad_norm": 0.40276002883911133, + "learning_rate": 1.2882020121394452e-05, + "loss": 0.5507, + "step": 29702 + }, + { + "epoch": 0.8155683690280066, + "grad_norm": 0.3047461211681366, + "learning_rate": 1.2881606550694356e-05, + "loss": 0.3361, + "step": 29703 + }, + { + "epoch": 0.8155958264689731, + "grad_norm": 0.3473450541496277, + "learning_rate": 1.288119297461915e-05, + "loss": 0.4556, + "step": 29704 + }, + { + "epoch": 0.8156232839099395, + "grad_norm": 0.42273736000061035, + "learning_rate": 1.2880779393169599e-05, + "loss": 0.6042, + "step": 29705 + }, + { + "epoch": 0.8156507413509061, + "grad_norm": 0.3936721682548523, + "learning_rate": 1.288036580634647e-05, + "loss": 0.4423, + "step": 29706 + }, + { + "epoch": 0.8156781987918726, + "grad_norm": 0.35070574283599854, + "learning_rate": 1.2879952214150543e-05, + "loss": 0.524, + "step": 29707 + }, + { + "epoch": 0.8157056562328391, + "grad_norm": 0.37846872210502625, + "learning_rate": 1.2879538616582585e-05, + "loss": 0.5545, + "step": 29708 + }, + { + "epoch": 0.8157331136738056, + "grad_norm": 0.4268110990524292, + "learning_rate": 1.2879125013643368e-05, + "loss": 0.459, + "step": 29709 + }, + { + "epoch": 0.8157605711147721, + "grad_norm": 0.39726683497428894, + "learning_rate": 1.2878711405333666e-05, + "loss": 0.5088, + "step": 29710 + }, + { + "epoch": 0.8157880285557386, + "grad_norm": 0.38816994428634644, + "learning_rate": 1.2878297791654247e-05, + "loss": 0.4576, + "step": 29711 + }, + { + "epoch": 0.8158154859967051, + "grad_norm": 0.40452614426612854, + "learning_rate": 1.2877884172605884e-05, + "loss": 0.4886, + "step": 29712 + }, + { + "epoch": 0.8158429434376716, + "grad_norm": 0.4006691575050354, + "learning_rate": 1.287747054818935e-05, + "loss": 0.5545, + "step": 29713 + }, + { + "epoch": 0.8158704008786382, + "grad_norm": 0.403721421957016, + "learning_rate": 1.2877056918405413e-05, + "loss": 0.5866, + "step": 29714 + }, + { + "epoch": 0.8158978583196046, + "grad_norm": 0.4676116108894348, + "learning_rate": 1.2876643283254845e-05, + "loss": 0.5291, + "step": 29715 + }, + { + "epoch": 0.8159253157605711, + "grad_norm": 0.34624263644218445, + "learning_rate": 1.2876229642738422e-05, + "loss": 0.4647, + "step": 29716 + }, + { + "epoch": 0.8159527732015376, + "grad_norm": 0.4008829891681671, + "learning_rate": 1.2875815996856913e-05, + "loss": 0.5369, + "step": 29717 + }, + { + "epoch": 0.8159802306425041, + "grad_norm": 0.3786114454269409, + "learning_rate": 1.287540234561109e-05, + "loss": 0.4427, + "step": 29718 + }, + { + "epoch": 0.8160076880834706, + "grad_norm": 0.38280701637268066, + "learning_rate": 1.2874988689001722e-05, + "loss": 0.5238, + "step": 29719 + }, + { + "epoch": 0.8160351455244371, + "grad_norm": 0.39356446266174316, + "learning_rate": 1.2874575027029584e-05, + "loss": 0.4724, + "step": 29720 + }, + { + "epoch": 0.8160626029654037, + "grad_norm": 0.4333021342754364, + "learning_rate": 1.2874161359695446e-05, + "loss": 0.5083, + "step": 29721 + }, + { + "epoch": 0.8160900604063701, + "grad_norm": 0.37130939960479736, + "learning_rate": 1.2873747687000075e-05, + "loss": 0.4423, + "step": 29722 + }, + { + "epoch": 0.8161175178473367, + "grad_norm": 0.4218670427799225, + "learning_rate": 1.2873334008944252e-05, + "loss": 0.4912, + "step": 29723 + }, + { + "epoch": 0.8161449752883031, + "grad_norm": 0.3501530587673187, + "learning_rate": 1.287292032552874e-05, + "loss": 0.4189, + "step": 29724 + }, + { + "epoch": 0.8161724327292696, + "grad_norm": 0.45749709010124207, + "learning_rate": 1.287250663675432e-05, + "loss": 0.5541, + "step": 29725 + }, + { + "epoch": 0.8161998901702361, + "grad_norm": 0.39695268869400024, + "learning_rate": 1.2872092942621759e-05, + "loss": 0.5719, + "step": 29726 + }, + { + "epoch": 0.8162273476112026, + "grad_norm": 0.36696940660476685, + "learning_rate": 1.2871679243131822e-05, + "loss": 0.4814, + "step": 29727 + }, + { + "epoch": 0.8162548050521692, + "grad_norm": 0.3626328408718109, + "learning_rate": 1.2871265538285292e-05, + "loss": 0.4726, + "step": 29728 + }, + { + "epoch": 0.8162822624931356, + "grad_norm": 0.38126489520072937, + "learning_rate": 1.2870851828082931e-05, + "loss": 0.4939, + "step": 29729 + }, + { + "epoch": 0.8163097199341022, + "grad_norm": 0.3867487609386444, + "learning_rate": 1.2870438112525519e-05, + "loss": 0.5756, + "step": 29730 + }, + { + "epoch": 0.8163371773750686, + "grad_norm": 0.5645161867141724, + "learning_rate": 1.2870024391613822e-05, + "loss": 0.5693, + "step": 29731 + }, + { + "epoch": 0.8163646348160352, + "grad_norm": 1.1714768409729004, + "learning_rate": 1.2869610665348614e-05, + "loss": 0.461, + "step": 29732 + }, + { + "epoch": 0.8163920922570016, + "grad_norm": 0.39456361532211304, + "learning_rate": 1.2869196933730665e-05, + "loss": 0.4592, + "step": 29733 + }, + { + "epoch": 0.8164195496979681, + "grad_norm": 0.43155261874198914, + "learning_rate": 1.2868783196760747e-05, + "loss": 0.5164, + "step": 29734 + }, + { + "epoch": 0.8164470071389347, + "grad_norm": 0.3342248797416687, + "learning_rate": 1.2868369454439636e-05, + "loss": 0.4578, + "step": 29735 + }, + { + "epoch": 0.8164744645799011, + "grad_norm": 0.4071844220161438, + "learning_rate": 1.28679557067681e-05, + "loss": 0.4955, + "step": 29736 + }, + { + "epoch": 0.8165019220208677, + "grad_norm": 0.37005311250686646, + "learning_rate": 1.2867541953746909e-05, + "loss": 0.5064, + "step": 29737 + }, + { + "epoch": 0.8165293794618341, + "grad_norm": 0.39512720704078674, + "learning_rate": 1.2867128195376839e-05, + "loss": 0.4554, + "step": 29738 + }, + { + "epoch": 0.8165568369028007, + "grad_norm": 0.5053516626358032, + "learning_rate": 1.286671443165866e-05, + "loss": 0.4803, + "step": 29739 + }, + { + "epoch": 0.8165842943437671, + "grad_norm": 0.3862617611885071, + "learning_rate": 1.2866300662593142e-05, + "loss": 0.4621, + "step": 29740 + }, + { + "epoch": 0.8166117517847337, + "grad_norm": 0.40775880217552185, + "learning_rate": 1.2865886888181059e-05, + "loss": 0.4434, + "step": 29741 + }, + { + "epoch": 0.8166392092257002, + "grad_norm": 0.36323708295822144, + "learning_rate": 1.286547310842318e-05, + "loss": 0.4895, + "step": 29742 + }, + { + "epoch": 0.8166666666666667, + "grad_norm": 0.48690205812454224, + "learning_rate": 1.2865059323320283e-05, + "loss": 0.5689, + "step": 29743 + }, + { + "epoch": 0.8166941241076332, + "grad_norm": 0.3782793879508972, + "learning_rate": 1.2864645532873137e-05, + "loss": 0.4358, + "step": 29744 + }, + { + "epoch": 0.8167215815485996, + "grad_norm": 0.3822595775127411, + "learning_rate": 1.2864231737082508e-05, + "loss": 0.4783, + "step": 29745 + }, + { + "epoch": 0.8167490389895662, + "grad_norm": 0.4232713282108307, + "learning_rate": 1.2863817935949177e-05, + "loss": 0.4751, + "step": 29746 + }, + { + "epoch": 0.8167764964305326, + "grad_norm": 0.41067248582839966, + "learning_rate": 1.2863404129473906e-05, + "loss": 0.5064, + "step": 29747 + }, + { + "epoch": 0.8168039538714992, + "grad_norm": 0.3760000467300415, + "learning_rate": 1.2862990317657477e-05, + "loss": 0.4558, + "step": 29748 + }, + { + "epoch": 0.8168314113124657, + "grad_norm": 0.37570181488990784, + "learning_rate": 1.286257650050066e-05, + "loss": 0.5458, + "step": 29749 + }, + { + "epoch": 0.8168588687534322, + "grad_norm": 0.4286228120326996, + "learning_rate": 1.2862162678004217e-05, + "loss": 0.5763, + "step": 29750 + }, + { + "epoch": 0.8168863261943987, + "grad_norm": 0.36378636956214905, + "learning_rate": 1.2861748850168931e-05, + "loss": 0.4184, + "step": 29751 + }, + { + "epoch": 0.8169137836353652, + "grad_norm": 0.3504086136817932, + "learning_rate": 1.286133501699557e-05, + "loss": 0.4585, + "step": 29752 + }, + { + "epoch": 0.8169412410763317, + "grad_norm": 0.38307705521583557, + "learning_rate": 1.2860921178484904e-05, + "loss": 0.6137, + "step": 29753 + }, + { + "epoch": 0.8169686985172981, + "grad_norm": 0.5503459572792053, + "learning_rate": 1.286050733463771e-05, + "loss": 0.4692, + "step": 29754 + }, + { + "epoch": 0.8169961559582647, + "grad_norm": 0.3408075273036957, + "learning_rate": 1.2860093485454755e-05, + "loss": 0.4522, + "step": 29755 + }, + { + "epoch": 0.8170236133992312, + "grad_norm": 0.45777225494384766, + "learning_rate": 1.2859679630936812e-05, + "loss": 0.6154, + "step": 29756 + }, + { + "epoch": 0.8170510708401977, + "grad_norm": 0.42029720544815063, + "learning_rate": 1.2859265771084654e-05, + "loss": 0.5537, + "step": 29757 + }, + { + "epoch": 0.8170785282811642, + "grad_norm": 0.4263295531272888, + "learning_rate": 1.2858851905899054e-05, + "loss": 0.5452, + "step": 29758 + }, + { + "epoch": 0.8171059857221307, + "grad_norm": 0.3551003038883209, + "learning_rate": 1.2858438035380782e-05, + "loss": 0.469, + "step": 29759 + }, + { + "epoch": 0.8171334431630972, + "grad_norm": 0.39218613505363464, + "learning_rate": 1.2858024159530612e-05, + "loss": 0.3642, + "step": 29760 + }, + { + "epoch": 0.8171609006040637, + "grad_norm": 0.3816457986831665, + "learning_rate": 1.2857610278349315e-05, + "loss": 0.498, + "step": 29761 + }, + { + "epoch": 0.8171883580450302, + "grad_norm": 0.3663220703601837, + "learning_rate": 1.2857196391837658e-05, + "loss": 0.5079, + "step": 29762 + }, + { + "epoch": 0.8172158154859968, + "grad_norm": 0.3677317798137665, + "learning_rate": 1.2856782499996423e-05, + "loss": 0.4264, + "step": 29763 + }, + { + "epoch": 0.8172432729269632, + "grad_norm": 0.4292408525943756, + "learning_rate": 1.2856368602826374e-05, + "loss": 0.4762, + "step": 29764 + }, + { + "epoch": 0.8172707303679297, + "grad_norm": 0.3520265817642212, + "learning_rate": 1.285595470032829e-05, + "loss": 0.4849, + "step": 29765 + }, + { + "epoch": 0.8172981878088962, + "grad_norm": 0.41530415415763855, + "learning_rate": 1.2855540792502937e-05, + "loss": 0.5179, + "step": 29766 + }, + { + "epoch": 0.8173256452498627, + "grad_norm": 0.44347211718559265, + "learning_rate": 1.2855126879351087e-05, + "loss": 0.5606, + "step": 29767 + }, + { + "epoch": 0.8173531026908292, + "grad_norm": 0.4456445276737213, + "learning_rate": 1.2854712960873515e-05, + "loss": 0.5662, + "step": 29768 + }, + { + "epoch": 0.8173805601317957, + "grad_norm": 0.5070704817771912, + "learning_rate": 1.2854299037070992e-05, + "loss": 0.5295, + "step": 29769 + }, + { + "epoch": 0.8174080175727623, + "grad_norm": 0.3860585391521454, + "learning_rate": 1.2853885107944293e-05, + "loss": 0.504, + "step": 29770 + }, + { + "epoch": 0.8174354750137287, + "grad_norm": 0.3696349859237671, + "learning_rate": 1.2853471173494185e-05, + "loss": 0.4708, + "step": 29771 + }, + { + "epoch": 0.8174629324546953, + "grad_norm": 0.45967990159988403, + "learning_rate": 1.2853057233721442e-05, + "loss": 0.4607, + "step": 29772 + }, + { + "epoch": 0.8174903898956617, + "grad_norm": 0.4232665002346039, + "learning_rate": 1.2852643288626838e-05, + "loss": 0.5396, + "step": 29773 + }, + { + "epoch": 0.8175178473366282, + "grad_norm": 0.4151057302951813, + "learning_rate": 1.2852229338211141e-05, + "loss": 0.4545, + "step": 29774 + }, + { + "epoch": 0.8175453047775947, + "grad_norm": 0.4133724868297577, + "learning_rate": 1.2851815382475131e-05, + "loss": 0.5243, + "step": 29775 + }, + { + "epoch": 0.8175727622185612, + "grad_norm": 0.36403003334999084, + "learning_rate": 1.2851401421419569e-05, + "loss": 0.4807, + "step": 29776 + }, + { + "epoch": 0.8176002196595278, + "grad_norm": 0.41358503699302673, + "learning_rate": 1.2850987455045238e-05, + "loss": 0.4616, + "step": 29777 + }, + { + "epoch": 0.8176276771004942, + "grad_norm": 0.3617730140686035, + "learning_rate": 1.2850573483352907e-05, + "loss": 0.4267, + "step": 29778 + }, + { + "epoch": 0.8176551345414608, + "grad_norm": 0.39328116178512573, + "learning_rate": 1.285015950634334e-05, + "loss": 0.5058, + "step": 29779 + }, + { + "epoch": 0.8176825919824272, + "grad_norm": 0.4431750774383545, + "learning_rate": 1.284974552401732e-05, + "loss": 0.5915, + "step": 29780 + }, + { + "epoch": 0.8177100494233938, + "grad_norm": 0.4044540822505951, + "learning_rate": 1.2849331536375615e-05, + "loss": 0.5149, + "step": 29781 + }, + { + "epoch": 0.8177375068643602, + "grad_norm": 0.3452550172805786, + "learning_rate": 1.2848917543418996e-05, + "loss": 0.4379, + "step": 29782 + }, + { + "epoch": 0.8177649643053267, + "grad_norm": 0.3403281569480896, + "learning_rate": 1.2848503545148239e-05, + "loss": 0.4622, + "step": 29783 + }, + { + "epoch": 0.8177924217462933, + "grad_norm": 0.36390501260757446, + "learning_rate": 1.2848089541564113e-05, + "loss": 0.4866, + "step": 29784 + }, + { + "epoch": 0.8178198791872597, + "grad_norm": 0.3850850462913513, + "learning_rate": 1.284767553266739e-05, + "loss": 0.4964, + "step": 29785 + }, + { + "epoch": 0.8178473366282263, + "grad_norm": 0.387901246547699, + "learning_rate": 1.2847261518458843e-05, + "loss": 0.5279, + "step": 29786 + }, + { + "epoch": 0.8178747940691927, + "grad_norm": 0.7095381617546082, + "learning_rate": 1.2846847498939246e-05, + "loss": 0.5327, + "step": 29787 + }, + { + "epoch": 0.8179022515101593, + "grad_norm": 0.38062119483947754, + "learning_rate": 1.284643347410937e-05, + "loss": 0.4592, + "step": 29788 + }, + { + "epoch": 0.8179297089511257, + "grad_norm": 0.3846418559551239, + "learning_rate": 1.2846019443969984e-05, + "loss": 0.4263, + "step": 29789 + }, + { + "epoch": 0.8179571663920923, + "grad_norm": 0.3894844949245453, + "learning_rate": 1.2845605408521866e-05, + "loss": 0.4956, + "step": 29790 + }, + { + "epoch": 0.8179846238330588, + "grad_norm": 0.4017009437084198, + "learning_rate": 1.2845191367765784e-05, + "loss": 0.4934, + "step": 29791 + }, + { + "epoch": 0.8180120812740252, + "grad_norm": 0.3971722722053528, + "learning_rate": 1.2844777321702513e-05, + "loss": 0.5223, + "step": 29792 + }, + { + "epoch": 0.8180395387149918, + "grad_norm": 0.4198957681655884, + "learning_rate": 1.2844363270332827e-05, + "loss": 0.546, + "step": 29793 + }, + { + "epoch": 0.8180669961559582, + "grad_norm": 0.40161409974098206, + "learning_rate": 1.284394921365749e-05, + "loss": 0.5212, + "step": 29794 + }, + { + "epoch": 0.8180944535969248, + "grad_norm": 0.42480170726776123, + "learning_rate": 1.2843535151677284e-05, + "loss": 0.4874, + "step": 29795 + }, + { + "epoch": 0.8181219110378912, + "grad_norm": 0.36984366178512573, + "learning_rate": 1.2843121084392976e-05, + "loss": 0.4279, + "step": 29796 + }, + { + "epoch": 0.8181493684788578, + "grad_norm": 0.3794953525066376, + "learning_rate": 1.284270701180534e-05, + "loss": 0.5116, + "step": 29797 + }, + { + "epoch": 0.8181768259198243, + "grad_norm": 0.3618275821208954, + "learning_rate": 1.284229293391515e-05, + "loss": 0.5045, + "step": 29798 + }, + { + "epoch": 0.8182042833607908, + "grad_norm": 0.3655815124511719, + "learning_rate": 1.2841878850723175e-05, + "loss": 0.4918, + "step": 29799 + }, + { + "epoch": 0.8182317408017573, + "grad_norm": 0.3755134344100952, + "learning_rate": 1.284146476223019e-05, + "loss": 0.5354, + "step": 29800 + }, + { + "epoch": 0.8182591982427238, + "grad_norm": 0.4752017855644226, + "learning_rate": 1.2841050668436965e-05, + "loss": 0.5692, + "step": 29801 + }, + { + "epoch": 0.8182866556836903, + "grad_norm": 0.386306494474411, + "learning_rate": 1.2840636569344272e-05, + "loss": 0.4465, + "step": 29802 + }, + { + "epoch": 0.8183141131246567, + "grad_norm": 0.43851736187934875, + "learning_rate": 1.284022246495289e-05, + "loss": 0.4647, + "step": 29803 + }, + { + "epoch": 0.8183415705656233, + "grad_norm": 0.40463677048683167, + "learning_rate": 1.283980835526358e-05, + "loss": 0.4668, + "step": 29804 + }, + { + "epoch": 0.8183690280065898, + "grad_norm": 0.3938886523246765, + "learning_rate": 1.2839394240277128e-05, + "loss": 0.4555, + "step": 29805 + }, + { + "epoch": 0.8183964854475563, + "grad_norm": 0.3796316981315613, + "learning_rate": 1.2838980119994296e-05, + "loss": 0.5182, + "step": 29806 + }, + { + "epoch": 0.8184239428885228, + "grad_norm": 0.48716869950294495, + "learning_rate": 1.2838565994415859e-05, + "loss": 0.4594, + "step": 29807 + }, + { + "epoch": 0.8184514003294893, + "grad_norm": 0.3775138258934021, + "learning_rate": 1.2838151863542591e-05, + "loss": 0.4846, + "step": 29808 + }, + { + "epoch": 0.8184788577704558, + "grad_norm": 0.422982394695282, + "learning_rate": 1.2837737727375266e-05, + "loss": 0.4368, + "step": 29809 + }, + { + "epoch": 0.8185063152114223, + "grad_norm": 0.3985643982887268, + "learning_rate": 1.2837323585914651e-05, + "loss": 0.4925, + "step": 29810 + }, + { + "epoch": 0.8185337726523888, + "grad_norm": 0.37971627712249756, + "learning_rate": 1.2836909439161527e-05, + "loss": 0.5156, + "step": 29811 + }, + { + "epoch": 0.8185612300933554, + "grad_norm": 0.3909282982349396, + "learning_rate": 1.2836495287116657e-05, + "loss": 0.5297, + "step": 29812 + }, + { + "epoch": 0.8185886875343218, + "grad_norm": 0.4281807541847229, + "learning_rate": 1.283608112978082e-05, + "loss": 0.4078, + "step": 29813 + }, + { + "epoch": 0.8186161449752883, + "grad_norm": 0.37684252858161926, + "learning_rate": 1.2835666967154788e-05, + "loss": 0.6046, + "step": 29814 + }, + { + "epoch": 0.8186436024162548, + "grad_norm": 0.38392916321754456, + "learning_rate": 1.2835252799239326e-05, + "loss": 0.5565, + "step": 29815 + }, + { + "epoch": 0.8186710598572213, + "grad_norm": 0.39906471967697144, + "learning_rate": 1.283483862603522e-05, + "loss": 0.5272, + "step": 29816 + }, + { + "epoch": 0.8186985172981878, + "grad_norm": 0.35901233553886414, + "learning_rate": 1.283442444754323e-05, + "loss": 0.4892, + "step": 29817 + }, + { + "epoch": 0.8187259747391543, + "grad_norm": 0.3971671164035797, + "learning_rate": 1.2834010263764135e-05, + "loss": 0.4485, + "step": 29818 + }, + { + "epoch": 0.8187534321801209, + "grad_norm": 0.42621615529060364, + "learning_rate": 1.2833596074698708e-05, + "loss": 0.481, + "step": 29819 + }, + { + "epoch": 0.8187808896210873, + "grad_norm": 0.4067281186580658, + "learning_rate": 1.2833181880347718e-05, + "loss": 0.4965, + "step": 29820 + }, + { + "epoch": 0.8188083470620539, + "grad_norm": 0.354600191116333, + "learning_rate": 1.2832767680711941e-05, + "loss": 0.4434, + "step": 29821 + }, + { + "epoch": 0.8188358045030203, + "grad_norm": 0.3844257593154907, + "learning_rate": 1.2832353475792148e-05, + "loss": 0.5731, + "step": 29822 + }, + { + "epoch": 0.8188632619439868, + "grad_norm": 0.39769527316093445, + "learning_rate": 1.2831939265589113e-05, + "loss": 0.4689, + "step": 29823 + }, + { + "epoch": 0.8188907193849533, + "grad_norm": 0.36561912298202515, + "learning_rate": 1.2831525050103606e-05, + "loss": 0.5345, + "step": 29824 + }, + { + "epoch": 0.8189181768259198, + "grad_norm": 0.5199923515319824, + "learning_rate": 1.28311108293364e-05, + "loss": 0.4972, + "step": 29825 + }, + { + "epoch": 0.8189456342668864, + "grad_norm": 0.39659687876701355, + "learning_rate": 1.2830696603288271e-05, + "loss": 0.4071, + "step": 29826 + }, + { + "epoch": 0.8189730917078528, + "grad_norm": 0.4294394850730896, + "learning_rate": 1.2830282371959986e-05, + "loss": 0.5133, + "step": 29827 + }, + { + "epoch": 0.8190005491488194, + "grad_norm": 0.4190565049648285, + "learning_rate": 1.2829868135352324e-05, + "loss": 0.5887, + "step": 29828 + }, + { + "epoch": 0.8190280065897858, + "grad_norm": 0.38593602180480957, + "learning_rate": 1.2829453893466053e-05, + "loss": 0.4954, + "step": 29829 + }, + { + "epoch": 0.8190554640307524, + "grad_norm": 0.3709847629070282, + "learning_rate": 1.2829039646301946e-05, + "loss": 0.5136, + "step": 29830 + }, + { + "epoch": 0.8190829214717188, + "grad_norm": 0.40476980805397034, + "learning_rate": 1.2828625393860782e-05, + "loss": 0.5198, + "step": 29831 + }, + { + "epoch": 0.8191103789126853, + "grad_norm": 0.3767644464969635, + "learning_rate": 1.2828211136143326e-05, + "loss": 0.5791, + "step": 29832 + }, + { + "epoch": 0.8191378363536519, + "grad_norm": 0.3346176743507385, + "learning_rate": 1.2827796873150353e-05, + "loss": 0.4857, + "step": 29833 + }, + { + "epoch": 0.8191652937946183, + "grad_norm": 0.4106161892414093, + "learning_rate": 1.2827382604882639e-05, + "loss": 0.4706, + "step": 29834 + }, + { + "epoch": 0.8191927512355849, + "grad_norm": 0.3526047170162201, + "learning_rate": 1.2826968331340949e-05, + "loss": 0.4846, + "step": 29835 + }, + { + "epoch": 0.8192202086765513, + "grad_norm": 0.401846319437027, + "learning_rate": 1.2826554052526066e-05, + "loss": 0.4879, + "step": 29836 + }, + { + "epoch": 0.8192476661175179, + "grad_norm": 0.4400515854358673, + "learning_rate": 1.2826139768438756e-05, + "loss": 0.5374, + "step": 29837 + }, + { + "epoch": 0.8192751235584843, + "grad_norm": 0.3706651031970978, + "learning_rate": 1.2825725479079792e-05, + "loss": 0.4916, + "step": 29838 + }, + { + "epoch": 0.8193025809994509, + "grad_norm": 0.4236578643321991, + "learning_rate": 1.282531118444995e-05, + "loss": 0.4902, + "step": 29839 + }, + { + "epoch": 0.8193300384404174, + "grad_norm": 0.5619128942489624, + "learning_rate": 1.2824896884549997e-05, + "loss": 0.4542, + "step": 29840 + }, + { + "epoch": 0.8193574958813838, + "grad_norm": 0.3436570167541504, + "learning_rate": 1.2824482579380715e-05, + "loss": 0.4947, + "step": 29841 + }, + { + "epoch": 0.8193849533223504, + "grad_norm": 0.35925742983818054, + "learning_rate": 1.2824068268942868e-05, + "loss": 0.5106, + "step": 29842 + }, + { + "epoch": 0.8194124107633168, + "grad_norm": 0.4353228509426117, + "learning_rate": 1.2823653953237233e-05, + "loss": 0.4198, + "step": 29843 + }, + { + "epoch": 0.8194398682042834, + "grad_norm": 0.4057868421077728, + "learning_rate": 1.2823239632264584e-05, + "loss": 0.4985, + "step": 29844 + }, + { + "epoch": 0.8194673256452498, + "grad_norm": 0.7350486516952515, + "learning_rate": 1.2822825306025691e-05, + "loss": 0.4542, + "step": 29845 + }, + { + "epoch": 0.8194947830862164, + "grad_norm": 0.36042657494544983, + "learning_rate": 1.2822410974521326e-05, + "loss": 0.4392, + "step": 29846 + }, + { + "epoch": 0.8195222405271829, + "grad_norm": 0.4078938364982605, + "learning_rate": 1.2821996637752268e-05, + "loss": 0.511, + "step": 29847 + }, + { + "epoch": 0.8195496979681494, + "grad_norm": 0.5156942009925842, + "learning_rate": 1.2821582295719282e-05, + "loss": 0.5418, + "step": 29848 + }, + { + "epoch": 0.8195771554091159, + "grad_norm": 0.3941909670829773, + "learning_rate": 1.2821167948423145e-05, + "loss": 0.4683, + "step": 29849 + }, + { + "epoch": 0.8196046128500823, + "grad_norm": 0.40761852264404297, + "learning_rate": 1.282075359586463e-05, + "loss": 0.5008, + "step": 29850 + }, + { + "epoch": 0.8196320702910489, + "grad_norm": 0.37715354561805725, + "learning_rate": 1.2820339238044508e-05, + "loss": 0.4884, + "step": 29851 + }, + { + "epoch": 0.8196595277320153, + "grad_norm": 0.43794193863868713, + "learning_rate": 1.2819924874963553e-05, + "loss": 0.4518, + "step": 29852 + }, + { + "epoch": 0.8196869851729819, + "grad_norm": 0.3783273696899414, + "learning_rate": 1.281951050662254e-05, + "loss": 0.5185, + "step": 29853 + }, + { + "epoch": 0.8197144426139484, + "grad_norm": 0.38956475257873535, + "learning_rate": 1.281909613302224e-05, + "loss": 0.4552, + "step": 29854 + }, + { + "epoch": 0.8197419000549149, + "grad_norm": 0.488350510597229, + "learning_rate": 1.2818681754163424e-05, + "loss": 0.5068, + "step": 29855 + }, + { + "epoch": 0.8197693574958814, + "grad_norm": 0.45009031891822815, + "learning_rate": 1.2818267370046869e-05, + "loss": 0.4665, + "step": 29856 + }, + { + "epoch": 0.8197968149368479, + "grad_norm": 0.48266151547431946, + "learning_rate": 1.2817852980673344e-05, + "loss": 0.5353, + "step": 29857 + }, + { + "epoch": 0.8198242723778144, + "grad_norm": 0.3745397627353668, + "learning_rate": 1.2817438586043625e-05, + "loss": 0.42, + "step": 29858 + }, + { + "epoch": 0.8198517298187808, + "grad_norm": 0.4363009035587311, + "learning_rate": 1.2817024186158484e-05, + "loss": 0.5672, + "step": 29859 + }, + { + "epoch": 0.8198791872597474, + "grad_norm": 0.4505922496318817, + "learning_rate": 1.2816609781018694e-05, + "loss": 0.5208, + "step": 29860 + }, + { + "epoch": 0.8199066447007138, + "grad_norm": 0.42160022258758545, + "learning_rate": 1.2816195370625027e-05, + "loss": 0.5102, + "step": 29861 + }, + { + "epoch": 0.8199341021416804, + "grad_norm": 0.4656600058078766, + "learning_rate": 1.2815780954978256e-05, + "loss": 0.52, + "step": 29862 + }, + { + "epoch": 0.8199615595826469, + "grad_norm": 0.4245969355106354, + "learning_rate": 1.2815366534079157e-05, + "loss": 0.4924, + "step": 29863 + }, + { + "epoch": 0.8199890170236134, + "grad_norm": 0.382367879152298, + "learning_rate": 1.28149521079285e-05, + "loss": 0.4943, + "step": 29864 + }, + { + "epoch": 0.8200164744645799, + "grad_norm": 0.37704774737358093, + "learning_rate": 1.2814537676527058e-05, + "loss": 0.5113, + "step": 29865 + }, + { + "epoch": 0.8200439319055464, + "grad_norm": 0.4379624128341675, + "learning_rate": 1.2814123239875605e-05, + "loss": 0.5605, + "step": 29866 + }, + { + "epoch": 0.8200713893465129, + "grad_norm": 0.38951823115348816, + "learning_rate": 1.2813708797974916e-05, + "loss": 0.46, + "step": 29867 + }, + { + "epoch": 0.8200988467874794, + "grad_norm": 0.37134289741516113, + "learning_rate": 1.281329435082576e-05, + "loss": 0.5031, + "step": 29868 + }, + { + "epoch": 0.8201263042284459, + "grad_norm": 0.3908522427082062, + "learning_rate": 1.2812879898428912e-05, + "loss": 0.4355, + "step": 29869 + }, + { + "epoch": 0.8201537616694125, + "grad_norm": 0.4594365656375885, + "learning_rate": 1.2812465440785147e-05, + "loss": 0.6039, + "step": 29870 + }, + { + "epoch": 0.8201812191103789, + "grad_norm": 0.4187853932380676, + "learning_rate": 1.2812050977895232e-05, + "loss": 0.5031, + "step": 29871 + }, + { + "epoch": 0.8202086765513454, + "grad_norm": 0.39344313740730286, + "learning_rate": 1.2811636509759948e-05, + "loss": 0.5434, + "step": 29872 + }, + { + "epoch": 0.8202361339923119, + "grad_norm": 0.4878123998641968, + "learning_rate": 1.2811222036380067e-05, + "loss": 0.545, + "step": 29873 + }, + { + "epoch": 0.8202635914332784, + "grad_norm": 0.4038982093334198, + "learning_rate": 1.2810807557756354e-05, + "loss": 0.5517, + "step": 29874 + }, + { + "epoch": 0.8202910488742449, + "grad_norm": 0.4685501754283905, + "learning_rate": 1.2810393073889594e-05, + "loss": 0.5159, + "step": 29875 + }, + { + "epoch": 0.8203185063152114, + "grad_norm": 0.4132039248943329, + "learning_rate": 1.2809978584780546e-05, + "loss": 0.5286, + "step": 29876 + }, + { + "epoch": 0.820345963756178, + "grad_norm": 0.43112990260124207, + "learning_rate": 1.2809564090429998e-05, + "loss": 0.4618, + "step": 29877 + }, + { + "epoch": 0.8203734211971444, + "grad_norm": 0.4360826015472412, + "learning_rate": 1.2809149590838716e-05, + "loss": 0.4623, + "step": 29878 + }, + { + "epoch": 0.820400878638111, + "grad_norm": 0.3895154297351837, + "learning_rate": 1.2808735086007469e-05, + "loss": 0.4619, + "step": 29879 + }, + { + "epoch": 0.8204283360790774, + "grad_norm": 0.4145752191543579, + "learning_rate": 1.2808320575937039e-05, + "loss": 0.4981, + "step": 29880 + }, + { + "epoch": 0.8204557935200439, + "grad_norm": 0.6444263458251953, + "learning_rate": 1.2807906060628192e-05, + "loss": 0.5635, + "step": 29881 + }, + { + "epoch": 0.8204832509610104, + "grad_norm": 0.4042304754257202, + "learning_rate": 1.2807491540081705e-05, + "loss": 0.5288, + "step": 29882 + }, + { + "epoch": 0.8205107084019769, + "grad_norm": 0.38391855359077454, + "learning_rate": 1.280707701429835e-05, + "loss": 0.475, + "step": 29883 + }, + { + "epoch": 0.8205381658429435, + "grad_norm": 0.34838199615478516, + "learning_rate": 1.28066624832789e-05, + "loss": 0.4463, + "step": 29884 + }, + { + "epoch": 0.8205656232839099, + "grad_norm": 0.34607356786727905, + "learning_rate": 1.280624794702413e-05, + "loss": 0.4721, + "step": 29885 + }, + { + "epoch": 0.8205930807248765, + "grad_norm": 0.41536784172058105, + "learning_rate": 1.2805833405534812e-05, + "loss": 0.4354, + "step": 29886 + }, + { + "epoch": 0.8206205381658429, + "grad_norm": 0.34718164801597595, + "learning_rate": 1.2805418858811715e-05, + "loss": 0.4472, + "step": 29887 + }, + { + "epoch": 0.8206479956068095, + "grad_norm": 0.5155624151229858, + "learning_rate": 1.2805004306855621e-05, + "loss": 0.4329, + "step": 29888 + }, + { + "epoch": 0.8206754530477759, + "grad_norm": 0.44949495792388916, + "learning_rate": 1.2804589749667297e-05, + "loss": 0.5001, + "step": 29889 + }, + { + "epoch": 0.8207029104887424, + "grad_norm": 0.387574166059494, + "learning_rate": 1.2804175187247521e-05, + "loss": 0.4559, + "step": 29890 + }, + { + "epoch": 0.820730367929709, + "grad_norm": 0.39950132369995117, + "learning_rate": 1.280376061959706e-05, + "loss": 0.509, + "step": 29891 + }, + { + "epoch": 0.8207578253706754, + "grad_norm": 0.41575077176094055, + "learning_rate": 1.2803346046716692e-05, + "loss": 0.528, + "step": 29892 + }, + { + "epoch": 0.820785282811642, + "grad_norm": 0.3535975217819214, + "learning_rate": 1.2802931468607187e-05, + "loss": 0.4468, + "step": 29893 + }, + { + "epoch": 0.8208127402526084, + "grad_norm": 0.3681621849536896, + "learning_rate": 1.2802516885269323e-05, + "loss": 0.4727, + "step": 29894 + }, + { + "epoch": 0.820840197693575, + "grad_norm": 0.379279226064682, + "learning_rate": 1.2802102296703867e-05, + "loss": 0.5322, + "step": 29895 + }, + { + "epoch": 0.8208676551345414, + "grad_norm": 0.3884551525115967, + "learning_rate": 1.28016877029116e-05, + "loss": 0.537, + "step": 29896 + }, + { + "epoch": 0.820895112575508, + "grad_norm": 0.37283244729042053, + "learning_rate": 1.2801273103893289e-05, + "loss": 0.5382, + "step": 29897 + }, + { + "epoch": 0.8209225700164745, + "grad_norm": 0.3809974193572998, + "learning_rate": 1.2800858499649709e-05, + "loss": 0.4511, + "step": 29898 + }, + { + "epoch": 0.820950027457441, + "grad_norm": 0.41279274225234985, + "learning_rate": 1.2800443890181635e-05, + "loss": 0.4925, + "step": 29899 + }, + { + "epoch": 0.8209774848984075, + "grad_norm": 0.3863135874271393, + "learning_rate": 1.2800029275489838e-05, + "loss": 0.5206, + "step": 29900 + }, + { + "epoch": 0.8210049423393739, + "grad_norm": 0.39064309000968933, + "learning_rate": 1.2799614655575094e-05, + "loss": 0.5195, + "step": 29901 + }, + { + "epoch": 0.8210323997803405, + "grad_norm": 0.37219980359077454, + "learning_rate": 1.2799200030438174e-05, + "loss": 0.4593, + "step": 29902 + }, + { + "epoch": 0.8210598572213069, + "grad_norm": 0.40074869990348816, + "learning_rate": 1.2798785400079854e-05, + "loss": 0.5235, + "step": 29903 + }, + { + "epoch": 0.8210873146622735, + "grad_norm": 0.40831074118614197, + "learning_rate": 1.2798370764500907e-05, + "loss": 0.5429, + "step": 29904 + }, + { + "epoch": 0.82111477210324, + "grad_norm": 0.40143465995788574, + "learning_rate": 1.2797956123702103e-05, + "loss": 0.5066, + "step": 29905 + }, + { + "epoch": 0.8211422295442065, + "grad_norm": 0.44001856446266174, + "learning_rate": 1.2797541477684218e-05, + "loss": 0.477, + "step": 29906 + }, + { + "epoch": 0.821169686985173, + "grad_norm": 0.35562053322792053, + "learning_rate": 1.2797126826448025e-05, + "loss": 0.5422, + "step": 29907 + }, + { + "epoch": 0.8211971444261394, + "grad_norm": 0.385434091091156, + "learning_rate": 1.2796712169994302e-05, + "loss": 0.5467, + "step": 29908 + }, + { + "epoch": 0.821224601867106, + "grad_norm": 0.4164957106113434, + "learning_rate": 1.2796297508323814e-05, + "loss": 0.488, + "step": 29909 + }, + { + "epoch": 0.8212520593080724, + "grad_norm": 0.3466986119747162, + "learning_rate": 1.2795882841437337e-05, + "loss": 0.5509, + "step": 29910 + }, + { + "epoch": 0.821279516749039, + "grad_norm": 0.3941405415534973, + "learning_rate": 1.279546816933565e-05, + "loss": 0.4694, + "step": 29911 + }, + { + "epoch": 0.8213069741900055, + "grad_norm": 0.3895045518875122, + "learning_rate": 1.2795053492019523e-05, + "loss": 0.4959, + "step": 29912 + }, + { + "epoch": 0.821334431630972, + "grad_norm": 0.3811872899532318, + "learning_rate": 1.2794638809489726e-05, + "loss": 0.4561, + "step": 29913 + }, + { + "epoch": 0.8213618890719385, + "grad_norm": 0.36958014965057373, + "learning_rate": 1.2794224121747036e-05, + "loss": 0.4692, + "step": 29914 + }, + { + "epoch": 0.821389346512905, + "grad_norm": 0.3804799020290375, + "learning_rate": 1.2793809428792227e-05, + "loss": 0.4532, + "step": 29915 + }, + { + "epoch": 0.8214168039538715, + "grad_norm": 0.34270092844963074, + "learning_rate": 1.2793394730626074e-05, + "loss": 0.4669, + "step": 29916 + }, + { + "epoch": 0.821444261394838, + "grad_norm": 0.36368194222450256, + "learning_rate": 1.2792980027249346e-05, + "loss": 0.5075, + "step": 29917 + }, + { + "epoch": 0.8214717188358045, + "grad_norm": 0.3523280918598175, + "learning_rate": 1.279256531866282e-05, + "loss": 0.509, + "step": 29918 + }, + { + "epoch": 0.821499176276771, + "grad_norm": 0.39013880491256714, + "learning_rate": 1.2792150604867267e-05, + "loss": 0.5199, + "step": 29919 + }, + { + "epoch": 0.8215266337177375, + "grad_norm": 0.46968773007392883, + "learning_rate": 1.279173588586346e-05, + "loss": 0.5053, + "step": 29920 + }, + { + "epoch": 0.821554091158704, + "grad_norm": 0.4417799711227417, + "learning_rate": 1.2791321161652179e-05, + "loss": 0.4593, + "step": 29921 + }, + { + "epoch": 0.8215815485996705, + "grad_norm": 0.48014965653419495, + "learning_rate": 1.2790906432234192e-05, + "loss": 0.5026, + "step": 29922 + }, + { + "epoch": 0.821609006040637, + "grad_norm": 0.395215779542923, + "learning_rate": 1.2790491697610272e-05, + "loss": 0.5504, + "step": 29923 + }, + { + "epoch": 0.8216364634816035, + "grad_norm": 0.4149378538131714, + "learning_rate": 1.2790076957781196e-05, + "loss": 0.3855, + "step": 29924 + }, + { + "epoch": 0.82166392092257, + "grad_norm": 0.40790456533432007, + "learning_rate": 1.2789662212747737e-05, + "loss": 0.5428, + "step": 29925 + }, + { + "epoch": 0.8216913783635366, + "grad_norm": 0.4447188973426819, + "learning_rate": 1.2789247462510663e-05, + "loss": 0.4653, + "step": 29926 + }, + { + "epoch": 0.821718835804503, + "grad_norm": 0.3934735655784607, + "learning_rate": 1.2788832707070757e-05, + "loss": 0.3929, + "step": 29927 + }, + { + "epoch": 0.8217462932454696, + "grad_norm": 0.4023551344871521, + "learning_rate": 1.2788417946428787e-05, + "loss": 0.4554, + "step": 29928 + }, + { + "epoch": 0.821773750686436, + "grad_norm": 0.39139485359191895, + "learning_rate": 1.2788003180585527e-05, + "loss": 0.4843, + "step": 29929 + }, + { + "epoch": 0.8218012081274025, + "grad_norm": 0.5988703966140747, + "learning_rate": 1.2787588409541751e-05, + "loss": 0.4933, + "step": 29930 + }, + { + "epoch": 0.821828665568369, + "grad_norm": 0.3873371481895447, + "learning_rate": 1.2787173633298232e-05, + "loss": 0.4846, + "step": 29931 + }, + { + "epoch": 0.8218561230093355, + "grad_norm": 0.41739487648010254, + "learning_rate": 1.2786758851855746e-05, + "loss": 0.548, + "step": 29932 + }, + { + "epoch": 0.8218835804503021, + "grad_norm": 0.6519547700881958, + "learning_rate": 1.2786344065215064e-05, + "loss": 0.5361, + "step": 29933 + }, + { + "epoch": 0.8219110378912685, + "grad_norm": 0.3546435236930847, + "learning_rate": 1.2785929273376963e-05, + "loss": 0.4308, + "step": 29934 + }, + { + "epoch": 0.8219384953322351, + "grad_norm": 0.34583303332328796, + "learning_rate": 1.2785514476342213e-05, + "loss": 0.507, + "step": 29935 + }, + { + "epoch": 0.8219659527732015, + "grad_norm": 0.3663499355316162, + "learning_rate": 1.2785099674111589e-05, + "loss": 0.4887, + "step": 29936 + }, + { + "epoch": 0.821993410214168, + "grad_norm": 0.5385345816612244, + "learning_rate": 1.2784684866685866e-05, + "loss": 0.5456, + "step": 29937 + }, + { + "epoch": 0.8220208676551345, + "grad_norm": 0.3891913592815399, + "learning_rate": 1.2784270054065817e-05, + "loss": 0.4265, + "step": 29938 + }, + { + "epoch": 0.822048325096101, + "grad_norm": 0.3883543610572815, + "learning_rate": 1.2783855236252215e-05, + "loss": 0.4804, + "step": 29939 + }, + { + "epoch": 0.8220757825370676, + "grad_norm": 0.33860111236572266, + "learning_rate": 1.2783440413245836e-05, + "loss": 0.4411, + "step": 29940 + }, + { + "epoch": 0.822103239978034, + "grad_norm": 0.39401957392692566, + "learning_rate": 1.2783025585047454e-05, + "loss": 0.4939, + "step": 29941 + }, + { + "epoch": 0.8221306974190006, + "grad_norm": 0.3971298336982727, + "learning_rate": 1.2782610751657838e-05, + "loss": 0.4963, + "step": 29942 + }, + { + "epoch": 0.822158154859967, + "grad_norm": 0.39848434925079346, + "learning_rate": 1.2782195913077764e-05, + "loss": 0.4923, + "step": 29943 + }, + { + "epoch": 0.8221856123009336, + "grad_norm": 0.4648064970970154, + "learning_rate": 1.2781781069308008e-05, + "loss": 0.5747, + "step": 29944 + }, + { + "epoch": 0.8222130697419, + "grad_norm": 0.39721524715423584, + "learning_rate": 1.2781366220349343e-05, + "loss": 0.5657, + "step": 29945 + }, + { + "epoch": 0.8222405271828666, + "grad_norm": 0.41288328170776367, + "learning_rate": 1.2780951366202543e-05, + "loss": 0.4754, + "step": 29946 + }, + { + "epoch": 0.8222679846238331, + "grad_norm": 0.3748377859592438, + "learning_rate": 1.2780536506868379e-05, + "loss": 0.5287, + "step": 29947 + }, + { + "epoch": 0.8222954420647995, + "grad_norm": 0.39680829644203186, + "learning_rate": 1.2780121642347628e-05, + "loss": 0.5136, + "step": 29948 + }, + { + "epoch": 0.8223228995057661, + "grad_norm": 0.4079782962799072, + "learning_rate": 1.2779706772641064e-05, + "loss": 0.5923, + "step": 29949 + }, + { + "epoch": 0.8223503569467325, + "grad_norm": 0.40668410062789917, + "learning_rate": 1.2779291897749457e-05, + "loss": 0.4765, + "step": 29950 + }, + { + "epoch": 0.8223778143876991, + "grad_norm": 0.3868073523044586, + "learning_rate": 1.2778877017673586e-05, + "loss": 0.4281, + "step": 29951 + }, + { + "epoch": 0.8224052718286655, + "grad_norm": 0.3639702796936035, + "learning_rate": 1.2778462132414222e-05, + "loss": 0.491, + "step": 29952 + }, + { + "epoch": 0.8224327292696321, + "grad_norm": 0.3745518922805786, + "learning_rate": 1.2778047241972138e-05, + "loss": 0.4388, + "step": 29953 + }, + { + "epoch": 0.8224601867105986, + "grad_norm": 0.3815753161907196, + "learning_rate": 1.277763234634811e-05, + "loss": 0.4664, + "step": 29954 + }, + { + "epoch": 0.8224876441515651, + "grad_norm": 0.45984596014022827, + "learning_rate": 1.2777217445542912e-05, + "loss": 0.4529, + "step": 29955 + }, + { + "epoch": 0.8225151015925316, + "grad_norm": 0.3938109874725342, + "learning_rate": 1.2776802539557316e-05, + "loss": 0.4895, + "step": 29956 + }, + { + "epoch": 0.822542559033498, + "grad_norm": 0.3679609000682831, + "learning_rate": 1.2776387628392098e-05, + "loss": 0.4738, + "step": 29957 + }, + { + "epoch": 0.8225700164744646, + "grad_norm": 0.3644793927669525, + "learning_rate": 1.2775972712048028e-05, + "loss": 0.4357, + "step": 29958 + }, + { + "epoch": 0.822597473915431, + "grad_norm": 0.3909688889980316, + "learning_rate": 1.2775557790525888e-05, + "loss": 0.4717, + "step": 29959 + }, + { + "epoch": 0.8226249313563976, + "grad_norm": 0.4523567259311676, + "learning_rate": 1.2775142863826442e-05, + "loss": 0.5753, + "step": 29960 + }, + { + "epoch": 0.8226523887973641, + "grad_norm": 0.4281628131866455, + "learning_rate": 1.2774727931950472e-05, + "loss": 0.529, + "step": 29961 + }, + { + "epoch": 0.8226798462383306, + "grad_norm": 0.44045910239219666, + "learning_rate": 1.2774312994898747e-05, + "loss": 0.4804, + "step": 29962 + }, + { + "epoch": 0.8227073036792971, + "grad_norm": 0.3357817828655243, + "learning_rate": 1.2773898052672045e-05, + "loss": 0.4307, + "step": 29963 + }, + { + "epoch": 0.8227347611202636, + "grad_norm": 0.5774185657501221, + "learning_rate": 1.2773483105271136e-05, + "loss": 0.5336, + "step": 29964 + }, + { + "epoch": 0.8227622185612301, + "grad_norm": 0.36748388409614563, + "learning_rate": 1.2773068152696795e-05, + "loss": 0.4111, + "step": 29965 + }, + { + "epoch": 0.8227896760021965, + "grad_norm": 0.41485676169395447, + "learning_rate": 1.2772653194949798e-05, + "loss": 0.4646, + "step": 29966 + }, + { + "epoch": 0.8228171334431631, + "grad_norm": 0.4384790062904358, + "learning_rate": 1.2772238232030918e-05, + "loss": 0.4943, + "step": 29967 + }, + { + "epoch": 0.8228445908841296, + "grad_norm": 0.3912218511104584, + "learning_rate": 1.277182326394093e-05, + "loss": 0.5567, + "step": 29968 + }, + { + "epoch": 0.8228720483250961, + "grad_norm": 0.38021907210350037, + "learning_rate": 1.2771408290680608e-05, + "loss": 0.3705, + "step": 29969 + }, + { + "epoch": 0.8228995057660626, + "grad_norm": 0.368449330329895, + "learning_rate": 1.277099331225072e-05, + "loss": 0.4268, + "step": 29970 + }, + { + "epoch": 0.8229269632070291, + "grad_norm": 0.4010332524776459, + "learning_rate": 1.2770578328652047e-05, + "loss": 0.5891, + "step": 29971 + }, + { + "epoch": 0.8229544206479956, + "grad_norm": 0.3945092558860779, + "learning_rate": 1.2770163339885362e-05, + "loss": 0.5276, + "step": 29972 + }, + { + "epoch": 0.8229818780889621, + "grad_norm": 0.43296948075294495, + "learning_rate": 1.2769748345951438e-05, + "loss": 0.3899, + "step": 29973 + }, + { + "epoch": 0.8230093355299286, + "grad_norm": 0.3877841830253601, + "learning_rate": 1.2769333346851052e-05, + "loss": 0.5319, + "step": 29974 + }, + { + "epoch": 0.8230367929708952, + "grad_norm": 0.3753948509693146, + "learning_rate": 1.276891834258497e-05, + "loss": 0.521, + "step": 29975 + }, + { + "epoch": 0.8230642504118616, + "grad_norm": 0.35551419854164124, + "learning_rate": 1.2768503333153977e-05, + "loss": 0.3981, + "step": 29976 + }, + { + "epoch": 0.8230917078528281, + "grad_norm": 0.4029289782047272, + "learning_rate": 1.2768088318558838e-05, + "loss": 0.5081, + "step": 29977 + }, + { + "epoch": 0.8231191652937946, + "grad_norm": 0.8277904987335205, + "learning_rate": 1.2767673298800331e-05, + "loss": 0.4955, + "step": 29978 + }, + { + "epoch": 0.8231466227347611, + "grad_norm": 0.4085511565208435, + "learning_rate": 1.2767258273879232e-05, + "loss": 0.4482, + "step": 29979 + }, + { + "epoch": 0.8231740801757276, + "grad_norm": 0.4499463737010956, + "learning_rate": 1.2766843243796312e-05, + "loss": 0.4876, + "step": 29980 + }, + { + "epoch": 0.8232015376166941, + "grad_norm": 0.42418110370635986, + "learning_rate": 1.2766428208552348e-05, + "loss": 0.4974, + "step": 29981 + }, + { + "epoch": 0.8232289950576607, + "grad_norm": 0.3913818895816803, + "learning_rate": 1.2766013168148109e-05, + "loss": 0.5453, + "step": 29982 + }, + { + "epoch": 0.8232564524986271, + "grad_norm": 0.4030565917491913, + "learning_rate": 1.2765598122584375e-05, + "loss": 0.4955, + "step": 29983 + }, + { + "epoch": 0.8232839099395937, + "grad_norm": 0.39294466376304626, + "learning_rate": 1.2765183071861917e-05, + "loss": 0.4096, + "step": 29984 + }, + { + "epoch": 0.8233113673805601, + "grad_norm": 0.3509330451488495, + "learning_rate": 1.276476801598151e-05, + "loss": 0.4509, + "step": 29985 + }, + { + "epoch": 0.8233388248215266, + "grad_norm": 0.4455319941043854, + "learning_rate": 1.2764352954943928e-05, + "loss": 0.5588, + "step": 29986 + }, + { + "epoch": 0.8233662822624931, + "grad_norm": 0.4269362986087799, + "learning_rate": 1.276393788874995e-05, + "loss": 0.561, + "step": 29987 + }, + { + "epoch": 0.8233937397034596, + "grad_norm": 0.3653850853443146, + "learning_rate": 1.2763522817400337e-05, + "loss": 0.4691, + "step": 29988 + }, + { + "epoch": 0.8234211971444262, + "grad_norm": 0.3845020830631256, + "learning_rate": 1.276310774089588e-05, + "loss": 0.5371, + "step": 29989 + }, + { + "epoch": 0.8234486545853926, + "grad_norm": 0.3969345688819885, + "learning_rate": 1.2762692659237339e-05, + "loss": 0.5298, + "step": 29990 + }, + { + "epoch": 0.8234761120263592, + "grad_norm": 0.4187753200531006, + "learning_rate": 1.27622775724255e-05, + "loss": 0.4737, + "step": 29991 + }, + { + "epoch": 0.8235035694673256, + "grad_norm": 0.4063633680343628, + "learning_rate": 1.276186248046113e-05, + "loss": 0.4445, + "step": 29992 + }, + { + "epoch": 0.8235310269082922, + "grad_norm": 0.379369854927063, + "learning_rate": 1.2761447383345002e-05, + "loss": 0.4825, + "step": 29993 + }, + { + "epoch": 0.8235584843492586, + "grad_norm": 0.36130291223526, + "learning_rate": 1.2761032281077897e-05, + "loss": 0.4766, + "step": 29994 + }, + { + "epoch": 0.8235859417902252, + "grad_norm": 0.470331609249115, + "learning_rate": 1.2760617173660581e-05, + "loss": 0.4969, + "step": 29995 + }, + { + "epoch": 0.8236133992311917, + "grad_norm": 0.39652520418167114, + "learning_rate": 1.2760202061093836e-05, + "loss": 0.5041, + "step": 29996 + }, + { + "epoch": 0.8236408566721581, + "grad_norm": 0.3905658721923828, + "learning_rate": 1.2759786943378433e-05, + "loss": 0.4205, + "step": 29997 + }, + { + "epoch": 0.8236683141131247, + "grad_norm": 0.4269920885562897, + "learning_rate": 1.2759371820515147e-05, + "loss": 0.4691, + "step": 29998 + }, + { + "epoch": 0.8236957715540911, + "grad_norm": 0.39325404167175293, + "learning_rate": 1.2758956692504751e-05, + "loss": 0.6014, + "step": 29999 + }, + { + "epoch": 0.8237232289950577, + "grad_norm": 0.3313271999359131, + "learning_rate": 1.2758541559348019e-05, + "loss": 0.4626, + "step": 30000 + }, + { + "epoch": 0.8237506864360241, + "grad_norm": 0.42190682888031006, + "learning_rate": 1.275812642104573e-05, + "loss": 0.5343, + "step": 30001 + }, + { + "epoch": 0.8237781438769907, + "grad_norm": 0.4376525282859802, + "learning_rate": 1.2757711277598655e-05, + "loss": 0.5074, + "step": 30002 + }, + { + "epoch": 0.8238056013179572, + "grad_norm": 0.3896116316318512, + "learning_rate": 1.2757296129007564e-05, + "loss": 0.4573, + "step": 30003 + }, + { + "epoch": 0.8238330587589237, + "grad_norm": 0.3991791307926178, + "learning_rate": 1.2756880975273237e-05, + "loss": 0.4902, + "step": 30004 + }, + { + "epoch": 0.8238605161998902, + "grad_norm": 0.3675098717212677, + "learning_rate": 1.275646581639645e-05, + "loss": 0.5131, + "step": 30005 + }, + { + "epoch": 0.8238879736408566, + "grad_norm": 0.4021913707256317, + "learning_rate": 1.2756050652377972e-05, + "loss": 0.6101, + "step": 30006 + }, + { + "epoch": 0.8239154310818232, + "grad_norm": 0.38579806685447693, + "learning_rate": 1.2755635483218582e-05, + "loss": 0.5711, + "step": 30007 + }, + { + "epoch": 0.8239428885227896, + "grad_norm": 0.37184837460517883, + "learning_rate": 1.2755220308919048e-05, + "loss": 0.4905, + "step": 30008 + }, + { + "epoch": 0.8239703459637562, + "grad_norm": 0.3647450804710388, + "learning_rate": 1.2754805129480152e-05, + "loss": 0.5364, + "step": 30009 + }, + { + "epoch": 0.8239978034047227, + "grad_norm": 0.3583522439002991, + "learning_rate": 1.2754389944902665e-05, + "loss": 0.491, + "step": 30010 + }, + { + "epoch": 0.8240252608456892, + "grad_norm": 0.4194508194923401, + "learning_rate": 1.2753974755187361e-05, + "loss": 0.5487, + "step": 30011 + }, + { + "epoch": 0.8240527182866557, + "grad_norm": 0.40678131580352783, + "learning_rate": 1.2753559560335016e-05, + "loss": 0.5814, + "step": 30012 + }, + { + "epoch": 0.8240801757276222, + "grad_norm": 0.39239487051963806, + "learning_rate": 1.2753144360346403e-05, + "loss": 0.4455, + "step": 30013 + }, + { + "epoch": 0.8241076331685887, + "grad_norm": 0.3476126790046692, + "learning_rate": 1.27527291552223e-05, + "loss": 0.3995, + "step": 30014 + }, + { + "epoch": 0.8241350906095551, + "grad_norm": 0.40221983194351196, + "learning_rate": 1.2752313944963475e-05, + "loss": 0.4501, + "step": 30015 + }, + { + "epoch": 0.8241625480505217, + "grad_norm": 0.3327177166938782, + "learning_rate": 1.2751898729570704e-05, + "loss": 0.4882, + "step": 30016 + }, + { + "epoch": 0.8241900054914882, + "grad_norm": 0.4446655511856079, + "learning_rate": 1.2751483509044768e-05, + "loss": 0.5171, + "step": 30017 + }, + { + "epoch": 0.8242174629324547, + "grad_norm": 0.4090450704097748, + "learning_rate": 1.2751068283386436e-05, + "loss": 0.5762, + "step": 30018 + }, + { + "epoch": 0.8242449203734212, + "grad_norm": 0.37045615911483765, + "learning_rate": 1.2750653052596482e-05, + "loss": 0.5186, + "step": 30019 + }, + { + "epoch": 0.8242723778143877, + "grad_norm": 0.4112650752067566, + "learning_rate": 1.2750237816675686e-05, + "loss": 0.501, + "step": 30020 + }, + { + "epoch": 0.8242998352553542, + "grad_norm": 0.43648308515548706, + "learning_rate": 1.2749822575624814e-05, + "loss": 0.5663, + "step": 30021 + }, + { + "epoch": 0.8243272926963207, + "grad_norm": 0.40236660838127136, + "learning_rate": 1.2749407329444647e-05, + "loss": 0.4694, + "step": 30022 + }, + { + "epoch": 0.8243547501372872, + "grad_norm": 0.4052143692970276, + "learning_rate": 1.274899207813596e-05, + "loss": 0.598, + "step": 30023 + }, + { + "epoch": 0.8243822075782538, + "grad_norm": 0.40498629212379456, + "learning_rate": 1.2748576821699521e-05, + "loss": 0.5215, + "step": 30024 + }, + { + "epoch": 0.8244096650192202, + "grad_norm": 0.3881228566169739, + "learning_rate": 1.2748161560136113e-05, + "loss": 0.4459, + "step": 30025 + }, + { + "epoch": 0.8244371224601867, + "grad_norm": 0.4377542734146118, + "learning_rate": 1.2747746293446503e-05, + "loss": 0.4805, + "step": 30026 + }, + { + "epoch": 0.8244645799011532, + "grad_norm": 0.39185309410095215, + "learning_rate": 1.2747331021631472e-05, + "loss": 0.4323, + "step": 30027 + }, + { + "epoch": 0.8244920373421197, + "grad_norm": 0.3949560523033142, + "learning_rate": 1.2746915744691793e-05, + "loss": 0.4833, + "step": 30028 + }, + { + "epoch": 0.8245194947830862, + "grad_norm": 0.3868871331214905, + "learning_rate": 1.2746500462628236e-05, + "loss": 0.4647, + "step": 30029 + }, + { + "epoch": 0.8245469522240527, + "grad_norm": 0.4288168251514435, + "learning_rate": 1.2746085175441581e-05, + "loss": 0.5402, + "step": 30030 + }, + { + "epoch": 0.8245744096650193, + "grad_norm": 0.3613189160823822, + "learning_rate": 1.2745669883132598e-05, + "loss": 0.4258, + "step": 30031 + }, + { + "epoch": 0.8246018671059857, + "grad_norm": 0.6592974066734314, + "learning_rate": 1.2745254585702067e-05, + "loss": 0.492, + "step": 30032 + }, + { + "epoch": 0.8246293245469523, + "grad_norm": 0.39612558484077454, + "learning_rate": 1.274483928315076e-05, + "loss": 0.5031, + "step": 30033 + }, + { + "epoch": 0.8246567819879187, + "grad_norm": 0.3817562758922577, + "learning_rate": 1.274442397547945e-05, + "loss": 0.4313, + "step": 30034 + }, + { + "epoch": 0.8246842394288852, + "grad_norm": 0.39822861552238464, + "learning_rate": 1.2744008662688915e-05, + "loss": 0.5055, + "step": 30035 + }, + { + "epoch": 0.8247116968698517, + "grad_norm": 0.37748175859451294, + "learning_rate": 1.274359334477993e-05, + "loss": 0.501, + "step": 30036 + }, + { + "epoch": 0.8247391543108182, + "grad_norm": 0.4502507150173187, + "learning_rate": 1.2743178021753265e-05, + "loss": 0.4685, + "step": 30037 + }, + { + "epoch": 0.8247666117517848, + "grad_norm": 0.42123207449913025, + "learning_rate": 1.2742762693609695e-05, + "loss": 0.4623, + "step": 30038 + }, + { + "epoch": 0.8247940691927512, + "grad_norm": 0.3928719162940979, + "learning_rate": 1.274234736035e-05, + "loss": 0.4374, + "step": 30039 + }, + { + "epoch": 0.8248215266337178, + "grad_norm": 0.48763883113861084, + "learning_rate": 1.2741932021974953e-05, + "loss": 0.4966, + "step": 30040 + }, + { + "epoch": 0.8248489840746842, + "grad_norm": 0.4133080244064331, + "learning_rate": 1.274151667848533e-05, + "loss": 0.4802, + "step": 30041 + }, + { + "epoch": 0.8248764415156508, + "grad_norm": 0.36849987506866455, + "learning_rate": 1.2741101329881898e-05, + "loss": 0.529, + "step": 30042 + }, + { + "epoch": 0.8249038989566172, + "grad_norm": 0.49688974022865295, + "learning_rate": 1.274068597616544e-05, + "loss": 0.5193, + "step": 30043 + }, + { + "epoch": 0.8249313563975837, + "grad_norm": 0.34656471014022827, + "learning_rate": 1.2740270617336725e-05, + "loss": 0.4804, + "step": 30044 + }, + { + "epoch": 0.8249588138385503, + "grad_norm": 0.34300270676612854, + "learning_rate": 1.2739855253396533e-05, + "loss": 0.4406, + "step": 30045 + }, + { + "epoch": 0.8249862712795167, + "grad_norm": 0.41893136501312256, + "learning_rate": 1.2739439884345637e-05, + "loss": 0.5029, + "step": 30046 + }, + { + "epoch": 0.8250137287204833, + "grad_norm": 0.39303460717201233, + "learning_rate": 1.273902451018481e-05, + "loss": 0.4781, + "step": 30047 + }, + { + "epoch": 0.8250411861614497, + "grad_norm": 0.36963775753974915, + "learning_rate": 1.2738609130914829e-05, + "loss": 0.4409, + "step": 30048 + }, + { + "epoch": 0.8250686436024163, + "grad_norm": 0.44479629397392273, + "learning_rate": 1.273819374653647e-05, + "loss": 0.4713, + "step": 30049 + }, + { + "epoch": 0.8250961010433827, + "grad_norm": 0.3783676326274872, + "learning_rate": 1.2737778357050501e-05, + "loss": 0.4813, + "step": 30050 + }, + { + "epoch": 0.8251235584843493, + "grad_norm": 0.5109935402870178, + "learning_rate": 1.2737362962457704e-05, + "loss": 0.4319, + "step": 30051 + }, + { + "epoch": 0.8251510159253158, + "grad_norm": 0.3941800594329834, + "learning_rate": 1.273694756275885e-05, + "loss": 0.561, + "step": 30052 + }, + { + "epoch": 0.8251784733662823, + "grad_norm": 0.3979749381542206, + "learning_rate": 1.2736532157954718e-05, + "loss": 0.461, + "step": 30053 + }, + { + "epoch": 0.8252059308072488, + "grad_norm": 0.40778252482414246, + "learning_rate": 1.2736116748046079e-05, + "loss": 0.5404, + "step": 30054 + }, + { + "epoch": 0.8252333882482152, + "grad_norm": 0.37501847743988037, + "learning_rate": 1.2735701333033707e-05, + "loss": 0.4959, + "step": 30055 + }, + { + "epoch": 0.8252608456891818, + "grad_norm": 0.4288727343082428, + "learning_rate": 1.2735285912918382e-05, + "loss": 0.4935, + "step": 30056 + }, + { + "epoch": 0.8252883031301482, + "grad_norm": 0.39976486563682556, + "learning_rate": 1.2734870487700875e-05, + "loss": 0.5143, + "step": 30057 + }, + { + "epoch": 0.8253157605711148, + "grad_norm": 0.359549880027771, + "learning_rate": 1.273445505738196e-05, + "loss": 0.4336, + "step": 30058 + }, + { + "epoch": 0.8253432180120813, + "grad_norm": 0.35370197892189026, + "learning_rate": 1.2734039621962416e-05, + "loss": 0.4658, + "step": 30059 + }, + { + "epoch": 0.8253706754530478, + "grad_norm": 0.4350409507751465, + "learning_rate": 1.2733624181443013e-05, + "loss": 0.5244, + "step": 30060 + }, + { + "epoch": 0.8253981328940143, + "grad_norm": 0.3538420498371124, + "learning_rate": 1.2733208735824528e-05, + "loss": 0.5015, + "step": 30061 + }, + { + "epoch": 0.8254255903349808, + "grad_norm": 0.4439171254634857, + "learning_rate": 1.2732793285107738e-05, + "loss": 0.5278, + "step": 30062 + }, + { + "epoch": 0.8254530477759473, + "grad_norm": 0.39641737937927246, + "learning_rate": 1.2732377829293417e-05, + "loss": 0.5612, + "step": 30063 + }, + { + "epoch": 0.8254805052169137, + "grad_norm": 0.4680884778499603, + "learning_rate": 1.273196236838234e-05, + "loss": 0.4915, + "step": 30064 + }, + { + "epoch": 0.8255079626578803, + "grad_norm": 0.33782559633255005, + "learning_rate": 1.2731546902375277e-05, + "loss": 0.4798, + "step": 30065 + }, + { + "epoch": 0.8255354200988468, + "grad_norm": 0.4061569273471832, + "learning_rate": 1.2731131431273012e-05, + "loss": 0.4155, + "step": 30066 + }, + { + "epoch": 0.8255628775398133, + "grad_norm": 0.38242581486701965, + "learning_rate": 1.2730715955076314e-05, + "loss": 0.4768, + "step": 30067 + }, + { + "epoch": 0.8255903349807798, + "grad_norm": 0.4381382167339325, + "learning_rate": 1.2730300473785957e-05, + "loss": 0.5623, + "step": 30068 + }, + { + "epoch": 0.8256177924217463, + "grad_norm": 0.4432758688926697, + "learning_rate": 1.2729884987402722e-05, + "loss": 0.4646, + "step": 30069 + }, + { + "epoch": 0.8256452498627128, + "grad_norm": 0.38696810603141785, + "learning_rate": 1.2729469495927376e-05, + "loss": 0.4252, + "step": 30070 + }, + { + "epoch": 0.8256727073036793, + "grad_norm": 0.3531753122806549, + "learning_rate": 1.27290539993607e-05, + "loss": 0.4265, + "step": 30071 + }, + { + "epoch": 0.8257001647446458, + "grad_norm": 0.38056692481040955, + "learning_rate": 1.2728638497703467e-05, + "loss": 0.4811, + "step": 30072 + }, + { + "epoch": 0.8257276221856124, + "grad_norm": 0.40290361642837524, + "learning_rate": 1.2728222990956452e-05, + "loss": 0.5614, + "step": 30073 + }, + { + "epoch": 0.8257550796265788, + "grad_norm": 0.38526788353919983, + "learning_rate": 1.272780747912043e-05, + "loss": 0.5256, + "step": 30074 + }, + { + "epoch": 0.8257825370675453, + "grad_norm": 0.46520668268203735, + "learning_rate": 1.2727391962196179e-05, + "loss": 0.3929, + "step": 30075 + }, + { + "epoch": 0.8258099945085118, + "grad_norm": 0.35699141025543213, + "learning_rate": 1.2726976440184469e-05, + "loss": 0.4496, + "step": 30076 + }, + { + "epoch": 0.8258374519494783, + "grad_norm": 0.3921355903148651, + "learning_rate": 1.2726560913086079e-05, + "loss": 0.5055, + "step": 30077 + }, + { + "epoch": 0.8258649093904448, + "grad_norm": 0.3734223246574402, + "learning_rate": 1.2726145380901782e-05, + "loss": 0.525, + "step": 30078 + }, + { + "epoch": 0.8258923668314113, + "grad_norm": 0.37923407554626465, + "learning_rate": 1.2725729843632356e-05, + "loss": 0.4201, + "step": 30079 + }, + { + "epoch": 0.8259198242723779, + "grad_norm": 0.37023797631263733, + "learning_rate": 1.272531430127857e-05, + "loss": 0.4102, + "step": 30080 + }, + { + "epoch": 0.8259472817133443, + "grad_norm": 0.3981805145740509, + "learning_rate": 1.2724898753841205e-05, + "loss": 0.413, + "step": 30081 + }, + { + "epoch": 0.8259747391543109, + "grad_norm": 0.36054694652557373, + "learning_rate": 1.2724483201321034e-05, + "loss": 0.449, + "step": 30082 + }, + { + "epoch": 0.8260021965952773, + "grad_norm": 0.3362351953983307, + "learning_rate": 1.2724067643718831e-05, + "loss": 0.5562, + "step": 30083 + }, + { + "epoch": 0.8260296540362438, + "grad_norm": 0.3543832302093506, + "learning_rate": 1.2723652081035376e-05, + "loss": 0.4922, + "step": 30084 + }, + { + "epoch": 0.8260571114772103, + "grad_norm": 0.3987925946712494, + "learning_rate": 1.2723236513271438e-05, + "loss": 0.5044, + "step": 30085 + }, + { + "epoch": 0.8260845689181768, + "grad_norm": 0.4136558771133423, + "learning_rate": 1.2722820940427794e-05, + "loss": 0.3967, + "step": 30086 + }, + { + "epoch": 0.8261120263591434, + "grad_norm": 0.3615911602973938, + "learning_rate": 1.2722405362505221e-05, + "loss": 0.4499, + "step": 30087 + }, + { + "epoch": 0.8261394838001098, + "grad_norm": 0.4165189862251282, + "learning_rate": 1.2721989779504493e-05, + "loss": 0.5151, + "step": 30088 + }, + { + "epoch": 0.8261669412410764, + "grad_norm": 0.3700328767299652, + "learning_rate": 1.2721574191426384e-05, + "loss": 0.3437, + "step": 30089 + }, + { + "epoch": 0.8261943986820428, + "grad_norm": 0.3384542167186737, + "learning_rate": 1.2721158598271674e-05, + "loss": 0.3734, + "step": 30090 + }, + { + "epoch": 0.8262218561230094, + "grad_norm": 0.41426801681518555, + "learning_rate": 1.2720743000041131e-05, + "loss": 0.555, + "step": 30091 + }, + { + "epoch": 0.8262493135639758, + "grad_norm": 0.4262765944004059, + "learning_rate": 1.2720327396735536e-05, + "loss": 0.5074, + "step": 30092 + }, + { + "epoch": 0.8262767710049423, + "grad_norm": 1.041501522064209, + "learning_rate": 1.2719911788355661e-05, + "loss": 0.4802, + "step": 30093 + }, + { + "epoch": 0.8263042284459089, + "grad_norm": 0.3926730453968048, + "learning_rate": 1.2719496174902283e-05, + "loss": 0.4979, + "step": 30094 + }, + { + "epoch": 0.8263316858868753, + "grad_norm": 0.3984961211681366, + "learning_rate": 1.271908055637618e-05, + "loss": 0.4541, + "step": 30095 + }, + { + "epoch": 0.8263591433278419, + "grad_norm": 0.3787977993488312, + "learning_rate": 1.2718664932778117e-05, + "loss": 0.567, + "step": 30096 + }, + { + "epoch": 0.8263866007688083, + "grad_norm": 0.3780995309352875, + "learning_rate": 1.2718249304108882e-05, + "loss": 0.5596, + "step": 30097 + }, + { + "epoch": 0.8264140582097749, + "grad_norm": 0.3820092976093292, + "learning_rate": 1.2717833670369245e-05, + "loss": 0.5011, + "step": 30098 + }, + { + "epoch": 0.8264415156507413, + "grad_norm": 0.34030672907829285, + "learning_rate": 1.2717418031559975e-05, + "loss": 0.4785, + "step": 30099 + }, + { + "epoch": 0.8264689730917079, + "grad_norm": 0.36741650104522705, + "learning_rate": 1.2717002387681858e-05, + "loss": 0.4345, + "step": 30100 + }, + { + "epoch": 0.8264964305326744, + "grad_norm": 0.35311800241470337, + "learning_rate": 1.2716586738735661e-05, + "loss": 0.4831, + "step": 30101 + }, + { + "epoch": 0.8265238879736408, + "grad_norm": 0.41958972811698914, + "learning_rate": 1.2716171084722166e-05, + "loss": 0.6139, + "step": 30102 + }, + { + "epoch": 0.8265513454146074, + "grad_norm": 0.387897253036499, + "learning_rate": 1.2715755425642145e-05, + "loss": 0.4668, + "step": 30103 + }, + { + "epoch": 0.8265788028555738, + "grad_norm": 0.4208543002605438, + "learning_rate": 1.2715339761496369e-05, + "loss": 0.5108, + "step": 30104 + }, + { + "epoch": 0.8266062602965404, + "grad_norm": 0.3848203122615814, + "learning_rate": 1.2714924092285622e-05, + "loss": 0.5457, + "step": 30105 + }, + { + "epoch": 0.8266337177375068, + "grad_norm": 0.36472487449645996, + "learning_rate": 1.2714508418010672e-05, + "loss": 0.4319, + "step": 30106 + }, + { + "epoch": 0.8266611751784734, + "grad_norm": 0.36384403705596924, + "learning_rate": 1.2714092738672299e-05, + "loss": 0.5047, + "step": 30107 + }, + { + "epoch": 0.8266886326194399, + "grad_norm": 0.3845245838165283, + "learning_rate": 1.2713677054271278e-05, + "loss": 0.54, + "step": 30108 + }, + { + "epoch": 0.8267160900604064, + "grad_norm": 0.3769253194332123, + "learning_rate": 1.271326136480838e-05, + "loss": 0.4752, + "step": 30109 + }, + { + "epoch": 0.8267435475013729, + "grad_norm": 0.38465461134910583, + "learning_rate": 1.2712845670284386e-05, + "loss": 0.4791, + "step": 30110 + }, + { + "epoch": 0.8267710049423393, + "grad_norm": 0.3746567666530609, + "learning_rate": 1.271242997070007e-05, + "loss": 0.4809, + "step": 30111 + }, + { + "epoch": 0.8267984623833059, + "grad_norm": 0.3916131556034088, + "learning_rate": 1.2712014266056203e-05, + "loss": 0.5733, + "step": 30112 + }, + { + "epoch": 0.8268259198242723, + "grad_norm": 0.3987334668636322, + "learning_rate": 1.2711598556353567e-05, + "loss": 0.481, + "step": 30113 + }, + { + "epoch": 0.8268533772652389, + "grad_norm": 0.430874228477478, + "learning_rate": 1.2711182841592931e-05, + "loss": 0.4309, + "step": 30114 + }, + { + "epoch": 0.8268808347062054, + "grad_norm": 0.6166215538978577, + "learning_rate": 1.2710767121775076e-05, + "loss": 0.5483, + "step": 30115 + }, + { + "epoch": 0.8269082921471719, + "grad_norm": 0.42193493247032166, + "learning_rate": 1.2710351396900778e-05, + "loss": 0.4804, + "step": 30116 + }, + { + "epoch": 0.8269357495881384, + "grad_norm": 0.3564535975456238, + "learning_rate": 1.2709935666970803e-05, + "loss": 0.4864, + "step": 30117 + }, + { + "epoch": 0.8269632070291049, + "grad_norm": 0.38248199224472046, + "learning_rate": 1.2709519931985938e-05, + "loss": 0.4735, + "step": 30118 + }, + { + "epoch": 0.8269906644700714, + "grad_norm": 0.3628825545310974, + "learning_rate": 1.270910419194695e-05, + "loss": 0.4974, + "step": 30119 + }, + { + "epoch": 0.8270181219110379, + "grad_norm": 0.4056534171104431, + "learning_rate": 1.2708688446854623e-05, + "loss": 0.408, + "step": 30120 + }, + { + "epoch": 0.8270455793520044, + "grad_norm": 0.38093435764312744, + "learning_rate": 1.2708272696709722e-05, + "loss": 0.5439, + "step": 30121 + }, + { + "epoch": 0.827073036792971, + "grad_norm": 0.3965618312358856, + "learning_rate": 1.2707856941513032e-05, + "loss": 0.5498, + "step": 30122 + }, + { + "epoch": 0.8271004942339374, + "grad_norm": 0.3851867616176605, + "learning_rate": 1.2707441181265324e-05, + "loss": 0.4833, + "step": 30123 + }, + { + "epoch": 0.8271279516749039, + "grad_norm": 0.36996588110923767, + "learning_rate": 1.2707025415967371e-05, + "loss": 0.4948, + "step": 30124 + }, + { + "epoch": 0.8271554091158704, + "grad_norm": 0.4340568482875824, + "learning_rate": 1.2706609645619957e-05, + "loss": 0.5249, + "step": 30125 + }, + { + "epoch": 0.8271828665568369, + "grad_norm": 0.3755122423171997, + "learning_rate": 1.2706193870223847e-05, + "loss": 0.4781, + "step": 30126 + }, + { + "epoch": 0.8272103239978034, + "grad_norm": 0.4060381352901459, + "learning_rate": 1.2705778089779823e-05, + "loss": 0.4389, + "step": 30127 + }, + { + "epoch": 0.8272377814387699, + "grad_norm": 0.3720158636569977, + "learning_rate": 1.2705362304288661e-05, + "loss": 0.4482, + "step": 30128 + }, + { + "epoch": 0.8272652388797364, + "grad_norm": 0.48068419098854065, + "learning_rate": 1.2704946513751132e-05, + "loss": 0.5242, + "step": 30129 + }, + { + "epoch": 0.8272926963207029, + "grad_norm": 0.5425692200660706, + "learning_rate": 1.2704530718168018e-05, + "loss": 0.4399, + "step": 30130 + }, + { + "epoch": 0.8273201537616695, + "grad_norm": 0.4231375455856323, + "learning_rate": 1.270411491754009e-05, + "loss": 0.5214, + "step": 30131 + }, + { + "epoch": 0.8273476112026359, + "grad_norm": 0.32926681637763977, + "learning_rate": 1.2703699111868125e-05, + "loss": 0.471, + "step": 30132 + }, + { + "epoch": 0.8273750686436024, + "grad_norm": 0.40884506702423096, + "learning_rate": 1.2703283301152896e-05, + "loss": 0.4973, + "step": 30133 + }, + { + "epoch": 0.8274025260845689, + "grad_norm": 0.42598846554756165, + "learning_rate": 1.2702867485395181e-05, + "loss": 0.5446, + "step": 30134 + }, + { + "epoch": 0.8274299835255354, + "grad_norm": 0.36551520228385925, + "learning_rate": 1.2702451664595755e-05, + "loss": 0.5037, + "step": 30135 + }, + { + "epoch": 0.8274574409665019, + "grad_norm": 0.3586095869541168, + "learning_rate": 1.2702035838755398e-05, + "loss": 0.468, + "step": 30136 + }, + { + "epoch": 0.8274848984074684, + "grad_norm": 0.43615463376045227, + "learning_rate": 1.2701620007874879e-05, + "loss": 0.587, + "step": 30137 + }, + { + "epoch": 0.827512355848435, + "grad_norm": 0.3970477283000946, + "learning_rate": 1.2701204171954975e-05, + "loss": 0.5834, + "step": 30138 + }, + { + "epoch": 0.8275398132894014, + "grad_norm": 0.32892900705337524, + "learning_rate": 1.2700788330996467e-05, + "loss": 0.4238, + "step": 30139 + }, + { + "epoch": 0.827567270730368, + "grad_norm": 0.43328094482421875, + "learning_rate": 1.2700372485000125e-05, + "loss": 0.528, + "step": 30140 + }, + { + "epoch": 0.8275947281713344, + "grad_norm": 0.3569276034832001, + "learning_rate": 1.2699956633966726e-05, + "loss": 0.4591, + "step": 30141 + }, + { + "epoch": 0.8276221856123009, + "grad_norm": 0.43493252992630005, + "learning_rate": 1.2699540777897046e-05, + "loss": 0.5895, + "step": 30142 + }, + { + "epoch": 0.8276496430532674, + "grad_norm": 0.41313982009887695, + "learning_rate": 1.2699124916791861e-05, + "loss": 0.5469, + "step": 30143 + }, + { + "epoch": 0.8276771004942339, + "grad_norm": 0.45179951190948486, + "learning_rate": 1.2698709050651947e-05, + "loss": 0.491, + "step": 30144 + }, + { + "epoch": 0.8277045579352005, + "grad_norm": 0.37474048137664795, + "learning_rate": 1.2698293179478078e-05, + "loss": 0.5026, + "step": 30145 + }, + { + "epoch": 0.8277320153761669, + "grad_norm": 0.37731871008872986, + "learning_rate": 1.2697877303271034e-05, + "loss": 0.4817, + "step": 30146 + }, + { + "epoch": 0.8277594728171335, + "grad_norm": 0.42035356163978577, + "learning_rate": 1.2697461422031586e-05, + "loss": 0.5638, + "step": 30147 + }, + { + "epoch": 0.8277869302580999, + "grad_norm": 0.3683796525001526, + "learning_rate": 1.2697045535760508e-05, + "loss": 0.4321, + "step": 30148 + }, + { + "epoch": 0.8278143876990665, + "grad_norm": 0.3960830271244049, + "learning_rate": 1.2696629644458585e-05, + "loss": 0.4508, + "step": 30149 + }, + { + "epoch": 0.8278418451400329, + "grad_norm": 0.3760804831981659, + "learning_rate": 1.2696213748126583e-05, + "loss": 0.4632, + "step": 30150 + }, + { + "epoch": 0.8278693025809994, + "grad_norm": 0.3876819908618927, + "learning_rate": 1.2695797846765285e-05, + "loss": 0.4306, + "step": 30151 + }, + { + "epoch": 0.827896760021966, + "grad_norm": 0.4273565411567688, + "learning_rate": 1.2695381940375461e-05, + "loss": 0.4712, + "step": 30152 + }, + { + "epoch": 0.8279242174629324, + "grad_norm": 0.3546307682991028, + "learning_rate": 1.2694966028957891e-05, + "loss": 0.4521, + "step": 30153 + }, + { + "epoch": 0.827951674903899, + "grad_norm": 0.35536137223243713, + "learning_rate": 1.2694550112513348e-05, + "loss": 0.3796, + "step": 30154 + }, + { + "epoch": 0.8279791323448654, + "grad_norm": 0.3789352774620056, + "learning_rate": 1.269413419104261e-05, + "loss": 0.5208, + "step": 30155 + }, + { + "epoch": 0.828006589785832, + "grad_norm": 0.36131414771080017, + "learning_rate": 1.2693718264546453e-05, + "loss": 0.4721, + "step": 30156 + }, + { + "epoch": 0.8280340472267984, + "grad_norm": 0.3911927044391632, + "learning_rate": 1.269330233302565e-05, + "loss": 0.4985, + "step": 30157 + }, + { + "epoch": 0.828061504667765, + "grad_norm": 0.36796438694000244, + "learning_rate": 1.269288639648098e-05, + "loss": 0.4736, + "step": 30158 + }, + { + "epoch": 0.8280889621087315, + "grad_norm": 0.3711654543876648, + "learning_rate": 1.2692470454913216e-05, + "loss": 0.4563, + "step": 30159 + }, + { + "epoch": 0.828116419549698, + "grad_norm": 0.4181745946407318, + "learning_rate": 1.2692054508323138e-05, + "loss": 0.5324, + "step": 30160 + }, + { + "epoch": 0.8281438769906645, + "grad_norm": 0.6017858386039734, + "learning_rate": 1.2691638556711515e-05, + "loss": 0.4316, + "step": 30161 + }, + { + "epoch": 0.8281713344316309, + "grad_norm": 0.42729452252388, + "learning_rate": 1.269122260007913e-05, + "loss": 0.5055, + "step": 30162 + }, + { + "epoch": 0.8281987918725975, + "grad_norm": 0.43209296464920044, + "learning_rate": 1.269080663842675e-05, + "loss": 0.5181, + "step": 30163 + }, + { + "epoch": 0.8282262493135639, + "grad_norm": 0.3777647912502289, + "learning_rate": 1.2690390671755163e-05, + "loss": 0.4628, + "step": 30164 + }, + { + "epoch": 0.8282537067545305, + "grad_norm": 0.42750871181488037, + "learning_rate": 1.2689974700065139e-05, + "loss": 0.5378, + "step": 30165 + }, + { + "epoch": 0.828281164195497, + "grad_norm": 0.44176581501960754, + "learning_rate": 1.2689558723357451e-05, + "loss": 0.5742, + "step": 30166 + }, + { + "epoch": 0.8283086216364635, + "grad_norm": 0.4650537967681885, + "learning_rate": 1.2689142741632879e-05, + "loss": 0.4509, + "step": 30167 + }, + { + "epoch": 0.82833607907743, + "grad_norm": 0.44560912251472473, + "learning_rate": 1.2688726754892195e-05, + "loss": 0.4371, + "step": 30168 + }, + { + "epoch": 0.8283635365183964, + "grad_norm": 0.4083231985569, + "learning_rate": 1.268831076313618e-05, + "loss": 0.554, + "step": 30169 + }, + { + "epoch": 0.828390993959363, + "grad_norm": 0.381827712059021, + "learning_rate": 1.268789476636561e-05, + "loss": 0.4781, + "step": 30170 + }, + { + "epoch": 0.8284184514003294, + "grad_norm": 0.40177249908447266, + "learning_rate": 1.268747876458125e-05, + "loss": 0.4553, + "step": 30171 + }, + { + "epoch": 0.828445908841296, + "grad_norm": 0.37988927960395813, + "learning_rate": 1.2687062757783893e-05, + "loss": 0.3936, + "step": 30172 + }, + { + "epoch": 0.8284733662822625, + "grad_norm": 0.3665355145931244, + "learning_rate": 1.2686646745974302e-05, + "loss": 0.4449, + "step": 30173 + }, + { + "epoch": 0.828500823723229, + "grad_norm": 0.4080806076526642, + "learning_rate": 1.2686230729153257e-05, + "loss": 0.4952, + "step": 30174 + }, + { + "epoch": 0.8285282811641955, + "grad_norm": 0.498286634683609, + "learning_rate": 1.2685814707321537e-05, + "loss": 0.4839, + "step": 30175 + }, + { + "epoch": 0.828555738605162, + "grad_norm": 0.3898909091949463, + "learning_rate": 1.2685398680479915e-05, + "loss": 0.5103, + "step": 30176 + }, + { + "epoch": 0.8285831960461285, + "grad_norm": 0.38970428705215454, + "learning_rate": 1.2684982648629164e-05, + "loss": 0.5584, + "step": 30177 + }, + { + "epoch": 0.828610653487095, + "grad_norm": 0.35226118564605713, + "learning_rate": 1.2684566611770069e-05, + "loss": 0.5034, + "step": 30178 + }, + { + "epoch": 0.8286381109280615, + "grad_norm": 0.4234652817249298, + "learning_rate": 1.2684150569903396e-05, + "loss": 0.5623, + "step": 30179 + }, + { + "epoch": 0.828665568369028, + "grad_norm": 0.40462908148765564, + "learning_rate": 1.2683734523029928e-05, + "loss": 0.4813, + "step": 30180 + }, + { + "epoch": 0.8286930258099945, + "grad_norm": 0.44137856364250183, + "learning_rate": 1.2683318471150434e-05, + "loss": 0.4929, + "step": 30181 + }, + { + "epoch": 0.828720483250961, + "grad_norm": 0.4045804738998413, + "learning_rate": 1.2682902414265699e-05, + "loss": 0.5085, + "step": 30182 + }, + { + "epoch": 0.8287479406919275, + "grad_norm": 0.6157320141792297, + "learning_rate": 1.2682486352376495e-05, + "loss": 0.5273, + "step": 30183 + }, + { + "epoch": 0.828775398132894, + "grad_norm": 0.37834271788597107, + "learning_rate": 1.2682070285483595e-05, + "loss": 0.4627, + "step": 30184 + }, + { + "epoch": 0.8288028555738605, + "grad_norm": 0.3549070358276367, + "learning_rate": 1.2681654213587779e-05, + "loss": 0.472, + "step": 30185 + }, + { + "epoch": 0.828830313014827, + "grad_norm": 0.4049048125743866, + "learning_rate": 1.2681238136689822e-05, + "loss": 0.4856, + "step": 30186 + }, + { + "epoch": 0.8288577704557936, + "grad_norm": 0.40977662801742554, + "learning_rate": 1.26808220547905e-05, + "loss": 0.5938, + "step": 30187 + }, + { + "epoch": 0.82888522789676, + "grad_norm": 0.40600845217704773, + "learning_rate": 1.2680405967890592e-05, + "loss": 0.4962, + "step": 30188 + }, + { + "epoch": 0.8289126853377266, + "grad_norm": 0.3837948143482208, + "learning_rate": 1.2679989875990868e-05, + "loss": 0.4846, + "step": 30189 + }, + { + "epoch": 0.828940142778693, + "grad_norm": 0.3800135552883148, + "learning_rate": 1.2679573779092108e-05, + "loss": 0.4973, + "step": 30190 + }, + { + "epoch": 0.8289676002196595, + "grad_norm": 0.42047667503356934, + "learning_rate": 1.267915767719509e-05, + "loss": 0.4621, + "step": 30191 + }, + { + "epoch": 0.828995057660626, + "grad_norm": 0.3662310242652893, + "learning_rate": 1.2678741570300585e-05, + "loss": 0.525, + "step": 30192 + }, + { + "epoch": 0.8290225151015925, + "grad_norm": 0.41898858547210693, + "learning_rate": 1.2678325458409373e-05, + "loss": 0.4083, + "step": 30193 + }, + { + "epoch": 0.8290499725425591, + "grad_norm": 0.39827239513397217, + "learning_rate": 1.267790934152223e-05, + "loss": 0.4863, + "step": 30194 + }, + { + "epoch": 0.8290774299835255, + "grad_norm": 0.41429856419563293, + "learning_rate": 1.2677493219639932e-05, + "loss": 0.4983, + "step": 30195 + }, + { + "epoch": 0.8291048874244921, + "grad_norm": 0.3416401743888855, + "learning_rate": 1.2677077092763252e-05, + "loss": 0.4421, + "step": 30196 + }, + { + "epoch": 0.8291323448654585, + "grad_norm": 0.37801456451416016, + "learning_rate": 1.267666096089297e-05, + "loss": 0.4889, + "step": 30197 + }, + { + "epoch": 0.829159802306425, + "grad_norm": 0.39317193627357483, + "learning_rate": 1.2676244824029861e-05, + "loss": 0.5533, + "step": 30198 + }, + { + "epoch": 0.8291872597473915, + "grad_norm": 0.4269481599330902, + "learning_rate": 1.2675828682174701e-05, + "loss": 0.481, + "step": 30199 + }, + { + "epoch": 0.829214717188358, + "grad_norm": 0.37056225538253784, + "learning_rate": 1.2675412535328269e-05, + "loss": 0.4423, + "step": 30200 + }, + { + "epoch": 0.8292421746293246, + "grad_norm": 0.3704770505428314, + "learning_rate": 1.2674996383491337e-05, + "loss": 0.4946, + "step": 30201 + }, + { + "epoch": 0.829269632070291, + "grad_norm": 0.44553518295288086, + "learning_rate": 1.2674580226664681e-05, + "loss": 0.557, + "step": 30202 + }, + { + "epoch": 0.8292970895112576, + "grad_norm": 0.464017391204834, + "learning_rate": 1.2674164064849081e-05, + "loss": 0.5308, + "step": 30203 + }, + { + "epoch": 0.829324546952224, + "grad_norm": 0.3846050500869751, + "learning_rate": 1.2673747898045312e-05, + "loss": 0.5026, + "step": 30204 + }, + { + "epoch": 0.8293520043931906, + "grad_norm": 0.4030638337135315, + "learning_rate": 1.2673331726254148e-05, + "loss": 0.4383, + "step": 30205 + }, + { + "epoch": 0.829379461834157, + "grad_norm": 0.3786577880382538, + "learning_rate": 1.267291554947637e-05, + "loss": 0.4383, + "step": 30206 + }, + { + "epoch": 0.8294069192751236, + "grad_norm": 0.36833974719047546, + "learning_rate": 1.2672499367712747e-05, + "loss": 0.4874, + "step": 30207 + }, + { + "epoch": 0.8294343767160901, + "grad_norm": 0.4100600779056549, + "learning_rate": 1.2672083180964063e-05, + "loss": 0.4595, + "step": 30208 + }, + { + "epoch": 0.8294618341570565, + "grad_norm": 0.4139527380466461, + "learning_rate": 1.2671666989231093e-05, + "loss": 0.5364, + "step": 30209 + }, + { + "epoch": 0.8294892915980231, + "grad_norm": 0.4129088819026947, + "learning_rate": 1.2671250792514607e-05, + "loss": 0.5416, + "step": 30210 + }, + { + "epoch": 0.8295167490389895, + "grad_norm": 0.38217490911483765, + "learning_rate": 1.2670834590815388e-05, + "loss": 0.5002, + "step": 30211 + }, + { + "epoch": 0.8295442064799561, + "grad_norm": 0.4132919907569885, + "learning_rate": 1.267041838413421e-05, + "loss": 0.5468, + "step": 30212 + }, + { + "epoch": 0.8295716639209225, + "grad_norm": 0.40673577785491943, + "learning_rate": 1.2670002172471847e-05, + "loss": 0.458, + "step": 30213 + }, + { + "epoch": 0.8295991213618891, + "grad_norm": 0.3551134765148163, + "learning_rate": 1.2669585955829082e-05, + "loss": 0.4521, + "step": 30214 + }, + { + "epoch": 0.8296265788028556, + "grad_norm": 0.30851200222969055, + "learning_rate": 1.2669169734206684e-05, + "loss": 0.3382, + "step": 30215 + }, + { + "epoch": 0.8296540362438221, + "grad_norm": 0.477618932723999, + "learning_rate": 1.2668753507605433e-05, + "loss": 0.5923, + "step": 30216 + }, + { + "epoch": 0.8296814936847886, + "grad_norm": 0.3746917247772217, + "learning_rate": 1.2668337276026104e-05, + "loss": 0.463, + "step": 30217 + }, + { + "epoch": 0.829708951125755, + "grad_norm": 0.4033198058605194, + "learning_rate": 1.2667921039469477e-05, + "loss": 0.5204, + "step": 30218 + }, + { + "epoch": 0.8297364085667216, + "grad_norm": 0.331371009349823, + "learning_rate": 1.2667504797936325e-05, + "loss": 0.4451, + "step": 30219 + }, + { + "epoch": 0.829763866007688, + "grad_norm": 0.3861638009548187, + "learning_rate": 1.2667088551427424e-05, + "loss": 0.4146, + "step": 30220 + }, + { + "epoch": 0.8297913234486546, + "grad_norm": 0.41285786032676697, + "learning_rate": 1.2666672299943552e-05, + "loss": 0.4753, + "step": 30221 + }, + { + "epoch": 0.8298187808896211, + "grad_norm": 0.3504190146923065, + "learning_rate": 1.2666256043485488e-05, + "loss": 0.4921, + "step": 30222 + }, + { + "epoch": 0.8298462383305876, + "grad_norm": 0.41357100009918213, + "learning_rate": 1.2665839782054e-05, + "loss": 0.5287, + "step": 30223 + }, + { + "epoch": 0.8298736957715541, + "grad_norm": 0.38297852873802185, + "learning_rate": 1.2665423515649876e-05, + "loss": 0.4593, + "step": 30224 + }, + { + "epoch": 0.8299011532125206, + "grad_norm": 0.39998334646224976, + "learning_rate": 1.266500724427388e-05, + "loss": 0.4833, + "step": 30225 + }, + { + "epoch": 0.8299286106534871, + "grad_norm": 0.4182138442993164, + "learning_rate": 1.26645909679268e-05, + "loss": 0.4987, + "step": 30226 + }, + { + "epoch": 0.8299560680944535, + "grad_norm": 0.4057847559452057, + "learning_rate": 1.2664174686609405e-05, + "loss": 0.567, + "step": 30227 + }, + { + "epoch": 0.8299835255354201, + "grad_norm": 0.33791977167129517, + "learning_rate": 1.2663758400322474e-05, + "loss": 0.5015, + "step": 30228 + }, + { + "epoch": 0.8300109829763866, + "grad_norm": 0.34357163310050964, + "learning_rate": 1.2663342109066786e-05, + "loss": 0.4089, + "step": 30229 + }, + { + "epoch": 0.8300384404173531, + "grad_norm": 0.44750556349754333, + "learning_rate": 1.266292581284311e-05, + "loss": 0.3998, + "step": 30230 + }, + { + "epoch": 0.8300658978583196, + "grad_norm": 0.3905784487724304, + "learning_rate": 1.2662509511652233e-05, + "loss": 0.4672, + "step": 30231 + }, + { + "epoch": 0.8300933552992861, + "grad_norm": 0.39093077182769775, + "learning_rate": 1.2662093205494922e-05, + "loss": 0.463, + "step": 30232 + }, + { + "epoch": 0.8301208127402526, + "grad_norm": 0.35563895106315613, + "learning_rate": 1.2661676894371959e-05, + "loss": 0.4995, + "step": 30233 + }, + { + "epoch": 0.8301482701812191, + "grad_norm": 0.3526252508163452, + "learning_rate": 1.2661260578284119e-05, + "loss": 0.5078, + "step": 30234 + }, + { + "epoch": 0.8301757276221856, + "grad_norm": 0.3769455552101135, + "learning_rate": 1.266084425723218e-05, + "loss": 0.529, + "step": 30235 + }, + { + "epoch": 0.8302031850631522, + "grad_norm": 0.4023447632789612, + "learning_rate": 1.2660427931216915e-05, + "loss": 0.5426, + "step": 30236 + }, + { + "epoch": 0.8302306425041186, + "grad_norm": 0.35571062564849854, + "learning_rate": 1.2660011600239104e-05, + "loss": 0.4025, + "step": 30237 + }, + { + "epoch": 0.8302580999450851, + "grad_norm": 0.3777214288711548, + "learning_rate": 1.265959526429952e-05, + "loss": 0.4033, + "step": 30238 + }, + { + "epoch": 0.8302855573860516, + "grad_norm": 0.38111695647239685, + "learning_rate": 1.2659178923398945e-05, + "loss": 0.4702, + "step": 30239 + }, + { + "epoch": 0.8303130148270181, + "grad_norm": 0.3746671974658966, + "learning_rate": 1.2658762577538152e-05, + "loss": 0.5409, + "step": 30240 + }, + { + "epoch": 0.8303404722679846, + "grad_norm": 0.4144847095012665, + "learning_rate": 1.2658346226717917e-05, + "loss": 0.5662, + "step": 30241 + }, + { + "epoch": 0.8303679297089511, + "grad_norm": 0.4501059949398041, + "learning_rate": 1.2657929870939018e-05, + "loss": 0.5283, + "step": 30242 + }, + { + "epoch": 0.8303953871499177, + "grad_norm": 0.39457330107688904, + "learning_rate": 1.2657513510202233e-05, + "loss": 0.4719, + "step": 30243 + }, + { + "epoch": 0.8304228445908841, + "grad_norm": 0.403413325548172, + "learning_rate": 1.2657097144508337e-05, + "loss": 0.4586, + "step": 30244 + }, + { + "epoch": 0.8304503020318507, + "grad_norm": 0.40660321712493896, + "learning_rate": 1.2656680773858105e-05, + "loss": 0.5519, + "step": 30245 + }, + { + "epoch": 0.8304777594728171, + "grad_norm": 0.40034496784210205, + "learning_rate": 1.2656264398252316e-05, + "loss": 0.5132, + "step": 30246 + }, + { + "epoch": 0.8305052169137837, + "grad_norm": 0.399395614862442, + "learning_rate": 1.2655848017691748e-05, + "loss": 0.5312, + "step": 30247 + }, + { + "epoch": 0.8305326743547501, + "grad_norm": 0.3459046185016632, + "learning_rate": 1.2655431632177173e-05, + "loss": 0.4367, + "step": 30248 + }, + { + "epoch": 0.8305601317957166, + "grad_norm": 0.39666837453842163, + "learning_rate": 1.2655015241709372e-05, + "loss": 0.4597, + "step": 30249 + }, + { + "epoch": 0.8305875892366832, + "grad_norm": 0.34702301025390625, + "learning_rate": 1.265459884628912e-05, + "loss": 0.5302, + "step": 30250 + }, + { + "epoch": 0.8306150466776496, + "grad_norm": 0.3708016276359558, + "learning_rate": 1.2654182445917194e-05, + "loss": 0.4881, + "step": 30251 + }, + { + "epoch": 0.8306425041186162, + "grad_norm": 0.32588738203048706, + "learning_rate": 1.265376604059437e-05, + "loss": 0.4382, + "step": 30252 + }, + { + "epoch": 0.8306699615595826, + "grad_norm": 0.38643187284469604, + "learning_rate": 1.2653349630321428e-05, + "loss": 0.4117, + "step": 30253 + }, + { + "epoch": 0.8306974190005492, + "grad_norm": 0.35875391960144043, + "learning_rate": 1.2652933215099139e-05, + "loss": 0.5228, + "step": 30254 + }, + { + "epoch": 0.8307248764415156, + "grad_norm": 0.4428936243057251, + "learning_rate": 1.2652516794928285e-05, + "loss": 0.5322, + "step": 30255 + }, + { + "epoch": 0.8307523338824822, + "grad_norm": 0.3720729351043701, + "learning_rate": 1.2652100369809637e-05, + "loss": 0.4654, + "step": 30256 + }, + { + "epoch": 0.8307797913234487, + "grad_norm": 0.48424971103668213, + "learning_rate": 1.265168393974398e-05, + "loss": 0.403, + "step": 30257 + }, + { + "epoch": 0.8308072487644151, + "grad_norm": 0.3608262240886688, + "learning_rate": 1.2651267504732083e-05, + "loss": 0.5143, + "step": 30258 + }, + { + "epoch": 0.8308347062053817, + "grad_norm": 0.40514519810676575, + "learning_rate": 1.2650851064774727e-05, + "loss": 0.5554, + "step": 30259 + }, + { + "epoch": 0.8308621636463481, + "grad_norm": 0.3753630816936493, + "learning_rate": 1.2650434619872687e-05, + "loss": 0.5264, + "step": 30260 + }, + { + "epoch": 0.8308896210873147, + "grad_norm": 0.35506489872932434, + "learning_rate": 1.265001817002674e-05, + "loss": 0.4585, + "step": 30261 + }, + { + "epoch": 0.8309170785282811, + "grad_norm": 0.4116273522377014, + "learning_rate": 1.2649601715237664e-05, + "loss": 0.4688, + "step": 30262 + }, + { + "epoch": 0.8309445359692477, + "grad_norm": 0.3632226586341858, + "learning_rate": 1.2649185255506238e-05, + "loss": 0.5235, + "step": 30263 + }, + { + "epoch": 0.8309719934102142, + "grad_norm": 0.36816951632499695, + "learning_rate": 1.2648768790833232e-05, + "loss": 0.4846, + "step": 30264 + }, + { + "epoch": 0.8309994508511807, + "grad_norm": 0.383434534072876, + "learning_rate": 1.2648352321219428e-05, + "loss": 0.4273, + "step": 30265 + }, + { + "epoch": 0.8310269082921472, + "grad_norm": 0.3781331181526184, + "learning_rate": 1.2647935846665601e-05, + "loss": 0.5159, + "step": 30266 + }, + { + "epoch": 0.8310543657331136, + "grad_norm": 0.4319455623626709, + "learning_rate": 1.2647519367172532e-05, + "loss": 0.5483, + "step": 30267 + }, + { + "epoch": 0.8310818231740802, + "grad_norm": 0.7177205085754395, + "learning_rate": 1.2647102882740992e-05, + "loss": 0.5764, + "step": 30268 + }, + { + "epoch": 0.8311092806150466, + "grad_norm": 0.4133775532245636, + "learning_rate": 1.2646686393371759e-05, + "loss": 0.4455, + "step": 30269 + }, + { + "epoch": 0.8311367380560132, + "grad_norm": 0.3865716755390167, + "learning_rate": 1.2646269899065613e-05, + "loss": 0.4133, + "step": 30270 + }, + { + "epoch": 0.8311641954969797, + "grad_norm": 0.39414212107658386, + "learning_rate": 1.2645853399823329e-05, + "loss": 0.458, + "step": 30271 + }, + { + "epoch": 0.8311916529379462, + "grad_norm": 0.38279280066490173, + "learning_rate": 1.2645436895645682e-05, + "loss": 0.4867, + "step": 30272 + }, + { + "epoch": 0.8312191103789127, + "grad_norm": 0.4270465075969696, + "learning_rate": 1.2645020386533453e-05, + "loss": 0.5423, + "step": 30273 + }, + { + "epoch": 0.8312465678198792, + "grad_norm": 0.39220815896987915, + "learning_rate": 1.2644603872487415e-05, + "loss": 0.5524, + "step": 30274 + }, + { + "epoch": 0.8312740252608457, + "grad_norm": 0.45736730098724365, + "learning_rate": 1.2644187353508348e-05, + "loss": 0.5405, + "step": 30275 + }, + { + "epoch": 0.8313014827018121, + "grad_norm": 0.37588754296302795, + "learning_rate": 1.2643770829597029e-05, + "loss": 0.5701, + "step": 30276 + }, + { + "epoch": 0.8313289401427787, + "grad_norm": 0.4082329571247101, + "learning_rate": 1.264335430075423e-05, + "loss": 0.5961, + "step": 30277 + }, + { + "epoch": 0.8313563975837452, + "grad_norm": 0.38419216871261597, + "learning_rate": 1.2642937766980734e-05, + "loss": 0.4165, + "step": 30278 + }, + { + "epoch": 0.8313838550247117, + "grad_norm": 0.39413756132125854, + "learning_rate": 1.2642521228277314e-05, + "loss": 0.5244, + "step": 30279 + }, + { + "epoch": 0.8314113124656782, + "grad_norm": 0.37085646390914917, + "learning_rate": 1.2642104684644752e-05, + "loss": 0.4376, + "step": 30280 + }, + { + "epoch": 0.8314387699066447, + "grad_norm": 0.40708106756210327, + "learning_rate": 1.2641688136083819e-05, + "loss": 0.4942, + "step": 30281 + }, + { + "epoch": 0.8314662273476112, + "grad_norm": 0.38414785265922546, + "learning_rate": 1.2641271582595293e-05, + "loss": 0.4481, + "step": 30282 + }, + { + "epoch": 0.8314936847885777, + "grad_norm": 0.443010151386261, + "learning_rate": 1.2640855024179955e-05, + "loss": 0.5174, + "step": 30283 + }, + { + "epoch": 0.8315211422295442, + "grad_norm": 0.5067664384841919, + "learning_rate": 1.2640438460838577e-05, + "loss": 0.5755, + "step": 30284 + }, + { + "epoch": 0.8315485996705108, + "grad_norm": 0.38709741830825806, + "learning_rate": 1.264002189257194e-05, + "loss": 0.462, + "step": 30285 + }, + { + "epoch": 0.8315760571114772, + "grad_norm": 0.3594662845134735, + "learning_rate": 1.263960531938082e-05, + "loss": 0.4935, + "step": 30286 + }, + { + "epoch": 0.8316035145524437, + "grad_norm": 0.3746187090873718, + "learning_rate": 1.2639188741265993e-05, + "loss": 0.4587, + "step": 30287 + }, + { + "epoch": 0.8316309719934102, + "grad_norm": 0.3877832889556885, + "learning_rate": 1.2638772158228238e-05, + "loss": 0.5067, + "step": 30288 + }, + { + "epoch": 0.8316584294343767, + "grad_norm": 0.4159867763519287, + "learning_rate": 1.2638355570268331e-05, + "loss": 0.551, + "step": 30289 + }, + { + "epoch": 0.8316858868753432, + "grad_norm": 0.3844701945781708, + "learning_rate": 1.2637938977387046e-05, + "loss": 0.6179, + "step": 30290 + }, + { + "epoch": 0.8317133443163097, + "grad_norm": 0.41171231865882874, + "learning_rate": 1.2637522379585165e-05, + "loss": 0.4728, + "step": 30291 + }, + { + "epoch": 0.8317408017572763, + "grad_norm": 0.44197988510131836, + "learning_rate": 1.263710577686346e-05, + "loss": 0.47, + "step": 30292 + }, + { + "epoch": 0.8317682591982427, + "grad_norm": 0.41233834624290466, + "learning_rate": 1.2636689169222716e-05, + "loss": 0.5605, + "step": 30293 + }, + { + "epoch": 0.8317957166392093, + "grad_norm": 0.3313000500202179, + "learning_rate": 1.2636272556663702e-05, + "loss": 0.4556, + "step": 30294 + }, + { + "epoch": 0.8318231740801757, + "grad_norm": 0.41675838828086853, + "learning_rate": 1.2635855939187198e-05, + "loss": 0.5221, + "step": 30295 + }, + { + "epoch": 0.8318506315211422, + "grad_norm": 0.43815916776657104, + "learning_rate": 1.2635439316793983e-05, + "loss": 0.5229, + "step": 30296 + }, + { + "epoch": 0.8318780889621087, + "grad_norm": 0.39627939462661743, + "learning_rate": 1.2635022689484831e-05, + "loss": 0.4406, + "step": 30297 + }, + { + "epoch": 0.8319055464030752, + "grad_norm": 0.3404425382614136, + "learning_rate": 1.2634606057260524e-05, + "loss": 0.5073, + "step": 30298 + }, + { + "epoch": 0.8319330038440418, + "grad_norm": 0.4478636384010315, + "learning_rate": 1.2634189420121832e-05, + "loss": 0.6056, + "step": 30299 + }, + { + "epoch": 0.8319604612850082, + "grad_norm": 0.3727758228778839, + "learning_rate": 1.2633772778069538e-05, + "loss": 0.484, + "step": 30300 + }, + { + "epoch": 0.8319879187259748, + "grad_norm": 0.34177467226982117, + "learning_rate": 1.2633356131104415e-05, + "loss": 0.4395, + "step": 30301 + }, + { + "epoch": 0.8320153761669412, + "grad_norm": 0.6157618165016174, + "learning_rate": 1.2632939479227244e-05, + "loss": 0.5019, + "step": 30302 + }, + { + "epoch": 0.8320428336079078, + "grad_norm": 0.39014461636543274, + "learning_rate": 1.26325228224388e-05, + "loss": 0.4995, + "step": 30303 + }, + { + "epoch": 0.8320702910488742, + "grad_norm": 0.36032912135124207, + "learning_rate": 1.263210616073986e-05, + "loss": 0.3954, + "step": 30304 + }, + { + "epoch": 0.8320977484898407, + "grad_norm": 0.33769431710243225, + "learning_rate": 1.2631689494131204e-05, + "loss": 0.4405, + "step": 30305 + }, + { + "epoch": 0.8321252059308073, + "grad_norm": 0.4161273241043091, + "learning_rate": 1.2631272822613608e-05, + "loss": 0.4845, + "step": 30306 + }, + { + "epoch": 0.8321526633717737, + "grad_norm": 0.41217732429504395, + "learning_rate": 1.2630856146187844e-05, + "loss": 0.504, + "step": 30307 + }, + { + "epoch": 0.8321801208127403, + "grad_norm": 0.3860306739807129, + "learning_rate": 1.2630439464854697e-05, + "loss": 0.5634, + "step": 30308 + }, + { + "epoch": 0.8322075782537067, + "grad_norm": 0.36186227202415466, + "learning_rate": 1.2630022778614941e-05, + "loss": 0.5264, + "step": 30309 + }, + { + "epoch": 0.8322350356946733, + "grad_norm": 0.43043023347854614, + "learning_rate": 1.262960608746935e-05, + "loss": 0.4693, + "step": 30310 + }, + { + "epoch": 0.8322624931356397, + "grad_norm": 0.3838934600353241, + "learning_rate": 1.2629189391418708e-05, + "loss": 0.4588, + "step": 30311 + }, + { + "epoch": 0.8322899505766063, + "grad_norm": 0.4120745062828064, + "learning_rate": 1.2628772690463786e-05, + "loss": 0.5415, + "step": 30312 + }, + { + "epoch": 0.8323174080175728, + "grad_norm": 0.4422426223754883, + "learning_rate": 1.2628355984605366e-05, + "loss": 0.4607, + "step": 30313 + }, + { + "epoch": 0.8323448654585393, + "grad_norm": 0.4377015233039856, + "learning_rate": 1.2627939273844224e-05, + "loss": 0.5208, + "step": 30314 + }, + { + "epoch": 0.8323723228995058, + "grad_norm": 0.40354886651039124, + "learning_rate": 1.2627522558181133e-05, + "loss": 0.5455, + "step": 30315 + }, + { + "epoch": 0.8323997803404722, + "grad_norm": 0.41170769929885864, + "learning_rate": 1.2627105837616875e-05, + "loss": 0.4757, + "step": 30316 + }, + { + "epoch": 0.8324272377814388, + "grad_norm": 0.35126185417175293, + "learning_rate": 1.2626689112152226e-05, + "loss": 0.3886, + "step": 30317 + }, + { + "epoch": 0.8324546952224052, + "grad_norm": 0.42665454745292664, + "learning_rate": 1.2626272381787964e-05, + "loss": 0.4802, + "step": 30318 + }, + { + "epoch": 0.8324821526633718, + "grad_norm": 0.46385684609413147, + "learning_rate": 1.2625855646524867e-05, + "loss": 0.5209, + "step": 30319 + }, + { + "epoch": 0.8325096101043383, + "grad_norm": 0.3672662675380707, + "learning_rate": 1.2625438906363709e-05, + "loss": 0.4807, + "step": 30320 + }, + { + "epoch": 0.8325370675453048, + "grad_norm": 2.130838394165039, + "learning_rate": 1.2625022161305274e-05, + "loss": 0.3987, + "step": 30321 + }, + { + "epoch": 0.8325645249862713, + "grad_norm": 0.43482381105422974, + "learning_rate": 1.2624605411350327e-05, + "loss": 0.5755, + "step": 30322 + }, + { + "epoch": 0.8325919824272378, + "grad_norm": 0.39659082889556885, + "learning_rate": 1.2624188656499661e-05, + "loss": 0.5178, + "step": 30323 + }, + { + "epoch": 0.8326194398682043, + "grad_norm": 0.4611845016479492, + "learning_rate": 1.262377189675404e-05, + "loss": 0.4999, + "step": 30324 + }, + { + "epoch": 0.8326468973091707, + "grad_norm": 0.3986891806125641, + "learning_rate": 1.2623355132114252e-05, + "loss": 0.4726, + "step": 30325 + }, + { + "epoch": 0.8326743547501373, + "grad_norm": 0.38107144832611084, + "learning_rate": 1.2622938362581065e-05, + "loss": 0.475, + "step": 30326 + }, + { + "epoch": 0.8327018121911038, + "grad_norm": 0.34282296895980835, + "learning_rate": 1.2622521588155263e-05, + "loss": 0.4364, + "step": 30327 + }, + { + "epoch": 0.8327292696320703, + "grad_norm": 0.36565133929252625, + "learning_rate": 1.2622104808837621e-05, + "loss": 0.4967, + "step": 30328 + }, + { + "epoch": 0.8327567270730368, + "grad_norm": 0.370693564414978, + "learning_rate": 1.2621688024628915e-05, + "loss": 0.552, + "step": 30329 + }, + { + "epoch": 0.8327841845140033, + "grad_norm": 0.40343335270881653, + "learning_rate": 1.2621271235529928e-05, + "loss": 0.5319, + "step": 30330 + }, + { + "epoch": 0.8328116419549698, + "grad_norm": 0.44981154799461365, + "learning_rate": 1.2620854441541428e-05, + "loss": 0.5122, + "step": 30331 + }, + { + "epoch": 0.8328390993959363, + "grad_norm": 0.40278851985931396, + "learning_rate": 1.2620437642664202e-05, + "loss": 0.5524, + "step": 30332 + }, + { + "epoch": 0.8328665568369028, + "grad_norm": 0.38788270950317383, + "learning_rate": 1.2620020838899025e-05, + "loss": 0.4907, + "step": 30333 + }, + { + "epoch": 0.8328940142778694, + "grad_norm": 0.4460882544517517, + "learning_rate": 1.2619604030246669e-05, + "loss": 0.5181, + "step": 30334 + }, + { + "epoch": 0.8329214717188358, + "grad_norm": 0.3744889497756958, + "learning_rate": 1.2619187216707918e-05, + "loss": 0.5074, + "step": 30335 + }, + { + "epoch": 0.8329489291598023, + "grad_norm": 0.3618219196796417, + "learning_rate": 1.2618770398283545e-05, + "loss": 0.4948, + "step": 30336 + }, + { + "epoch": 0.8329763866007688, + "grad_norm": 0.38342711329460144, + "learning_rate": 1.2618353574974329e-05, + "loss": 0.5418, + "step": 30337 + }, + { + "epoch": 0.8330038440417353, + "grad_norm": 0.3774867355823517, + "learning_rate": 1.261793674678105e-05, + "loss": 0.5316, + "step": 30338 + }, + { + "epoch": 0.8330313014827018, + "grad_norm": 0.3637533187866211, + "learning_rate": 1.2617519913704482e-05, + "loss": 0.4931, + "step": 30339 + }, + { + "epoch": 0.8330587589236683, + "grad_norm": 0.5132043957710266, + "learning_rate": 1.2617103075745404e-05, + "loss": 0.5872, + "step": 30340 + }, + { + "epoch": 0.8330862163646349, + "grad_norm": 0.40197402238845825, + "learning_rate": 1.2616686232904593e-05, + "loss": 0.5143, + "step": 30341 + }, + { + "epoch": 0.8331136738056013, + "grad_norm": 0.37361249327659607, + "learning_rate": 1.2616269385182829e-05, + "loss": 0.6072, + "step": 30342 + }, + { + "epoch": 0.8331411312465679, + "grad_norm": 0.3954754173755646, + "learning_rate": 1.2615852532580888e-05, + "loss": 0.4517, + "step": 30343 + }, + { + "epoch": 0.8331685886875343, + "grad_norm": 0.6539633274078369, + "learning_rate": 1.2615435675099544e-05, + "loss": 0.4099, + "step": 30344 + }, + { + "epoch": 0.8331960461285008, + "grad_norm": 0.4081096947193146, + "learning_rate": 1.261501881273958e-05, + "loss": 0.4552, + "step": 30345 + }, + { + "epoch": 0.8332235035694673, + "grad_norm": 0.3596382737159729, + "learning_rate": 1.261460194550177e-05, + "loss": 0.4637, + "step": 30346 + }, + { + "epoch": 0.8332509610104338, + "grad_norm": 0.3939182162284851, + "learning_rate": 1.2614185073386893e-05, + "loss": 0.5254, + "step": 30347 + }, + { + "epoch": 0.8332784184514004, + "grad_norm": 0.3531131446361542, + "learning_rate": 1.2613768196395728e-05, + "loss": 0.4916, + "step": 30348 + }, + { + "epoch": 0.8333058758923668, + "grad_norm": 0.37459734082221985, + "learning_rate": 1.261335131452905e-05, + "loss": 0.4615, + "step": 30349 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.3843931257724762, + "learning_rate": 1.2612934427787638e-05, + "loss": 0.5201, + "step": 30350 + }, + { + "epoch": 0.8333607907742998, + "grad_norm": 0.3601520359516144, + "learning_rate": 1.261251753617227e-05, + "loss": 0.4883, + "step": 30351 + }, + { + "epoch": 0.8333882482152664, + "grad_norm": 0.3623564541339874, + "learning_rate": 1.2612100639683723e-05, + "loss": 0.5368, + "step": 30352 + }, + { + "epoch": 0.8334157056562328, + "grad_norm": 0.4150620102882385, + "learning_rate": 1.2611683738322774e-05, + "loss": 0.448, + "step": 30353 + }, + { + "epoch": 0.8334431630971993, + "grad_norm": 0.4236331880092621, + "learning_rate": 1.26112668320902e-05, + "loss": 0.5378, + "step": 30354 + }, + { + "epoch": 0.8334706205381659, + "grad_norm": 0.38926708698272705, + "learning_rate": 1.261084992098678e-05, + "loss": 0.5421, + "step": 30355 + }, + { + "epoch": 0.8334980779791323, + "grad_norm": 0.42358893156051636, + "learning_rate": 1.2610433005013294e-05, + "loss": 0.4212, + "step": 30356 + }, + { + "epoch": 0.8335255354200989, + "grad_norm": 0.3743465542793274, + "learning_rate": 1.2610016084170513e-05, + "loss": 0.5123, + "step": 30357 + }, + { + "epoch": 0.8335529928610653, + "grad_norm": 0.3726663887500763, + "learning_rate": 1.2609599158459225e-05, + "loss": 0.5095, + "step": 30358 + }, + { + "epoch": 0.8335804503020319, + "grad_norm": 0.45146259665489197, + "learning_rate": 1.2609182227880195e-05, + "loss": 0.5129, + "step": 30359 + }, + { + "epoch": 0.8336079077429983, + "grad_norm": 0.42571163177490234, + "learning_rate": 1.2608765292434213e-05, + "loss": 0.5152, + "step": 30360 + }, + { + "epoch": 0.8336353651839649, + "grad_norm": 0.355617880821228, + "learning_rate": 1.2608348352122049e-05, + "loss": 0.4407, + "step": 30361 + }, + { + "epoch": 0.8336628226249314, + "grad_norm": 0.37317395210266113, + "learning_rate": 1.2607931406944482e-05, + "loss": 0.462, + "step": 30362 + }, + { + "epoch": 0.8336902800658978, + "grad_norm": 0.36988508701324463, + "learning_rate": 1.2607514456902292e-05, + "loss": 0.5132, + "step": 30363 + }, + { + "epoch": 0.8337177375068644, + "grad_norm": 0.3858504295349121, + "learning_rate": 1.2607097501996255e-05, + "loss": 0.5284, + "step": 30364 + }, + { + "epoch": 0.8337451949478308, + "grad_norm": 0.3883386552333832, + "learning_rate": 1.2606680542227148e-05, + "loss": 0.5406, + "step": 30365 + }, + { + "epoch": 0.8337726523887974, + "grad_norm": 0.4710501432418823, + "learning_rate": 1.2606263577595752e-05, + "loss": 0.4565, + "step": 30366 + }, + { + "epoch": 0.8338001098297638, + "grad_norm": 0.3491963744163513, + "learning_rate": 1.260584660810284e-05, + "loss": 0.4946, + "step": 30367 + }, + { + "epoch": 0.8338275672707304, + "grad_norm": 0.3950599431991577, + "learning_rate": 1.2605429633749196e-05, + "loss": 0.5561, + "step": 30368 + }, + { + "epoch": 0.8338550247116969, + "grad_norm": 0.43843889236450195, + "learning_rate": 1.260501265453559e-05, + "loss": 0.4878, + "step": 30369 + }, + { + "epoch": 0.8338824821526634, + "grad_norm": 0.44451817870140076, + "learning_rate": 1.2604595670462806e-05, + "loss": 0.5418, + "step": 30370 + }, + { + "epoch": 0.8339099395936299, + "grad_norm": 0.37603601813316345, + "learning_rate": 1.260417868153162e-05, + "loss": 0.5445, + "step": 30371 + }, + { + "epoch": 0.8339373970345964, + "grad_norm": 0.33573755621910095, + "learning_rate": 1.2603761687742809e-05, + "loss": 0.4191, + "step": 30372 + }, + { + "epoch": 0.8339648544755629, + "grad_norm": 0.3754858374595642, + "learning_rate": 1.260334468909715e-05, + "loss": 0.358, + "step": 30373 + }, + { + "epoch": 0.8339923119165293, + "grad_norm": 0.4184570610523224, + "learning_rate": 1.2602927685595425e-05, + "loss": 0.4512, + "step": 30374 + }, + { + "epoch": 0.8340197693574959, + "grad_norm": 0.4093192219734192, + "learning_rate": 1.2602510677238407e-05, + "loss": 0.3981, + "step": 30375 + }, + { + "epoch": 0.8340472267984624, + "grad_norm": 0.3700283467769623, + "learning_rate": 1.260209366402688e-05, + "loss": 0.4495, + "step": 30376 + }, + { + "epoch": 0.8340746842394289, + "grad_norm": 0.3919629454612732, + "learning_rate": 1.2601676645961614e-05, + "loss": 0.5338, + "step": 30377 + }, + { + "epoch": 0.8341021416803954, + "grad_norm": 0.3945046663284302, + "learning_rate": 1.2601259623043389e-05, + "loss": 0.4994, + "step": 30378 + }, + { + "epoch": 0.8341295991213619, + "grad_norm": 0.4797935485839844, + "learning_rate": 1.2600842595272988e-05, + "loss": 0.528, + "step": 30379 + }, + { + "epoch": 0.8341570565623284, + "grad_norm": 0.4211718440055847, + "learning_rate": 1.2600425562651184e-05, + "loss": 0.4452, + "step": 30380 + }, + { + "epoch": 0.8341845140032949, + "grad_norm": 0.39875686168670654, + "learning_rate": 1.2600008525178757e-05, + "loss": 0.6351, + "step": 30381 + }, + { + "epoch": 0.8342119714442614, + "grad_norm": 0.5671746730804443, + "learning_rate": 1.2599591482856486e-05, + "loss": 0.4793, + "step": 30382 + }, + { + "epoch": 0.834239428885228, + "grad_norm": 0.3583989441394806, + "learning_rate": 1.2599174435685144e-05, + "loss": 0.4434, + "step": 30383 + }, + { + "epoch": 0.8342668863261944, + "grad_norm": 0.3920779228210449, + "learning_rate": 1.2598757383665514e-05, + "loss": 0.5764, + "step": 30384 + }, + { + "epoch": 0.8342943437671609, + "grad_norm": 0.4524482786655426, + "learning_rate": 1.2598340326798371e-05, + "loss": 0.5016, + "step": 30385 + }, + { + "epoch": 0.8343218012081274, + "grad_norm": 0.46638113260269165, + "learning_rate": 1.2597923265084495e-05, + "loss": 0.5158, + "step": 30386 + }, + { + "epoch": 0.8343492586490939, + "grad_norm": 0.4480866491794586, + "learning_rate": 1.2597506198524662e-05, + "loss": 0.5464, + "step": 30387 + }, + { + "epoch": 0.8343767160900604, + "grad_norm": 0.3769569396972656, + "learning_rate": 1.259708912711965e-05, + "loss": 0.4188, + "step": 30388 + }, + { + "epoch": 0.8344041735310269, + "grad_norm": 0.3846501111984253, + "learning_rate": 1.2596672050870242e-05, + "loss": 0.5762, + "step": 30389 + }, + { + "epoch": 0.8344316309719935, + "grad_norm": 0.3934304714202881, + "learning_rate": 1.2596254969777206e-05, + "loss": 0.4848, + "step": 30390 + }, + { + "epoch": 0.8344590884129599, + "grad_norm": 0.3965800106525421, + "learning_rate": 1.2595837883841332e-05, + "loss": 0.5344, + "step": 30391 + }, + { + "epoch": 0.8344865458539265, + "grad_norm": 0.3782135248184204, + "learning_rate": 1.2595420793063389e-05, + "loss": 0.4873, + "step": 30392 + }, + { + "epoch": 0.8345140032948929, + "grad_norm": 0.45516642928123474, + "learning_rate": 1.2595003697444158e-05, + "loss": 0.521, + "step": 30393 + }, + { + "epoch": 0.8345414607358594, + "grad_norm": 0.40234801173210144, + "learning_rate": 1.2594586596984418e-05, + "loss": 0.5523, + "step": 30394 + }, + { + "epoch": 0.8345689181768259, + "grad_norm": 0.3990074098110199, + "learning_rate": 1.2594169491684944e-05, + "loss": 0.4962, + "step": 30395 + }, + { + "epoch": 0.8345963756177924, + "grad_norm": 0.40876534581184387, + "learning_rate": 1.2593752381546516e-05, + "loss": 0.518, + "step": 30396 + }, + { + "epoch": 0.8346238330587589, + "grad_norm": 0.42773404717445374, + "learning_rate": 1.2593335266569913e-05, + "loss": 0.5166, + "step": 30397 + }, + { + "epoch": 0.8346512904997254, + "grad_norm": 0.32705938816070557, + "learning_rate": 1.2592918146755909e-05, + "loss": 0.4405, + "step": 30398 + }, + { + "epoch": 0.834678747940692, + "grad_norm": 0.401039183139801, + "learning_rate": 1.2592501022105288e-05, + "loss": 0.5402, + "step": 30399 + }, + { + "epoch": 0.8347062053816584, + "grad_norm": 0.4576355218887329, + "learning_rate": 1.2592083892618825e-05, + "loss": 0.5732, + "step": 30400 + }, + { + "epoch": 0.834733662822625, + "grad_norm": 0.3891449570655823, + "learning_rate": 1.2591666758297297e-05, + "loss": 0.4117, + "step": 30401 + }, + { + "epoch": 0.8347611202635914, + "grad_norm": 0.4207470715045929, + "learning_rate": 1.2591249619141485e-05, + "loss": 0.4191, + "step": 30402 + }, + { + "epoch": 0.8347885777045579, + "grad_norm": 0.4032715857028961, + "learning_rate": 1.2590832475152164e-05, + "loss": 0.4811, + "step": 30403 + }, + { + "epoch": 0.8348160351455244, + "grad_norm": 0.36735665798187256, + "learning_rate": 1.2590415326330114e-05, + "loss": 0.4872, + "step": 30404 + }, + { + "epoch": 0.8348434925864909, + "grad_norm": 0.393069326877594, + "learning_rate": 1.2589998172676114e-05, + "loss": 0.5645, + "step": 30405 + }, + { + "epoch": 0.8348709500274575, + "grad_norm": 0.4509965479373932, + "learning_rate": 1.258958101419094e-05, + "loss": 0.544, + "step": 30406 + }, + { + "epoch": 0.8348984074684239, + "grad_norm": 0.39796173572540283, + "learning_rate": 1.258916385087537e-05, + "loss": 0.4472, + "step": 30407 + }, + { + "epoch": 0.8349258649093905, + "grad_norm": 0.38559284806251526, + "learning_rate": 1.2588746682730185e-05, + "loss": 0.5212, + "step": 30408 + }, + { + "epoch": 0.8349533223503569, + "grad_norm": 0.443797767162323, + "learning_rate": 1.258832950975616e-05, + "loss": 0.5908, + "step": 30409 + }, + { + "epoch": 0.8349807797913235, + "grad_norm": 0.39976590871810913, + "learning_rate": 1.2587912331954074e-05, + "loss": 0.5695, + "step": 30410 + }, + { + "epoch": 0.8350082372322899, + "grad_norm": 0.36114034056663513, + "learning_rate": 1.2587495149324704e-05, + "loss": 0.4554, + "step": 30411 + }, + { + "epoch": 0.8350356946732564, + "grad_norm": 0.38968899846076965, + "learning_rate": 1.2587077961868831e-05, + "loss": 0.6291, + "step": 30412 + }, + { + "epoch": 0.835063152114223, + "grad_norm": 0.384721040725708, + "learning_rate": 1.2586660769587234e-05, + "loss": 0.53, + "step": 30413 + }, + { + "epoch": 0.8350906095551894, + "grad_norm": 0.37014463543891907, + "learning_rate": 1.2586243572480686e-05, + "loss": 0.484, + "step": 30414 + }, + { + "epoch": 0.835118066996156, + "grad_norm": 0.3598158657550812, + "learning_rate": 1.2585826370549969e-05, + "loss": 0.4543, + "step": 30415 + }, + { + "epoch": 0.8351455244371224, + "grad_norm": 0.41639795899391174, + "learning_rate": 1.2585409163795861e-05, + "loss": 0.4915, + "step": 30416 + }, + { + "epoch": 0.835172981878089, + "grad_norm": 0.41681620478630066, + "learning_rate": 1.2584991952219141e-05, + "loss": 0.5104, + "step": 30417 + }, + { + "epoch": 0.8352004393190554, + "grad_norm": 0.41427522897720337, + "learning_rate": 1.2584574735820584e-05, + "loss": 0.4793, + "step": 30418 + }, + { + "epoch": 0.835227896760022, + "grad_norm": 0.36098653078079224, + "learning_rate": 1.2584157514600972e-05, + "loss": 0.4223, + "step": 30419 + }, + { + "epoch": 0.8352553542009885, + "grad_norm": 0.41426321864128113, + "learning_rate": 1.2583740288561081e-05, + "loss": 0.473, + "step": 30420 + }, + { + "epoch": 0.835282811641955, + "grad_norm": 0.3609372079372406, + "learning_rate": 1.2583323057701688e-05, + "loss": 0.4526, + "step": 30421 + }, + { + "epoch": 0.8353102690829215, + "grad_norm": 0.4139242172241211, + "learning_rate": 1.2582905822023576e-05, + "loss": 0.4786, + "step": 30422 + }, + { + "epoch": 0.8353377265238879, + "grad_norm": 0.37611472606658936, + "learning_rate": 1.258248858152752e-05, + "loss": 0.4439, + "step": 30423 + }, + { + "epoch": 0.8353651839648545, + "grad_norm": 0.4390620291233063, + "learning_rate": 1.2582071336214297e-05, + "loss": 0.4649, + "step": 30424 + }, + { + "epoch": 0.8353926414058209, + "grad_norm": 0.38855257630348206, + "learning_rate": 1.2581654086084686e-05, + "loss": 0.5822, + "step": 30425 + }, + { + "epoch": 0.8354200988467875, + "grad_norm": 0.43391647934913635, + "learning_rate": 1.2581236831139468e-05, + "loss": 0.5102, + "step": 30426 + }, + { + "epoch": 0.835447556287754, + "grad_norm": 0.3664599061012268, + "learning_rate": 1.2580819571379417e-05, + "loss": 0.4977, + "step": 30427 + }, + { + "epoch": 0.8354750137287205, + "grad_norm": 0.37627461552619934, + "learning_rate": 1.2580402306805318e-05, + "loss": 0.4997, + "step": 30428 + }, + { + "epoch": 0.835502471169687, + "grad_norm": 0.48288893699645996, + "learning_rate": 1.2579985037417942e-05, + "loss": 0.5576, + "step": 30429 + }, + { + "epoch": 0.8355299286106534, + "grad_norm": 0.38438984751701355, + "learning_rate": 1.257956776321807e-05, + "loss": 0.4734, + "step": 30430 + }, + { + "epoch": 0.83555738605162, + "grad_norm": 0.3639982044696808, + "learning_rate": 1.2579150484206484e-05, + "loss": 0.5544, + "step": 30431 + }, + { + "epoch": 0.8355848434925864, + "grad_norm": 0.3813841938972473, + "learning_rate": 1.2578733200383954e-05, + "loss": 0.5246, + "step": 30432 + }, + { + "epoch": 0.835612300933553, + "grad_norm": 0.3609105348587036, + "learning_rate": 1.257831591175127e-05, + "loss": 0.4242, + "step": 30433 + }, + { + "epoch": 0.8356397583745195, + "grad_norm": 0.33332130312919617, + "learning_rate": 1.2577898618309196e-05, + "loss": 0.5132, + "step": 30434 + }, + { + "epoch": 0.835667215815486, + "grad_norm": 0.3776567578315735, + "learning_rate": 1.2577481320058525e-05, + "loss": 0.5449, + "step": 30435 + }, + { + "epoch": 0.8356946732564525, + "grad_norm": 0.39415937662124634, + "learning_rate": 1.2577064017000028e-05, + "loss": 0.5479, + "step": 30436 + }, + { + "epoch": 0.835722130697419, + "grad_norm": 0.3302742540836334, + "learning_rate": 1.257664670913448e-05, + "loss": 0.3933, + "step": 30437 + }, + { + "epoch": 0.8357495881383855, + "grad_norm": 0.43013089895248413, + "learning_rate": 1.2576229396462667e-05, + "loss": 0.5574, + "step": 30438 + }, + { + "epoch": 0.835777045579352, + "grad_norm": 0.3520798087120056, + "learning_rate": 1.2575812078985365e-05, + "loss": 0.4259, + "step": 30439 + }, + { + "epoch": 0.8358045030203185, + "grad_norm": 0.3958924412727356, + "learning_rate": 1.2575394756703348e-05, + "loss": 0.4749, + "step": 30440 + }, + { + "epoch": 0.835831960461285, + "grad_norm": 0.4059414565563202, + "learning_rate": 1.25749774296174e-05, + "loss": 0.5349, + "step": 30441 + }, + { + "epoch": 0.8358594179022515, + "grad_norm": 0.41743776202201843, + "learning_rate": 1.2574560097728297e-05, + "loss": 0.5133, + "step": 30442 + }, + { + "epoch": 0.835886875343218, + "grad_norm": 0.4545934796333313, + "learning_rate": 1.2574142761036816e-05, + "loss": 0.5437, + "step": 30443 + }, + { + "epoch": 0.8359143327841845, + "grad_norm": 0.41759464144706726, + "learning_rate": 1.2573725419543741e-05, + "loss": 0.6141, + "step": 30444 + }, + { + "epoch": 0.835941790225151, + "grad_norm": 0.363748699426651, + "learning_rate": 1.2573308073249841e-05, + "loss": 0.4587, + "step": 30445 + }, + { + "epoch": 0.8359692476661175, + "grad_norm": 0.4759811758995056, + "learning_rate": 1.2572890722155905e-05, + "loss": 0.5085, + "step": 30446 + }, + { + "epoch": 0.835996705107084, + "grad_norm": 0.4488232135772705, + "learning_rate": 1.2572473366262703e-05, + "loss": 0.5285, + "step": 30447 + }, + { + "epoch": 0.8360241625480506, + "grad_norm": 0.42045995593070984, + "learning_rate": 1.2572056005571021e-05, + "loss": 0.532, + "step": 30448 + }, + { + "epoch": 0.836051619989017, + "grad_norm": 0.37438148260116577, + "learning_rate": 1.2571638640081632e-05, + "loss": 0.5421, + "step": 30449 + }, + { + "epoch": 0.8360790774299836, + "grad_norm": 0.37755095958709717, + "learning_rate": 1.2571221269795315e-05, + "loss": 0.4682, + "step": 30450 + }, + { + "epoch": 0.83610653487095, + "grad_norm": 0.4064919054508209, + "learning_rate": 1.257080389471285e-05, + "loss": 0.476, + "step": 30451 + }, + { + "epoch": 0.8361339923119165, + "grad_norm": 0.46516042947769165, + "learning_rate": 1.2570386514835015e-05, + "loss": 0.6137, + "step": 30452 + }, + { + "epoch": 0.836161449752883, + "grad_norm": 0.37969258427619934, + "learning_rate": 1.2569969130162591e-05, + "loss": 0.496, + "step": 30453 + }, + { + "epoch": 0.8361889071938495, + "grad_norm": 0.39078250527381897, + "learning_rate": 1.2569551740696353e-05, + "loss": 0.4608, + "step": 30454 + }, + { + "epoch": 0.8362163646348161, + "grad_norm": 0.45649415254592896, + "learning_rate": 1.256913434643708e-05, + "loss": 0.5264, + "step": 30455 + }, + { + "epoch": 0.8362438220757825, + "grad_norm": 0.41031721234321594, + "learning_rate": 1.2568716947385553e-05, + "loss": 0.5447, + "step": 30456 + }, + { + "epoch": 0.8362712795167491, + "grad_norm": 0.4300644099712372, + "learning_rate": 1.2568299543542548e-05, + "loss": 0.468, + "step": 30457 + }, + { + "epoch": 0.8362987369577155, + "grad_norm": 0.39714306592941284, + "learning_rate": 1.2567882134908845e-05, + "loss": 0.5666, + "step": 30458 + }, + { + "epoch": 0.836326194398682, + "grad_norm": 0.3946171700954437, + "learning_rate": 1.2567464721485223e-05, + "loss": 0.5444, + "step": 30459 + }, + { + "epoch": 0.8363536518396485, + "grad_norm": 0.396936297416687, + "learning_rate": 1.2567047303272458e-05, + "loss": 0.474, + "step": 30460 + }, + { + "epoch": 0.836381109280615, + "grad_norm": 0.40881767868995667, + "learning_rate": 1.256662988027133e-05, + "loss": 0.526, + "step": 30461 + }, + { + "epoch": 0.8364085667215816, + "grad_norm": 0.3861304819583893, + "learning_rate": 1.2566212452482622e-05, + "loss": 0.5261, + "step": 30462 + }, + { + "epoch": 0.836436024162548, + "grad_norm": 0.47773048281669617, + "learning_rate": 1.2565795019907106e-05, + "loss": 0.4945, + "step": 30463 + }, + { + "epoch": 0.8364634816035146, + "grad_norm": 0.34292536973953247, + "learning_rate": 1.2565377582545563e-05, + "loss": 0.4508, + "step": 30464 + }, + { + "epoch": 0.836490939044481, + "grad_norm": 0.5099388957023621, + "learning_rate": 1.2564960140398772e-05, + "loss": 0.5255, + "step": 30465 + }, + { + "epoch": 0.8365183964854476, + "grad_norm": 0.4132194519042969, + "learning_rate": 1.2564542693467513e-05, + "loss": 0.5004, + "step": 30466 + }, + { + "epoch": 0.836545853926414, + "grad_norm": 0.3755113482475281, + "learning_rate": 1.2564125241752563e-05, + "loss": 0.441, + "step": 30467 + }, + { + "epoch": 0.8365733113673806, + "grad_norm": 0.3508821725845337, + "learning_rate": 1.25637077852547e-05, + "loss": 0.4387, + "step": 30468 + }, + { + "epoch": 0.8366007688083471, + "grad_norm": 0.44334694743156433, + "learning_rate": 1.2563290323974706e-05, + "loss": 0.5409, + "step": 30469 + }, + { + "epoch": 0.8366282262493135, + "grad_norm": 0.37245213985443115, + "learning_rate": 1.2562872857913358e-05, + "loss": 0.4261, + "step": 30470 + }, + { + "epoch": 0.8366556836902801, + "grad_norm": 0.3761880099773407, + "learning_rate": 1.256245538707143e-05, + "loss": 0.4338, + "step": 30471 + }, + { + "epoch": 0.8366831411312465, + "grad_norm": 0.6353683471679688, + "learning_rate": 1.2562037911449707e-05, + "loss": 0.5742, + "step": 30472 + }, + { + "epoch": 0.8367105985722131, + "grad_norm": 0.38505589962005615, + "learning_rate": 1.2561620431048965e-05, + "loss": 0.4022, + "step": 30473 + }, + { + "epoch": 0.8367380560131795, + "grad_norm": 0.35510653257369995, + "learning_rate": 1.2561202945869986e-05, + "loss": 0.4657, + "step": 30474 + }, + { + "epoch": 0.8367655134541461, + "grad_norm": 0.40166106820106506, + "learning_rate": 1.2560785455913545e-05, + "loss": 0.5122, + "step": 30475 + }, + { + "epoch": 0.8367929708951126, + "grad_norm": 0.3795928657054901, + "learning_rate": 1.2560367961180417e-05, + "loss": 0.5371, + "step": 30476 + }, + { + "epoch": 0.8368204283360791, + "grad_norm": 0.3821995258331299, + "learning_rate": 1.2559950461671391e-05, + "loss": 0.5353, + "step": 30477 + }, + { + "epoch": 0.8368478857770456, + "grad_norm": 0.41867461800575256, + "learning_rate": 1.2559532957387237e-05, + "loss": 0.5829, + "step": 30478 + }, + { + "epoch": 0.836875343218012, + "grad_norm": 0.3771328926086426, + "learning_rate": 1.2559115448328741e-05, + "loss": 0.4342, + "step": 30479 + }, + { + "epoch": 0.8369028006589786, + "grad_norm": 0.36414608359336853, + "learning_rate": 1.2558697934496676e-05, + "loss": 0.4796, + "step": 30480 + }, + { + "epoch": 0.836930258099945, + "grad_norm": 0.39816829562187195, + "learning_rate": 1.2558280415891822e-05, + "loss": 0.5926, + "step": 30481 + }, + { + "epoch": 0.8369577155409116, + "grad_norm": 0.33887267112731934, + "learning_rate": 1.255786289251496e-05, + "loss": 0.4299, + "step": 30482 + }, + { + "epoch": 0.8369851729818781, + "grad_norm": 0.32662302255630493, + "learning_rate": 1.2557445364366864e-05, + "loss": 0.3864, + "step": 30483 + }, + { + "epoch": 0.8370126304228446, + "grad_norm": 0.4129866361618042, + "learning_rate": 1.2557027831448321e-05, + "loss": 0.5225, + "step": 30484 + }, + { + "epoch": 0.8370400878638111, + "grad_norm": 0.40260010957717896, + "learning_rate": 1.2556610293760103e-05, + "loss": 0.4605, + "step": 30485 + }, + { + "epoch": 0.8370675453047776, + "grad_norm": 0.3679565191268921, + "learning_rate": 1.255619275130299e-05, + "loss": 0.4941, + "step": 30486 + }, + { + "epoch": 0.8370950027457441, + "grad_norm": 0.4204712212085724, + "learning_rate": 1.2555775204077763e-05, + "loss": 0.5388, + "step": 30487 + }, + { + "epoch": 0.8371224601867105, + "grad_norm": 0.49357491731643677, + "learning_rate": 1.2555357652085196e-05, + "loss": 0.3651, + "step": 30488 + }, + { + "epoch": 0.8371499176276771, + "grad_norm": 0.40383797883987427, + "learning_rate": 1.2554940095326074e-05, + "loss": 0.5541, + "step": 30489 + }, + { + "epoch": 0.8371773750686436, + "grad_norm": 0.36534613370895386, + "learning_rate": 1.2554522533801175e-05, + "loss": 0.4915, + "step": 30490 + }, + { + "epoch": 0.8372048325096101, + "grad_norm": 0.35690346360206604, + "learning_rate": 1.2554104967511273e-05, + "loss": 0.4752, + "step": 30491 + }, + { + "epoch": 0.8372322899505766, + "grad_norm": 0.3626042306423187, + "learning_rate": 1.2553687396457153e-05, + "loss": 0.4303, + "step": 30492 + }, + { + "epoch": 0.8372597473915431, + "grad_norm": 0.3611411154270172, + "learning_rate": 1.2553269820639586e-05, + "loss": 0.4575, + "step": 30493 + }, + { + "epoch": 0.8372872048325096, + "grad_norm": 0.364143043756485, + "learning_rate": 1.2552852240059361e-05, + "loss": 0.6268, + "step": 30494 + }, + { + "epoch": 0.8373146622734761, + "grad_norm": 0.37655550241470337, + "learning_rate": 1.2552434654717249e-05, + "loss": 0.5233, + "step": 30495 + }, + { + "epoch": 0.8373421197144426, + "grad_norm": 0.4005410075187683, + "learning_rate": 1.2552017064614032e-05, + "loss": 0.5058, + "step": 30496 + }, + { + "epoch": 0.8373695771554092, + "grad_norm": 0.3586280345916748, + "learning_rate": 1.255159946975049e-05, + "loss": 0.4501, + "step": 30497 + }, + { + "epoch": 0.8373970345963756, + "grad_norm": 0.5538439750671387, + "learning_rate": 1.25511818701274e-05, + "loss": 0.5703, + "step": 30498 + }, + { + "epoch": 0.8374244920373421, + "grad_norm": 0.3882840871810913, + "learning_rate": 1.2550764265745544e-05, + "loss": 0.4441, + "step": 30499 + }, + { + "epoch": 0.8374519494783086, + "grad_norm": 0.3873562812805176, + "learning_rate": 1.2550346656605694e-05, + "loss": 0.5042, + "step": 30500 + }, + { + "epoch": 0.8374794069192751, + "grad_norm": 0.4123181104660034, + "learning_rate": 1.2549929042708637e-05, + "loss": 0.5391, + "step": 30501 + }, + { + "epoch": 0.8375068643602416, + "grad_norm": 0.34177783131599426, + "learning_rate": 1.2549511424055147e-05, + "loss": 0.4338, + "step": 30502 + }, + { + "epoch": 0.8375343218012081, + "grad_norm": 0.4181652367115021, + "learning_rate": 1.2549093800646003e-05, + "loss": 0.4632, + "step": 30503 + }, + { + "epoch": 0.8375617792421747, + "grad_norm": 0.44736167788505554, + "learning_rate": 1.2548676172481988e-05, + "loss": 0.5822, + "step": 30504 + }, + { + "epoch": 0.8375892366831411, + "grad_norm": 0.39303043484687805, + "learning_rate": 1.2548258539563879e-05, + "loss": 0.5422, + "step": 30505 + }, + { + "epoch": 0.8376166941241077, + "grad_norm": 0.3506660759449005, + "learning_rate": 1.2547840901892452e-05, + "loss": 0.4155, + "step": 30506 + }, + { + "epoch": 0.8376441515650741, + "grad_norm": 0.40155160427093506, + "learning_rate": 1.254742325946849e-05, + "loss": 0.5065, + "step": 30507 + }, + { + "epoch": 0.8376716090060407, + "grad_norm": 0.3874933123588562, + "learning_rate": 1.2547005612292771e-05, + "loss": 0.4595, + "step": 30508 + }, + { + "epoch": 0.8376990664470071, + "grad_norm": 0.3977082371711731, + "learning_rate": 1.2546587960366073e-05, + "loss": 0.5346, + "step": 30509 + }, + { + "epoch": 0.8377265238879736, + "grad_norm": 0.42185431718826294, + "learning_rate": 1.2546170303689174e-05, + "loss": 0.486, + "step": 30510 + }, + { + "epoch": 0.8377539813289402, + "grad_norm": 0.3981020748615265, + "learning_rate": 1.2545752642262859e-05, + "loss": 0.4549, + "step": 30511 + }, + { + "epoch": 0.8377814387699066, + "grad_norm": 0.4689103364944458, + "learning_rate": 1.2545334976087898e-05, + "loss": 0.5193, + "step": 30512 + }, + { + "epoch": 0.8378088962108732, + "grad_norm": 0.3558647334575653, + "learning_rate": 1.2544917305165077e-05, + "loss": 0.4262, + "step": 30513 + }, + { + "epoch": 0.8378363536518396, + "grad_norm": 0.3798430860042572, + "learning_rate": 1.2544499629495173e-05, + "loss": 0.554, + "step": 30514 + }, + { + "epoch": 0.8378638110928062, + "grad_norm": 0.4213593006134033, + "learning_rate": 1.2544081949078964e-05, + "loss": 0.4245, + "step": 30515 + }, + { + "epoch": 0.8378912685337726, + "grad_norm": 0.3999357521533966, + "learning_rate": 1.2543664263917233e-05, + "loss": 0.5636, + "step": 30516 + }, + { + "epoch": 0.8379187259747392, + "grad_norm": 0.38754233717918396, + "learning_rate": 1.2543246574010752e-05, + "loss": 0.4964, + "step": 30517 + }, + { + "epoch": 0.8379461834157057, + "grad_norm": 0.37616366147994995, + "learning_rate": 1.254282887936031e-05, + "loss": 0.5111, + "step": 30518 + }, + { + "epoch": 0.8379736408566721, + "grad_norm": 0.41615623235702515, + "learning_rate": 1.2542411179966678e-05, + "loss": 0.4673, + "step": 30519 + }, + { + "epoch": 0.8380010982976387, + "grad_norm": 0.37341922521591187, + "learning_rate": 1.2541993475830637e-05, + "loss": 0.4641, + "step": 30520 + }, + { + "epoch": 0.8380285557386051, + "grad_norm": 0.3912630081176758, + "learning_rate": 1.2541575766952967e-05, + "loss": 0.5098, + "step": 30521 + }, + { + "epoch": 0.8380560131795717, + "grad_norm": 0.4364231824874878, + "learning_rate": 1.2541158053334446e-05, + "loss": 0.5089, + "step": 30522 + }, + { + "epoch": 0.8380834706205381, + "grad_norm": 0.5087426900863647, + "learning_rate": 1.2540740334975857e-05, + "loss": 0.564, + "step": 30523 + }, + { + "epoch": 0.8381109280615047, + "grad_norm": 0.3941194713115692, + "learning_rate": 1.2540322611877973e-05, + "loss": 0.5048, + "step": 30524 + }, + { + "epoch": 0.8381383855024712, + "grad_norm": 0.3691200613975525, + "learning_rate": 1.2539904884041578e-05, + "loss": 0.4882, + "step": 30525 + }, + { + "epoch": 0.8381658429434377, + "grad_norm": 0.38718971610069275, + "learning_rate": 1.2539487151467452e-05, + "loss": 0.4236, + "step": 30526 + }, + { + "epoch": 0.8381933003844042, + "grad_norm": 0.40315380692481995, + "learning_rate": 1.2539069414156368e-05, + "loss": 0.4775, + "step": 30527 + }, + { + "epoch": 0.8382207578253706, + "grad_norm": 0.3857317864894867, + "learning_rate": 1.2538651672109112e-05, + "loss": 0.5683, + "step": 30528 + }, + { + "epoch": 0.8382482152663372, + "grad_norm": 0.37971267104148865, + "learning_rate": 1.2538233925326459e-05, + "loss": 0.4759, + "step": 30529 + }, + { + "epoch": 0.8382756727073036, + "grad_norm": 0.3438046872615814, + "learning_rate": 1.253781617380919e-05, + "loss": 0.4465, + "step": 30530 + }, + { + "epoch": 0.8383031301482702, + "grad_norm": 0.3815001845359802, + "learning_rate": 1.2537398417558084e-05, + "loss": 0.5467, + "step": 30531 + }, + { + "epoch": 0.8383305875892367, + "grad_norm": 0.4323759973049164, + "learning_rate": 1.253698065657392e-05, + "loss": 0.5445, + "step": 30532 + }, + { + "epoch": 0.8383580450302032, + "grad_norm": 0.38768818974494934, + "learning_rate": 1.2536562890857477e-05, + "loss": 0.5287, + "step": 30533 + }, + { + "epoch": 0.8383855024711697, + "grad_norm": 0.37375524640083313, + "learning_rate": 1.2536145120409538e-05, + "loss": 0.5472, + "step": 30534 + }, + { + "epoch": 0.8384129599121362, + "grad_norm": 0.3384387493133545, + "learning_rate": 1.2535727345230875e-05, + "loss": 0.4878, + "step": 30535 + }, + { + "epoch": 0.8384404173531027, + "grad_norm": 0.39630618691444397, + "learning_rate": 1.253530956532227e-05, + "loss": 0.5213, + "step": 30536 + }, + { + "epoch": 0.8384678747940691, + "grad_norm": 0.3707351088523865, + "learning_rate": 1.2534891780684509e-05, + "loss": 0.5069, + "step": 30537 + }, + { + "epoch": 0.8384953322350357, + "grad_norm": 0.40763506293296814, + "learning_rate": 1.2534473991318362e-05, + "loss": 0.4664, + "step": 30538 + }, + { + "epoch": 0.8385227896760022, + "grad_norm": 0.3907795548439026, + "learning_rate": 1.253405619722461e-05, + "loss": 0.5353, + "step": 30539 + }, + { + "epoch": 0.8385502471169687, + "grad_norm": 0.4295741021633148, + "learning_rate": 1.2533638398404038e-05, + "loss": 0.5194, + "step": 30540 + }, + { + "epoch": 0.8385777045579352, + "grad_norm": 0.4103579521179199, + "learning_rate": 1.253322059485742e-05, + "loss": 0.4675, + "step": 30541 + }, + { + "epoch": 0.8386051619989017, + "grad_norm": 0.34887024760246277, + "learning_rate": 1.2532802786585539e-05, + "loss": 0.4254, + "step": 30542 + }, + { + "epoch": 0.8386326194398682, + "grad_norm": 0.4102264940738678, + "learning_rate": 1.2532384973589172e-05, + "loss": 0.4487, + "step": 30543 + }, + { + "epoch": 0.8386600768808347, + "grad_norm": 0.40181422233581543, + "learning_rate": 1.2531967155869098e-05, + "loss": 0.4631, + "step": 30544 + }, + { + "epoch": 0.8386875343218012, + "grad_norm": 0.3951427936553955, + "learning_rate": 1.2531549333426094e-05, + "loss": 0.4994, + "step": 30545 + }, + { + "epoch": 0.8387149917627678, + "grad_norm": 0.3817053735256195, + "learning_rate": 1.2531131506260948e-05, + "loss": 0.5816, + "step": 30546 + }, + { + "epoch": 0.8387424492037342, + "grad_norm": 0.4940425157546997, + "learning_rate": 1.2530713674374433e-05, + "loss": 0.556, + "step": 30547 + }, + { + "epoch": 0.8387699066447007, + "grad_norm": 0.39306649565696716, + "learning_rate": 1.2530295837767326e-05, + "loss": 0.5547, + "step": 30548 + }, + { + "epoch": 0.8387973640856672, + "grad_norm": 0.4140707552433014, + "learning_rate": 1.2529877996440412e-05, + "loss": 0.5227, + "step": 30549 + }, + { + "epoch": 0.8388248215266337, + "grad_norm": 0.43041905760765076, + "learning_rate": 1.2529460150394467e-05, + "loss": 0.4473, + "step": 30550 + }, + { + "epoch": 0.8388522789676002, + "grad_norm": 0.40430882573127747, + "learning_rate": 1.2529042299630271e-05, + "loss": 0.568, + "step": 30551 + }, + { + "epoch": 0.8388797364085667, + "grad_norm": 0.3902410864830017, + "learning_rate": 1.2528624444148607e-05, + "loss": 0.5017, + "step": 30552 + }, + { + "epoch": 0.8389071938495333, + "grad_norm": 0.44447848200798035, + "learning_rate": 1.252820658395025e-05, + "loss": 0.5839, + "step": 30553 + }, + { + "epoch": 0.8389346512904997, + "grad_norm": 0.4018041789531708, + "learning_rate": 1.252778871903598e-05, + "loss": 0.54, + "step": 30554 + }, + { + "epoch": 0.8389621087314663, + "grad_norm": 0.35875967144966125, + "learning_rate": 1.2527370849406579e-05, + "loss": 0.4572, + "step": 30555 + }, + { + "epoch": 0.8389895661724327, + "grad_norm": 0.37019219994544983, + "learning_rate": 1.252695297506282e-05, + "loss": 0.5257, + "step": 30556 + }, + { + "epoch": 0.8390170236133992, + "grad_norm": 0.36494043469429016, + "learning_rate": 1.2526535096005493e-05, + "loss": 0.4656, + "step": 30557 + }, + { + "epoch": 0.8390444810543657, + "grad_norm": 0.34855857491493225, + "learning_rate": 1.2526117212235367e-05, + "loss": 0.3702, + "step": 30558 + }, + { + "epoch": 0.8390719384953322, + "grad_norm": 0.44938549399375916, + "learning_rate": 1.252569932375323e-05, + "loss": 0.4853, + "step": 30559 + }, + { + "epoch": 0.8390993959362988, + "grad_norm": 0.4690459966659546, + "learning_rate": 1.2525281430559857e-05, + "loss": 0.501, + "step": 30560 + }, + { + "epoch": 0.8391268533772652, + "grad_norm": 0.41520318388938904, + "learning_rate": 1.2524863532656025e-05, + "loss": 0.6192, + "step": 30561 + }, + { + "epoch": 0.8391543108182318, + "grad_norm": 0.41666877269744873, + "learning_rate": 1.2524445630042518e-05, + "loss": 0.4896, + "step": 30562 + }, + { + "epoch": 0.8391817682591982, + "grad_norm": 0.4412323236465454, + "learning_rate": 1.2524027722720118e-05, + "loss": 0.4934, + "step": 30563 + }, + { + "epoch": 0.8392092257001648, + "grad_norm": 0.36083659529685974, + "learning_rate": 1.2523609810689597e-05, + "loss": 0.3908, + "step": 30564 + }, + { + "epoch": 0.8392366831411312, + "grad_norm": 0.4248509705066681, + "learning_rate": 1.2523191893951738e-05, + "loss": 0.5339, + "step": 30565 + }, + { + "epoch": 0.8392641405820978, + "grad_norm": 0.39332589507102966, + "learning_rate": 1.252277397250732e-05, + "loss": 0.5278, + "step": 30566 + }, + { + "epoch": 0.8392915980230643, + "grad_norm": 0.3696857988834381, + "learning_rate": 1.2522356046357127e-05, + "loss": 0.4885, + "step": 30567 + }, + { + "epoch": 0.8393190554640307, + "grad_norm": 0.38997671008110046, + "learning_rate": 1.2521938115501935e-05, + "loss": 0.4451, + "step": 30568 + }, + { + "epoch": 0.8393465129049973, + "grad_norm": 0.375417023897171, + "learning_rate": 1.2521520179942519e-05, + "loss": 0.4498, + "step": 30569 + }, + { + "epoch": 0.8393739703459637, + "grad_norm": 0.37603500485420227, + "learning_rate": 1.2521102239679665e-05, + "loss": 0.4846, + "step": 30570 + }, + { + "epoch": 0.8394014277869303, + "grad_norm": 0.39804819226264954, + "learning_rate": 1.2520684294714152e-05, + "loss": 0.473, + "step": 30571 + }, + { + "epoch": 0.8394288852278967, + "grad_norm": 0.39755845069885254, + "learning_rate": 1.2520266345046758e-05, + "loss": 0.5236, + "step": 30572 + }, + { + "epoch": 0.8394563426688633, + "grad_norm": 0.37407106161117554, + "learning_rate": 1.2519848390678261e-05, + "loss": 0.5417, + "step": 30573 + }, + { + "epoch": 0.8394838001098298, + "grad_norm": 0.4284287989139557, + "learning_rate": 1.2519430431609444e-05, + "loss": 0.499, + "step": 30574 + }, + { + "epoch": 0.8395112575507963, + "grad_norm": 0.41173967719078064, + "learning_rate": 1.2519012467841087e-05, + "loss": 0.4791, + "step": 30575 + }, + { + "epoch": 0.8395387149917628, + "grad_norm": 0.3735550045967102, + "learning_rate": 1.2518594499373964e-05, + "loss": 0.5092, + "step": 30576 + }, + { + "epoch": 0.8395661724327292, + "grad_norm": 0.4479895532131195, + "learning_rate": 1.2518176526208864e-05, + "loss": 0.4782, + "step": 30577 + }, + { + "epoch": 0.8395936298736958, + "grad_norm": 0.45051339268684387, + "learning_rate": 1.2517758548346554e-05, + "loss": 0.4851, + "step": 30578 + }, + { + "epoch": 0.8396210873146622, + "grad_norm": 0.42953935265541077, + "learning_rate": 1.2517340565787826e-05, + "loss": 0.481, + "step": 30579 + }, + { + "epoch": 0.8396485447556288, + "grad_norm": 0.40934669971466064, + "learning_rate": 1.2516922578533453e-05, + "loss": 0.544, + "step": 30580 + }, + { + "epoch": 0.8396760021965953, + "grad_norm": 0.400932639837265, + "learning_rate": 1.2516504586584215e-05, + "loss": 0.5226, + "step": 30581 + }, + { + "epoch": 0.8397034596375618, + "grad_norm": 0.4283602833747864, + "learning_rate": 1.2516086589940893e-05, + "loss": 0.5216, + "step": 30582 + }, + { + "epoch": 0.8397309170785283, + "grad_norm": 0.30337604880332947, + "learning_rate": 1.2515668588604268e-05, + "loss": 0.4416, + "step": 30583 + }, + { + "epoch": 0.8397583745194948, + "grad_norm": 0.7358819842338562, + "learning_rate": 1.2515250582575114e-05, + "loss": 0.5061, + "step": 30584 + }, + { + "epoch": 0.8397858319604613, + "grad_norm": 0.36385655403137207, + "learning_rate": 1.2514832571854218e-05, + "loss": 0.4802, + "step": 30585 + }, + { + "epoch": 0.8398132894014277, + "grad_norm": 0.38285067677497864, + "learning_rate": 1.251441455644236e-05, + "loss": 0.5212, + "step": 30586 + }, + { + "epoch": 0.8398407468423943, + "grad_norm": 0.4087943434715271, + "learning_rate": 1.2513996536340312e-05, + "loss": 0.5488, + "step": 30587 + }, + { + "epoch": 0.8398682042833608, + "grad_norm": 0.4052119553089142, + "learning_rate": 1.2513578511548858e-05, + "loss": 0.558, + "step": 30588 + }, + { + "epoch": 0.8398956617243273, + "grad_norm": 0.3880019187927246, + "learning_rate": 1.2513160482068778e-05, + "loss": 0.6085, + "step": 30589 + }, + { + "epoch": 0.8399231191652938, + "grad_norm": 0.3723009526729584, + "learning_rate": 1.2512742447900853e-05, + "loss": 0.4189, + "step": 30590 + }, + { + "epoch": 0.8399505766062603, + "grad_norm": 0.3724484443664551, + "learning_rate": 1.2512324409045862e-05, + "loss": 0.4398, + "step": 30591 + }, + { + "epoch": 0.8399780340472268, + "grad_norm": 0.3960621654987335, + "learning_rate": 1.251190636550458e-05, + "loss": 0.5035, + "step": 30592 + }, + { + "epoch": 0.8400054914881933, + "grad_norm": 0.37847957015037537, + "learning_rate": 1.2511488317277796e-05, + "loss": 0.4415, + "step": 30593 + }, + { + "epoch": 0.8400329489291598, + "grad_norm": 0.4386344850063324, + "learning_rate": 1.251107026436628e-05, + "loss": 0.552, + "step": 30594 + }, + { + "epoch": 0.8400604063701264, + "grad_norm": 0.41137126088142395, + "learning_rate": 1.2510652206770821e-05, + "loss": 0.5459, + "step": 30595 + }, + { + "epoch": 0.8400878638110928, + "grad_norm": 0.3803212642669678, + "learning_rate": 1.2510234144492193e-05, + "loss": 0.4352, + "step": 30596 + }, + { + "epoch": 0.8401153212520593, + "grad_norm": 0.4624294340610504, + "learning_rate": 1.2509816077531176e-05, + "loss": 0.5412, + "step": 30597 + }, + { + "epoch": 0.8401427786930258, + "grad_norm": 0.4009767770767212, + "learning_rate": 1.2509398005888551e-05, + "loss": 0.4563, + "step": 30598 + }, + { + "epoch": 0.8401702361339923, + "grad_norm": 0.4003833532333374, + "learning_rate": 1.25089799295651e-05, + "loss": 0.4369, + "step": 30599 + }, + { + "epoch": 0.8401976935749588, + "grad_norm": 0.39277997612953186, + "learning_rate": 1.2508561848561599e-05, + "loss": 0.4793, + "step": 30600 + }, + { + "epoch": 0.8402251510159253, + "grad_norm": 0.4100687801837921, + "learning_rate": 1.2508143762878829e-05, + "loss": 0.4924, + "step": 30601 + }, + { + "epoch": 0.8402526084568919, + "grad_norm": 0.34715601801872253, + "learning_rate": 1.2507725672517567e-05, + "loss": 0.4933, + "step": 30602 + }, + { + "epoch": 0.8402800658978583, + "grad_norm": 0.3964793086051941, + "learning_rate": 1.2507307577478602e-05, + "loss": 0.5272, + "step": 30603 + }, + { + "epoch": 0.8403075233388249, + "grad_norm": 0.381266713142395, + "learning_rate": 1.2506889477762707e-05, + "loss": 0.5001, + "step": 30604 + }, + { + "epoch": 0.8403349807797913, + "grad_norm": 0.3887172341346741, + "learning_rate": 1.2506471373370661e-05, + "loss": 0.4057, + "step": 30605 + }, + { + "epoch": 0.8403624382207578, + "grad_norm": 0.3514834940433502, + "learning_rate": 1.2506053264303248e-05, + "loss": 0.4896, + "step": 30606 + }, + { + "epoch": 0.8403898956617243, + "grad_norm": 0.48293349146842957, + "learning_rate": 1.2505635150561243e-05, + "loss": 0.5122, + "step": 30607 + }, + { + "epoch": 0.8404173531026908, + "grad_norm": 0.4390162229537964, + "learning_rate": 1.2505217032145434e-05, + "loss": 0.4457, + "step": 30608 + }, + { + "epoch": 0.8404448105436574, + "grad_norm": 0.7867057919502258, + "learning_rate": 1.2504798909056593e-05, + "loss": 0.5473, + "step": 30609 + }, + { + "epoch": 0.8404722679846238, + "grad_norm": 0.39432069659233093, + "learning_rate": 1.25043807812955e-05, + "loss": 0.4795, + "step": 30610 + }, + { + "epoch": 0.8404997254255904, + "grad_norm": 0.4152894914150238, + "learning_rate": 1.250396264886294e-05, + "loss": 0.5287, + "step": 30611 + }, + { + "epoch": 0.8405271828665568, + "grad_norm": 0.3947867751121521, + "learning_rate": 1.250354451175969e-05, + "loss": 0.4702, + "step": 30612 + }, + { + "epoch": 0.8405546403075234, + "grad_norm": 0.5227567553520203, + "learning_rate": 1.250312636998653e-05, + "loss": 0.6095, + "step": 30613 + }, + { + "epoch": 0.8405820977484898, + "grad_norm": 0.4005325436592102, + "learning_rate": 1.2502708223544243e-05, + "loss": 0.4249, + "step": 30614 + }, + { + "epoch": 0.8406095551894563, + "grad_norm": 0.3780266344547272, + "learning_rate": 1.2502290072433602e-05, + "loss": 0.4156, + "step": 30615 + }, + { + "epoch": 0.8406370126304229, + "grad_norm": 0.4126477837562561, + "learning_rate": 1.2501871916655395e-05, + "loss": 0.5928, + "step": 30616 + }, + { + "epoch": 0.8406644700713893, + "grad_norm": 0.39426368474960327, + "learning_rate": 1.2501453756210397e-05, + "loss": 0.5461, + "step": 30617 + }, + { + "epoch": 0.8406919275123559, + "grad_norm": 0.36441686749458313, + "learning_rate": 1.2501035591099388e-05, + "loss": 0.5047, + "step": 30618 + }, + { + "epoch": 0.8407193849533223, + "grad_norm": 0.40143176913261414, + "learning_rate": 1.2500617421323151e-05, + "loss": 0.5396, + "step": 30619 + }, + { + "epoch": 0.8407468423942889, + "grad_norm": 0.3671274483203888, + "learning_rate": 1.2500199246882464e-05, + "loss": 0.5081, + "step": 30620 + }, + { + "epoch": 0.8407742998352553, + "grad_norm": 0.41633185744285583, + "learning_rate": 1.2499781067778108e-05, + "loss": 0.5194, + "step": 30621 + }, + { + "epoch": 0.8408017572762219, + "grad_norm": 0.35814476013183594, + "learning_rate": 1.2499362884010864e-05, + "loss": 0.5496, + "step": 30622 + }, + { + "epoch": 0.8408292147171884, + "grad_norm": 0.4559701979160309, + "learning_rate": 1.2498944695581505e-05, + "loss": 0.5326, + "step": 30623 + }, + { + "epoch": 0.8408566721581548, + "grad_norm": 0.3734596073627472, + "learning_rate": 1.2498526502490822e-05, + "loss": 0.4737, + "step": 30624 + }, + { + "epoch": 0.8408841295991214, + "grad_norm": 0.4217323064804077, + "learning_rate": 1.2498108304739586e-05, + "loss": 0.4254, + "step": 30625 + }, + { + "epoch": 0.8409115870400878, + "grad_norm": 0.3695915937423706, + "learning_rate": 1.2497690102328583e-05, + "loss": 0.4968, + "step": 30626 + }, + { + "epoch": 0.8409390444810544, + "grad_norm": 0.40209925174713135, + "learning_rate": 1.249727189525859e-05, + "loss": 0.4791, + "step": 30627 + }, + { + "epoch": 0.8409665019220208, + "grad_norm": 0.3817031979560852, + "learning_rate": 1.2496853683530388e-05, + "loss": 0.5595, + "step": 30628 + }, + { + "epoch": 0.8409939593629874, + "grad_norm": 0.4128388464450836, + "learning_rate": 1.2496435467144759e-05, + "loss": 0.5508, + "step": 30629 + }, + { + "epoch": 0.8410214168039539, + "grad_norm": 0.3706955909729004, + "learning_rate": 1.249601724610248e-05, + "loss": 0.4445, + "step": 30630 + }, + { + "epoch": 0.8410488742449204, + "grad_norm": 0.4104049801826477, + "learning_rate": 1.249559902040433e-05, + "loss": 0.5328, + "step": 30631 + }, + { + "epoch": 0.8410763316858869, + "grad_norm": 0.3648521900177002, + "learning_rate": 1.2495180790051093e-05, + "loss": 0.5032, + "step": 30632 + }, + { + "epoch": 0.8411037891268534, + "grad_norm": 0.40022483468055725, + "learning_rate": 1.2494762555043547e-05, + "loss": 0.5111, + "step": 30633 + }, + { + "epoch": 0.8411312465678199, + "grad_norm": 0.44694384932518005, + "learning_rate": 1.2494344315382474e-05, + "loss": 0.5121, + "step": 30634 + }, + { + "epoch": 0.8411587040087863, + "grad_norm": 0.40759822726249695, + "learning_rate": 1.2493926071068651e-05, + "loss": 0.4604, + "step": 30635 + }, + { + "epoch": 0.8411861614497529, + "grad_norm": 0.3841599225997925, + "learning_rate": 1.2493507822102859e-05, + "loss": 0.5095, + "step": 30636 + }, + { + "epoch": 0.8412136188907194, + "grad_norm": 0.33726054430007935, + "learning_rate": 1.249308956848588e-05, + "loss": 0.4834, + "step": 30637 + }, + { + "epoch": 0.8412410763316859, + "grad_norm": 0.38278672099113464, + "learning_rate": 1.2492671310218494e-05, + "loss": 0.4549, + "step": 30638 + }, + { + "epoch": 0.8412685337726524, + "grad_norm": 0.41481173038482666, + "learning_rate": 1.2492253047301481e-05, + "loss": 0.4514, + "step": 30639 + }, + { + "epoch": 0.8412959912136189, + "grad_norm": 0.4671052098274231, + "learning_rate": 1.2491834779735621e-05, + "loss": 0.5251, + "step": 30640 + }, + { + "epoch": 0.8413234486545854, + "grad_norm": 0.4366128742694855, + "learning_rate": 1.2491416507521692e-05, + "loss": 0.5283, + "step": 30641 + }, + { + "epoch": 0.8413509060955519, + "grad_norm": 0.40222224593162537, + "learning_rate": 1.2490998230660476e-05, + "loss": 0.5011, + "step": 30642 + }, + { + "epoch": 0.8413783635365184, + "grad_norm": 0.4204326868057251, + "learning_rate": 1.2490579949152757e-05, + "loss": 0.4713, + "step": 30643 + }, + { + "epoch": 0.841405820977485, + "grad_norm": 0.38050511479377747, + "learning_rate": 1.2490161662999304e-05, + "loss": 0.435, + "step": 30644 + }, + { + "epoch": 0.8414332784184514, + "grad_norm": 0.3957335948944092, + "learning_rate": 1.2489743372200913e-05, + "loss": 0.4745, + "step": 30645 + }, + { + "epoch": 0.8414607358594179, + "grad_norm": 0.3603295087814331, + "learning_rate": 1.248932507675835e-05, + "loss": 0.4124, + "step": 30646 + }, + { + "epoch": 0.8414881933003844, + "grad_norm": 0.4290843605995178, + "learning_rate": 1.2488906776672405e-05, + "loss": 0.598, + "step": 30647 + }, + { + "epoch": 0.8415156507413509, + "grad_norm": 0.3954031467437744, + "learning_rate": 1.2488488471943854e-05, + "loss": 0.5429, + "step": 30648 + }, + { + "epoch": 0.8415431081823174, + "grad_norm": 0.3519553542137146, + "learning_rate": 1.2488070162573474e-05, + "loss": 0.5676, + "step": 30649 + }, + { + "epoch": 0.8415705656232839, + "grad_norm": 0.4318679869174957, + "learning_rate": 1.2487651848562052e-05, + "loss": 0.4529, + "step": 30650 + }, + { + "epoch": 0.8415980230642505, + "grad_norm": 0.34621772170066833, + "learning_rate": 1.2487233529910366e-05, + "loss": 0.4622, + "step": 30651 + }, + { + "epoch": 0.8416254805052169, + "grad_norm": 0.35052043199539185, + "learning_rate": 1.2486815206619195e-05, + "loss": 0.5176, + "step": 30652 + }, + { + "epoch": 0.8416529379461835, + "grad_norm": 0.49845945835113525, + "learning_rate": 1.248639687868932e-05, + "loss": 0.4927, + "step": 30653 + }, + { + "epoch": 0.8416803953871499, + "grad_norm": 0.3979085087776184, + "learning_rate": 1.248597854612152e-05, + "loss": 0.484, + "step": 30654 + }, + { + "epoch": 0.8417078528281164, + "grad_norm": 0.40298038721084595, + "learning_rate": 1.2485560208916579e-05, + "loss": 0.4798, + "step": 30655 + }, + { + "epoch": 0.8417353102690829, + "grad_norm": 0.3473118543624878, + "learning_rate": 1.248514186707527e-05, + "loss": 0.4182, + "step": 30656 + }, + { + "epoch": 0.8417627677100494, + "grad_norm": 0.3972627818584442, + "learning_rate": 1.2484723520598383e-05, + "loss": 0.4186, + "step": 30657 + }, + { + "epoch": 0.841790225151016, + "grad_norm": 0.40903374552726746, + "learning_rate": 1.2484305169486692e-05, + "loss": 0.5975, + "step": 30658 + }, + { + "epoch": 0.8418176825919824, + "grad_norm": 0.44929078221321106, + "learning_rate": 1.2483886813740979e-05, + "loss": 0.5353, + "step": 30659 + }, + { + "epoch": 0.841845140032949, + "grad_norm": 0.3850041329860687, + "learning_rate": 1.2483468453362026e-05, + "loss": 0.5101, + "step": 30660 + }, + { + "epoch": 0.8418725974739154, + "grad_norm": 0.42702826857566833, + "learning_rate": 1.248305008835061e-05, + "loss": 0.53, + "step": 30661 + }, + { + "epoch": 0.841900054914882, + "grad_norm": 0.385602742433548, + "learning_rate": 1.2482631718707511e-05, + "loss": 0.5222, + "step": 30662 + }, + { + "epoch": 0.8419275123558484, + "grad_norm": 0.38588955998420715, + "learning_rate": 1.2482213344433516e-05, + "loss": 0.4856, + "step": 30663 + }, + { + "epoch": 0.841954969796815, + "grad_norm": 0.40398669242858887, + "learning_rate": 1.2481794965529397e-05, + "loss": 0.5376, + "step": 30664 + }, + { + "epoch": 0.8419824272377814, + "grad_norm": 0.49422743916511536, + "learning_rate": 1.2481376581995941e-05, + "loss": 0.4352, + "step": 30665 + }, + { + "epoch": 0.8420098846787479, + "grad_norm": 0.39098626375198364, + "learning_rate": 1.2480958193833925e-05, + "loss": 0.4245, + "step": 30666 + }, + { + "epoch": 0.8420373421197145, + "grad_norm": 0.39878469705581665, + "learning_rate": 1.2480539801044129e-05, + "loss": 0.5179, + "step": 30667 + }, + { + "epoch": 0.8420647995606809, + "grad_norm": 0.41309472918510437, + "learning_rate": 1.2480121403627337e-05, + "loss": 0.5074, + "step": 30668 + }, + { + "epoch": 0.8420922570016475, + "grad_norm": 0.37900787591934204, + "learning_rate": 1.2479703001584324e-05, + "loss": 0.5158, + "step": 30669 + }, + { + "epoch": 0.8421197144426139, + "grad_norm": 0.38396382331848145, + "learning_rate": 1.2479284594915875e-05, + "loss": 0.456, + "step": 30670 + }, + { + "epoch": 0.8421471718835805, + "grad_norm": 0.406526654958725, + "learning_rate": 1.247886618362277e-05, + "loss": 0.4787, + "step": 30671 + }, + { + "epoch": 0.8421746293245469, + "grad_norm": 0.3931874632835388, + "learning_rate": 1.2478447767705785e-05, + "loss": 0.398, + "step": 30672 + }, + { + "epoch": 0.8422020867655134, + "grad_norm": 0.358964741230011, + "learning_rate": 1.2478029347165707e-05, + "loss": 0.4497, + "step": 30673 + }, + { + "epoch": 0.84222954420648, + "grad_norm": 0.4008294641971588, + "learning_rate": 1.2477610922003311e-05, + "loss": 0.5403, + "step": 30674 + }, + { + "epoch": 0.8422570016474464, + "grad_norm": 0.42626452445983887, + "learning_rate": 1.2477192492219383e-05, + "loss": 0.552, + "step": 30675 + }, + { + "epoch": 0.842284459088413, + "grad_norm": 0.37605512142181396, + "learning_rate": 1.2476774057814697e-05, + "loss": 0.472, + "step": 30676 + }, + { + "epoch": 0.8423119165293794, + "grad_norm": 0.35545578598976135, + "learning_rate": 1.2476355618790041e-05, + "loss": 0.4454, + "step": 30677 + }, + { + "epoch": 0.842339373970346, + "grad_norm": 0.44550999999046326, + "learning_rate": 1.247593717514619e-05, + "loss": 0.4729, + "step": 30678 + }, + { + "epoch": 0.8423668314113124, + "grad_norm": 0.3676581382751465, + "learning_rate": 1.2475518726883924e-05, + "loss": 0.4004, + "step": 30679 + }, + { + "epoch": 0.842394288852279, + "grad_norm": 0.3795294761657715, + "learning_rate": 1.2475100274004027e-05, + "loss": 0.4968, + "step": 30680 + }, + { + "epoch": 0.8424217462932455, + "grad_norm": 0.36627352237701416, + "learning_rate": 1.2474681816507275e-05, + "loss": 0.5477, + "step": 30681 + }, + { + "epoch": 0.842449203734212, + "grad_norm": 0.37374192476272583, + "learning_rate": 1.2474263354394456e-05, + "loss": 0.4772, + "step": 30682 + }, + { + "epoch": 0.8424766611751785, + "grad_norm": 0.42114371061325073, + "learning_rate": 1.2473844887666344e-05, + "loss": 0.5782, + "step": 30683 + }, + { + "epoch": 0.8425041186161449, + "grad_norm": 0.4570796489715576, + "learning_rate": 1.2473426416323722e-05, + "loss": 0.4974, + "step": 30684 + }, + { + "epoch": 0.8425315760571115, + "grad_norm": 0.396892249584198, + "learning_rate": 1.247300794036737e-05, + "loss": 0.5301, + "step": 30685 + }, + { + "epoch": 0.8425590334980779, + "grad_norm": 0.3989466428756714, + "learning_rate": 1.2472589459798067e-05, + "loss": 0.4949, + "step": 30686 + }, + { + "epoch": 0.8425864909390445, + "grad_norm": 0.3614695370197296, + "learning_rate": 1.2472170974616599e-05, + "loss": 0.4357, + "step": 30687 + }, + { + "epoch": 0.842613948380011, + "grad_norm": 0.4259668290615082, + "learning_rate": 1.2471752484823743e-05, + "loss": 0.5044, + "step": 30688 + }, + { + "epoch": 0.8426414058209775, + "grad_norm": 0.4482545852661133, + "learning_rate": 1.2471333990420278e-05, + "loss": 0.5364, + "step": 30689 + }, + { + "epoch": 0.842668863261944, + "grad_norm": 0.4045089781284332, + "learning_rate": 1.2470915491406988e-05, + "loss": 0.5442, + "step": 30690 + }, + { + "epoch": 0.8426963207029105, + "grad_norm": 0.40730026364326477, + "learning_rate": 1.2470496987784649e-05, + "loss": 0.4646, + "step": 30691 + }, + { + "epoch": 0.842723778143877, + "grad_norm": 0.4704570174217224, + "learning_rate": 1.2470078479554047e-05, + "loss": 0.5033, + "step": 30692 + }, + { + "epoch": 0.8427512355848434, + "grad_norm": 0.39906933903694153, + "learning_rate": 1.246965996671596e-05, + "loss": 0.47, + "step": 30693 + }, + { + "epoch": 0.84277869302581, + "grad_norm": 0.3706224262714386, + "learning_rate": 1.2469241449271167e-05, + "loss": 0.559, + "step": 30694 + }, + { + "epoch": 0.8428061504667765, + "grad_norm": 0.36619266867637634, + "learning_rate": 1.2468822927220454e-05, + "loss": 0.4813, + "step": 30695 + }, + { + "epoch": 0.842833607907743, + "grad_norm": 0.3440684676170349, + "learning_rate": 1.2468404400564595e-05, + "loss": 0.4262, + "step": 30696 + }, + { + "epoch": 0.8428610653487095, + "grad_norm": 0.3524691164493561, + "learning_rate": 1.2467985869304378e-05, + "loss": 0.4951, + "step": 30697 + }, + { + "epoch": 0.842888522789676, + "grad_norm": 0.40912672877311707, + "learning_rate": 1.2467567333440576e-05, + "loss": 0.4454, + "step": 30698 + }, + { + "epoch": 0.8429159802306425, + "grad_norm": 0.37947049736976624, + "learning_rate": 1.2467148792973974e-05, + "loss": 0.5128, + "step": 30699 + }, + { + "epoch": 0.842943437671609, + "grad_norm": 0.37977778911590576, + "learning_rate": 1.2466730247905353e-05, + "loss": 0.4664, + "step": 30700 + }, + { + "epoch": 0.8429708951125755, + "grad_norm": 0.46851998567581177, + "learning_rate": 1.246631169823549e-05, + "loss": 0.5384, + "step": 30701 + }, + { + "epoch": 0.842998352553542, + "grad_norm": 0.37341561913490295, + "learning_rate": 1.2465893143965173e-05, + "loss": 0.5048, + "step": 30702 + }, + { + "epoch": 0.8430258099945085, + "grad_norm": 0.37704819440841675, + "learning_rate": 1.2465474585095176e-05, + "loss": 0.4334, + "step": 30703 + }, + { + "epoch": 0.843053267435475, + "grad_norm": 0.4067319631576538, + "learning_rate": 1.2465056021626283e-05, + "loss": 0.5662, + "step": 30704 + }, + { + "epoch": 0.8430807248764415, + "grad_norm": 0.3846971392631531, + "learning_rate": 1.2464637453559274e-05, + "loss": 0.478, + "step": 30705 + }, + { + "epoch": 0.843108182317408, + "grad_norm": 0.3550218343734741, + "learning_rate": 1.2464218880894927e-05, + "loss": 0.4576, + "step": 30706 + }, + { + "epoch": 0.8431356397583745, + "grad_norm": 0.36386606097221375, + "learning_rate": 1.2463800303634028e-05, + "loss": 0.4986, + "step": 30707 + }, + { + "epoch": 0.843163097199341, + "grad_norm": 0.40859976410865784, + "learning_rate": 1.2463381721777352e-05, + "loss": 0.5197, + "step": 30708 + }, + { + "epoch": 0.8431905546403076, + "grad_norm": 0.37488341331481934, + "learning_rate": 1.2462963135325685e-05, + "loss": 0.4141, + "step": 30709 + }, + { + "epoch": 0.843218012081274, + "grad_norm": 0.39592963457107544, + "learning_rate": 1.2462544544279806e-05, + "loss": 0.4491, + "step": 30710 + }, + { + "epoch": 0.8432454695222406, + "grad_norm": 0.38112515211105347, + "learning_rate": 1.2462125948640493e-05, + "loss": 0.463, + "step": 30711 + }, + { + "epoch": 0.843272926963207, + "grad_norm": 0.39374786615371704, + "learning_rate": 1.2461707348408535e-05, + "loss": 0.4699, + "step": 30712 + }, + { + "epoch": 0.8433003844041735, + "grad_norm": 0.3705957531929016, + "learning_rate": 1.2461288743584701e-05, + "loss": 0.485, + "step": 30713 + }, + { + "epoch": 0.84332784184514, + "grad_norm": 0.3590146601200104, + "learning_rate": 1.246087013416978e-05, + "loss": 0.4148, + "step": 30714 + }, + { + "epoch": 0.8433552992861065, + "grad_norm": 0.40518423914909363, + "learning_rate": 1.2460451520164552e-05, + "loss": 0.465, + "step": 30715 + }, + { + "epoch": 0.8433827567270731, + "grad_norm": 0.3891465365886688, + "learning_rate": 1.2460032901569794e-05, + "loss": 0.4836, + "step": 30716 + }, + { + "epoch": 0.8434102141680395, + "grad_norm": 0.5013729333877563, + "learning_rate": 1.2459614278386292e-05, + "loss": 0.5064, + "step": 30717 + }, + { + "epoch": 0.8434376716090061, + "grad_norm": 0.35132691264152527, + "learning_rate": 1.2459195650614823e-05, + "loss": 0.5516, + "step": 30718 + }, + { + "epoch": 0.8434651290499725, + "grad_norm": 0.33527764678001404, + "learning_rate": 1.245877701825617e-05, + "loss": 0.5047, + "step": 30719 + }, + { + "epoch": 0.8434925864909391, + "grad_norm": 0.3903636038303375, + "learning_rate": 1.2458358381311113e-05, + "loss": 0.4574, + "step": 30720 + }, + { + "epoch": 0.8435200439319055, + "grad_norm": 0.3909883499145508, + "learning_rate": 1.245793973978043e-05, + "loss": 0.5346, + "step": 30721 + }, + { + "epoch": 0.843547501372872, + "grad_norm": 0.3726802468299866, + "learning_rate": 1.245752109366491e-05, + "loss": 0.5231, + "step": 30722 + }, + { + "epoch": 0.8435749588138386, + "grad_norm": 0.4226968288421631, + "learning_rate": 1.2457102442965328e-05, + "loss": 0.5354, + "step": 30723 + }, + { + "epoch": 0.843602416254805, + "grad_norm": 0.3827008306980133, + "learning_rate": 1.2456683787682464e-05, + "loss": 0.4928, + "step": 30724 + }, + { + "epoch": 0.8436298736957716, + "grad_norm": 0.3727739453315735, + "learning_rate": 1.2456265127817101e-05, + "loss": 0.3599, + "step": 30725 + }, + { + "epoch": 0.843657331136738, + "grad_norm": 0.38728219270706177, + "learning_rate": 1.2455846463370019e-05, + "loss": 0.3888, + "step": 30726 + }, + { + "epoch": 0.8436847885777046, + "grad_norm": 0.36932849884033203, + "learning_rate": 1.2455427794342e-05, + "loss": 0.5101, + "step": 30727 + }, + { + "epoch": 0.843712246018671, + "grad_norm": 0.3912103474140167, + "learning_rate": 1.2455009120733825e-05, + "loss": 0.5104, + "step": 30728 + }, + { + "epoch": 0.8437397034596376, + "grad_norm": 0.3767103850841522, + "learning_rate": 1.2454590442546272e-05, + "loss": 0.452, + "step": 30729 + }, + { + "epoch": 0.8437671609006041, + "grad_norm": 0.42547744512557983, + "learning_rate": 1.245417175978013e-05, + "loss": 0.456, + "step": 30730 + }, + { + "epoch": 0.8437946183415705, + "grad_norm": 0.3675438463687897, + "learning_rate": 1.2453753072436168e-05, + "loss": 0.4552, + "step": 30731 + }, + { + "epoch": 0.8438220757825371, + "grad_norm": 0.38660386204719543, + "learning_rate": 1.2453334380515177e-05, + "loss": 0.4823, + "step": 30732 + }, + { + "epoch": 0.8438495332235035, + "grad_norm": 0.39893409609794617, + "learning_rate": 1.2452915684017935e-05, + "loss": 0.5238, + "step": 30733 + }, + { + "epoch": 0.8438769906644701, + "grad_norm": 0.38012954592704773, + "learning_rate": 1.2452496982945219e-05, + "loss": 0.4393, + "step": 30734 + }, + { + "epoch": 0.8439044481054365, + "grad_norm": 0.4679085910320282, + "learning_rate": 1.2452078277297816e-05, + "loss": 0.5604, + "step": 30735 + }, + { + "epoch": 0.8439319055464031, + "grad_norm": 0.39917293190956116, + "learning_rate": 1.2451659567076504e-05, + "loss": 0.4299, + "step": 30736 + }, + { + "epoch": 0.8439593629873696, + "grad_norm": 0.3971346616744995, + "learning_rate": 1.2451240852282063e-05, + "loss": 0.4445, + "step": 30737 + }, + { + "epoch": 0.8439868204283361, + "grad_norm": 0.36895322799682617, + "learning_rate": 1.2450822132915276e-05, + "loss": 0.4696, + "step": 30738 + }, + { + "epoch": 0.8440142778693026, + "grad_norm": 0.36785027384757996, + "learning_rate": 1.2450403408976923e-05, + "loss": 0.4995, + "step": 30739 + }, + { + "epoch": 0.844041735310269, + "grad_norm": 0.3407529890537262, + "learning_rate": 1.2449984680467788e-05, + "loss": 0.4809, + "step": 30740 + }, + { + "epoch": 0.8440691927512356, + "grad_norm": 0.41946470737457275, + "learning_rate": 1.2449565947388651e-05, + "loss": 0.5164, + "step": 30741 + }, + { + "epoch": 0.844096650192202, + "grad_norm": 0.5481218695640564, + "learning_rate": 1.2449147209740287e-05, + "loss": 0.3922, + "step": 30742 + }, + { + "epoch": 0.8441241076331686, + "grad_norm": 0.42028671503067017, + "learning_rate": 1.2448728467523482e-05, + "loss": 0.5084, + "step": 30743 + }, + { + "epoch": 0.8441515650741351, + "grad_norm": 0.46516045928001404, + "learning_rate": 1.244830972073902e-05, + "loss": 0.55, + "step": 30744 + }, + { + "epoch": 0.8441790225151016, + "grad_norm": 0.4107125401496887, + "learning_rate": 1.2447890969387677e-05, + "loss": 0.4555, + "step": 30745 + }, + { + "epoch": 0.8442064799560681, + "grad_norm": 0.44805437326431274, + "learning_rate": 1.2447472213470237e-05, + "loss": 0.5526, + "step": 30746 + }, + { + "epoch": 0.8442339373970346, + "grad_norm": 0.3675466477870941, + "learning_rate": 1.244705345298748e-05, + "loss": 0.5257, + "step": 30747 + }, + { + "epoch": 0.8442613948380011, + "grad_norm": 0.4100622236728668, + "learning_rate": 1.2446634687940188e-05, + "loss": 0.4795, + "step": 30748 + }, + { + "epoch": 0.8442888522789675, + "grad_norm": 0.5004065632820129, + "learning_rate": 1.2446215918329139e-05, + "loss": 0.5186, + "step": 30749 + }, + { + "epoch": 0.8443163097199341, + "grad_norm": 0.3515418469905853, + "learning_rate": 1.2445797144155118e-05, + "loss": 0.428, + "step": 30750 + }, + { + "epoch": 0.8443437671609006, + "grad_norm": 0.3505128026008606, + "learning_rate": 1.2445378365418908e-05, + "loss": 0.5068, + "step": 30751 + }, + { + "epoch": 0.8443712246018671, + "grad_norm": 0.3595731556415558, + "learning_rate": 1.244495958212128e-05, + "loss": 0.4357, + "step": 30752 + }, + { + "epoch": 0.8443986820428336, + "grad_norm": 0.40208426117897034, + "learning_rate": 1.2444540794263027e-05, + "loss": 0.4948, + "step": 30753 + }, + { + "epoch": 0.8444261394838001, + "grad_norm": 0.41855236887931824, + "learning_rate": 1.2444122001844924e-05, + "loss": 0.514, + "step": 30754 + }, + { + "epoch": 0.8444535969247666, + "grad_norm": 0.4175373613834381, + "learning_rate": 1.2443703204867755e-05, + "loss": 0.5036, + "step": 30755 + }, + { + "epoch": 0.8444810543657331, + "grad_norm": 0.5334622263908386, + "learning_rate": 1.2443284403332296e-05, + "loss": 0.47, + "step": 30756 + }, + { + "epoch": 0.8445085118066996, + "grad_norm": 0.43275851011276245, + "learning_rate": 1.2442865597239333e-05, + "loss": 0.5941, + "step": 30757 + }, + { + "epoch": 0.8445359692476662, + "grad_norm": 0.39823341369628906, + "learning_rate": 1.2442446786589647e-05, + "loss": 0.586, + "step": 30758 + }, + { + "epoch": 0.8445634266886326, + "grad_norm": 0.3945773243904114, + "learning_rate": 1.244202797138402e-05, + "loss": 0.4616, + "step": 30759 + }, + { + "epoch": 0.8445908841295992, + "grad_norm": 0.4193163514137268, + "learning_rate": 1.2441609151623228e-05, + "loss": 0.4916, + "step": 30760 + }, + { + "epoch": 0.8446183415705656, + "grad_norm": 0.42948928475379944, + "learning_rate": 1.2441190327308057e-05, + "loss": 0.472, + "step": 30761 + }, + { + "epoch": 0.8446457990115321, + "grad_norm": 0.4326108396053314, + "learning_rate": 1.2440771498439286e-05, + "loss": 0.5863, + "step": 30762 + }, + { + "epoch": 0.8446732564524986, + "grad_norm": 0.4393850862979889, + "learning_rate": 1.2440352665017699e-05, + "loss": 0.4993, + "step": 30763 + }, + { + "epoch": 0.8447007138934651, + "grad_norm": 0.3841709494590759, + "learning_rate": 1.2439933827044075e-05, + "loss": 0.5338, + "step": 30764 + }, + { + "epoch": 0.8447281713344317, + "grad_norm": 0.3892265558242798, + "learning_rate": 1.2439514984519195e-05, + "loss": 0.4557, + "step": 30765 + }, + { + "epoch": 0.8447556287753981, + "grad_norm": 0.38360217213630676, + "learning_rate": 1.2439096137443842e-05, + "loss": 0.5009, + "step": 30766 + }, + { + "epoch": 0.8447830862163647, + "grad_norm": 0.3927462100982666, + "learning_rate": 1.2438677285818797e-05, + "loss": 0.4532, + "step": 30767 + }, + { + "epoch": 0.8448105436573311, + "grad_norm": 0.38551098108291626, + "learning_rate": 1.2438258429644836e-05, + "loss": 0.4821, + "step": 30768 + }, + { + "epoch": 0.8448380010982977, + "grad_norm": 0.37478873133659363, + "learning_rate": 1.243783956892275e-05, + "loss": 0.5584, + "step": 30769 + }, + { + "epoch": 0.8448654585392641, + "grad_norm": 0.37800586223602295, + "learning_rate": 1.2437420703653309e-05, + "loss": 0.4341, + "step": 30770 + }, + { + "epoch": 0.8448929159802306, + "grad_norm": 0.3979164659976959, + "learning_rate": 1.2437001833837306e-05, + "loss": 0.4469, + "step": 30771 + }, + { + "epoch": 0.8449203734211972, + "grad_norm": 0.3658542037010193, + "learning_rate": 1.2436582959475517e-05, + "loss": 0.4918, + "step": 30772 + }, + { + "epoch": 0.8449478308621636, + "grad_norm": 0.34203478693962097, + "learning_rate": 1.2436164080568718e-05, + "loss": 0.482, + "step": 30773 + }, + { + "epoch": 0.8449752883031302, + "grad_norm": 0.3595828711986542, + "learning_rate": 1.24357451971177e-05, + "loss": 0.529, + "step": 30774 + }, + { + "epoch": 0.8450027457440966, + "grad_norm": 0.4439932703971863, + "learning_rate": 1.2435326309123236e-05, + "loss": 0.4997, + "step": 30775 + }, + { + "epoch": 0.8450302031850632, + "grad_norm": 0.3953765034675598, + "learning_rate": 1.2434907416586115e-05, + "loss": 0.4871, + "step": 30776 + }, + { + "epoch": 0.8450576606260296, + "grad_norm": 0.38154059648513794, + "learning_rate": 1.2434488519507116e-05, + "loss": 0.4352, + "step": 30777 + }, + { + "epoch": 0.8450851180669962, + "grad_norm": 0.3829690217971802, + "learning_rate": 1.2434069617887015e-05, + "loss": 0.4326, + "step": 30778 + }, + { + "epoch": 0.8451125755079627, + "grad_norm": 0.391422301530838, + "learning_rate": 1.2433650711726597e-05, + "loss": 0.5101, + "step": 30779 + }, + { + "epoch": 0.8451400329489291, + "grad_norm": 0.3915350139141083, + "learning_rate": 1.2433231801026644e-05, + "loss": 0.513, + "step": 30780 + }, + { + "epoch": 0.8451674903898957, + "grad_norm": 0.36687204241752625, + "learning_rate": 1.2432812885787939e-05, + "loss": 0.4471, + "step": 30781 + }, + { + "epoch": 0.8451949478308621, + "grad_norm": 0.386595219373703, + "learning_rate": 1.2432393966011263e-05, + "loss": 0.4606, + "step": 30782 + }, + { + "epoch": 0.8452224052718287, + "grad_norm": 0.35161641240119934, + "learning_rate": 1.2431975041697391e-05, + "loss": 0.5018, + "step": 30783 + }, + { + "epoch": 0.8452498627127951, + "grad_norm": 0.3611421585083008, + "learning_rate": 1.2431556112847112e-05, + "loss": 0.4771, + "step": 30784 + }, + { + "epoch": 0.8452773201537617, + "grad_norm": 0.41973403096199036, + "learning_rate": 1.2431137179461206e-05, + "loss": 0.4916, + "step": 30785 + }, + { + "epoch": 0.8453047775947282, + "grad_norm": 0.40470975637435913, + "learning_rate": 1.2430718241540451e-05, + "loss": 0.4759, + "step": 30786 + }, + { + "epoch": 0.8453322350356947, + "grad_norm": 0.4087924063205719, + "learning_rate": 1.2430299299085634e-05, + "loss": 0.5453, + "step": 30787 + }, + { + "epoch": 0.8453596924766612, + "grad_norm": 0.4077273905277252, + "learning_rate": 1.2429880352097528e-05, + "loss": 0.5106, + "step": 30788 + }, + { + "epoch": 0.8453871499176276, + "grad_norm": 0.38181692361831665, + "learning_rate": 1.2429461400576923e-05, + "loss": 0.4189, + "step": 30789 + }, + { + "epoch": 0.8454146073585942, + "grad_norm": 0.3874835968017578, + "learning_rate": 1.2429042444524597e-05, + "loss": 0.3997, + "step": 30790 + }, + { + "epoch": 0.8454420647995606, + "grad_norm": 0.38676074147224426, + "learning_rate": 1.2428623483941329e-05, + "loss": 0.4773, + "step": 30791 + }, + { + "epoch": 0.8454695222405272, + "grad_norm": 0.4124961495399475, + "learning_rate": 1.2428204518827907e-05, + "loss": 0.526, + "step": 30792 + }, + { + "epoch": 0.8454969796814937, + "grad_norm": 0.38988107442855835, + "learning_rate": 1.2427785549185104e-05, + "loss": 0.4775, + "step": 30793 + }, + { + "epoch": 0.8455244371224602, + "grad_norm": 0.39029398560523987, + "learning_rate": 1.2427366575013711e-05, + "loss": 0.5259, + "step": 30794 + }, + { + "epoch": 0.8455518945634267, + "grad_norm": 0.39702367782592773, + "learning_rate": 1.2426947596314503e-05, + "loss": 0.5327, + "step": 30795 + }, + { + "epoch": 0.8455793520043932, + "grad_norm": 0.3583374321460724, + "learning_rate": 1.2426528613088262e-05, + "loss": 0.4801, + "step": 30796 + }, + { + "epoch": 0.8456068094453597, + "grad_norm": 0.3858087658882141, + "learning_rate": 1.2426109625335772e-05, + "loss": 0.5218, + "step": 30797 + }, + { + "epoch": 0.8456342668863261, + "grad_norm": 0.38249272108078003, + "learning_rate": 1.2425690633057811e-05, + "loss": 0.3806, + "step": 30798 + }, + { + "epoch": 0.8456617243272927, + "grad_norm": 0.3798992931842804, + "learning_rate": 1.2425271636255165e-05, + "loss": 0.4069, + "step": 30799 + }, + { + "epoch": 0.8456891817682592, + "grad_norm": 0.3444012403488159, + "learning_rate": 1.2424852634928612e-05, + "loss": 0.5043, + "step": 30800 + }, + { + "epoch": 0.8457166392092257, + "grad_norm": 0.4105103313922882, + "learning_rate": 1.2424433629078937e-05, + "loss": 0.5296, + "step": 30801 + }, + { + "epoch": 0.8457440966501922, + "grad_norm": 0.3665727376937866, + "learning_rate": 1.2424014618706917e-05, + "loss": 0.4582, + "step": 30802 + }, + { + "epoch": 0.8457715540911587, + "grad_norm": 0.3720276653766632, + "learning_rate": 1.2423595603813339e-05, + "loss": 0.4677, + "step": 30803 + }, + { + "epoch": 0.8457990115321252, + "grad_norm": 0.47655755281448364, + "learning_rate": 1.2423176584398977e-05, + "loss": 0.5319, + "step": 30804 + }, + { + "epoch": 0.8458264689730917, + "grad_norm": 0.3740408420562744, + "learning_rate": 1.242275756046462e-05, + "loss": 0.4999, + "step": 30805 + }, + { + "epoch": 0.8458539264140582, + "grad_norm": 0.388681024312973, + "learning_rate": 1.2422338532011047e-05, + "loss": 0.4833, + "step": 30806 + }, + { + "epoch": 0.8458813838550248, + "grad_norm": 0.38259458541870117, + "learning_rate": 1.242191949903904e-05, + "loss": 0.4691, + "step": 30807 + }, + { + "epoch": 0.8459088412959912, + "grad_norm": 0.3873147666454315, + "learning_rate": 1.242150046154938e-05, + "loss": 0.5252, + "step": 30808 + }, + { + "epoch": 0.8459362987369577, + "grad_norm": 0.38362711668014526, + "learning_rate": 1.2421081419542846e-05, + "loss": 0.4584, + "step": 30809 + }, + { + "epoch": 0.8459637561779242, + "grad_norm": 0.37525492906570435, + "learning_rate": 1.2420662373020227e-05, + "loss": 0.5049, + "step": 30810 + }, + { + "epoch": 0.8459912136188907, + "grad_norm": 0.5058762431144714, + "learning_rate": 1.2420243321982297e-05, + "loss": 0.5475, + "step": 30811 + }, + { + "epoch": 0.8460186710598572, + "grad_norm": 0.41086187958717346, + "learning_rate": 1.2419824266429843e-05, + "loss": 0.4821, + "step": 30812 + }, + { + "epoch": 0.8460461285008237, + "grad_norm": 0.6994169354438782, + "learning_rate": 1.2419405206363644e-05, + "loss": 0.4854, + "step": 30813 + }, + { + "epoch": 0.8460735859417903, + "grad_norm": 0.34632453322410583, + "learning_rate": 1.241898614178448e-05, + "loss": 0.4454, + "step": 30814 + }, + { + "epoch": 0.8461010433827567, + "grad_norm": 0.4212203621864319, + "learning_rate": 1.2418567072693135e-05, + "loss": 0.5141, + "step": 30815 + }, + { + "epoch": 0.8461285008237233, + "grad_norm": 0.3971915543079376, + "learning_rate": 1.2418147999090394e-05, + "loss": 0.5662, + "step": 30816 + }, + { + "epoch": 0.8461559582646897, + "grad_norm": 0.35159993171691895, + "learning_rate": 1.2417728920977031e-05, + "loss": 0.383, + "step": 30817 + }, + { + "epoch": 0.8461834157056562, + "grad_norm": 0.3830012381076813, + "learning_rate": 1.2417309838353833e-05, + "loss": 0.481, + "step": 30818 + }, + { + "epoch": 0.8462108731466227, + "grad_norm": 0.37009960412979126, + "learning_rate": 1.241689075122158e-05, + "loss": 0.4548, + "step": 30819 + }, + { + "epoch": 0.8462383305875892, + "grad_norm": 0.40312692523002625, + "learning_rate": 1.2416471659581057e-05, + "loss": 0.4882, + "step": 30820 + }, + { + "epoch": 0.8462657880285558, + "grad_norm": 0.37304168939590454, + "learning_rate": 1.2416052563433043e-05, + "loss": 0.5175, + "step": 30821 + }, + { + "epoch": 0.8462932454695222, + "grad_norm": 0.4129174053668976, + "learning_rate": 1.2415633462778316e-05, + "loss": 0.5196, + "step": 30822 + }, + { + "epoch": 0.8463207029104888, + "grad_norm": 0.3736322522163391, + "learning_rate": 1.2415214357617664e-05, + "loss": 0.488, + "step": 30823 + }, + { + "epoch": 0.8463481603514552, + "grad_norm": 0.37699583172798157, + "learning_rate": 1.2414795247951868e-05, + "loss": 0.5442, + "step": 30824 + }, + { + "epoch": 0.8463756177924218, + "grad_norm": 0.41436439752578735, + "learning_rate": 1.2414376133781705e-05, + "loss": 0.4961, + "step": 30825 + }, + { + "epoch": 0.8464030752333882, + "grad_norm": 0.3825435936450958, + "learning_rate": 1.2413957015107962e-05, + "loss": 0.4185, + "step": 30826 + }, + { + "epoch": 0.8464305326743548, + "grad_norm": 0.4149620831012726, + "learning_rate": 1.2413537891931418e-05, + "loss": 0.5455, + "step": 30827 + }, + { + "epoch": 0.8464579901153213, + "grad_norm": 0.37978580594062805, + "learning_rate": 1.2413118764252857e-05, + "loss": 0.4612, + "step": 30828 + }, + { + "epoch": 0.8464854475562877, + "grad_norm": 0.3983061611652374, + "learning_rate": 1.241269963207306e-05, + "loss": 0.5241, + "step": 30829 + }, + { + "epoch": 0.8465129049972543, + "grad_norm": 0.42849668860435486, + "learning_rate": 1.2412280495392803e-05, + "loss": 0.4719, + "step": 30830 + }, + { + "epoch": 0.8465403624382207, + "grad_norm": 0.36405274271965027, + "learning_rate": 1.2411861354212877e-05, + "loss": 0.4634, + "step": 30831 + }, + { + "epoch": 0.8465678198791873, + "grad_norm": 0.3840957581996918, + "learning_rate": 1.241144220853406e-05, + "loss": 0.4906, + "step": 30832 + }, + { + "epoch": 0.8465952773201537, + "grad_norm": 0.5997002720832825, + "learning_rate": 1.2411023058357133e-05, + "loss": 0.6239, + "step": 30833 + }, + { + "epoch": 0.8466227347611203, + "grad_norm": 0.38323572278022766, + "learning_rate": 1.2410603903682878e-05, + "loss": 0.4215, + "step": 30834 + }, + { + "epoch": 0.8466501922020868, + "grad_norm": 0.39844903349876404, + "learning_rate": 1.2410184744512077e-05, + "loss": 0.4611, + "step": 30835 + }, + { + "epoch": 0.8466776496430533, + "grad_norm": 0.38231945037841797, + "learning_rate": 1.2409765580845514e-05, + "loss": 0.4283, + "step": 30836 + }, + { + "epoch": 0.8467051070840198, + "grad_norm": 0.418429970741272, + "learning_rate": 1.2409346412683965e-05, + "loss": 0.5155, + "step": 30837 + }, + { + "epoch": 0.8467325645249862, + "grad_norm": 0.4392671585083008, + "learning_rate": 1.2408927240028222e-05, + "loss": 0.4762, + "step": 30838 + }, + { + "epoch": 0.8467600219659528, + "grad_norm": 0.3618520200252533, + "learning_rate": 1.2408508062879057e-05, + "loss": 0.4548, + "step": 30839 + }, + { + "epoch": 0.8467874794069192, + "grad_norm": 0.3948478400707245, + "learning_rate": 1.2408088881237255e-05, + "loss": 0.5264, + "step": 30840 + }, + { + "epoch": 0.8468149368478858, + "grad_norm": 0.4691292941570282, + "learning_rate": 1.2407669695103602e-05, + "loss": 0.475, + "step": 30841 + }, + { + "epoch": 0.8468423942888523, + "grad_norm": 0.4491787254810333, + "learning_rate": 1.2407250504478872e-05, + "loss": 0.4889, + "step": 30842 + }, + { + "epoch": 0.8468698517298188, + "grad_norm": 0.381631463766098, + "learning_rate": 1.2406831309363855e-05, + "loss": 0.5406, + "step": 30843 + }, + { + "epoch": 0.8468973091707853, + "grad_norm": 0.38228392601013184, + "learning_rate": 1.2406412109759329e-05, + "loss": 0.4461, + "step": 30844 + }, + { + "epoch": 0.8469247666117518, + "grad_norm": 0.5079454183578491, + "learning_rate": 1.2405992905666075e-05, + "loss": 0.538, + "step": 30845 + }, + { + "epoch": 0.8469522240527183, + "grad_norm": 0.35262101888656616, + "learning_rate": 1.2405573697084878e-05, + "loss": 0.4505, + "step": 30846 + }, + { + "epoch": 0.8469796814936847, + "grad_norm": 0.4142390787601471, + "learning_rate": 1.2405154484016519e-05, + "loss": 0.4849, + "step": 30847 + }, + { + "epoch": 0.8470071389346513, + "grad_norm": 0.45737743377685547, + "learning_rate": 1.2404735266461777e-05, + "loss": 0.5125, + "step": 30848 + }, + { + "epoch": 0.8470345963756178, + "grad_norm": 0.3770991265773773, + "learning_rate": 1.2404316044421436e-05, + "loss": 0.5227, + "step": 30849 + }, + { + "epoch": 0.8470620538165843, + "grad_norm": 0.3815614879131317, + "learning_rate": 1.240389681789628e-05, + "loss": 0.4289, + "step": 30850 + }, + { + "epoch": 0.8470895112575508, + "grad_norm": 0.36415335536003113, + "learning_rate": 1.2403477586887087e-05, + "loss": 0.4875, + "step": 30851 + }, + { + "epoch": 0.8471169686985173, + "grad_norm": 0.38489317893981934, + "learning_rate": 1.2403058351394645e-05, + "loss": 0.435, + "step": 30852 + }, + { + "epoch": 0.8471444261394838, + "grad_norm": 0.3770506978034973, + "learning_rate": 1.2402639111419727e-05, + "loss": 0.4995, + "step": 30853 + }, + { + "epoch": 0.8471718835804503, + "grad_norm": 0.38738927245140076, + "learning_rate": 1.2402219866963126e-05, + "loss": 0.4179, + "step": 30854 + }, + { + "epoch": 0.8471993410214168, + "grad_norm": 0.5262322425842285, + "learning_rate": 1.2401800618025612e-05, + "loss": 0.542, + "step": 30855 + }, + { + "epoch": 0.8472267984623834, + "grad_norm": 0.4075547456741333, + "learning_rate": 1.240138136460798e-05, + "loss": 0.4665, + "step": 30856 + }, + { + "epoch": 0.8472542559033498, + "grad_norm": 0.37239980697631836, + "learning_rate": 1.2400962106711e-05, + "loss": 0.468, + "step": 30857 + }, + { + "epoch": 0.8472817133443163, + "grad_norm": 0.4326724112033844, + "learning_rate": 1.2400542844335462e-05, + "loss": 0.4747, + "step": 30858 + }, + { + "epoch": 0.8473091707852828, + "grad_norm": 0.33898380398750305, + "learning_rate": 1.2400123577482147e-05, + "loss": 0.4623, + "step": 30859 + }, + { + "epoch": 0.8473366282262493, + "grad_norm": 0.3797750174999237, + "learning_rate": 1.239970430615183e-05, + "loss": 0.4951, + "step": 30860 + }, + { + "epoch": 0.8473640856672158, + "grad_norm": 0.3849031329154968, + "learning_rate": 1.2399285030345303e-05, + "loss": 0.5249, + "step": 30861 + }, + { + "epoch": 0.8473915431081823, + "grad_norm": 0.37914708256721497, + "learning_rate": 1.239886575006334e-05, + "loss": 0.5503, + "step": 30862 + }, + { + "epoch": 0.8474190005491489, + "grad_norm": 0.4071941673755646, + "learning_rate": 1.239844646530673e-05, + "loss": 0.4755, + "step": 30863 + }, + { + "epoch": 0.8474464579901153, + "grad_norm": 0.33149468898773193, + "learning_rate": 1.2398027176076253e-05, + "loss": 0.4209, + "step": 30864 + }, + { + "epoch": 0.8474739154310819, + "grad_norm": 0.4663156270980835, + "learning_rate": 1.2397607882372687e-05, + "loss": 0.5048, + "step": 30865 + }, + { + "epoch": 0.8475013728720483, + "grad_norm": 0.5159270167350769, + "learning_rate": 1.2397188584196817e-05, + "loss": 0.4589, + "step": 30866 + }, + { + "epoch": 0.8475288303130148, + "grad_norm": 0.4392662048339844, + "learning_rate": 1.2396769281549425e-05, + "loss": 0.5072, + "step": 30867 + }, + { + "epoch": 0.8475562877539813, + "grad_norm": 0.4814586341381073, + "learning_rate": 1.2396349974431296e-05, + "loss": 0.5035, + "step": 30868 + }, + { + "epoch": 0.8475837451949478, + "grad_norm": 0.4415634274482727, + "learning_rate": 1.239593066284321e-05, + "loss": 0.456, + "step": 30869 + }, + { + "epoch": 0.8476112026359144, + "grad_norm": 0.3966514468193054, + "learning_rate": 1.2395511346785945e-05, + "loss": 0.435, + "step": 30870 + }, + { + "epoch": 0.8476386600768808, + "grad_norm": 0.44009122252464294, + "learning_rate": 1.2395092026260289e-05, + "loss": 0.5077, + "step": 30871 + }, + { + "epoch": 0.8476661175178474, + "grad_norm": 0.41188380122184753, + "learning_rate": 1.239467270126702e-05, + "loss": 0.5306, + "step": 30872 + }, + { + "epoch": 0.8476935749588138, + "grad_norm": 0.3930886685848236, + "learning_rate": 1.2394253371806926e-05, + "loss": 0.407, + "step": 30873 + }, + { + "epoch": 0.8477210323997804, + "grad_norm": 0.399292916059494, + "learning_rate": 1.2393834037880782e-05, + "loss": 0.4929, + "step": 30874 + }, + { + "epoch": 0.8477484898407468, + "grad_norm": 0.4002346396446228, + "learning_rate": 1.2393414699489375e-05, + "loss": 0.5568, + "step": 30875 + }, + { + "epoch": 0.8477759472817133, + "grad_norm": 0.33578795194625854, + "learning_rate": 1.2392995356633484e-05, + "loss": 0.4619, + "step": 30876 + }, + { + "epoch": 0.8478034047226799, + "grad_norm": 0.44061189889907837, + "learning_rate": 1.2392576009313893e-05, + "loss": 0.51, + "step": 30877 + }, + { + "epoch": 0.8478308621636463, + "grad_norm": 0.33755648136138916, + "learning_rate": 1.2392156657531386e-05, + "loss": 0.513, + "step": 30878 + }, + { + "epoch": 0.8478583196046129, + "grad_norm": 0.4249807894229889, + "learning_rate": 1.2391737301286741e-05, + "loss": 0.5005, + "step": 30879 + }, + { + "epoch": 0.8478857770455793, + "grad_norm": 0.449179470539093, + "learning_rate": 1.2391317940580744e-05, + "loss": 0.5039, + "step": 30880 + }, + { + "epoch": 0.8479132344865459, + "grad_norm": 0.40607166290283203, + "learning_rate": 1.2390898575414179e-05, + "loss": 0.5206, + "step": 30881 + }, + { + "epoch": 0.8479406919275123, + "grad_norm": 0.41393545269966125, + "learning_rate": 1.239047920578782e-05, + "loss": 0.5334, + "step": 30882 + }, + { + "epoch": 0.8479681493684789, + "grad_norm": 0.38351187109947205, + "learning_rate": 1.2390059831702457e-05, + "loss": 0.4956, + "step": 30883 + }, + { + "epoch": 0.8479956068094454, + "grad_norm": 0.37643107771873474, + "learning_rate": 1.2389640453158866e-05, + "loss": 0.4676, + "step": 30884 + }, + { + "epoch": 0.8480230642504119, + "grad_norm": 0.4155269265174866, + "learning_rate": 1.2389221070157837e-05, + "loss": 0.4848, + "step": 30885 + }, + { + "epoch": 0.8480505216913784, + "grad_norm": 0.38235244154930115, + "learning_rate": 1.2388801682700146e-05, + "loss": 0.4545, + "step": 30886 + }, + { + "epoch": 0.8480779791323448, + "grad_norm": 0.4830605089664459, + "learning_rate": 1.2388382290786577e-05, + "loss": 0.582, + "step": 30887 + }, + { + "epoch": 0.8481054365733114, + "grad_norm": 0.3639731705188751, + "learning_rate": 1.2387962894417917e-05, + "loss": 0.4677, + "step": 30888 + }, + { + "epoch": 0.8481328940142778, + "grad_norm": 0.396697998046875, + "learning_rate": 1.238754349359494e-05, + "loss": 0.5334, + "step": 30889 + }, + { + "epoch": 0.8481603514552444, + "grad_norm": 0.4023316502571106, + "learning_rate": 1.2387124088318434e-05, + "loss": 0.4724, + "step": 30890 + }, + { + "epoch": 0.8481878088962109, + "grad_norm": 0.3582085072994232, + "learning_rate": 1.238670467858918e-05, + "loss": 0.5009, + "step": 30891 + }, + { + "epoch": 0.8482152663371774, + "grad_norm": 0.4006249010562897, + "learning_rate": 1.2386285264407958e-05, + "loss": 0.4893, + "step": 30892 + }, + { + "epoch": 0.8482427237781439, + "grad_norm": 0.7233282327651978, + "learning_rate": 1.2385865845775556e-05, + "loss": 0.5721, + "step": 30893 + }, + { + "epoch": 0.8482701812191104, + "grad_norm": 0.3633475601673126, + "learning_rate": 1.2385446422692749e-05, + "loss": 0.4744, + "step": 30894 + }, + { + "epoch": 0.8482976386600769, + "grad_norm": 0.4165462553501129, + "learning_rate": 1.2385026995160323e-05, + "loss": 0.5687, + "step": 30895 + }, + { + "epoch": 0.8483250961010433, + "grad_norm": 4.253535270690918, + "learning_rate": 1.2384607563179063e-05, + "loss": 0.5686, + "step": 30896 + }, + { + "epoch": 0.8483525535420099, + "grad_norm": 0.4614050090312958, + "learning_rate": 1.2384188126749746e-05, + "loss": 0.5211, + "step": 30897 + }, + { + "epoch": 0.8483800109829764, + "grad_norm": 0.3778660297393799, + "learning_rate": 1.2383768685873161e-05, + "loss": 0.4735, + "step": 30898 + }, + { + "epoch": 0.8484074684239429, + "grad_norm": 0.4002494812011719, + "learning_rate": 1.2383349240550082e-05, + "loss": 0.4337, + "step": 30899 + }, + { + "epoch": 0.8484349258649094, + "grad_norm": 0.3857932388782501, + "learning_rate": 1.23829297907813e-05, + "loss": 0.4564, + "step": 30900 + }, + { + "epoch": 0.8484623833058759, + "grad_norm": 0.41697341203689575, + "learning_rate": 1.2382510336567592e-05, + "loss": 0.5093, + "step": 30901 + }, + { + "epoch": 0.8484898407468424, + "grad_norm": 0.37241131067276, + "learning_rate": 1.238209087790974e-05, + "loss": 0.4346, + "step": 30902 + }, + { + "epoch": 0.8485172981878089, + "grad_norm": 0.39849504828453064, + "learning_rate": 1.2381671414808532e-05, + "loss": 0.5315, + "step": 30903 + }, + { + "epoch": 0.8485447556287754, + "grad_norm": 0.35603034496307373, + "learning_rate": 1.2381251947264743e-05, + "loss": 0.3911, + "step": 30904 + }, + { + "epoch": 0.848572213069742, + "grad_norm": 0.4314846992492676, + "learning_rate": 1.2380832475279161e-05, + "loss": 0.5573, + "step": 30905 + }, + { + "epoch": 0.8485996705107084, + "grad_norm": 0.39463120698928833, + "learning_rate": 1.2380412998852567e-05, + "loss": 0.5253, + "step": 30906 + }, + { + "epoch": 0.8486271279516749, + "grad_norm": 0.3880760371685028, + "learning_rate": 1.237999351798574e-05, + "loss": 0.5644, + "step": 30907 + }, + { + "epoch": 0.8486545853926414, + "grad_norm": 0.39166927337646484, + "learning_rate": 1.237957403267947e-05, + "loss": 0.4603, + "step": 30908 + }, + { + "epoch": 0.8486820428336079, + "grad_norm": 0.3631230294704437, + "learning_rate": 1.2379154542934534e-05, + "loss": 0.4736, + "step": 30909 + }, + { + "epoch": 0.8487095002745744, + "grad_norm": 0.36847206950187683, + "learning_rate": 1.2378735048751711e-05, + "loss": 0.4443, + "step": 30910 + }, + { + "epoch": 0.8487369577155409, + "grad_norm": 0.45778828859329224, + "learning_rate": 1.2378315550131793e-05, + "loss": 0.5183, + "step": 30911 + }, + { + "epoch": 0.8487644151565075, + "grad_norm": 0.36599844694137573, + "learning_rate": 1.2377896047075553e-05, + "loss": 0.4225, + "step": 30912 + }, + { + "epoch": 0.8487918725974739, + "grad_norm": 0.4267384707927704, + "learning_rate": 1.2377476539583781e-05, + "loss": 0.4645, + "step": 30913 + }, + { + "epoch": 0.8488193300384405, + "grad_norm": 0.404940664768219, + "learning_rate": 1.2377057027657258e-05, + "loss": 0.501, + "step": 30914 + }, + { + "epoch": 0.8488467874794069, + "grad_norm": 0.3359450697898865, + "learning_rate": 1.237663751129676e-05, + "loss": 0.455, + "step": 30915 + }, + { + "epoch": 0.8488742449203734, + "grad_norm": 0.36983630061149597, + "learning_rate": 1.237621799050308e-05, + "loss": 0.5332, + "step": 30916 + }, + { + "epoch": 0.8489017023613399, + "grad_norm": 0.39427778124809265, + "learning_rate": 1.2375798465276989e-05, + "loss": 0.4515, + "step": 30917 + }, + { + "epoch": 0.8489291598023064, + "grad_norm": 0.3734722137451172, + "learning_rate": 1.237537893561928e-05, + "loss": 0.4393, + "step": 30918 + }, + { + "epoch": 0.848956617243273, + "grad_norm": 0.4404390752315521, + "learning_rate": 1.2374959401530731e-05, + "loss": 0.5158, + "step": 30919 + }, + { + "epoch": 0.8489840746842394, + "grad_norm": 0.4286869168281555, + "learning_rate": 1.2374539863012122e-05, + "loss": 0.5243, + "step": 30920 + }, + { + "epoch": 0.849011532125206, + "grad_norm": 0.409033864736557, + "learning_rate": 1.2374120320064242e-05, + "loss": 0.4906, + "step": 30921 + }, + { + "epoch": 0.8490389895661724, + "grad_norm": 0.36536964774131775, + "learning_rate": 1.237370077268787e-05, + "loss": 0.4399, + "step": 30922 + }, + { + "epoch": 0.849066447007139, + "grad_norm": 0.3922259211540222, + "learning_rate": 1.2373281220883784e-05, + "loss": 0.5575, + "step": 30923 + }, + { + "epoch": 0.8490939044481054, + "grad_norm": 0.3407920300960541, + "learning_rate": 1.2372861664652774e-05, + "loss": 0.4524, + "step": 30924 + }, + { + "epoch": 0.849121361889072, + "grad_norm": 0.8646525144577026, + "learning_rate": 1.2372442103995617e-05, + "loss": 0.5817, + "step": 30925 + }, + { + "epoch": 0.8491488193300385, + "grad_norm": 0.4057696759700775, + "learning_rate": 1.2372022538913103e-05, + "loss": 0.4994, + "step": 30926 + }, + { + "epoch": 0.8491762767710049, + "grad_norm": 0.36860454082489014, + "learning_rate": 1.2371602969406007e-05, + "loss": 0.467, + "step": 30927 + }, + { + "epoch": 0.8492037342119715, + "grad_norm": 0.3949611783027649, + "learning_rate": 1.2371183395475112e-05, + "loss": 0.5215, + "step": 30928 + }, + { + "epoch": 0.8492311916529379, + "grad_norm": 0.3812295198440552, + "learning_rate": 1.2370763817121208e-05, + "loss": 0.5543, + "step": 30929 + }, + { + "epoch": 0.8492586490939045, + "grad_norm": 0.885351300239563, + "learning_rate": 1.2370344234345071e-05, + "loss": 0.5543, + "step": 30930 + }, + { + "epoch": 0.8492861065348709, + "grad_norm": 0.4170966148376465, + "learning_rate": 1.2369924647147484e-05, + "loss": 0.5309, + "step": 30931 + }, + { + "epoch": 0.8493135639758375, + "grad_norm": 0.4011058211326599, + "learning_rate": 1.2369505055529235e-05, + "loss": 0.4939, + "step": 30932 + }, + { + "epoch": 0.8493410214168039, + "grad_norm": 0.4200960397720337, + "learning_rate": 1.2369085459491098e-05, + "loss": 0.4295, + "step": 30933 + }, + { + "epoch": 0.8493684788577704, + "grad_norm": 0.3663303256034851, + "learning_rate": 1.2368665859033864e-05, + "loss": 0.4491, + "step": 30934 + }, + { + "epoch": 0.849395936298737, + "grad_norm": 0.3643839359283447, + "learning_rate": 1.236824625415831e-05, + "loss": 0.4733, + "step": 30935 + }, + { + "epoch": 0.8494233937397034, + "grad_norm": 0.43010213971138, + "learning_rate": 1.2367826644865222e-05, + "loss": 0.5642, + "step": 30936 + }, + { + "epoch": 0.84945085118067, + "grad_norm": 0.4022868573665619, + "learning_rate": 1.2367407031155383e-05, + "loss": 0.5508, + "step": 30937 + }, + { + "epoch": 0.8494783086216364, + "grad_norm": 0.36953771114349365, + "learning_rate": 1.236698741302957e-05, + "loss": 0.4676, + "step": 30938 + }, + { + "epoch": 0.849505766062603, + "grad_norm": 0.4101738929748535, + "learning_rate": 1.2366567790488574e-05, + "loss": 0.473, + "step": 30939 + }, + { + "epoch": 0.8495332235035694, + "grad_norm": 0.34641727805137634, + "learning_rate": 1.2366148163533173e-05, + "loss": 0.449, + "step": 30940 + }, + { + "epoch": 0.849560680944536, + "grad_norm": 0.42998266220092773, + "learning_rate": 1.2365728532164149e-05, + "loss": 0.4828, + "step": 30941 + }, + { + "epoch": 0.8495881383855025, + "grad_norm": 0.419980525970459, + "learning_rate": 1.2365308896382288e-05, + "loss": 0.5492, + "step": 30942 + }, + { + "epoch": 0.849615595826469, + "grad_norm": 0.359362930059433, + "learning_rate": 1.2364889256188368e-05, + "loss": 0.5441, + "step": 30943 + }, + { + "epoch": 0.8496430532674355, + "grad_norm": 0.4072943925857544, + "learning_rate": 1.2364469611583179e-05, + "loss": 0.4741, + "step": 30944 + }, + { + "epoch": 0.8496705107084019, + "grad_norm": 0.4366198480129242, + "learning_rate": 1.2364049962567499e-05, + "loss": 0.5613, + "step": 30945 + }, + { + "epoch": 0.8496979681493685, + "grad_norm": 0.3557508587837219, + "learning_rate": 1.2363630309142108e-05, + "loss": 0.4765, + "step": 30946 + }, + { + "epoch": 0.8497254255903349, + "grad_norm": 0.3666873872280121, + "learning_rate": 1.2363210651307795e-05, + "loss": 0.4756, + "step": 30947 + }, + { + "epoch": 0.8497528830313015, + "grad_norm": 0.4194634258747101, + "learning_rate": 1.2362790989065337e-05, + "loss": 0.5161, + "step": 30948 + }, + { + "epoch": 0.849780340472268, + "grad_norm": 0.3794061839580536, + "learning_rate": 1.2362371322415523e-05, + "loss": 0.4935, + "step": 30949 + }, + { + "epoch": 0.8498077979132345, + "grad_norm": 0.4171138405799866, + "learning_rate": 1.2361951651359131e-05, + "loss": 0.4718, + "step": 30950 + }, + { + "epoch": 0.849835255354201, + "grad_norm": 0.3794862627983093, + "learning_rate": 1.2361531975896943e-05, + "loss": 0.4612, + "step": 30951 + }, + { + "epoch": 0.8498627127951675, + "grad_norm": 0.36075684428215027, + "learning_rate": 1.2361112296029748e-05, + "loss": 0.4438, + "step": 30952 + }, + { + "epoch": 0.849890170236134, + "grad_norm": 0.36159682273864746, + "learning_rate": 1.2360692611758323e-05, + "loss": 0.4521, + "step": 30953 + }, + { + "epoch": 0.8499176276771004, + "grad_norm": 0.42345136404037476, + "learning_rate": 1.2360272923083454e-05, + "loss": 0.4993, + "step": 30954 + }, + { + "epoch": 0.849945085118067, + "grad_norm": 0.3904305398464203, + "learning_rate": 1.2359853230005923e-05, + "loss": 0.4906, + "step": 30955 + }, + { + "epoch": 0.8499725425590335, + "grad_norm": 0.4056706428527832, + "learning_rate": 1.235943353252651e-05, + "loss": 0.4522, + "step": 30956 + }, + { + "epoch": 0.85, + "grad_norm": 0.4858977794647217, + "learning_rate": 1.2359013830646003e-05, + "loss": 0.3685, + "step": 30957 + }, + { + "epoch": 0.8500274574409665, + "grad_norm": 0.42236262559890747, + "learning_rate": 1.2358594124365181e-05, + "loss": 0.5469, + "step": 30958 + }, + { + "epoch": 0.850054914881933, + "grad_norm": 0.39230114221572876, + "learning_rate": 1.2358174413684827e-05, + "loss": 0.5169, + "step": 30959 + }, + { + "epoch": 0.8500823723228995, + "grad_norm": 0.36642521619796753, + "learning_rate": 1.2357754698605729e-05, + "loss": 0.4472, + "step": 30960 + }, + { + "epoch": 0.850109829763866, + "grad_norm": 0.39375102519989014, + "learning_rate": 1.2357334979128661e-05, + "loss": 0.5151, + "step": 30961 + }, + { + "epoch": 0.8501372872048325, + "grad_norm": 0.3421141803264618, + "learning_rate": 1.2356915255254413e-05, + "loss": 0.4046, + "step": 30962 + }, + { + "epoch": 0.850164744645799, + "grad_norm": 0.38372719287872314, + "learning_rate": 1.2356495526983766e-05, + "loss": 0.4952, + "step": 30963 + }, + { + "epoch": 0.8501922020867655, + "grad_norm": 0.3562566637992859, + "learning_rate": 1.2356075794317503e-05, + "loss": 0.4393, + "step": 30964 + }, + { + "epoch": 0.850219659527732, + "grad_norm": 0.4984065592288971, + "learning_rate": 1.2355656057256409e-05, + "loss": 0.4912, + "step": 30965 + }, + { + "epoch": 0.8502471169686985, + "grad_norm": 0.4080946147441864, + "learning_rate": 1.235523631580126e-05, + "loss": 0.5069, + "step": 30966 + }, + { + "epoch": 0.850274574409665, + "grad_norm": 0.388995498418808, + "learning_rate": 1.2354816569952845e-05, + "loss": 0.4864, + "step": 30967 + }, + { + "epoch": 0.8503020318506315, + "grad_norm": 0.41058358550071716, + "learning_rate": 1.235439681971195e-05, + "loss": 0.4768, + "step": 30968 + }, + { + "epoch": 0.850329489291598, + "grad_norm": 0.42632609605789185, + "learning_rate": 1.2353977065079348e-05, + "loss": 0.4174, + "step": 30969 + }, + { + "epoch": 0.8503569467325646, + "grad_norm": 0.4027153253555298, + "learning_rate": 1.235355730605583e-05, + "loss": 0.519, + "step": 30970 + }, + { + "epoch": 0.850384404173531, + "grad_norm": 0.3705084025859833, + "learning_rate": 1.2353137542642176e-05, + "loss": 0.4759, + "step": 30971 + }, + { + "epoch": 0.8504118616144976, + "grad_norm": 0.39150431752204895, + "learning_rate": 1.2352717774839166e-05, + "loss": 0.5855, + "step": 30972 + }, + { + "epoch": 0.850439319055464, + "grad_norm": 0.38210493326187134, + "learning_rate": 1.2352298002647591e-05, + "loss": 0.4873, + "step": 30973 + }, + { + "epoch": 0.8504667764964305, + "grad_norm": 0.4211326241493225, + "learning_rate": 1.2351878226068228e-05, + "loss": 0.505, + "step": 30974 + }, + { + "epoch": 0.850494233937397, + "grad_norm": 0.42518481612205505, + "learning_rate": 1.2351458445101862e-05, + "loss": 0.56, + "step": 30975 + }, + { + "epoch": 0.8505216913783635, + "grad_norm": 0.3892074525356293, + "learning_rate": 1.2351038659749275e-05, + "loss": 0.4527, + "step": 30976 + }, + { + "epoch": 0.8505491488193301, + "grad_norm": 0.34533172845840454, + "learning_rate": 1.2350618870011251e-05, + "loss": 0.482, + "step": 30977 + }, + { + "epoch": 0.8505766062602965, + "grad_norm": 0.4422941505908966, + "learning_rate": 1.2350199075888572e-05, + "loss": 0.4593, + "step": 30978 + }, + { + "epoch": 0.8506040637012631, + "grad_norm": 0.3652843236923218, + "learning_rate": 1.2349779277382022e-05, + "loss": 0.5513, + "step": 30979 + }, + { + "epoch": 0.8506315211422295, + "grad_norm": 0.4604048430919647, + "learning_rate": 1.2349359474492382e-05, + "loss": 0.5314, + "step": 30980 + }, + { + "epoch": 0.8506589785831961, + "grad_norm": 0.38075563311576843, + "learning_rate": 1.2348939667220439e-05, + "loss": 0.5011, + "step": 30981 + }, + { + "epoch": 0.8506864360241625, + "grad_norm": 0.4071119725704193, + "learning_rate": 1.2348519855566971e-05, + "loss": 0.4905, + "step": 30982 + }, + { + "epoch": 0.850713893465129, + "grad_norm": 0.3925634026527405, + "learning_rate": 1.2348100039532769e-05, + "loss": 0.5347, + "step": 30983 + }, + { + "epoch": 0.8507413509060956, + "grad_norm": 0.4392060935497284, + "learning_rate": 1.2347680219118605e-05, + "loss": 0.4231, + "step": 30984 + }, + { + "epoch": 0.850768808347062, + "grad_norm": 0.38869625329971313, + "learning_rate": 1.2347260394325274e-05, + "loss": 0.519, + "step": 30985 + }, + { + "epoch": 0.8507962657880286, + "grad_norm": 0.43110522627830505, + "learning_rate": 1.234684056515355e-05, + "loss": 0.4977, + "step": 30986 + }, + { + "epoch": 0.850823723228995, + "grad_norm": 0.4264211356639862, + "learning_rate": 1.2346420731604219e-05, + "loss": 0.4733, + "step": 30987 + }, + { + "epoch": 0.8508511806699616, + "grad_norm": 0.36479851603507996, + "learning_rate": 1.2346000893678066e-05, + "loss": 0.45, + "step": 30988 + }, + { + "epoch": 0.850878638110928, + "grad_norm": 0.37932509183883667, + "learning_rate": 1.2345581051375871e-05, + "loss": 0.5515, + "step": 30989 + }, + { + "epoch": 0.8509060955518946, + "grad_norm": 0.43035778403282166, + "learning_rate": 1.2345161204698419e-05, + "loss": 0.5568, + "step": 30990 + }, + { + "epoch": 0.8509335529928611, + "grad_norm": 0.3508646786212921, + "learning_rate": 1.2344741353646492e-05, + "loss": 0.465, + "step": 30991 + }, + { + "epoch": 0.8509610104338275, + "grad_norm": 0.37300893664360046, + "learning_rate": 1.2344321498220875e-05, + "loss": 0.4699, + "step": 30992 + }, + { + "epoch": 0.8509884678747941, + "grad_norm": 0.4432971477508545, + "learning_rate": 1.2343901638422351e-05, + "loss": 0.543, + "step": 30993 + }, + { + "epoch": 0.8510159253157605, + "grad_norm": 0.40598854422569275, + "learning_rate": 1.2343481774251702e-05, + "loss": 0.5082, + "step": 30994 + }, + { + "epoch": 0.8510433827567271, + "grad_norm": 0.4135931134223938, + "learning_rate": 1.234306190570971e-05, + "loss": 0.5135, + "step": 30995 + }, + { + "epoch": 0.8510708401976935, + "grad_norm": 0.3538792133331299, + "learning_rate": 1.2342642032797162e-05, + "loss": 0.4506, + "step": 30996 + }, + { + "epoch": 0.8510982976386601, + "grad_norm": 0.41177627444267273, + "learning_rate": 1.2342222155514838e-05, + "loss": 0.4379, + "step": 30997 + }, + { + "epoch": 0.8511257550796266, + "grad_norm": 0.39413607120513916, + "learning_rate": 1.234180227386352e-05, + "loss": 0.5624, + "step": 30998 + }, + { + "epoch": 0.8511532125205931, + "grad_norm": 0.4378010928630829, + "learning_rate": 1.2341382387843999e-05, + "loss": 0.523, + "step": 30999 + }, + { + "epoch": 0.8511806699615596, + "grad_norm": 0.5057529211044312, + "learning_rate": 1.2340962497457048e-05, + "loss": 0.5792, + "step": 31000 + }, + { + "epoch": 0.851208127402526, + "grad_norm": 0.39691704511642456, + "learning_rate": 1.2340542602703456e-05, + "loss": 0.5186, + "step": 31001 + }, + { + "epoch": 0.8512355848434926, + "grad_norm": 0.34117212891578674, + "learning_rate": 1.2340122703584005e-05, + "loss": 0.4398, + "step": 31002 + }, + { + "epoch": 0.851263042284459, + "grad_norm": 0.36317354440689087, + "learning_rate": 1.2339702800099475e-05, + "loss": 0.4828, + "step": 31003 + }, + { + "epoch": 0.8512904997254256, + "grad_norm": 0.34949156641960144, + "learning_rate": 1.2339282892250659e-05, + "loss": 0.4198, + "step": 31004 + }, + { + "epoch": 0.8513179571663921, + "grad_norm": 0.37902507185935974, + "learning_rate": 1.2338862980038329e-05, + "loss": 0.4535, + "step": 31005 + }, + { + "epoch": 0.8513454146073586, + "grad_norm": 0.4213833808898926, + "learning_rate": 1.2338443063463274e-05, + "loss": 0.5033, + "step": 31006 + }, + { + "epoch": 0.8513728720483251, + "grad_norm": 0.40954479575157166, + "learning_rate": 1.2338023142526279e-05, + "loss": 0.5128, + "step": 31007 + }, + { + "epoch": 0.8514003294892916, + "grad_norm": 0.39020708203315735, + "learning_rate": 1.233760321722812e-05, + "loss": 0.4618, + "step": 31008 + }, + { + "epoch": 0.8514277869302581, + "grad_norm": 0.3601935803890228, + "learning_rate": 1.2337183287569588e-05, + "loss": 0.4603, + "step": 31009 + }, + { + "epoch": 0.8514552443712246, + "grad_norm": 0.3897362947463989, + "learning_rate": 1.2336763353551461e-05, + "loss": 0.4672, + "step": 31010 + }, + { + "epoch": 0.8514827018121911, + "grad_norm": 0.4006001651287079, + "learning_rate": 1.2336343415174527e-05, + "loss": 0.4746, + "step": 31011 + }, + { + "epoch": 0.8515101592531577, + "grad_norm": 0.3881980776786804, + "learning_rate": 1.2335923472439566e-05, + "loss": 0.5486, + "step": 31012 + }, + { + "epoch": 0.8515376166941241, + "grad_norm": 0.36915266513824463, + "learning_rate": 1.2335503525347363e-05, + "loss": 0.4749, + "step": 31013 + }, + { + "epoch": 0.8515650741350906, + "grad_norm": 0.3758358657360077, + "learning_rate": 1.2335083573898698e-05, + "loss": 0.4952, + "step": 31014 + }, + { + "epoch": 0.8515925315760571, + "grad_norm": 0.4227577745914459, + "learning_rate": 1.2334663618094358e-05, + "loss": 0.4877, + "step": 31015 + }, + { + "epoch": 0.8516199890170236, + "grad_norm": 0.3784068524837494, + "learning_rate": 1.2334243657935128e-05, + "loss": 0.5003, + "step": 31016 + }, + { + "epoch": 0.8516474464579901, + "grad_norm": 0.38637858629226685, + "learning_rate": 1.2333823693421788e-05, + "loss": 0.4324, + "step": 31017 + }, + { + "epoch": 0.8516749038989566, + "grad_norm": 0.6481234431266785, + "learning_rate": 1.2333403724555118e-05, + "loss": 0.5245, + "step": 31018 + }, + { + "epoch": 0.8517023613399232, + "grad_norm": 0.5134816765785217, + "learning_rate": 1.2332983751335907e-05, + "loss": 0.536, + "step": 31019 + }, + { + "epoch": 0.8517298187808896, + "grad_norm": 0.43678489327430725, + "learning_rate": 1.2332563773764938e-05, + "loss": 0.4471, + "step": 31020 + }, + { + "epoch": 0.8517572762218562, + "grad_norm": 0.3794896602630615, + "learning_rate": 1.2332143791842992e-05, + "loss": 0.48, + "step": 31021 + }, + { + "epoch": 0.8517847336628226, + "grad_norm": 0.39714333415031433, + "learning_rate": 1.2331723805570854e-05, + "loss": 0.5388, + "step": 31022 + }, + { + "epoch": 0.8518121911037891, + "grad_norm": 0.4056181311607361, + "learning_rate": 1.2331303814949308e-05, + "loss": 0.511, + "step": 31023 + }, + { + "epoch": 0.8518396485447556, + "grad_norm": 0.38568830490112305, + "learning_rate": 1.2330883819979134e-05, + "loss": 0.5224, + "step": 31024 + }, + { + "epoch": 0.8518671059857221, + "grad_norm": 0.4196723997592926, + "learning_rate": 1.233046382066112e-05, + "loss": 0.4452, + "step": 31025 + }, + { + "epoch": 0.8518945634266887, + "grad_norm": 0.3842492997646332, + "learning_rate": 1.2330043816996044e-05, + "loss": 0.5052, + "step": 31026 + }, + { + "epoch": 0.8519220208676551, + "grad_norm": 0.3602806329727173, + "learning_rate": 1.2329623808984696e-05, + "loss": 0.6149, + "step": 31027 + }, + { + "epoch": 0.8519494783086217, + "grad_norm": 0.32482197880744934, + "learning_rate": 1.2329203796627853e-05, + "loss": 0.4577, + "step": 31028 + }, + { + "epoch": 0.8519769357495881, + "grad_norm": 0.4096108376979828, + "learning_rate": 1.2328783779926303e-05, + "loss": 0.521, + "step": 31029 + }, + { + "epoch": 0.8520043931905547, + "grad_norm": 0.42781388759613037, + "learning_rate": 1.232836375888083e-05, + "loss": 0.4678, + "step": 31030 + }, + { + "epoch": 0.8520318506315211, + "grad_norm": 10.840044975280762, + "learning_rate": 1.2327943733492211e-05, + "loss": 0.5387, + "step": 31031 + }, + { + "epoch": 0.8520593080724876, + "grad_norm": 0.36492007970809937, + "learning_rate": 1.2327523703761239e-05, + "loss": 0.5016, + "step": 31032 + }, + { + "epoch": 0.8520867655134542, + "grad_norm": 0.393689900636673, + "learning_rate": 1.2327103669688691e-05, + "loss": 0.489, + "step": 31033 + }, + { + "epoch": 0.8521142229544206, + "grad_norm": 0.38645532727241516, + "learning_rate": 1.232668363127535e-05, + "loss": 0.4925, + "step": 31034 + }, + { + "epoch": 0.8521416803953872, + "grad_norm": 0.3617214560508728, + "learning_rate": 1.2326263588522003e-05, + "loss": 0.4303, + "step": 31035 + }, + { + "epoch": 0.8521691378363536, + "grad_norm": 0.392085462808609, + "learning_rate": 1.232584354142943e-05, + "loss": 0.4762, + "step": 31036 + }, + { + "epoch": 0.8521965952773202, + "grad_norm": 0.42371758818626404, + "learning_rate": 1.2325423489998418e-05, + "loss": 0.5399, + "step": 31037 + }, + { + "epoch": 0.8522240527182866, + "grad_norm": 0.38764989376068115, + "learning_rate": 1.2325003434229748e-05, + "loss": 0.4791, + "step": 31038 + }, + { + "epoch": 0.8522515101592532, + "grad_norm": 0.36356252431869507, + "learning_rate": 1.2324583374124206e-05, + "loss": 0.518, + "step": 31039 + }, + { + "epoch": 0.8522789676002197, + "grad_norm": 0.3406558930873871, + "learning_rate": 1.2324163309682576e-05, + "loss": 0.4146, + "step": 31040 + }, + { + "epoch": 0.8523064250411861, + "grad_norm": 0.3828067183494568, + "learning_rate": 1.2323743240905634e-05, + "loss": 0.5172, + "step": 31041 + }, + { + "epoch": 0.8523338824821527, + "grad_norm": 0.3835814595222473, + "learning_rate": 1.2323323167794171e-05, + "loss": 0.4769, + "step": 31042 + }, + { + "epoch": 0.8523613399231191, + "grad_norm": 0.4034666419029236, + "learning_rate": 1.2322903090348973e-05, + "loss": 0.5751, + "step": 31043 + }, + { + "epoch": 0.8523887973640857, + "grad_norm": 0.44863444566726685, + "learning_rate": 1.2322483008570816e-05, + "loss": 0.4815, + "step": 31044 + }, + { + "epoch": 0.8524162548050521, + "grad_norm": 0.4165095090866089, + "learning_rate": 1.2322062922460487e-05, + "loss": 0.4481, + "step": 31045 + }, + { + "epoch": 0.8524437122460187, + "grad_norm": 0.38595640659332275, + "learning_rate": 1.232164283201877e-05, + "loss": 0.428, + "step": 31046 + }, + { + "epoch": 0.8524711696869852, + "grad_norm": 0.37867414951324463, + "learning_rate": 1.2321222737246447e-05, + "loss": 0.4346, + "step": 31047 + }, + { + "epoch": 0.8524986271279517, + "grad_norm": 0.363534539937973, + "learning_rate": 1.2320802638144302e-05, + "loss": 0.5093, + "step": 31048 + }, + { + "epoch": 0.8525260845689182, + "grad_norm": 0.3747383952140808, + "learning_rate": 1.2320382534713121e-05, + "loss": 0.4807, + "step": 31049 + }, + { + "epoch": 0.8525535420098846, + "grad_norm": 0.35314878821372986, + "learning_rate": 1.2319962426953686e-05, + "loss": 0.4626, + "step": 31050 + }, + { + "epoch": 0.8525809994508512, + "grad_norm": 0.3536129891872406, + "learning_rate": 1.2319542314866784e-05, + "loss": 0.454, + "step": 31051 + }, + { + "epoch": 0.8526084568918176, + "grad_norm": 0.38634398579597473, + "learning_rate": 1.2319122198453191e-05, + "loss": 0.4754, + "step": 31052 + }, + { + "epoch": 0.8526359143327842, + "grad_norm": 0.39606526494026184, + "learning_rate": 1.2318702077713696e-05, + "loss": 0.4437, + "step": 31053 + }, + { + "epoch": 0.8526633717737507, + "grad_norm": 0.39848512411117554, + "learning_rate": 1.231828195264908e-05, + "loss": 0.5562, + "step": 31054 + }, + { + "epoch": 0.8526908292147172, + "grad_norm": 0.43289321660995483, + "learning_rate": 1.2317861823260132e-05, + "loss": 0.5352, + "step": 31055 + }, + { + "epoch": 0.8527182866556837, + "grad_norm": 0.529003381729126, + "learning_rate": 1.2317441689547629e-05, + "loss": 0.5452, + "step": 31056 + }, + { + "epoch": 0.8527457440966502, + "grad_norm": 0.4022093117237091, + "learning_rate": 1.2317021551512357e-05, + "loss": 0.5344, + "step": 31057 + }, + { + "epoch": 0.8527732015376167, + "grad_norm": 0.34161177277565, + "learning_rate": 1.2316601409155102e-05, + "loss": 0.4505, + "step": 31058 + }, + { + "epoch": 0.8528006589785831, + "grad_norm": 0.4639519453048706, + "learning_rate": 1.2316181262476646e-05, + "loss": 0.5361, + "step": 31059 + }, + { + "epoch": 0.8528281164195497, + "grad_norm": 0.41029810905456543, + "learning_rate": 1.231576111147777e-05, + "loss": 0.5182, + "step": 31060 + }, + { + "epoch": 0.8528555738605162, + "grad_norm": 0.38545432686805725, + "learning_rate": 1.2315340956159265e-05, + "loss": 0.4168, + "step": 31061 + }, + { + "epoch": 0.8528830313014827, + "grad_norm": 0.35454270243644714, + "learning_rate": 1.231492079652191e-05, + "loss": 0.4266, + "step": 31062 + }, + { + "epoch": 0.8529104887424492, + "grad_norm": 0.42645204067230225, + "learning_rate": 1.2314500632566484e-05, + "loss": 0.4349, + "step": 31063 + }, + { + "epoch": 0.8529379461834157, + "grad_norm": 0.44680285453796387, + "learning_rate": 1.2314080464293778e-05, + "loss": 0.498, + "step": 31064 + }, + { + "epoch": 0.8529654036243822, + "grad_norm": 0.4174591302871704, + "learning_rate": 1.2313660291704575e-05, + "loss": 0.5623, + "step": 31065 + }, + { + "epoch": 0.8529928610653487, + "grad_norm": 0.42842787504196167, + "learning_rate": 1.2313240114799656e-05, + "loss": 0.5087, + "step": 31066 + }, + { + "epoch": 0.8530203185063152, + "grad_norm": 0.3808627128601074, + "learning_rate": 1.2312819933579805e-05, + "loss": 0.3967, + "step": 31067 + }, + { + "epoch": 0.8530477759472818, + "grad_norm": 0.34249722957611084, + "learning_rate": 1.2312399748045807e-05, + "loss": 0.436, + "step": 31068 + }, + { + "epoch": 0.8530752333882482, + "grad_norm": 0.3909807503223419, + "learning_rate": 1.2311979558198445e-05, + "loss": 0.5707, + "step": 31069 + }, + { + "epoch": 0.8531026908292147, + "grad_norm": 0.40110358595848083, + "learning_rate": 1.2311559364038503e-05, + "loss": 0.4264, + "step": 31070 + }, + { + "epoch": 0.8531301482701812, + "grad_norm": 0.36902734637260437, + "learning_rate": 1.2311139165566769e-05, + "loss": 0.4319, + "step": 31071 + }, + { + "epoch": 0.8531576057111477, + "grad_norm": 0.38092124462127686, + "learning_rate": 1.2310718962784018e-05, + "loss": 0.433, + "step": 31072 + }, + { + "epoch": 0.8531850631521142, + "grad_norm": 0.39440736174583435, + "learning_rate": 1.2310298755691041e-05, + "loss": 0.5044, + "step": 31073 + }, + { + "epoch": 0.8532125205930807, + "grad_norm": 0.40205907821655273, + "learning_rate": 1.230987854428862e-05, + "loss": 0.4849, + "step": 31074 + }, + { + "epoch": 0.8532399780340473, + "grad_norm": 0.3771974742412567, + "learning_rate": 1.2309458328577538e-05, + "loss": 0.478, + "step": 31075 + }, + { + "epoch": 0.8532674354750137, + "grad_norm": 0.4436447024345398, + "learning_rate": 1.2309038108558578e-05, + "loss": 0.4888, + "step": 31076 + }, + { + "epoch": 0.8532948929159803, + "grad_norm": 0.44624853134155273, + "learning_rate": 1.2308617884232523e-05, + "loss": 0.4867, + "step": 31077 + }, + { + "epoch": 0.8533223503569467, + "grad_norm": 0.4120721220970154, + "learning_rate": 1.2308197655600162e-05, + "loss": 0.5216, + "step": 31078 + }, + { + "epoch": 0.8533498077979133, + "grad_norm": 0.5084753632545471, + "learning_rate": 1.2307777422662278e-05, + "loss": 0.5665, + "step": 31079 + }, + { + "epoch": 0.8533772652388797, + "grad_norm": 0.458081990480423, + "learning_rate": 1.2307357185419646e-05, + "loss": 0.5175, + "step": 31080 + }, + { + "epoch": 0.8534047226798462, + "grad_norm": 0.4814344644546509, + "learning_rate": 1.230693694387306e-05, + "loss": 0.5037, + "step": 31081 + }, + { + "epoch": 0.8534321801208128, + "grad_norm": 0.3895649313926697, + "learning_rate": 1.2306516698023303e-05, + "loss": 0.5016, + "step": 31082 + }, + { + "epoch": 0.8534596375617792, + "grad_norm": 0.4477512836456299, + "learning_rate": 1.2306096447871152e-05, + "loss": 0.5117, + "step": 31083 + }, + { + "epoch": 0.8534870950027458, + "grad_norm": 0.3832671344280243, + "learning_rate": 1.2305676193417398e-05, + "loss": 0.4342, + "step": 31084 + }, + { + "epoch": 0.8535145524437122, + "grad_norm": 0.3847145736217499, + "learning_rate": 1.2305255934662818e-05, + "loss": 0.5258, + "step": 31085 + }, + { + "epoch": 0.8535420098846788, + "grad_norm": 0.3888496458530426, + "learning_rate": 1.2304835671608204e-05, + "loss": 0.5603, + "step": 31086 + }, + { + "epoch": 0.8535694673256452, + "grad_norm": 0.4082445800304413, + "learning_rate": 1.2304415404254336e-05, + "loss": 0.5365, + "step": 31087 + }, + { + "epoch": 0.8535969247666118, + "grad_norm": 0.4175891876220703, + "learning_rate": 1.2303995132601994e-05, + "loss": 0.4132, + "step": 31088 + }, + { + "epoch": 0.8536243822075783, + "grad_norm": 0.36412301659584045, + "learning_rate": 1.2303574856651968e-05, + "loss": 0.4462, + "step": 31089 + }, + { + "epoch": 0.8536518396485447, + "grad_norm": 0.3987017869949341, + "learning_rate": 1.2303154576405038e-05, + "loss": 0.5003, + "step": 31090 + }, + { + "epoch": 0.8536792970895113, + "grad_norm": 0.3617419898509979, + "learning_rate": 1.2302734291861991e-05, + "loss": 0.478, + "step": 31091 + }, + { + "epoch": 0.8537067545304777, + "grad_norm": 0.3762076199054718, + "learning_rate": 1.2302314003023612e-05, + "loss": 0.462, + "step": 31092 + }, + { + "epoch": 0.8537342119714443, + "grad_norm": 0.4034312665462494, + "learning_rate": 1.2301893709890677e-05, + "loss": 0.563, + "step": 31093 + }, + { + "epoch": 0.8537616694124107, + "grad_norm": 0.38780027627944946, + "learning_rate": 1.230147341246398e-05, + "loss": 0.5566, + "step": 31094 + }, + { + "epoch": 0.8537891268533773, + "grad_norm": 0.3957446813583374, + "learning_rate": 1.23010531107443e-05, + "loss": 0.5553, + "step": 31095 + }, + { + "epoch": 0.8538165842943438, + "grad_norm": 0.43992879986763, + "learning_rate": 1.230063280473242e-05, + "loss": 0.5155, + "step": 31096 + }, + { + "epoch": 0.8538440417353103, + "grad_norm": 0.42226675152778625, + "learning_rate": 1.2300212494429129e-05, + "loss": 0.5362, + "step": 31097 + }, + { + "epoch": 0.8538714991762768, + "grad_norm": 0.4320169687271118, + "learning_rate": 1.2299792179835203e-05, + "loss": 0.5643, + "step": 31098 + }, + { + "epoch": 0.8538989566172432, + "grad_norm": 0.39293718338012695, + "learning_rate": 1.2299371860951432e-05, + "loss": 0.5156, + "step": 31099 + }, + { + "epoch": 0.8539264140582098, + "grad_norm": 0.41615521907806396, + "learning_rate": 1.22989515377786e-05, + "loss": 0.5479, + "step": 31100 + }, + { + "epoch": 0.8539538714991762, + "grad_norm": 0.36430731415748596, + "learning_rate": 1.2298531210317489e-05, + "loss": 0.4606, + "step": 31101 + }, + { + "epoch": 0.8539813289401428, + "grad_norm": 0.3614204525947571, + "learning_rate": 1.2298110878568882e-05, + "loss": 0.4658, + "step": 31102 + }, + { + "epoch": 0.8540087863811093, + "grad_norm": 0.3734956383705139, + "learning_rate": 1.2297690542533566e-05, + "loss": 0.4515, + "step": 31103 + }, + { + "epoch": 0.8540362438220758, + "grad_norm": 0.35006949305534363, + "learning_rate": 1.2297270202212326e-05, + "loss": 0.4527, + "step": 31104 + }, + { + "epoch": 0.8540637012630423, + "grad_norm": 0.42399707436561584, + "learning_rate": 1.2296849857605944e-05, + "loss": 0.5041, + "step": 31105 + }, + { + "epoch": 0.8540911587040088, + "grad_norm": 0.44979849457740784, + "learning_rate": 1.22964295087152e-05, + "loss": 0.4677, + "step": 31106 + }, + { + "epoch": 0.8541186161449753, + "grad_norm": 0.368526428937912, + "learning_rate": 1.2296009155540884e-05, + "loss": 0.4618, + "step": 31107 + }, + { + "epoch": 0.8541460735859417, + "grad_norm": 0.36994123458862305, + "learning_rate": 1.2295588798083777e-05, + "loss": 0.5376, + "step": 31108 + }, + { + "epoch": 0.8541735310269083, + "grad_norm": 0.42082175612449646, + "learning_rate": 1.2295168436344667e-05, + "loss": 0.4728, + "step": 31109 + }, + { + "epoch": 0.8542009884678748, + "grad_norm": 0.3538724184036255, + "learning_rate": 1.2294748070324333e-05, + "loss": 0.4471, + "step": 31110 + }, + { + "epoch": 0.8542284459088413, + "grad_norm": 0.4204031825065613, + "learning_rate": 1.2294327700023561e-05, + "loss": 0.4659, + "step": 31111 + }, + { + "epoch": 0.8542559033498078, + "grad_norm": 0.4318523108959198, + "learning_rate": 1.2293907325443137e-05, + "loss": 0.5382, + "step": 31112 + }, + { + "epoch": 0.8542833607907743, + "grad_norm": 0.3609636723995209, + "learning_rate": 1.2293486946583845e-05, + "loss": 0.4482, + "step": 31113 + }, + { + "epoch": 0.8543108182317408, + "grad_norm": 0.36573779582977295, + "learning_rate": 1.2293066563446465e-05, + "loss": 0.5236, + "step": 31114 + }, + { + "epoch": 0.8543382756727073, + "grad_norm": 0.36562487483024597, + "learning_rate": 1.2292646176031785e-05, + "loss": 0.5203, + "step": 31115 + }, + { + "epoch": 0.8543657331136738, + "grad_norm": 0.37902069091796875, + "learning_rate": 1.2292225784340589e-05, + "loss": 0.4758, + "step": 31116 + }, + { + "epoch": 0.8543931905546404, + "grad_norm": 0.4114731550216675, + "learning_rate": 1.229180538837366e-05, + "loss": 0.5821, + "step": 31117 + }, + { + "epoch": 0.8544206479956068, + "grad_norm": 0.34551650285720825, + "learning_rate": 1.2291384988131782e-05, + "loss": 0.4982, + "step": 31118 + }, + { + "epoch": 0.8544481054365733, + "grad_norm": 0.3871213495731354, + "learning_rate": 1.229096458361574e-05, + "loss": 0.5187, + "step": 31119 + }, + { + "epoch": 0.8544755628775398, + "grad_norm": 0.389157235622406, + "learning_rate": 1.2290544174826317e-05, + "loss": 0.5657, + "step": 31120 + }, + { + "epoch": 0.8545030203185063, + "grad_norm": 0.38362744450569153, + "learning_rate": 1.2290123761764296e-05, + "loss": 0.5315, + "step": 31121 + }, + { + "epoch": 0.8545304777594728, + "grad_norm": 0.4317735731601715, + "learning_rate": 1.2289703344430468e-05, + "loss": 0.6085, + "step": 31122 + }, + { + "epoch": 0.8545579352004393, + "grad_norm": 0.39359980821609497, + "learning_rate": 1.2289282922825611e-05, + "loss": 0.5134, + "step": 31123 + }, + { + "epoch": 0.8545853926414059, + "grad_norm": 0.4333030879497528, + "learning_rate": 1.228886249695051e-05, + "loss": 0.5261, + "step": 31124 + }, + { + "epoch": 0.8546128500823723, + "grad_norm": 0.39051109552383423, + "learning_rate": 1.228844206680595e-05, + "loss": 0.4655, + "step": 31125 + }, + { + "epoch": 0.8546403075233389, + "grad_norm": 0.3853558599948883, + "learning_rate": 1.2288021632392717e-05, + "loss": 0.4976, + "step": 31126 + }, + { + "epoch": 0.8546677649643053, + "grad_norm": 0.34997573494911194, + "learning_rate": 1.2287601193711588e-05, + "loss": 0.4949, + "step": 31127 + }, + { + "epoch": 0.8546952224052718, + "grad_norm": 0.38939419388771057, + "learning_rate": 1.2287180750763358e-05, + "loss": 0.3775, + "step": 31128 + }, + { + "epoch": 0.8547226798462383, + "grad_norm": 0.35364654660224915, + "learning_rate": 1.2286760303548803e-05, + "loss": 0.424, + "step": 31129 + }, + { + "epoch": 0.8547501372872048, + "grad_norm": 0.41810986399650574, + "learning_rate": 1.2286339852068712e-05, + "loss": 0.4748, + "step": 31130 + }, + { + "epoch": 0.8547775947281714, + "grad_norm": 0.3986365795135498, + "learning_rate": 1.2285919396323869e-05, + "loss": 0.517, + "step": 31131 + }, + { + "epoch": 0.8548050521691378, + "grad_norm": 0.41680508852005005, + "learning_rate": 1.2285498936315052e-05, + "loss": 0.5183, + "step": 31132 + }, + { + "epoch": 0.8548325096101044, + "grad_norm": 0.40593385696411133, + "learning_rate": 1.2285078472043056e-05, + "loss": 0.522, + "step": 31133 + }, + { + "epoch": 0.8548599670510708, + "grad_norm": 0.3749670386314392, + "learning_rate": 1.2284658003508654e-05, + "loss": 0.522, + "step": 31134 + }, + { + "epoch": 0.8548874244920374, + "grad_norm": 0.3708672821521759, + "learning_rate": 1.2284237530712636e-05, + "loss": 0.5104, + "step": 31135 + }, + { + "epoch": 0.8549148819330038, + "grad_norm": 0.4841960370540619, + "learning_rate": 1.2283817053655789e-05, + "loss": 0.5766, + "step": 31136 + }, + { + "epoch": 0.8549423393739704, + "grad_norm": 0.4529702961444855, + "learning_rate": 1.2283396572338893e-05, + "loss": 0.4718, + "step": 31137 + }, + { + "epoch": 0.8549697968149369, + "grad_norm": 0.40346720814704895, + "learning_rate": 1.2282976086762735e-05, + "loss": 0.5137, + "step": 31138 + }, + { + "epoch": 0.8549972542559033, + "grad_norm": 0.38118085265159607, + "learning_rate": 1.2282555596928094e-05, + "loss": 0.4926, + "step": 31139 + }, + { + "epoch": 0.8550247116968699, + "grad_norm": 0.4172128140926361, + "learning_rate": 1.228213510283576e-05, + "loss": 0.4978, + "step": 31140 + }, + { + "epoch": 0.8550521691378363, + "grad_norm": 0.6619917750358582, + "learning_rate": 1.228171460448652e-05, + "loss": 0.5456, + "step": 31141 + }, + { + "epoch": 0.8550796265788029, + "grad_norm": 0.41447141766548157, + "learning_rate": 1.2281294101881147e-05, + "loss": 0.5437, + "step": 31142 + }, + { + "epoch": 0.8551070840197693, + "grad_norm": 0.44310644268989563, + "learning_rate": 1.2280873595020438e-05, + "loss": 0.4949, + "step": 31143 + }, + { + "epoch": 0.8551345414607359, + "grad_norm": 0.38525375723838806, + "learning_rate": 1.2280453083905171e-05, + "loss": 0.4805, + "step": 31144 + }, + { + "epoch": 0.8551619989017024, + "grad_norm": 0.36892151832580566, + "learning_rate": 1.2280032568536126e-05, + "loss": 0.4585, + "step": 31145 + }, + { + "epoch": 0.8551894563426689, + "grad_norm": 0.3476870656013489, + "learning_rate": 1.2279612048914097e-05, + "loss": 0.4793, + "step": 31146 + }, + { + "epoch": 0.8552169137836354, + "grad_norm": 0.4255286455154419, + "learning_rate": 1.2279191525039864e-05, + "loss": 0.5564, + "step": 31147 + }, + { + "epoch": 0.8552443712246018, + "grad_norm": 0.45398518443107605, + "learning_rate": 1.227877099691421e-05, + "loss": 0.5771, + "step": 31148 + }, + { + "epoch": 0.8552718286655684, + "grad_norm": 0.4463924169540405, + "learning_rate": 1.2278350464537923e-05, + "loss": 0.5205, + "step": 31149 + }, + { + "epoch": 0.8552992861065348, + "grad_norm": 0.4087491035461426, + "learning_rate": 1.2277929927911782e-05, + "loss": 0.5005, + "step": 31150 + }, + { + "epoch": 0.8553267435475014, + "grad_norm": 0.3750789761543274, + "learning_rate": 1.2277509387036577e-05, + "loss": 0.4745, + "step": 31151 + }, + { + "epoch": 0.8553542009884679, + "grad_norm": 0.3907051086425781, + "learning_rate": 1.2277088841913089e-05, + "loss": 0.4281, + "step": 31152 + }, + { + "epoch": 0.8553816584294344, + "grad_norm": 0.35830461978912354, + "learning_rate": 1.2276668292542105e-05, + "loss": 0.5077, + "step": 31153 + }, + { + "epoch": 0.8554091158704009, + "grad_norm": 0.40933358669281006, + "learning_rate": 1.2276247738924407e-05, + "loss": 0.5135, + "step": 31154 + }, + { + "epoch": 0.8554365733113674, + "grad_norm": 0.3851228356361389, + "learning_rate": 1.227582718106078e-05, + "loss": 0.4432, + "step": 31155 + }, + { + "epoch": 0.8554640307523339, + "grad_norm": 0.44919613003730774, + "learning_rate": 1.227540661895201e-05, + "loss": 0.5504, + "step": 31156 + }, + { + "epoch": 0.8554914881933003, + "grad_norm": 0.5030062198638916, + "learning_rate": 1.2274986052598881e-05, + "loss": 0.56, + "step": 31157 + }, + { + "epoch": 0.8555189456342669, + "grad_norm": 0.36782172322273254, + "learning_rate": 1.2274565482002174e-05, + "loss": 0.4718, + "step": 31158 + }, + { + "epoch": 0.8555464030752334, + "grad_norm": 0.38524407148361206, + "learning_rate": 1.227414490716268e-05, + "loss": 0.4079, + "step": 31159 + }, + { + "epoch": 0.8555738605161999, + "grad_norm": 0.41931745409965515, + "learning_rate": 1.2273724328081176e-05, + "loss": 0.5874, + "step": 31160 + }, + { + "epoch": 0.8556013179571664, + "grad_norm": 0.4634385406970978, + "learning_rate": 1.2273303744758454e-05, + "loss": 0.5462, + "step": 31161 + }, + { + "epoch": 0.8556287753981329, + "grad_norm": 0.4506748914718628, + "learning_rate": 1.2272883157195292e-05, + "loss": 0.5103, + "step": 31162 + }, + { + "epoch": 0.8556562328390994, + "grad_norm": 0.41302892565727234, + "learning_rate": 1.2272462565392477e-05, + "loss": 0.5023, + "step": 31163 + }, + { + "epoch": 0.8556836902800659, + "grad_norm": 0.37175488471984863, + "learning_rate": 1.22720419693508e-05, + "loss": 0.4691, + "step": 31164 + }, + { + "epoch": 0.8557111477210324, + "grad_norm": 0.3931663930416107, + "learning_rate": 1.2271621369071034e-05, + "loss": 0.6153, + "step": 31165 + }, + { + "epoch": 0.855738605161999, + "grad_norm": 0.35619550943374634, + "learning_rate": 1.227120076455397e-05, + "loss": 0.4739, + "step": 31166 + }, + { + "epoch": 0.8557660626029654, + "grad_norm": 0.3666619062423706, + "learning_rate": 1.2270780155800392e-05, + "loss": 0.5373, + "step": 31167 + }, + { + "epoch": 0.8557935200439319, + "grad_norm": 0.37692150473594666, + "learning_rate": 1.2270359542811086e-05, + "loss": 0.5023, + "step": 31168 + }, + { + "epoch": 0.8558209774848984, + "grad_norm": 0.3557114601135254, + "learning_rate": 1.2269938925586833e-05, + "loss": 0.4497, + "step": 31169 + }, + { + "epoch": 0.8558484349258649, + "grad_norm": 0.5110921859741211, + "learning_rate": 1.2269518304128419e-05, + "loss": 0.5101, + "step": 31170 + }, + { + "epoch": 0.8558758923668314, + "grad_norm": 0.4081737995147705, + "learning_rate": 1.226909767843663e-05, + "loss": 0.4602, + "step": 31171 + }, + { + "epoch": 0.8559033498077979, + "grad_norm": 0.3897005319595337, + "learning_rate": 1.2268677048512252e-05, + "loss": 0.4427, + "step": 31172 + }, + { + "epoch": 0.8559308072487645, + "grad_norm": 0.33740636706352234, + "learning_rate": 1.2268256414356063e-05, + "loss": 0.4621, + "step": 31173 + }, + { + "epoch": 0.8559582646897309, + "grad_norm": 0.36769115924835205, + "learning_rate": 1.2267835775968855e-05, + "loss": 0.4925, + "step": 31174 + }, + { + "epoch": 0.8559857221306975, + "grad_norm": 0.39439424872398376, + "learning_rate": 1.2267415133351409e-05, + "loss": 0.4972, + "step": 31175 + }, + { + "epoch": 0.8560131795716639, + "grad_norm": 0.4376014471054077, + "learning_rate": 1.2266994486504507e-05, + "loss": 0.5121, + "step": 31176 + }, + { + "epoch": 0.8560406370126304, + "grad_norm": 0.3644297122955322, + "learning_rate": 1.2266573835428938e-05, + "loss": 0.4783, + "step": 31177 + }, + { + "epoch": 0.8560680944535969, + "grad_norm": 0.4137565791606903, + "learning_rate": 1.2266153180125486e-05, + "loss": 0.43, + "step": 31178 + }, + { + "epoch": 0.8560955518945634, + "grad_norm": 0.36673516035079956, + "learning_rate": 1.2265732520594938e-05, + "loss": 0.4023, + "step": 31179 + }, + { + "epoch": 0.85612300933553, + "grad_norm": 0.4770660102367401, + "learning_rate": 1.2265311856838073e-05, + "loss": 0.4638, + "step": 31180 + }, + { + "epoch": 0.8561504667764964, + "grad_norm": 0.4561827480792999, + "learning_rate": 1.2264891188855676e-05, + "loss": 0.4489, + "step": 31181 + }, + { + "epoch": 0.856177924217463, + "grad_norm": 0.33011594414711, + "learning_rate": 1.226447051664854e-05, + "loss": 0.4689, + "step": 31182 + }, + { + "epoch": 0.8562053816584294, + "grad_norm": 0.3626307249069214, + "learning_rate": 1.2264049840217438e-05, + "loss": 0.4255, + "step": 31183 + }, + { + "epoch": 0.856232839099396, + "grad_norm": 0.42238473892211914, + "learning_rate": 1.2263629159563162e-05, + "loss": 0.4437, + "step": 31184 + }, + { + "epoch": 0.8562602965403624, + "grad_norm": 0.41838133335113525, + "learning_rate": 1.2263208474686498e-05, + "loss": 0.4888, + "step": 31185 + }, + { + "epoch": 0.856287753981329, + "grad_norm": 0.35725274682044983, + "learning_rate": 1.2262787785588223e-05, + "loss": 0.4731, + "step": 31186 + }, + { + "epoch": 0.8563152114222955, + "grad_norm": 0.46352657675743103, + "learning_rate": 1.2262367092269131e-05, + "loss": 0.6084, + "step": 31187 + }, + { + "epoch": 0.8563426688632619, + "grad_norm": 0.45977890491485596, + "learning_rate": 1.2261946394730002e-05, + "loss": 0.5339, + "step": 31188 + }, + { + "epoch": 0.8563701263042285, + "grad_norm": 0.4117679297924042, + "learning_rate": 1.226152569297162e-05, + "loss": 0.5067, + "step": 31189 + }, + { + "epoch": 0.8563975837451949, + "grad_norm": 0.7090790867805481, + "learning_rate": 1.226110498699477e-05, + "loss": 0.5712, + "step": 31190 + }, + { + "epoch": 0.8564250411861615, + "grad_norm": 0.5142337083816528, + "learning_rate": 1.2260684276800237e-05, + "loss": 0.5874, + "step": 31191 + }, + { + "epoch": 0.8564524986271279, + "grad_norm": 0.42770060896873474, + "learning_rate": 1.2260263562388808e-05, + "loss": 0.5211, + "step": 31192 + }, + { + "epoch": 0.8564799560680945, + "grad_norm": 0.39739400148391724, + "learning_rate": 1.2259842843761265e-05, + "loss": 0.5639, + "step": 31193 + }, + { + "epoch": 0.856507413509061, + "grad_norm": 0.45155060291290283, + "learning_rate": 1.2259422120918393e-05, + "loss": 0.4859, + "step": 31194 + }, + { + "epoch": 0.8565348709500274, + "grad_norm": 0.3719451129436493, + "learning_rate": 1.2259001393860981e-05, + "loss": 0.39, + "step": 31195 + }, + { + "epoch": 0.856562328390994, + "grad_norm": 0.3901939392089844, + "learning_rate": 1.2258580662589805e-05, + "loss": 0.5585, + "step": 31196 + }, + { + "epoch": 0.8565897858319604, + "grad_norm": 0.3525839149951935, + "learning_rate": 1.2258159927105659e-05, + "loss": 0.4557, + "step": 31197 + }, + { + "epoch": 0.856617243272927, + "grad_norm": 0.3987579047679901, + "learning_rate": 1.2257739187409326e-05, + "loss": 0.4852, + "step": 31198 + }, + { + "epoch": 0.8566447007138934, + "grad_norm": 0.37753841280937195, + "learning_rate": 1.2257318443501585e-05, + "loss": 0.5095, + "step": 31199 + }, + { + "epoch": 0.85667215815486, + "grad_norm": 0.4072312116622925, + "learning_rate": 1.2256897695383226e-05, + "loss": 0.5053, + "step": 31200 + }, + { + "epoch": 0.8566996155958264, + "grad_norm": 0.3973519504070282, + "learning_rate": 1.2256476943055031e-05, + "loss": 0.4955, + "step": 31201 + }, + { + "epoch": 0.856727073036793, + "grad_norm": 0.4375503957271576, + "learning_rate": 1.2256056186517789e-05, + "loss": 0.5388, + "step": 31202 + }, + { + "epoch": 0.8567545304777595, + "grad_norm": 0.3835250735282898, + "learning_rate": 1.225563542577228e-05, + "loss": 0.5234, + "step": 31203 + }, + { + "epoch": 0.856781987918726, + "grad_norm": 1.0030382871627808, + "learning_rate": 1.2255214660819292e-05, + "loss": 0.4914, + "step": 31204 + }, + { + "epoch": 0.8568094453596925, + "grad_norm": 0.40137964487075806, + "learning_rate": 1.2254793891659609e-05, + "loss": 0.5624, + "step": 31205 + }, + { + "epoch": 0.8568369028006589, + "grad_norm": 0.36756718158721924, + "learning_rate": 1.2254373118294018e-05, + "loss": 0.5007, + "step": 31206 + }, + { + "epoch": 0.8568643602416255, + "grad_norm": 0.3762566149234772, + "learning_rate": 1.2253952340723296e-05, + "loss": 0.488, + "step": 31207 + }, + { + "epoch": 0.8568918176825919, + "grad_norm": 0.6128677129745483, + "learning_rate": 1.225353155894824e-05, + "loss": 0.503, + "step": 31208 + }, + { + "epoch": 0.8569192751235585, + "grad_norm": 0.44457894563674927, + "learning_rate": 1.2253110772969623e-05, + "loss": 0.5079, + "step": 31209 + }, + { + "epoch": 0.856946732564525, + "grad_norm": 0.3715469539165497, + "learning_rate": 1.2252689982788238e-05, + "loss": 0.4334, + "step": 31210 + }, + { + "epoch": 0.8569741900054915, + "grad_norm": 0.45603036880493164, + "learning_rate": 1.2252269188404866e-05, + "loss": 0.4049, + "step": 31211 + }, + { + "epoch": 0.857001647446458, + "grad_norm": 0.5046284794807434, + "learning_rate": 1.2251848389820293e-05, + "loss": 0.5554, + "step": 31212 + }, + { + "epoch": 0.8570291048874245, + "grad_norm": 0.3989475667476654, + "learning_rate": 1.2251427587035305e-05, + "loss": 0.5416, + "step": 31213 + }, + { + "epoch": 0.857056562328391, + "grad_norm": 0.43352210521698, + "learning_rate": 1.2251006780050684e-05, + "loss": 0.4883, + "step": 31214 + }, + { + "epoch": 0.8570840197693574, + "grad_norm": 0.4373731315135956, + "learning_rate": 1.2250585968867222e-05, + "loss": 0.5661, + "step": 31215 + }, + { + "epoch": 0.857111477210324, + "grad_norm": 0.4560258686542511, + "learning_rate": 1.2250165153485696e-05, + "loss": 0.5817, + "step": 31216 + }, + { + "epoch": 0.8571389346512905, + "grad_norm": 0.399280309677124, + "learning_rate": 1.2249744333906892e-05, + "loss": 0.5201, + "step": 31217 + }, + { + "epoch": 0.857166392092257, + "grad_norm": 0.40258800983428955, + "learning_rate": 1.2249323510131599e-05, + "loss": 0.5231, + "step": 31218 + }, + { + "epoch": 0.8571938495332235, + "grad_norm": 0.37941837310791016, + "learning_rate": 1.2248902682160597e-05, + "loss": 0.4857, + "step": 31219 + }, + { + "epoch": 0.85722130697419, + "grad_norm": 0.416501522064209, + "learning_rate": 1.2248481849994675e-05, + "loss": 0.4703, + "step": 31220 + }, + { + "epoch": 0.8572487644151565, + "grad_norm": 0.3670039474964142, + "learning_rate": 1.2248061013634619e-05, + "loss": 0.4211, + "step": 31221 + }, + { + "epoch": 0.857276221856123, + "grad_norm": 0.3630046248435974, + "learning_rate": 1.2247640173081207e-05, + "loss": 0.4684, + "step": 31222 + }, + { + "epoch": 0.8573036792970895, + "grad_norm": 0.3955976366996765, + "learning_rate": 1.2247219328335233e-05, + "loss": 0.5305, + "step": 31223 + }, + { + "epoch": 0.857331136738056, + "grad_norm": 0.403542697429657, + "learning_rate": 1.2246798479397477e-05, + "loss": 0.4957, + "step": 31224 + }, + { + "epoch": 0.8573585941790225, + "grad_norm": 0.41247448325157166, + "learning_rate": 1.2246377626268725e-05, + "loss": 0.5191, + "step": 31225 + }, + { + "epoch": 0.857386051619989, + "grad_norm": 0.44509851932525635, + "learning_rate": 1.2245956768949761e-05, + "loss": 0.4993, + "step": 31226 + }, + { + "epoch": 0.8574135090609555, + "grad_norm": 0.363427996635437, + "learning_rate": 1.2245535907441371e-05, + "loss": 0.4672, + "step": 31227 + }, + { + "epoch": 0.857440966501922, + "grad_norm": 0.42820101976394653, + "learning_rate": 1.224511504174434e-05, + "loss": 0.4713, + "step": 31228 + }, + { + "epoch": 0.8574684239428885, + "grad_norm": 0.41293075680732727, + "learning_rate": 1.224469417185945e-05, + "loss": 0.474, + "step": 31229 + }, + { + "epoch": 0.857495881383855, + "grad_norm": 0.391203373670578, + "learning_rate": 1.2244273297787492e-05, + "loss": 0.5106, + "step": 31230 + }, + { + "epoch": 0.8575233388248216, + "grad_norm": 0.4602276384830475, + "learning_rate": 1.2243852419529247e-05, + "loss": 0.4738, + "step": 31231 + }, + { + "epoch": 0.857550796265788, + "grad_norm": 0.39129823446273804, + "learning_rate": 1.2243431537085501e-05, + "loss": 0.3832, + "step": 31232 + }, + { + "epoch": 0.8575782537067546, + "grad_norm": 0.4594971835613251, + "learning_rate": 1.2243010650457041e-05, + "loss": 0.5376, + "step": 31233 + }, + { + "epoch": 0.857605711147721, + "grad_norm": 0.36841881275177, + "learning_rate": 1.2242589759644648e-05, + "loss": 0.438, + "step": 31234 + }, + { + "epoch": 0.8576331685886875, + "grad_norm": 0.3976348340511322, + "learning_rate": 1.2242168864649112e-05, + "loss": 0.4897, + "step": 31235 + }, + { + "epoch": 0.857660626029654, + "grad_norm": 0.3530547618865967, + "learning_rate": 1.2241747965471211e-05, + "loss": 0.4706, + "step": 31236 + }, + { + "epoch": 0.8576880834706205, + "grad_norm": 0.3832060396671295, + "learning_rate": 1.2241327062111738e-05, + "loss": 0.4591, + "step": 31237 + }, + { + "epoch": 0.8577155409115871, + "grad_norm": 0.44456860423088074, + "learning_rate": 1.2240906154571475e-05, + "loss": 0.5115, + "step": 31238 + }, + { + "epoch": 0.8577429983525535, + "grad_norm": 0.38424327969551086, + "learning_rate": 1.2240485242851205e-05, + "loss": 0.4642, + "step": 31239 + }, + { + "epoch": 0.8577704557935201, + "grad_norm": 0.40051573514938354, + "learning_rate": 1.2240064326951717e-05, + "loss": 0.5379, + "step": 31240 + }, + { + "epoch": 0.8577979132344865, + "grad_norm": 0.40379762649536133, + "learning_rate": 1.223964340687379e-05, + "loss": 0.5596, + "step": 31241 + }, + { + "epoch": 0.8578253706754531, + "grad_norm": 0.31290313601493835, + "learning_rate": 1.2239222482618218e-05, + "loss": 0.364, + "step": 31242 + }, + { + "epoch": 0.8578528281164195, + "grad_norm": 0.34535205364227295, + "learning_rate": 1.223880155418578e-05, + "loss": 0.4865, + "step": 31243 + }, + { + "epoch": 0.857880285557386, + "grad_norm": 0.5482646226882935, + "learning_rate": 1.2238380621577261e-05, + "loss": 0.4985, + "step": 31244 + }, + { + "epoch": 0.8579077429983526, + "grad_norm": 0.5475465655326843, + "learning_rate": 1.223795968479345e-05, + "loss": 0.4205, + "step": 31245 + }, + { + "epoch": 0.857935200439319, + "grad_norm": 0.40413138270378113, + "learning_rate": 1.2237538743835127e-05, + "loss": 0.4974, + "step": 31246 + }, + { + "epoch": 0.8579626578802856, + "grad_norm": 0.379414826631546, + "learning_rate": 1.2237117798703082e-05, + "loss": 0.4697, + "step": 31247 + }, + { + "epoch": 0.857990115321252, + "grad_norm": 0.36180299520492554, + "learning_rate": 1.2236696849398097e-05, + "loss": 0.4682, + "step": 31248 + }, + { + "epoch": 0.8580175727622186, + "grad_norm": 0.389423131942749, + "learning_rate": 1.2236275895920962e-05, + "loss": 0.4728, + "step": 31249 + }, + { + "epoch": 0.858045030203185, + "grad_norm": 0.4613798260688782, + "learning_rate": 1.2235854938272456e-05, + "loss": 0.5228, + "step": 31250 + }, + { + "epoch": 0.8580724876441516, + "grad_norm": 0.41690778732299805, + "learning_rate": 1.2235433976453366e-05, + "loss": 0.6017, + "step": 31251 + }, + { + "epoch": 0.8580999450851181, + "grad_norm": 0.4271576404571533, + "learning_rate": 1.223501301046448e-05, + "loss": 0.4387, + "step": 31252 + }, + { + "epoch": 0.8581274025260845, + "grad_norm": 0.41105902194976807, + "learning_rate": 1.2234592040306579e-05, + "loss": 0.5235, + "step": 31253 + }, + { + "epoch": 0.8581548599670511, + "grad_norm": 0.4484356641769409, + "learning_rate": 1.2234171065980454e-05, + "loss": 0.4543, + "step": 31254 + }, + { + "epoch": 0.8581823174080175, + "grad_norm": 0.3563523292541504, + "learning_rate": 1.2233750087486887e-05, + "loss": 0.4927, + "step": 31255 + }, + { + "epoch": 0.8582097748489841, + "grad_norm": 0.32308751344680786, + "learning_rate": 1.223332910482666e-05, + "loss": 0.4211, + "step": 31256 + }, + { + "epoch": 0.8582372322899505, + "grad_norm": 0.3844435513019562, + "learning_rate": 1.2232908118000564e-05, + "loss": 0.5124, + "step": 31257 + }, + { + "epoch": 0.8582646897309171, + "grad_norm": 0.42347580194473267, + "learning_rate": 1.2232487127009378e-05, + "loss": 0.5857, + "step": 31258 + }, + { + "epoch": 0.8582921471718836, + "grad_norm": 0.40339529514312744, + "learning_rate": 1.2232066131853895e-05, + "loss": 0.5006, + "step": 31259 + }, + { + "epoch": 0.8583196046128501, + "grad_norm": 0.41514450311660767, + "learning_rate": 1.2231645132534894e-05, + "loss": 0.5127, + "step": 31260 + }, + { + "epoch": 0.8583470620538166, + "grad_norm": 0.3713267743587494, + "learning_rate": 1.2231224129053164e-05, + "loss": 0.4703, + "step": 31261 + }, + { + "epoch": 0.858374519494783, + "grad_norm": 0.41198885440826416, + "learning_rate": 1.223080312140949e-05, + "loss": 0.481, + "step": 31262 + }, + { + "epoch": 0.8584019769357496, + "grad_norm": 0.39830541610717773, + "learning_rate": 1.223038210960465e-05, + "loss": 0.5438, + "step": 31263 + }, + { + "epoch": 0.858429434376716, + "grad_norm": 0.3979830741882324, + "learning_rate": 1.2229961093639441e-05, + "loss": 0.4577, + "step": 31264 + }, + { + "epoch": 0.8584568918176826, + "grad_norm": 0.38529831171035767, + "learning_rate": 1.2229540073514645e-05, + "loss": 0.4845, + "step": 31265 + }, + { + "epoch": 0.8584843492586491, + "grad_norm": 0.3619195222854614, + "learning_rate": 1.222911904923104e-05, + "loss": 0.4505, + "step": 31266 + }, + { + "epoch": 0.8585118066996156, + "grad_norm": 0.44146260619163513, + "learning_rate": 1.222869802078942e-05, + "loss": 0.5437, + "step": 31267 + }, + { + "epoch": 0.8585392641405821, + "grad_norm": 0.4120608866214752, + "learning_rate": 1.2228276988190567e-05, + "loss": 0.4967, + "step": 31268 + }, + { + "epoch": 0.8585667215815486, + "grad_norm": 0.38189375400543213, + "learning_rate": 1.222785595143526e-05, + "loss": 0.5191, + "step": 31269 + }, + { + "epoch": 0.8585941790225151, + "grad_norm": 0.3514060974121094, + "learning_rate": 1.2227434910524298e-05, + "loss": 0.4154, + "step": 31270 + }, + { + "epoch": 0.8586216364634816, + "grad_norm": 0.38597914576530457, + "learning_rate": 1.2227013865458456e-05, + "loss": 0.4802, + "step": 31271 + }, + { + "epoch": 0.8586490939044481, + "grad_norm": 0.4151039123535156, + "learning_rate": 1.2226592816238523e-05, + "loss": 0.5752, + "step": 31272 + }, + { + "epoch": 0.8586765513454147, + "grad_norm": 0.38063693046569824, + "learning_rate": 1.2226171762865285e-05, + "loss": 0.4466, + "step": 31273 + }, + { + "epoch": 0.8587040087863811, + "grad_norm": 0.4031619727611542, + "learning_rate": 1.2225750705339521e-05, + "loss": 0.4795, + "step": 31274 + }, + { + "epoch": 0.8587314662273476, + "grad_norm": 0.4467563033103943, + "learning_rate": 1.2225329643662028e-05, + "loss": 0.4599, + "step": 31275 + }, + { + "epoch": 0.8587589236683141, + "grad_norm": 0.41717079281806946, + "learning_rate": 1.2224908577833578e-05, + "loss": 0.4782, + "step": 31276 + }, + { + "epoch": 0.8587863811092806, + "grad_norm": 0.3691525459289551, + "learning_rate": 1.2224487507854969e-05, + "loss": 0.472, + "step": 31277 + }, + { + "epoch": 0.8588138385502471, + "grad_norm": 0.42044100165367126, + "learning_rate": 1.222406643372698e-05, + "loss": 0.5196, + "step": 31278 + }, + { + "epoch": 0.8588412959912136, + "grad_norm": 0.44205033779144287, + "learning_rate": 1.2223645355450396e-05, + "loss": 0.5333, + "step": 31279 + }, + { + "epoch": 0.8588687534321802, + "grad_norm": 0.4392353892326355, + "learning_rate": 1.2223224273026004e-05, + "loss": 0.493, + "step": 31280 + }, + { + "epoch": 0.8588962108731466, + "grad_norm": 0.388270765542984, + "learning_rate": 1.222280318645459e-05, + "loss": 0.4187, + "step": 31281 + }, + { + "epoch": 0.8589236683141132, + "grad_norm": 0.4832018315792084, + "learning_rate": 1.2222382095736938e-05, + "loss": 0.5345, + "step": 31282 + }, + { + "epoch": 0.8589511257550796, + "grad_norm": 0.554000198841095, + "learning_rate": 1.2221961000873834e-05, + "loss": 0.502, + "step": 31283 + }, + { + "epoch": 0.8589785831960461, + "grad_norm": 0.3446379601955414, + "learning_rate": 1.2221539901866062e-05, + "loss": 0.4467, + "step": 31284 + }, + { + "epoch": 0.8590060406370126, + "grad_norm": 0.4168390929698944, + "learning_rate": 1.2221118798714413e-05, + "loss": 0.5244, + "step": 31285 + }, + { + "epoch": 0.8590334980779791, + "grad_norm": 0.4153856933116913, + "learning_rate": 1.2220697691419667e-05, + "loss": 0.5098, + "step": 31286 + }, + { + "epoch": 0.8590609555189457, + "grad_norm": 0.3888363838195801, + "learning_rate": 1.2220276579982609e-05, + "loss": 0.555, + "step": 31287 + }, + { + "epoch": 0.8590884129599121, + "grad_norm": 0.371083527803421, + "learning_rate": 1.221985546440403e-05, + "loss": 0.5489, + "step": 31288 + }, + { + "epoch": 0.8591158704008787, + "grad_norm": 0.3912227153778076, + "learning_rate": 1.2219434344684706e-05, + "loss": 0.4737, + "step": 31289 + }, + { + "epoch": 0.8591433278418451, + "grad_norm": 0.41111046075820923, + "learning_rate": 1.2219013220825433e-05, + "loss": 0.4033, + "step": 31290 + }, + { + "epoch": 0.8591707852828117, + "grad_norm": 0.4009310305118561, + "learning_rate": 1.2218592092826993e-05, + "loss": 0.4607, + "step": 31291 + }, + { + "epoch": 0.8591982427237781, + "grad_norm": 0.46475574374198914, + "learning_rate": 1.2218170960690168e-05, + "loss": 0.5552, + "step": 31292 + }, + { + "epoch": 0.8592257001647446, + "grad_norm": 0.47291719913482666, + "learning_rate": 1.2217749824415749e-05, + "loss": 0.471, + "step": 31293 + }, + { + "epoch": 0.8592531576057112, + "grad_norm": 0.39113548398017883, + "learning_rate": 1.2217328684004515e-05, + "loss": 0.4596, + "step": 31294 + }, + { + "epoch": 0.8592806150466776, + "grad_norm": 0.3748525381088257, + "learning_rate": 1.2216907539457258e-05, + "loss": 0.4591, + "step": 31295 + }, + { + "epoch": 0.8593080724876442, + "grad_norm": 0.46413442492485046, + "learning_rate": 1.2216486390774761e-05, + "loss": 0.4306, + "step": 31296 + }, + { + "epoch": 0.8593355299286106, + "grad_norm": 0.3811872899532318, + "learning_rate": 1.2216065237957809e-05, + "loss": 0.4727, + "step": 31297 + }, + { + "epoch": 0.8593629873695772, + "grad_norm": 0.4518660306930542, + "learning_rate": 1.2215644081007188e-05, + "loss": 0.558, + "step": 31298 + }, + { + "epoch": 0.8593904448105436, + "grad_norm": 0.36762717366218567, + "learning_rate": 1.2215222919923685e-05, + "loss": 0.5163, + "step": 31299 + }, + { + "epoch": 0.8594179022515102, + "grad_norm": 0.3962073028087616, + "learning_rate": 1.221480175470808e-05, + "loss": 0.4161, + "step": 31300 + }, + { + "epoch": 0.8594453596924767, + "grad_norm": 0.8189207911491394, + "learning_rate": 1.2214380585361168e-05, + "loss": 0.4844, + "step": 31301 + }, + { + "epoch": 0.8594728171334431, + "grad_norm": 0.4133876860141754, + "learning_rate": 1.2213959411883724e-05, + "loss": 0.6187, + "step": 31302 + }, + { + "epoch": 0.8595002745744097, + "grad_norm": 0.4427950084209442, + "learning_rate": 1.2213538234276545e-05, + "loss": 0.47, + "step": 31303 + }, + { + "epoch": 0.8595277320153761, + "grad_norm": 0.3886093497276306, + "learning_rate": 1.2213117052540408e-05, + "loss": 0.4609, + "step": 31304 + }, + { + "epoch": 0.8595551894563427, + "grad_norm": 0.38039782643318176, + "learning_rate": 1.2212695866676101e-05, + "loss": 0.4995, + "step": 31305 + }, + { + "epoch": 0.8595826468973091, + "grad_norm": 0.3966236412525177, + "learning_rate": 1.221227467668441e-05, + "loss": 0.4827, + "step": 31306 + }, + { + "epoch": 0.8596101043382757, + "grad_norm": 0.4804043769836426, + "learning_rate": 1.2211853482566119e-05, + "loss": 0.5205, + "step": 31307 + }, + { + "epoch": 0.8596375617792422, + "grad_norm": 0.4277108609676361, + "learning_rate": 1.2211432284322019e-05, + "loss": 0.4637, + "step": 31308 + }, + { + "epoch": 0.8596650192202087, + "grad_norm": 0.3633856177330017, + "learning_rate": 1.2211011081952891e-05, + "loss": 0.4768, + "step": 31309 + }, + { + "epoch": 0.8596924766611752, + "grad_norm": 0.5255773067474365, + "learning_rate": 1.221058987545952e-05, + "loss": 0.5372, + "step": 31310 + }, + { + "epoch": 0.8597199341021416, + "grad_norm": 0.4376133680343628, + "learning_rate": 1.2210168664842694e-05, + "loss": 0.5048, + "step": 31311 + }, + { + "epoch": 0.8597473915431082, + "grad_norm": 0.4208686351776123, + "learning_rate": 1.2209747450103198e-05, + "loss": 0.5823, + "step": 31312 + }, + { + "epoch": 0.8597748489840746, + "grad_norm": 0.4360350966453552, + "learning_rate": 1.2209326231241819e-05, + "loss": 0.468, + "step": 31313 + }, + { + "epoch": 0.8598023064250412, + "grad_norm": 0.3907027840614319, + "learning_rate": 1.220890500825934e-05, + "loss": 0.5034, + "step": 31314 + }, + { + "epoch": 0.8598297638660077, + "grad_norm": 0.407071053981781, + "learning_rate": 1.2208483781156546e-05, + "loss": 0.5227, + "step": 31315 + }, + { + "epoch": 0.8598572213069742, + "grad_norm": 0.4122464656829834, + "learning_rate": 1.220806254993423e-05, + "loss": 0.4603, + "step": 31316 + }, + { + "epoch": 0.8598846787479407, + "grad_norm": 0.3701207637786865, + "learning_rate": 1.220764131459317e-05, + "loss": 0.4747, + "step": 31317 + }, + { + "epoch": 0.8599121361889072, + "grad_norm": 0.41987308859825134, + "learning_rate": 1.2207220075134154e-05, + "loss": 0.4214, + "step": 31318 + }, + { + "epoch": 0.8599395936298737, + "grad_norm": 0.44811657071113586, + "learning_rate": 1.220679883155797e-05, + "loss": 0.5715, + "step": 31319 + }, + { + "epoch": 0.8599670510708401, + "grad_norm": 0.4045025706291199, + "learning_rate": 1.2206377583865399e-05, + "loss": 0.4895, + "step": 31320 + }, + { + "epoch": 0.8599945085118067, + "grad_norm": 0.3782580494880676, + "learning_rate": 1.220595633205723e-05, + "loss": 0.4271, + "step": 31321 + }, + { + "epoch": 0.8600219659527732, + "grad_norm": 0.490177720785141, + "learning_rate": 1.220553507613425e-05, + "loss": 0.606, + "step": 31322 + }, + { + "epoch": 0.8600494233937397, + "grad_norm": 0.359328031539917, + "learning_rate": 1.2205113816097242e-05, + "loss": 0.4521, + "step": 31323 + }, + { + "epoch": 0.8600768808347062, + "grad_norm": 0.409210741519928, + "learning_rate": 1.2204692551946996e-05, + "loss": 0.5573, + "step": 31324 + }, + { + "epoch": 0.8601043382756727, + "grad_norm": 0.36547163128852844, + "learning_rate": 1.2204271283684289e-05, + "loss": 0.4901, + "step": 31325 + }, + { + "epoch": 0.8601317957166392, + "grad_norm": 0.3905768096446991, + "learning_rate": 1.2203850011309916e-05, + "loss": 0.5263, + "step": 31326 + }, + { + "epoch": 0.8601592531576057, + "grad_norm": 0.36346250772476196, + "learning_rate": 1.220342873482466e-05, + "loss": 0.488, + "step": 31327 + }, + { + "epoch": 0.8601867105985722, + "grad_norm": 0.3605482876300812, + "learning_rate": 1.2203007454229305e-05, + "loss": 0.5228, + "step": 31328 + }, + { + "epoch": 0.8602141680395388, + "grad_norm": 0.4963330328464508, + "learning_rate": 1.2202586169524638e-05, + "loss": 0.528, + "step": 31329 + }, + { + "epoch": 0.8602416254805052, + "grad_norm": 0.37144699692726135, + "learning_rate": 1.2202164880711446e-05, + "loss": 0.458, + "step": 31330 + }, + { + "epoch": 0.8602690829214718, + "grad_norm": 0.3776478171348572, + "learning_rate": 1.220174358779051e-05, + "loss": 0.542, + "step": 31331 + }, + { + "epoch": 0.8602965403624382, + "grad_norm": 0.3982198238372803, + "learning_rate": 1.2201322290762624e-05, + "loss": 0.4385, + "step": 31332 + }, + { + "epoch": 0.8603239978034047, + "grad_norm": 0.3848790228366852, + "learning_rate": 1.2200900989628566e-05, + "loss": 0.4645, + "step": 31333 + }, + { + "epoch": 0.8603514552443712, + "grad_norm": 0.4136074483394623, + "learning_rate": 1.2200479684389129e-05, + "loss": 0.5867, + "step": 31334 + }, + { + "epoch": 0.8603789126853377, + "grad_norm": 0.3775768280029297, + "learning_rate": 1.2200058375045092e-05, + "loss": 0.4721, + "step": 31335 + }, + { + "epoch": 0.8604063701263043, + "grad_norm": 0.3791636824607849, + "learning_rate": 1.2199637061597244e-05, + "loss": 0.4867, + "step": 31336 + }, + { + "epoch": 0.8604338275672707, + "grad_norm": 0.3575994670391083, + "learning_rate": 1.2199215744046373e-05, + "loss": 0.4971, + "step": 31337 + }, + { + "epoch": 0.8604612850082373, + "grad_norm": 0.44650033116340637, + "learning_rate": 1.2198794422393258e-05, + "loss": 0.4744, + "step": 31338 + }, + { + "epoch": 0.8604887424492037, + "grad_norm": 0.3909582197666168, + "learning_rate": 1.2198373096638697e-05, + "loss": 0.5157, + "step": 31339 + }, + { + "epoch": 0.8605161998901703, + "grad_norm": 0.4205089807510376, + "learning_rate": 1.2197951766783462e-05, + "loss": 0.5235, + "step": 31340 + }, + { + "epoch": 0.8605436573311367, + "grad_norm": 0.437282532453537, + "learning_rate": 1.2197530432828349e-05, + "loss": 0.4998, + "step": 31341 + }, + { + "epoch": 0.8605711147721032, + "grad_norm": 0.4647047221660614, + "learning_rate": 1.219710909477414e-05, + "loss": 0.5361, + "step": 31342 + }, + { + "epoch": 0.8605985722130698, + "grad_norm": 0.37779924273490906, + "learning_rate": 1.2196687752621619e-05, + "loss": 0.5709, + "step": 31343 + }, + { + "epoch": 0.8606260296540362, + "grad_norm": 0.4236941933631897, + "learning_rate": 1.2196266406371575e-05, + "loss": 0.4425, + "step": 31344 + }, + { + "epoch": 0.8606534870950028, + "grad_norm": 0.3945387601852417, + "learning_rate": 1.2195845056024796e-05, + "loss": 0.4371, + "step": 31345 + }, + { + "epoch": 0.8606809445359692, + "grad_norm": 0.37782523036003113, + "learning_rate": 1.219542370158206e-05, + "loss": 0.5279, + "step": 31346 + }, + { + "epoch": 0.8607084019769358, + "grad_norm": 0.4053337574005127, + "learning_rate": 1.2195002343044164e-05, + "loss": 0.4945, + "step": 31347 + }, + { + "epoch": 0.8607358594179022, + "grad_norm": 0.39608192443847656, + "learning_rate": 1.2194580980411885e-05, + "loss": 0.5008, + "step": 31348 + }, + { + "epoch": 0.8607633168588688, + "grad_norm": 0.3938605487346649, + "learning_rate": 1.2194159613686011e-05, + "loss": 0.5555, + "step": 31349 + }, + { + "epoch": 0.8607907742998353, + "grad_norm": 0.43662217259407043, + "learning_rate": 1.2193738242867332e-05, + "loss": 0.62, + "step": 31350 + }, + { + "epoch": 0.8608182317408017, + "grad_norm": 0.41826504468917847, + "learning_rate": 1.2193316867956629e-05, + "loss": 0.4945, + "step": 31351 + }, + { + "epoch": 0.8608456891817683, + "grad_norm": 0.40584608912467957, + "learning_rate": 1.219289548895469e-05, + "loss": 0.4378, + "step": 31352 + }, + { + "epoch": 0.8608731466227347, + "grad_norm": 0.3724440932273865, + "learning_rate": 1.2192474105862303e-05, + "loss": 0.4985, + "step": 31353 + }, + { + "epoch": 0.8609006040637013, + "grad_norm": 0.37889915704727173, + "learning_rate": 1.2192052718680249e-05, + "loss": 0.4969, + "step": 31354 + }, + { + "epoch": 0.8609280615046677, + "grad_norm": 0.4117904305458069, + "learning_rate": 1.2191631327409319e-05, + "loss": 0.5228, + "step": 31355 + }, + { + "epoch": 0.8609555189456343, + "grad_norm": 0.3804273307323456, + "learning_rate": 1.2191209932050297e-05, + "loss": 0.5143, + "step": 31356 + }, + { + "epoch": 0.8609829763866008, + "grad_norm": 0.3504452407360077, + "learning_rate": 1.2190788532603967e-05, + "loss": 0.427, + "step": 31357 + }, + { + "epoch": 0.8610104338275673, + "grad_norm": 0.7245714068412781, + "learning_rate": 1.2190367129071122e-05, + "loss": 0.3867, + "step": 31358 + }, + { + "epoch": 0.8610378912685338, + "grad_norm": 0.38244062662124634, + "learning_rate": 1.2189945721452538e-05, + "loss": 0.4892, + "step": 31359 + }, + { + "epoch": 0.8610653487095002, + "grad_norm": 0.4037846624851227, + "learning_rate": 1.218952430974901e-05, + "loss": 0.5373, + "step": 31360 + }, + { + "epoch": 0.8610928061504668, + "grad_norm": 0.38630884885787964, + "learning_rate": 1.2189102893961317e-05, + "loss": 0.6379, + "step": 31361 + }, + { + "epoch": 0.8611202635914332, + "grad_norm": 1.0298833847045898, + "learning_rate": 1.2188681474090252e-05, + "loss": 0.5322, + "step": 31362 + }, + { + "epoch": 0.8611477210323998, + "grad_norm": 0.44588354229927063, + "learning_rate": 1.2188260050136595e-05, + "loss": 0.56, + "step": 31363 + }, + { + "epoch": 0.8611751784733663, + "grad_norm": 0.35238221287727356, + "learning_rate": 1.2187838622101133e-05, + "loss": 0.5407, + "step": 31364 + }, + { + "epoch": 0.8612026359143328, + "grad_norm": 0.4253321588039398, + "learning_rate": 1.2187417189984658e-05, + "loss": 0.5041, + "step": 31365 + }, + { + "epoch": 0.8612300933552993, + "grad_norm": 0.32900747656822205, + "learning_rate": 1.2186995753787949e-05, + "loss": 0.4104, + "step": 31366 + }, + { + "epoch": 0.8612575507962658, + "grad_norm": 0.38761138916015625, + "learning_rate": 1.2186574313511797e-05, + "loss": 0.4695, + "step": 31367 + }, + { + "epoch": 0.8612850082372323, + "grad_norm": 0.3965902328491211, + "learning_rate": 1.2186152869156984e-05, + "loss": 0.4557, + "step": 31368 + }, + { + "epoch": 0.8613124656781987, + "grad_norm": 0.3813440799713135, + "learning_rate": 1.2185731420724298e-05, + "loss": 0.4777, + "step": 31369 + }, + { + "epoch": 0.8613399231191653, + "grad_norm": 0.433554083108902, + "learning_rate": 1.2185309968214527e-05, + "loss": 0.5805, + "step": 31370 + }, + { + "epoch": 0.8613673805601318, + "grad_norm": 0.38253292441368103, + "learning_rate": 1.2184888511628454e-05, + "loss": 0.4606, + "step": 31371 + }, + { + "epoch": 0.8613948380010983, + "grad_norm": 0.4328339993953705, + "learning_rate": 1.2184467050966867e-05, + "loss": 0.4754, + "step": 31372 + }, + { + "epoch": 0.8614222954420648, + "grad_norm": 0.4179971516132355, + "learning_rate": 1.2184045586230553e-05, + "loss": 0.4852, + "step": 31373 + }, + { + "epoch": 0.8614497528830313, + "grad_norm": 0.4774787127971649, + "learning_rate": 1.2183624117420293e-05, + "loss": 0.5838, + "step": 31374 + }, + { + "epoch": 0.8614772103239978, + "grad_norm": 0.3898458480834961, + "learning_rate": 1.2183202644536883e-05, + "loss": 0.417, + "step": 31375 + }, + { + "epoch": 0.8615046677649643, + "grad_norm": 0.3487231135368347, + "learning_rate": 1.21827811675811e-05, + "loss": 0.4454, + "step": 31376 + }, + { + "epoch": 0.8615321252059308, + "grad_norm": 0.36818331480026245, + "learning_rate": 1.2182359686553733e-05, + "loss": 0.4492, + "step": 31377 + }, + { + "epoch": 0.8615595826468974, + "grad_norm": 0.3885168433189392, + "learning_rate": 1.218193820145557e-05, + "loss": 0.5046, + "step": 31378 + }, + { + "epoch": 0.8615870400878638, + "grad_norm": 0.36676642298698425, + "learning_rate": 1.2181516712287396e-05, + "loss": 0.474, + "step": 31379 + }, + { + "epoch": 0.8616144975288303, + "grad_norm": 0.477789044380188, + "learning_rate": 1.2181095219049993e-05, + "loss": 0.5248, + "step": 31380 + }, + { + "epoch": 0.8616419549697968, + "grad_norm": 0.4089867174625397, + "learning_rate": 1.2180673721744157e-05, + "loss": 0.5086, + "step": 31381 + }, + { + "epoch": 0.8616694124107633, + "grad_norm": 0.38655516505241394, + "learning_rate": 1.2180252220370666e-05, + "loss": 0.5095, + "step": 31382 + }, + { + "epoch": 0.8616968698517298, + "grad_norm": 0.38183602690696716, + "learning_rate": 1.2179830714930309e-05, + "loss": 0.4519, + "step": 31383 + }, + { + "epoch": 0.8617243272926963, + "grad_norm": 0.4732474088668823, + "learning_rate": 1.2179409205423874e-05, + "loss": 0.4665, + "step": 31384 + }, + { + "epoch": 0.8617517847336629, + "grad_norm": 0.4348851144313812, + "learning_rate": 1.217898769185214e-05, + "loss": 0.5202, + "step": 31385 + }, + { + "epoch": 0.8617792421746293, + "grad_norm": 0.3864341974258423, + "learning_rate": 1.2178566174215903e-05, + "loss": 0.5081, + "step": 31386 + }, + { + "epoch": 0.8618066996155959, + "grad_norm": 0.8861841559410095, + "learning_rate": 1.2178144652515942e-05, + "loss": 0.6036, + "step": 31387 + }, + { + "epoch": 0.8618341570565623, + "grad_norm": 0.3373664617538452, + "learning_rate": 1.2177723126753048e-05, + "loss": 0.5018, + "step": 31388 + }, + { + "epoch": 0.8618616144975288, + "grad_norm": 0.3849511742591858, + "learning_rate": 1.2177301596928005e-05, + "loss": 0.46, + "step": 31389 + }, + { + "epoch": 0.8618890719384953, + "grad_norm": 0.39529773592948914, + "learning_rate": 1.2176880063041598e-05, + "loss": 0.4532, + "step": 31390 + }, + { + "epoch": 0.8619165293794618, + "grad_norm": 0.3211962878704071, + "learning_rate": 1.2176458525094617e-05, + "loss": 0.4742, + "step": 31391 + }, + { + "epoch": 0.8619439868204284, + "grad_norm": 0.43557047843933105, + "learning_rate": 1.2176036983087847e-05, + "loss": 0.5279, + "step": 31392 + }, + { + "epoch": 0.8619714442613948, + "grad_norm": 0.44129088521003723, + "learning_rate": 1.217561543702207e-05, + "loss": 0.555, + "step": 31393 + }, + { + "epoch": 0.8619989017023614, + "grad_norm": 0.40078625082969666, + "learning_rate": 1.2175193886898078e-05, + "loss": 0.4874, + "step": 31394 + }, + { + "epoch": 0.8620263591433278, + "grad_norm": 0.3765333592891693, + "learning_rate": 1.2174772332716653e-05, + "loss": 0.4652, + "step": 31395 + }, + { + "epoch": 0.8620538165842944, + "grad_norm": 0.4323311150074005, + "learning_rate": 1.2174350774478587e-05, + "loss": 0.4366, + "step": 31396 + }, + { + "epoch": 0.8620812740252608, + "grad_norm": 0.378772109746933, + "learning_rate": 1.2173929212184663e-05, + "loss": 0.5, + "step": 31397 + }, + { + "epoch": 0.8621087314662274, + "grad_norm": 0.36059001088142395, + "learning_rate": 1.2173507645835663e-05, + "loss": 0.4136, + "step": 31398 + }, + { + "epoch": 0.8621361889071939, + "grad_norm": 0.37629786133766174, + "learning_rate": 1.217308607543238e-05, + "loss": 0.5004, + "step": 31399 + }, + { + "epoch": 0.8621636463481603, + "grad_norm": 0.41888654232025146, + "learning_rate": 1.2172664500975598e-05, + "loss": 0.4398, + "step": 31400 + }, + { + "epoch": 0.8621911037891269, + "grad_norm": 0.44094330072402954, + "learning_rate": 1.2172242922466105e-05, + "loss": 0.5817, + "step": 31401 + }, + { + "epoch": 0.8622185612300933, + "grad_norm": 0.43093687295913696, + "learning_rate": 1.2171821339904684e-05, + "loss": 0.5633, + "step": 31402 + }, + { + "epoch": 0.8622460186710599, + "grad_norm": 0.3847319483757019, + "learning_rate": 1.2171399753292122e-05, + "loss": 0.4926, + "step": 31403 + }, + { + "epoch": 0.8622734761120263, + "grad_norm": 0.3456861972808838, + "learning_rate": 1.2170978162629208e-05, + "loss": 0.482, + "step": 31404 + }, + { + "epoch": 0.8623009335529929, + "grad_norm": 0.4453994333744049, + "learning_rate": 1.2170556567916726e-05, + "loss": 0.4596, + "step": 31405 + }, + { + "epoch": 0.8623283909939594, + "grad_norm": 0.43211716413497925, + "learning_rate": 1.2170134969155463e-05, + "loss": 0.5339, + "step": 31406 + }, + { + "epoch": 0.8623558484349259, + "grad_norm": 0.5397331714630127, + "learning_rate": 1.2169713366346205e-05, + "loss": 0.4743, + "step": 31407 + }, + { + "epoch": 0.8623833058758924, + "grad_norm": 0.40424805879592896, + "learning_rate": 1.216929175948974e-05, + "loss": 0.4974, + "step": 31408 + }, + { + "epoch": 0.8624107633168588, + "grad_norm": 0.4279015064239502, + "learning_rate": 1.2168870148586853e-05, + "loss": 0.5002, + "step": 31409 + }, + { + "epoch": 0.8624382207578254, + "grad_norm": 0.38152948021888733, + "learning_rate": 1.216844853363833e-05, + "loss": 0.4829, + "step": 31410 + }, + { + "epoch": 0.8624656781987918, + "grad_norm": 0.3795414865016937, + "learning_rate": 1.216802691464496e-05, + "loss": 0.4102, + "step": 31411 + }, + { + "epoch": 0.8624931356397584, + "grad_norm": 0.357448548078537, + "learning_rate": 1.216760529160753e-05, + "loss": 0.4431, + "step": 31412 + }, + { + "epoch": 0.8625205930807249, + "grad_norm": 0.47998157143592834, + "learning_rate": 1.216718366452682e-05, + "loss": 0.508, + "step": 31413 + }, + { + "epoch": 0.8625480505216914, + "grad_norm": 0.37344714999198914, + "learning_rate": 1.2166762033403625e-05, + "loss": 0.4725, + "step": 31414 + }, + { + "epoch": 0.8625755079626579, + "grad_norm": 0.4621744751930237, + "learning_rate": 1.2166340398238724e-05, + "loss": 0.6038, + "step": 31415 + }, + { + "epoch": 0.8626029654036244, + "grad_norm": 0.40091249346733093, + "learning_rate": 1.2165918759032908e-05, + "loss": 0.5908, + "step": 31416 + }, + { + "epoch": 0.8626304228445909, + "grad_norm": 0.3997773230075836, + "learning_rate": 1.2165497115786962e-05, + "loss": 0.4863, + "step": 31417 + }, + { + "epoch": 0.8626578802855573, + "grad_norm": 0.42403993010520935, + "learning_rate": 1.2165075468501673e-05, + "loss": 0.5829, + "step": 31418 + }, + { + "epoch": 0.8626853377265239, + "grad_norm": 0.4686851501464844, + "learning_rate": 1.2164653817177828e-05, + "loss": 0.556, + "step": 31419 + }, + { + "epoch": 0.8627127951674904, + "grad_norm": 0.3703324794769287, + "learning_rate": 1.2164232161816209e-05, + "loss": 0.4879, + "step": 31420 + }, + { + "epoch": 0.8627402526084569, + "grad_norm": 0.3716850280761719, + "learning_rate": 1.2163810502417612e-05, + "loss": 0.49, + "step": 31421 + }, + { + "epoch": 0.8627677100494234, + "grad_norm": 0.35182061791419983, + "learning_rate": 1.2163388838982812e-05, + "loss": 0.4717, + "step": 31422 + }, + { + "epoch": 0.8627951674903899, + "grad_norm": 0.39828890562057495, + "learning_rate": 1.2162967171512605e-05, + "loss": 0.5181, + "step": 31423 + }, + { + "epoch": 0.8628226249313564, + "grad_norm": 0.3596118688583374, + "learning_rate": 1.2162545500007774e-05, + "loss": 0.4178, + "step": 31424 + }, + { + "epoch": 0.8628500823723229, + "grad_norm": 0.3950846791267395, + "learning_rate": 1.2162123824469104e-05, + "loss": 0.5353, + "step": 31425 + }, + { + "epoch": 0.8628775398132894, + "grad_norm": 0.3694448471069336, + "learning_rate": 1.2161702144897384e-05, + "loss": 0.4533, + "step": 31426 + }, + { + "epoch": 0.862904997254256, + "grad_norm": 0.3870159387588501, + "learning_rate": 1.21612804612934e-05, + "loss": 0.4242, + "step": 31427 + }, + { + "epoch": 0.8629324546952224, + "grad_norm": 0.41604816913604736, + "learning_rate": 1.2160858773657935e-05, + "loss": 0.553, + "step": 31428 + }, + { + "epoch": 0.862959912136189, + "grad_norm": 0.3952045738697052, + "learning_rate": 1.216043708199178e-05, + "loss": 0.4611, + "step": 31429 + }, + { + "epoch": 0.8629873695771554, + "grad_norm": 0.43566423654556274, + "learning_rate": 1.2160015386295723e-05, + "loss": 0.5012, + "step": 31430 + }, + { + "epoch": 0.8630148270181219, + "grad_norm": 0.4323706328868866, + "learning_rate": 1.2159593686570548e-05, + "loss": 0.5523, + "step": 31431 + }, + { + "epoch": 0.8630422844590884, + "grad_norm": 0.4292236864566803, + "learning_rate": 1.2159171982817038e-05, + "loss": 0.6514, + "step": 31432 + }, + { + "epoch": 0.8630697419000549, + "grad_norm": 0.4094109535217285, + "learning_rate": 1.2158750275035987e-05, + "loss": 0.4716, + "step": 31433 + }, + { + "epoch": 0.8630971993410215, + "grad_norm": 0.3770277202129364, + "learning_rate": 1.2158328563228172e-05, + "loss": 0.461, + "step": 31434 + }, + { + "epoch": 0.8631246567819879, + "grad_norm": 0.3735966980457306, + "learning_rate": 1.2157906847394389e-05, + "loss": 0.5388, + "step": 31435 + }, + { + "epoch": 0.8631521142229545, + "grad_norm": 0.5216818451881409, + "learning_rate": 1.2157485127535424e-05, + "loss": 0.5796, + "step": 31436 + }, + { + "epoch": 0.8631795716639209, + "grad_norm": 0.4204326868057251, + "learning_rate": 1.2157063403652056e-05, + "loss": 0.5361, + "step": 31437 + }, + { + "epoch": 0.8632070291048874, + "grad_norm": 0.42258837819099426, + "learning_rate": 1.2156641675745078e-05, + "loss": 0.547, + "step": 31438 + }, + { + "epoch": 0.8632344865458539, + "grad_norm": 0.5095781087875366, + "learning_rate": 1.2156219943815275e-05, + "loss": 0.4845, + "step": 31439 + }, + { + "epoch": 0.8632619439868204, + "grad_norm": 0.3613291084766388, + "learning_rate": 1.2155798207863435e-05, + "loss": 0.4895, + "step": 31440 + }, + { + "epoch": 0.863289401427787, + "grad_norm": 0.38864246010780334, + "learning_rate": 1.2155376467890341e-05, + "loss": 0.5144, + "step": 31441 + }, + { + "epoch": 0.8633168588687534, + "grad_norm": 0.4082963168621063, + "learning_rate": 1.2154954723896782e-05, + "loss": 0.5443, + "step": 31442 + }, + { + "epoch": 0.86334431630972, + "grad_norm": 0.3520066440105438, + "learning_rate": 1.2154532975883546e-05, + "loss": 0.4908, + "step": 31443 + }, + { + "epoch": 0.8633717737506864, + "grad_norm": 0.4259317219257355, + "learning_rate": 1.2154111223851417e-05, + "loss": 0.4568, + "step": 31444 + }, + { + "epoch": 0.863399231191653, + "grad_norm": 0.5068135857582092, + "learning_rate": 1.2153689467801184e-05, + "loss": 0.5592, + "step": 31445 + }, + { + "epoch": 0.8634266886326194, + "grad_norm": 0.37814968824386597, + "learning_rate": 1.2153267707733633e-05, + "loss": 0.3706, + "step": 31446 + }, + { + "epoch": 0.863454146073586, + "grad_norm": 0.41941264271736145, + "learning_rate": 1.215284594364955e-05, + "loss": 0.4669, + "step": 31447 + }, + { + "epoch": 0.8634816035145525, + "grad_norm": 0.44661736488342285, + "learning_rate": 1.2152424175549721e-05, + "loss": 0.4946, + "step": 31448 + }, + { + "epoch": 0.8635090609555189, + "grad_norm": 0.37812310457229614, + "learning_rate": 1.2152002403434936e-05, + "loss": 0.5358, + "step": 31449 + }, + { + "epoch": 0.8635365183964855, + "grad_norm": 0.4027478098869324, + "learning_rate": 1.215158062730598e-05, + "loss": 0.4536, + "step": 31450 + }, + { + "epoch": 0.8635639758374519, + "grad_norm": 0.42484593391418457, + "learning_rate": 1.2151158847163638e-05, + "loss": 0.4465, + "step": 31451 + }, + { + "epoch": 0.8635914332784185, + "grad_norm": 0.36470314860343933, + "learning_rate": 1.2150737063008697e-05, + "loss": 0.3849, + "step": 31452 + }, + { + "epoch": 0.8636188907193849, + "grad_norm": 0.3893754184246063, + "learning_rate": 1.2150315274841946e-05, + "loss": 0.5107, + "step": 31453 + }, + { + "epoch": 0.8636463481603515, + "grad_norm": 0.5707032680511475, + "learning_rate": 1.2149893482664172e-05, + "loss": 0.5775, + "step": 31454 + }, + { + "epoch": 0.863673805601318, + "grad_norm": 0.4218432605266571, + "learning_rate": 1.214947168647616e-05, + "loss": 0.4253, + "step": 31455 + }, + { + "epoch": 0.8637012630422845, + "grad_norm": 0.38254213333129883, + "learning_rate": 1.2149049886278697e-05, + "loss": 0.5186, + "step": 31456 + }, + { + "epoch": 0.863728720483251, + "grad_norm": 0.3997059166431427, + "learning_rate": 1.2148628082072569e-05, + "loss": 0.5987, + "step": 31457 + }, + { + "epoch": 0.8637561779242174, + "grad_norm": 0.4094833433628082, + "learning_rate": 1.2148206273858566e-05, + "loss": 0.4981, + "step": 31458 + }, + { + "epoch": 0.863783635365184, + "grad_norm": 0.5090482831001282, + "learning_rate": 1.2147784461637471e-05, + "loss": 0.5123, + "step": 31459 + }, + { + "epoch": 0.8638110928061504, + "grad_norm": 0.37398484349250793, + "learning_rate": 1.2147362645410072e-05, + "loss": 0.438, + "step": 31460 + }, + { + "epoch": 0.863838550247117, + "grad_norm": 0.3715219795703888, + "learning_rate": 1.2146940825177158e-05, + "loss": 0.4766, + "step": 31461 + }, + { + "epoch": 0.8638660076880835, + "grad_norm": 0.41074997186660767, + "learning_rate": 1.2146519000939512e-05, + "loss": 0.4347, + "step": 31462 + }, + { + "epoch": 0.86389346512905, + "grad_norm": 0.3914940655231476, + "learning_rate": 1.2146097172697926e-05, + "loss": 0.4674, + "step": 31463 + }, + { + "epoch": 0.8639209225700165, + "grad_norm": 0.7171555161476135, + "learning_rate": 1.2145675340453182e-05, + "loss": 0.4632, + "step": 31464 + }, + { + "epoch": 0.863948380010983, + "grad_norm": 0.3693390488624573, + "learning_rate": 1.2145253504206069e-05, + "loss": 0.4087, + "step": 31465 + }, + { + "epoch": 0.8639758374519495, + "grad_norm": 0.3633575737476349, + "learning_rate": 1.2144831663957373e-05, + "loss": 0.4664, + "step": 31466 + }, + { + "epoch": 0.8640032948929159, + "grad_norm": 0.42450740933418274, + "learning_rate": 1.2144409819707881e-05, + "loss": 0.5124, + "step": 31467 + }, + { + "epoch": 0.8640307523338825, + "grad_norm": 0.4466194808483124, + "learning_rate": 1.2143987971458383e-05, + "loss": 0.479, + "step": 31468 + }, + { + "epoch": 0.8640582097748489, + "grad_norm": 0.4256153404712677, + "learning_rate": 1.2143566119209662e-05, + "loss": 0.4286, + "step": 31469 + }, + { + "epoch": 0.8640856672158155, + "grad_norm": 0.33586785197257996, + "learning_rate": 1.2143144262962503e-05, + "loss": 0.4501, + "step": 31470 + }, + { + "epoch": 0.864113124656782, + "grad_norm": 0.38954028487205505, + "learning_rate": 1.21427224027177e-05, + "loss": 0.4896, + "step": 31471 + }, + { + "epoch": 0.8641405820977485, + "grad_norm": 0.3689498007297516, + "learning_rate": 1.2142300538476035e-05, + "loss": 0.4662, + "step": 31472 + }, + { + "epoch": 0.864168039538715, + "grad_norm": 0.4973452389240265, + "learning_rate": 1.2141878670238292e-05, + "loss": 0.4828, + "step": 31473 + }, + { + "epoch": 0.8641954969796815, + "grad_norm": 0.4002431035041809, + "learning_rate": 1.2141456798005266e-05, + "loss": 0.5521, + "step": 31474 + }, + { + "epoch": 0.864222954420648, + "grad_norm": 0.4470595419406891, + "learning_rate": 1.2141034921777737e-05, + "loss": 0.5132, + "step": 31475 + }, + { + "epoch": 0.8642504118616144, + "grad_norm": 0.37496358156204224, + "learning_rate": 1.2140613041556498e-05, + "loss": 0.525, + "step": 31476 + }, + { + "epoch": 0.864277869302581, + "grad_norm": 0.35777318477630615, + "learning_rate": 1.214019115734233e-05, + "loss": 0.5076, + "step": 31477 + }, + { + "epoch": 0.8643053267435475, + "grad_norm": 0.4308715760707855, + "learning_rate": 1.2139769269136022e-05, + "loss": 0.634, + "step": 31478 + }, + { + "epoch": 0.864332784184514, + "grad_norm": 0.37373873591423035, + "learning_rate": 1.2139347376938362e-05, + "loss": 0.4654, + "step": 31479 + }, + { + "epoch": 0.8643602416254805, + "grad_norm": 0.4608699381351471, + "learning_rate": 1.2138925480750134e-05, + "loss": 0.5339, + "step": 31480 + }, + { + "epoch": 0.864387699066447, + "grad_norm": 0.400287926197052, + "learning_rate": 1.2138503580572129e-05, + "loss": 0.5658, + "step": 31481 + }, + { + "epoch": 0.8644151565074135, + "grad_norm": 0.3666137456893921, + "learning_rate": 1.2138081676405136e-05, + "loss": 0.423, + "step": 31482 + }, + { + "epoch": 0.86444261394838, + "grad_norm": 0.4233371615409851, + "learning_rate": 1.2137659768249932e-05, + "loss": 0.5233, + "step": 31483 + }, + { + "epoch": 0.8644700713893465, + "grad_norm": 0.4288518726825714, + "learning_rate": 1.2137237856107315e-05, + "loss": 0.4913, + "step": 31484 + }, + { + "epoch": 0.8644975288303131, + "grad_norm": 0.3871913552284241, + "learning_rate": 1.2136815939978065e-05, + "loss": 0.5189, + "step": 31485 + }, + { + "epoch": 0.8645249862712795, + "grad_norm": 0.3665926158428192, + "learning_rate": 1.2136394019862972e-05, + "loss": 0.5191, + "step": 31486 + }, + { + "epoch": 0.864552443712246, + "grad_norm": 0.3975391089916229, + "learning_rate": 1.2135972095762823e-05, + "loss": 0.3726, + "step": 31487 + }, + { + "epoch": 0.8645799011532125, + "grad_norm": 0.3710477352142334, + "learning_rate": 1.2135550167678403e-05, + "loss": 0.4698, + "step": 31488 + }, + { + "epoch": 0.864607358594179, + "grad_norm": 0.38806837797164917, + "learning_rate": 1.21351282356105e-05, + "loss": 0.4098, + "step": 31489 + }, + { + "epoch": 0.8646348160351455, + "grad_norm": 0.3590100407600403, + "learning_rate": 1.2134706299559904e-05, + "loss": 0.4222, + "step": 31490 + }, + { + "epoch": 0.864662273476112, + "grad_norm": 0.4225962162017822, + "learning_rate": 1.2134284359527398e-05, + "loss": 0.5022, + "step": 31491 + }, + { + "epoch": 0.8646897309170786, + "grad_norm": 0.39680561423301697, + "learning_rate": 1.2133862415513771e-05, + "loss": 0.4953, + "step": 31492 + }, + { + "epoch": 0.864717188358045, + "grad_norm": 0.3813709318637848, + "learning_rate": 1.2133440467519806e-05, + "loss": 0.4481, + "step": 31493 + }, + { + "epoch": 0.8647446457990116, + "grad_norm": 0.39235228300094604, + "learning_rate": 1.2133018515546298e-05, + "loss": 0.5381, + "step": 31494 + }, + { + "epoch": 0.864772103239978, + "grad_norm": 0.41417133808135986, + "learning_rate": 1.2132596559594028e-05, + "loss": 0.4217, + "step": 31495 + }, + { + "epoch": 0.8647995606809445, + "grad_norm": 0.36216551065444946, + "learning_rate": 1.2132174599663785e-05, + "loss": 0.3984, + "step": 31496 + }, + { + "epoch": 0.864827018121911, + "grad_norm": 0.4111325442790985, + "learning_rate": 1.2131752635756355e-05, + "loss": 0.5119, + "step": 31497 + }, + { + "epoch": 0.8648544755628775, + "grad_norm": 0.7796735763549805, + "learning_rate": 1.2131330667872525e-05, + "loss": 0.6074, + "step": 31498 + }, + { + "epoch": 0.8648819330038441, + "grad_norm": 0.37634190917015076, + "learning_rate": 1.2130908696013086e-05, + "loss": 0.4669, + "step": 31499 + }, + { + "epoch": 0.8649093904448105, + "grad_norm": 0.38186725974082947, + "learning_rate": 1.2130486720178822e-05, + "loss": 0.4915, + "step": 31500 + }, + { + "epoch": 0.8649368478857771, + "grad_norm": 0.3454498052597046, + "learning_rate": 1.2130064740370517e-05, + "loss": 0.3978, + "step": 31501 + }, + { + "epoch": 0.8649643053267435, + "grad_norm": 0.42750146985054016, + "learning_rate": 1.2129642756588964e-05, + "loss": 0.5078, + "step": 31502 + }, + { + "epoch": 0.8649917627677101, + "grad_norm": 0.3775571286678314, + "learning_rate": 1.2129220768834949e-05, + "loss": 0.5707, + "step": 31503 + }, + { + "epoch": 0.8650192202086765, + "grad_norm": 0.416988343000412, + "learning_rate": 1.2128798777109254e-05, + "loss": 0.5396, + "step": 31504 + }, + { + "epoch": 0.865046677649643, + "grad_norm": 0.39961788058280945, + "learning_rate": 1.2128376781412671e-05, + "loss": 0.4827, + "step": 31505 + }, + { + "epoch": 0.8650741350906096, + "grad_norm": 0.378293514251709, + "learning_rate": 1.2127954781745988e-05, + "loss": 0.4811, + "step": 31506 + }, + { + "epoch": 0.865101592531576, + "grad_norm": 0.3830195963382721, + "learning_rate": 1.212753277810999e-05, + "loss": 0.5018, + "step": 31507 + }, + { + "epoch": 0.8651290499725426, + "grad_norm": 0.4913994073867798, + "learning_rate": 1.2127110770505464e-05, + "loss": 0.5553, + "step": 31508 + }, + { + "epoch": 0.865156507413509, + "grad_norm": 0.3871981203556061, + "learning_rate": 1.2126688758933194e-05, + "loss": 0.4635, + "step": 31509 + }, + { + "epoch": 0.8651839648544756, + "grad_norm": 0.45018842816352844, + "learning_rate": 1.2126266743393975e-05, + "loss": 0.5197, + "step": 31510 + }, + { + "epoch": 0.865211422295442, + "grad_norm": 0.3617717921733856, + "learning_rate": 1.2125844723888587e-05, + "loss": 0.5081, + "step": 31511 + }, + { + "epoch": 0.8652388797364086, + "grad_norm": 0.3449403941631317, + "learning_rate": 1.2125422700417822e-05, + "loss": 0.4334, + "step": 31512 + }, + { + "epoch": 0.8652663371773751, + "grad_norm": 0.39613163471221924, + "learning_rate": 1.2125000672982466e-05, + "loss": 0.5539, + "step": 31513 + }, + { + "epoch": 0.8652937946183415, + "grad_norm": 0.4431510269641876, + "learning_rate": 1.2124578641583302e-05, + "loss": 0.4888, + "step": 31514 + }, + { + "epoch": 0.8653212520593081, + "grad_norm": 0.4131149649620056, + "learning_rate": 1.2124156606221124e-05, + "loss": 0.5194, + "step": 31515 + }, + { + "epoch": 0.8653487095002745, + "grad_norm": 0.40295642614364624, + "learning_rate": 1.2123734566896717e-05, + "loss": 0.4524, + "step": 31516 + }, + { + "epoch": 0.8653761669412411, + "grad_norm": 0.41537362337112427, + "learning_rate": 1.2123312523610861e-05, + "loss": 0.4668, + "step": 31517 + }, + { + "epoch": 0.8654036243822075, + "grad_norm": 0.4315067231655121, + "learning_rate": 1.2122890476364358e-05, + "loss": 0.5617, + "step": 31518 + }, + { + "epoch": 0.8654310818231741, + "grad_norm": 0.3924228549003601, + "learning_rate": 1.212246842515798e-05, + "loss": 0.5282, + "step": 31519 + }, + { + "epoch": 0.8654585392641406, + "grad_norm": 0.3875119388103485, + "learning_rate": 1.2122046369992524e-05, + "loss": 0.4927, + "step": 31520 + }, + { + "epoch": 0.8654859967051071, + "grad_norm": 0.4284254014492035, + "learning_rate": 1.2121624310868774e-05, + "loss": 0.4301, + "step": 31521 + }, + { + "epoch": 0.8655134541460736, + "grad_norm": 0.3712712824344635, + "learning_rate": 1.2121202247787516e-05, + "loss": 0.4229, + "step": 31522 + }, + { + "epoch": 0.86554091158704, + "grad_norm": 0.3869176506996155, + "learning_rate": 1.2120780180749541e-05, + "loss": 0.4862, + "step": 31523 + }, + { + "epoch": 0.8655683690280066, + "grad_norm": 0.3753916621208191, + "learning_rate": 1.212035810975563e-05, + "loss": 0.5269, + "step": 31524 + }, + { + "epoch": 0.865595826468973, + "grad_norm": 0.39775538444519043, + "learning_rate": 1.2119936034806578e-05, + "loss": 0.4823, + "step": 31525 + }, + { + "epoch": 0.8656232839099396, + "grad_norm": 0.38884565234184265, + "learning_rate": 1.211951395590317e-05, + "loss": 0.5467, + "step": 31526 + }, + { + "epoch": 0.8656507413509061, + "grad_norm": 0.3829613924026489, + "learning_rate": 1.2119091873046189e-05, + "loss": 0.5603, + "step": 31527 + }, + { + "epoch": 0.8656781987918726, + "grad_norm": 0.38568374514579773, + "learning_rate": 1.2118669786236427e-05, + "loss": 0.4822, + "step": 31528 + }, + { + "epoch": 0.8657056562328391, + "grad_norm": 0.36596959829330444, + "learning_rate": 1.2118247695474667e-05, + "loss": 0.4371, + "step": 31529 + }, + { + "epoch": 0.8657331136738056, + "grad_norm": 0.40159446001052856, + "learning_rate": 1.2117825600761704e-05, + "loss": 0.5116, + "step": 31530 + }, + { + "epoch": 0.8657605711147721, + "grad_norm": 0.3983573019504547, + "learning_rate": 1.2117403502098317e-05, + "loss": 0.512, + "step": 31531 + }, + { + "epoch": 0.8657880285557386, + "grad_norm": 0.35656166076660156, + "learning_rate": 1.2116981399485294e-05, + "loss": 0.3709, + "step": 31532 + }, + { + "epoch": 0.8658154859967051, + "grad_norm": 0.41709575057029724, + "learning_rate": 1.211655929292343e-05, + "loss": 0.4763, + "step": 31533 + }, + { + "epoch": 0.8658429434376717, + "grad_norm": 0.35590896010398865, + "learning_rate": 1.2116137182413507e-05, + "loss": 0.3706, + "step": 31534 + }, + { + "epoch": 0.8658704008786381, + "grad_norm": 0.3634921908378601, + "learning_rate": 1.211571506795631e-05, + "loss": 0.498, + "step": 31535 + }, + { + "epoch": 0.8658978583196046, + "grad_norm": 0.3729120194911957, + "learning_rate": 1.211529294955263e-05, + "loss": 0.4172, + "step": 31536 + }, + { + "epoch": 0.8659253157605711, + "grad_norm": 0.40334153175354004, + "learning_rate": 1.2114870827203253e-05, + "loss": 0.4092, + "step": 31537 + }, + { + "epoch": 0.8659527732015376, + "grad_norm": 0.4208989441394806, + "learning_rate": 1.211444870090897e-05, + "loss": 0.4897, + "step": 31538 + }, + { + "epoch": 0.8659802306425041, + "grad_norm": 0.35903212428092957, + "learning_rate": 1.2114026570670563e-05, + "loss": 0.4842, + "step": 31539 + }, + { + "epoch": 0.8660076880834706, + "grad_norm": 0.33998242020606995, + "learning_rate": 1.211360443648882e-05, + "loss": 0.4815, + "step": 31540 + }, + { + "epoch": 0.8660351455244372, + "grad_norm": 0.38330650329589844, + "learning_rate": 1.2113182298364534e-05, + "loss": 0.5357, + "step": 31541 + }, + { + "epoch": 0.8660626029654036, + "grad_norm": 0.4054710268974304, + "learning_rate": 1.2112760156298484e-05, + "loss": 0.5663, + "step": 31542 + }, + { + "epoch": 0.8660900604063702, + "grad_norm": 0.39296719431877136, + "learning_rate": 1.2112338010291467e-05, + "loss": 0.4467, + "step": 31543 + }, + { + "epoch": 0.8661175178473366, + "grad_norm": 0.4181499183177948, + "learning_rate": 1.2111915860344263e-05, + "loss": 0.4614, + "step": 31544 + }, + { + "epoch": 0.8661449752883031, + "grad_norm": 0.3998241722583771, + "learning_rate": 1.211149370645766e-05, + "loss": 0.5443, + "step": 31545 + }, + { + "epoch": 0.8661724327292696, + "grad_norm": 0.33989381790161133, + "learning_rate": 1.211107154863245e-05, + "loss": 0.4426, + "step": 31546 + }, + { + "epoch": 0.8661998901702361, + "grad_norm": 0.4103005528450012, + "learning_rate": 1.2110649386869418e-05, + "loss": 0.5189, + "step": 31547 + }, + { + "epoch": 0.8662273476112027, + "grad_norm": 0.33803364634513855, + "learning_rate": 1.2110227221169349e-05, + "loss": 0.3899, + "step": 31548 + }, + { + "epoch": 0.8662548050521691, + "grad_norm": 0.4022403061389923, + "learning_rate": 1.2109805051533035e-05, + "loss": 0.4425, + "step": 31549 + }, + { + "epoch": 0.8662822624931357, + "grad_norm": 0.36575186252593994, + "learning_rate": 1.2109382877961258e-05, + "loss": 0.4922, + "step": 31550 + }, + { + "epoch": 0.8663097199341021, + "grad_norm": 0.3922678828239441, + "learning_rate": 1.2108960700454813e-05, + "loss": 0.5032, + "step": 31551 + }, + { + "epoch": 0.8663371773750687, + "grad_norm": 0.42850300669670105, + "learning_rate": 1.2108538519014482e-05, + "loss": 0.5394, + "step": 31552 + }, + { + "epoch": 0.8663646348160351, + "grad_norm": 0.3467021584510803, + "learning_rate": 1.2108116333641049e-05, + "loss": 0.4516, + "step": 31553 + }, + { + "epoch": 0.8663920922570016, + "grad_norm": 0.35923898220062256, + "learning_rate": 1.2107694144335312e-05, + "loss": 0.4788, + "step": 31554 + }, + { + "epoch": 0.8664195496979682, + "grad_norm": 0.3970898389816284, + "learning_rate": 1.2107271951098049e-05, + "loss": 0.5069, + "step": 31555 + }, + { + "epoch": 0.8664470071389346, + "grad_norm": 0.6505720019340515, + "learning_rate": 1.2106849753930055e-05, + "loss": 0.5785, + "step": 31556 + }, + { + "epoch": 0.8664744645799012, + "grad_norm": 0.40575045347213745, + "learning_rate": 1.2106427552832111e-05, + "loss": 0.5232, + "step": 31557 + }, + { + "epoch": 0.8665019220208676, + "grad_norm": 0.43006595969200134, + "learning_rate": 1.2106005347805007e-05, + "loss": 0.4939, + "step": 31558 + }, + { + "epoch": 0.8665293794618342, + "grad_norm": 0.36968427896499634, + "learning_rate": 1.2105583138849532e-05, + "loss": 0.4161, + "step": 31559 + }, + { + "epoch": 0.8665568369028006, + "grad_norm": 0.3751338720321655, + "learning_rate": 1.210516092596647e-05, + "loss": 0.5197, + "step": 31560 + }, + { + "epoch": 0.8665842943437672, + "grad_norm": 0.3933388590812683, + "learning_rate": 1.2104738709156616e-05, + "loss": 0.4871, + "step": 31561 + }, + { + "epoch": 0.8666117517847337, + "grad_norm": 0.37276193499565125, + "learning_rate": 1.2104316488420752e-05, + "loss": 0.434, + "step": 31562 + }, + { + "epoch": 0.8666392092257001, + "grad_norm": 0.37470871210098267, + "learning_rate": 1.2103894263759661e-05, + "loss": 0.4636, + "step": 31563 + }, + { + "epoch": 0.8666666666666667, + "grad_norm": 0.3823563754558563, + "learning_rate": 1.210347203517414e-05, + "loss": 0.4376, + "step": 31564 + }, + { + "epoch": 0.8666941241076331, + "grad_norm": 0.5199916958808899, + "learning_rate": 1.2103049802664972e-05, + "loss": 0.4092, + "step": 31565 + }, + { + "epoch": 0.8667215815485997, + "grad_norm": 0.40976542234420776, + "learning_rate": 1.210262756623294e-05, + "loss": 0.5279, + "step": 31566 + }, + { + "epoch": 0.8667490389895661, + "grad_norm": 0.38259872794151306, + "learning_rate": 1.2102205325878841e-05, + "loss": 0.4864, + "step": 31567 + }, + { + "epoch": 0.8667764964305327, + "grad_norm": 0.5102532505989075, + "learning_rate": 1.2101783081603457e-05, + "loss": 0.5466, + "step": 31568 + }, + { + "epoch": 0.8668039538714992, + "grad_norm": 0.4776012897491455, + "learning_rate": 1.2101360833407578e-05, + "loss": 0.5881, + "step": 31569 + }, + { + "epoch": 0.8668314113124657, + "grad_norm": 0.3758331537246704, + "learning_rate": 1.2100938581291991e-05, + "loss": 0.424, + "step": 31570 + }, + { + "epoch": 0.8668588687534322, + "grad_norm": 0.38166308403015137, + "learning_rate": 1.2100516325257481e-05, + "loss": 0.4312, + "step": 31571 + }, + { + "epoch": 0.8668863261943986, + "grad_norm": 0.33474355936050415, + "learning_rate": 1.2100094065304839e-05, + "loss": 0.5266, + "step": 31572 + }, + { + "epoch": 0.8669137836353652, + "grad_norm": 0.42058178782463074, + "learning_rate": 1.2099671801434849e-05, + "loss": 0.5597, + "step": 31573 + }, + { + "epoch": 0.8669412410763316, + "grad_norm": 0.3597318232059479, + "learning_rate": 1.2099249533648302e-05, + "loss": 0.5304, + "step": 31574 + }, + { + "epoch": 0.8669686985172982, + "grad_norm": 0.37091323733329773, + "learning_rate": 1.2098827261945987e-05, + "loss": 0.5641, + "step": 31575 + }, + { + "epoch": 0.8669961559582647, + "grad_norm": 0.4377965033054352, + "learning_rate": 1.2098404986328686e-05, + "loss": 0.4881, + "step": 31576 + }, + { + "epoch": 0.8670236133992312, + "grad_norm": 0.395074725151062, + "learning_rate": 1.2097982706797194e-05, + "loss": 0.5025, + "step": 31577 + }, + { + "epoch": 0.8670510708401977, + "grad_norm": 0.3801513612270355, + "learning_rate": 1.2097560423352293e-05, + "loss": 0.4958, + "step": 31578 + }, + { + "epoch": 0.8670785282811642, + "grad_norm": 0.4642842411994934, + "learning_rate": 1.2097138135994769e-05, + "loss": 0.4992, + "step": 31579 + }, + { + "epoch": 0.8671059857221307, + "grad_norm": 0.41331765055656433, + "learning_rate": 1.2096715844725416e-05, + "loss": 0.4299, + "step": 31580 + }, + { + "epoch": 0.8671334431630972, + "grad_norm": 0.5631094574928284, + "learning_rate": 1.2096293549545017e-05, + "loss": 0.5708, + "step": 31581 + }, + { + "epoch": 0.8671609006040637, + "grad_norm": 0.3748146891593933, + "learning_rate": 1.2095871250454362e-05, + "loss": 0.4804, + "step": 31582 + }, + { + "epoch": 0.8671883580450302, + "grad_norm": 0.431368887424469, + "learning_rate": 1.209544894745424e-05, + "loss": 0.4656, + "step": 31583 + }, + { + "epoch": 0.8672158154859967, + "grad_norm": 0.42296621203422546, + "learning_rate": 1.2095026640545435e-05, + "loss": 0.4988, + "step": 31584 + }, + { + "epoch": 0.8672432729269632, + "grad_norm": 0.42643114924430847, + "learning_rate": 1.2094604329728739e-05, + "loss": 0.5359, + "step": 31585 + }, + { + "epoch": 0.8672707303679297, + "grad_norm": 0.40635889768600464, + "learning_rate": 1.2094182015004934e-05, + "loss": 0.4588, + "step": 31586 + }, + { + "epoch": 0.8672981878088962, + "grad_norm": 0.351245254278183, + "learning_rate": 1.2093759696374814e-05, + "loss": 0.4838, + "step": 31587 + }, + { + "epoch": 0.8673256452498627, + "grad_norm": 0.4385960102081299, + "learning_rate": 1.2093337373839163e-05, + "loss": 0.4629, + "step": 31588 + }, + { + "epoch": 0.8673531026908292, + "grad_norm": 0.4271770417690277, + "learning_rate": 1.209291504739877e-05, + "loss": 0.456, + "step": 31589 + }, + { + "epoch": 0.8673805601317958, + "grad_norm": 0.40859419107437134, + "learning_rate": 1.2092492717054423e-05, + "loss": 0.5214, + "step": 31590 + }, + { + "epoch": 0.8674080175727622, + "grad_norm": 0.3935602605342865, + "learning_rate": 1.2092070382806907e-05, + "loss": 0.4853, + "step": 31591 + }, + { + "epoch": 0.8674354750137288, + "grad_norm": 0.43324166536331177, + "learning_rate": 1.2091648044657011e-05, + "loss": 0.4953, + "step": 31592 + }, + { + "epoch": 0.8674629324546952, + "grad_norm": 0.4370555877685547, + "learning_rate": 1.2091225702605528e-05, + "loss": 0.5115, + "step": 31593 + }, + { + "epoch": 0.8674903898956617, + "grad_norm": 0.4666878879070282, + "learning_rate": 1.2090803356653237e-05, + "loss": 0.4956, + "step": 31594 + }, + { + "epoch": 0.8675178473366282, + "grad_norm": 0.4186277687549591, + "learning_rate": 1.2090381006800936e-05, + "loss": 0.5788, + "step": 31595 + }, + { + "epoch": 0.8675453047775947, + "grad_norm": 0.3628820776939392, + "learning_rate": 1.2089958653049403e-05, + "loss": 0.446, + "step": 31596 + }, + { + "epoch": 0.8675727622185613, + "grad_norm": 0.3588583171367645, + "learning_rate": 1.2089536295399434e-05, + "loss": 0.4834, + "step": 31597 + }, + { + "epoch": 0.8676002196595277, + "grad_norm": 0.38723406195640564, + "learning_rate": 1.2089113933851808e-05, + "loss": 0.4784, + "step": 31598 + }, + { + "epoch": 0.8676276771004943, + "grad_norm": 0.4281860589981079, + "learning_rate": 1.2088691568407321e-05, + "loss": 0.501, + "step": 31599 + }, + { + "epoch": 0.8676551345414607, + "grad_norm": 0.35443490743637085, + "learning_rate": 1.2088269199066759e-05, + "loss": 0.4412, + "step": 31600 + }, + { + "epoch": 0.8676825919824273, + "grad_norm": 0.3788078725337982, + "learning_rate": 1.2087846825830902e-05, + "loss": 0.5097, + "step": 31601 + }, + { + "epoch": 0.8677100494233937, + "grad_norm": 0.35570192337036133, + "learning_rate": 1.2087424448700552e-05, + "loss": 0.3861, + "step": 31602 + }, + { + "epoch": 0.8677375068643602, + "grad_norm": 0.386441171169281, + "learning_rate": 1.2087002067676482e-05, + "loss": 0.4651, + "step": 31603 + }, + { + "epoch": 0.8677649643053268, + "grad_norm": 0.38152217864990234, + "learning_rate": 1.2086579682759492e-05, + "loss": 0.45, + "step": 31604 + }, + { + "epoch": 0.8677924217462932, + "grad_norm": 0.3883955478668213, + "learning_rate": 1.2086157293950365e-05, + "loss": 0.4526, + "step": 31605 + }, + { + "epoch": 0.8678198791872598, + "grad_norm": 0.39036470651626587, + "learning_rate": 1.2085734901249886e-05, + "loss": 0.438, + "step": 31606 + }, + { + "epoch": 0.8678473366282262, + "grad_norm": 0.3728494346141815, + "learning_rate": 1.2085312504658847e-05, + "loss": 0.4843, + "step": 31607 + }, + { + "epoch": 0.8678747940691928, + "grad_norm": 0.5006009936332703, + "learning_rate": 1.2084890104178035e-05, + "loss": 0.5828, + "step": 31608 + }, + { + "epoch": 0.8679022515101592, + "grad_norm": 0.6260147094726562, + "learning_rate": 1.2084467699808237e-05, + "loss": 0.5412, + "step": 31609 + }, + { + "epoch": 0.8679297089511258, + "grad_norm": 0.5722406506538391, + "learning_rate": 1.2084045291550241e-05, + "loss": 0.4462, + "step": 31610 + }, + { + "epoch": 0.8679571663920923, + "grad_norm": 0.45947232842445374, + "learning_rate": 1.2083622879404836e-05, + "loss": 0.4391, + "step": 31611 + }, + { + "epoch": 0.8679846238330587, + "grad_norm": 0.37008780241012573, + "learning_rate": 1.208320046337281e-05, + "loss": 0.5021, + "step": 31612 + }, + { + "epoch": 0.8680120812740253, + "grad_norm": 0.4158608317375183, + "learning_rate": 1.208277804345495e-05, + "loss": 0.4626, + "step": 31613 + }, + { + "epoch": 0.8680395387149917, + "grad_norm": 0.42814093828201294, + "learning_rate": 1.2082355619652045e-05, + "loss": 0.4537, + "step": 31614 + }, + { + "epoch": 0.8680669961559583, + "grad_norm": 0.3351251780986786, + "learning_rate": 1.2081933191964878e-05, + "loss": 0.4105, + "step": 31615 + }, + { + "epoch": 0.8680944535969247, + "grad_norm": 0.3913279175758362, + "learning_rate": 1.2081510760394247e-05, + "loss": 0.5129, + "step": 31616 + }, + { + "epoch": 0.8681219110378913, + "grad_norm": 0.4175621569156647, + "learning_rate": 1.208108832494093e-05, + "loss": 0.4563, + "step": 31617 + }, + { + "epoch": 0.8681493684788578, + "grad_norm": 0.37003374099731445, + "learning_rate": 1.208066588560572e-05, + "loss": 0.4768, + "step": 31618 + }, + { + "epoch": 0.8681768259198243, + "grad_norm": 0.43162956833839417, + "learning_rate": 1.2080243442389405e-05, + "loss": 0.4827, + "step": 31619 + }, + { + "epoch": 0.8682042833607908, + "grad_norm": 0.4078165292739868, + "learning_rate": 1.207982099529277e-05, + "loss": 0.4889, + "step": 31620 + }, + { + "epoch": 0.8682317408017572, + "grad_norm": 0.3562571704387665, + "learning_rate": 1.2079398544316609e-05, + "loss": 0.4858, + "step": 31621 + }, + { + "epoch": 0.8682591982427238, + "grad_norm": 0.43014103174209595, + "learning_rate": 1.2078976089461702e-05, + "loss": 0.5299, + "step": 31622 + }, + { + "epoch": 0.8682866556836902, + "grad_norm": 0.43100211024284363, + "learning_rate": 1.2078553630728843e-05, + "loss": 0.5587, + "step": 31623 + }, + { + "epoch": 0.8683141131246568, + "grad_norm": 0.4150688648223877, + "learning_rate": 1.2078131168118818e-05, + "loss": 0.556, + "step": 31624 + }, + { + "epoch": 0.8683415705656233, + "grad_norm": 0.4101172089576721, + "learning_rate": 1.2077708701632413e-05, + "loss": 0.5061, + "step": 31625 + }, + { + "epoch": 0.8683690280065898, + "grad_norm": 0.40778496861457825, + "learning_rate": 1.2077286231270422e-05, + "loss": 0.4271, + "step": 31626 + }, + { + "epoch": 0.8683964854475563, + "grad_norm": 0.4044745862483978, + "learning_rate": 1.2076863757033628e-05, + "loss": 0.5393, + "step": 31627 + }, + { + "epoch": 0.8684239428885228, + "grad_norm": 0.42857328057289124, + "learning_rate": 1.2076441278922819e-05, + "loss": 0.5259, + "step": 31628 + }, + { + "epoch": 0.8684514003294893, + "grad_norm": 0.40724214911460876, + "learning_rate": 1.2076018796938784e-05, + "loss": 0.5912, + "step": 31629 + }, + { + "epoch": 0.8684788577704557, + "grad_norm": 0.3855232000350952, + "learning_rate": 1.2075596311082312e-05, + "loss": 0.4715, + "step": 31630 + }, + { + "epoch": 0.8685063152114223, + "grad_norm": 0.5054461359977722, + "learning_rate": 1.2075173821354192e-05, + "loss": 0.5461, + "step": 31631 + }, + { + "epoch": 0.8685337726523888, + "grad_norm": 0.4077126681804657, + "learning_rate": 1.207475132775521e-05, + "loss": 0.5305, + "step": 31632 + }, + { + "epoch": 0.8685612300933553, + "grad_norm": 0.4125198423862457, + "learning_rate": 1.2074328830286151e-05, + "loss": 0.4885, + "step": 31633 + }, + { + "epoch": 0.8685886875343218, + "grad_norm": 0.4084867238998413, + "learning_rate": 1.2073906328947811e-05, + "loss": 0.4732, + "step": 31634 + }, + { + "epoch": 0.8686161449752883, + "grad_norm": 0.39985111355781555, + "learning_rate": 1.207348382374097e-05, + "loss": 0.4424, + "step": 31635 + }, + { + "epoch": 0.8686436024162548, + "grad_norm": 0.4138895571231842, + "learning_rate": 1.2073061314666424e-05, + "loss": 0.5922, + "step": 31636 + }, + { + "epoch": 0.8686710598572213, + "grad_norm": 0.33902326226234436, + "learning_rate": 1.2072638801724954e-05, + "loss": 0.3394, + "step": 31637 + }, + { + "epoch": 0.8686985172981878, + "grad_norm": 0.3559619188308716, + "learning_rate": 1.2072216284917352e-05, + "loss": 0.4498, + "step": 31638 + }, + { + "epoch": 0.8687259747391544, + "grad_norm": 0.36807939410209656, + "learning_rate": 1.2071793764244405e-05, + "loss": 0.4829, + "step": 31639 + }, + { + "epoch": 0.8687534321801208, + "grad_norm": 0.3842988908290863, + "learning_rate": 1.2071371239706902e-05, + "loss": 0.4294, + "step": 31640 + }, + { + "epoch": 0.8687808896210873, + "grad_norm": 0.4467127025127411, + "learning_rate": 1.207094871130563e-05, + "loss": 0.4416, + "step": 31641 + }, + { + "epoch": 0.8688083470620538, + "grad_norm": 0.3984508216381073, + "learning_rate": 1.2070526179041378e-05, + "loss": 0.557, + "step": 31642 + }, + { + "epoch": 0.8688358045030203, + "grad_norm": 0.36687788367271423, + "learning_rate": 1.2070103642914932e-05, + "loss": 0.457, + "step": 31643 + }, + { + "epoch": 0.8688632619439868, + "grad_norm": 0.35939621925354004, + "learning_rate": 1.2069681102927084e-05, + "loss": 0.4402, + "step": 31644 + }, + { + "epoch": 0.8688907193849533, + "grad_norm": 0.3983241021633148, + "learning_rate": 1.2069258559078623e-05, + "loss": 0.4559, + "step": 31645 + }, + { + "epoch": 0.8689181768259199, + "grad_norm": 0.45618686079978943, + "learning_rate": 1.2068836011370329e-05, + "loss": 0.5201, + "step": 31646 + }, + { + "epoch": 0.8689456342668863, + "grad_norm": 0.4162209630012512, + "learning_rate": 1.2068413459803e-05, + "loss": 0.5551, + "step": 31647 + }, + { + "epoch": 0.8689730917078529, + "grad_norm": 0.41182029247283936, + "learning_rate": 1.2067990904377416e-05, + "loss": 0.5245, + "step": 31648 + }, + { + "epoch": 0.8690005491488193, + "grad_norm": 0.4035487473011017, + "learning_rate": 1.2067568345094372e-05, + "loss": 0.4688, + "step": 31649 + }, + { + "epoch": 0.8690280065897859, + "grad_norm": 0.4340803027153015, + "learning_rate": 1.206714578195465e-05, + "loss": 0.4614, + "step": 31650 + }, + { + "epoch": 0.8690554640307523, + "grad_norm": 0.4075888395309448, + "learning_rate": 1.2066723214959043e-05, + "loss": 0.4762, + "step": 31651 + }, + { + "epoch": 0.8690829214717188, + "grad_norm": 0.38181158900260925, + "learning_rate": 1.2066300644108337e-05, + "loss": 0.4386, + "step": 31652 + }, + { + "epoch": 0.8691103789126854, + "grad_norm": 0.5001672506332397, + "learning_rate": 1.206587806940332e-05, + "loss": 0.5021, + "step": 31653 + }, + { + "epoch": 0.8691378363536518, + "grad_norm": 0.38483431935310364, + "learning_rate": 1.2065455490844783e-05, + "loss": 0.5922, + "step": 31654 + }, + { + "epoch": 0.8691652937946184, + "grad_norm": 0.3772445619106293, + "learning_rate": 1.2065032908433513e-05, + "loss": 0.4808, + "step": 31655 + }, + { + "epoch": 0.8691927512355848, + "grad_norm": 0.44232377409935, + "learning_rate": 1.2064610322170296e-05, + "loss": 0.567, + "step": 31656 + }, + { + "epoch": 0.8692202086765514, + "grad_norm": 0.353787362575531, + "learning_rate": 1.2064187732055922e-05, + "loss": 0.5463, + "step": 31657 + }, + { + "epoch": 0.8692476661175178, + "grad_norm": 0.3550264239311218, + "learning_rate": 1.206376513809118e-05, + "loss": 0.4545, + "step": 31658 + }, + { + "epoch": 0.8692751235584844, + "grad_norm": 0.44025999307632446, + "learning_rate": 1.2063342540276857e-05, + "loss": 0.5337, + "step": 31659 + }, + { + "epoch": 0.8693025809994509, + "grad_norm": 0.33932697772979736, + "learning_rate": 1.206291993861374e-05, + "loss": 0.4143, + "step": 31660 + }, + { + "epoch": 0.8693300384404173, + "grad_norm": 0.39654868841171265, + "learning_rate": 1.206249733310262e-05, + "loss": 0.4421, + "step": 31661 + }, + { + "epoch": 0.8693574958813839, + "grad_norm": 0.42295345664024353, + "learning_rate": 1.2062074723744286e-05, + "loss": 0.4093, + "step": 31662 + }, + { + "epoch": 0.8693849533223503, + "grad_norm": 0.4290679097175598, + "learning_rate": 1.2061652110539521e-05, + "loss": 0.5029, + "step": 31663 + }, + { + "epoch": 0.8694124107633169, + "grad_norm": 0.4308621287345886, + "learning_rate": 1.2061229493489117e-05, + "loss": 0.5603, + "step": 31664 + }, + { + "epoch": 0.8694398682042833, + "grad_norm": 0.39272138476371765, + "learning_rate": 1.2060806872593867e-05, + "loss": 0.4509, + "step": 31665 + }, + { + "epoch": 0.8694673256452499, + "grad_norm": 0.37769007682800293, + "learning_rate": 1.2060384247854548e-05, + "loss": 0.4607, + "step": 31666 + }, + { + "epoch": 0.8694947830862164, + "grad_norm": 0.3870627284049988, + "learning_rate": 1.205996161927196e-05, + "loss": 0.5334, + "step": 31667 + }, + { + "epoch": 0.8695222405271829, + "grad_norm": 0.41003304719924927, + "learning_rate": 1.2059538986846883e-05, + "loss": 0.5362, + "step": 31668 + }, + { + "epoch": 0.8695496979681494, + "grad_norm": 0.4000462591648102, + "learning_rate": 1.205911635058011e-05, + "loss": 0.5203, + "step": 31669 + }, + { + "epoch": 0.8695771554091158, + "grad_norm": 0.3935049772262573, + "learning_rate": 1.2058693710472426e-05, + "loss": 0.5042, + "step": 31670 + }, + { + "epoch": 0.8696046128500824, + "grad_norm": 0.407703161239624, + "learning_rate": 1.2058271066524623e-05, + "loss": 0.5413, + "step": 31671 + }, + { + "epoch": 0.8696320702910488, + "grad_norm": 0.3974623680114746, + "learning_rate": 1.2057848418737488e-05, + "loss": 0.6216, + "step": 31672 + }, + { + "epoch": 0.8696595277320154, + "grad_norm": 0.3803424835205078, + "learning_rate": 1.2057425767111808e-05, + "loss": 0.5081, + "step": 31673 + }, + { + "epoch": 0.8696869851729819, + "grad_norm": 0.3655150532722473, + "learning_rate": 1.2057003111648371e-05, + "loss": 0.4824, + "step": 31674 + }, + { + "epoch": 0.8697144426139484, + "grad_norm": 0.38135477900505066, + "learning_rate": 1.205658045234797e-05, + "loss": 0.4725, + "step": 31675 + }, + { + "epoch": 0.8697419000549149, + "grad_norm": 0.37456902861595154, + "learning_rate": 1.2056157789211387e-05, + "loss": 0.5208, + "step": 31676 + }, + { + "epoch": 0.8697693574958814, + "grad_norm": 0.4727851450443268, + "learning_rate": 1.2055735122239414e-05, + "loss": 0.4935, + "step": 31677 + }, + { + "epoch": 0.8697968149368479, + "grad_norm": 0.40783873200416565, + "learning_rate": 1.2055312451432839e-05, + "loss": 0.4766, + "step": 31678 + }, + { + "epoch": 0.8698242723778143, + "grad_norm": 0.4057944416999817, + "learning_rate": 1.205488977679245e-05, + "loss": 0.5568, + "step": 31679 + }, + { + "epoch": 0.8698517298187809, + "grad_norm": 0.46521854400634766, + "learning_rate": 1.2054467098319039e-05, + "loss": 0.4948, + "step": 31680 + }, + { + "epoch": 0.8698791872597474, + "grad_norm": 0.40025267004966736, + "learning_rate": 1.2054044416013389e-05, + "loss": 0.5377, + "step": 31681 + }, + { + "epoch": 0.8699066447007139, + "grad_norm": 0.37527114152908325, + "learning_rate": 1.2053621729876288e-05, + "loss": 0.5174, + "step": 31682 + }, + { + "epoch": 0.8699341021416804, + "grad_norm": 0.3960643410682678, + "learning_rate": 1.2053199039908529e-05, + "loss": 0.4681, + "step": 31683 + }, + { + "epoch": 0.8699615595826469, + "grad_norm": 0.43398910760879517, + "learning_rate": 1.2052776346110897e-05, + "loss": 0.5391, + "step": 31684 + }, + { + "epoch": 0.8699890170236134, + "grad_norm": 0.38053351640701294, + "learning_rate": 1.2052353648484186e-05, + "loss": 0.4534, + "step": 31685 + }, + { + "epoch": 0.8700164744645799, + "grad_norm": 0.3521953821182251, + "learning_rate": 1.2051930947029177e-05, + "loss": 0.5431, + "step": 31686 + }, + { + "epoch": 0.8700439319055464, + "grad_norm": 0.400277316570282, + "learning_rate": 1.205150824174666e-05, + "loss": 0.4657, + "step": 31687 + }, + { + "epoch": 0.870071389346513, + "grad_norm": 0.38259997963905334, + "learning_rate": 1.2051085532637432e-05, + "loss": 0.4427, + "step": 31688 + }, + { + "epoch": 0.8700988467874794, + "grad_norm": 0.4148651957511902, + "learning_rate": 1.2050662819702273e-05, + "loss": 0.6388, + "step": 31689 + }, + { + "epoch": 0.870126304228446, + "grad_norm": 0.3767339289188385, + "learning_rate": 1.2050240102941969e-05, + "loss": 0.5227, + "step": 31690 + }, + { + "epoch": 0.8701537616694124, + "grad_norm": 0.3606727719306946, + "learning_rate": 1.2049817382357316e-05, + "loss": 0.4668, + "step": 31691 + }, + { + "epoch": 0.8701812191103789, + "grad_norm": 0.41241946816444397, + "learning_rate": 1.2049394657949097e-05, + "loss": 0.498, + "step": 31692 + }, + { + "epoch": 0.8702086765513454, + "grad_norm": 0.4232375919818878, + "learning_rate": 1.2048971929718105e-05, + "loss": 0.4542, + "step": 31693 + }, + { + "epoch": 0.8702361339923119, + "grad_norm": 0.37773871421813965, + "learning_rate": 1.2048549197665127e-05, + "loss": 0.4297, + "step": 31694 + }, + { + "epoch": 0.8702635914332785, + "grad_norm": 0.36337199807167053, + "learning_rate": 1.2048126461790948e-05, + "loss": 0.4949, + "step": 31695 + }, + { + "epoch": 0.8702910488742449, + "grad_norm": 0.39970192313194275, + "learning_rate": 1.2047703722096361e-05, + "loss": 0.4353, + "step": 31696 + }, + { + "epoch": 0.8703185063152115, + "grad_norm": 0.4104253351688385, + "learning_rate": 1.2047280978582152e-05, + "loss": 0.5033, + "step": 31697 + }, + { + "epoch": 0.8703459637561779, + "grad_norm": 0.3614622950553894, + "learning_rate": 1.2046858231249114e-05, + "loss": 0.4581, + "step": 31698 + }, + { + "epoch": 0.8703734211971444, + "grad_norm": 0.36246806383132935, + "learning_rate": 1.2046435480098032e-05, + "loss": 0.469, + "step": 31699 + }, + { + "epoch": 0.8704008786381109, + "grad_norm": 0.4749883711338043, + "learning_rate": 1.2046012725129689e-05, + "loss": 0.5537, + "step": 31700 + }, + { + "epoch": 0.8704283360790774, + "grad_norm": 0.44101083278656006, + "learning_rate": 1.2045589966344884e-05, + "loss": 0.553, + "step": 31701 + }, + { + "epoch": 0.870455793520044, + "grad_norm": 0.41452717781066895, + "learning_rate": 1.20451672037444e-05, + "loss": 0.5425, + "step": 31702 + }, + { + "epoch": 0.8704832509610104, + "grad_norm": 0.43778014183044434, + "learning_rate": 1.2044744437329025e-05, + "loss": 0.5215, + "step": 31703 + }, + { + "epoch": 0.870510708401977, + "grad_norm": 0.4183540940284729, + "learning_rate": 1.2044321667099553e-05, + "loss": 0.5694, + "step": 31704 + }, + { + "epoch": 0.8705381658429434, + "grad_norm": 0.43823695182800293, + "learning_rate": 1.2043898893056763e-05, + "loss": 0.545, + "step": 31705 + }, + { + "epoch": 0.87056562328391, + "grad_norm": 0.4044095575809479, + "learning_rate": 1.2043476115201453e-05, + "loss": 0.4808, + "step": 31706 + }, + { + "epoch": 0.8705930807248764, + "grad_norm": 0.3744589686393738, + "learning_rate": 1.2043053333534407e-05, + "loss": 0.4152, + "step": 31707 + }, + { + "epoch": 0.870620538165843, + "grad_norm": 0.35266631841659546, + "learning_rate": 1.2042630548056413e-05, + "loss": 0.4343, + "step": 31708 + }, + { + "epoch": 0.8706479956068095, + "grad_norm": 0.43429768085479736, + "learning_rate": 1.2042207758768262e-05, + "loss": 0.4344, + "step": 31709 + }, + { + "epoch": 0.8706754530477759, + "grad_norm": 0.4127954840660095, + "learning_rate": 1.2041784965670742e-05, + "loss": 0.469, + "step": 31710 + }, + { + "epoch": 0.8707029104887425, + "grad_norm": 0.44362592697143555, + "learning_rate": 1.2041362168764642e-05, + "loss": 0.5701, + "step": 31711 + }, + { + "epoch": 0.8707303679297089, + "grad_norm": 0.3784850537776947, + "learning_rate": 1.2040939368050752e-05, + "loss": 0.4446, + "step": 31712 + }, + { + "epoch": 0.8707578253706755, + "grad_norm": 0.399278461933136, + "learning_rate": 1.2040516563529854e-05, + "loss": 0.4874, + "step": 31713 + }, + { + "epoch": 0.8707852828116419, + "grad_norm": 0.398638516664505, + "learning_rate": 1.2040093755202744e-05, + "loss": 0.5665, + "step": 31714 + }, + { + "epoch": 0.8708127402526085, + "grad_norm": 0.3658207058906555, + "learning_rate": 1.2039670943070207e-05, + "loss": 0.4541, + "step": 31715 + }, + { + "epoch": 0.870840197693575, + "grad_norm": 0.3910529613494873, + "learning_rate": 1.2039248127133035e-05, + "loss": 0.4453, + "step": 31716 + }, + { + "epoch": 0.8708676551345415, + "grad_norm": 0.40921375155448914, + "learning_rate": 1.2038825307392013e-05, + "loss": 0.5132, + "step": 31717 + }, + { + "epoch": 0.870895112575508, + "grad_norm": 0.35969141125679016, + "learning_rate": 1.2038402483847929e-05, + "loss": 0.4535, + "step": 31718 + }, + { + "epoch": 0.8709225700164744, + "grad_norm": 0.3709227740764618, + "learning_rate": 1.2037979656501578e-05, + "loss": 0.467, + "step": 31719 + }, + { + "epoch": 0.870950027457441, + "grad_norm": 0.3793713450431824, + "learning_rate": 1.2037556825353741e-05, + "loss": 0.5671, + "step": 31720 + }, + { + "epoch": 0.8709774848984074, + "grad_norm": 0.37220171093940735, + "learning_rate": 1.203713399040521e-05, + "loss": 0.4686, + "step": 31721 + }, + { + "epoch": 0.871004942339374, + "grad_norm": 0.4014543890953064, + "learning_rate": 1.2036711151656776e-05, + "loss": 0.5563, + "step": 31722 + }, + { + "epoch": 0.8710323997803405, + "grad_norm": 0.43236851692199707, + "learning_rate": 1.2036288309109223e-05, + "loss": 0.4504, + "step": 31723 + }, + { + "epoch": 0.871059857221307, + "grad_norm": 0.38676249980926514, + "learning_rate": 1.2035865462763346e-05, + "loss": 0.4525, + "step": 31724 + }, + { + "epoch": 0.8710873146622735, + "grad_norm": 0.4216510057449341, + "learning_rate": 1.203544261261993e-05, + "loss": 0.4786, + "step": 31725 + }, + { + "epoch": 0.87111477210324, + "grad_norm": 0.3941477835178375, + "learning_rate": 1.203501975867976e-05, + "loss": 0.4438, + "step": 31726 + }, + { + "epoch": 0.8711422295442065, + "grad_norm": 0.45727089047431946, + "learning_rate": 1.2034596900943632e-05, + "loss": 0.525, + "step": 31727 + }, + { + "epoch": 0.8711696869851729, + "grad_norm": 0.40201425552368164, + "learning_rate": 1.2034174039412329e-05, + "loss": 0.4528, + "step": 31728 + }, + { + "epoch": 0.8711971444261395, + "grad_norm": 0.4060031473636627, + "learning_rate": 1.2033751174086646e-05, + "loss": 0.5113, + "step": 31729 + }, + { + "epoch": 0.871224601867106, + "grad_norm": 0.43756580352783203, + "learning_rate": 1.2033328304967364e-05, + "loss": 0.4966, + "step": 31730 + }, + { + "epoch": 0.8712520593080725, + "grad_norm": 0.36767688393592834, + "learning_rate": 1.2032905432055277e-05, + "loss": 0.4636, + "step": 31731 + }, + { + "epoch": 0.871279516749039, + "grad_norm": 0.39948758482933044, + "learning_rate": 1.2032482555351173e-05, + "loss": 0.4617, + "step": 31732 + }, + { + "epoch": 0.8713069741900055, + "grad_norm": 0.3776066303253174, + "learning_rate": 1.2032059674855837e-05, + "loss": 0.4366, + "step": 31733 + }, + { + "epoch": 0.871334431630972, + "grad_norm": 0.349153995513916, + "learning_rate": 1.2031636790570067e-05, + "loss": 0.4652, + "step": 31734 + }, + { + "epoch": 0.8713618890719385, + "grad_norm": 0.39699193835258484, + "learning_rate": 1.2031213902494643e-05, + "loss": 0.5631, + "step": 31735 + }, + { + "epoch": 0.871389346512905, + "grad_norm": 0.4125523269176483, + "learning_rate": 1.2030791010630355e-05, + "loss": 0.4715, + "step": 31736 + }, + { + "epoch": 0.8714168039538714, + "grad_norm": 0.4020121693611145, + "learning_rate": 1.2030368114977997e-05, + "loss": 0.6101, + "step": 31737 + }, + { + "epoch": 0.871444261394838, + "grad_norm": 0.34408894181251526, + "learning_rate": 1.2029945215538355e-05, + "loss": 0.4675, + "step": 31738 + }, + { + "epoch": 0.8714717188358045, + "grad_norm": 0.36047104001045227, + "learning_rate": 1.2029522312312214e-05, + "loss": 0.4509, + "step": 31739 + }, + { + "epoch": 0.871499176276771, + "grad_norm": 0.34066787362098694, + "learning_rate": 1.2029099405300368e-05, + "loss": 0.4866, + "step": 31740 + }, + { + "epoch": 0.8715266337177375, + "grad_norm": 0.3502885103225708, + "learning_rate": 1.2028676494503601e-05, + "loss": 0.5312, + "step": 31741 + }, + { + "epoch": 0.871554091158704, + "grad_norm": 0.3771030008792877, + "learning_rate": 1.2028253579922708e-05, + "loss": 0.4811, + "step": 31742 + }, + { + "epoch": 0.8715815485996705, + "grad_norm": 0.4103029668331146, + "learning_rate": 1.2027830661558477e-05, + "loss": 0.5636, + "step": 31743 + }, + { + "epoch": 0.871609006040637, + "grad_norm": 0.39628323912620544, + "learning_rate": 1.2027407739411689e-05, + "loss": 0.4536, + "step": 31744 + }, + { + "epoch": 0.8716364634816035, + "grad_norm": 0.39756056666374207, + "learning_rate": 1.2026984813483143e-05, + "loss": 0.4943, + "step": 31745 + }, + { + "epoch": 0.8716639209225701, + "grad_norm": 0.3826403021812439, + "learning_rate": 1.2026561883773619e-05, + "loss": 0.5134, + "step": 31746 + }, + { + "epoch": 0.8716913783635365, + "grad_norm": 0.3825899064540863, + "learning_rate": 1.2026138950283914e-05, + "loss": 0.5769, + "step": 31747 + }, + { + "epoch": 0.871718835804503, + "grad_norm": 0.36443251371383667, + "learning_rate": 1.2025716013014815e-05, + "loss": 0.4457, + "step": 31748 + }, + { + "epoch": 0.8717462932454695, + "grad_norm": 0.7360069155693054, + "learning_rate": 1.2025293071967103e-05, + "loss": 0.4813, + "step": 31749 + }, + { + "epoch": 0.871773750686436, + "grad_norm": 0.44219285249710083, + "learning_rate": 1.2024870127141577e-05, + "loss": 0.4925, + "step": 31750 + }, + { + "epoch": 0.8718012081274025, + "grad_norm": 0.47277334332466125, + "learning_rate": 1.2024447178539022e-05, + "loss": 0.5051, + "step": 31751 + }, + { + "epoch": 0.871828665568369, + "grad_norm": 0.34962978959083557, + "learning_rate": 1.2024024226160224e-05, + "loss": 0.5175, + "step": 31752 + }, + { + "epoch": 0.8718561230093356, + "grad_norm": 0.4262475371360779, + "learning_rate": 1.2023601270005978e-05, + "loss": 0.5245, + "step": 31753 + }, + { + "epoch": 0.871883580450302, + "grad_norm": 0.33628734946250916, + "learning_rate": 1.2023178310077068e-05, + "loss": 0.4373, + "step": 31754 + }, + { + "epoch": 0.8719110378912686, + "grad_norm": 0.3383757174015045, + "learning_rate": 1.2022755346374286e-05, + "loss": 0.3956, + "step": 31755 + }, + { + "epoch": 0.871938495332235, + "grad_norm": 0.38651955127716064, + "learning_rate": 1.202233237889842e-05, + "loss": 0.4943, + "step": 31756 + }, + { + "epoch": 0.8719659527732015, + "grad_norm": 0.44454750418663025, + "learning_rate": 1.2021909407650256e-05, + "loss": 0.5704, + "step": 31757 + }, + { + "epoch": 0.871993410214168, + "grad_norm": 0.36933431029319763, + "learning_rate": 1.2021486432630589e-05, + "loss": 0.4243, + "step": 31758 + }, + { + "epoch": 0.8720208676551345, + "grad_norm": 0.41504836082458496, + "learning_rate": 1.20210634538402e-05, + "loss": 0.5056, + "step": 31759 + }, + { + "epoch": 0.8720483250961011, + "grad_norm": 0.4241590201854706, + "learning_rate": 1.2020640471279888e-05, + "loss": 0.5428, + "step": 31760 + }, + { + "epoch": 0.8720757825370675, + "grad_norm": 0.41125398874282837, + "learning_rate": 1.2020217484950434e-05, + "loss": 0.5272, + "step": 31761 + }, + { + "epoch": 0.8721032399780341, + "grad_norm": 0.3474518358707428, + "learning_rate": 1.2019794494852628e-05, + "loss": 0.4178, + "step": 31762 + }, + { + "epoch": 0.8721306974190005, + "grad_norm": 0.40074214339256287, + "learning_rate": 1.2019371500987265e-05, + "loss": 0.445, + "step": 31763 + }, + { + "epoch": 0.8721581548599671, + "grad_norm": 0.40242859721183777, + "learning_rate": 1.2018948503355125e-05, + "loss": 0.3684, + "step": 31764 + }, + { + "epoch": 0.8721856123009335, + "grad_norm": 0.39941275119781494, + "learning_rate": 1.2018525501957004e-05, + "loss": 0.5571, + "step": 31765 + }, + { + "epoch": 0.8722130697419, + "grad_norm": 0.36917898058891296, + "learning_rate": 1.2018102496793688e-05, + "loss": 0.4642, + "step": 31766 + }, + { + "epoch": 0.8722405271828666, + "grad_norm": 0.3781525790691376, + "learning_rate": 1.2017679487865967e-05, + "loss": 0.5826, + "step": 31767 + }, + { + "epoch": 0.872267984623833, + "grad_norm": 0.40221431851387024, + "learning_rate": 1.201725647517463e-05, + "loss": 0.4623, + "step": 31768 + }, + { + "epoch": 0.8722954420647996, + "grad_norm": 0.36389055848121643, + "learning_rate": 1.2016833458720467e-05, + "loss": 0.5253, + "step": 31769 + }, + { + "epoch": 0.872322899505766, + "grad_norm": 0.39639490842819214, + "learning_rate": 1.2016410438504264e-05, + "loss": 0.4685, + "step": 31770 + }, + { + "epoch": 0.8723503569467326, + "grad_norm": 0.3534793555736542, + "learning_rate": 1.2015987414526812e-05, + "loss": 0.5573, + "step": 31771 + }, + { + "epoch": 0.872377814387699, + "grad_norm": 0.35887348651885986, + "learning_rate": 1.20155643867889e-05, + "loss": 0.5285, + "step": 31772 + }, + { + "epoch": 0.8724052718286656, + "grad_norm": 0.38705700635910034, + "learning_rate": 1.201514135529132e-05, + "loss": 0.5019, + "step": 31773 + }, + { + "epoch": 0.8724327292696321, + "grad_norm": 0.3917185366153717, + "learning_rate": 1.2014718320034854e-05, + "loss": 0.5326, + "step": 31774 + }, + { + "epoch": 0.8724601867105986, + "grad_norm": 0.3924156725406647, + "learning_rate": 1.2014295281020298e-05, + "loss": 0.4267, + "step": 31775 + }, + { + "epoch": 0.8724876441515651, + "grad_norm": 0.4275032877922058, + "learning_rate": 1.2013872238248439e-05, + "loss": 0.5094, + "step": 31776 + }, + { + "epoch": 0.8725151015925315, + "grad_norm": 0.3948640525341034, + "learning_rate": 1.2013449191720063e-05, + "loss": 0.4906, + "step": 31777 + }, + { + "epoch": 0.8725425590334981, + "grad_norm": 0.3737691342830658, + "learning_rate": 1.2013026141435963e-05, + "loss": 0.5471, + "step": 31778 + }, + { + "epoch": 0.8725700164744645, + "grad_norm": 0.39559319615364075, + "learning_rate": 1.2012603087396926e-05, + "loss": 0.5294, + "step": 31779 + }, + { + "epoch": 0.8725974739154311, + "grad_norm": 0.3751121163368225, + "learning_rate": 1.2012180029603744e-05, + "loss": 0.5673, + "step": 31780 + }, + { + "epoch": 0.8726249313563976, + "grad_norm": 0.5202319025993347, + "learning_rate": 1.2011756968057202e-05, + "loss": 0.4837, + "step": 31781 + }, + { + "epoch": 0.8726523887973641, + "grad_norm": 0.3846253752708435, + "learning_rate": 1.2011333902758091e-05, + "loss": 0.4025, + "step": 31782 + }, + { + "epoch": 0.8726798462383306, + "grad_norm": 0.44339439272880554, + "learning_rate": 1.20109108337072e-05, + "loss": 0.5921, + "step": 31783 + }, + { + "epoch": 0.872707303679297, + "grad_norm": 0.41370755434036255, + "learning_rate": 1.201048776090532e-05, + "loss": 0.5265, + "step": 31784 + }, + { + "epoch": 0.8727347611202636, + "grad_norm": 0.4096059501171112, + "learning_rate": 1.201006468435324e-05, + "loss": 0.4412, + "step": 31785 + }, + { + "epoch": 0.87276221856123, + "grad_norm": 0.4163645803928375, + "learning_rate": 1.2009641604051744e-05, + "loss": 0.4613, + "step": 31786 + }, + { + "epoch": 0.8727896760021966, + "grad_norm": 0.40264567732810974, + "learning_rate": 1.2009218520001627e-05, + "loss": 0.4443, + "step": 31787 + }, + { + "epoch": 0.8728171334431631, + "grad_norm": 0.34742844104766846, + "learning_rate": 1.2008795432203676e-05, + "loss": 0.4747, + "step": 31788 + }, + { + "epoch": 0.8728445908841296, + "grad_norm": 0.3947164714336395, + "learning_rate": 1.2008372340658681e-05, + "loss": 0.4916, + "step": 31789 + }, + { + "epoch": 0.8728720483250961, + "grad_norm": 0.3626787066459656, + "learning_rate": 1.2007949245367432e-05, + "loss": 0.4809, + "step": 31790 + }, + { + "epoch": 0.8728995057660626, + "grad_norm": 0.378290057182312, + "learning_rate": 1.2007526146330713e-05, + "loss": 0.4274, + "step": 31791 + }, + { + "epoch": 0.8729269632070291, + "grad_norm": 0.3793698251247406, + "learning_rate": 1.200710304354932e-05, + "loss": 0.4539, + "step": 31792 + }, + { + "epoch": 0.8729544206479956, + "grad_norm": 0.3822493851184845, + "learning_rate": 1.2006679937024039e-05, + "loss": 0.482, + "step": 31793 + }, + { + "epoch": 0.8729818780889621, + "grad_norm": 0.4084101915359497, + "learning_rate": 1.2006256826755658e-05, + "loss": 0.4954, + "step": 31794 + }, + { + "epoch": 0.8730093355299287, + "grad_norm": 0.385445773601532, + "learning_rate": 1.2005833712744967e-05, + "loss": 0.3683, + "step": 31795 + }, + { + "epoch": 0.8730367929708951, + "grad_norm": 0.5624918937683105, + "learning_rate": 1.2005410594992757e-05, + "loss": 0.4051, + "step": 31796 + }, + { + "epoch": 0.8730642504118616, + "grad_norm": 0.40518176555633545, + "learning_rate": 1.2004987473499819e-05, + "loss": 0.5506, + "step": 31797 + }, + { + "epoch": 0.8730917078528281, + "grad_norm": 0.37118038535118103, + "learning_rate": 1.2004564348266936e-05, + "loss": 0.389, + "step": 31798 + }, + { + "epoch": 0.8731191652937946, + "grad_norm": 0.38809269666671753, + "learning_rate": 1.20041412192949e-05, + "loss": 0.5495, + "step": 31799 + }, + { + "epoch": 0.8731466227347611, + "grad_norm": 0.516579270362854, + "learning_rate": 1.2003718086584503e-05, + "loss": 0.5189, + "step": 31800 + }, + { + "epoch": 0.8731740801757276, + "grad_norm": 0.40232235193252563, + "learning_rate": 1.2003294950136533e-05, + "loss": 0.5378, + "step": 31801 + }, + { + "epoch": 0.8732015376166942, + "grad_norm": 0.4514601230621338, + "learning_rate": 1.2002871809951777e-05, + "loss": 0.5082, + "step": 31802 + }, + { + "epoch": 0.8732289950576606, + "grad_norm": 0.3800548017024994, + "learning_rate": 1.2002448666031026e-05, + "loss": 0.5034, + "step": 31803 + }, + { + "epoch": 0.8732564524986272, + "grad_norm": 0.4707348644733429, + "learning_rate": 1.200202551837507e-05, + "loss": 0.4754, + "step": 31804 + }, + { + "epoch": 0.8732839099395936, + "grad_norm": 0.4020514190196991, + "learning_rate": 1.2001602366984698e-05, + "loss": 0.4607, + "step": 31805 + }, + { + "epoch": 0.8733113673805601, + "grad_norm": 0.44606342911720276, + "learning_rate": 1.2001179211860696e-05, + "loss": 0.4007, + "step": 31806 + }, + { + "epoch": 0.8733388248215266, + "grad_norm": 0.43890783190727234, + "learning_rate": 1.200075605300386e-05, + "loss": 0.55, + "step": 31807 + }, + { + "epoch": 0.8733662822624931, + "grad_norm": 0.48364341259002686, + "learning_rate": 1.200033289041497e-05, + "loss": 0.5266, + "step": 31808 + }, + { + "epoch": 0.8733937397034597, + "grad_norm": 0.40394335985183716, + "learning_rate": 1.1999909724094826e-05, + "loss": 0.429, + "step": 31809 + }, + { + "epoch": 0.8734211971444261, + "grad_norm": 0.4339142441749573, + "learning_rate": 1.1999486554044208e-05, + "loss": 0.5589, + "step": 31810 + }, + { + "epoch": 0.8734486545853927, + "grad_norm": 0.3961137533187866, + "learning_rate": 1.199906338026391e-05, + "loss": 0.5134, + "step": 31811 + }, + { + "epoch": 0.8734761120263591, + "grad_norm": 0.385503351688385, + "learning_rate": 1.1998640202754724e-05, + "loss": 0.4957, + "step": 31812 + }, + { + "epoch": 0.8735035694673257, + "grad_norm": 0.39601874351501465, + "learning_rate": 1.1998217021517435e-05, + "loss": 0.5431, + "step": 31813 + }, + { + "epoch": 0.8735310269082921, + "grad_norm": 0.42424455285072327, + "learning_rate": 1.1997793836552832e-05, + "loss": 0.4573, + "step": 31814 + }, + { + "epoch": 0.8735584843492586, + "grad_norm": 0.40475431084632874, + "learning_rate": 1.1997370647861706e-05, + "loss": 0.5796, + "step": 31815 + }, + { + "epoch": 0.8735859417902252, + "grad_norm": 0.4121479094028473, + "learning_rate": 1.1996947455444846e-05, + "loss": 0.5259, + "step": 31816 + }, + { + "epoch": 0.8736133992311916, + "grad_norm": 0.437285453081131, + "learning_rate": 1.1996524259303043e-05, + "loss": 0.5095, + "step": 31817 + }, + { + "epoch": 0.8736408566721582, + "grad_norm": 0.37723395228385925, + "learning_rate": 1.1996101059437086e-05, + "loss": 0.4836, + "step": 31818 + }, + { + "epoch": 0.8736683141131246, + "grad_norm": 0.4225607216358185, + "learning_rate": 1.199567785584776e-05, + "loss": 0.5352, + "step": 31819 + }, + { + "epoch": 0.8736957715540912, + "grad_norm": 0.3953564465045929, + "learning_rate": 1.1995254648535861e-05, + "loss": 0.5044, + "step": 31820 + }, + { + "epoch": 0.8737232289950576, + "grad_norm": 0.4088812470436096, + "learning_rate": 1.1994831437502172e-05, + "loss": 0.5352, + "step": 31821 + }, + { + "epoch": 0.8737506864360242, + "grad_norm": 0.42399170994758606, + "learning_rate": 1.1994408222747488e-05, + "loss": 0.5104, + "step": 31822 + }, + { + "epoch": 0.8737781438769907, + "grad_norm": 0.42973560094833374, + "learning_rate": 1.1993985004272596e-05, + "loss": 0.4954, + "step": 31823 + }, + { + "epoch": 0.8738056013179571, + "grad_norm": 0.39414912462234497, + "learning_rate": 1.1993561782078286e-05, + "loss": 0.5415, + "step": 31824 + }, + { + "epoch": 0.8738330587589237, + "grad_norm": 0.353997141122818, + "learning_rate": 1.1993138556165346e-05, + "loss": 0.4179, + "step": 31825 + }, + { + "epoch": 0.8738605161998901, + "grad_norm": 0.5182406306266785, + "learning_rate": 1.1992715326534568e-05, + "loss": 0.4721, + "step": 31826 + }, + { + "epoch": 0.8738879736408567, + "grad_norm": 0.4468846321105957, + "learning_rate": 1.199229209318674e-05, + "loss": 0.5967, + "step": 31827 + }, + { + "epoch": 0.8739154310818231, + "grad_norm": 0.4158179759979248, + "learning_rate": 1.1991868856122651e-05, + "loss": 0.4877, + "step": 31828 + }, + { + "epoch": 0.8739428885227897, + "grad_norm": 0.34615716338157654, + "learning_rate": 1.1991445615343087e-05, + "loss": 0.4801, + "step": 31829 + }, + { + "epoch": 0.8739703459637562, + "grad_norm": 0.49149075150489807, + "learning_rate": 1.1991022370848845e-05, + "loss": 0.5378, + "step": 31830 + }, + { + "epoch": 0.8739978034047227, + "grad_norm": 0.5359712243080139, + "learning_rate": 1.1990599122640712e-05, + "loss": 0.4586, + "step": 31831 + }, + { + "epoch": 0.8740252608456892, + "grad_norm": 0.3539796471595764, + "learning_rate": 1.1990175870719472e-05, + "loss": 0.5256, + "step": 31832 + }, + { + "epoch": 0.8740527182866556, + "grad_norm": 0.378316193819046, + "learning_rate": 1.1989752615085923e-05, + "loss": 0.501, + "step": 31833 + }, + { + "epoch": 0.8740801757276222, + "grad_norm": 0.42206141352653503, + "learning_rate": 1.1989329355740846e-05, + "loss": 0.4576, + "step": 31834 + }, + { + "epoch": 0.8741076331685886, + "grad_norm": 0.5431830883026123, + "learning_rate": 1.198890609268504e-05, + "loss": 0.573, + "step": 31835 + }, + { + "epoch": 0.8741350906095552, + "grad_norm": 0.39128991961479187, + "learning_rate": 1.1988482825919287e-05, + "loss": 0.579, + "step": 31836 + }, + { + "epoch": 0.8741625480505217, + "grad_norm": 0.437341570854187, + "learning_rate": 1.1988059555444378e-05, + "loss": 0.4741, + "step": 31837 + }, + { + "epoch": 0.8741900054914882, + "grad_norm": 0.4063326418399811, + "learning_rate": 1.1987636281261104e-05, + "loss": 0.5454, + "step": 31838 + }, + { + "epoch": 0.8742174629324547, + "grad_norm": 0.3886168301105499, + "learning_rate": 1.1987213003370253e-05, + "loss": 0.5148, + "step": 31839 + }, + { + "epoch": 0.8742449203734212, + "grad_norm": 0.3732737600803375, + "learning_rate": 1.1986789721772617e-05, + "loss": 0.5207, + "step": 31840 + }, + { + "epoch": 0.8742723778143877, + "grad_norm": 0.4126232862472534, + "learning_rate": 1.1986366436468986e-05, + "loss": 0.5164, + "step": 31841 + }, + { + "epoch": 0.8742998352553542, + "grad_norm": 0.3686927556991577, + "learning_rate": 1.1985943147460141e-05, + "loss": 0.4398, + "step": 31842 + }, + { + "epoch": 0.8743272926963207, + "grad_norm": 0.4349934458732605, + "learning_rate": 1.1985519854746886e-05, + "loss": 0.4949, + "step": 31843 + }, + { + "epoch": 0.8743547501372873, + "grad_norm": 0.4629543423652649, + "learning_rate": 1.1985096558329998e-05, + "loss": 0.4784, + "step": 31844 + }, + { + "epoch": 0.8743822075782537, + "grad_norm": 0.3778669834136963, + "learning_rate": 1.198467325821027e-05, + "loss": 0.5128, + "step": 31845 + }, + { + "epoch": 0.8744096650192202, + "grad_norm": 0.40083444118499756, + "learning_rate": 1.1984249954388499e-05, + "loss": 0.4755, + "step": 31846 + }, + { + "epoch": 0.8744371224601867, + "grad_norm": 0.3765396475791931, + "learning_rate": 1.1983826646865463e-05, + "loss": 0.467, + "step": 31847 + }, + { + "epoch": 0.8744645799011532, + "grad_norm": 0.4069221615791321, + "learning_rate": 1.1983403335641961e-05, + "loss": 0.5675, + "step": 31848 + }, + { + "epoch": 0.8744920373421197, + "grad_norm": 0.6716517210006714, + "learning_rate": 1.1982980020718777e-05, + "loss": 0.508, + "step": 31849 + }, + { + "epoch": 0.8745194947830862, + "grad_norm": 0.359580934047699, + "learning_rate": 1.1982556702096702e-05, + "loss": 0.4604, + "step": 31850 + }, + { + "epoch": 0.8745469522240528, + "grad_norm": 0.4504145681858063, + "learning_rate": 1.1982133379776526e-05, + "loss": 0.4924, + "step": 31851 + }, + { + "epoch": 0.8745744096650192, + "grad_norm": 0.38685372471809387, + "learning_rate": 1.1981710053759036e-05, + "loss": 0.5172, + "step": 31852 + }, + { + "epoch": 0.8746018671059858, + "grad_norm": 0.39003244042396545, + "learning_rate": 1.1981286724045029e-05, + "loss": 0.5653, + "step": 31853 + }, + { + "epoch": 0.8746293245469522, + "grad_norm": 0.4088445007801056, + "learning_rate": 1.198086339063529e-05, + "loss": 0.4287, + "step": 31854 + }, + { + "epoch": 0.8746567819879187, + "grad_norm": 0.4351009130477905, + "learning_rate": 1.1980440053530604e-05, + "loss": 0.4778, + "step": 31855 + }, + { + "epoch": 0.8746842394288852, + "grad_norm": 0.4301143288612366, + "learning_rate": 1.198001671273177e-05, + "loss": 0.4576, + "step": 31856 + }, + { + "epoch": 0.8747116968698517, + "grad_norm": 0.3633374273777008, + "learning_rate": 1.1979593368239568e-05, + "loss": 0.4628, + "step": 31857 + }, + { + "epoch": 0.8747391543108183, + "grad_norm": 0.3858073651790619, + "learning_rate": 1.1979170020054798e-05, + "loss": 0.4506, + "step": 31858 + }, + { + "epoch": 0.8747666117517847, + "grad_norm": 0.3935641050338745, + "learning_rate": 1.197874666817824e-05, + "loss": 0.5347, + "step": 31859 + }, + { + "epoch": 0.8747940691927513, + "grad_norm": 0.39750561118125916, + "learning_rate": 1.1978323312610689e-05, + "loss": 0.5519, + "step": 31860 + }, + { + "epoch": 0.8748215266337177, + "grad_norm": 0.4369578957557678, + "learning_rate": 1.1977899953352936e-05, + "loss": 0.5132, + "step": 31861 + }, + { + "epoch": 0.8748489840746843, + "grad_norm": 0.41695836186408997, + "learning_rate": 1.1977476590405765e-05, + "loss": 0.4952, + "step": 31862 + }, + { + "epoch": 0.8748764415156507, + "grad_norm": 0.3689553439617157, + "learning_rate": 1.197705322376997e-05, + "loss": 0.54, + "step": 31863 + }, + { + "epoch": 0.8749038989566172, + "grad_norm": 0.37761154770851135, + "learning_rate": 1.1976629853446343e-05, + "loss": 0.4975, + "step": 31864 + }, + { + "epoch": 0.8749313563975838, + "grad_norm": 0.362316370010376, + "learning_rate": 1.1976206479435666e-05, + "loss": 0.4671, + "step": 31865 + }, + { + "epoch": 0.8749588138385502, + "grad_norm": 0.37033727765083313, + "learning_rate": 1.1975783101738733e-05, + "loss": 0.4277, + "step": 31866 + }, + { + "epoch": 0.8749862712795168, + "grad_norm": 0.40566325187683105, + "learning_rate": 1.1975359720356336e-05, + "loss": 0.4495, + "step": 31867 + }, + { + "epoch": 0.8750137287204832, + "grad_norm": 0.360006719827652, + "learning_rate": 1.1974936335289262e-05, + "loss": 0.5112, + "step": 31868 + }, + { + "epoch": 0.8750411861614498, + "grad_norm": 0.4962717890739441, + "learning_rate": 1.1974512946538303e-05, + "loss": 0.4344, + "step": 31869 + }, + { + "epoch": 0.8750686436024162, + "grad_norm": 0.4417881369590759, + "learning_rate": 1.1974089554104244e-05, + "loss": 0.5199, + "step": 31870 + }, + { + "epoch": 0.8750961010433828, + "grad_norm": 0.4432356357574463, + "learning_rate": 1.1973666157987883e-05, + "loss": 0.5189, + "step": 31871 + }, + { + "epoch": 0.8751235584843493, + "grad_norm": 0.3841994106769562, + "learning_rate": 1.197324275819e-05, + "loss": 0.5048, + "step": 31872 + }, + { + "epoch": 0.8751510159253157, + "grad_norm": 0.44454407691955566, + "learning_rate": 1.1972819354711391e-05, + "loss": 0.5119, + "step": 31873 + }, + { + "epoch": 0.8751784733662823, + "grad_norm": 0.37630370259284973, + "learning_rate": 1.1972395947552845e-05, + "loss": 0.4179, + "step": 31874 + }, + { + "epoch": 0.8752059308072487, + "grad_norm": 0.5090975165367126, + "learning_rate": 1.1971972536715151e-05, + "loss": 0.5399, + "step": 31875 + }, + { + "epoch": 0.8752333882482153, + "grad_norm": 0.380250483751297, + "learning_rate": 1.1971549122199098e-05, + "loss": 0.4997, + "step": 31876 + }, + { + "epoch": 0.8752608456891817, + "grad_norm": 0.3578014671802521, + "learning_rate": 1.1971125704005476e-05, + "loss": 0.5191, + "step": 31877 + }, + { + "epoch": 0.8752883031301483, + "grad_norm": 0.34112077951431274, + "learning_rate": 1.1970702282135076e-05, + "loss": 0.4376, + "step": 31878 + }, + { + "epoch": 0.8753157605711148, + "grad_norm": 0.41724973917007446, + "learning_rate": 1.1970278856588688e-05, + "loss": 0.5152, + "step": 31879 + }, + { + "epoch": 0.8753432180120813, + "grad_norm": 0.43110817670822144, + "learning_rate": 1.1969855427367101e-05, + "loss": 0.5546, + "step": 31880 + }, + { + "epoch": 0.8753706754530478, + "grad_norm": 0.3579998314380646, + "learning_rate": 1.1969431994471103e-05, + "loss": 0.4564, + "step": 31881 + }, + { + "epoch": 0.8753981328940142, + "grad_norm": 0.390323668718338, + "learning_rate": 1.1969008557901488e-05, + "loss": 0.5075, + "step": 31882 + }, + { + "epoch": 0.8754255903349808, + "grad_norm": 0.4457523822784424, + "learning_rate": 1.1968585117659041e-05, + "loss": 0.4842, + "step": 31883 + }, + { + "epoch": 0.8754530477759472, + "grad_norm": 0.35011765360832214, + "learning_rate": 1.1968161673744558e-05, + "loss": 0.4315, + "step": 31884 + }, + { + "epoch": 0.8754805052169138, + "grad_norm": 0.3927701711654663, + "learning_rate": 1.1967738226158825e-05, + "loss": 0.4838, + "step": 31885 + }, + { + "epoch": 0.8755079626578803, + "grad_norm": 0.40069547295570374, + "learning_rate": 1.196731477490263e-05, + "loss": 0.5077, + "step": 31886 + }, + { + "epoch": 0.8755354200988468, + "grad_norm": 0.3680017590522766, + "learning_rate": 1.1966891319976766e-05, + "loss": 0.4628, + "step": 31887 + }, + { + "epoch": 0.8755628775398133, + "grad_norm": 0.4282234013080597, + "learning_rate": 1.196646786138202e-05, + "loss": 0.4738, + "step": 31888 + }, + { + "epoch": 0.8755903349807798, + "grad_norm": 0.40885573625564575, + "learning_rate": 1.1966044399119186e-05, + "loss": 0.6013, + "step": 31889 + }, + { + "epoch": 0.8756177924217463, + "grad_norm": 0.4058675467967987, + "learning_rate": 1.1965620933189053e-05, + "loss": 0.4697, + "step": 31890 + }, + { + "epoch": 0.8756452498627127, + "grad_norm": 0.36844050884246826, + "learning_rate": 1.1965197463592406e-05, + "loss": 0.4418, + "step": 31891 + }, + { + "epoch": 0.8756727073036793, + "grad_norm": 0.3595139682292938, + "learning_rate": 1.1964773990330041e-05, + "loss": 0.5022, + "step": 31892 + }, + { + "epoch": 0.8757001647446458, + "grad_norm": 0.4897362291812897, + "learning_rate": 1.1964350513402748e-05, + "loss": 0.5345, + "step": 31893 + }, + { + "epoch": 0.8757276221856123, + "grad_norm": 0.3723672032356262, + "learning_rate": 1.1963927032811308e-05, + "loss": 0.4599, + "step": 31894 + }, + { + "epoch": 0.8757550796265788, + "grad_norm": 0.42442867159843445, + "learning_rate": 1.196350354855652e-05, + "loss": 0.6202, + "step": 31895 + }, + { + "epoch": 0.8757825370675453, + "grad_norm": 0.41181549429893494, + "learning_rate": 1.1963080060639171e-05, + "loss": 0.556, + "step": 31896 + }, + { + "epoch": 0.8758099945085118, + "grad_norm": 0.3460538983345032, + "learning_rate": 1.1962656569060054e-05, + "loss": 0.4509, + "step": 31897 + }, + { + "epoch": 0.8758374519494783, + "grad_norm": 0.36512717604637146, + "learning_rate": 1.1962233073819954e-05, + "loss": 0.4704, + "step": 31898 + }, + { + "epoch": 0.8758649093904448, + "grad_norm": 0.41690772771835327, + "learning_rate": 1.1961809574919663e-05, + "loss": 0.4956, + "step": 31899 + }, + { + "epoch": 0.8758923668314114, + "grad_norm": 0.3404284119606018, + "learning_rate": 1.196138607235997e-05, + "loss": 0.4156, + "step": 31900 + }, + { + "epoch": 0.8759198242723778, + "grad_norm": 0.35257238149642944, + "learning_rate": 1.1960962566141667e-05, + "loss": 0.5304, + "step": 31901 + }, + { + "epoch": 0.8759472817133444, + "grad_norm": 0.4191863238811493, + "learning_rate": 1.1960539056265543e-05, + "loss": 0.4897, + "step": 31902 + }, + { + "epoch": 0.8759747391543108, + "grad_norm": 0.385280579328537, + "learning_rate": 1.1960115542732389e-05, + "loss": 0.4782, + "step": 31903 + }, + { + "epoch": 0.8760021965952773, + "grad_norm": 0.34118834137916565, + "learning_rate": 1.195969202554299e-05, + "loss": 0.481, + "step": 31904 + }, + { + "epoch": 0.8760296540362438, + "grad_norm": 0.344910204410553, + "learning_rate": 1.1959268504698144e-05, + "loss": 0.39, + "step": 31905 + }, + { + "epoch": 0.8760571114772103, + "grad_norm": 0.5103614926338196, + "learning_rate": 1.1958844980198636e-05, + "loss": 0.4709, + "step": 31906 + }, + { + "epoch": 0.8760845689181769, + "grad_norm": 0.4079245626926422, + "learning_rate": 1.1958421452045256e-05, + "loss": 0.5053, + "step": 31907 + }, + { + "epoch": 0.8761120263591433, + "grad_norm": 0.35297486186027527, + "learning_rate": 1.1957997920238794e-05, + "loss": 0.4438, + "step": 31908 + }, + { + "epoch": 0.8761394838001099, + "grad_norm": 0.4198410212993622, + "learning_rate": 1.1957574384780044e-05, + "loss": 0.4567, + "step": 31909 + }, + { + "epoch": 0.8761669412410763, + "grad_norm": 0.4119419753551483, + "learning_rate": 1.1957150845669791e-05, + "loss": 0.5006, + "step": 31910 + }, + { + "epoch": 0.8761943986820429, + "grad_norm": 0.40050578117370605, + "learning_rate": 1.1956727302908829e-05, + "loss": 0.5428, + "step": 31911 + }, + { + "epoch": 0.8762218561230093, + "grad_norm": 0.40592026710510254, + "learning_rate": 1.1956303756497942e-05, + "loss": 0.4595, + "step": 31912 + }, + { + "epoch": 0.8762493135639758, + "grad_norm": 0.49376603960990906, + "learning_rate": 1.1955880206437928e-05, + "loss": 0.5527, + "step": 31913 + }, + { + "epoch": 0.8762767710049424, + "grad_norm": 0.4548511505126953, + "learning_rate": 1.1955456652729572e-05, + "loss": 0.5078, + "step": 31914 + }, + { + "epoch": 0.8763042284459088, + "grad_norm": 0.3654191792011261, + "learning_rate": 1.1955033095373665e-05, + "loss": 0.4309, + "step": 31915 + }, + { + "epoch": 0.8763316858868754, + "grad_norm": 0.3512386381626129, + "learning_rate": 1.1954609534370998e-05, + "loss": 0.5061, + "step": 31916 + }, + { + "epoch": 0.8763591433278418, + "grad_norm": 0.39778560400009155, + "learning_rate": 1.1954185969722358e-05, + "loss": 0.4544, + "step": 31917 + }, + { + "epoch": 0.8763866007688084, + "grad_norm": 0.37687769532203674, + "learning_rate": 1.195376240142854e-05, + "loss": 0.4895, + "step": 31918 + }, + { + "epoch": 0.8764140582097748, + "grad_norm": 0.41235029697418213, + "learning_rate": 1.195333882949033e-05, + "loss": 0.4582, + "step": 31919 + }, + { + "epoch": 0.8764415156507414, + "grad_norm": 0.42908975481987, + "learning_rate": 1.1952915253908522e-05, + "loss": 0.5547, + "step": 31920 + }, + { + "epoch": 0.8764689730917079, + "grad_norm": 0.4723831117153168, + "learning_rate": 1.19524916746839e-05, + "loss": 0.4697, + "step": 31921 + }, + { + "epoch": 0.8764964305326743, + "grad_norm": 0.37824997305870056, + "learning_rate": 1.195206809181726e-05, + "loss": 0.4845, + "step": 31922 + }, + { + "epoch": 0.8765238879736409, + "grad_norm": 0.3811679482460022, + "learning_rate": 1.195164450530939e-05, + "loss": 0.4681, + "step": 31923 + }, + { + "epoch": 0.8765513454146073, + "grad_norm": 0.42048579454421997, + "learning_rate": 1.1951220915161081e-05, + "loss": 0.5278, + "step": 31924 + }, + { + "epoch": 0.8765788028555739, + "grad_norm": 0.3896344304084778, + "learning_rate": 1.195079732137312e-05, + "loss": 0.4346, + "step": 31925 + }, + { + "epoch": 0.8766062602965403, + "grad_norm": 0.4703982472419739, + "learning_rate": 1.1950373723946303e-05, + "loss": 0.506, + "step": 31926 + }, + { + "epoch": 0.8766337177375069, + "grad_norm": 1.3505984544754028, + "learning_rate": 1.1949950122881413e-05, + "loss": 0.5346, + "step": 31927 + }, + { + "epoch": 0.8766611751784734, + "grad_norm": 0.3653607964515686, + "learning_rate": 1.1949526518179244e-05, + "loss": 0.4245, + "step": 31928 + }, + { + "epoch": 0.8766886326194399, + "grad_norm": 0.4055114686489105, + "learning_rate": 1.1949102909840589e-05, + "loss": 0.5356, + "step": 31929 + }, + { + "epoch": 0.8767160900604064, + "grad_norm": 0.4203973412513733, + "learning_rate": 1.194867929786623e-05, + "loss": 0.4289, + "step": 31930 + }, + { + "epoch": 0.8767435475013728, + "grad_norm": 0.5237071514129639, + "learning_rate": 1.1948255682256966e-05, + "loss": 0.4813, + "step": 31931 + }, + { + "epoch": 0.8767710049423394, + "grad_norm": 0.3563280403614044, + "learning_rate": 1.194783206301358e-05, + "loss": 0.5408, + "step": 31932 + }, + { + "epoch": 0.8767984623833058, + "grad_norm": 0.39450258016586304, + "learning_rate": 1.194740844013687e-05, + "loss": 0.446, + "step": 31933 + }, + { + "epoch": 0.8768259198242724, + "grad_norm": 0.35435187816619873, + "learning_rate": 1.194698481362762e-05, + "loss": 0.4901, + "step": 31934 + }, + { + "epoch": 0.8768533772652389, + "grad_norm": 0.3812824785709381, + "learning_rate": 1.194656118348662e-05, + "loss": 0.5197, + "step": 31935 + }, + { + "epoch": 0.8768808347062054, + "grad_norm": 0.3906446695327759, + "learning_rate": 1.1946137549714662e-05, + "loss": 0.4534, + "step": 31936 + }, + { + "epoch": 0.8769082921471719, + "grad_norm": 0.3830597698688507, + "learning_rate": 1.194571391231254e-05, + "loss": 0.4638, + "step": 31937 + }, + { + "epoch": 0.8769357495881384, + "grad_norm": 0.3929833173751831, + "learning_rate": 1.1945290271281038e-05, + "loss": 0.5269, + "step": 31938 + }, + { + "epoch": 0.8769632070291049, + "grad_norm": 0.3906625807285309, + "learning_rate": 1.194486662662095e-05, + "loss": 0.5639, + "step": 31939 + }, + { + "epoch": 0.8769906644700713, + "grad_norm": 0.35075417160987854, + "learning_rate": 1.1944442978333061e-05, + "loss": 0.4676, + "step": 31940 + }, + { + "epoch": 0.8770181219110379, + "grad_norm": 0.41575199365615845, + "learning_rate": 1.194401932641817e-05, + "loss": 0.5629, + "step": 31941 + }, + { + "epoch": 0.8770455793520044, + "grad_norm": 0.4342697262763977, + "learning_rate": 1.1943595670877062e-05, + "loss": 0.5081, + "step": 31942 + }, + { + "epoch": 0.8770730367929709, + "grad_norm": 0.4022100567817688, + "learning_rate": 1.1943172011710525e-05, + "loss": 0.5192, + "step": 31943 + }, + { + "epoch": 0.8771004942339374, + "grad_norm": 0.4607600271701813, + "learning_rate": 1.1942748348919356e-05, + "loss": 0.5565, + "step": 31944 + }, + { + "epoch": 0.8771279516749039, + "grad_norm": 0.4278750717639923, + "learning_rate": 1.1942324682504336e-05, + "loss": 0.5147, + "step": 31945 + }, + { + "epoch": 0.8771554091158704, + "grad_norm": 0.37528759241104126, + "learning_rate": 1.1941901012466264e-05, + "loss": 0.4868, + "step": 31946 + }, + { + "epoch": 0.8771828665568369, + "grad_norm": 0.3850431740283966, + "learning_rate": 1.1941477338805926e-05, + "loss": 0.5302, + "step": 31947 + }, + { + "epoch": 0.8772103239978034, + "grad_norm": 0.3976559042930603, + "learning_rate": 1.1941053661524114e-05, + "loss": 0.4954, + "step": 31948 + }, + { + "epoch": 0.87723778143877, + "grad_norm": 0.7980462312698364, + "learning_rate": 1.1940629980621617e-05, + "loss": 0.547, + "step": 31949 + }, + { + "epoch": 0.8772652388797364, + "grad_norm": 0.4159824252128601, + "learning_rate": 1.1940206296099224e-05, + "loss": 0.4845, + "step": 31950 + }, + { + "epoch": 0.877292696320703, + "grad_norm": 0.4210914969444275, + "learning_rate": 1.1939782607957731e-05, + "loss": 0.5759, + "step": 31951 + }, + { + "epoch": 0.8773201537616694, + "grad_norm": 0.491599440574646, + "learning_rate": 1.193935891619792e-05, + "loss": 0.6394, + "step": 31952 + }, + { + "epoch": 0.8773476112026359, + "grad_norm": 0.38474002480506897, + "learning_rate": 1.1938935220820587e-05, + "loss": 0.5074, + "step": 31953 + }, + { + "epoch": 0.8773750686436024, + "grad_norm": 0.5116145014762878, + "learning_rate": 1.1938511521826524e-05, + "loss": 0.4148, + "step": 31954 + }, + { + "epoch": 0.8774025260845689, + "grad_norm": 0.37616270780563354, + "learning_rate": 1.1938087819216514e-05, + "loss": 0.5131, + "step": 31955 + }, + { + "epoch": 0.8774299835255355, + "grad_norm": 0.42467406392097473, + "learning_rate": 1.1937664112991353e-05, + "loss": 0.6235, + "step": 31956 + }, + { + "epoch": 0.8774574409665019, + "grad_norm": 0.36354827880859375, + "learning_rate": 1.1937240403151832e-05, + "loss": 0.552, + "step": 31957 + }, + { + "epoch": 0.8774848984074685, + "grad_norm": 0.4360863268375397, + "learning_rate": 1.1936816689698737e-05, + "loss": 0.6094, + "step": 31958 + }, + { + "epoch": 0.8775123558484349, + "grad_norm": 0.3619670569896698, + "learning_rate": 1.1936392972632862e-05, + "loss": 0.4734, + "step": 31959 + }, + { + "epoch": 0.8775398132894014, + "grad_norm": 0.44548746943473816, + "learning_rate": 1.1935969251954992e-05, + "loss": 0.5201, + "step": 31960 + }, + { + "epoch": 0.8775672707303679, + "grad_norm": 0.35143551230430603, + "learning_rate": 1.1935545527665928e-05, + "loss": 0.4345, + "step": 31961 + }, + { + "epoch": 0.8775947281713344, + "grad_norm": 0.3826131224632263, + "learning_rate": 1.1935121799766452e-05, + "loss": 0.4947, + "step": 31962 + }, + { + "epoch": 0.877622185612301, + "grad_norm": 0.4370388686656952, + "learning_rate": 1.1934698068257353e-05, + "loss": 0.5267, + "step": 31963 + }, + { + "epoch": 0.8776496430532674, + "grad_norm": 0.4482690393924713, + "learning_rate": 1.1934274333139428e-05, + "loss": 0.5257, + "step": 31964 + }, + { + "epoch": 0.877677100494234, + "grad_norm": 0.43024206161499023, + "learning_rate": 1.193385059441346e-05, + "loss": 0.4191, + "step": 31965 + }, + { + "epoch": 0.8777045579352004, + "grad_norm": 0.4049198627471924, + "learning_rate": 1.1933426852080248e-05, + "loss": 0.5261, + "step": 31966 + }, + { + "epoch": 0.877732015376167, + "grad_norm": 0.36380627751350403, + "learning_rate": 1.1933003106140574e-05, + "loss": 0.424, + "step": 31967 + }, + { + "epoch": 0.8777594728171334, + "grad_norm": 0.48806729912757874, + "learning_rate": 1.1932579356595235e-05, + "loss": 0.5013, + "step": 31968 + }, + { + "epoch": 0.8777869302581, + "grad_norm": 0.3901720941066742, + "learning_rate": 1.1932155603445019e-05, + "loss": 0.5414, + "step": 31969 + }, + { + "epoch": 0.8778143876990665, + "grad_norm": 0.3796052634716034, + "learning_rate": 1.1931731846690715e-05, + "loss": 0.4619, + "step": 31970 + }, + { + "epoch": 0.8778418451400329, + "grad_norm": 0.47389957308769226, + "learning_rate": 1.1931308086333116e-05, + "loss": 0.4857, + "step": 31971 + }, + { + "epoch": 0.8778693025809995, + "grad_norm": 0.45830944180488586, + "learning_rate": 1.1930884322373007e-05, + "loss": 0.6025, + "step": 31972 + }, + { + "epoch": 0.8778967600219659, + "grad_norm": 0.3966934084892273, + "learning_rate": 1.1930460554811188e-05, + "loss": 0.4284, + "step": 31973 + }, + { + "epoch": 0.8779242174629325, + "grad_norm": 0.40891844034194946, + "learning_rate": 1.193003678364844e-05, + "loss": 0.4911, + "step": 31974 + }, + { + "epoch": 0.8779516749038989, + "grad_norm": 0.35183870792388916, + "learning_rate": 1.1929613008885556e-05, + "loss": 0.4971, + "step": 31975 + }, + { + "epoch": 0.8779791323448655, + "grad_norm": 0.4106239080429077, + "learning_rate": 1.1929189230523333e-05, + "loss": 0.5669, + "step": 31976 + }, + { + "epoch": 0.878006589785832, + "grad_norm": 0.3904968500137329, + "learning_rate": 1.1928765448562551e-05, + "loss": 0.4856, + "step": 31977 + }, + { + "epoch": 0.8780340472267985, + "grad_norm": 0.33858057856559753, + "learning_rate": 1.192834166300401e-05, + "loss": 0.4417, + "step": 31978 + }, + { + "epoch": 0.878061504667765, + "grad_norm": 0.5419690012931824, + "learning_rate": 1.1927917873848495e-05, + "loss": 0.5512, + "step": 31979 + }, + { + "epoch": 0.8780889621087314, + "grad_norm": 0.37872421741485596, + "learning_rate": 1.1927494081096797e-05, + "loss": 0.4445, + "step": 31980 + }, + { + "epoch": 0.878116419549698, + "grad_norm": 0.5295234322547913, + "learning_rate": 1.192707028474971e-05, + "loss": 0.4313, + "step": 31981 + }, + { + "epoch": 0.8781438769906644, + "grad_norm": 0.3854120969772339, + "learning_rate": 1.1926646484808018e-05, + "loss": 0.4798, + "step": 31982 + }, + { + "epoch": 0.878171334431631, + "grad_norm": 0.3849250376224518, + "learning_rate": 1.1926222681272519e-05, + "loss": 0.4058, + "step": 31983 + }, + { + "epoch": 0.8781987918725975, + "grad_norm": 0.38627946376800537, + "learning_rate": 1.1925798874143997e-05, + "loss": 0.4198, + "step": 31984 + }, + { + "epoch": 0.878226249313564, + "grad_norm": 0.48482489585876465, + "learning_rate": 1.1925375063423248e-05, + "loss": 0.6001, + "step": 31985 + }, + { + "epoch": 0.8782537067545305, + "grad_norm": 0.44988325238227844, + "learning_rate": 1.192495124911106e-05, + "loss": 0.5519, + "step": 31986 + }, + { + "epoch": 0.878281164195497, + "grad_norm": 0.4350222945213318, + "learning_rate": 1.1924527431208218e-05, + "loss": 0.523, + "step": 31987 + }, + { + "epoch": 0.8783086216364635, + "grad_norm": 0.4062343239784241, + "learning_rate": 1.1924103609715525e-05, + "loss": 0.476, + "step": 31988 + }, + { + "epoch": 0.8783360790774299, + "grad_norm": 0.4025411903858185, + "learning_rate": 1.1923679784633761e-05, + "loss": 0.5272, + "step": 31989 + }, + { + "epoch": 0.8783635365183965, + "grad_norm": 0.3779144883155823, + "learning_rate": 1.1923255955963722e-05, + "loss": 0.3976, + "step": 31990 + }, + { + "epoch": 0.878390993959363, + "grad_norm": 0.45494818687438965, + "learning_rate": 1.1922832123706197e-05, + "loss": 0.5027, + "step": 31991 + }, + { + "epoch": 0.8784184514003295, + "grad_norm": 0.34485360980033875, + "learning_rate": 1.1922408287861973e-05, + "loss": 0.4355, + "step": 31992 + }, + { + "epoch": 0.878445908841296, + "grad_norm": 0.38893452286720276, + "learning_rate": 1.192198444843185e-05, + "loss": 0.5133, + "step": 31993 + }, + { + "epoch": 0.8784733662822625, + "grad_norm": 0.3244379162788391, + "learning_rate": 1.1921560605416606e-05, + "loss": 0.472, + "step": 31994 + }, + { + "epoch": 0.878500823723229, + "grad_norm": 0.3989643156528473, + "learning_rate": 1.1921136758817043e-05, + "loss": 0.527, + "step": 31995 + }, + { + "epoch": 0.8785282811641955, + "grad_norm": 0.3782273828983307, + "learning_rate": 1.1920712908633944e-05, + "loss": 0.521, + "step": 31996 + }, + { + "epoch": 0.878555738605162, + "grad_norm": 0.9833260774612427, + "learning_rate": 1.1920289054868104e-05, + "loss": 0.461, + "step": 31997 + }, + { + "epoch": 0.8785831960461286, + "grad_norm": 0.40176069736480713, + "learning_rate": 1.1919865197520311e-05, + "loss": 0.3898, + "step": 31998 + }, + { + "epoch": 0.878610653487095, + "grad_norm": 0.46752214431762695, + "learning_rate": 1.1919441336591358e-05, + "loss": 0.5511, + "step": 31999 + }, + { + "epoch": 0.8786381109280615, + "grad_norm": 0.4041384756565094, + "learning_rate": 1.1919017472082033e-05, + "loss": 0.5061, + "step": 32000 + }, + { + "epoch": 0.878665568369028, + "grad_norm": 0.38660281896591187, + "learning_rate": 1.1918593603993129e-05, + "loss": 0.5524, + "step": 32001 + }, + { + "epoch": 0.8786930258099945, + "grad_norm": 0.5176632404327393, + "learning_rate": 1.1918169732325434e-05, + "loss": 0.4718, + "step": 32002 + }, + { + "epoch": 0.878720483250961, + "grad_norm": 0.40639564394950867, + "learning_rate": 1.1917745857079741e-05, + "loss": 0.5305, + "step": 32003 + }, + { + "epoch": 0.8787479406919275, + "grad_norm": 0.35734692215919495, + "learning_rate": 1.191732197825684e-05, + "loss": 0.3743, + "step": 32004 + }, + { + "epoch": 0.878775398132894, + "grad_norm": 0.3844582438468933, + "learning_rate": 1.1916898095857522e-05, + "loss": 0.4675, + "step": 32005 + }, + { + "epoch": 0.8788028555738605, + "grad_norm": 0.37682437896728516, + "learning_rate": 1.1916474209882578e-05, + "loss": 0.4173, + "step": 32006 + }, + { + "epoch": 0.8788303130148271, + "grad_norm": 0.43074601888656616, + "learning_rate": 1.1916050320332795e-05, + "loss": 0.5054, + "step": 32007 + }, + { + "epoch": 0.8788577704557935, + "grad_norm": 0.45952683687210083, + "learning_rate": 1.1915626427208969e-05, + "loss": 0.521, + "step": 32008 + }, + { + "epoch": 0.87888522789676, + "grad_norm": 0.44202834367752075, + "learning_rate": 1.191520253051189e-05, + "loss": 0.4322, + "step": 32009 + }, + { + "epoch": 0.8789126853377265, + "grad_norm": 0.3344026803970337, + "learning_rate": 1.1914778630242343e-05, + "loss": 0.4618, + "step": 32010 + }, + { + "epoch": 0.878940142778693, + "grad_norm": 0.3713780343532562, + "learning_rate": 1.1914354726401125e-05, + "loss": 0.4625, + "step": 32011 + }, + { + "epoch": 0.8789676002196595, + "grad_norm": 0.37654852867126465, + "learning_rate": 1.1913930818989021e-05, + "loss": 0.5066, + "step": 32012 + }, + { + "epoch": 0.878995057660626, + "grad_norm": 0.39898210763931274, + "learning_rate": 1.191350690800683e-05, + "loss": 0.5013, + "step": 32013 + }, + { + "epoch": 0.8790225151015926, + "grad_norm": 0.3703964948654175, + "learning_rate": 1.1913082993455335e-05, + "loss": 0.4914, + "step": 32014 + }, + { + "epoch": 0.879049972542559, + "grad_norm": 0.35936257243156433, + "learning_rate": 1.191265907533533e-05, + "loss": 0.4473, + "step": 32015 + }, + { + "epoch": 0.8790774299835256, + "grad_norm": 0.4618874788284302, + "learning_rate": 1.1912235153647606e-05, + "loss": 0.5335, + "step": 32016 + }, + { + "epoch": 0.879104887424492, + "grad_norm": 0.39840376377105713, + "learning_rate": 1.1911811228392956e-05, + "loss": 0.4734, + "step": 32017 + }, + { + "epoch": 0.8791323448654585, + "grad_norm": 0.37391307950019836, + "learning_rate": 1.1911387299572164e-05, + "loss": 0.4965, + "step": 32018 + }, + { + "epoch": 0.879159802306425, + "grad_norm": 0.4246211647987366, + "learning_rate": 1.1910963367186025e-05, + "loss": 0.5253, + "step": 32019 + }, + { + "epoch": 0.8791872597473915, + "grad_norm": 0.36970582604408264, + "learning_rate": 1.1910539431235331e-05, + "loss": 0.5115, + "step": 32020 + }, + { + "epoch": 0.8792147171883581, + "grad_norm": 0.3908650279045105, + "learning_rate": 1.191011549172087e-05, + "loss": 0.4835, + "step": 32021 + }, + { + "epoch": 0.8792421746293245, + "grad_norm": 0.375203400850296, + "learning_rate": 1.1909691548643436e-05, + "loss": 0.4093, + "step": 32022 + }, + { + "epoch": 0.8792696320702911, + "grad_norm": 0.4199284315109253, + "learning_rate": 1.1909267602003813e-05, + "loss": 0.5706, + "step": 32023 + }, + { + "epoch": 0.8792970895112575, + "grad_norm": 0.3806227147579193, + "learning_rate": 1.1908843651802801e-05, + "loss": 0.527, + "step": 32024 + }, + { + "epoch": 0.8793245469522241, + "grad_norm": 0.33185502886772156, + "learning_rate": 1.1908419698041183e-05, + "loss": 0.4983, + "step": 32025 + }, + { + "epoch": 0.8793520043931905, + "grad_norm": 0.43008968234062195, + "learning_rate": 1.1907995740719755e-05, + "loss": 0.4392, + "step": 32026 + }, + { + "epoch": 0.879379461834157, + "grad_norm": 0.36459285020828247, + "learning_rate": 1.190757177983931e-05, + "loss": 0.4528, + "step": 32027 + }, + { + "epoch": 0.8794069192751236, + "grad_norm": 0.42241615056991577, + "learning_rate": 1.1907147815400627e-05, + "loss": 0.461, + "step": 32028 + }, + { + "epoch": 0.87943437671609, + "grad_norm": 0.3753504753112793, + "learning_rate": 1.190672384740451e-05, + "loss": 0.5343, + "step": 32029 + }, + { + "epoch": 0.8794618341570566, + "grad_norm": 0.41857773065567017, + "learning_rate": 1.1906299875851741e-05, + "loss": 0.5097, + "step": 32030 + }, + { + "epoch": 0.879489291598023, + "grad_norm": 0.34342458844184875, + "learning_rate": 1.1905875900743117e-05, + "loss": 0.4013, + "step": 32031 + }, + { + "epoch": 0.8795167490389896, + "grad_norm": 0.3791235387325287, + "learning_rate": 1.1905451922079427e-05, + "loss": 0.4209, + "step": 32032 + }, + { + "epoch": 0.879544206479956, + "grad_norm": 0.4058675467967987, + "learning_rate": 1.1905027939861457e-05, + "loss": 0.5139, + "step": 32033 + }, + { + "epoch": 0.8795716639209226, + "grad_norm": 0.4845820665359497, + "learning_rate": 1.1904603954090007e-05, + "loss": 0.4481, + "step": 32034 + }, + { + "epoch": 0.8795991213618891, + "grad_norm": 0.39102426171302795, + "learning_rate": 1.1904179964765861e-05, + "loss": 0.4713, + "step": 32035 + }, + { + "epoch": 0.8796265788028556, + "grad_norm": 0.4286566376686096, + "learning_rate": 1.190375597188981e-05, + "loss": 0.5997, + "step": 32036 + }, + { + "epoch": 0.8796540362438221, + "grad_norm": 0.3659784197807312, + "learning_rate": 1.1903331975462646e-05, + "loss": 0.4866, + "step": 32037 + }, + { + "epoch": 0.8796814936847885, + "grad_norm": 0.4067437946796417, + "learning_rate": 1.190290797548516e-05, + "loss": 0.4845, + "step": 32038 + }, + { + "epoch": 0.8797089511257551, + "grad_norm": 0.4310038983821869, + "learning_rate": 1.1902483971958147e-05, + "loss": 0.5331, + "step": 32039 + }, + { + "epoch": 0.8797364085667215, + "grad_norm": 0.3957591950893402, + "learning_rate": 1.1902059964882393e-05, + "loss": 0.4667, + "step": 32040 + }, + { + "epoch": 0.8797638660076881, + "grad_norm": 0.46333348751068115, + "learning_rate": 1.1901635954258688e-05, + "loss": 0.558, + "step": 32041 + }, + { + "epoch": 0.8797913234486546, + "grad_norm": 0.3651861548423767, + "learning_rate": 1.1901211940087826e-05, + "loss": 0.4411, + "step": 32042 + }, + { + "epoch": 0.8798187808896211, + "grad_norm": 0.3846897780895233, + "learning_rate": 1.1900787922370596e-05, + "loss": 0.4918, + "step": 32043 + }, + { + "epoch": 0.8798462383305876, + "grad_norm": 0.42736342549324036, + "learning_rate": 1.1900363901107792e-05, + "loss": 0.6054, + "step": 32044 + }, + { + "epoch": 0.879873695771554, + "grad_norm": 0.3596585988998413, + "learning_rate": 1.1899939876300203e-05, + "loss": 0.4639, + "step": 32045 + }, + { + "epoch": 0.8799011532125206, + "grad_norm": 0.42206114530563354, + "learning_rate": 1.1899515847948616e-05, + "loss": 0.4657, + "step": 32046 + }, + { + "epoch": 0.879928610653487, + "grad_norm": 0.408849835395813, + "learning_rate": 1.189909181605383e-05, + "loss": 0.5279, + "step": 32047 + }, + { + "epoch": 0.8799560680944536, + "grad_norm": 0.40449389815330505, + "learning_rate": 1.1898667780616631e-05, + "loss": 0.4696, + "step": 32048 + }, + { + "epoch": 0.8799835255354201, + "grad_norm": 0.3709248900413513, + "learning_rate": 1.1898243741637807e-05, + "loss": 0.3951, + "step": 32049 + }, + { + "epoch": 0.8800109829763866, + "grad_norm": 0.6570876240730286, + "learning_rate": 1.1897819699118155e-05, + "loss": 0.5076, + "step": 32050 + }, + { + "epoch": 0.8800384404173531, + "grad_norm": 0.3638741672039032, + "learning_rate": 1.1897395653058464e-05, + "loss": 0.4618, + "step": 32051 + }, + { + "epoch": 0.8800658978583196, + "grad_norm": 0.34563493728637695, + "learning_rate": 1.1896971603459523e-05, + "loss": 0.4946, + "step": 32052 + }, + { + "epoch": 0.8800933552992861, + "grad_norm": 0.3757338225841522, + "learning_rate": 1.1896547550322128e-05, + "loss": 0.4954, + "step": 32053 + }, + { + "epoch": 0.8801208127402526, + "grad_norm": 0.4365825653076172, + "learning_rate": 1.1896123493647062e-05, + "loss": 0.5067, + "step": 32054 + }, + { + "epoch": 0.8801482701812191, + "grad_norm": 0.35737791657447815, + "learning_rate": 1.1895699433435124e-05, + "loss": 0.5241, + "step": 32055 + }, + { + "epoch": 0.8801757276221857, + "grad_norm": 0.41004377603530884, + "learning_rate": 1.18952753696871e-05, + "loss": 0.5033, + "step": 32056 + }, + { + "epoch": 0.8802031850631521, + "grad_norm": 0.43826383352279663, + "learning_rate": 1.1894851302403782e-05, + "loss": 0.4883, + "step": 32057 + }, + { + "epoch": 0.8802306425041186, + "grad_norm": 0.3583780825138092, + "learning_rate": 1.1894427231585963e-05, + "loss": 0.4975, + "step": 32058 + }, + { + "epoch": 0.8802580999450851, + "grad_norm": 0.4494962692260742, + "learning_rate": 1.1894003157234431e-05, + "loss": 0.4885, + "step": 32059 + }, + { + "epoch": 0.8802855573860516, + "grad_norm": 0.3832293152809143, + "learning_rate": 1.1893579079349982e-05, + "loss": 0.4636, + "step": 32060 + }, + { + "epoch": 0.8803130148270181, + "grad_norm": 0.37113338708877563, + "learning_rate": 1.1893154997933399e-05, + "loss": 0.4885, + "step": 32061 + }, + { + "epoch": 0.8803404722679846, + "grad_norm": 0.4141026437282562, + "learning_rate": 1.189273091298548e-05, + "loss": 0.4654, + "step": 32062 + }, + { + "epoch": 0.8803679297089512, + "grad_norm": 0.36419039964675903, + "learning_rate": 1.1892306824507013e-05, + "loss": 0.4776, + "step": 32063 + }, + { + "epoch": 0.8803953871499176, + "grad_norm": 0.39587879180908203, + "learning_rate": 1.189188273249879e-05, + "loss": 0.5302, + "step": 32064 + }, + { + "epoch": 0.8804228445908842, + "grad_norm": 0.3792327642440796, + "learning_rate": 1.1891458636961603e-05, + "loss": 0.4742, + "step": 32065 + }, + { + "epoch": 0.8804503020318506, + "grad_norm": 0.3839843273162842, + "learning_rate": 1.1891034537896242e-05, + "loss": 0.4027, + "step": 32066 + }, + { + "epoch": 0.8804777594728171, + "grad_norm": 0.3863787353038788, + "learning_rate": 1.1890610435303498e-05, + "loss": 0.5643, + "step": 32067 + }, + { + "epoch": 0.8805052169137836, + "grad_norm": 0.4238605797290802, + "learning_rate": 1.1890186329184161e-05, + "loss": 0.4596, + "step": 32068 + }, + { + "epoch": 0.8805326743547501, + "grad_norm": 0.35735079646110535, + "learning_rate": 1.1889762219539022e-05, + "loss": 0.4599, + "step": 32069 + }, + { + "epoch": 0.8805601317957167, + "grad_norm": 0.38202327489852905, + "learning_rate": 1.1889338106368875e-05, + "loss": 0.4879, + "step": 32070 + }, + { + "epoch": 0.8805875892366831, + "grad_norm": 0.46469634771347046, + "learning_rate": 1.1888913989674511e-05, + "loss": 0.4428, + "step": 32071 + }, + { + "epoch": 0.8806150466776497, + "grad_norm": 0.3900083601474762, + "learning_rate": 1.1888489869456716e-05, + "loss": 0.5008, + "step": 32072 + }, + { + "epoch": 0.8806425041186161, + "grad_norm": 0.38179516792297363, + "learning_rate": 1.1888065745716289e-05, + "loss": 0.4236, + "step": 32073 + }, + { + "epoch": 0.8806699615595827, + "grad_norm": 0.4124305248260498, + "learning_rate": 1.1887641618454012e-05, + "loss": 0.5585, + "step": 32074 + }, + { + "epoch": 0.8806974190005491, + "grad_norm": 0.3727075457572937, + "learning_rate": 1.1887217487670683e-05, + "loss": 0.4271, + "step": 32075 + }, + { + "epoch": 0.8807248764415156, + "grad_norm": 0.38836339116096497, + "learning_rate": 1.1886793353367094e-05, + "loss": 0.4441, + "step": 32076 + }, + { + "epoch": 0.8807523338824822, + "grad_norm": 0.3936639428138733, + "learning_rate": 1.1886369215544029e-05, + "loss": 0.5089, + "step": 32077 + }, + { + "epoch": 0.8807797913234486, + "grad_norm": 0.3877533972263336, + "learning_rate": 1.1885945074202286e-05, + "loss": 0.4874, + "step": 32078 + }, + { + "epoch": 0.8808072487644152, + "grad_norm": 0.3711925446987152, + "learning_rate": 1.1885520929342654e-05, + "loss": 0.4337, + "step": 32079 + }, + { + "epoch": 0.8808347062053816, + "grad_norm": 0.41186392307281494, + "learning_rate": 1.1885096780965919e-05, + "loss": 0.4979, + "step": 32080 + }, + { + "epoch": 0.8808621636463482, + "grad_norm": 0.37694042921066284, + "learning_rate": 1.1884672629072883e-05, + "loss": 0.5182, + "step": 32081 + }, + { + "epoch": 0.8808896210873146, + "grad_norm": 0.3601510524749756, + "learning_rate": 1.1884248473664326e-05, + "loss": 0.5146, + "step": 32082 + }, + { + "epoch": 0.8809170785282812, + "grad_norm": 0.6628205180168152, + "learning_rate": 1.1883824314741047e-05, + "loss": 0.4497, + "step": 32083 + }, + { + "epoch": 0.8809445359692477, + "grad_norm": 0.4313538670539856, + "learning_rate": 1.1883400152303833e-05, + "loss": 0.5152, + "step": 32084 + }, + { + "epoch": 0.8809719934102141, + "grad_norm": 0.38718125224113464, + "learning_rate": 1.1882975986353475e-05, + "loss": 0.4343, + "step": 32085 + }, + { + "epoch": 0.8809994508511807, + "grad_norm": 0.46678808331489563, + "learning_rate": 1.1882551816890771e-05, + "loss": 0.5072, + "step": 32086 + }, + { + "epoch": 0.8810269082921471, + "grad_norm": 0.3905032277107239, + "learning_rate": 1.1882127643916501e-05, + "loss": 0.4806, + "step": 32087 + }, + { + "epoch": 0.8810543657331137, + "grad_norm": 0.38409483432769775, + "learning_rate": 1.1881703467431468e-05, + "loss": 0.439, + "step": 32088 + }, + { + "epoch": 0.8810818231740801, + "grad_norm": 0.415054589509964, + "learning_rate": 1.1881279287436453e-05, + "loss": 0.5077, + "step": 32089 + }, + { + "epoch": 0.8811092806150467, + "grad_norm": 0.41051170229911804, + "learning_rate": 1.1880855103932254e-05, + "loss": 0.4857, + "step": 32090 + }, + { + "epoch": 0.8811367380560132, + "grad_norm": 0.39251744747161865, + "learning_rate": 1.1880430916919658e-05, + "loss": 0.4663, + "step": 32091 + }, + { + "epoch": 0.8811641954969797, + "grad_norm": 0.9403777122497559, + "learning_rate": 1.188000672639946e-05, + "loss": 0.5023, + "step": 32092 + }, + { + "epoch": 0.8811916529379462, + "grad_norm": 0.3428557515144348, + "learning_rate": 1.1879582532372448e-05, + "loss": 0.4217, + "step": 32093 + }, + { + "epoch": 0.8812191103789127, + "grad_norm": 0.3441402316093445, + "learning_rate": 1.1879158334839419e-05, + "loss": 0.4689, + "step": 32094 + }, + { + "epoch": 0.8812465678198792, + "grad_norm": 0.36560502648353577, + "learning_rate": 1.1878734133801153e-05, + "loss": 0.4158, + "step": 32095 + }, + { + "epoch": 0.8812740252608456, + "grad_norm": 0.4121265411376953, + "learning_rate": 1.1878309929258453e-05, + "loss": 0.5806, + "step": 32096 + }, + { + "epoch": 0.8813014827018122, + "grad_norm": 0.32833656668663025, + "learning_rate": 1.1877885721212106e-05, + "loss": 0.3955, + "step": 32097 + }, + { + "epoch": 0.8813289401427787, + "grad_norm": 0.3870598375797272, + "learning_rate": 1.18774615096629e-05, + "loss": 0.5002, + "step": 32098 + }, + { + "epoch": 0.8813563975837452, + "grad_norm": 0.4454132318496704, + "learning_rate": 1.1877037294611631e-05, + "loss": 0.4773, + "step": 32099 + }, + { + "epoch": 0.8813838550247117, + "grad_norm": 0.4635816216468811, + "learning_rate": 1.1876613076059084e-05, + "loss": 0.5264, + "step": 32100 + }, + { + "epoch": 0.8814113124656782, + "grad_norm": 0.40398865938186646, + "learning_rate": 1.1876188854006062e-05, + "loss": 0.5328, + "step": 32101 + }, + { + "epoch": 0.8814387699066447, + "grad_norm": 0.43785589933395386, + "learning_rate": 1.1875764628453343e-05, + "loss": 0.4867, + "step": 32102 + }, + { + "epoch": 0.8814662273476112, + "grad_norm": 0.4817052483558655, + "learning_rate": 1.1875340399401728e-05, + "loss": 0.5601, + "step": 32103 + }, + { + "epoch": 0.8814936847885777, + "grad_norm": 0.46695569157600403, + "learning_rate": 1.1874916166852002e-05, + "loss": 0.5405, + "step": 32104 + }, + { + "epoch": 0.8815211422295443, + "grad_norm": 0.4183516502380371, + "learning_rate": 1.187449193080496e-05, + "loss": 0.5866, + "step": 32105 + }, + { + "epoch": 0.8815485996705107, + "grad_norm": 0.38090187311172485, + "learning_rate": 1.1874067691261393e-05, + "loss": 0.596, + "step": 32106 + }, + { + "epoch": 0.8815760571114772, + "grad_norm": 0.40867894887924194, + "learning_rate": 1.1873643448222092e-05, + "loss": 0.4901, + "step": 32107 + }, + { + "epoch": 0.8816035145524437, + "grad_norm": 0.3976020812988281, + "learning_rate": 1.1873219201687844e-05, + "loss": 0.4582, + "step": 32108 + }, + { + "epoch": 0.8816309719934102, + "grad_norm": 0.43231889605522156, + "learning_rate": 1.1872794951659447e-05, + "loss": 0.4598, + "step": 32109 + }, + { + "epoch": 0.8816584294343767, + "grad_norm": 0.3727076053619385, + "learning_rate": 1.1872370698137692e-05, + "loss": 0.4633, + "step": 32110 + }, + { + "epoch": 0.8816858868753432, + "grad_norm": 0.39448443055152893, + "learning_rate": 1.1871946441123366e-05, + "loss": 0.5077, + "step": 32111 + }, + { + "epoch": 0.8817133443163098, + "grad_norm": 0.41087251901626587, + "learning_rate": 1.187152218061726e-05, + "loss": 0.5077, + "step": 32112 + }, + { + "epoch": 0.8817408017572762, + "grad_norm": 0.39023974537849426, + "learning_rate": 1.187109791662017e-05, + "loss": 0.4947, + "step": 32113 + }, + { + "epoch": 0.8817682591982428, + "grad_norm": 0.41291481256484985, + "learning_rate": 1.1870673649132887e-05, + "loss": 0.4661, + "step": 32114 + }, + { + "epoch": 0.8817957166392092, + "grad_norm": 0.5143707990646362, + "learning_rate": 1.18702493781562e-05, + "loss": 0.5273, + "step": 32115 + }, + { + "epoch": 0.8818231740801757, + "grad_norm": 0.3820711374282837, + "learning_rate": 1.1869825103690898e-05, + "loss": 0.7375, + "step": 32116 + }, + { + "epoch": 0.8818506315211422, + "grad_norm": 0.5015829801559448, + "learning_rate": 1.1869400825737779e-05, + "loss": 0.4869, + "step": 32117 + }, + { + "epoch": 0.8818780889621087, + "grad_norm": 0.4504486918449402, + "learning_rate": 1.1868976544297628e-05, + "loss": 0.4907, + "step": 32118 + }, + { + "epoch": 0.8819055464030753, + "grad_norm": 0.34252864122390747, + "learning_rate": 1.186855225937124e-05, + "loss": 0.4316, + "step": 32119 + }, + { + "epoch": 0.8819330038440417, + "grad_norm": 0.46135252714157104, + "learning_rate": 1.1868127970959407e-05, + "loss": 0.4842, + "step": 32120 + }, + { + "epoch": 0.8819604612850083, + "grad_norm": 0.3694794774055481, + "learning_rate": 1.1867703679062917e-05, + "loss": 0.5145, + "step": 32121 + }, + { + "epoch": 0.8819879187259747, + "grad_norm": 0.4088258743286133, + "learning_rate": 1.1867279383682566e-05, + "loss": 0.4841, + "step": 32122 + }, + { + "epoch": 0.8820153761669413, + "grad_norm": 0.3882768750190735, + "learning_rate": 1.186685508481914e-05, + "loss": 0.4991, + "step": 32123 + }, + { + "epoch": 0.8820428336079077, + "grad_norm": 0.3928532302379608, + "learning_rate": 1.1866430782473437e-05, + "loss": 0.5007, + "step": 32124 + }, + { + "epoch": 0.8820702910488742, + "grad_norm": 0.38079574704170227, + "learning_rate": 1.1866006476646244e-05, + "loss": 0.5797, + "step": 32125 + }, + { + "epoch": 0.8820977484898408, + "grad_norm": 0.3903767466545105, + "learning_rate": 1.186558216733835e-05, + "loss": 0.4285, + "step": 32126 + }, + { + "epoch": 0.8821252059308072, + "grad_norm": 0.4241892993450165, + "learning_rate": 1.1865157854550553e-05, + "loss": 0.5415, + "step": 32127 + }, + { + "epoch": 0.8821526633717738, + "grad_norm": 0.3793124258518219, + "learning_rate": 1.1864733538283641e-05, + "loss": 0.4305, + "step": 32128 + }, + { + "epoch": 0.8821801208127402, + "grad_norm": 0.4295221269130707, + "learning_rate": 1.1864309218538406e-05, + "loss": 0.5671, + "step": 32129 + }, + { + "epoch": 0.8822075782537068, + "grad_norm": 0.3773331642150879, + "learning_rate": 1.1863884895315637e-05, + "loss": 0.4569, + "step": 32130 + }, + { + "epoch": 0.8822350356946732, + "grad_norm": 0.4088532030582428, + "learning_rate": 1.186346056861613e-05, + "loss": 0.4807, + "step": 32131 + }, + { + "epoch": 0.8822624931356398, + "grad_norm": 0.41398003697395325, + "learning_rate": 1.1863036238440672e-05, + "loss": 0.4587, + "step": 32132 + }, + { + "epoch": 0.8822899505766063, + "grad_norm": 0.3913275897502899, + "learning_rate": 1.186261190479006e-05, + "loss": 0.4039, + "step": 32133 + }, + { + "epoch": 0.8823174080175727, + "grad_norm": 0.4251953363418579, + "learning_rate": 1.186218756766508e-05, + "loss": 0.4913, + "step": 32134 + }, + { + "epoch": 0.8823448654585393, + "grad_norm": 0.4061933159828186, + "learning_rate": 1.1861763227066527e-05, + "loss": 0.5518, + "step": 32135 + }, + { + "epoch": 0.8823723228995057, + "grad_norm": 0.4285753667354584, + "learning_rate": 1.186133888299519e-05, + "loss": 0.5017, + "step": 32136 + }, + { + "epoch": 0.8823997803404723, + "grad_norm": 0.39799264073371887, + "learning_rate": 1.1860914535451864e-05, + "loss": 0.5507, + "step": 32137 + }, + { + "epoch": 0.8824272377814387, + "grad_norm": 0.3841661214828491, + "learning_rate": 1.1860490184437339e-05, + "loss": 0.4859, + "step": 32138 + }, + { + "epoch": 0.8824546952224053, + "grad_norm": 0.3925778269767761, + "learning_rate": 1.1860065829952404e-05, + "loss": 0.3974, + "step": 32139 + }, + { + "epoch": 0.8824821526633718, + "grad_norm": 0.35641440749168396, + "learning_rate": 1.185964147199785e-05, + "loss": 0.5035, + "step": 32140 + }, + { + "epoch": 0.8825096101043383, + "grad_norm": 0.40499305725097656, + "learning_rate": 1.1859217110574474e-05, + "loss": 0.476, + "step": 32141 + }, + { + "epoch": 0.8825370675453048, + "grad_norm": 0.3896653950214386, + "learning_rate": 1.1858792745683067e-05, + "loss": 0.5192, + "step": 32142 + }, + { + "epoch": 0.8825645249862712, + "grad_norm": 0.3816666901111603, + "learning_rate": 1.1858368377324417e-05, + "loss": 0.4383, + "step": 32143 + }, + { + "epoch": 0.8825919824272378, + "grad_norm": 0.3993043005466461, + "learning_rate": 1.1857944005499315e-05, + "loss": 0.461, + "step": 32144 + }, + { + "epoch": 0.8826194398682042, + "grad_norm": 0.5778219103813171, + "learning_rate": 1.1857519630208555e-05, + "loss": 0.4686, + "step": 32145 + }, + { + "epoch": 0.8826468973091708, + "grad_norm": 0.3837796449661255, + "learning_rate": 1.1857095251452928e-05, + "loss": 0.5388, + "step": 32146 + }, + { + "epoch": 0.8826743547501373, + "grad_norm": 0.47597536444664, + "learning_rate": 1.1856670869233226e-05, + "loss": 0.5216, + "step": 32147 + }, + { + "epoch": 0.8827018121911038, + "grad_norm": 0.41416996717453003, + "learning_rate": 1.185624648355024e-05, + "loss": 0.5847, + "step": 32148 + }, + { + "epoch": 0.8827292696320703, + "grad_norm": 0.3612534701824188, + "learning_rate": 1.1855822094404762e-05, + "loss": 0.522, + "step": 32149 + }, + { + "epoch": 0.8827567270730368, + "grad_norm": 0.4231935739517212, + "learning_rate": 1.1855397701797585e-05, + "loss": 0.4988, + "step": 32150 + }, + { + "epoch": 0.8827841845140033, + "grad_norm": 0.5280308127403259, + "learning_rate": 1.1854973305729495e-05, + "loss": 0.5315, + "step": 32151 + }, + { + "epoch": 0.8828116419549698, + "grad_norm": 0.41743096709251404, + "learning_rate": 1.1854548906201293e-05, + "loss": 0.4701, + "step": 32152 + }, + { + "epoch": 0.8828390993959363, + "grad_norm": 0.4033883512020111, + "learning_rate": 1.1854124503213763e-05, + "loss": 0.4447, + "step": 32153 + }, + { + "epoch": 0.8828665568369028, + "grad_norm": 0.35017630457878113, + "learning_rate": 1.1853700096767698e-05, + "loss": 0.5249, + "step": 32154 + }, + { + "epoch": 0.8828940142778693, + "grad_norm": 0.38255488872528076, + "learning_rate": 1.1853275686863893e-05, + "loss": 0.4747, + "step": 32155 + }, + { + "epoch": 0.8829214717188358, + "grad_norm": 0.4302094578742981, + "learning_rate": 1.1852851273503134e-05, + "loss": 0.531, + "step": 32156 + }, + { + "epoch": 0.8829489291598023, + "grad_norm": 0.3728107213973999, + "learning_rate": 1.1852426856686219e-05, + "loss": 0.53, + "step": 32157 + }, + { + "epoch": 0.8829763866007688, + "grad_norm": 0.42744162678718567, + "learning_rate": 1.1852002436413934e-05, + "loss": 0.5456, + "step": 32158 + }, + { + "epoch": 0.8830038440417353, + "grad_norm": 0.4061581790447235, + "learning_rate": 1.1851578012687078e-05, + "loss": 0.4842, + "step": 32159 + }, + { + "epoch": 0.8830313014827018, + "grad_norm": 0.3831661343574524, + "learning_rate": 1.1851153585506435e-05, + "loss": 0.537, + "step": 32160 + }, + { + "epoch": 0.8830587589236684, + "grad_norm": 0.4399864077568054, + "learning_rate": 1.1850729154872798e-05, + "loss": 0.5373, + "step": 32161 + }, + { + "epoch": 0.8830862163646348, + "grad_norm": 0.391813188791275, + "learning_rate": 1.1850304720786962e-05, + "loss": 0.5289, + "step": 32162 + }, + { + "epoch": 0.8831136738056014, + "grad_norm": 0.4477876126766205, + "learning_rate": 1.1849880283249717e-05, + "loss": 0.5337, + "step": 32163 + }, + { + "epoch": 0.8831411312465678, + "grad_norm": 0.4569886326789856, + "learning_rate": 1.1849455842261857e-05, + "loss": 0.6615, + "step": 32164 + }, + { + "epoch": 0.8831685886875343, + "grad_norm": 0.41136762499809265, + "learning_rate": 1.1849031397824166e-05, + "loss": 0.5329, + "step": 32165 + }, + { + "epoch": 0.8831960461285008, + "grad_norm": 0.4011352062225342, + "learning_rate": 1.1848606949937447e-05, + "loss": 0.5884, + "step": 32166 + }, + { + "epoch": 0.8832235035694673, + "grad_norm": 0.41272225975990295, + "learning_rate": 1.1848182498602485e-05, + "loss": 0.5466, + "step": 32167 + }, + { + "epoch": 0.8832509610104339, + "grad_norm": 0.43644753098487854, + "learning_rate": 1.1847758043820071e-05, + "loss": 0.4938, + "step": 32168 + }, + { + "epoch": 0.8832784184514003, + "grad_norm": 0.39141562581062317, + "learning_rate": 1.1847333585591001e-05, + "loss": 0.5147, + "step": 32169 + }, + { + "epoch": 0.8833058758923669, + "grad_norm": 0.34249141812324524, + "learning_rate": 1.1846909123916061e-05, + "loss": 0.4756, + "step": 32170 + }, + { + "epoch": 0.8833333333333333, + "grad_norm": 0.4218800365924835, + "learning_rate": 1.1846484658796048e-05, + "loss": 0.522, + "step": 32171 + }, + { + "epoch": 0.8833607907742999, + "grad_norm": 0.4212571680545807, + "learning_rate": 1.1846060190231752e-05, + "loss": 0.5223, + "step": 32172 + }, + { + "epoch": 0.8833882482152663, + "grad_norm": 0.3780130445957184, + "learning_rate": 1.1845635718223962e-05, + "loss": 0.4909, + "step": 32173 + }, + { + "epoch": 0.8834157056562328, + "grad_norm": 0.3961924910545349, + "learning_rate": 1.1845211242773476e-05, + "loss": 0.5045, + "step": 32174 + }, + { + "epoch": 0.8834431630971994, + "grad_norm": 0.4031975269317627, + "learning_rate": 1.184478676388108e-05, + "loss": 0.4923, + "step": 32175 + }, + { + "epoch": 0.8834706205381658, + "grad_norm": 0.3795796036720276, + "learning_rate": 1.1844362281547567e-05, + "loss": 0.5248, + "step": 32176 + }, + { + "epoch": 0.8834980779791324, + "grad_norm": 0.3640701472759247, + "learning_rate": 1.1843937795773735e-05, + "loss": 0.3851, + "step": 32177 + }, + { + "epoch": 0.8835255354200988, + "grad_norm": 0.37172266840934753, + "learning_rate": 1.1843513306560363e-05, + "loss": 0.5061, + "step": 32178 + }, + { + "epoch": 0.8835529928610654, + "grad_norm": 0.42259564995765686, + "learning_rate": 1.1843088813908257e-05, + "loss": 0.5187, + "step": 32179 + }, + { + "epoch": 0.8835804503020318, + "grad_norm": 0.4077850878238678, + "learning_rate": 1.1842664317818196e-05, + "loss": 0.4437, + "step": 32180 + }, + { + "epoch": 0.8836079077429984, + "grad_norm": 0.46389225125312805, + "learning_rate": 1.1842239818290981e-05, + "loss": 0.4818, + "step": 32181 + }, + { + "epoch": 0.8836353651839649, + "grad_norm": 0.37171807885169983, + "learning_rate": 1.1841815315327403e-05, + "loss": 0.4769, + "step": 32182 + }, + { + "epoch": 0.8836628226249313, + "grad_norm": 0.3757531940937042, + "learning_rate": 1.1841390808928248e-05, + "loss": 0.4652, + "step": 32183 + }, + { + "epoch": 0.8836902800658979, + "grad_norm": 0.39977747201919556, + "learning_rate": 1.1840966299094314e-05, + "loss": 0.5045, + "step": 32184 + }, + { + "epoch": 0.8837177375068643, + "grad_norm": 0.34406939148902893, + "learning_rate": 1.1840541785826387e-05, + "loss": 0.4514, + "step": 32185 + }, + { + "epoch": 0.8837451949478309, + "grad_norm": 0.3807407021522522, + "learning_rate": 1.1840117269125265e-05, + "loss": 0.5179, + "step": 32186 + }, + { + "epoch": 0.8837726523887973, + "grad_norm": 0.36314013600349426, + "learning_rate": 1.1839692748991737e-05, + "loss": 0.5465, + "step": 32187 + }, + { + "epoch": 0.8838001098297639, + "grad_norm": 0.5790812373161316, + "learning_rate": 1.1839268225426592e-05, + "loss": 0.5139, + "step": 32188 + }, + { + "epoch": 0.8838275672707304, + "grad_norm": 0.42763590812683105, + "learning_rate": 1.1838843698430629e-05, + "loss": 0.4964, + "step": 32189 + }, + { + "epoch": 0.8838550247116969, + "grad_norm": 0.3995521366596222, + "learning_rate": 1.1838419168004633e-05, + "loss": 0.444, + "step": 32190 + }, + { + "epoch": 0.8838824821526634, + "grad_norm": 0.41822493076324463, + "learning_rate": 1.18379946341494e-05, + "loss": 0.5329, + "step": 32191 + }, + { + "epoch": 0.8839099395936298, + "grad_norm": 0.4079596996307373, + "learning_rate": 1.1837570096865718e-05, + "loss": 0.4618, + "step": 32192 + }, + { + "epoch": 0.8839373970345964, + "grad_norm": 0.39923998713493347, + "learning_rate": 1.1837145556154381e-05, + "loss": 0.5101, + "step": 32193 + }, + { + "epoch": 0.8839648544755628, + "grad_norm": 0.3490218222141266, + "learning_rate": 1.1836721012016184e-05, + "loss": 0.4736, + "step": 32194 + }, + { + "epoch": 0.8839923119165294, + "grad_norm": 0.3912433385848999, + "learning_rate": 1.1836296464451915e-05, + "loss": 0.5242, + "step": 32195 + }, + { + "epoch": 0.8840197693574959, + "grad_norm": 0.36327850818634033, + "learning_rate": 1.1835871913462367e-05, + "loss": 0.4737, + "step": 32196 + }, + { + "epoch": 0.8840472267984624, + "grad_norm": 0.3886588513851166, + "learning_rate": 1.183544735904833e-05, + "loss": 0.5638, + "step": 32197 + }, + { + "epoch": 0.8840746842394289, + "grad_norm": 0.4627472460269928, + "learning_rate": 1.18350228012106e-05, + "loss": 0.4521, + "step": 32198 + }, + { + "epoch": 0.8841021416803954, + "grad_norm": 0.3643319606781006, + "learning_rate": 1.1834598239949968e-05, + "loss": 0.5694, + "step": 32199 + }, + { + "epoch": 0.8841295991213619, + "grad_norm": 0.36182764172554016, + "learning_rate": 1.1834173675267224e-05, + "loss": 0.532, + "step": 32200 + }, + { + "epoch": 0.8841570565623283, + "grad_norm": 0.38726598024368286, + "learning_rate": 1.1833749107163157e-05, + "loss": 0.4638, + "step": 32201 + }, + { + "epoch": 0.8841845140032949, + "grad_norm": 0.41657403111457825, + "learning_rate": 1.1833324535638568e-05, + "loss": 0.3977, + "step": 32202 + }, + { + "epoch": 0.8842119714442614, + "grad_norm": 0.36668258905410767, + "learning_rate": 1.1832899960694242e-05, + "loss": 0.4848, + "step": 32203 + }, + { + "epoch": 0.8842394288852279, + "grad_norm": 0.3804365396499634, + "learning_rate": 1.1832475382330971e-05, + "loss": 0.4361, + "step": 32204 + }, + { + "epoch": 0.8842668863261944, + "grad_norm": 0.4318682849407196, + "learning_rate": 1.1832050800549549e-05, + "loss": 0.4455, + "step": 32205 + }, + { + "epoch": 0.8842943437671609, + "grad_norm": 0.411655068397522, + "learning_rate": 1.1831626215350766e-05, + "loss": 0.4367, + "step": 32206 + }, + { + "epoch": 0.8843218012081274, + "grad_norm": 0.39048585295677185, + "learning_rate": 1.1831201626735418e-05, + "loss": 0.3904, + "step": 32207 + }, + { + "epoch": 0.8843492586490939, + "grad_norm": 0.33986759185791016, + "learning_rate": 1.1830777034704294e-05, + "loss": 0.4661, + "step": 32208 + }, + { + "epoch": 0.8843767160900604, + "grad_norm": 0.39313238859176636, + "learning_rate": 1.1830352439258186e-05, + "loss": 0.4511, + "step": 32209 + }, + { + "epoch": 0.884404173531027, + "grad_norm": 0.4647328555583954, + "learning_rate": 1.1829927840397888e-05, + "loss": 0.4766, + "step": 32210 + }, + { + "epoch": 0.8844316309719934, + "grad_norm": 0.44786790013313293, + "learning_rate": 1.1829503238124188e-05, + "loss": 0.4907, + "step": 32211 + }, + { + "epoch": 0.88445908841296, + "grad_norm": 0.4534197747707367, + "learning_rate": 1.1829078632437883e-05, + "loss": 0.5093, + "step": 32212 + }, + { + "epoch": 0.8844865458539264, + "grad_norm": 0.4146195352077484, + "learning_rate": 1.1828654023339762e-05, + "loss": 0.5246, + "step": 32213 + }, + { + "epoch": 0.8845140032948929, + "grad_norm": 0.3602127730846405, + "learning_rate": 1.1828229410830616e-05, + "loss": 0.4229, + "step": 32214 + }, + { + "epoch": 0.8845414607358594, + "grad_norm": 0.3959024250507355, + "learning_rate": 1.1827804794911239e-05, + "loss": 0.4203, + "step": 32215 + }, + { + "epoch": 0.8845689181768259, + "grad_norm": 0.38194435834884644, + "learning_rate": 1.1827380175582423e-05, + "loss": 0.5425, + "step": 32216 + }, + { + "epoch": 0.8845963756177925, + "grad_norm": 0.4104350209236145, + "learning_rate": 1.1826955552844961e-05, + "loss": 0.5609, + "step": 32217 + }, + { + "epoch": 0.8846238330587589, + "grad_norm": 0.4221123158931732, + "learning_rate": 1.1826530926699645e-05, + "loss": 0.4843, + "step": 32218 + }, + { + "epoch": 0.8846512904997255, + "grad_norm": 0.35375499725341797, + "learning_rate": 1.1826106297147262e-05, + "loss": 0.4644, + "step": 32219 + }, + { + "epoch": 0.8846787479406919, + "grad_norm": 0.372004896402359, + "learning_rate": 1.1825681664188611e-05, + "loss": 0.4379, + "step": 32220 + }, + { + "epoch": 0.8847062053816585, + "grad_norm": 0.463944673538208, + "learning_rate": 1.1825257027824481e-05, + "loss": 0.4815, + "step": 32221 + }, + { + "epoch": 0.8847336628226249, + "grad_norm": 0.31470710039138794, + "learning_rate": 1.1824832388055661e-05, + "loss": 0.4445, + "step": 32222 + }, + { + "epoch": 0.8847611202635914, + "grad_norm": 0.3491107225418091, + "learning_rate": 1.1824407744882948e-05, + "loss": 0.487, + "step": 32223 + }, + { + "epoch": 0.884788577704558, + "grad_norm": 0.39206451177597046, + "learning_rate": 1.1823983098307133e-05, + "loss": 0.5246, + "step": 32224 + }, + { + "epoch": 0.8848160351455244, + "grad_norm": 0.44791409373283386, + "learning_rate": 1.1823558448329006e-05, + "loss": 0.5721, + "step": 32225 + }, + { + "epoch": 0.884843492586491, + "grad_norm": 0.3837636411190033, + "learning_rate": 1.1823133794949361e-05, + "loss": 0.5349, + "step": 32226 + }, + { + "epoch": 0.8848709500274574, + "grad_norm": 0.3785567581653595, + "learning_rate": 1.1822709138168991e-05, + "loss": 0.4714, + "step": 32227 + }, + { + "epoch": 0.884898407468424, + "grad_norm": 0.38533204793930054, + "learning_rate": 1.1822284477988686e-05, + "loss": 0.4955, + "step": 32228 + }, + { + "epoch": 0.8849258649093904, + "grad_norm": 0.3779972195625305, + "learning_rate": 1.1821859814409238e-05, + "loss": 0.4595, + "step": 32229 + }, + { + "epoch": 0.884953322350357, + "grad_norm": 0.43550172448158264, + "learning_rate": 1.1821435147431438e-05, + "loss": 0.5219, + "step": 32230 + }, + { + "epoch": 0.8849807797913235, + "grad_norm": 0.40131136775016785, + "learning_rate": 1.1821010477056084e-05, + "loss": 0.451, + "step": 32231 + }, + { + "epoch": 0.8850082372322899, + "grad_norm": 0.4214985966682434, + "learning_rate": 1.1820585803283962e-05, + "loss": 0.4996, + "step": 32232 + }, + { + "epoch": 0.8850356946732565, + "grad_norm": 0.37786179780960083, + "learning_rate": 1.1820161126115868e-05, + "loss": 0.55, + "step": 32233 + }, + { + "epoch": 0.8850631521142229, + "grad_norm": 0.3775619566440582, + "learning_rate": 1.1819736445552591e-05, + "loss": 0.4858, + "step": 32234 + }, + { + "epoch": 0.8850906095551895, + "grad_norm": 0.4334871768951416, + "learning_rate": 1.1819311761594927e-05, + "loss": 0.5804, + "step": 32235 + }, + { + "epoch": 0.8851180669961559, + "grad_norm": 0.4062921106815338, + "learning_rate": 1.1818887074243665e-05, + "loss": 0.4861, + "step": 32236 + }, + { + "epoch": 0.8851455244371225, + "grad_norm": 0.4451112747192383, + "learning_rate": 1.1818462383499596e-05, + "loss": 0.4682, + "step": 32237 + }, + { + "epoch": 0.885172981878089, + "grad_norm": 0.3192233443260193, + "learning_rate": 1.1818037689363514e-05, + "loss": 0.5007, + "step": 32238 + }, + { + "epoch": 0.8852004393190555, + "grad_norm": 0.38611313700675964, + "learning_rate": 1.1817612991836216e-05, + "loss": 0.4691, + "step": 32239 + }, + { + "epoch": 0.885227896760022, + "grad_norm": 0.3647097647190094, + "learning_rate": 1.1817188290918487e-05, + "loss": 0.553, + "step": 32240 + }, + { + "epoch": 0.8852553542009884, + "grad_norm": 0.371381551027298, + "learning_rate": 1.1816763586611121e-05, + "loss": 0.5702, + "step": 32241 + }, + { + "epoch": 0.885282811641955, + "grad_norm": 0.48543012142181396, + "learning_rate": 1.181633887891491e-05, + "loss": 0.5819, + "step": 32242 + }, + { + "epoch": 0.8853102690829214, + "grad_norm": 0.4578612148761749, + "learning_rate": 1.181591416783065e-05, + "loss": 0.4723, + "step": 32243 + }, + { + "epoch": 0.885337726523888, + "grad_norm": 0.3944311738014221, + "learning_rate": 1.181548945335913e-05, + "loss": 0.4929, + "step": 32244 + }, + { + "epoch": 0.8853651839648545, + "grad_norm": 0.35601234436035156, + "learning_rate": 1.1815064735501142e-05, + "loss": 0.4683, + "step": 32245 + }, + { + "epoch": 0.885392641405821, + "grad_norm": 0.40630781650543213, + "learning_rate": 1.1814640014257479e-05, + "loss": 0.4764, + "step": 32246 + }, + { + "epoch": 0.8854200988467875, + "grad_norm": 0.4339616000652313, + "learning_rate": 1.1814215289628934e-05, + "loss": 0.5842, + "step": 32247 + }, + { + "epoch": 0.885447556287754, + "grad_norm": 0.4239129424095154, + "learning_rate": 1.1813790561616298e-05, + "loss": 0.524, + "step": 32248 + }, + { + "epoch": 0.8854750137287205, + "grad_norm": 0.4122961759567261, + "learning_rate": 1.1813365830220365e-05, + "loss": 0.4637, + "step": 32249 + }, + { + "epoch": 0.8855024711696869, + "grad_norm": 0.3633846044540405, + "learning_rate": 1.1812941095441923e-05, + "loss": 0.501, + "step": 32250 + }, + { + "epoch": 0.8855299286106535, + "grad_norm": 0.38852912187576294, + "learning_rate": 1.181251635728177e-05, + "loss": 0.4841, + "step": 32251 + }, + { + "epoch": 0.88555738605162, + "grad_norm": 0.3883962035179138, + "learning_rate": 1.1812091615740694e-05, + "loss": 0.5001, + "step": 32252 + }, + { + "epoch": 0.8855848434925865, + "grad_norm": 0.3708323538303375, + "learning_rate": 1.1811666870819489e-05, + "loss": 0.4761, + "step": 32253 + }, + { + "epoch": 0.885612300933553, + "grad_norm": 0.37440213561058044, + "learning_rate": 1.1811242122518946e-05, + "loss": 0.4884, + "step": 32254 + }, + { + "epoch": 0.8856397583745195, + "grad_norm": 0.374057412147522, + "learning_rate": 1.181081737083986e-05, + "loss": 0.466, + "step": 32255 + }, + { + "epoch": 0.885667215815486, + "grad_norm": 0.41832435131073, + "learning_rate": 1.1810392615783021e-05, + "loss": 0.5287, + "step": 32256 + }, + { + "epoch": 0.8856946732564525, + "grad_norm": 0.44175073504447937, + "learning_rate": 1.1809967857349223e-05, + "loss": 0.5712, + "step": 32257 + }, + { + "epoch": 0.885722130697419, + "grad_norm": 0.3755776882171631, + "learning_rate": 1.1809543095539255e-05, + "loss": 0.4574, + "step": 32258 + }, + { + "epoch": 0.8857495881383856, + "grad_norm": 0.34833279252052307, + "learning_rate": 1.1809118330353914e-05, + "loss": 0.4279, + "step": 32259 + }, + { + "epoch": 0.885777045579352, + "grad_norm": 0.4183677136898041, + "learning_rate": 1.1808693561793987e-05, + "loss": 0.5081, + "step": 32260 + }, + { + "epoch": 0.8858045030203185, + "grad_norm": 0.3653058111667633, + "learning_rate": 1.1808268789860274e-05, + "loss": 0.4472, + "step": 32261 + }, + { + "epoch": 0.885831960461285, + "grad_norm": 0.42404279112815857, + "learning_rate": 1.180784401455356e-05, + "loss": 0.5389, + "step": 32262 + }, + { + "epoch": 0.8858594179022515, + "grad_norm": 0.3922504782676697, + "learning_rate": 1.1807419235874637e-05, + "loss": 0.4702, + "step": 32263 + }, + { + "epoch": 0.885886875343218, + "grad_norm": 0.46520131826400757, + "learning_rate": 1.1806994453824306e-05, + "loss": 0.4426, + "step": 32264 + }, + { + "epoch": 0.8859143327841845, + "grad_norm": 0.39741405844688416, + "learning_rate": 1.180656966840335e-05, + "loss": 0.4639, + "step": 32265 + }, + { + "epoch": 0.8859417902251511, + "grad_norm": 0.3872389793395996, + "learning_rate": 1.1806144879612564e-05, + "loss": 0.5258, + "step": 32266 + }, + { + "epoch": 0.8859692476661175, + "grad_norm": 0.43221431970596313, + "learning_rate": 1.1805720087452743e-05, + "loss": 0.5147, + "step": 32267 + }, + { + "epoch": 0.8859967051070841, + "grad_norm": 0.37292519211769104, + "learning_rate": 1.1805295291924677e-05, + "loss": 0.4525, + "step": 32268 + }, + { + "epoch": 0.8860241625480505, + "grad_norm": 0.3899640142917633, + "learning_rate": 1.1804870493029161e-05, + "loss": 0.6363, + "step": 32269 + }, + { + "epoch": 0.886051619989017, + "grad_norm": 0.4001234471797943, + "learning_rate": 1.1804445690766983e-05, + "loss": 0.5041, + "step": 32270 + }, + { + "epoch": 0.8860790774299835, + "grad_norm": 0.3919003903865814, + "learning_rate": 1.1804020885138937e-05, + "loss": 0.5335, + "step": 32271 + }, + { + "epoch": 0.88610653487095, + "grad_norm": 0.4145071804523468, + "learning_rate": 1.180359607614582e-05, + "loss": 0.4394, + "step": 32272 + }, + { + "epoch": 0.8861339923119165, + "grad_norm": 0.39130929112434387, + "learning_rate": 1.1803171263788418e-05, + "loss": 0.424, + "step": 32273 + }, + { + "epoch": 0.886161449752883, + "grad_norm": 0.3803912401199341, + "learning_rate": 1.1802746448067528e-05, + "loss": 0.4773, + "step": 32274 + }, + { + "epoch": 0.8861889071938496, + "grad_norm": 0.346413791179657, + "learning_rate": 1.1802321628983942e-05, + "loss": 0.477, + "step": 32275 + }, + { + "epoch": 0.886216364634816, + "grad_norm": 0.41806361079216003, + "learning_rate": 1.1801896806538447e-05, + "loss": 0.4713, + "step": 32276 + }, + { + "epoch": 0.8862438220757826, + "grad_norm": 0.3451319634914398, + "learning_rate": 1.180147198073184e-05, + "loss": 0.4306, + "step": 32277 + }, + { + "epoch": 0.886271279516749, + "grad_norm": 0.38805362582206726, + "learning_rate": 1.1801047151564912e-05, + "loss": 0.6115, + "step": 32278 + }, + { + "epoch": 0.8862987369577155, + "grad_norm": 0.3937864303588867, + "learning_rate": 1.180062231903846e-05, + "loss": 0.5451, + "step": 32279 + }, + { + "epoch": 0.886326194398682, + "grad_norm": 0.41906753182411194, + "learning_rate": 1.1800197483153271e-05, + "loss": 0.5249, + "step": 32280 + }, + { + "epoch": 0.8863536518396485, + "grad_norm": 0.3743286430835724, + "learning_rate": 1.1799772643910137e-05, + "loss": 0.4593, + "step": 32281 + }, + { + "epoch": 0.8863811092806151, + "grad_norm": 0.4307684600353241, + "learning_rate": 1.1799347801309856e-05, + "loss": 0.4923, + "step": 32282 + }, + { + "epoch": 0.8864085667215815, + "grad_norm": 0.3971366882324219, + "learning_rate": 1.1798922955353216e-05, + "loss": 0.4684, + "step": 32283 + }, + { + "epoch": 0.8864360241625481, + "grad_norm": 0.4436178207397461, + "learning_rate": 1.1798498106041007e-05, + "loss": 0.4828, + "step": 32284 + }, + { + "epoch": 0.8864634816035145, + "grad_norm": 0.40856921672821045, + "learning_rate": 1.1798073253374032e-05, + "loss": 0.4458, + "step": 32285 + }, + { + "epoch": 0.8864909390444811, + "grad_norm": 0.38338544964790344, + "learning_rate": 1.179764839735307e-05, + "loss": 0.4774, + "step": 32286 + }, + { + "epoch": 0.8865183964854475, + "grad_norm": 0.4247235059738159, + "learning_rate": 1.1797223537978923e-05, + "loss": 0.4788, + "step": 32287 + }, + { + "epoch": 0.886545853926414, + "grad_norm": 1.4680722951889038, + "learning_rate": 1.1796798675252382e-05, + "loss": 0.4618, + "step": 32288 + }, + { + "epoch": 0.8865733113673806, + "grad_norm": 0.3966628611087799, + "learning_rate": 1.1796373809174233e-05, + "loss": 0.4407, + "step": 32289 + }, + { + "epoch": 0.886600768808347, + "grad_norm": 0.4033275842666626, + "learning_rate": 1.1795948939745277e-05, + "loss": 0.5064, + "step": 32290 + }, + { + "epoch": 0.8866282262493136, + "grad_norm": 0.3833080530166626, + "learning_rate": 1.1795524066966303e-05, + "loss": 0.5277, + "step": 32291 + }, + { + "epoch": 0.88665568369028, + "grad_norm": 0.4392809569835663, + "learning_rate": 1.1795099190838102e-05, + "loss": 0.4858, + "step": 32292 + }, + { + "epoch": 0.8866831411312466, + "grad_norm": 0.3940228819847107, + "learning_rate": 1.1794674311361471e-05, + "loss": 0.5114, + "step": 32293 + }, + { + "epoch": 0.886710598572213, + "grad_norm": 0.44981974363327026, + "learning_rate": 1.1794249428537197e-05, + "loss": 0.5166, + "step": 32294 + }, + { + "epoch": 0.8867380560131796, + "grad_norm": 0.4200170636177063, + "learning_rate": 1.1793824542366075e-05, + "loss": 0.5414, + "step": 32295 + }, + { + "epoch": 0.8867655134541461, + "grad_norm": 0.43664661049842834, + "learning_rate": 1.1793399652848901e-05, + "loss": 0.5494, + "step": 32296 + }, + { + "epoch": 0.8867929708951126, + "grad_norm": 0.3656286299228668, + "learning_rate": 1.179297475998646e-05, + "loss": 0.4719, + "step": 32297 + }, + { + "epoch": 0.8868204283360791, + "grad_norm": 0.3872431218624115, + "learning_rate": 1.179254986377955e-05, + "loss": 0.4738, + "step": 32298 + }, + { + "epoch": 0.8868478857770455, + "grad_norm": 0.38341832160949707, + "learning_rate": 1.1792124964228964e-05, + "loss": 0.4458, + "step": 32299 + }, + { + "epoch": 0.8868753432180121, + "grad_norm": 0.3539150655269623, + "learning_rate": 1.1791700061335491e-05, + "loss": 0.4317, + "step": 32300 + }, + { + "epoch": 0.8869028006589785, + "grad_norm": 0.4034280776977539, + "learning_rate": 1.1791275155099929e-05, + "loss": 0.4997, + "step": 32301 + }, + { + "epoch": 0.8869302580999451, + "grad_norm": 0.43483179807662964, + "learning_rate": 1.1790850245523063e-05, + "loss": 0.5214, + "step": 32302 + }, + { + "epoch": 0.8869577155409116, + "grad_norm": 0.3797767460346222, + "learning_rate": 1.1790425332605691e-05, + "loss": 0.4729, + "step": 32303 + }, + { + "epoch": 0.8869851729818781, + "grad_norm": 0.373826801776886, + "learning_rate": 1.1790000416348604e-05, + "loss": 0.4954, + "step": 32304 + }, + { + "epoch": 0.8870126304228446, + "grad_norm": 0.4002823829650879, + "learning_rate": 1.1789575496752597e-05, + "loss": 0.5375, + "step": 32305 + }, + { + "epoch": 0.887040087863811, + "grad_norm": 0.39299434423446655, + "learning_rate": 1.1789150573818459e-05, + "loss": 0.5001, + "step": 32306 + }, + { + "epoch": 0.8870675453047776, + "grad_norm": 0.39473775029182434, + "learning_rate": 1.1788725647546984e-05, + "loss": 0.4947, + "step": 32307 + }, + { + "epoch": 0.887095002745744, + "grad_norm": 0.42034900188446045, + "learning_rate": 1.1788300717938965e-05, + "loss": 0.5281, + "step": 32308 + }, + { + "epoch": 0.8871224601867106, + "grad_norm": 0.3666178584098816, + "learning_rate": 1.1787875784995193e-05, + "loss": 0.4568, + "step": 32309 + }, + { + "epoch": 0.8871499176276771, + "grad_norm": 0.3575306534767151, + "learning_rate": 1.1787450848716464e-05, + "loss": 0.4807, + "step": 32310 + }, + { + "epoch": 0.8871773750686436, + "grad_norm": 0.3794727027416229, + "learning_rate": 1.1787025909103568e-05, + "loss": 0.5429, + "step": 32311 + }, + { + "epoch": 0.8872048325096101, + "grad_norm": 0.42677199840545654, + "learning_rate": 1.1786600966157298e-05, + "loss": 0.5397, + "step": 32312 + }, + { + "epoch": 0.8872322899505766, + "grad_norm": 0.3842495381832123, + "learning_rate": 1.1786176019878449e-05, + "loss": 0.4824, + "step": 32313 + }, + { + "epoch": 0.8872597473915431, + "grad_norm": 0.41714659333229065, + "learning_rate": 1.178575107026781e-05, + "loss": 0.4555, + "step": 32314 + }, + { + "epoch": 0.8872872048325096, + "grad_norm": 0.35265809297561646, + "learning_rate": 1.1785326117326174e-05, + "loss": 0.4254, + "step": 32315 + }, + { + "epoch": 0.8873146622734761, + "grad_norm": 0.4360281527042389, + "learning_rate": 1.1784901161054339e-05, + "loss": 0.5909, + "step": 32316 + }, + { + "epoch": 0.8873421197144427, + "grad_norm": 0.596787691116333, + "learning_rate": 1.178447620145309e-05, + "loss": 0.5033, + "step": 32317 + }, + { + "epoch": 0.8873695771554091, + "grad_norm": 0.44819721579551697, + "learning_rate": 1.1784051238523225e-05, + "loss": 0.5593, + "step": 32318 + }, + { + "epoch": 0.8873970345963756, + "grad_norm": 0.36871597170829773, + "learning_rate": 1.1783626272265534e-05, + "loss": 0.4819, + "step": 32319 + }, + { + "epoch": 0.8874244920373421, + "grad_norm": 0.3932063579559326, + "learning_rate": 1.1783201302680812e-05, + "loss": 0.4944, + "step": 32320 + }, + { + "epoch": 0.8874519494783086, + "grad_norm": 0.4390850067138672, + "learning_rate": 1.178277632976985e-05, + "loss": 0.4879, + "step": 32321 + }, + { + "epoch": 0.8874794069192751, + "grad_norm": 0.41895365715026855, + "learning_rate": 1.178235135353344e-05, + "loss": 0.5454, + "step": 32322 + }, + { + "epoch": 0.8875068643602416, + "grad_norm": 0.35101598501205444, + "learning_rate": 1.1781926373972377e-05, + "loss": 0.4813, + "step": 32323 + }, + { + "epoch": 0.8875343218012082, + "grad_norm": 0.43347662687301636, + "learning_rate": 1.1781501391087451e-05, + "loss": 0.5112, + "step": 32324 + }, + { + "epoch": 0.8875617792421746, + "grad_norm": 0.45791372656822205, + "learning_rate": 1.1781076404879457e-05, + "loss": 0.5788, + "step": 32325 + }, + { + "epoch": 0.8875892366831412, + "grad_norm": 0.3925175368785858, + "learning_rate": 1.1780651415349189e-05, + "loss": 0.529, + "step": 32326 + }, + { + "epoch": 0.8876166941241076, + "grad_norm": 0.41011446714401245, + "learning_rate": 1.1780226422497436e-05, + "loss": 0.5168, + "step": 32327 + }, + { + "epoch": 0.8876441515650741, + "grad_norm": 0.42135438323020935, + "learning_rate": 1.1779801426324992e-05, + "loss": 0.4773, + "step": 32328 + }, + { + "epoch": 0.8876716090060406, + "grad_norm": 0.44236207008361816, + "learning_rate": 1.1779376426832649e-05, + "loss": 0.5122, + "step": 32329 + }, + { + "epoch": 0.8876990664470071, + "grad_norm": 0.36410778760910034, + "learning_rate": 1.1778951424021205e-05, + "loss": 0.4803, + "step": 32330 + }, + { + "epoch": 0.8877265238879737, + "grad_norm": 0.38060396909713745, + "learning_rate": 1.177852641789145e-05, + "loss": 0.5327, + "step": 32331 + }, + { + "epoch": 0.8877539813289401, + "grad_norm": 0.4074549973011017, + "learning_rate": 1.1778101408444169e-05, + "loss": 0.4457, + "step": 32332 + }, + { + "epoch": 0.8877814387699067, + "grad_norm": 0.4042617082595825, + "learning_rate": 1.1777676395680166e-05, + "loss": 0.5639, + "step": 32333 + }, + { + "epoch": 0.8878088962108731, + "grad_norm": 0.3944231867790222, + "learning_rate": 1.1777251379600227e-05, + "loss": 0.5142, + "step": 32334 + }, + { + "epoch": 0.8878363536518397, + "grad_norm": 0.347436785697937, + "learning_rate": 1.1776826360205148e-05, + "loss": 0.4646, + "step": 32335 + }, + { + "epoch": 0.8878638110928061, + "grad_norm": 0.3979927599430084, + "learning_rate": 1.1776401337495725e-05, + "loss": 0.5431, + "step": 32336 + }, + { + "epoch": 0.8878912685337726, + "grad_norm": 0.4092268943786621, + "learning_rate": 1.1775976311472739e-05, + "loss": 0.5415, + "step": 32337 + }, + { + "epoch": 0.8879187259747392, + "grad_norm": 0.456389844417572, + "learning_rate": 1.1775551282136997e-05, + "loss": 0.4889, + "step": 32338 + }, + { + "epoch": 0.8879461834157056, + "grad_norm": 0.4914087653160095, + "learning_rate": 1.177512624948928e-05, + "loss": 0.4732, + "step": 32339 + }, + { + "epoch": 0.8879736408566722, + "grad_norm": 0.3956902325153351, + "learning_rate": 1.1774701213530388e-05, + "loss": 0.3794, + "step": 32340 + }, + { + "epoch": 0.8880010982976386, + "grad_norm": 0.5093299150466919, + "learning_rate": 1.1774276174261113e-05, + "loss": 0.4938, + "step": 32341 + }, + { + "epoch": 0.8880285557386052, + "grad_norm": 0.48273202776908875, + "learning_rate": 1.1773851131682244e-05, + "loss": 0.4972, + "step": 32342 + }, + { + "epoch": 0.8880560131795716, + "grad_norm": 0.38012975454330444, + "learning_rate": 1.1773426085794579e-05, + "loss": 0.4958, + "step": 32343 + }, + { + "epoch": 0.8880834706205382, + "grad_norm": 0.4162265956401825, + "learning_rate": 1.1773001036598906e-05, + "loss": 0.5059, + "step": 32344 + }, + { + "epoch": 0.8881109280615047, + "grad_norm": 0.42823150753974915, + "learning_rate": 1.1772575984096022e-05, + "loss": 0.5195, + "step": 32345 + }, + { + "epoch": 0.8881383855024712, + "grad_norm": 0.4016713798046112, + "learning_rate": 1.1772150928286716e-05, + "loss": 0.469, + "step": 32346 + }, + { + "epoch": 0.8881658429434377, + "grad_norm": 0.4203815758228302, + "learning_rate": 1.1771725869171786e-05, + "loss": 0.5084, + "step": 32347 + }, + { + "epoch": 0.8881933003844041, + "grad_norm": 0.35750606656074524, + "learning_rate": 1.177130080675202e-05, + "loss": 0.4488, + "step": 32348 + }, + { + "epoch": 0.8882207578253707, + "grad_norm": 0.34417724609375, + "learning_rate": 1.1770875741028212e-05, + "loss": 0.499, + "step": 32349 + }, + { + "epoch": 0.8882482152663371, + "grad_norm": 0.5740039944648743, + "learning_rate": 1.1770450672001156e-05, + "loss": 0.5557, + "step": 32350 + }, + { + "epoch": 0.8882756727073037, + "grad_norm": 0.39526838064193726, + "learning_rate": 1.1770025599671643e-05, + "loss": 0.4459, + "step": 32351 + }, + { + "epoch": 0.8883031301482702, + "grad_norm": 0.3914460241794586, + "learning_rate": 1.1769600524040472e-05, + "loss": 0.5408, + "step": 32352 + }, + { + "epoch": 0.8883305875892367, + "grad_norm": 0.4597156047821045, + "learning_rate": 1.176917544510843e-05, + "loss": 0.5075, + "step": 32353 + }, + { + "epoch": 0.8883580450302032, + "grad_norm": 0.4138011336326599, + "learning_rate": 1.1768750362876307e-05, + "loss": 0.499, + "step": 32354 + }, + { + "epoch": 0.8883855024711697, + "grad_norm": 0.37115558981895447, + "learning_rate": 1.1768325277344903e-05, + "loss": 0.4782, + "step": 32355 + }, + { + "epoch": 0.8884129599121362, + "grad_norm": 0.8096694946289062, + "learning_rate": 1.1767900188515007e-05, + "loss": 0.4704, + "step": 32356 + }, + { + "epoch": 0.8884404173531026, + "grad_norm": 0.38582080602645874, + "learning_rate": 1.1767475096387413e-05, + "loss": 0.4534, + "step": 32357 + }, + { + "epoch": 0.8884678747940692, + "grad_norm": 0.4118908643722534, + "learning_rate": 1.1767050000962917e-05, + "loss": 0.463, + "step": 32358 + }, + { + "epoch": 0.8884953322350357, + "grad_norm": 0.4188452959060669, + "learning_rate": 1.1766624902242303e-05, + "loss": 0.5489, + "step": 32359 + }, + { + "epoch": 0.8885227896760022, + "grad_norm": 0.385082870721817, + "learning_rate": 1.1766199800226374e-05, + "loss": 0.5048, + "step": 32360 + }, + { + "epoch": 0.8885502471169687, + "grad_norm": 0.40431004762649536, + "learning_rate": 1.1765774694915917e-05, + "loss": 0.5361, + "step": 32361 + }, + { + "epoch": 0.8885777045579352, + "grad_norm": 0.38908153772354126, + "learning_rate": 1.1765349586311727e-05, + "loss": 0.5182, + "step": 32362 + }, + { + "epoch": 0.8886051619989017, + "grad_norm": 0.3730350136756897, + "learning_rate": 1.17649244744146e-05, + "loss": 0.4621, + "step": 32363 + }, + { + "epoch": 0.8886326194398682, + "grad_norm": 0.4007670283317566, + "learning_rate": 1.1764499359225319e-05, + "loss": 0.4809, + "step": 32364 + }, + { + "epoch": 0.8886600768808347, + "grad_norm": 0.4085747003555298, + "learning_rate": 1.1764074240744687e-05, + "loss": 0.5064, + "step": 32365 + }, + { + "epoch": 0.8886875343218013, + "grad_norm": 0.4137759506702423, + "learning_rate": 1.1763649118973493e-05, + "loss": 0.492, + "step": 32366 + }, + { + "epoch": 0.8887149917627677, + "grad_norm": 0.44986778497695923, + "learning_rate": 1.1763223993912532e-05, + "loss": 0.4696, + "step": 32367 + }, + { + "epoch": 0.8887424492037342, + "grad_norm": 0.39771515130996704, + "learning_rate": 1.1762798865562594e-05, + "loss": 0.5037, + "step": 32368 + }, + { + "epoch": 0.8887699066447007, + "grad_norm": 0.37728968262672424, + "learning_rate": 1.1762373733924473e-05, + "loss": 0.4566, + "step": 32369 + }, + { + "epoch": 0.8887973640856672, + "grad_norm": 0.41342806816101074, + "learning_rate": 1.1761948598998964e-05, + "loss": 0.5826, + "step": 32370 + }, + { + "epoch": 0.8888248215266337, + "grad_norm": 0.40776675939559937, + "learning_rate": 1.1761523460786857e-05, + "loss": 0.5533, + "step": 32371 + }, + { + "epoch": 0.8888522789676002, + "grad_norm": 0.4128161072731018, + "learning_rate": 1.1761098319288945e-05, + "loss": 0.4897, + "step": 32372 + }, + { + "epoch": 0.8888797364085668, + "grad_norm": 0.3733905851840973, + "learning_rate": 1.1760673174506028e-05, + "loss": 0.497, + "step": 32373 + }, + { + "epoch": 0.8889071938495332, + "grad_norm": 0.3621194064617157, + "learning_rate": 1.176024802643889e-05, + "loss": 0.537, + "step": 32374 + }, + { + "epoch": 0.8889346512904998, + "grad_norm": 0.38264036178588867, + "learning_rate": 1.1759822875088327e-05, + "loss": 0.4505, + "step": 32375 + }, + { + "epoch": 0.8889621087314662, + "grad_norm": 0.42262452840805054, + "learning_rate": 1.1759397720455135e-05, + "loss": 0.5588, + "step": 32376 + }, + { + "epoch": 0.8889895661724327, + "grad_norm": 0.3561718165874481, + "learning_rate": 1.1758972562540104e-05, + "loss": 0.4528, + "step": 32377 + }, + { + "epoch": 0.8890170236133992, + "grad_norm": 0.3670588433742523, + "learning_rate": 1.1758547401344027e-05, + "loss": 0.511, + "step": 32378 + }, + { + "epoch": 0.8890444810543657, + "grad_norm": 0.41049832105636597, + "learning_rate": 1.1758122236867697e-05, + "loss": 0.4169, + "step": 32379 + }, + { + "epoch": 0.8890719384953323, + "grad_norm": 0.47716331481933594, + "learning_rate": 1.1757697069111908e-05, + "loss": 0.5833, + "step": 32380 + }, + { + "epoch": 0.8890993959362987, + "grad_norm": 0.41590675711631775, + "learning_rate": 1.1757271898077456e-05, + "loss": 0.545, + "step": 32381 + }, + { + "epoch": 0.8891268533772653, + "grad_norm": 0.4433037340641022, + "learning_rate": 1.1756846723765127e-05, + "loss": 0.569, + "step": 32382 + }, + { + "epoch": 0.8891543108182317, + "grad_norm": 0.3423425853252411, + "learning_rate": 1.1756421546175722e-05, + "loss": 0.4544, + "step": 32383 + }, + { + "epoch": 0.8891817682591983, + "grad_norm": 0.4555296003818512, + "learning_rate": 1.1755996365310028e-05, + "loss": 0.531, + "step": 32384 + }, + { + "epoch": 0.8892092257001647, + "grad_norm": 0.44451913237571716, + "learning_rate": 1.1755571181168842e-05, + "loss": 0.6125, + "step": 32385 + }, + { + "epoch": 0.8892366831411312, + "grad_norm": 0.38836273550987244, + "learning_rate": 1.1755145993752954e-05, + "loss": 0.5076, + "step": 32386 + }, + { + "epoch": 0.8892641405820978, + "grad_norm": 0.41401177644729614, + "learning_rate": 1.1754720803063158e-05, + "loss": 0.4525, + "step": 32387 + }, + { + "epoch": 0.8892915980230642, + "grad_norm": 0.439525842666626, + "learning_rate": 1.1754295609100249e-05, + "loss": 0.6742, + "step": 32388 + }, + { + "epoch": 0.8893190554640308, + "grad_norm": 0.3925476372241974, + "learning_rate": 1.1753870411865016e-05, + "loss": 0.454, + "step": 32389 + }, + { + "epoch": 0.8893465129049972, + "grad_norm": 0.394071489572525, + "learning_rate": 1.1753445211358259e-05, + "loss": 0.4827, + "step": 32390 + }, + { + "epoch": 0.8893739703459638, + "grad_norm": 0.3309151828289032, + "learning_rate": 1.1753020007580764e-05, + "loss": 0.4259, + "step": 32391 + }, + { + "epoch": 0.8894014277869302, + "grad_norm": 0.3527893126010895, + "learning_rate": 1.1752594800533327e-05, + "loss": 0.4615, + "step": 32392 + }, + { + "epoch": 0.8894288852278968, + "grad_norm": 0.32082241773605347, + "learning_rate": 1.1752169590216745e-05, + "loss": 0.4563, + "step": 32393 + }, + { + "epoch": 0.8894563426688633, + "grad_norm": 0.4568484127521515, + "learning_rate": 1.1751744376631806e-05, + "loss": 0.5384, + "step": 32394 + }, + { + "epoch": 0.8894838001098297, + "grad_norm": 0.392188161611557, + "learning_rate": 1.1751319159779302e-05, + "loss": 0.5142, + "step": 32395 + }, + { + "epoch": 0.8895112575507963, + "grad_norm": 0.39252617955207825, + "learning_rate": 1.175089393966003e-05, + "loss": 0.5056, + "step": 32396 + }, + { + "epoch": 0.8895387149917627, + "grad_norm": 0.3725234270095825, + "learning_rate": 1.1750468716274782e-05, + "loss": 0.5134, + "step": 32397 + }, + { + "epoch": 0.8895661724327293, + "grad_norm": 0.3647273778915405, + "learning_rate": 1.1750043489624351e-05, + "loss": 0.414, + "step": 32398 + }, + { + "epoch": 0.8895936298736957, + "grad_norm": 0.4010416567325592, + "learning_rate": 1.1749618259709534e-05, + "loss": 0.5629, + "step": 32399 + }, + { + "epoch": 0.8896210873146623, + "grad_norm": 0.39836594462394714, + "learning_rate": 1.1749193026531117e-05, + "loss": 0.4913, + "step": 32400 + }, + { + "epoch": 0.8896485447556288, + "grad_norm": 0.41681885719299316, + "learning_rate": 1.1748767790089897e-05, + "loss": 0.4631, + "step": 32401 + }, + { + "epoch": 0.8896760021965953, + "grad_norm": 0.38705354928970337, + "learning_rate": 1.1748342550386667e-05, + "loss": 0.4992, + "step": 32402 + }, + { + "epoch": 0.8897034596375618, + "grad_norm": 0.4186708927154541, + "learning_rate": 1.174791730742222e-05, + "loss": 0.4924, + "step": 32403 + }, + { + "epoch": 0.8897309170785282, + "grad_norm": 0.42460423707962036, + "learning_rate": 1.174749206119735e-05, + "loss": 0.4569, + "step": 32404 + }, + { + "epoch": 0.8897583745194948, + "grad_norm": 0.4063410758972168, + "learning_rate": 1.1747066811712849e-05, + "loss": 0.509, + "step": 32405 + }, + { + "epoch": 0.8897858319604612, + "grad_norm": 0.41847217082977295, + "learning_rate": 1.1746641558969511e-05, + "loss": 0.4928, + "step": 32406 + }, + { + "epoch": 0.8898132894014278, + "grad_norm": 0.41622909903526306, + "learning_rate": 1.174621630296813e-05, + "loss": 0.5318, + "step": 32407 + }, + { + "epoch": 0.8898407468423943, + "grad_norm": 0.3933540880680084, + "learning_rate": 1.1745791043709496e-05, + "loss": 0.4907, + "step": 32408 + }, + { + "epoch": 0.8898682042833608, + "grad_norm": 0.4651888310909271, + "learning_rate": 1.1745365781194406e-05, + "loss": 0.4883, + "step": 32409 + }, + { + "epoch": 0.8898956617243273, + "grad_norm": 0.4539794921875, + "learning_rate": 1.174494051542365e-05, + "loss": 0.6052, + "step": 32410 + }, + { + "epoch": 0.8899231191652938, + "grad_norm": 0.5250715613365173, + "learning_rate": 1.1744515246398027e-05, + "loss": 0.5026, + "step": 32411 + }, + { + "epoch": 0.8899505766062603, + "grad_norm": 0.384137362241745, + "learning_rate": 1.1744089974118325e-05, + "loss": 0.4773, + "step": 32412 + }, + { + "epoch": 0.8899780340472268, + "grad_norm": 0.4267968237400055, + "learning_rate": 1.1743664698585335e-05, + "loss": 0.4936, + "step": 32413 + }, + { + "epoch": 0.8900054914881933, + "grad_norm": 0.8176286816596985, + "learning_rate": 1.1743239419799859e-05, + "loss": 0.5174, + "step": 32414 + }, + { + "epoch": 0.8900329489291599, + "grad_norm": 0.39010506868362427, + "learning_rate": 1.1742814137762679e-05, + "loss": 0.4746, + "step": 32415 + }, + { + "epoch": 0.8900604063701263, + "grad_norm": 0.4035094678401947, + "learning_rate": 1.17423888524746e-05, + "loss": 0.4107, + "step": 32416 + }, + { + "epoch": 0.8900878638110928, + "grad_norm": 0.3504658043384552, + "learning_rate": 1.1741963563936407e-05, + "loss": 0.4646, + "step": 32417 + }, + { + "epoch": 0.8901153212520593, + "grad_norm": 0.39697760343551636, + "learning_rate": 1.1741538272148896e-05, + "loss": 0.5116, + "step": 32418 + }, + { + "epoch": 0.8901427786930258, + "grad_norm": 0.39515557885169983, + "learning_rate": 1.1741112977112862e-05, + "loss": 0.5875, + "step": 32419 + }, + { + "epoch": 0.8901702361339923, + "grad_norm": 0.50847989320755, + "learning_rate": 1.1740687678829094e-05, + "loss": 0.6076, + "step": 32420 + }, + { + "epoch": 0.8901976935749588, + "grad_norm": 0.4187418818473816, + "learning_rate": 1.174026237729839e-05, + "loss": 0.5485, + "step": 32421 + }, + { + "epoch": 0.8902251510159254, + "grad_norm": 0.38995087146759033, + "learning_rate": 1.173983707252154e-05, + "loss": 0.519, + "step": 32422 + }, + { + "epoch": 0.8902526084568918, + "grad_norm": 0.4382348358631134, + "learning_rate": 1.173941176449934e-05, + "loss": 0.4671, + "step": 32423 + }, + { + "epoch": 0.8902800658978584, + "grad_norm": 0.36309435963630676, + "learning_rate": 1.1738986453232582e-05, + "loss": 0.4533, + "step": 32424 + }, + { + "epoch": 0.8903075233388248, + "grad_norm": 0.3785346448421478, + "learning_rate": 1.1738561138722058e-05, + "loss": 0.4326, + "step": 32425 + }, + { + "epoch": 0.8903349807797913, + "grad_norm": 0.4069986045360565, + "learning_rate": 1.1738135820968562e-05, + "loss": 0.5193, + "step": 32426 + }, + { + "epoch": 0.8903624382207578, + "grad_norm": 0.40305399894714355, + "learning_rate": 1.173771049997289e-05, + "loss": 0.5261, + "step": 32427 + }, + { + "epoch": 0.8903898956617243, + "grad_norm": 0.41060611605644226, + "learning_rate": 1.173728517573583e-05, + "loss": 0.5317, + "step": 32428 + }, + { + "epoch": 0.8904173531026909, + "grad_norm": 0.351764976978302, + "learning_rate": 1.1736859848258183e-05, + "loss": 0.4578, + "step": 32429 + }, + { + "epoch": 0.8904448105436573, + "grad_norm": 0.699099600315094, + "learning_rate": 1.1736434517540735e-05, + "loss": 0.3935, + "step": 32430 + }, + { + "epoch": 0.8904722679846239, + "grad_norm": 0.40426191687583923, + "learning_rate": 1.1736009183584283e-05, + "loss": 0.4564, + "step": 32431 + }, + { + "epoch": 0.8904997254255903, + "grad_norm": 0.4306930601596832, + "learning_rate": 1.173558384638962e-05, + "loss": 0.5574, + "step": 32432 + }, + { + "epoch": 0.8905271828665569, + "grad_norm": 0.41826358437538147, + "learning_rate": 1.1735158505957537e-05, + "loss": 0.5673, + "step": 32433 + }, + { + "epoch": 0.8905546403075233, + "grad_norm": 0.3304488956928253, + "learning_rate": 1.1734733162288834e-05, + "loss": 0.4091, + "step": 32434 + }, + { + "epoch": 0.8905820977484898, + "grad_norm": 0.48887091875076294, + "learning_rate": 1.1734307815384298e-05, + "loss": 0.446, + "step": 32435 + }, + { + "epoch": 0.8906095551894564, + "grad_norm": 0.38786226511001587, + "learning_rate": 1.1733882465244724e-05, + "loss": 0.5353, + "step": 32436 + }, + { + "epoch": 0.8906370126304228, + "grad_norm": 0.37554997205734253, + "learning_rate": 1.1733457111870905e-05, + "loss": 0.5003, + "step": 32437 + }, + { + "epoch": 0.8906644700713894, + "grad_norm": 0.35488516092300415, + "learning_rate": 1.1733031755263637e-05, + "loss": 0.4615, + "step": 32438 + }, + { + "epoch": 0.8906919275123558, + "grad_norm": 0.3990750312805176, + "learning_rate": 1.1732606395423711e-05, + "loss": 0.5104, + "step": 32439 + }, + { + "epoch": 0.8907193849533224, + "grad_norm": 0.398849755525589, + "learning_rate": 1.1732181032351921e-05, + "loss": 0.4935, + "step": 32440 + }, + { + "epoch": 0.8907468423942888, + "grad_norm": 0.46101969480514526, + "learning_rate": 1.1731755666049058e-05, + "loss": 0.453, + "step": 32441 + }, + { + "epoch": 0.8907742998352554, + "grad_norm": 0.36402982473373413, + "learning_rate": 1.1731330296515924e-05, + "loss": 0.4695, + "step": 32442 + }, + { + "epoch": 0.8908017572762219, + "grad_norm": 0.4021591246128082, + "learning_rate": 1.1730904923753301e-05, + "loss": 0.5031, + "step": 32443 + }, + { + "epoch": 0.8908292147171883, + "grad_norm": 0.4736441373825073, + "learning_rate": 1.173047954776199e-05, + "loss": 0.4975, + "step": 32444 + }, + { + "epoch": 0.8908566721581549, + "grad_norm": 0.4110836386680603, + "learning_rate": 1.1730054168542784e-05, + "loss": 0.5047, + "step": 32445 + }, + { + "epoch": 0.8908841295991213, + "grad_norm": 0.3443698287010193, + "learning_rate": 1.1729628786096469e-05, + "loss": 0.4938, + "step": 32446 + }, + { + "epoch": 0.8909115870400879, + "grad_norm": 0.39684832096099854, + "learning_rate": 1.172920340042385e-05, + "loss": 0.5027, + "step": 32447 + }, + { + "epoch": 0.8909390444810543, + "grad_norm": 0.37833935022354126, + "learning_rate": 1.1728778011525715e-05, + "loss": 0.5145, + "step": 32448 + }, + { + "epoch": 0.8909665019220209, + "grad_norm": 0.39248815178871155, + "learning_rate": 1.1728352619402854e-05, + "loss": 0.5254, + "step": 32449 + }, + { + "epoch": 0.8909939593629874, + "grad_norm": 0.38658639788627625, + "learning_rate": 1.1727927224056063e-05, + "loss": 0.4603, + "step": 32450 + }, + { + "epoch": 0.8910214168039539, + "grad_norm": 0.39409154653549194, + "learning_rate": 1.1727501825486139e-05, + "loss": 0.5641, + "step": 32451 + }, + { + "epoch": 0.8910488742449204, + "grad_norm": 0.35562965273857117, + "learning_rate": 1.1727076423693872e-05, + "loss": 0.478, + "step": 32452 + }, + { + "epoch": 0.8910763316858868, + "grad_norm": 0.41538628935813904, + "learning_rate": 1.1726651018680058e-05, + "loss": 0.5032, + "step": 32453 + }, + { + "epoch": 0.8911037891268534, + "grad_norm": 0.3801780641078949, + "learning_rate": 1.1726225610445486e-05, + "loss": 0.4434, + "step": 32454 + }, + { + "epoch": 0.8911312465678198, + "grad_norm": 0.33724385499954224, + "learning_rate": 1.1725800198990954e-05, + "loss": 0.3648, + "step": 32455 + }, + { + "epoch": 0.8911587040087864, + "grad_norm": 0.4919099807739258, + "learning_rate": 1.1725374784317254e-05, + "loss": 0.603, + "step": 32456 + }, + { + "epoch": 0.8911861614497529, + "grad_norm": 0.3811601996421814, + "learning_rate": 1.1724949366425175e-05, + "loss": 0.4982, + "step": 32457 + }, + { + "epoch": 0.8912136188907194, + "grad_norm": 0.4133003056049347, + "learning_rate": 1.172452394531552e-05, + "loss": 0.6423, + "step": 32458 + }, + { + "epoch": 0.8912410763316859, + "grad_norm": 0.4586585760116577, + "learning_rate": 1.1724098520989076e-05, + "loss": 0.4948, + "step": 32459 + }, + { + "epoch": 0.8912685337726524, + "grad_norm": 0.39781269431114197, + "learning_rate": 1.1723673093446637e-05, + "loss": 0.5143, + "step": 32460 + }, + { + "epoch": 0.8912959912136189, + "grad_norm": 0.43534761667251587, + "learning_rate": 1.1723247662689e-05, + "loss": 0.4383, + "step": 32461 + }, + { + "epoch": 0.8913234486545853, + "grad_norm": 0.3441343903541565, + "learning_rate": 1.1722822228716952e-05, + "loss": 0.5113, + "step": 32462 + }, + { + "epoch": 0.8913509060955519, + "grad_norm": 0.3912842571735382, + "learning_rate": 1.1722396791531293e-05, + "loss": 0.5308, + "step": 32463 + }, + { + "epoch": 0.8913783635365184, + "grad_norm": 0.43969395756721497, + "learning_rate": 1.1721971351132815e-05, + "loss": 0.4847, + "step": 32464 + }, + { + "epoch": 0.8914058209774849, + "grad_norm": 0.4065682888031006, + "learning_rate": 1.1721545907522312e-05, + "loss": 0.4496, + "step": 32465 + }, + { + "epoch": 0.8914332784184514, + "grad_norm": 0.46845540404319763, + "learning_rate": 1.1721120460700575e-05, + "loss": 0.502, + "step": 32466 + }, + { + "epoch": 0.8914607358594179, + "grad_norm": 0.3653510510921478, + "learning_rate": 1.1720695010668398e-05, + "loss": 0.3692, + "step": 32467 + }, + { + "epoch": 0.8914881933003844, + "grad_norm": 0.38543039560317993, + "learning_rate": 1.1720269557426576e-05, + "loss": 0.4764, + "step": 32468 + }, + { + "epoch": 0.8915156507413509, + "grad_norm": 0.35894346237182617, + "learning_rate": 1.1719844100975903e-05, + "loss": 0.5105, + "step": 32469 + }, + { + "epoch": 0.8915431081823174, + "grad_norm": 0.3557935357093811, + "learning_rate": 1.1719418641317168e-05, + "loss": 0.4714, + "step": 32470 + }, + { + "epoch": 0.891570565623284, + "grad_norm": 0.3557232916355133, + "learning_rate": 1.1718993178451175e-05, + "loss": 0.5171, + "step": 32471 + }, + { + "epoch": 0.8915980230642504, + "grad_norm": 0.3811531662940979, + "learning_rate": 1.1718567712378706e-05, + "loss": 0.4417, + "step": 32472 + }, + { + "epoch": 0.891625480505217, + "grad_norm": 0.37903130054473877, + "learning_rate": 1.171814224310056e-05, + "loss": 0.4459, + "step": 32473 + }, + { + "epoch": 0.8916529379461834, + "grad_norm": 0.3766954839229584, + "learning_rate": 1.1717716770617536e-05, + "loss": 0.5145, + "step": 32474 + }, + { + "epoch": 0.8916803953871499, + "grad_norm": 0.4396255910396576, + "learning_rate": 1.1717291294930415e-05, + "loss": 0.4645, + "step": 32475 + }, + { + "epoch": 0.8917078528281164, + "grad_norm": 0.382417231798172, + "learning_rate": 1.1716865816040002e-05, + "loss": 0.5129, + "step": 32476 + }, + { + "epoch": 0.8917353102690829, + "grad_norm": 0.4136011600494385, + "learning_rate": 1.1716440333947083e-05, + "loss": 0.5576, + "step": 32477 + }, + { + "epoch": 0.8917627677100495, + "grad_norm": 0.40323665738105774, + "learning_rate": 1.1716014848652457e-05, + "loss": 0.4371, + "step": 32478 + }, + { + "epoch": 0.8917902251510159, + "grad_norm": 0.3513175845146179, + "learning_rate": 1.1715589360156918e-05, + "loss": 0.4489, + "step": 32479 + }, + { + "epoch": 0.8918176825919825, + "grad_norm": 0.45766788721084595, + "learning_rate": 1.1715163868461253e-05, + "loss": 0.532, + "step": 32480 + }, + { + "epoch": 0.8918451400329489, + "grad_norm": 0.35563552379608154, + "learning_rate": 1.1714738373566262e-05, + "loss": 0.5176, + "step": 32481 + }, + { + "epoch": 0.8918725974739155, + "grad_norm": 0.42652571201324463, + "learning_rate": 1.1714312875472736e-05, + "loss": 0.5213, + "step": 32482 + }, + { + "epoch": 0.8919000549148819, + "grad_norm": 0.3864377737045288, + "learning_rate": 1.1713887374181469e-05, + "loss": 0.5289, + "step": 32483 + }, + { + "epoch": 0.8919275123558484, + "grad_norm": 0.34865570068359375, + "learning_rate": 1.1713461869693256e-05, + "loss": 0.4575, + "step": 32484 + }, + { + "epoch": 0.891954969796815, + "grad_norm": 0.3778305649757385, + "learning_rate": 1.171303636200889e-05, + "loss": 0.4684, + "step": 32485 + }, + { + "epoch": 0.8919824272377814, + "grad_norm": 0.42127057909965515, + "learning_rate": 1.1712610851129162e-05, + "loss": 0.5192, + "step": 32486 + }, + { + "epoch": 0.892009884678748, + "grad_norm": 0.4252839982509613, + "learning_rate": 1.1712185337054874e-05, + "loss": 0.5792, + "step": 32487 + }, + { + "epoch": 0.8920373421197144, + "grad_norm": 0.334799587726593, + "learning_rate": 1.1711759819786808e-05, + "loss": 0.4278, + "step": 32488 + }, + { + "epoch": 0.892064799560681, + "grad_norm": 0.39321208000183105, + "learning_rate": 1.1711334299325765e-05, + "loss": 0.4736, + "step": 32489 + }, + { + "epoch": 0.8920922570016474, + "grad_norm": 0.42462244629859924, + "learning_rate": 1.1710908775672536e-05, + "loss": 0.4465, + "step": 32490 + }, + { + "epoch": 0.892119714442614, + "grad_norm": 0.4046567380428314, + "learning_rate": 1.171048324882792e-05, + "loss": 0.483, + "step": 32491 + }, + { + "epoch": 0.8921471718835805, + "grad_norm": 0.3680742383003235, + "learning_rate": 1.1710057718792705e-05, + "loss": 0.4484, + "step": 32492 + }, + { + "epoch": 0.8921746293245469, + "grad_norm": 0.3571147620677948, + "learning_rate": 1.1709632185567685e-05, + "loss": 0.4912, + "step": 32493 + }, + { + "epoch": 0.8922020867655135, + "grad_norm": 0.4114469587802887, + "learning_rate": 1.1709206649153656e-05, + "loss": 0.4984, + "step": 32494 + }, + { + "epoch": 0.8922295442064799, + "grad_norm": 0.4509887397289276, + "learning_rate": 1.1708781109551413e-05, + "loss": 0.4454, + "step": 32495 + }, + { + "epoch": 0.8922570016474465, + "grad_norm": 0.42692023515701294, + "learning_rate": 1.1708355566761747e-05, + "loss": 0.4705, + "step": 32496 + }, + { + "epoch": 0.8922844590884129, + "grad_norm": 0.43807896971702576, + "learning_rate": 1.1707930020785452e-05, + "loss": 0.5619, + "step": 32497 + }, + { + "epoch": 0.8923119165293795, + "grad_norm": 0.4062166213989258, + "learning_rate": 1.170750447162332e-05, + "loss": 0.5549, + "step": 32498 + }, + { + "epoch": 0.892339373970346, + "grad_norm": 0.3357721269130707, + "learning_rate": 1.1707078919276153e-05, + "loss": 0.4754, + "step": 32499 + }, + { + "epoch": 0.8923668314113125, + "grad_norm": 0.34605303406715393, + "learning_rate": 1.1706653363744736e-05, + "loss": 0.4792, + "step": 32500 + }, + { + "epoch": 0.892394288852279, + "grad_norm": 0.39949265122413635, + "learning_rate": 1.1706227805029864e-05, + "loss": 0.434, + "step": 32501 + }, + { + "epoch": 0.8924217462932454, + "grad_norm": 0.42568159103393555, + "learning_rate": 1.1705802243132337e-05, + "loss": 0.4876, + "step": 32502 + }, + { + "epoch": 0.892449203734212, + "grad_norm": 0.38674062490463257, + "learning_rate": 1.1705376678052938e-05, + "loss": 0.5222, + "step": 32503 + }, + { + "epoch": 0.8924766611751784, + "grad_norm": 0.40564924478530884, + "learning_rate": 1.1704951109792473e-05, + "loss": 0.4667, + "step": 32504 + }, + { + "epoch": 0.892504118616145, + "grad_norm": 0.41106119751930237, + "learning_rate": 1.1704525538351728e-05, + "loss": 0.485, + "step": 32505 + }, + { + "epoch": 0.8925315760571115, + "grad_norm": 0.4139018654823303, + "learning_rate": 1.1704099963731496e-05, + "loss": 0.5707, + "step": 32506 + }, + { + "epoch": 0.892559033498078, + "grad_norm": 0.5302929282188416, + "learning_rate": 1.1703674385932578e-05, + "loss": 0.5028, + "step": 32507 + }, + { + "epoch": 0.8925864909390445, + "grad_norm": 0.36119917035102844, + "learning_rate": 1.170324880495576e-05, + "loss": 0.566, + "step": 32508 + }, + { + "epoch": 0.892613948380011, + "grad_norm": 0.4036698639392853, + "learning_rate": 1.1702823220801842e-05, + "loss": 0.6021, + "step": 32509 + }, + { + "epoch": 0.8926414058209775, + "grad_norm": 0.39405354857444763, + "learning_rate": 1.1702397633471615e-05, + "loss": 0.4292, + "step": 32510 + }, + { + "epoch": 0.892668863261944, + "grad_norm": 0.40856075286865234, + "learning_rate": 1.1701972042965872e-05, + "loss": 0.5068, + "step": 32511 + }, + { + "epoch": 0.8926963207029105, + "grad_norm": 0.4929540455341339, + "learning_rate": 1.170154644928541e-05, + "loss": 0.4838, + "step": 32512 + }, + { + "epoch": 0.892723778143877, + "grad_norm": 0.3865600526332855, + "learning_rate": 1.1701120852431016e-05, + "loss": 0.5091, + "step": 32513 + }, + { + "epoch": 0.8927512355848435, + "grad_norm": 0.3650461435317993, + "learning_rate": 1.1700695252403493e-05, + "loss": 0.4434, + "step": 32514 + }, + { + "epoch": 0.89277869302581, + "grad_norm": 0.3840435743331909, + "learning_rate": 1.1700269649203628e-05, + "loss": 0.573, + "step": 32515 + }, + { + "epoch": 0.8928061504667765, + "grad_norm": 0.4811302721500397, + "learning_rate": 1.169984404283222e-05, + "loss": 0.5171, + "step": 32516 + }, + { + "epoch": 0.892833607907743, + "grad_norm": 0.397836834192276, + "learning_rate": 1.169941843329006e-05, + "loss": 0.4936, + "step": 32517 + }, + { + "epoch": 0.8928610653487095, + "grad_norm": 0.4077151417732239, + "learning_rate": 1.169899282057794e-05, + "loss": 0.5473, + "step": 32518 + }, + { + "epoch": 0.892888522789676, + "grad_norm": 0.4028734862804413, + "learning_rate": 1.169856720469666e-05, + "loss": 0.4753, + "step": 32519 + }, + { + "epoch": 0.8929159802306426, + "grad_norm": 0.38280826807022095, + "learning_rate": 1.1698141585647006e-05, + "loss": 0.5099, + "step": 32520 + }, + { + "epoch": 0.892943437671609, + "grad_norm": 0.4188937544822693, + "learning_rate": 1.1697715963429776e-05, + "loss": 0.4877, + "step": 32521 + }, + { + "epoch": 0.8929708951125755, + "grad_norm": 0.45384013652801514, + "learning_rate": 1.1697290338045765e-05, + "loss": 0.5166, + "step": 32522 + }, + { + "epoch": 0.892998352553542, + "grad_norm": 0.34766751527786255, + "learning_rate": 1.1696864709495767e-05, + "loss": 0.4288, + "step": 32523 + }, + { + "epoch": 0.8930258099945085, + "grad_norm": 0.406495600938797, + "learning_rate": 1.1696439077780574e-05, + "loss": 0.5127, + "step": 32524 + }, + { + "epoch": 0.893053267435475, + "grad_norm": 0.3738694489002228, + "learning_rate": 1.1696013442900982e-05, + "loss": 0.4391, + "step": 32525 + }, + { + "epoch": 0.8930807248764415, + "grad_norm": 0.492237389087677, + "learning_rate": 1.1695587804857782e-05, + "loss": 0.4348, + "step": 32526 + }, + { + "epoch": 0.8931081823174081, + "grad_norm": 0.34916797280311584, + "learning_rate": 1.1695162163651767e-05, + "loss": 0.5332, + "step": 32527 + }, + { + "epoch": 0.8931356397583745, + "grad_norm": 0.3778017461299896, + "learning_rate": 1.1694736519283738e-05, + "loss": 0.4467, + "step": 32528 + }, + { + "epoch": 0.8931630971993411, + "grad_norm": 0.5276066660881042, + "learning_rate": 1.1694310871754485e-05, + "loss": 0.5121, + "step": 32529 + }, + { + "epoch": 0.8931905546403075, + "grad_norm": 0.34471598267555237, + "learning_rate": 1.1693885221064798e-05, + "loss": 0.3963, + "step": 32530 + }, + { + "epoch": 0.893218012081274, + "grad_norm": 0.39737752079963684, + "learning_rate": 1.1693459567215478e-05, + "loss": 0.4895, + "step": 32531 + }, + { + "epoch": 0.8932454695222405, + "grad_norm": 0.40448781847953796, + "learning_rate": 1.169303391020731e-05, + "loss": 0.4668, + "step": 32532 + }, + { + "epoch": 0.893272926963207, + "grad_norm": 0.41511252522468567, + "learning_rate": 1.1692608250041098e-05, + "loss": 0.4714, + "step": 32533 + }, + { + "epoch": 0.8933003844041736, + "grad_norm": 0.43033212423324585, + "learning_rate": 1.169218258671763e-05, + "loss": 0.4783, + "step": 32534 + }, + { + "epoch": 0.89332784184514, + "grad_norm": 0.4088570773601532, + "learning_rate": 1.16917569202377e-05, + "loss": 0.4476, + "step": 32535 + }, + { + "epoch": 0.8933552992861066, + "grad_norm": 0.38479411602020264, + "learning_rate": 1.1691331250602106e-05, + "loss": 0.4298, + "step": 32536 + }, + { + "epoch": 0.893382756727073, + "grad_norm": 0.3611641526222229, + "learning_rate": 1.1690905577811638e-05, + "loss": 0.4685, + "step": 32537 + }, + { + "epoch": 0.8934102141680396, + "grad_norm": 0.40367981791496277, + "learning_rate": 1.169047990186709e-05, + "loss": 0.5107, + "step": 32538 + }, + { + "epoch": 0.893437671609006, + "grad_norm": 0.4271829426288605, + "learning_rate": 1.1690054222769258e-05, + "loss": 0.5252, + "step": 32539 + }, + { + "epoch": 0.8934651290499726, + "grad_norm": 0.3868961036205292, + "learning_rate": 1.1689628540518938e-05, + "loss": 0.4547, + "step": 32540 + }, + { + "epoch": 0.893492586490939, + "grad_norm": 0.40087834000587463, + "learning_rate": 1.1689202855116922e-05, + "loss": 0.5829, + "step": 32541 + }, + { + "epoch": 0.8935200439319055, + "grad_norm": 0.37643304467201233, + "learning_rate": 1.1688777166564e-05, + "loss": 0.4517, + "step": 32542 + }, + { + "epoch": 0.8935475013728721, + "grad_norm": 0.4646138846874237, + "learning_rate": 1.1688351474860972e-05, + "loss": 0.4663, + "step": 32543 + }, + { + "epoch": 0.8935749588138385, + "grad_norm": 0.41672787070274353, + "learning_rate": 1.1687925780008626e-05, + "loss": 0.5313, + "step": 32544 + }, + { + "epoch": 0.8936024162548051, + "grad_norm": 0.3525448739528656, + "learning_rate": 1.1687500082007766e-05, + "loss": 0.4867, + "step": 32545 + }, + { + "epoch": 0.8936298736957715, + "grad_norm": 0.35519030690193176, + "learning_rate": 1.1687074380859175e-05, + "loss": 0.4652, + "step": 32546 + }, + { + "epoch": 0.8936573311367381, + "grad_norm": 0.35632357001304626, + "learning_rate": 1.1686648676563652e-05, + "loss": 0.5282, + "step": 32547 + }, + { + "epoch": 0.8936847885777045, + "grad_norm": 0.4244459867477417, + "learning_rate": 1.1686222969121995e-05, + "loss": 0.4843, + "step": 32548 + }, + { + "epoch": 0.893712246018671, + "grad_norm": 0.37945252656936646, + "learning_rate": 1.168579725853499e-05, + "loss": 0.4921, + "step": 32549 + }, + { + "epoch": 0.8937397034596376, + "grad_norm": 0.41924500465393066, + "learning_rate": 1.1685371544803434e-05, + "loss": 0.5497, + "step": 32550 + }, + { + "epoch": 0.893767160900604, + "grad_norm": 0.3704057037830353, + "learning_rate": 1.1684945827928126e-05, + "loss": 0.4594, + "step": 32551 + }, + { + "epoch": 0.8937946183415706, + "grad_norm": 0.3957259953022003, + "learning_rate": 1.1684520107909854e-05, + "loss": 0.4715, + "step": 32552 + }, + { + "epoch": 0.893822075782537, + "grad_norm": 0.3904044032096863, + "learning_rate": 1.1684094384749415e-05, + "loss": 0.4949, + "step": 32553 + }, + { + "epoch": 0.8938495332235036, + "grad_norm": 0.4437530040740967, + "learning_rate": 1.1683668658447603e-05, + "loss": 0.5827, + "step": 32554 + }, + { + "epoch": 0.89387699066447, + "grad_norm": 0.381779283285141, + "learning_rate": 1.168324292900521e-05, + "loss": 0.5405, + "step": 32555 + }, + { + "epoch": 0.8939044481054366, + "grad_norm": 0.38041165471076965, + "learning_rate": 1.1682817196423033e-05, + "loss": 0.482, + "step": 32556 + }, + { + "epoch": 0.8939319055464031, + "grad_norm": 0.3589346408843994, + "learning_rate": 1.1682391460701863e-05, + "loss": 0.4728, + "step": 32557 + }, + { + "epoch": 0.8939593629873696, + "grad_norm": 0.4665050804615021, + "learning_rate": 1.16819657218425e-05, + "loss": 0.4561, + "step": 32558 + }, + { + "epoch": 0.8939868204283361, + "grad_norm": 0.3728574216365814, + "learning_rate": 1.1681539979845731e-05, + "loss": 0.46, + "step": 32559 + }, + { + "epoch": 0.8940142778693025, + "grad_norm": 0.37752634286880493, + "learning_rate": 1.1681114234712353e-05, + "loss": 0.4666, + "step": 32560 + }, + { + "epoch": 0.8940417353102691, + "grad_norm": 0.38963356614112854, + "learning_rate": 1.1680688486443161e-05, + "loss": 0.4949, + "step": 32561 + }, + { + "epoch": 0.8940691927512355, + "grad_norm": 0.4223586320877075, + "learning_rate": 1.1680262735038949e-05, + "loss": 0.5086, + "step": 32562 + }, + { + "epoch": 0.8940966501922021, + "grad_norm": 0.3784710168838501, + "learning_rate": 1.167983698050051e-05, + "loss": 0.4964, + "step": 32563 + }, + { + "epoch": 0.8941241076331686, + "grad_norm": 0.41445478796958923, + "learning_rate": 1.1679411222828641e-05, + "loss": 0.4874, + "step": 32564 + }, + { + "epoch": 0.8941515650741351, + "grad_norm": 0.41318026185035706, + "learning_rate": 1.167898546202413e-05, + "loss": 0.4654, + "step": 32565 + }, + { + "epoch": 0.8941790225151016, + "grad_norm": 0.38548168540000916, + "learning_rate": 1.1678559698087778e-05, + "loss": 0.4939, + "step": 32566 + }, + { + "epoch": 0.8942064799560681, + "grad_norm": 0.3867526650428772, + "learning_rate": 1.1678133931020377e-05, + "loss": 0.5269, + "step": 32567 + }, + { + "epoch": 0.8942339373970346, + "grad_norm": 0.382984459400177, + "learning_rate": 1.1677708160822719e-05, + "loss": 0.5331, + "step": 32568 + }, + { + "epoch": 0.894261394838001, + "grad_norm": 0.37195590138435364, + "learning_rate": 1.1677282387495601e-05, + "loss": 0.4826, + "step": 32569 + }, + { + "epoch": 0.8942888522789676, + "grad_norm": 0.4366418123245239, + "learning_rate": 1.1676856611039815e-05, + "loss": 0.5392, + "step": 32570 + }, + { + "epoch": 0.8943163097199341, + "grad_norm": 0.3595702648162842, + "learning_rate": 1.1676430831456158e-05, + "loss": 0.4419, + "step": 32571 + }, + { + "epoch": 0.8943437671609006, + "grad_norm": 0.34699302911758423, + "learning_rate": 1.167600504874542e-05, + "loss": 0.4392, + "step": 32572 + }, + { + "epoch": 0.8943712246018671, + "grad_norm": 0.3654663562774658, + "learning_rate": 1.1675579262908398e-05, + "loss": 0.4035, + "step": 32573 + }, + { + "epoch": 0.8943986820428336, + "grad_norm": 0.3672502934932709, + "learning_rate": 1.1675153473945889e-05, + "loss": 0.4208, + "step": 32574 + }, + { + "epoch": 0.8944261394838001, + "grad_norm": 0.3843938112258911, + "learning_rate": 1.1674727681858679e-05, + "loss": 0.518, + "step": 32575 + }, + { + "epoch": 0.8944535969247666, + "grad_norm": 0.3730832636356354, + "learning_rate": 1.1674301886647572e-05, + "loss": 0.5056, + "step": 32576 + }, + { + "epoch": 0.8944810543657331, + "grad_norm": 0.416843980550766, + "learning_rate": 1.1673876088313355e-05, + "loss": 0.4883, + "step": 32577 + }, + { + "epoch": 0.8945085118066997, + "grad_norm": 0.470184862613678, + "learning_rate": 1.1673450286856826e-05, + "loss": 0.4605, + "step": 32578 + }, + { + "epoch": 0.8945359692476661, + "grad_norm": 0.34935232996940613, + "learning_rate": 1.1673024482278778e-05, + "loss": 0.4627, + "step": 32579 + }, + { + "epoch": 0.8945634266886326, + "grad_norm": 0.3633621633052826, + "learning_rate": 1.1672598674580007e-05, + "loss": 0.4584, + "step": 32580 + }, + { + "epoch": 0.8945908841295991, + "grad_norm": 0.395826518535614, + "learning_rate": 1.1672172863761302e-05, + "loss": 0.549, + "step": 32581 + }, + { + "epoch": 0.8946183415705656, + "grad_norm": 0.35172855854034424, + "learning_rate": 1.1671747049823465e-05, + "loss": 0.4927, + "step": 32582 + }, + { + "epoch": 0.8946457990115321, + "grad_norm": 0.39305880665779114, + "learning_rate": 1.1671321232767282e-05, + "loss": 0.5254, + "step": 32583 + }, + { + "epoch": 0.8946732564524986, + "grad_norm": 0.44602951407432556, + "learning_rate": 1.1670895412593555e-05, + "loss": 0.5525, + "step": 32584 + }, + { + "epoch": 0.8947007138934652, + "grad_norm": 0.3906934857368469, + "learning_rate": 1.1670469589303073e-05, + "loss": 0.4984, + "step": 32585 + }, + { + "epoch": 0.8947281713344316, + "grad_norm": 0.3662193715572357, + "learning_rate": 1.167004376289663e-05, + "loss": 0.4205, + "step": 32586 + }, + { + "epoch": 0.8947556287753982, + "grad_norm": 0.38284143805503845, + "learning_rate": 1.1669617933375026e-05, + "loss": 0.4211, + "step": 32587 + }, + { + "epoch": 0.8947830862163646, + "grad_norm": 0.4032760560512543, + "learning_rate": 1.1669192100739048e-05, + "loss": 0.4761, + "step": 32588 + }, + { + "epoch": 0.8948105436573311, + "grad_norm": 0.5354055166244507, + "learning_rate": 1.16687662649895e-05, + "loss": 0.4796, + "step": 32589 + }, + { + "epoch": 0.8948380010982976, + "grad_norm": 0.4171702563762665, + "learning_rate": 1.1668340426127167e-05, + "loss": 0.5242, + "step": 32590 + }, + { + "epoch": 0.8948654585392641, + "grad_norm": 0.4452265202999115, + "learning_rate": 1.1667914584152844e-05, + "loss": 0.4928, + "step": 32591 + }, + { + "epoch": 0.8948929159802307, + "grad_norm": 0.38703668117523193, + "learning_rate": 1.166748873906733e-05, + "loss": 0.519, + "step": 32592 + }, + { + "epoch": 0.8949203734211971, + "grad_norm": 0.429293155670166, + "learning_rate": 1.1667062890871419e-05, + "loss": 0.4956, + "step": 32593 + }, + { + "epoch": 0.8949478308621637, + "grad_norm": 0.3767801523208618, + "learning_rate": 1.16666370395659e-05, + "loss": 0.5358, + "step": 32594 + }, + { + "epoch": 0.8949752883031301, + "grad_norm": 0.5724570751190186, + "learning_rate": 1.1666211185151575e-05, + "loss": 0.5134, + "step": 32595 + }, + { + "epoch": 0.8950027457440967, + "grad_norm": 0.43475452065467834, + "learning_rate": 1.166578532762923e-05, + "loss": 0.4879, + "step": 32596 + }, + { + "epoch": 0.8950302031850631, + "grad_norm": 0.4025103449821472, + "learning_rate": 1.1665359466999669e-05, + "loss": 0.5131, + "step": 32597 + }, + { + "epoch": 0.8950576606260296, + "grad_norm": 0.3740403950214386, + "learning_rate": 1.166493360326368e-05, + "loss": 0.4808, + "step": 32598 + }, + { + "epoch": 0.8950851180669962, + "grad_norm": 0.388345867395401, + "learning_rate": 1.1664507736422053e-05, + "loss": 0.4733, + "step": 32599 + }, + { + "epoch": 0.8951125755079626, + "grad_norm": 0.42624130845069885, + "learning_rate": 1.1664081866475595e-05, + "loss": 0.4941, + "step": 32600 + }, + { + "epoch": 0.8951400329489292, + "grad_norm": 0.37275442481040955, + "learning_rate": 1.1663655993425087e-05, + "loss": 0.4224, + "step": 32601 + }, + { + "epoch": 0.8951674903898956, + "grad_norm": 0.45872315764427185, + "learning_rate": 1.1663230117271333e-05, + "loss": 0.506, + "step": 32602 + }, + { + "epoch": 0.8951949478308622, + "grad_norm": 0.38736942410469055, + "learning_rate": 1.1662804238015124e-05, + "loss": 0.5161, + "step": 32603 + }, + { + "epoch": 0.8952224052718286, + "grad_norm": 0.4123973250389099, + "learning_rate": 1.1662378355657251e-05, + "loss": 0.484, + "step": 32604 + }, + { + "epoch": 0.8952498627127952, + "grad_norm": 0.36702993512153625, + "learning_rate": 1.1661952470198516e-05, + "loss": 0.4973, + "step": 32605 + }, + { + "epoch": 0.8952773201537617, + "grad_norm": 0.39501234889030457, + "learning_rate": 1.1661526581639706e-05, + "loss": 0.5544, + "step": 32606 + }, + { + "epoch": 0.8953047775947282, + "grad_norm": 0.39999252557754517, + "learning_rate": 1.1661100689981622e-05, + "loss": 0.5208, + "step": 32607 + }, + { + "epoch": 0.8953322350356947, + "grad_norm": 0.3906855285167694, + "learning_rate": 1.1660674795225053e-05, + "loss": 0.5247, + "step": 32608 + }, + { + "epoch": 0.8953596924766611, + "grad_norm": 0.4163754880428314, + "learning_rate": 1.1660248897370796e-05, + "loss": 0.5775, + "step": 32609 + }, + { + "epoch": 0.8953871499176277, + "grad_norm": 0.3981000483036041, + "learning_rate": 1.1659822996419645e-05, + "loss": 0.5837, + "step": 32610 + }, + { + "epoch": 0.8954146073585941, + "grad_norm": 0.3910103738307953, + "learning_rate": 1.1659397092372396e-05, + "loss": 0.4698, + "step": 32611 + }, + { + "epoch": 0.8954420647995607, + "grad_norm": 0.36317509412765503, + "learning_rate": 1.1658971185229838e-05, + "loss": 0.4652, + "step": 32612 + }, + { + "epoch": 0.8954695222405272, + "grad_norm": 0.3790709972381592, + "learning_rate": 1.1658545274992772e-05, + "loss": 0.5216, + "step": 32613 + }, + { + "epoch": 0.8954969796814937, + "grad_norm": 0.42623037099838257, + "learning_rate": 1.1658119361661989e-05, + "loss": 0.4903, + "step": 32614 + }, + { + "epoch": 0.8955244371224602, + "grad_norm": 0.3793802261352539, + "learning_rate": 1.1657693445238282e-05, + "loss": 0.5134, + "step": 32615 + }, + { + "epoch": 0.8955518945634267, + "grad_norm": 0.33462151885032654, + "learning_rate": 1.1657267525722453e-05, + "loss": 0.4622, + "step": 32616 + }, + { + "epoch": 0.8955793520043932, + "grad_norm": 0.43516841530799866, + "learning_rate": 1.1656841603115286e-05, + "loss": 0.4306, + "step": 32617 + }, + { + "epoch": 0.8956068094453596, + "grad_norm": 0.43572551012039185, + "learning_rate": 1.1656415677417583e-05, + "loss": 0.5001, + "step": 32618 + }, + { + "epoch": 0.8956342668863262, + "grad_norm": 0.4188925623893738, + "learning_rate": 1.1655989748630134e-05, + "loss": 0.5366, + "step": 32619 + }, + { + "epoch": 0.8956617243272927, + "grad_norm": 0.3632935583591461, + "learning_rate": 1.1655563816753738e-05, + "loss": 0.5301, + "step": 32620 + }, + { + "epoch": 0.8956891817682592, + "grad_norm": 0.44817492365837097, + "learning_rate": 1.1655137881789187e-05, + "loss": 0.439, + "step": 32621 + }, + { + "epoch": 0.8957166392092257, + "grad_norm": 0.36852961778640747, + "learning_rate": 1.1654711943737273e-05, + "loss": 0.4187, + "step": 32622 + }, + { + "epoch": 0.8957440966501922, + "grad_norm": 0.42394590377807617, + "learning_rate": 1.1654286002598797e-05, + "loss": 0.4922, + "step": 32623 + }, + { + "epoch": 0.8957715540911587, + "grad_norm": 0.40181198716163635, + "learning_rate": 1.1653860058374549e-05, + "loss": 0.4384, + "step": 32624 + }, + { + "epoch": 0.8957990115321252, + "grad_norm": 0.34506380558013916, + "learning_rate": 1.165343411106532e-05, + "loss": 0.4096, + "step": 32625 + }, + { + "epoch": 0.8958264689730917, + "grad_norm": 0.4283783435821533, + "learning_rate": 1.1653008160671913e-05, + "loss": 0.5172, + "step": 32626 + }, + { + "epoch": 0.8958539264140583, + "grad_norm": 0.38553136587142944, + "learning_rate": 1.1652582207195116e-05, + "loss": 0.4793, + "step": 32627 + }, + { + "epoch": 0.8958813838550247, + "grad_norm": 0.35982194542884827, + "learning_rate": 1.1652156250635727e-05, + "loss": 0.4765, + "step": 32628 + }, + { + "epoch": 0.8959088412959912, + "grad_norm": 0.37828367948532104, + "learning_rate": 1.1651730290994538e-05, + "loss": 0.4661, + "step": 32629 + }, + { + "epoch": 0.8959362987369577, + "grad_norm": 0.4036012887954712, + "learning_rate": 1.1651304328272346e-05, + "loss": 0.491, + "step": 32630 + }, + { + "epoch": 0.8959637561779242, + "grad_norm": 0.40595516562461853, + "learning_rate": 1.1650878362469944e-05, + "loss": 0.5271, + "step": 32631 + }, + { + "epoch": 0.8959912136188907, + "grad_norm": 0.49353137612342834, + "learning_rate": 1.1650452393588125e-05, + "loss": 0.5215, + "step": 32632 + }, + { + "epoch": 0.8960186710598572, + "grad_norm": 0.43085089325904846, + "learning_rate": 1.1650026421627686e-05, + "loss": 0.4605, + "step": 32633 + }, + { + "epoch": 0.8960461285008238, + "grad_norm": 0.3422888517379761, + "learning_rate": 1.1649600446589427e-05, + "loss": 0.4872, + "step": 32634 + }, + { + "epoch": 0.8960735859417902, + "grad_norm": 0.34748417139053345, + "learning_rate": 1.1649174468474129e-05, + "loss": 0.4674, + "step": 32635 + }, + { + "epoch": 0.8961010433827568, + "grad_norm": 0.41219890117645264, + "learning_rate": 1.16487484872826e-05, + "loss": 0.5229, + "step": 32636 + }, + { + "epoch": 0.8961285008237232, + "grad_norm": 0.36417707800865173, + "learning_rate": 1.1648322503015623e-05, + "loss": 0.4627, + "step": 32637 + }, + { + "epoch": 0.8961559582646897, + "grad_norm": 0.4020203649997711, + "learning_rate": 1.1647896515674002e-05, + "loss": 0.5303, + "step": 32638 + }, + { + "epoch": 0.8961834157056562, + "grad_norm": 0.39101287722587585, + "learning_rate": 1.1647470525258528e-05, + "loss": 0.4826, + "step": 32639 + }, + { + "epoch": 0.8962108731466227, + "grad_norm": 0.40349602699279785, + "learning_rate": 1.1647044531769996e-05, + "loss": 0.4245, + "step": 32640 + }, + { + "epoch": 0.8962383305875893, + "grad_norm": 0.4252493381500244, + "learning_rate": 1.16466185352092e-05, + "loss": 0.5964, + "step": 32641 + }, + { + "epoch": 0.8962657880285557, + "grad_norm": 0.3932085335254669, + "learning_rate": 1.1646192535576934e-05, + "loss": 0.4328, + "step": 32642 + }, + { + "epoch": 0.8962932454695223, + "grad_norm": 0.37804657220840454, + "learning_rate": 1.1645766532873994e-05, + "loss": 0.4187, + "step": 32643 + }, + { + "epoch": 0.8963207029104887, + "grad_norm": 0.4784456491470337, + "learning_rate": 1.1645340527101174e-05, + "loss": 0.5875, + "step": 32644 + }, + { + "epoch": 0.8963481603514553, + "grad_norm": 0.3721034824848175, + "learning_rate": 1.164491451825927e-05, + "loss": 0.556, + "step": 32645 + }, + { + "epoch": 0.8963756177924217, + "grad_norm": 0.4007091224193573, + "learning_rate": 1.1644488506349075e-05, + "loss": 0.4644, + "step": 32646 + }, + { + "epoch": 0.8964030752333882, + "grad_norm": 0.3999159634113312, + "learning_rate": 1.1644062491371382e-05, + "loss": 0.5254, + "step": 32647 + }, + { + "epoch": 0.8964305326743548, + "grad_norm": 0.38134151697158813, + "learning_rate": 1.164363647332699e-05, + "loss": 0.4093, + "step": 32648 + }, + { + "epoch": 0.8964579901153212, + "grad_norm": 0.4016670882701874, + "learning_rate": 1.164321045221669e-05, + "loss": 0.4601, + "step": 32649 + }, + { + "epoch": 0.8964854475562878, + "grad_norm": 0.5225759148597717, + "learning_rate": 1.1642784428041279e-05, + "loss": 0.4775, + "step": 32650 + }, + { + "epoch": 0.8965129049972542, + "grad_norm": 0.3884890675544739, + "learning_rate": 1.164235840080155e-05, + "loss": 0.4666, + "step": 32651 + }, + { + "epoch": 0.8965403624382208, + "grad_norm": 0.37912389636039734, + "learning_rate": 1.1641932370498298e-05, + "loss": 0.5009, + "step": 32652 + }, + { + "epoch": 0.8965678198791872, + "grad_norm": 0.4009639620780945, + "learning_rate": 1.164150633713232e-05, + "loss": 0.4635, + "step": 32653 + }, + { + "epoch": 0.8965952773201538, + "grad_norm": 0.3754901885986328, + "learning_rate": 1.1641080300704409e-05, + "loss": 0.553, + "step": 32654 + }, + { + "epoch": 0.8966227347611203, + "grad_norm": 0.37446075677871704, + "learning_rate": 1.1640654261215358e-05, + "loss": 0.4297, + "step": 32655 + }, + { + "epoch": 0.8966501922020867, + "grad_norm": 0.42605385184288025, + "learning_rate": 1.164022821866596e-05, + "loss": 0.4921, + "step": 32656 + }, + { + "epoch": 0.8966776496430533, + "grad_norm": 0.3782831132411957, + "learning_rate": 1.1639802173057017e-05, + "loss": 0.4404, + "step": 32657 + }, + { + "epoch": 0.8967051070840197, + "grad_norm": 0.43848246335983276, + "learning_rate": 1.163937612438932e-05, + "loss": 0.4914, + "step": 32658 + }, + { + "epoch": 0.8967325645249863, + "grad_norm": 0.38480740785598755, + "learning_rate": 1.1638950072663661e-05, + "loss": 0.5043, + "step": 32659 + }, + { + "epoch": 0.8967600219659527, + "grad_norm": 0.38077038526535034, + "learning_rate": 1.163852401788084e-05, + "loss": 0.5339, + "step": 32660 + }, + { + "epoch": 0.8967874794069193, + "grad_norm": 0.3458142876625061, + "learning_rate": 1.1638097960041645e-05, + "loss": 0.4727, + "step": 32661 + }, + { + "epoch": 0.8968149368478858, + "grad_norm": 0.3869684636592865, + "learning_rate": 1.1637671899146876e-05, + "loss": 0.5566, + "step": 32662 + }, + { + "epoch": 0.8968423942888523, + "grad_norm": 0.4756930470466614, + "learning_rate": 1.1637245835197326e-05, + "loss": 0.4799, + "step": 32663 + }, + { + "epoch": 0.8968698517298188, + "grad_norm": 0.3847728669643402, + "learning_rate": 1.1636819768193793e-05, + "loss": 0.5204, + "step": 32664 + }, + { + "epoch": 0.8968973091707853, + "grad_norm": 0.41690969467163086, + "learning_rate": 1.1636393698137066e-05, + "loss": 0.5024, + "step": 32665 + }, + { + "epoch": 0.8969247666117518, + "grad_norm": 0.42319172620773315, + "learning_rate": 1.1635967625027943e-05, + "loss": 0.5077, + "step": 32666 + }, + { + "epoch": 0.8969522240527182, + "grad_norm": 0.36244162917137146, + "learning_rate": 1.1635541548867217e-05, + "loss": 0.4654, + "step": 32667 + }, + { + "epoch": 0.8969796814936848, + "grad_norm": 0.43067094683647156, + "learning_rate": 1.1635115469655682e-05, + "loss": 0.4428, + "step": 32668 + }, + { + "epoch": 0.8970071389346513, + "grad_norm": 0.35173308849334717, + "learning_rate": 1.1634689387394141e-05, + "loss": 0.5053, + "step": 32669 + }, + { + "epoch": 0.8970345963756178, + "grad_norm": 0.4097588062286377, + "learning_rate": 1.163426330208338e-05, + "loss": 0.4719, + "step": 32670 + }, + { + "epoch": 0.8970620538165843, + "grad_norm": 0.36013802886009216, + "learning_rate": 1.1633837213724193e-05, + "loss": 0.4676, + "step": 32671 + }, + { + "epoch": 0.8970895112575508, + "grad_norm": 0.3869297206401825, + "learning_rate": 1.1633411122317381e-05, + "loss": 0.4442, + "step": 32672 + }, + { + "epoch": 0.8971169686985173, + "grad_norm": 0.416865736246109, + "learning_rate": 1.1632985027863738e-05, + "loss": 0.4441, + "step": 32673 + }, + { + "epoch": 0.8971444261394838, + "grad_norm": 0.3334949016571045, + "learning_rate": 1.1632558930364053e-05, + "loss": 0.4544, + "step": 32674 + }, + { + "epoch": 0.8971718835804503, + "grad_norm": 0.5066133141517639, + "learning_rate": 1.1632132829819128e-05, + "loss": 0.4912, + "step": 32675 + }, + { + "epoch": 0.8971993410214169, + "grad_norm": 0.42233145236968994, + "learning_rate": 1.163170672622975e-05, + "loss": 0.4664, + "step": 32676 + }, + { + "epoch": 0.8972267984623833, + "grad_norm": 0.37307772040367126, + "learning_rate": 1.163128061959672e-05, + "loss": 0.5135, + "step": 32677 + }, + { + "epoch": 0.8972542559033498, + "grad_norm": 0.3963410258293152, + "learning_rate": 1.1630854509920832e-05, + "loss": 0.5571, + "step": 32678 + }, + { + "epoch": 0.8972817133443163, + "grad_norm": 0.4539443254470825, + "learning_rate": 1.163042839720288e-05, + "loss": 0.5114, + "step": 32679 + }, + { + "epoch": 0.8973091707852828, + "grad_norm": 0.39364922046661377, + "learning_rate": 1.1630002281443657e-05, + "loss": 0.5033, + "step": 32680 + }, + { + "epoch": 0.8973366282262493, + "grad_norm": 0.4868185818195343, + "learning_rate": 1.162957616264396e-05, + "loss": 0.4811, + "step": 32681 + }, + { + "epoch": 0.8973640856672158, + "grad_norm": 0.44907325506210327, + "learning_rate": 1.1629150040804584e-05, + "loss": 0.5717, + "step": 32682 + }, + { + "epoch": 0.8973915431081824, + "grad_norm": 0.4163096249103546, + "learning_rate": 1.1628723915926323e-05, + "loss": 0.5255, + "step": 32683 + }, + { + "epoch": 0.8974190005491488, + "grad_norm": 0.35299837589263916, + "learning_rate": 1.1628297788009972e-05, + "loss": 0.3841, + "step": 32684 + }, + { + "epoch": 0.8974464579901154, + "grad_norm": 0.3902292847633362, + "learning_rate": 1.1627871657056327e-05, + "loss": 0.4343, + "step": 32685 + }, + { + "epoch": 0.8974739154310818, + "grad_norm": 0.4132979214191437, + "learning_rate": 1.1627445523066179e-05, + "loss": 0.4605, + "step": 32686 + }, + { + "epoch": 0.8975013728720483, + "grad_norm": 0.38919907808303833, + "learning_rate": 1.162701938604033e-05, + "loss": 0.4933, + "step": 32687 + }, + { + "epoch": 0.8975288303130148, + "grad_norm": 0.42582741379737854, + "learning_rate": 1.1626593245979568e-05, + "loss": 0.4339, + "step": 32688 + }, + { + "epoch": 0.8975562877539813, + "grad_norm": 0.39474114775657654, + "learning_rate": 1.162616710288469e-05, + "loss": 0.5058, + "step": 32689 + }, + { + "epoch": 0.8975837451949479, + "grad_norm": 0.3754110038280487, + "learning_rate": 1.1625740956756493e-05, + "loss": 0.4989, + "step": 32690 + }, + { + "epoch": 0.8976112026359143, + "grad_norm": 0.41280537843704224, + "learning_rate": 1.1625314807595766e-05, + "loss": 0.4455, + "step": 32691 + }, + { + "epoch": 0.8976386600768809, + "grad_norm": 0.39651426672935486, + "learning_rate": 1.1624888655403312e-05, + "loss": 0.4889, + "step": 32692 + }, + { + "epoch": 0.8976661175178473, + "grad_norm": 0.3628430664539337, + "learning_rate": 1.1624462500179923e-05, + "loss": 0.4682, + "step": 32693 + }, + { + "epoch": 0.8976935749588139, + "grad_norm": 0.41252532601356506, + "learning_rate": 1.162403634192639e-05, + "loss": 0.479, + "step": 32694 + }, + { + "epoch": 0.8977210323997803, + "grad_norm": 0.4267137348651886, + "learning_rate": 1.1623610180643512e-05, + "loss": 0.5091, + "step": 32695 + }, + { + "epoch": 0.8977484898407468, + "grad_norm": 0.41550132632255554, + "learning_rate": 1.1623184016332083e-05, + "loss": 0.4648, + "step": 32696 + }, + { + "epoch": 0.8977759472817134, + "grad_norm": 0.4152181148529053, + "learning_rate": 1.1622757848992897e-05, + "loss": 0.4424, + "step": 32697 + }, + { + "epoch": 0.8978034047226798, + "grad_norm": 0.3918894827365875, + "learning_rate": 1.1622331678626753e-05, + "loss": 0.5132, + "step": 32698 + }, + { + "epoch": 0.8978308621636464, + "grad_norm": 0.39079949259757996, + "learning_rate": 1.1621905505234438e-05, + "loss": 0.4565, + "step": 32699 + }, + { + "epoch": 0.8978583196046128, + "grad_norm": 0.402592271566391, + "learning_rate": 1.1621479328816753e-05, + "loss": 0.5014, + "step": 32700 + }, + { + "epoch": 0.8978857770455794, + "grad_norm": 0.4398922026157379, + "learning_rate": 1.1621053149374493e-05, + "loss": 0.4765, + "step": 32701 + }, + { + "epoch": 0.8979132344865458, + "grad_norm": 0.3655937612056732, + "learning_rate": 1.162062696690845e-05, + "loss": 0.526, + "step": 32702 + }, + { + "epoch": 0.8979406919275124, + "grad_norm": 0.477766752243042, + "learning_rate": 1.1620200781419421e-05, + "loss": 0.5944, + "step": 32703 + }, + { + "epoch": 0.8979681493684789, + "grad_norm": 0.4063863456249237, + "learning_rate": 1.1619774592908202e-05, + "loss": 0.5049, + "step": 32704 + }, + { + "epoch": 0.8979956068094453, + "grad_norm": 0.3810517489910126, + "learning_rate": 1.1619348401375585e-05, + "loss": 0.507, + "step": 32705 + }, + { + "epoch": 0.8980230642504119, + "grad_norm": 0.43409061431884766, + "learning_rate": 1.1618922206822366e-05, + "loss": 0.5374, + "step": 32706 + }, + { + "epoch": 0.8980505216913783, + "grad_norm": 0.33770066499710083, + "learning_rate": 1.161849600924934e-05, + "loss": 0.424, + "step": 32707 + }, + { + "epoch": 0.8980779791323449, + "grad_norm": 0.3927633464336395, + "learning_rate": 1.1618069808657301e-05, + "loss": 0.4806, + "step": 32708 + }, + { + "epoch": 0.8981054365733113, + "grad_norm": 0.4736536145210266, + "learning_rate": 1.161764360504705e-05, + "loss": 0.5636, + "step": 32709 + }, + { + "epoch": 0.8981328940142779, + "grad_norm": 0.5429362058639526, + "learning_rate": 1.1617217398419376e-05, + "loss": 0.5601, + "step": 32710 + }, + { + "epoch": 0.8981603514552444, + "grad_norm": 0.3783341944217682, + "learning_rate": 1.1616791188775074e-05, + "loss": 0.5182, + "step": 32711 + }, + { + "epoch": 0.8981878088962109, + "grad_norm": 0.3604768216609955, + "learning_rate": 1.161636497611494e-05, + "loss": 0.4873, + "step": 32712 + }, + { + "epoch": 0.8982152663371774, + "grad_norm": 0.38956743478775024, + "learning_rate": 1.1615938760439769e-05, + "loss": 0.5089, + "step": 32713 + }, + { + "epoch": 0.8982427237781438, + "grad_norm": 0.3936191201210022, + "learning_rate": 1.1615512541750357e-05, + "loss": 0.5062, + "step": 32714 + }, + { + "epoch": 0.8982701812191104, + "grad_norm": 0.39461660385131836, + "learning_rate": 1.1615086320047501e-05, + "loss": 0.4796, + "step": 32715 + }, + { + "epoch": 0.8982976386600768, + "grad_norm": 5.1553850173950195, + "learning_rate": 1.1614660095331991e-05, + "loss": 0.4927, + "step": 32716 + }, + { + "epoch": 0.8983250961010434, + "grad_norm": 0.37210798263549805, + "learning_rate": 1.1614233867604625e-05, + "loss": 0.4139, + "step": 32717 + }, + { + "epoch": 0.8983525535420099, + "grad_norm": 0.37070798873901367, + "learning_rate": 1.1613807636866197e-05, + "loss": 0.4741, + "step": 32718 + }, + { + "epoch": 0.8983800109829764, + "grad_norm": 0.4042204022407532, + "learning_rate": 1.1613381403117506e-05, + "loss": 0.462, + "step": 32719 + }, + { + "epoch": 0.8984074684239429, + "grad_norm": 0.4323727786540985, + "learning_rate": 1.1612955166359338e-05, + "loss": 0.5619, + "step": 32720 + }, + { + "epoch": 0.8984349258649094, + "grad_norm": 0.35440149903297424, + "learning_rate": 1.1612528926592498e-05, + "loss": 0.4523, + "step": 32721 + }, + { + "epoch": 0.8984623833058759, + "grad_norm": 0.40436050295829773, + "learning_rate": 1.1612102683817778e-05, + "loss": 0.4753, + "step": 32722 + }, + { + "epoch": 0.8984898407468423, + "grad_norm": 0.34313881397247314, + "learning_rate": 1.1611676438035969e-05, + "loss": 0.4749, + "step": 32723 + }, + { + "epoch": 0.8985172981878089, + "grad_norm": 0.4020698070526123, + "learning_rate": 1.1611250189247869e-05, + "loss": 0.6204, + "step": 32724 + }, + { + "epoch": 0.8985447556287754, + "grad_norm": 0.35529419779777527, + "learning_rate": 1.1610823937454274e-05, + "loss": 0.4861, + "step": 32725 + }, + { + "epoch": 0.8985722130697419, + "grad_norm": 0.5045909285545349, + "learning_rate": 1.161039768265598e-05, + "loss": 0.5748, + "step": 32726 + }, + { + "epoch": 0.8985996705107084, + "grad_norm": 0.3488752543926239, + "learning_rate": 1.1609971424853779e-05, + "loss": 0.4657, + "step": 32727 + }, + { + "epoch": 0.8986271279516749, + "grad_norm": 0.37972691655158997, + "learning_rate": 1.1609545164048465e-05, + "loss": 0.5077, + "step": 32728 + }, + { + "epoch": 0.8986545853926414, + "grad_norm": 0.3996376693248749, + "learning_rate": 1.160911890024084e-05, + "loss": 0.4433, + "step": 32729 + }, + { + "epoch": 0.8986820428336079, + "grad_norm": 0.35644859075546265, + "learning_rate": 1.1608692633431691e-05, + "loss": 0.4958, + "step": 32730 + }, + { + "epoch": 0.8987095002745744, + "grad_norm": 0.40972837805747986, + "learning_rate": 1.1608266363621819e-05, + "loss": 0.4489, + "step": 32731 + }, + { + "epoch": 0.898736957715541, + "grad_norm": 0.41396191716194153, + "learning_rate": 1.1607840090812017e-05, + "loss": 0.5268, + "step": 32732 + }, + { + "epoch": 0.8987644151565074, + "grad_norm": 0.37637242674827576, + "learning_rate": 1.1607413815003076e-05, + "loss": 0.4525, + "step": 32733 + }, + { + "epoch": 0.898791872597474, + "grad_norm": 0.3816293776035309, + "learning_rate": 1.1606987536195802e-05, + "loss": 0.4843, + "step": 32734 + }, + { + "epoch": 0.8988193300384404, + "grad_norm": 0.41885194182395935, + "learning_rate": 1.160656125439098e-05, + "loss": 0.4069, + "step": 32735 + }, + { + "epoch": 0.8988467874794069, + "grad_norm": 0.37986546754837036, + "learning_rate": 1.1606134969589406e-05, + "loss": 0.4711, + "step": 32736 + }, + { + "epoch": 0.8988742449203734, + "grad_norm": 1.3919717073440552, + "learning_rate": 1.1605708681791881e-05, + "loss": 0.5011, + "step": 32737 + }, + { + "epoch": 0.8989017023613399, + "grad_norm": 0.36967402696609497, + "learning_rate": 1.1605282390999195e-05, + "loss": 0.4047, + "step": 32738 + }, + { + "epoch": 0.8989291598023065, + "grad_norm": 0.422403484582901, + "learning_rate": 1.1604856097212148e-05, + "loss": 0.5106, + "step": 32739 + }, + { + "epoch": 0.8989566172432729, + "grad_norm": 0.40169787406921387, + "learning_rate": 1.1604429800431528e-05, + "loss": 0.4472, + "step": 32740 + }, + { + "epoch": 0.8989840746842395, + "grad_norm": 0.3655209243297577, + "learning_rate": 1.1604003500658137e-05, + "loss": 0.5138, + "step": 32741 + }, + { + "epoch": 0.8990115321252059, + "grad_norm": 0.41175416111946106, + "learning_rate": 1.1603577197892769e-05, + "loss": 0.5225, + "step": 32742 + }, + { + "epoch": 0.8990389895661725, + "grad_norm": 0.4078075587749481, + "learning_rate": 1.1603150892136213e-05, + "loss": 0.4891, + "step": 32743 + }, + { + "epoch": 0.8990664470071389, + "grad_norm": 0.41250982880592346, + "learning_rate": 1.1602724583389272e-05, + "loss": 0.5571, + "step": 32744 + }, + { + "epoch": 0.8990939044481054, + "grad_norm": 0.4074331521987915, + "learning_rate": 1.1602298271652738e-05, + "loss": 0.4976, + "step": 32745 + }, + { + "epoch": 0.899121361889072, + "grad_norm": 0.47156479954719543, + "learning_rate": 1.1601871956927404e-05, + "loss": 0.5694, + "step": 32746 + }, + { + "epoch": 0.8991488193300384, + "grad_norm": 0.3842099606990814, + "learning_rate": 1.1601445639214071e-05, + "loss": 0.4886, + "step": 32747 + }, + { + "epoch": 0.899176276771005, + "grad_norm": 0.38151857256889343, + "learning_rate": 1.160101931851353e-05, + "loss": 0.5246, + "step": 32748 + }, + { + "epoch": 0.8992037342119714, + "grad_norm": 0.4067126214504242, + "learning_rate": 1.1600592994826576e-05, + "loss": 0.5173, + "step": 32749 + }, + { + "epoch": 0.899231191652938, + "grad_norm": 0.40469205379486084, + "learning_rate": 1.1600166668154005e-05, + "loss": 0.5535, + "step": 32750 + }, + { + "epoch": 0.8992586490939044, + "grad_norm": 0.40603646636009216, + "learning_rate": 1.1599740338496612e-05, + "loss": 0.5167, + "step": 32751 + }, + { + "epoch": 0.899286106534871, + "grad_norm": 0.4026154577732086, + "learning_rate": 1.1599314005855196e-05, + "loss": 0.4483, + "step": 32752 + }, + { + "epoch": 0.8993135639758375, + "grad_norm": 0.3870466649532318, + "learning_rate": 1.1598887670230545e-05, + "loss": 0.538, + "step": 32753 + }, + { + "epoch": 0.8993410214168039, + "grad_norm": 0.409155011177063, + "learning_rate": 1.159846133162346e-05, + "loss": 0.5344, + "step": 32754 + }, + { + "epoch": 0.8993684788577705, + "grad_norm": 0.387613981962204, + "learning_rate": 1.1598034990034734e-05, + "loss": 0.5011, + "step": 32755 + }, + { + "epoch": 0.8993959362987369, + "grad_norm": 0.40535223484039307, + "learning_rate": 1.1597608645465161e-05, + "loss": 0.515, + "step": 32756 + }, + { + "epoch": 0.8994233937397035, + "grad_norm": 0.3798578083515167, + "learning_rate": 1.1597182297915542e-05, + "loss": 0.5122, + "step": 32757 + }, + { + "epoch": 0.8994508511806699, + "grad_norm": 0.41384249925613403, + "learning_rate": 1.1596755947386667e-05, + "loss": 0.4713, + "step": 32758 + }, + { + "epoch": 0.8994783086216365, + "grad_norm": 0.37009114027023315, + "learning_rate": 1.159632959387933e-05, + "loss": 0.4991, + "step": 32759 + }, + { + "epoch": 0.899505766062603, + "grad_norm": 0.393777459859848, + "learning_rate": 1.1595903237394333e-05, + "loss": 0.4499, + "step": 32760 + }, + { + "epoch": 0.8995332235035695, + "grad_norm": 0.5197467803955078, + "learning_rate": 1.159547687793246e-05, + "loss": 0.5145, + "step": 32761 + }, + { + "epoch": 0.899560680944536, + "grad_norm": 0.35659635066986084, + "learning_rate": 1.1595050515494521e-05, + "loss": 0.4204, + "step": 32762 + }, + { + "epoch": 0.8995881383855024, + "grad_norm": 0.45321881771087646, + "learning_rate": 1.1594624150081302e-05, + "loss": 0.494, + "step": 32763 + }, + { + "epoch": 0.899615595826469, + "grad_norm": 0.3872327506542206, + "learning_rate": 1.1594197781693597e-05, + "loss": 0.5201, + "step": 32764 + }, + { + "epoch": 0.8996430532674354, + "grad_norm": 0.4133322536945343, + "learning_rate": 1.1593771410332206e-05, + "loss": 0.4897, + "step": 32765 + }, + { + "epoch": 0.899670510708402, + "grad_norm": 0.3595501780509949, + "learning_rate": 1.1593345035997926e-05, + "loss": 0.4461, + "step": 32766 + }, + { + "epoch": 0.8996979681493685, + "grad_norm": 0.48478463292121887, + "learning_rate": 1.1592918658691542e-05, + "loss": 0.4321, + "step": 32767 + }, + { + "epoch": 0.899725425590335, + "grad_norm": 0.38440829515457153, + "learning_rate": 1.159249227841386e-05, + "loss": 0.3926, + "step": 32768 + }, + { + "epoch": 0.8997528830313015, + "grad_norm": 0.34093913435935974, + "learning_rate": 1.1592065895165672e-05, + "loss": 0.4699, + "step": 32769 + }, + { + "epoch": 0.899780340472268, + "grad_norm": 0.37332603335380554, + "learning_rate": 1.1591639508947772e-05, + "loss": 0.5199, + "step": 32770 + }, + { + "epoch": 0.8998077979132345, + "grad_norm": 0.3840343952178955, + "learning_rate": 1.1591213119760957e-05, + "loss": 0.4396, + "step": 32771 + }, + { + "epoch": 0.899835255354201, + "grad_norm": 0.35701295733451843, + "learning_rate": 1.1590786727606022e-05, + "loss": 0.515, + "step": 32772 + }, + { + "epoch": 0.8998627127951675, + "grad_norm": 0.4172215163707733, + "learning_rate": 1.1590360332483763e-05, + "loss": 0.5122, + "step": 32773 + }, + { + "epoch": 0.899890170236134, + "grad_norm": 0.4152337908744812, + "learning_rate": 1.1589933934394971e-05, + "loss": 0.4626, + "step": 32774 + }, + { + "epoch": 0.8999176276771005, + "grad_norm": 0.4170018434524536, + "learning_rate": 1.1589507533340447e-05, + "loss": 0.4433, + "step": 32775 + }, + { + "epoch": 0.899945085118067, + "grad_norm": 0.3722338378429413, + "learning_rate": 1.1589081129320984e-05, + "loss": 0.41, + "step": 32776 + }, + { + "epoch": 0.8999725425590335, + "grad_norm": 0.37589994072914124, + "learning_rate": 1.1588654722337379e-05, + "loss": 0.5707, + "step": 32777 + }, + { + "epoch": 0.9, + "grad_norm": 0.3769027292728424, + "learning_rate": 1.1588228312390423e-05, + "loss": 0.5549, + "step": 32778 + }, + { + "epoch": 0.9000274574409665, + "grad_norm": 0.3728613555431366, + "learning_rate": 1.1587801899480914e-05, + "loss": 0.4893, + "step": 32779 + }, + { + "epoch": 0.900054914881933, + "grad_norm": 0.35271498560905457, + "learning_rate": 1.1587375483609651e-05, + "loss": 0.4424, + "step": 32780 + }, + { + "epoch": 0.9000823723228996, + "grad_norm": 0.48649755120277405, + "learning_rate": 1.1586949064777424e-05, + "loss": 0.4966, + "step": 32781 + }, + { + "epoch": 0.900109829763866, + "grad_norm": 0.37999382615089417, + "learning_rate": 1.1586522642985031e-05, + "loss": 0.4792, + "step": 32782 + }, + { + "epoch": 0.9001372872048325, + "grad_norm": 0.7673463225364685, + "learning_rate": 1.1586096218233266e-05, + "loss": 0.4988, + "step": 32783 + }, + { + "epoch": 0.900164744645799, + "grad_norm": 0.38595688343048096, + "learning_rate": 1.1585669790522929e-05, + "loss": 0.4628, + "step": 32784 + }, + { + "epoch": 0.9001922020867655, + "grad_norm": 0.41306746006011963, + "learning_rate": 1.1585243359854807e-05, + "loss": 0.4877, + "step": 32785 + }, + { + "epoch": 0.900219659527732, + "grad_norm": 0.5601764917373657, + "learning_rate": 1.1584816926229703e-05, + "loss": 0.4857, + "step": 32786 + }, + { + "epoch": 0.9002471169686985, + "grad_norm": 0.44512608647346497, + "learning_rate": 1.1584390489648406e-05, + "loss": 0.5227, + "step": 32787 + }, + { + "epoch": 0.9002745744096651, + "grad_norm": 0.36789053678512573, + "learning_rate": 1.158396405011172e-05, + "loss": 0.4684, + "step": 32788 + }, + { + "epoch": 0.9003020318506315, + "grad_norm": 0.3996233642101288, + "learning_rate": 1.1583537607620435e-05, + "loss": 0.4917, + "step": 32789 + }, + { + "epoch": 0.9003294892915981, + "grad_norm": 0.4338568150997162, + "learning_rate": 1.1583111162175341e-05, + "loss": 0.5649, + "step": 32790 + }, + { + "epoch": 0.9003569467325645, + "grad_norm": 0.40710774064064026, + "learning_rate": 1.1582684713777246e-05, + "loss": 0.5483, + "step": 32791 + }, + { + "epoch": 0.900384404173531, + "grad_norm": 0.40178051590919495, + "learning_rate": 1.1582258262426936e-05, + "loss": 0.4952, + "step": 32792 + }, + { + "epoch": 0.9004118616144975, + "grad_norm": 0.389490008354187, + "learning_rate": 1.158183180812521e-05, + "loss": 0.5928, + "step": 32793 + }, + { + "epoch": 0.900439319055464, + "grad_norm": 0.4393548369407654, + "learning_rate": 1.1581405350872862e-05, + "loss": 0.5137, + "step": 32794 + }, + { + "epoch": 0.9004667764964306, + "grad_norm": 0.462162584066391, + "learning_rate": 1.1580978890670688e-05, + "loss": 0.55, + "step": 32795 + }, + { + "epoch": 0.900494233937397, + "grad_norm": 0.4594816267490387, + "learning_rate": 1.1580552427519486e-05, + "loss": 0.5041, + "step": 32796 + }, + { + "epoch": 0.9005216913783636, + "grad_norm": 0.41510316729545593, + "learning_rate": 1.158012596142005e-05, + "loss": 0.4955, + "step": 32797 + }, + { + "epoch": 0.90054914881933, + "grad_norm": 0.3712053596973419, + "learning_rate": 1.157969949237317e-05, + "loss": 0.4897, + "step": 32798 + }, + { + "epoch": 0.9005766062602966, + "grad_norm": 0.3646470010280609, + "learning_rate": 1.157927302037965e-05, + "loss": 0.4872, + "step": 32799 + }, + { + "epoch": 0.900604063701263, + "grad_norm": 0.3846958577632904, + "learning_rate": 1.157884654544028e-05, + "loss": 0.4434, + "step": 32800 + }, + { + "epoch": 0.9006315211422296, + "grad_norm": 0.6313398480415344, + "learning_rate": 1.157842006755586e-05, + "loss": 0.4735, + "step": 32801 + }, + { + "epoch": 0.9006589785831961, + "grad_norm": 0.38445010781288147, + "learning_rate": 1.157799358672718e-05, + "loss": 0.5022, + "step": 32802 + }, + { + "epoch": 0.9006864360241625, + "grad_norm": 0.33676573634147644, + "learning_rate": 1.157756710295504e-05, + "loss": 0.4022, + "step": 32803 + }, + { + "epoch": 0.9007138934651291, + "grad_norm": 0.4172518849372864, + "learning_rate": 1.1577140616240233e-05, + "loss": 0.4789, + "step": 32804 + }, + { + "epoch": 0.9007413509060955, + "grad_norm": 0.35785675048828125, + "learning_rate": 1.1576714126583555e-05, + "loss": 0.4307, + "step": 32805 + }, + { + "epoch": 0.9007688083470621, + "grad_norm": 0.42172878980636597, + "learning_rate": 1.1576287633985803e-05, + "loss": 0.534, + "step": 32806 + }, + { + "epoch": 0.9007962657880285, + "grad_norm": 0.42864108085632324, + "learning_rate": 1.1575861138447771e-05, + "loss": 0.5053, + "step": 32807 + }, + { + "epoch": 0.9008237232289951, + "grad_norm": 0.4352286756038666, + "learning_rate": 1.1575434639970254e-05, + "loss": 0.5569, + "step": 32808 + }, + { + "epoch": 0.9008511806699615, + "grad_norm": 0.39410310983657837, + "learning_rate": 1.157500813855405e-05, + "loss": 0.4693, + "step": 32809 + }, + { + "epoch": 0.900878638110928, + "grad_norm": 0.3816404640674591, + "learning_rate": 1.157458163419995e-05, + "loss": 0.4723, + "step": 32810 + }, + { + "epoch": 0.9009060955518946, + "grad_norm": 0.3467515707015991, + "learning_rate": 1.1574155126908756e-05, + "loss": 0.4513, + "step": 32811 + }, + { + "epoch": 0.900933552992861, + "grad_norm": 0.5754850506782532, + "learning_rate": 1.157372861668126e-05, + "loss": 0.5277, + "step": 32812 + }, + { + "epoch": 0.9009610104338276, + "grad_norm": 0.42997023463249207, + "learning_rate": 1.1573302103518258e-05, + "loss": 0.4688, + "step": 32813 + }, + { + "epoch": 0.900988467874794, + "grad_norm": 0.4190651476383209, + "learning_rate": 1.1572875587420544e-05, + "loss": 0.4537, + "step": 32814 + }, + { + "epoch": 0.9010159253157606, + "grad_norm": 0.4189777374267578, + "learning_rate": 1.1572449068388919e-05, + "loss": 0.3822, + "step": 32815 + }, + { + "epoch": 0.901043382756727, + "grad_norm": 0.4212717115879059, + "learning_rate": 1.1572022546424168e-05, + "loss": 0.5259, + "step": 32816 + }, + { + "epoch": 0.9010708401976936, + "grad_norm": 0.3902416527271271, + "learning_rate": 1.1571596021527099e-05, + "loss": 0.4915, + "step": 32817 + }, + { + "epoch": 0.9010982976386601, + "grad_norm": 0.3393566906452179, + "learning_rate": 1.1571169493698497e-05, + "loss": 0.3896, + "step": 32818 + }, + { + "epoch": 0.9011257550796266, + "grad_norm": 0.38766077160835266, + "learning_rate": 1.1570742962939168e-05, + "loss": 0.511, + "step": 32819 + }, + { + "epoch": 0.9011532125205931, + "grad_norm": 0.36420729756355286, + "learning_rate": 1.15703164292499e-05, + "loss": 0.4633, + "step": 32820 + }, + { + "epoch": 0.9011806699615595, + "grad_norm": 0.4324902296066284, + "learning_rate": 1.1569889892631488e-05, + "loss": 0.4692, + "step": 32821 + }, + { + "epoch": 0.9012081274025261, + "grad_norm": 0.44995298981666565, + "learning_rate": 1.156946335308473e-05, + "loss": 0.5115, + "step": 32822 + }, + { + "epoch": 0.9012355848434925, + "grad_norm": 0.34962910413742065, + "learning_rate": 1.1569036810610426e-05, + "loss": 0.3849, + "step": 32823 + }, + { + "epoch": 0.9012630422844591, + "grad_norm": 0.4184189736843109, + "learning_rate": 1.1568610265209365e-05, + "loss": 0.4699, + "step": 32824 + }, + { + "epoch": 0.9012904997254256, + "grad_norm": 0.4056295156478882, + "learning_rate": 1.1568183716882346e-05, + "loss": 0.4749, + "step": 32825 + }, + { + "epoch": 0.9013179571663921, + "grad_norm": 0.43671050667762756, + "learning_rate": 1.1567757165630163e-05, + "loss": 0.4831, + "step": 32826 + }, + { + "epoch": 0.9013454146073586, + "grad_norm": 0.43156298995018005, + "learning_rate": 1.1567330611453614e-05, + "loss": 0.4349, + "step": 32827 + }, + { + "epoch": 0.9013728720483251, + "grad_norm": 0.41672977805137634, + "learning_rate": 1.1566904054353491e-05, + "loss": 0.4308, + "step": 32828 + }, + { + "epoch": 0.9014003294892916, + "grad_norm": 0.4665127694606781, + "learning_rate": 1.1566477494330592e-05, + "loss": 0.5467, + "step": 32829 + }, + { + "epoch": 0.901427786930258, + "grad_norm": 0.4095206558704376, + "learning_rate": 1.1566050931385715e-05, + "loss": 0.5159, + "step": 32830 + }, + { + "epoch": 0.9014552443712246, + "grad_norm": 0.4141923189163208, + "learning_rate": 1.1565624365519652e-05, + "loss": 0.4757, + "step": 32831 + }, + { + "epoch": 0.9014827018121911, + "grad_norm": 0.4334023594856262, + "learning_rate": 1.1565197796733201e-05, + "loss": 0.567, + "step": 32832 + }, + { + "epoch": 0.9015101592531576, + "grad_norm": 0.3434852659702301, + "learning_rate": 1.1564771225027156e-05, + "loss": 0.5069, + "step": 32833 + }, + { + "epoch": 0.9015376166941241, + "grad_norm": 0.4589049518108368, + "learning_rate": 1.156434465040231e-05, + "loss": 0.5403, + "step": 32834 + }, + { + "epoch": 0.9015650741350906, + "grad_norm": 0.4140706956386566, + "learning_rate": 1.1563918072859466e-05, + "loss": 0.4555, + "step": 32835 + }, + { + "epoch": 0.9015925315760571, + "grad_norm": 0.44501492381095886, + "learning_rate": 1.1563491492399413e-05, + "loss": 0.369, + "step": 32836 + }, + { + "epoch": 0.9016199890170236, + "grad_norm": 0.4057570695877075, + "learning_rate": 1.156306490902295e-05, + "loss": 0.4657, + "step": 32837 + }, + { + "epoch": 0.9016474464579901, + "grad_norm": 0.3512583374977112, + "learning_rate": 1.1562638322730875e-05, + "loss": 0.3969, + "step": 32838 + }, + { + "epoch": 0.9016749038989567, + "grad_norm": 0.6557492017745972, + "learning_rate": 1.1562211733523976e-05, + "loss": 0.481, + "step": 32839 + }, + { + "epoch": 0.9017023613399231, + "grad_norm": 0.39824166893959045, + "learning_rate": 1.1561785141403057e-05, + "loss": 0.4728, + "step": 32840 + }, + { + "epoch": 0.9017298187808896, + "grad_norm": 0.39168581366539, + "learning_rate": 1.1561358546368907e-05, + "loss": 0.4645, + "step": 32841 + }, + { + "epoch": 0.9017572762218561, + "grad_norm": 0.3670452833175659, + "learning_rate": 1.156093194842233e-05, + "loss": 0.3855, + "step": 32842 + }, + { + "epoch": 0.9017847336628226, + "grad_norm": 0.4192328155040741, + "learning_rate": 1.1560505347564113e-05, + "loss": 0.4502, + "step": 32843 + }, + { + "epoch": 0.9018121911037891, + "grad_norm": 0.4151935279369354, + "learning_rate": 1.1560078743795055e-05, + "loss": 0.5038, + "step": 32844 + }, + { + "epoch": 0.9018396485447556, + "grad_norm": 0.5091087222099304, + "learning_rate": 1.1559652137115953e-05, + "loss": 0.5163, + "step": 32845 + }, + { + "epoch": 0.9018671059857222, + "grad_norm": 0.4174017310142517, + "learning_rate": 1.1559225527527604e-05, + "loss": 0.5316, + "step": 32846 + }, + { + "epoch": 0.9018945634266886, + "grad_norm": 0.42163360118865967, + "learning_rate": 1.15587989150308e-05, + "loss": 0.5449, + "step": 32847 + }, + { + "epoch": 0.9019220208676552, + "grad_norm": 0.3756042718887329, + "learning_rate": 1.1558372299626339e-05, + "loss": 0.4562, + "step": 32848 + }, + { + "epoch": 0.9019494783086216, + "grad_norm": 0.4711083471775055, + "learning_rate": 1.1557945681315015e-05, + "loss": 0.4328, + "step": 32849 + }, + { + "epoch": 0.9019769357495881, + "grad_norm": 0.39452067017555237, + "learning_rate": 1.1557519060097626e-05, + "loss": 0.4663, + "step": 32850 + }, + { + "epoch": 0.9020043931905546, + "grad_norm": 0.4209335446357727, + "learning_rate": 1.1557092435974968e-05, + "loss": 0.5205, + "step": 32851 + }, + { + "epoch": 0.9020318506315211, + "grad_norm": 0.3873645067214966, + "learning_rate": 1.1556665808947834e-05, + "loss": 0.456, + "step": 32852 + }, + { + "epoch": 0.9020593080724877, + "grad_norm": 0.3915815055370331, + "learning_rate": 1.1556239179017021e-05, + "loss": 0.5009, + "step": 32853 + }, + { + "epoch": 0.9020867655134541, + "grad_norm": 0.5327669978141785, + "learning_rate": 1.1555812546183324e-05, + "loss": 0.5084, + "step": 32854 + }, + { + "epoch": 0.9021142229544207, + "grad_norm": 0.4577653110027313, + "learning_rate": 1.1555385910447545e-05, + "loss": 0.5065, + "step": 32855 + }, + { + "epoch": 0.9021416803953871, + "grad_norm": 0.4147915542125702, + "learning_rate": 1.1554959271810474e-05, + "loss": 0.4882, + "step": 32856 + }, + { + "epoch": 0.9021691378363537, + "grad_norm": 0.44013750553131104, + "learning_rate": 1.1554532630272903e-05, + "loss": 0.5654, + "step": 32857 + }, + { + "epoch": 0.9021965952773201, + "grad_norm": 0.41192954778671265, + "learning_rate": 1.1554105985835635e-05, + "loss": 0.5268, + "step": 32858 + }, + { + "epoch": 0.9022240527182867, + "grad_norm": 0.38591504096984863, + "learning_rate": 1.1553679338499467e-05, + "loss": 0.5358, + "step": 32859 + }, + { + "epoch": 0.9022515101592532, + "grad_norm": 0.3745200037956238, + "learning_rate": 1.1553252688265183e-05, + "loss": 0.4868, + "step": 32860 + }, + { + "epoch": 0.9022789676002196, + "grad_norm": 0.3922201991081238, + "learning_rate": 1.1552826035133595e-05, + "loss": 0.4588, + "step": 32861 + }, + { + "epoch": 0.9023064250411862, + "grad_norm": 0.42202863097190857, + "learning_rate": 1.1552399379105486e-05, + "loss": 0.4834, + "step": 32862 + }, + { + "epoch": 0.9023338824821526, + "grad_norm": 0.3742457330226898, + "learning_rate": 1.155197272018166e-05, + "loss": 0.5273, + "step": 32863 + }, + { + "epoch": 0.9023613399231192, + "grad_norm": 0.4507403075695038, + "learning_rate": 1.1551546058362908e-05, + "loss": 0.5198, + "step": 32864 + }, + { + "epoch": 0.9023887973640856, + "grad_norm": 0.3837156593799591, + "learning_rate": 1.1551119393650025e-05, + "loss": 0.4969, + "step": 32865 + }, + { + "epoch": 0.9024162548050522, + "grad_norm": 0.37271416187286377, + "learning_rate": 1.1550692726043812e-05, + "loss": 0.4556, + "step": 32866 + }, + { + "epoch": 0.9024437122460187, + "grad_norm": 0.4330938458442688, + "learning_rate": 1.155026605554506e-05, + "loss": 0.5093, + "step": 32867 + }, + { + "epoch": 0.9024711696869852, + "grad_norm": 0.6025408506393433, + "learning_rate": 1.1549839382154572e-05, + "loss": 0.5015, + "step": 32868 + }, + { + "epoch": 0.9024986271279517, + "grad_norm": 0.37133339047431946, + "learning_rate": 1.1549412705873136e-05, + "loss": 0.4664, + "step": 32869 + }, + { + "epoch": 0.9025260845689181, + "grad_norm": 0.43310031294822693, + "learning_rate": 1.1548986026701549e-05, + "loss": 0.5061, + "step": 32870 + }, + { + "epoch": 0.9025535420098847, + "grad_norm": 0.39530283212661743, + "learning_rate": 1.154855934464061e-05, + "loss": 0.4724, + "step": 32871 + }, + { + "epoch": 0.9025809994508511, + "grad_norm": 0.353142648935318, + "learning_rate": 1.1548132659691114e-05, + "loss": 0.4358, + "step": 32872 + }, + { + "epoch": 0.9026084568918177, + "grad_norm": 0.35216620564460754, + "learning_rate": 1.1547705971853855e-05, + "loss": 0.4821, + "step": 32873 + }, + { + "epoch": 0.9026359143327842, + "grad_norm": 0.3865971565246582, + "learning_rate": 1.1547279281129633e-05, + "loss": 0.519, + "step": 32874 + }, + { + "epoch": 0.9026633717737507, + "grad_norm": 0.48260316252708435, + "learning_rate": 1.154685258751924e-05, + "loss": 0.489, + "step": 32875 + }, + { + "epoch": 0.9026908292147172, + "grad_norm": 0.4394557774066925, + "learning_rate": 1.1546425891023471e-05, + "loss": 0.4473, + "step": 32876 + }, + { + "epoch": 0.9027182866556837, + "grad_norm": 0.45452961325645447, + "learning_rate": 1.1545999191643127e-05, + "loss": 0.5361, + "step": 32877 + }, + { + "epoch": 0.9027457440966502, + "grad_norm": 0.40256431698799133, + "learning_rate": 1.1545572489379e-05, + "loss": 0.5123, + "step": 32878 + }, + { + "epoch": 0.9027732015376166, + "grad_norm": 0.3568284809589386, + "learning_rate": 1.1545145784231889e-05, + "loss": 0.4567, + "step": 32879 + }, + { + "epoch": 0.9028006589785832, + "grad_norm": 0.39027783274650574, + "learning_rate": 1.1544719076202584e-05, + "loss": 0.5149, + "step": 32880 + }, + { + "epoch": 0.9028281164195497, + "grad_norm": 0.42398491501808167, + "learning_rate": 1.154429236529189e-05, + "loss": 0.477, + "step": 32881 + }, + { + "epoch": 0.9028555738605162, + "grad_norm": 0.37882938981056213, + "learning_rate": 1.1543865651500593e-05, + "loss": 0.4288, + "step": 32882 + }, + { + "epoch": 0.9028830313014827, + "grad_norm": 0.4132899045944214, + "learning_rate": 1.1543438934829495e-05, + "loss": 0.5506, + "step": 32883 + }, + { + "epoch": 0.9029104887424492, + "grad_norm": 0.3987480401992798, + "learning_rate": 1.154301221527939e-05, + "loss": 0.5557, + "step": 32884 + }, + { + "epoch": 0.9029379461834157, + "grad_norm": 0.5017302632331848, + "learning_rate": 1.1542585492851076e-05, + "loss": 0.483, + "step": 32885 + }, + { + "epoch": 0.9029654036243822, + "grad_norm": 0.34274178743362427, + "learning_rate": 1.154215876754535e-05, + "loss": 0.4593, + "step": 32886 + }, + { + "epoch": 0.9029928610653487, + "grad_norm": 0.48922625184059143, + "learning_rate": 1.1541732039363003e-05, + "loss": 0.4626, + "step": 32887 + }, + { + "epoch": 0.9030203185063153, + "grad_norm": 0.39497941732406616, + "learning_rate": 1.1541305308304835e-05, + "loss": 0.4256, + "step": 32888 + }, + { + "epoch": 0.9030477759472817, + "grad_norm": 0.43135184049606323, + "learning_rate": 1.1540878574371639e-05, + "loss": 0.4747, + "step": 32889 + }, + { + "epoch": 0.9030752333882482, + "grad_norm": 0.3749432861804962, + "learning_rate": 1.1540451837564215e-05, + "loss": 0.4414, + "step": 32890 + }, + { + "epoch": 0.9031026908292147, + "grad_norm": 0.3845439553260803, + "learning_rate": 1.1540025097883356e-05, + "loss": 0.4969, + "step": 32891 + }, + { + "epoch": 0.9031301482701812, + "grad_norm": 0.3509191572666168, + "learning_rate": 1.1539598355329857e-05, + "loss": 0.4094, + "step": 32892 + }, + { + "epoch": 0.9031576057111477, + "grad_norm": 0.42373889684677124, + "learning_rate": 1.1539171609904519e-05, + "loss": 0.523, + "step": 32893 + }, + { + "epoch": 0.9031850631521142, + "grad_norm": 0.5177987217903137, + "learning_rate": 1.1538744861608132e-05, + "loss": 0.4096, + "step": 32894 + }, + { + "epoch": 0.9032125205930808, + "grad_norm": 0.40102165937423706, + "learning_rate": 1.1538318110441494e-05, + "loss": 0.5546, + "step": 32895 + }, + { + "epoch": 0.9032399780340472, + "grad_norm": 0.3825879395008087, + "learning_rate": 1.1537891356405403e-05, + "loss": 0.5111, + "step": 32896 + }, + { + "epoch": 0.9032674354750138, + "grad_norm": 0.38967806100845337, + "learning_rate": 1.1537464599500654e-05, + "loss": 0.4187, + "step": 32897 + }, + { + "epoch": 0.9032948929159802, + "grad_norm": 0.3750987648963928, + "learning_rate": 1.1537037839728043e-05, + "loss": 0.4756, + "step": 32898 + }, + { + "epoch": 0.9033223503569467, + "grad_norm": 0.4920770227909088, + "learning_rate": 1.1536611077088364e-05, + "loss": 0.4632, + "step": 32899 + }, + { + "epoch": 0.9033498077979132, + "grad_norm": 0.40388619899749756, + "learning_rate": 1.1536184311582417e-05, + "loss": 0.4709, + "step": 32900 + }, + { + "epoch": 0.9033772652388797, + "grad_norm": 0.39074090123176575, + "learning_rate": 1.1535757543210995e-05, + "loss": 0.4544, + "step": 32901 + }, + { + "epoch": 0.9034047226798463, + "grad_norm": 0.3648351728916168, + "learning_rate": 1.1535330771974895e-05, + "loss": 0.5118, + "step": 32902 + }, + { + "epoch": 0.9034321801208127, + "grad_norm": 0.3560994565486908, + "learning_rate": 1.1534903997874916e-05, + "loss": 0.4601, + "step": 32903 + }, + { + "epoch": 0.9034596375617793, + "grad_norm": 0.3732188045978546, + "learning_rate": 1.1534477220911846e-05, + "loss": 0.5302, + "step": 32904 + }, + { + "epoch": 0.9034870950027457, + "grad_norm": 0.36562395095825195, + "learning_rate": 1.153405044108649e-05, + "loss": 0.3998, + "step": 32905 + }, + { + "epoch": 0.9035145524437123, + "grad_norm": 0.48745468258857727, + "learning_rate": 1.1533623658399637e-05, + "loss": 0.4606, + "step": 32906 + }, + { + "epoch": 0.9035420098846787, + "grad_norm": 0.3853543698787689, + "learning_rate": 1.1533196872852091e-05, + "loss": 0.4803, + "step": 32907 + }, + { + "epoch": 0.9035694673256452, + "grad_norm": 0.40514639019966125, + "learning_rate": 1.153277008444464e-05, + "loss": 0.5204, + "step": 32908 + }, + { + "epoch": 0.9035969247666118, + "grad_norm": 0.3845973014831543, + "learning_rate": 1.1532343293178084e-05, + "loss": 0.5801, + "step": 32909 + }, + { + "epoch": 0.9036243822075782, + "grad_norm": 0.37984955310821533, + "learning_rate": 1.153191649905322e-05, + "loss": 0.4962, + "step": 32910 + }, + { + "epoch": 0.9036518396485448, + "grad_norm": 0.3460088074207306, + "learning_rate": 1.1531489702070842e-05, + "loss": 0.4586, + "step": 32911 + }, + { + "epoch": 0.9036792970895112, + "grad_norm": 0.3737717568874359, + "learning_rate": 1.1531062902231746e-05, + "loss": 0.4167, + "step": 32912 + }, + { + "epoch": 0.9037067545304778, + "grad_norm": 0.4000113308429718, + "learning_rate": 1.1530636099536732e-05, + "loss": 0.5129, + "step": 32913 + }, + { + "epoch": 0.9037342119714442, + "grad_norm": 0.4116954207420349, + "learning_rate": 1.1530209293986589e-05, + "loss": 0.4977, + "step": 32914 + }, + { + "epoch": 0.9037616694124108, + "grad_norm": 0.3910031318664551, + "learning_rate": 1.1529782485582122e-05, + "loss": 0.4666, + "step": 32915 + }, + { + "epoch": 0.9037891268533773, + "grad_norm": 0.41079604625701904, + "learning_rate": 1.1529355674324117e-05, + "loss": 0.4753, + "step": 32916 + }, + { + "epoch": 0.9038165842943438, + "grad_norm": 0.3570103049278259, + "learning_rate": 1.152892886021338e-05, + "loss": 0.4743, + "step": 32917 + }, + { + "epoch": 0.9038440417353103, + "grad_norm": 0.42445775866508484, + "learning_rate": 1.1528502043250702e-05, + "loss": 0.5181, + "step": 32918 + }, + { + "epoch": 0.9038714991762767, + "grad_norm": 0.5094816088676453, + "learning_rate": 1.1528075223436877e-05, + "loss": 0.5479, + "step": 32919 + }, + { + "epoch": 0.9038989566172433, + "grad_norm": 0.42870694398880005, + "learning_rate": 1.1527648400772708e-05, + "loss": 0.49, + "step": 32920 + }, + { + "epoch": 0.9039264140582097, + "grad_norm": 0.39834684133529663, + "learning_rate": 1.1527221575258985e-05, + "loss": 0.5414, + "step": 32921 + }, + { + "epoch": 0.9039538714991763, + "grad_norm": 0.34285876154899597, + "learning_rate": 1.1526794746896505e-05, + "loss": 0.4484, + "step": 32922 + }, + { + "epoch": 0.9039813289401428, + "grad_norm": 0.4397629499435425, + "learning_rate": 1.1526367915686067e-05, + "loss": 0.4998, + "step": 32923 + }, + { + "epoch": 0.9040087863811093, + "grad_norm": 0.42304080724716187, + "learning_rate": 1.1525941081628467e-05, + "loss": 0.452, + "step": 32924 + }, + { + "epoch": 0.9040362438220758, + "grad_norm": 0.379070520401001, + "learning_rate": 1.1525514244724498e-05, + "loss": 0.5259, + "step": 32925 + }, + { + "epoch": 0.9040637012630423, + "grad_norm": 0.40844911336898804, + "learning_rate": 1.152508740497496e-05, + "loss": 0.4711, + "step": 32926 + }, + { + "epoch": 0.9040911587040088, + "grad_norm": 0.36439648270606995, + "learning_rate": 1.1524660562380645e-05, + "loss": 0.4361, + "step": 32927 + }, + { + "epoch": 0.9041186161449752, + "grad_norm": 0.4355107545852661, + "learning_rate": 1.1524233716942352e-05, + "loss": 0.5593, + "step": 32928 + }, + { + "epoch": 0.9041460735859418, + "grad_norm": 0.3967171013355255, + "learning_rate": 1.1523806868660876e-05, + "loss": 0.5241, + "step": 32929 + }, + { + "epoch": 0.9041735310269083, + "grad_norm": 0.3897009491920471, + "learning_rate": 1.1523380017537017e-05, + "loss": 0.4738, + "step": 32930 + }, + { + "epoch": 0.9042009884678748, + "grad_norm": 0.4444959759712219, + "learning_rate": 1.1522953163571564e-05, + "loss": 0.5064, + "step": 32931 + }, + { + "epoch": 0.9042284459088413, + "grad_norm": 0.3896864354610443, + "learning_rate": 1.1522526306765318e-05, + "loss": 0.5227, + "step": 32932 + }, + { + "epoch": 0.9042559033498078, + "grad_norm": 0.37202221155166626, + "learning_rate": 1.1522099447119077e-05, + "loss": 0.5338, + "step": 32933 + }, + { + "epoch": 0.9042833607907743, + "grad_norm": 0.42200562357902527, + "learning_rate": 1.1521672584633631e-05, + "loss": 0.5341, + "step": 32934 + }, + { + "epoch": 0.9043108182317408, + "grad_norm": 0.4063025414943695, + "learning_rate": 1.1521245719309783e-05, + "loss": 0.4567, + "step": 32935 + }, + { + "epoch": 0.9043382756727073, + "grad_norm": 0.3777836859226227, + "learning_rate": 1.1520818851148327e-05, + "loss": 0.4761, + "step": 32936 + }, + { + "epoch": 0.9043657331136739, + "grad_norm": 0.40472519397735596, + "learning_rate": 1.1520391980150056e-05, + "loss": 0.5333, + "step": 32937 + }, + { + "epoch": 0.9043931905546403, + "grad_norm": 0.43095266819000244, + "learning_rate": 1.1519965106315771e-05, + "loss": 0.5037, + "step": 32938 + }, + { + "epoch": 0.9044206479956068, + "grad_norm": 0.37650883197784424, + "learning_rate": 1.1519538229646266e-05, + "loss": 0.4781, + "step": 32939 + }, + { + "epoch": 0.9044481054365733, + "grad_norm": 0.404582679271698, + "learning_rate": 1.1519111350142334e-05, + "loss": 0.4849, + "step": 32940 + }, + { + "epoch": 0.9044755628775398, + "grad_norm": 0.41306978464126587, + "learning_rate": 1.1518684467804777e-05, + "loss": 0.5527, + "step": 32941 + }, + { + "epoch": 0.9045030203185063, + "grad_norm": 0.37526121735572815, + "learning_rate": 1.1518257582634386e-05, + "loss": 0.4938, + "step": 32942 + }, + { + "epoch": 0.9045304777594728, + "grad_norm": 0.39458754658699036, + "learning_rate": 1.1517830694631964e-05, + "loss": 0.4854, + "step": 32943 + }, + { + "epoch": 0.9045579352004394, + "grad_norm": 0.41725480556488037, + "learning_rate": 1.1517403803798301e-05, + "loss": 0.4677, + "step": 32944 + }, + { + "epoch": 0.9045853926414058, + "grad_norm": 0.3576461970806122, + "learning_rate": 1.1516976910134197e-05, + "loss": 0.4947, + "step": 32945 + }, + { + "epoch": 0.9046128500823724, + "grad_norm": 0.36625936627388, + "learning_rate": 1.1516550013640447e-05, + "loss": 0.397, + "step": 32946 + }, + { + "epoch": 0.9046403075233388, + "grad_norm": 0.49743062257766724, + "learning_rate": 1.1516123114317843e-05, + "loss": 0.5405, + "step": 32947 + }, + { + "epoch": 0.9046677649643053, + "grad_norm": 0.4104597866535187, + "learning_rate": 1.1515696212167189e-05, + "loss": 0.5287, + "step": 32948 + }, + { + "epoch": 0.9046952224052718, + "grad_norm": 0.43194350600242615, + "learning_rate": 1.1515269307189278e-05, + "loss": 0.4088, + "step": 32949 + }, + { + "epoch": 0.9047226798462383, + "grad_norm": 0.4039981961250305, + "learning_rate": 1.1514842399384905e-05, + "loss": 0.548, + "step": 32950 + }, + { + "epoch": 0.9047501372872049, + "grad_norm": 0.4241907298564911, + "learning_rate": 1.1514415488754869e-05, + "loss": 0.538, + "step": 32951 + }, + { + "epoch": 0.9047775947281713, + "grad_norm": 0.3832662105560303, + "learning_rate": 1.1513988575299966e-05, + "loss": 0.5216, + "step": 32952 + }, + { + "epoch": 0.9048050521691379, + "grad_norm": 0.3564370572566986, + "learning_rate": 1.1513561659020987e-05, + "loss": 0.5423, + "step": 32953 + }, + { + "epoch": 0.9048325096101043, + "grad_norm": 0.4619560241699219, + "learning_rate": 1.1513134739918734e-05, + "loss": 0.5836, + "step": 32954 + }, + { + "epoch": 0.9048599670510709, + "grad_norm": 0.4107954800128937, + "learning_rate": 1.1512707817994001e-05, + "loss": 0.4686, + "step": 32955 + }, + { + "epoch": 0.9048874244920373, + "grad_norm": 0.42462560534477234, + "learning_rate": 1.1512280893247587e-05, + "loss": 0.5381, + "step": 32956 + }, + { + "epoch": 0.9049148819330038, + "grad_norm": 0.5784880518913269, + "learning_rate": 1.1511853965680287e-05, + "loss": 0.5949, + "step": 32957 + }, + { + "epoch": 0.9049423393739704, + "grad_norm": 0.3743045926094055, + "learning_rate": 1.1511427035292895e-05, + "loss": 0.4741, + "step": 32958 + }, + { + "epoch": 0.9049697968149368, + "grad_norm": 0.38537949323654175, + "learning_rate": 1.1511000102086209e-05, + "loss": 0.548, + "step": 32959 + }, + { + "epoch": 0.9049972542559034, + "grad_norm": 0.46489158272743225, + "learning_rate": 1.1510573166061025e-05, + "loss": 0.5181, + "step": 32960 + }, + { + "epoch": 0.9050247116968698, + "grad_norm": 0.4267701506614685, + "learning_rate": 1.1510146227218143e-05, + "loss": 0.5751, + "step": 32961 + }, + { + "epoch": 0.9050521691378364, + "grad_norm": 0.40937256813049316, + "learning_rate": 1.1509719285558357e-05, + "loss": 0.4276, + "step": 32962 + }, + { + "epoch": 0.9050796265788028, + "grad_norm": 0.4053786098957062, + "learning_rate": 1.1509292341082456e-05, + "loss": 0.4718, + "step": 32963 + }, + { + "epoch": 0.9051070840197694, + "grad_norm": 0.6522948145866394, + "learning_rate": 1.1508865393791248e-05, + "loss": 0.5772, + "step": 32964 + }, + { + "epoch": 0.9051345414607359, + "grad_norm": 0.4114609360694885, + "learning_rate": 1.1508438443685523e-05, + "loss": 0.4913, + "step": 32965 + }, + { + "epoch": 0.9051619989017023, + "grad_norm": 0.4251357316970825, + "learning_rate": 1.150801149076608e-05, + "loss": 0.5286, + "step": 32966 + }, + { + "epoch": 0.9051894563426689, + "grad_norm": 0.4173600971698761, + "learning_rate": 1.1507584535033715e-05, + "loss": 0.4648, + "step": 32967 + }, + { + "epoch": 0.9052169137836353, + "grad_norm": 0.4281451404094696, + "learning_rate": 1.150715757648922e-05, + "loss": 0.5118, + "step": 32968 + }, + { + "epoch": 0.9052443712246019, + "grad_norm": 0.42779749631881714, + "learning_rate": 1.1506730615133402e-05, + "loss": 0.4871, + "step": 32969 + }, + { + "epoch": 0.9052718286655683, + "grad_norm": 0.38229596614837646, + "learning_rate": 1.1506303650967046e-05, + "loss": 0.4882, + "step": 32970 + }, + { + "epoch": 0.9052992861065349, + "grad_norm": 0.3938492238521576, + "learning_rate": 1.150587668399095e-05, + "loss": 0.4867, + "step": 32971 + }, + { + "epoch": 0.9053267435475014, + "grad_norm": 0.3812597990036011, + "learning_rate": 1.1505449714205918e-05, + "loss": 0.5283, + "step": 32972 + }, + { + "epoch": 0.9053542009884679, + "grad_norm": 0.4416571259498596, + "learning_rate": 1.1505022741612739e-05, + "loss": 0.5223, + "step": 32973 + }, + { + "epoch": 0.9053816584294344, + "grad_norm": 0.42086246609687805, + "learning_rate": 1.1504595766212214e-05, + "loss": 0.5976, + "step": 32974 + }, + { + "epoch": 0.9054091158704008, + "grad_norm": 0.36024028062820435, + "learning_rate": 1.1504168788005139e-05, + "loss": 0.4152, + "step": 32975 + }, + { + "epoch": 0.9054365733113674, + "grad_norm": 0.4255084693431854, + "learning_rate": 1.1503741806992306e-05, + "loss": 0.526, + "step": 32976 + }, + { + "epoch": 0.9054640307523338, + "grad_norm": 0.4079798758029938, + "learning_rate": 1.1503314823174516e-05, + "loss": 0.4006, + "step": 32977 + }, + { + "epoch": 0.9054914881933004, + "grad_norm": 0.41032978892326355, + "learning_rate": 1.1502887836552564e-05, + "loss": 0.4013, + "step": 32978 + }, + { + "epoch": 0.9055189456342669, + "grad_norm": 0.3880426585674286, + "learning_rate": 1.1502460847127245e-05, + "loss": 0.5364, + "step": 32979 + }, + { + "epoch": 0.9055464030752334, + "grad_norm": 0.38841116428375244, + "learning_rate": 1.1502033854899359e-05, + "loss": 0.4574, + "step": 32980 + }, + { + "epoch": 0.9055738605161999, + "grad_norm": 0.4404136538505554, + "learning_rate": 1.1501606859869701e-05, + "loss": 0.5541, + "step": 32981 + }, + { + "epoch": 0.9056013179571664, + "grad_norm": 0.4262426793575287, + "learning_rate": 1.1501179862039064e-05, + "loss": 0.4072, + "step": 32982 + }, + { + "epoch": 0.9056287753981329, + "grad_norm": 0.3924223482608795, + "learning_rate": 1.150075286140825e-05, + "loss": 0.4274, + "step": 32983 + }, + { + "epoch": 0.9056562328390994, + "grad_norm": 0.41102784872055054, + "learning_rate": 1.1500325857978052e-05, + "loss": 0.4257, + "step": 32984 + }, + { + "epoch": 0.9056836902800659, + "grad_norm": 0.4040723443031311, + "learning_rate": 1.149989885174927e-05, + "loss": 0.558, + "step": 32985 + }, + { + "epoch": 0.9057111477210325, + "grad_norm": 0.49618181586265564, + "learning_rate": 1.1499471842722694e-05, + "loss": 0.5083, + "step": 32986 + }, + { + "epoch": 0.9057386051619989, + "grad_norm": 0.4482768774032593, + "learning_rate": 1.1499044830899126e-05, + "loss": 0.5094, + "step": 32987 + }, + { + "epoch": 0.9057660626029654, + "grad_norm": 0.37323126196861267, + "learning_rate": 1.1498617816279363e-05, + "loss": 0.4034, + "step": 32988 + }, + { + "epoch": 0.9057935200439319, + "grad_norm": 0.39099809527397156, + "learning_rate": 1.1498190798864195e-05, + "loss": 0.4955, + "step": 32989 + }, + { + "epoch": 0.9058209774848984, + "grad_norm": 0.38071009516716003, + "learning_rate": 1.1497763778654428e-05, + "loss": 0.4645, + "step": 32990 + }, + { + "epoch": 0.9058484349258649, + "grad_norm": 0.3178936541080475, + "learning_rate": 1.1497336755650848e-05, + "loss": 0.3868, + "step": 32991 + }, + { + "epoch": 0.9058758923668314, + "grad_norm": 0.41623392701148987, + "learning_rate": 1.149690972985426e-05, + "loss": 0.4889, + "step": 32992 + }, + { + "epoch": 0.905903349807798, + "grad_norm": 0.379284530878067, + "learning_rate": 1.1496482701265458e-05, + "loss": 0.5209, + "step": 32993 + }, + { + "epoch": 0.9059308072487644, + "grad_norm": 0.3935716152191162, + "learning_rate": 1.1496055669885237e-05, + "loss": 0.4199, + "step": 32994 + }, + { + "epoch": 0.905958264689731, + "grad_norm": 0.45475319027900696, + "learning_rate": 1.1495628635714397e-05, + "loss": 0.5454, + "step": 32995 + }, + { + "epoch": 0.9059857221306974, + "grad_norm": 0.3842626214027405, + "learning_rate": 1.1495201598753729e-05, + "loss": 0.5389, + "step": 32996 + }, + { + "epoch": 0.9060131795716639, + "grad_norm": 0.4071104824542999, + "learning_rate": 1.1494774559004036e-05, + "loss": 0.5542, + "step": 32997 + }, + { + "epoch": 0.9060406370126304, + "grad_norm": 0.41031256318092346, + "learning_rate": 1.1494347516466111e-05, + "loss": 0.5407, + "step": 32998 + }, + { + "epoch": 0.9060680944535969, + "grad_norm": 0.4070524275302887, + "learning_rate": 1.1493920471140747e-05, + "loss": 0.497, + "step": 32999 + }, + { + "epoch": 0.9060955518945635, + "grad_norm": 0.3654446303844452, + "learning_rate": 1.149349342302875e-05, + "loss": 0.5342, + "step": 33000 + }, + { + "epoch": 0.9061230093355299, + "grad_norm": 0.4330393075942993, + "learning_rate": 1.1493066372130907e-05, + "loss": 0.4364, + "step": 33001 + }, + { + "epoch": 0.9061504667764965, + "grad_norm": 0.3173982799053192, + "learning_rate": 1.149263931844802e-05, + "loss": 0.3231, + "step": 33002 + }, + { + "epoch": 0.9061779242174629, + "grad_norm": 0.41309812664985657, + "learning_rate": 1.1492212261980886e-05, + "loss": 0.5254, + "step": 33003 + }, + { + "epoch": 0.9062053816584295, + "grad_norm": 0.3755654990673065, + "learning_rate": 1.1491785202730297e-05, + "loss": 0.4709, + "step": 33004 + }, + { + "epoch": 0.9062328390993959, + "grad_norm": 0.3582751154899597, + "learning_rate": 1.1491358140697053e-05, + "loss": 0.5126, + "step": 33005 + }, + { + "epoch": 0.9062602965403624, + "grad_norm": 0.42414960265159607, + "learning_rate": 1.1490931075881953e-05, + "loss": 0.3944, + "step": 33006 + }, + { + "epoch": 0.906287753981329, + "grad_norm": 0.44700804352760315, + "learning_rate": 1.1490504008285785e-05, + "loss": 0.485, + "step": 33007 + }, + { + "epoch": 0.9063152114222954, + "grad_norm": 0.36089470982551575, + "learning_rate": 1.1490076937909355e-05, + "loss": 0.5075, + "step": 33008 + }, + { + "epoch": 0.906342668863262, + "grad_norm": 0.42052751779556274, + "learning_rate": 1.1489649864753456e-05, + "loss": 0.3861, + "step": 33009 + }, + { + "epoch": 0.9063701263042284, + "grad_norm": 0.3929709792137146, + "learning_rate": 1.1489222788818885e-05, + "loss": 0.5151, + "step": 33010 + }, + { + "epoch": 0.906397583745195, + "grad_norm": 0.3185974359512329, + "learning_rate": 1.148879571010644e-05, + "loss": 0.3736, + "step": 33011 + }, + { + "epoch": 0.9064250411861614, + "grad_norm": 0.365153431892395, + "learning_rate": 1.148836862861691e-05, + "loss": 0.5556, + "step": 33012 + }, + { + "epoch": 0.906452498627128, + "grad_norm": 0.4031786024570465, + "learning_rate": 1.1487941544351102e-05, + "loss": 0.5241, + "step": 33013 + }, + { + "epoch": 0.9064799560680945, + "grad_norm": 0.3949236571788788, + "learning_rate": 1.1487514457309808e-05, + "loss": 0.4355, + "step": 33014 + }, + { + "epoch": 0.9065074135090609, + "grad_norm": 0.4149945080280304, + "learning_rate": 1.1487087367493823e-05, + "loss": 0.5168, + "step": 33015 + }, + { + "epoch": 0.9065348709500275, + "grad_norm": 0.4180516004562378, + "learning_rate": 1.1486660274903947e-05, + "loss": 0.4564, + "step": 33016 + }, + { + "epoch": 0.9065623283909939, + "grad_norm": 0.4485757648944855, + "learning_rate": 1.1486233179540973e-05, + "loss": 0.4751, + "step": 33017 + }, + { + "epoch": 0.9065897858319605, + "grad_norm": 0.3575478792190552, + "learning_rate": 1.1485806081405701e-05, + "loss": 0.4638, + "step": 33018 + }, + { + "epoch": 0.9066172432729269, + "grad_norm": 0.4810888469219208, + "learning_rate": 1.1485378980498928e-05, + "loss": 0.5429, + "step": 33019 + }, + { + "epoch": 0.9066447007138935, + "grad_norm": 0.4262485206127167, + "learning_rate": 1.1484951876821446e-05, + "loss": 0.5169, + "step": 33020 + }, + { + "epoch": 0.90667215815486, + "grad_norm": 0.4077582359313965, + "learning_rate": 1.1484524770374057e-05, + "loss": 0.4346, + "step": 33021 + }, + { + "epoch": 0.9066996155958265, + "grad_norm": 0.4152623116970062, + "learning_rate": 1.1484097661157555e-05, + "loss": 0.5829, + "step": 33022 + }, + { + "epoch": 0.906727073036793, + "grad_norm": 0.3998049199581146, + "learning_rate": 1.1483670549172737e-05, + "loss": 0.5453, + "step": 33023 + }, + { + "epoch": 0.9067545304777594, + "grad_norm": 0.4217193126678467, + "learning_rate": 1.14832434344204e-05, + "loss": 0.4469, + "step": 33024 + }, + { + "epoch": 0.906781987918726, + "grad_norm": 0.44294941425323486, + "learning_rate": 1.148281631690134e-05, + "loss": 0.5336, + "step": 33025 + }, + { + "epoch": 0.9068094453596924, + "grad_norm": 0.38631778955459595, + "learning_rate": 1.1482389196616354e-05, + "loss": 0.5107, + "step": 33026 + }, + { + "epoch": 0.906836902800659, + "grad_norm": 0.4077947735786438, + "learning_rate": 1.148196207356624e-05, + "loss": 0.5287, + "step": 33027 + }, + { + "epoch": 0.9068643602416255, + "grad_norm": 0.47865787148475647, + "learning_rate": 1.1481534947751793e-05, + "loss": 0.5445, + "step": 33028 + }, + { + "epoch": 0.906891817682592, + "grad_norm": 0.4410680830478668, + "learning_rate": 1.1481107819173813e-05, + "loss": 0.5032, + "step": 33029 + }, + { + "epoch": 0.9069192751235585, + "grad_norm": 0.3489164412021637, + "learning_rate": 1.1480680687833092e-05, + "loss": 0.5227, + "step": 33030 + }, + { + "epoch": 0.906946732564525, + "grad_norm": 0.4052288234233856, + "learning_rate": 1.1480253553730429e-05, + "loss": 0.5352, + "step": 33031 + }, + { + "epoch": 0.9069741900054915, + "grad_norm": 0.3693874776363373, + "learning_rate": 1.1479826416866622e-05, + "loss": 0.4897, + "step": 33032 + }, + { + "epoch": 0.907001647446458, + "grad_norm": 0.4372846484184265, + "learning_rate": 1.1479399277242465e-05, + "loss": 0.4229, + "step": 33033 + }, + { + "epoch": 0.9070291048874245, + "grad_norm": 0.35235658288002014, + "learning_rate": 1.1478972134858756e-05, + "loss": 0.4621, + "step": 33034 + }, + { + "epoch": 0.907056562328391, + "grad_norm": 0.4636097848415375, + "learning_rate": 1.1478544989716292e-05, + "loss": 0.5149, + "step": 33035 + }, + { + "epoch": 0.9070840197693575, + "grad_norm": 0.4334105849266052, + "learning_rate": 1.147811784181587e-05, + "loss": 0.478, + "step": 33036 + }, + { + "epoch": 0.907111477210324, + "grad_norm": 0.5043298602104187, + "learning_rate": 1.1477690691158288e-05, + "loss": 0.4831, + "step": 33037 + }, + { + "epoch": 0.9071389346512905, + "grad_norm": 0.3736336827278137, + "learning_rate": 1.1477263537744338e-05, + "loss": 0.4796, + "step": 33038 + }, + { + "epoch": 0.907166392092257, + "grad_norm": 0.3924731910228729, + "learning_rate": 1.1476836381574823e-05, + "loss": 0.4515, + "step": 33039 + }, + { + "epoch": 0.9071938495332235, + "grad_norm": 0.44168519973754883, + "learning_rate": 1.1476409222650535e-05, + "loss": 0.5674, + "step": 33040 + }, + { + "epoch": 0.90722130697419, + "grad_norm": 0.48789212107658386, + "learning_rate": 1.1475982060972273e-05, + "loss": 0.4636, + "step": 33041 + }, + { + "epoch": 0.9072487644151566, + "grad_norm": 0.4664080739021301, + "learning_rate": 1.1475554896540835e-05, + "loss": 0.5362, + "step": 33042 + }, + { + "epoch": 0.907276221856123, + "grad_norm": 0.4528812766075134, + "learning_rate": 1.1475127729357015e-05, + "loss": 0.5751, + "step": 33043 + }, + { + "epoch": 0.9073036792970895, + "grad_norm": 0.37311992049217224, + "learning_rate": 1.147470055942161e-05, + "loss": 0.4323, + "step": 33044 + }, + { + "epoch": 0.907331136738056, + "grad_norm": 0.48098689317703247, + "learning_rate": 1.1474273386735419e-05, + "loss": 0.6199, + "step": 33045 + }, + { + "epoch": 0.9073585941790225, + "grad_norm": 0.3694537878036499, + "learning_rate": 1.1473846211299237e-05, + "loss": 0.454, + "step": 33046 + }, + { + "epoch": 0.907386051619989, + "grad_norm": 0.39207932353019714, + "learning_rate": 1.1473419033113862e-05, + "loss": 0.4812, + "step": 33047 + }, + { + "epoch": 0.9074135090609555, + "grad_norm": 0.39676544070243835, + "learning_rate": 1.1472991852180089e-05, + "loss": 0.4986, + "step": 33048 + }, + { + "epoch": 0.9074409665019221, + "grad_norm": 0.34334975481033325, + "learning_rate": 1.1472564668498718e-05, + "loss": 0.388, + "step": 33049 + }, + { + "epoch": 0.9074684239428885, + "grad_norm": 0.3459285497665405, + "learning_rate": 1.1472137482070544e-05, + "loss": 0.47, + "step": 33050 + }, + { + "epoch": 0.9074958813838551, + "grad_norm": 0.4609956443309784, + "learning_rate": 1.1471710292896361e-05, + "loss": 0.4975, + "step": 33051 + }, + { + "epoch": 0.9075233388248215, + "grad_norm": 0.35907086730003357, + "learning_rate": 1.1471283100976971e-05, + "loss": 0.5185, + "step": 33052 + }, + { + "epoch": 0.907550796265788, + "grad_norm": 0.41893360018730164, + "learning_rate": 1.1470855906313166e-05, + "loss": 0.545, + "step": 33053 + }, + { + "epoch": 0.9075782537067545, + "grad_norm": 0.4535546898841858, + "learning_rate": 1.1470428708905747e-05, + "loss": 0.4447, + "step": 33054 + }, + { + "epoch": 0.907605711147721, + "grad_norm": 0.38038602471351624, + "learning_rate": 1.1470001508755511e-05, + "loss": 0.5353, + "step": 33055 + }, + { + "epoch": 0.9076331685886876, + "grad_norm": 0.41682732105255127, + "learning_rate": 1.146957430586325e-05, + "loss": 0.5267, + "step": 33056 + }, + { + "epoch": 0.907660626029654, + "grad_norm": 0.39049237966537476, + "learning_rate": 1.1469147100229765e-05, + "loss": 0.521, + "step": 33057 + }, + { + "epoch": 0.9076880834706206, + "grad_norm": 0.4202724099159241, + "learning_rate": 1.146871989185585e-05, + "loss": 0.4754, + "step": 33058 + }, + { + "epoch": 0.907715540911587, + "grad_norm": 0.42585301399230957, + "learning_rate": 1.1468292680742305e-05, + "loss": 0.4963, + "step": 33059 + }, + { + "epoch": 0.9077429983525536, + "grad_norm": 0.36066341400146484, + "learning_rate": 1.1467865466889929e-05, + "loss": 0.4135, + "step": 33060 + }, + { + "epoch": 0.90777045579352, + "grad_norm": 0.33537542819976807, + "learning_rate": 1.146743825029951e-05, + "loss": 0.4249, + "step": 33061 + }, + { + "epoch": 0.9077979132344866, + "grad_norm": 0.40968504548072815, + "learning_rate": 1.1467011030971853e-05, + "loss": 0.5412, + "step": 33062 + }, + { + "epoch": 0.9078253706754531, + "grad_norm": 0.47990649938583374, + "learning_rate": 1.1466583808907752e-05, + "loss": 0.4929, + "step": 33063 + }, + { + "epoch": 0.9078528281164195, + "grad_norm": 0.40321773290634155, + "learning_rate": 1.1466156584108003e-05, + "loss": 0.5787, + "step": 33064 + }, + { + "epoch": 0.9078802855573861, + "grad_norm": 0.3833484649658203, + "learning_rate": 1.1465729356573405e-05, + "loss": 0.4618, + "step": 33065 + }, + { + "epoch": 0.9079077429983525, + "grad_norm": 0.3453899025917053, + "learning_rate": 1.1465302126304754e-05, + "loss": 0.3459, + "step": 33066 + }, + { + "epoch": 0.9079352004393191, + "grad_norm": 0.4606780409812927, + "learning_rate": 1.1464874893302849e-05, + "loss": 0.5134, + "step": 33067 + }, + { + "epoch": 0.9079626578802855, + "grad_norm": 0.41391780972480774, + "learning_rate": 1.146444765756848e-05, + "loss": 0.4312, + "step": 33068 + }, + { + "epoch": 0.9079901153212521, + "grad_norm": 0.4663473963737488, + "learning_rate": 1.1464020419102453e-05, + "loss": 0.6139, + "step": 33069 + }, + { + "epoch": 0.9080175727622186, + "grad_norm": 0.36777111887931824, + "learning_rate": 1.1463593177905556e-05, + "loss": 0.424, + "step": 33070 + }, + { + "epoch": 0.908045030203185, + "grad_norm": 0.35914888978004456, + "learning_rate": 1.1463165933978596e-05, + "loss": 0.483, + "step": 33071 + }, + { + "epoch": 0.9080724876441516, + "grad_norm": 0.3916766047477722, + "learning_rate": 1.1462738687322361e-05, + "loss": 0.4851, + "step": 33072 + }, + { + "epoch": 0.908099945085118, + "grad_norm": 0.5756719708442688, + "learning_rate": 1.1462311437937652e-05, + "loss": 0.5248, + "step": 33073 + }, + { + "epoch": 0.9081274025260846, + "grad_norm": 0.43903452157974243, + "learning_rate": 1.1461884185825267e-05, + "loss": 0.5006, + "step": 33074 + }, + { + "epoch": 0.908154859967051, + "grad_norm": 0.37008246779441833, + "learning_rate": 1.1461456930986e-05, + "loss": 0.5273, + "step": 33075 + }, + { + "epoch": 0.9081823174080176, + "grad_norm": 0.38046038150787354, + "learning_rate": 1.146102967342065e-05, + "loss": 0.4714, + "step": 33076 + }, + { + "epoch": 0.908209774848984, + "grad_norm": 0.3595348000526428, + "learning_rate": 1.1460602413130014e-05, + "loss": 0.4793, + "step": 33077 + }, + { + "epoch": 0.9082372322899506, + "grad_norm": 0.40073370933532715, + "learning_rate": 1.1460175150114888e-05, + "loss": 0.4496, + "step": 33078 + }, + { + "epoch": 0.9082646897309171, + "grad_norm": 0.35275787115097046, + "learning_rate": 1.1459747884376067e-05, + "loss": 0.4755, + "step": 33079 + }, + { + "epoch": 0.9082921471718836, + "grad_norm": 0.38199031352996826, + "learning_rate": 1.1459320615914351e-05, + "loss": 0.4718, + "step": 33080 + }, + { + "epoch": 0.9083196046128501, + "grad_norm": 0.699432909488678, + "learning_rate": 1.145889334473054e-05, + "loss": 0.481, + "step": 33081 + }, + { + "epoch": 0.9083470620538165, + "grad_norm": 0.36426064372062683, + "learning_rate": 1.1458466070825423e-05, + "loss": 0.4945, + "step": 33082 + }, + { + "epoch": 0.9083745194947831, + "grad_norm": 0.39134448766708374, + "learning_rate": 1.1458038794199804e-05, + "loss": 0.495, + "step": 33083 + }, + { + "epoch": 0.9084019769357495, + "grad_norm": 0.4984046518802643, + "learning_rate": 1.1457611514854476e-05, + "loss": 0.4985, + "step": 33084 + }, + { + "epoch": 0.9084294343767161, + "grad_norm": 0.4959951341152191, + "learning_rate": 1.1457184232790237e-05, + "loss": 0.4877, + "step": 33085 + }, + { + "epoch": 0.9084568918176826, + "grad_norm": 0.37100088596343994, + "learning_rate": 1.1456756948007885e-05, + "loss": 0.5471, + "step": 33086 + }, + { + "epoch": 0.9084843492586491, + "grad_norm": 0.4096302092075348, + "learning_rate": 1.1456329660508213e-05, + "loss": 0.4722, + "step": 33087 + }, + { + "epoch": 0.9085118066996156, + "grad_norm": 0.42612117528915405, + "learning_rate": 1.1455902370292027e-05, + "loss": 0.5493, + "step": 33088 + }, + { + "epoch": 0.9085392641405821, + "grad_norm": 0.39556312561035156, + "learning_rate": 1.1455475077360116e-05, + "loss": 0.4611, + "step": 33089 + }, + { + "epoch": 0.9085667215815486, + "grad_norm": 0.3768816590309143, + "learning_rate": 1.145504778171328e-05, + "loss": 0.4938, + "step": 33090 + }, + { + "epoch": 0.908594179022515, + "grad_norm": 0.3724977374076843, + "learning_rate": 1.1454620483352314e-05, + "loss": 0.4276, + "step": 33091 + }, + { + "epoch": 0.9086216364634816, + "grad_norm": 0.37662702798843384, + "learning_rate": 1.1454193182278018e-05, + "loss": 0.4884, + "step": 33092 + }, + { + "epoch": 0.9086490939044481, + "grad_norm": 0.3518172800540924, + "learning_rate": 1.1453765878491188e-05, + "loss": 0.4056, + "step": 33093 + }, + { + "epoch": 0.9086765513454146, + "grad_norm": 0.3737923800945282, + "learning_rate": 1.145333857199262e-05, + "loss": 0.4392, + "step": 33094 + }, + { + "epoch": 0.9087040087863811, + "grad_norm": 0.42448192834854126, + "learning_rate": 1.145291126278311e-05, + "loss": 0.4812, + "step": 33095 + }, + { + "epoch": 0.9087314662273476, + "grad_norm": 0.38830214738845825, + "learning_rate": 1.145248395086346e-05, + "loss": 0.4852, + "step": 33096 + }, + { + "epoch": 0.9087589236683141, + "grad_norm": 0.41828227043151855, + "learning_rate": 1.1452056636234461e-05, + "loss": 0.5038, + "step": 33097 + }, + { + "epoch": 0.9087863811092806, + "grad_norm": 0.3983025550842285, + "learning_rate": 1.1451629318896913e-05, + "loss": 0.5138, + "step": 33098 + }, + { + "epoch": 0.9088138385502471, + "grad_norm": 0.3972274363040924, + "learning_rate": 1.1451201998851615e-05, + "loss": 0.4733, + "step": 33099 + }, + { + "epoch": 0.9088412959912137, + "grad_norm": 0.406499445438385, + "learning_rate": 1.1450774676099362e-05, + "loss": 0.5821, + "step": 33100 + }, + { + "epoch": 0.9088687534321801, + "grad_norm": 0.3908696472644806, + "learning_rate": 1.1450347350640948e-05, + "loss": 0.4955, + "step": 33101 + }, + { + "epoch": 0.9088962108731466, + "grad_norm": 0.36769968271255493, + "learning_rate": 1.1449920022477176e-05, + "loss": 0.4736, + "step": 33102 + }, + { + "epoch": 0.9089236683141131, + "grad_norm": 0.38542038202285767, + "learning_rate": 1.1449492691608841e-05, + "loss": 0.465, + "step": 33103 + }, + { + "epoch": 0.9089511257550796, + "grad_norm": 0.41010528802871704, + "learning_rate": 1.144906535803674e-05, + "loss": 0.4709, + "step": 33104 + }, + { + "epoch": 0.9089785831960461, + "grad_norm": 0.39042380452156067, + "learning_rate": 1.1448638021761667e-05, + "loss": 0.411, + "step": 33105 + }, + { + "epoch": 0.9090060406370126, + "grad_norm": 0.41408607363700867, + "learning_rate": 1.1448210682784425e-05, + "loss": 0.4852, + "step": 33106 + }, + { + "epoch": 0.9090334980779792, + "grad_norm": 0.4114986062049866, + "learning_rate": 1.1447783341105803e-05, + "loss": 0.5183, + "step": 33107 + }, + { + "epoch": 0.9090609555189456, + "grad_norm": 0.3943922221660614, + "learning_rate": 1.1447355996726605e-05, + "loss": 0.4033, + "step": 33108 + }, + { + "epoch": 0.9090884129599122, + "grad_norm": 0.3915098011493683, + "learning_rate": 1.1446928649647628e-05, + "loss": 0.4571, + "step": 33109 + }, + { + "epoch": 0.9091158704008786, + "grad_norm": 0.36691197752952576, + "learning_rate": 1.1446501299869666e-05, + "loss": 0.4208, + "step": 33110 + }, + { + "epoch": 0.9091433278418452, + "grad_norm": 0.3648339807987213, + "learning_rate": 1.1446073947393517e-05, + "loss": 0.4578, + "step": 33111 + }, + { + "epoch": 0.9091707852828116, + "grad_norm": 0.429741770029068, + "learning_rate": 1.1445646592219982e-05, + "loss": 0.5627, + "step": 33112 + }, + { + "epoch": 0.9091982427237781, + "grad_norm": 0.521229088306427, + "learning_rate": 1.144521923434985e-05, + "loss": 0.6492, + "step": 33113 + }, + { + "epoch": 0.9092257001647447, + "grad_norm": 0.37684911489486694, + "learning_rate": 1.1444791873783924e-05, + "loss": 0.5183, + "step": 33114 + }, + { + "epoch": 0.9092531576057111, + "grad_norm": 0.4135623872280121, + "learning_rate": 1.1444364510523e-05, + "loss": 0.4603, + "step": 33115 + }, + { + "epoch": 0.9092806150466777, + "grad_norm": 0.350619912147522, + "learning_rate": 1.1443937144567875e-05, + "loss": 0.4815, + "step": 33116 + }, + { + "epoch": 0.9093080724876441, + "grad_norm": 0.3928729295730591, + "learning_rate": 1.144350977591935e-05, + "loss": 0.4593, + "step": 33117 + }, + { + "epoch": 0.9093355299286107, + "grad_norm": 0.34866589307785034, + "learning_rate": 1.1443082404578214e-05, + "loss": 0.4963, + "step": 33118 + }, + { + "epoch": 0.9093629873695771, + "grad_norm": 0.44498178362846375, + "learning_rate": 1.1442655030545272e-05, + "loss": 0.5314, + "step": 33119 + }, + { + "epoch": 0.9093904448105437, + "grad_norm": 0.40811270475387573, + "learning_rate": 1.1442227653821316e-05, + "loss": 0.4644, + "step": 33120 + }, + { + "epoch": 0.9094179022515102, + "grad_norm": 0.39578723907470703, + "learning_rate": 1.1441800274407146e-05, + "loss": 0.507, + "step": 33121 + }, + { + "epoch": 0.9094453596924766, + "grad_norm": 0.3812606930732727, + "learning_rate": 1.1441372892303559e-05, + "loss": 0.4041, + "step": 33122 + }, + { + "epoch": 0.9094728171334432, + "grad_norm": 0.41026046872138977, + "learning_rate": 1.144094550751135e-05, + "loss": 0.5135, + "step": 33123 + }, + { + "epoch": 0.9095002745744096, + "grad_norm": 0.4123243987560272, + "learning_rate": 1.1440518120031317e-05, + "loss": 0.5179, + "step": 33124 + }, + { + "epoch": 0.9095277320153762, + "grad_norm": 0.38606804609298706, + "learning_rate": 1.1440090729864259e-05, + "loss": 0.5389, + "step": 33125 + }, + { + "epoch": 0.9095551894563426, + "grad_norm": 0.33387258648872375, + "learning_rate": 1.1439663337010973e-05, + "loss": 0.4215, + "step": 33126 + }, + { + "epoch": 0.9095826468973092, + "grad_norm": 0.42763251066207886, + "learning_rate": 1.1439235941472255e-05, + "loss": 0.5469, + "step": 33127 + }, + { + "epoch": 0.9096101043382757, + "grad_norm": 0.36759066581726074, + "learning_rate": 1.14388085432489e-05, + "loss": 0.4486, + "step": 33128 + }, + { + "epoch": 0.9096375617792422, + "grad_norm": 0.3896561861038208, + "learning_rate": 1.1438381142341712e-05, + "loss": 0.4949, + "step": 33129 + }, + { + "epoch": 0.9096650192202087, + "grad_norm": 0.42469435930252075, + "learning_rate": 1.1437953738751484e-05, + "loss": 0.5195, + "step": 33130 + }, + { + "epoch": 0.9096924766611751, + "grad_norm": 0.400480180978775, + "learning_rate": 1.143752633247901e-05, + "loss": 0.4695, + "step": 33131 + }, + { + "epoch": 0.9097199341021417, + "grad_norm": 0.3548296391963959, + "learning_rate": 1.1437098923525092e-05, + "loss": 0.4624, + "step": 33132 + }, + { + "epoch": 0.9097473915431081, + "grad_norm": 0.3553624451160431, + "learning_rate": 1.1436671511890525e-05, + "loss": 0.4312, + "step": 33133 + }, + { + "epoch": 0.9097748489840747, + "grad_norm": 0.4570797383785248, + "learning_rate": 1.1436244097576108e-05, + "loss": 0.4984, + "step": 33134 + }, + { + "epoch": 0.9098023064250412, + "grad_norm": 0.34384825825691223, + "learning_rate": 1.1435816680582639e-05, + "loss": 0.4069, + "step": 33135 + }, + { + "epoch": 0.9098297638660077, + "grad_norm": 0.3956947922706604, + "learning_rate": 1.1435389260910912e-05, + "loss": 0.4894, + "step": 33136 + }, + { + "epoch": 0.9098572213069742, + "grad_norm": 0.42510855197906494, + "learning_rate": 1.1434961838561726e-05, + "loss": 0.5387, + "step": 33137 + }, + { + "epoch": 0.9098846787479407, + "grad_norm": 0.6288581490516663, + "learning_rate": 1.1434534413535876e-05, + "loss": 0.5392, + "step": 33138 + }, + { + "epoch": 0.9099121361889072, + "grad_norm": 0.4366309344768524, + "learning_rate": 1.1434106985834165e-05, + "loss": 0.5184, + "step": 33139 + }, + { + "epoch": 0.9099395936298736, + "grad_norm": 0.4251192808151245, + "learning_rate": 1.1433679555457386e-05, + "loss": 0.5746, + "step": 33140 + }, + { + "epoch": 0.9099670510708402, + "grad_norm": 0.45134472846984863, + "learning_rate": 1.1433252122406335e-05, + "loss": 0.515, + "step": 33141 + }, + { + "epoch": 0.9099945085118067, + "grad_norm": 0.3993982970714569, + "learning_rate": 1.1432824686681813e-05, + "loss": 0.4802, + "step": 33142 + }, + { + "epoch": 0.9100219659527732, + "grad_norm": 0.3716014325618744, + "learning_rate": 1.1432397248284618e-05, + "loss": 0.439, + "step": 33143 + }, + { + "epoch": 0.9100494233937397, + "grad_norm": 0.3878976106643677, + "learning_rate": 1.1431969807215541e-05, + "loss": 0.4659, + "step": 33144 + }, + { + "epoch": 0.9100768808347062, + "grad_norm": 0.37241220474243164, + "learning_rate": 1.1431542363475386e-05, + "loss": 0.523, + "step": 33145 + }, + { + "epoch": 0.9101043382756727, + "grad_norm": 0.39115074276924133, + "learning_rate": 1.1431114917064945e-05, + "loss": 0.551, + "step": 33146 + }, + { + "epoch": 0.9101317957166392, + "grad_norm": 0.4192158877849579, + "learning_rate": 1.143068746798502e-05, + "loss": 0.517, + "step": 33147 + }, + { + "epoch": 0.9101592531576057, + "grad_norm": 0.3560570180416107, + "learning_rate": 1.1430260016236405e-05, + "loss": 0.4375, + "step": 33148 + }, + { + "epoch": 0.9101867105985723, + "grad_norm": 0.39886757731437683, + "learning_rate": 1.1429832561819897e-05, + "loss": 0.521, + "step": 33149 + }, + { + "epoch": 0.9102141680395387, + "grad_norm": 0.4082705080509186, + "learning_rate": 1.14294051047363e-05, + "loss": 0.5029, + "step": 33150 + }, + { + "epoch": 0.9102416254805052, + "grad_norm": 0.5149462819099426, + "learning_rate": 1.1428977644986401e-05, + "loss": 0.4814, + "step": 33151 + }, + { + "epoch": 0.9102690829214717, + "grad_norm": 0.41780000925064087, + "learning_rate": 1.1428550182571005e-05, + "loss": 0.4784, + "step": 33152 + }, + { + "epoch": 0.9102965403624382, + "grad_norm": 0.3567448556423187, + "learning_rate": 1.1428122717490908e-05, + "loss": 0.5316, + "step": 33153 + }, + { + "epoch": 0.9103239978034047, + "grad_norm": 0.3563809394836426, + "learning_rate": 1.1427695249746904e-05, + "loss": 0.4572, + "step": 33154 + }, + { + "epoch": 0.9103514552443712, + "grad_norm": 0.39646559953689575, + "learning_rate": 1.1427267779339795e-05, + "loss": 0.462, + "step": 33155 + }, + { + "epoch": 0.9103789126853378, + "grad_norm": 1.426276683807373, + "learning_rate": 1.1426840306270375e-05, + "loss": 0.5162, + "step": 33156 + }, + { + "epoch": 0.9104063701263042, + "grad_norm": 0.40234455466270447, + "learning_rate": 1.1426412830539441e-05, + "loss": 0.4827, + "step": 33157 + }, + { + "epoch": 0.9104338275672708, + "grad_norm": 0.4022219181060791, + "learning_rate": 1.1425985352147794e-05, + "loss": 0.4799, + "step": 33158 + }, + { + "epoch": 0.9104612850082372, + "grad_norm": 0.39916443824768066, + "learning_rate": 1.1425557871096227e-05, + "loss": 0.4613, + "step": 33159 + }, + { + "epoch": 0.9104887424492037, + "grad_norm": 0.36313945055007935, + "learning_rate": 1.1425130387385543e-05, + "loss": 0.3822, + "step": 33160 + }, + { + "epoch": 0.9105161998901702, + "grad_norm": 0.3736327886581421, + "learning_rate": 1.1424702901016534e-05, + "loss": 0.4125, + "step": 33161 + }, + { + "epoch": 0.9105436573311367, + "grad_norm": 0.4331866502761841, + "learning_rate": 1.1424275411989998e-05, + "loss": 0.4915, + "step": 33162 + }, + { + "epoch": 0.9105711147721033, + "grad_norm": 0.36839228868484497, + "learning_rate": 1.1423847920306736e-05, + "loss": 0.4627, + "step": 33163 + }, + { + "epoch": 0.9105985722130697, + "grad_norm": 0.3644300401210785, + "learning_rate": 1.1423420425967542e-05, + "loss": 0.4745, + "step": 33164 + }, + { + "epoch": 0.9106260296540363, + "grad_norm": 0.40666380524635315, + "learning_rate": 1.1422992928973216e-05, + "loss": 0.451, + "step": 33165 + }, + { + "epoch": 0.9106534870950027, + "grad_norm": 0.428646445274353, + "learning_rate": 1.1422565429324552e-05, + "loss": 0.6008, + "step": 33166 + }, + { + "epoch": 0.9106809445359693, + "grad_norm": 0.35679861903190613, + "learning_rate": 1.142213792702235e-05, + "loss": 0.4887, + "step": 33167 + }, + { + "epoch": 0.9107084019769357, + "grad_norm": 0.37931734323501587, + "learning_rate": 1.1421710422067407e-05, + "loss": 0.482, + "step": 33168 + }, + { + "epoch": 0.9107358594179022, + "grad_norm": 0.340728759765625, + "learning_rate": 1.1421282914460522e-05, + "loss": 0.4521, + "step": 33169 + }, + { + "epoch": 0.9107633168588688, + "grad_norm": 0.4264187812805176, + "learning_rate": 1.1420855404202489e-05, + "loss": 0.5635, + "step": 33170 + }, + { + "epoch": 0.9107907742998352, + "grad_norm": 0.41844844818115234, + "learning_rate": 1.1420427891294107e-05, + "loss": 0.4753, + "step": 33171 + }, + { + "epoch": 0.9108182317408018, + "grad_norm": 0.4013041853904724, + "learning_rate": 1.1420000375736173e-05, + "loss": 0.4856, + "step": 33172 + }, + { + "epoch": 0.9108456891817682, + "grad_norm": 0.4040120244026184, + "learning_rate": 1.1419572857529486e-05, + "loss": 0.4956, + "step": 33173 + }, + { + "epoch": 0.9108731466227348, + "grad_norm": 0.36364638805389404, + "learning_rate": 1.1419145336674844e-05, + "loss": 0.3901, + "step": 33174 + }, + { + "epoch": 0.9109006040637012, + "grad_norm": 0.3928261995315552, + "learning_rate": 1.1418717813173042e-05, + "loss": 0.474, + "step": 33175 + }, + { + "epoch": 0.9109280615046678, + "grad_norm": 0.4124365746974945, + "learning_rate": 1.1418290287024878e-05, + "loss": 0.3647, + "step": 33176 + }, + { + "epoch": 0.9109555189456343, + "grad_norm": 0.41284263134002686, + "learning_rate": 1.1417862758231149e-05, + "loss": 0.4631, + "step": 33177 + }, + { + "epoch": 0.9109829763866008, + "grad_norm": 0.39145007729530334, + "learning_rate": 1.1417435226792654e-05, + "loss": 0.5445, + "step": 33178 + }, + { + "epoch": 0.9110104338275673, + "grad_norm": 0.4044134318828583, + "learning_rate": 1.1417007692710194e-05, + "loss": 0.4594, + "step": 33179 + }, + { + "epoch": 0.9110378912685337, + "grad_norm": 0.36351510882377625, + "learning_rate": 1.1416580155984555e-05, + "loss": 0.4532, + "step": 33180 + }, + { + "epoch": 0.9110653487095003, + "grad_norm": 0.39040520787239075, + "learning_rate": 1.1416152616616548e-05, + "loss": 0.5043, + "step": 33181 + }, + { + "epoch": 0.9110928061504667, + "grad_norm": 0.45861244201660156, + "learning_rate": 1.141572507460696e-05, + "loss": 0.4347, + "step": 33182 + }, + { + "epoch": 0.9111202635914333, + "grad_norm": 0.343658983707428, + "learning_rate": 1.1415297529956598e-05, + "loss": 0.378, + "step": 33183 + }, + { + "epoch": 0.9111477210323998, + "grad_norm": 0.42099499702453613, + "learning_rate": 1.141486998266625e-05, + "loss": 0.524, + "step": 33184 + }, + { + "epoch": 0.9111751784733663, + "grad_norm": 0.3802451193332672, + "learning_rate": 1.1414442432736719e-05, + "loss": 0.534, + "step": 33185 + }, + { + "epoch": 0.9112026359143328, + "grad_norm": 0.40459051728248596, + "learning_rate": 1.1414014880168803e-05, + "loss": 0.5352, + "step": 33186 + }, + { + "epoch": 0.9112300933552993, + "grad_norm": 0.4014900326728821, + "learning_rate": 1.1413587324963297e-05, + "loss": 0.5115, + "step": 33187 + }, + { + "epoch": 0.9112575507962658, + "grad_norm": 0.39440059661865234, + "learning_rate": 1.1413159767120998e-05, + "loss": 0.5525, + "step": 33188 + }, + { + "epoch": 0.9112850082372322, + "grad_norm": 0.3816300928592682, + "learning_rate": 1.1412732206642708e-05, + "loss": 0.5211, + "step": 33189 + }, + { + "epoch": 0.9113124656781988, + "grad_norm": 0.3885083496570587, + "learning_rate": 1.1412304643529218e-05, + "loss": 0.3763, + "step": 33190 + }, + { + "epoch": 0.9113399231191653, + "grad_norm": 0.34326091408729553, + "learning_rate": 1.141187707778133e-05, + "loss": 0.4163, + "step": 33191 + }, + { + "epoch": 0.9113673805601318, + "grad_norm": 0.3493137061595917, + "learning_rate": 1.1411449509399844e-05, + "loss": 0.4517, + "step": 33192 + }, + { + "epoch": 0.9113948380010983, + "grad_norm": 0.3930020332336426, + "learning_rate": 1.141102193838555e-05, + "loss": 0.5476, + "step": 33193 + }, + { + "epoch": 0.9114222954420648, + "grad_norm": 0.4072372615337372, + "learning_rate": 1.1410594364739252e-05, + "loss": 0.4332, + "step": 33194 + }, + { + "epoch": 0.9114497528830313, + "grad_norm": 0.41620534658432007, + "learning_rate": 1.1410166788461742e-05, + "loss": 0.5207, + "step": 33195 + }, + { + "epoch": 0.9114772103239978, + "grad_norm": 0.4529667794704437, + "learning_rate": 1.1409739209553825e-05, + "loss": 0.5028, + "step": 33196 + }, + { + "epoch": 0.9115046677649643, + "grad_norm": 0.3901026248931885, + "learning_rate": 1.1409311628016295e-05, + "loss": 0.505, + "step": 33197 + }, + { + "epoch": 0.9115321252059309, + "grad_norm": 0.48599866032600403, + "learning_rate": 1.1408884043849947e-05, + "loss": 0.5226, + "step": 33198 + }, + { + "epoch": 0.9115595826468973, + "grad_norm": 0.4142121374607086, + "learning_rate": 1.1408456457055579e-05, + "loss": 0.5065, + "step": 33199 + }, + { + "epoch": 0.9115870400878638, + "grad_norm": 0.40359431505203247, + "learning_rate": 1.1408028867633991e-05, + "loss": 0.4704, + "step": 33200 + }, + { + "epoch": 0.9116144975288303, + "grad_norm": 0.43472006916999817, + "learning_rate": 1.1407601275585983e-05, + "loss": 0.5448, + "step": 33201 + }, + { + "epoch": 0.9116419549697968, + "grad_norm": 0.3834371268749237, + "learning_rate": 1.1407173680912347e-05, + "loss": 0.4486, + "step": 33202 + }, + { + "epoch": 0.9116694124107633, + "grad_norm": 0.402173787355423, + "learning_rate": 1.1406746083613883e-05, + "loss": 0.5357, + "step": 33203 + }, + { + "epoch": 0.9116968698517298, + "grad_norm": 0.895419180393219, + "learning_rate": 1.1406318483691387e-05, + "loss": 0.5253, + "step": 33204 + }, + { + "epoch": 0.9117243272926964, + "grad_norm": 0.35169127583503723, + "learning_rate": 1.140589088114566e-05, + "loss": 0.503, + "step": 33205 + }, + { + "epoch": 0.9117517847336628, + "grad_norm": 0.42632538080215454, + "learning_rate": 1.1405463275977498e-05, + "loss": 0.4935, + "step": 33206 + }, + { + "epoch": 0.9117792421746294, + "grad_norm": 0.35165640711784363, + "learning_rate": 1.1405035668187698e-05, + "loss": 0.4133, + "step": 33207 + }, + { + "epoch": 0.9118066996155958, + "grad_norm": 0.4498632550239563, + "learning_rate": 1.1404608057777055e-05, + "loss": 0.3807, + "step": 33208 + }, + { + "epoch": 0.9118341570565623, + "grad_norm": 0.4246140718460083, + "learning_rate": 1.1404180444746374e-05, + "loss": 0.5027, + "step": 33209 + }, + { + "epoch": 0.9118616144975288, + "grad_norm": 0.4031001329421997, + "learning_rate": 1.1403752829096448e-05, + "loss": 0.507, + "step": 33210 + }, + { + "epoch": 0.9118890719384953, + "grad_norm": 0.3955107629299164, + "learning_rate": 1.1403325210828074e-05, + "loss": 0.4631, + "step": 33211 + }, + { + "epoch": 0.9119165293794619, + "grad_norm": 0.40645620226860046, + "learning_rate": 1.140289758994205e-05, + "loss": 0.5691, + "step": 33212 + }, + { + "epoch": 0.9119439868204283, + "grad_norm": 0.4885683059692383, + "learning_rate": 1.1402469966439173e-05, + "loss": 0.4177, + "step": 33213 + }, + { + "epoch": 0.9119714442613949, + "grad_norm": 0.3716363310813904, + "learning_rate": 1.1402042340320245e-05, + "loss": 0.4417, + "step": 33214 + }, + { + "epoch": 0.9119989017023613, + "grad_norm": 0.4086598753929138, + "learning_rate": 1.140161471158606e-05, + "loss": 0.4599, + "step": 33215 + }, + { + "epoch": 0.9120263591433279, + "grad_norm": 0.36536434292793274, + "learning_rate": 1.1401187080237413e-05, + "loss": 0.3802, + "step": 33216 + }, + { + "epoch": 0.9120538165842943, + "grad_norm": 0.39356744289398193, + "learning_rate": 1.140075944627511e-05, + "loss": 0.4777, + "step": 33217 + }, + { + "epoch": 0.9120812740252608, + "grad_norm": 0.4061787724494934, + "learning_rate": 1.1400331809699941e-05, + "loss": 0.4756, + "step": 33218 + }, + { + "epoch": 0.9121087314662274, + "grad_norm": 0.4425891637802124, + "learning_rate": 1.1399904170512703e-05, + "loss": 0.5082, + "step": 33219 + }, + { + "epoch": 0.9121361889071938, + "grad_norm": 0.7024703621864319, + "learning_rate": 1.1399476528714202e-05, + "loss": 0.3965, + "step": 33220 + }, + { + "epoch": 0.9121636463481604, + "grad_norm": 0.43580636382102966, + "learning_rate": 1.1399048884305226e-05, + "loss": 0.5464, + "step": 33221 + }, + { + "epoch": 0.9121911037891268, + "grad_norm": 0.38742151856422424, + "learning_rate": 1.1398621237286582e-05, + "loss": 0.4095, + "step": 33222 + }, + { + "epoch": 0.9122185612300934, + "grad_norm": 0.4103691875934601, + "learning_rate": 1.1398193587659061e-05, + "loss": 0.4926, + "step": 33223 + }, + { + "epoch": 0.9122460186710598, + "grad_norm": 0.42901286482810974, + "learning_rate": 1.1397765935423462e-05, + "loss": 0.5242, + "step": 33224 + }, + { + "epoch": 0.9122734761120264, + "grad_norm": 0.3242127597332001, + "learning_rate": 1.1397338280580586e-05, + "loss": 0.3945, + "step": 33225 + }, + { + "epoch": 0.9123009335529929, + "grad_norm": 0.3531312048435211, + "learning_rate": 1.1396910623131223e-05, + "loss": 0.48, + "step": 33226 + }, + { + "epoch": 0.9123283909939593, + "grad_norm": 0.39910373091697693, + "learning_rate": 1.139648296307618e-05, + "loss": 0.4583, + "step": 33227 + }, + { + "epoch": 0.9123558484349259, + "grad_norm": 0.4098438620567322, + "learning_rate": 1.1396055300416252e-05, + "loss": 0.4681, + "step": 33228 + }, + { + "epoch": 0.9123833058758923, + "grad_norm": 0.6536081433296204, + "learning_rate": 1.1395627635152231e-05, + "loss": 0.4699, + "step": 33229 + }, + { + "epoch": 0.9124107633168589, + "grad_norm": 0.4102379381656647, + "learning_rate": 1.139519996728492e-05, + "loss": 0.4892, + "step": 33230 + }, + { + "epoch": 0.9124382207578253, + "grad_norm": 0.3981364369392395, + "learning_rate": 1.1394772296815115e-05, + "loss": 0.5093, + "step": 33231 + }, + { + "epoch": 0.9124656781987919, + "grad_norm": 0.3571736514568329, + "learning_rate": 1.1394344623743618e-05, + "loss": 0.4622, + "step": 33232 + }, + { + "epoch": 0.9124931356397584, + "grad_norm": 0.37555983662605286, + "learning_rate": 1.1393916948071221e-05, + "loss": 0.4773, + "step": 33233 + }, + { + "epoch": 0.9125205930807249, + "grad_norm": 0.5989987850189209, + "learning_rate": 1.1393489269798725e-05, + "loss": 0.5654, + "step": 33234 + }, + { + "epoch": 0.9125480505216914, + "grad_norm": 0.36564117670059204, + "learning_rate": 1.1393061588926925e-05, + "loss": 0.4865, + "step": 33235 + }, + { + "epoch": 0.9125755079626579, + "grad_norm": 0.3931315243244171, + "learning_rate": 1.1392633905456625e-05, + "loss": 0.4216, + "step": 33236 + }, + { + "epoch": 0.9126029654036244, + "grad_norm": 0.4013591706752777, + "learning_rate": 1.1392206219388613e-05, + "loss": 0.5932, + "step": 33237 + }, + { + "epoch": 0.9126304228445908, + "grad_norm": 0.37055855989456177, + "learning_rate": 1.1391778530723694e-05, + "loss": 0.4976, + "step": 33238 + }, + { + "epoch": 0.9126578802855574, + "grad_norm": 0.609919548034668, + "learning_rate": 1.1391350839462662e-05, + "loss": 0.425, + "step": 33239 + }, + { + "epoch": 0.9126853377265239, + "grad_norm": 0.4533447325229645, + "learning_rate": 1.1390923145606321e-05, + "loss": 0.5435, + "step": 33240 + }, + { + "epoch": 0.9127127951674904, + "grad_norm": 0.37283217906951904, + "learning_rate": 1.1390495449155461e-05, + "loss": 0.4633, + "step": 33241 + }, + { + "epoch": 0.9127402526084569, + "grad_norm": 0.4493156969547272, + "learning_rate": 1.1390067750110885e-05, + "loss": 0.5318, + "step": 33242 + }, + { + "epoch": 0.9127677100494234, + "grad_norm": 0.3815068006515503, + "learning_rate": 1.1389640048473388e-05, + "loss": 0.5023, + "step": 33243 + }, + { + "epoch": 0.9127951674903899, + "grad_norm": 0.3648495078086853, + "learning_rate": 1.1389212344243767e-05, + "loss": 0.4297, + "step": 33244 + }, + { + "epoch": 0.9128226249313564, + "grad_norm": 0.5694277286529541, + "learning_rate": 1.1388784637422825e-05, + "loss": 0.5709, + "step": 33245 + }, + { + "epoch": 0.9128500823723229, + "grad_norm": 0.7918714880943298, + "learning_rate": 1.1388356928011352e-05, + "loss": 0.5502, + "step": 33246 + }, + { + "epoch": 0.9128775398132895, + "grad_norm": 0.42878714203834534, + "learning_rate": 1.1387929216010153e-05, + "loss": 0.4994, + "step": 33247 + }, + { + "epoch": 0.9129049972542559, + "grad_norm": 0.36424633860588074, + "learning_rate": 1.1387501501420027e-05, + "loss": 0.4685, + "step": 33248 + }, + { + "epoch": 0.9129324546952224, + "grad_norm": 0.35172098875045776, + "learning_rate": 1.1387073784241762e-05, + "loss": 0.4075, + "step": 33249 + }, + { + "epoch": 0.9129599121361889, + "grad_norm": 0.38967764377593994, + "learning_rate": 1.1386646064476163e-05, + "loss": 0.4615, + "step": 33250 + }, + { + "epoch": 0.9129873695771554, + "grad_norm": 0.3774595558643341, + "learning_rate": 1.1386218342124026e-05, + "loss": 0.5283, + "step": 33251 + }, + { + "epoch": 0.9130148270181219, + "grad_norm": 0.41250696778297424, + "learning_rate": 1.1385790617186152e-05, + "loss": 0.5552, + "step": 33252 + }, + { + "epoch": 0.9130422844590884, + "grad_norm": 0.3871211111545563, + "learning_rate": 1.1385362889663337e-05, + "loss": 0.4794, + "step": 33253 + }, + { + "epoch": 0.913069741900055, + "grad_norm": 0.3958478271961212, + "learning_rate": 1.1384935159556375e-05, + "loss": 0.4888, + "step": 33254 + }, + { + "epoch": 0.9130971993410214, + "grad_norm": 0.38556355237960815, + "learning_rate": 1.1384507426866067e-05, + "loss": 0.5029, + "step": 33255 + }, + { + "epoch": 0.913124656781988, + "grad_norm": 0.37854525446891785, + "learning_rate": 1.138407969159321e-05, + "loss": 0.4649, + "step": 33256 + }, + { + "epoch": 0.9131521142229544, + "grad_norm": 0.4754444658756256, + "learning_rate": 1.1383651953738606e-05, + "loss": 0.4898, + "step": 33257 + }, + { + "epoch": 0.9131795716639209, + "grad_norm": 0.4187973141670227, + "learning_rate": 1.1383224213303046e-05, + "loss": 0.5086, + "step": 33258 + }, + { + "epoch": 0.9132070291048874, + "grad_norm": 0.34257519245147705, + "learning_rate": 1.1382796470287332e-05, + "loss": 0.329, + "step": 33259 + }, + { + "epoch": 0.9132344865458539, + "grad_norm": 0.38011637330055237, + "learning_rate": 1.1382368724692262e-05, + "loss": 0.527, + "step": 33260 + }, + { + "epoch": 0.9132619439868205, + "grad_norm": 0.38932064175605774, + "learning_rate": 1.1381940976518635e-05, + "loss": 0.5154, + "step": 33261 + }, + { + "epoch": 0.9132894014277869, + "grad_norm": 0.3985004425048828, + "learning_rate": 1.1381513225767245e-05, + "loss": 0.5086, + "step": 33262 + }, + { + "epoch": 0.9133168588687535, + "grad_norm": 0.3966637849807739, + "learning_rate": 1.138108547243889e-05, + "loss": 0.4992, + "step": 33263 + }, + { + "epoch": 0.9133443163097199, + "grad_norm": 0.42909038066864014, + "learning_rate": 1.1380657716534374e-05, + "loss": 0.5458, + "step": 33264 + }, + { + "epoch": 0.9133717737506865, + "grad_norm": 0.5516573786735535, + "learning_rate": 1.138022995805449e-05, + "loss": 0.4232, + "step": 33265 + }, + { + "epoch": 0.9133992311916529, + "grad_norm": 0.3379818797111511, + "learning_rate": 1.1379802197000034e-05, + "loss": 0.4589, + "step": 33266 + }, + { + "epoch": 0.9134266886326194, + "grad_norm": 0.37583765387535095, + "learning_rate": 1.1379374433371807e-05, + "loss": 0.5003, + "step": 33267 + }, + { + "epoch": 0.913454146073586, + "grad_norm": 0.5642133355140686, + "learning_rate": 1.1378946667170606e-05, + "loss": 0.501, + "step": 33268 + }, + { + "epoch": 0.9134816035145524, + "grad_norm": 0.4027680456638336, + "learning_rate": 1.1378518898397232e-05, + "loss": 0.5164, + "step": 33269 + }, + { + "epoch": 0.913509060955519, + "grad_norm": 0.3852553069591522, + "learning_rate": 1.1378091127052481e-05, + "loss": 0.4956, + "step": 33270 + }, + { + "epoch": 0.9135365183964854, + "grad_norm": 0.4264400601387024, + "learning_rate": 1.1377663353137146e-05, + "loss": 0.5565, + "step": 33271 + }, + { + "epoch": 0.913563975837452, + "grad_norm": 0.376498818397522, + "learning_rate": 1.1377235576652032e-05, + "loss": 0.453, + "step": 33272 + }, + { + "epoch": 0.9135914332784184, + "grad_norm": 0.37323880195617676, + "learning_rate": 1.1376807797597933e-05, + "loss": 0.5102, + "step": 33273 + }, + { + "epoch": 0.913618890719385, + "grad_norm": 0.3864535689353943, + "learning_rate": 1.137638001597565e-05, + "loss": 0.5491, + "step": 33274 + }, + { + "epoch": 0.9136463481603515, + "grad_norm": 0.3389412462711334, + "learning_rate": 1.1375952231785976e-05, + "loss": 0.4043, + "step": 33275 + }, + { + "epoch": 0.913673805601318, + "grad_norm": 0.3731346130371094, + "learning_rate": 1.1375524445029713e-05, + "loss": 0.5637, + "step": 33276 + }, + { + "epoch": 0.9137012630422845, + "grad_norm": 0.3787975609302521, + "learning_rate": 1.137509665570766e-05, + "loss": 0.4392, + "step": 33277 + }, + { + "epoch": 0.9137287204832509, + "grad_norm": 0.40640127658843994, + "learning_rate": 1.137466886382061e-05, + "loss": 0.48, + "step": 33278 + }, + { + "epoch": 0.9137561779242175, + "grad_norm": 0.39419639110565186, + "learning_rate": 1.1374241069369365e-05, + "loss": 0.4102, + "step": 33279 + }, + { + "epoch": 0.9137836353651839, + "grad_norm": 0.4369790554046631, + "learning_rate": 1.1373813272354723e-05, + "loss": 0.4657, + "step": 33280 + }, + { + "epoch": 0.9138110928061505, + "grad_norm": 0.3530455231666565, + "learning_rate": 1.1373385472777478e-05, + "loss": 0.481, + "step": 33281 + }, + { + "epoch": 0.913838550247117, + "grad_norm": 0.43074536323547363, + "learning_rate": 1.1372957670638434e-05, + "loss": 0.5782, + "step": 33282 + }, + { + "epoch": 0.9138660076880835, + "grad_norm": 0.39321398735046387, + "learning_rate": 1.1372529865938381e-05, + "loss": 0.4834, + "step": 33283 + }, + { + "epoch": 0.91389346512905, + "grad_norm": 0.43572160601615906, + "learning_rate": 1.1372102058678127e-05, + "loss": 0.473, + "step": 33284 + }, + { + "epoch": 0.9139209225700164, + "grad_norm": 0.39124253392219543, + "learning_rate": 1.1371674248858462e-05, + "loss": 0.4502, + "step": 33285 + }, + { + "epoch": 0.913948380010983, + "grad_norm": 0.37752214074134827, + "learning_rate": 1.1371246436480185e-05, + "loss": 0.4985, + "step": 33286 + }, + { + "epoch": 0.9139758374519494, + "grad_norm": 0.408319890499115, + "learning_rate": 1.13708186215441e-05, + "loss": 0.5272, + "step": 33287 + }, + { + "epoch": 0.914003294892916, + "grad_norm": 0.490293025970459, + "learning_rate": 1.1370390804050997e-05, + "loss": 0.5763, + "step": 33288 + }, + { + "epoch": 0.9140307523338825, + "grad_norm": 0.38309866189956665, + "learning_rate": 1.136996298400168e-05, + "loss": 0.4547, + "step": 33289 + }, + { + "epoch": 0.914058209774849, + "grad_norm": 0.38430216908454895, + "learning_rate": 1.1369535161396947e-05, + "loss": 0.5118, + "step": 33290 + }, + { + "epoch": 0.9140856672158155, + "grad_norm": 0.5054970979690552, + "learning_rate": 1.1369107336237587e-05, + "loss": 0.5558, + "step": 33291 + }, + { + "epoch": 0.914113124656782, + "grad_norm": 0.393523633480072, + "learning_rate": 1.1368679508524412e-05, + "loss": 0.4609, + "step": 33292 + }, + { + "epoch": 0.9141405820977485, + "grad_norm": 0.37956392765045166, + "learning_rate": 1.1368251678258209e-05, + "loss": 0.4152, + "step": 33293 + }, + { + "epoch": 0.914168039538715, + "grad_norm": 0.45804134011268616, + "learning_rate": 1.1367823845439781e-05, + "loss": 0.5968, + "step": 33294 + }, + { + "epoch": 0.9141954969796815, + "grad_norm": 0.3888421654701233, + "learning_rate": 1.1367396010069927e-05, + "loss": 0.5077, + "step": 33295 + }, + { + "epoch": 0.914222954420648, + "grad_norm": 0.4175409972667694, + "learning_rate": 1.1366968172149437e-05, + "loss": 0.4433, + "step": 33296 + }, + { + "epoch": 0.9142504118616145, + "grad_norm": 0.3859711289405823, + "learning_rate": 1.1366540331679122e-05, + "loss": 0.5088, + "step": 33297 + }, + { + "epoch": 0.914277869302581, + "grad_norm": 0.37746697664260864, + "learning_rate": 1.136611248865977e-05, + "loss": 0.5056, + "step": 33298 + }, + { + "epoch": 0.9143053267435475, + "grad_norm": 0.37914538383483887, + "learning_rate": 1.136568464309218e-05, + "loss": 0.5299, + "step": 33299 + }, + { + "epoch": 0.914332784184514, + "grad_norm": 0.38242048025131226, + "learning_rate": 1.1365256794977156e-05, + "loss": 0.4841, + "step": 33300 + }, + { + "epoch": 0.9143602416254805, + "grad_norm": 0.3920416235923767, + "learning_rate": 1.136482894431549e-05, + "loss": 0.5374, + "step": 33301 + }, + { + "epoch": 0.914387699066447, + "grad_norm": 0.5411343574523926, + "learning_rate": 1.1364401091107984e-05, + "loss": 0.5014, + "step": 33302 + }, + { + "epoch": 0.9144151565074136, + "grad_norm": 0.4681203365325928, + "learning_rate": 1.1363973235355434e-05, + "loss": 0.5132, + "step": 33303 + }, + { + "epoch": 0.91444261394838, + "grad_norm": 0.46322840452194214, + "learning_rate": 1.1363545377058636e-05, + "loss": 0.5653, + "step": 33304 + }, + { + "epoch": 0.9144700713893466, + "grad_norm": 0.4795876145362854, + "learning_rate": 1.1363117516218393e-05, + "loss": 0.5017, + "step": 33305 + }, + { + "epoch": 0.914497528830313, + "grad_norm": 0.3977147936820984, + "learning_rate": 1.1362689652835501e-05, + "loss": 0.4927, + "step": 33306 + }, + { + "epoch": 0.9145249862712795, + "grad_norm": 0.40267637372016907, + "learning_rate": 1.1362261786910758e-05, + "loss": 0.5577, + "step": 33307 + }, + { + "epoch": 0.914552443712246, + "grad_norm": 0.4260820746421814, + "learning_rate": 1.1361833918444962e-05, + "loss": 0.5044, + "step": 33308 + }, + { + "epoch": 0.9145799011532125, + "grad_norm": 0.3954068720340729, + "learning_rate": 1.136140604743891e-05, + "loss": 0.464, + "step": 33309 + }, + { + "epoch": 0.9146073585941791, + "grad_norm": 0.3910139501094818, + "learning_rate": 1.1360978173893403e-05, + "loss": 0.624, + "step": 33310 + }, + { + "epoch": 0.9146348160351455, + "grad_norm": 0.5402277708053589, + "learning_rate": 1.1360550297809236e-05, + "loss": 0.5854, + "step": 33311 + }, + { + "epoch": 0.9146622734761121, + "grad_norm": 0.3928172290325165, + "learning_rate": 1.1360122419187207e-05, + "loss": 0.4608, + "step": 33312 + }, + { + "epoch": 0.9146897309170785, + "grad_norm": 0.544653594493866, + "learning_rate": 1.1359694538028119e-05, + "loss": 0.502, + "step": 33313 + }, + { + "epoch": 0.914717188358045, + "grad_norm": 0.40395694971084595, + "learning_rate": 1.1359266654332764e-05, + "loss": 0.4432, + "step": 33314 + }, + { + "epoch": 0.9147446457990115, + "grad_norm": 0.3679070770740509, + "learning_rate": 1.1358838768101946e-05, + "loss": 0.4824, + "step": 33315 + }, + { + "epoch": 0.914772103239978, + "grad_norm": 0.35480719804763794, + "learning_rate": 1.1358410879336456e-05, + "loss": 0.49, + "step": 33316 + }, + { + "epoch": 0.9147995606809446, + "grad_norm": 0.3823293447494507, + "learning_rate": 1.1357982988037097e-05, + "loss": 0.4662, + "step": 33317 + }, + { + "epoch": 0.914827018121911, + "grad_norm": 0.3486495912075043, + "learning_rate": 1.1357555094204668e-05, + "loss": 0.3936, + "step": 33318 + }, + { + "epoch": 0.9148544755628776, + "grad_norm": 0.3745483458042145, + "learning_rate": 1.1357127197839965e-05, + "loss": 0.4649, + "step": 33319 + }, + { + "epoch": 0.914881933003844, + "grad_norm": 0.3733184039592743, + "learning_rate": 1.1356699298943787e-05, + "loss": 0.4638, + "step": 33320 + }, + { + "epoch": 0.9149093904448106, + "grad_norm": 0.4469298720359802, + "learning_rate": 1.1356271397516932e-05, + "loss": 0.438, + "step": 33321 + }, + { + "epoch": 0.914936847885777, + "grad_norm": 0.39146697521209717, + "learning_rate": 1.1355843493560196e-05, + "loss": 0.5252, + "step": 33322 + }, + { + "epoch": 0.9149643053267436, + "grad_norm": 0.42951714992523193, + "learning_rate": 1.1355415587074381e-05, + "loss": 0.5603, + "step": 33323 + }, + { + "epoch": 0.9149917627677101, + "grad_norm": 0.4340181350708008, + "learning_rate": 1.1354987678060281e-05, + "loss": 0.5098, + "step": 33324 + }, + { + "epoch": 0.9150192202086765, + "grad_norm": 0.41008666157722473, + "learning_rate": 1.13545597665187e-05, + "loss": 0.4528, + "step": 33325 + }, + { + "epoch": 0.9150466776496431, + "grad_norm": 0.386633962392807, + "learning_rate": 1.1354131852450431e-05, + "loss": 0.4582, + "step": 33326 + }, + { + "epoch": 0.9150741350906095, + "grad_norm": 0.48039302229881287, + "learning_rate": 1.1353703935856272e-05, + "loss": 0.4647, + "step": 33327 + }, + { + "epoch": 0.9151015925315761, + "grad_norm": 0.3808883726596832, + "learning_rate": 1.1353276016737027e-05, + "loss": 0.4408, + "step": 33328 + }, + { + "epoch": 0.9151290499725425, + "grad_norm": 0.41079777479171753, + "learning_rate": 1.135284809509349e-05, + "loss": 0.5378, + "step": 33329 + }, + { + "epoch": 0.9151565074135091, + "grad_norm": 0.4387950599193573, + "learning_rate": 1.1352420170926454e-05, + "loss": 0.5315, + "step": 33330 + }, + { + "epoch": 0.9151839648544756, + "grad_norm": 0.3882382810115814, + "learning_rate": 1.135199224423673e-05, + "loss": 0.5075, + "step": 33331 + }, + { + "epoch": 0.9152114222954421, + "grad_norm": 0.5053169131278992, + "learning_rate": 1.1351564315025104e-05, + "loss": 0.5215, + "step": 33332 + }, + { + "epoch": 0.9152388797364086, + "grad_norm": 0.3969508409500122, + "learning_rate": 1.135113638329238e-05, + "loss": 0.4355, + "step": 33333 + }, + { + "epoch": 0.915266337177375, + "grad_norm": 0.3586532175540924, + "learning_rate": 1.1350708449039357e-05, + "loss": 0.5047, + "step": 33334 + }, + { + "epoch": 0.9152937946183416, + "grad_norm": 0.3917367458343506, + "learning_rate": 1.135028051226683e-05, + "loss": 0.4978, + "step": 33335 + }, + { + "epoch": 0.915321252059308, + "grad_norm": 0.3701058030128479, + "learning_rate": 1.13498525729756e-05, + "loss": 0.5167, + "step": 33336 + }, + { + "epoch": 0.9153487095002746, + "grad_norm": 0.5030088424682617, + "learning_rate": 1.134942463116646e-05, + "loss": 0.4383, + "step": 33337 + }, + { + "epoch": 0.9153761669412411, + "grad_norm": 0.4423559904098511, + "learning_rate": 1.134899668684022e-05, + "loss": 0.5634, + "step": 33338 + }, + { + "epoch": 0.9154036243822076, + "grad_norm": 0.4709896445274353, + "learning_rate": 1.1348568739997667e-05, + "loss": 0.522, + "step": 33339 + }, + { + "epoch": 0.9154310818231741, + "grad_norm": 0.4995152950286865, + "learning_rate": 1.1348140790639601e-05, + "loss": 0.5774, + "step": 33340 + }, + { + "epoch": 0.9154585392641406, + "grad_norm": 0.3408285677433014, + "learning_rate": 1.1347712838766825e-05, + "loss": 0.4756, + "step": 33341 + }, + { + "epoch": 0.9154859967051071, + "grad_norm": 0.401239275932312, + "learning_rate": 1.1347284884380134e-05, + "loss": 0.4874, + "step": 33342 + }, + { + "epoch": 0.9155134541460735, + "grad_norm": 0.3945220112800598, + "learning_rate": 1.1346856927480323e-05, + "loss": 0.4712, + "step": 33343 + }, + { + "epoch": 0.9155409115870401, + "grad_norm": 0.36609897017478943, + "learning_rate": 1.13464289680682e-05, + "loss": 0.4436, + "step": 33344 + }, + { + "epoch": 0.9155683690280065, + "grad_norm": 0.39063096046447754, + "learning_rate": 1.1346001006144552e-05, + "loss": 0.4698, + "step": 33345 + }, + { + "epoch": 0.9155958264689731, + "grad_norm": 0.410415917634964, + "learning_rate": 1.1345573041710186e-05, + "loss": 0.474, + "step": 33346 + }, + { + "epoch": 0.9156232839099396, + "grad_norm": 0.3661254346370697, + "learning_rate": 1.1345145074765895e-05, + "loss": 0.4799, + "step": 33347 + }, + { + "epoch": 0.9156507413509061, + "grad_norm": 0.3786481022834778, + "learning_rate": 1.1344717105312477e-05, + "loss": 0.5218, + "step": 33348 + }, + { + "epoch": 0.9156781987918726, + "grad_norm": 0.41108810901641846, + "learning_rate": 1.1344289133350737e-05, + "loss": 0.5067, + "step": 33349 + }, + { + "epoch": 0.9157056562328391, + "grad_norm": 0.4208597242832184, + "learning_rate": 1.1343861158881466e-05, + "loss": 0.4616, + "step": 33350 + }, + { + "epoch": 0.9157331136738056, + "grad_norm": 0.37221208214759827, + "learning_rate": 1.1343433181905466e-05, + "loss": 0.5027, + "step": 33351 + }, + { + "epoch": 0.915760571114772, + "grad_norm": 0.4566466510295868, + "learning_rate": 1.1343005202423534e-05, + "loss": 0.4878, + "step": 33352 + }, + { + "epoch": 0.9157880285557386, + "grad_norm": 0.40486958622932434, + "learning_rate": 1.1342577220436468e-05, + "loss": 0.4784, + "step": 33353 + }, + { + "epoch": 0.9158154859967051, + "grad_norm": 0.41305986046791077, + "learning_rate": 1.1342149235945068e-05, + "loss": 0.4748, + "step": 33354 + }, + { + "epoch": 0.9158429434376716, + "grad_norm": 0.32712265849113464, + "learning_rate": 1.134172124895013e-05, + "loss": 0.4158, + "step": 33355 + }, + { + "epoch": 0.9158704008786381, + "grad_norm": 0.4024880528450012, + "learning_rate": 1.1341293259452455e-05, + "loss": 0.4429, + "step": 33356 + }, + { + "epoch": 0.9158978583196046, + "grad_norm": 0.5985063314437866, + "learning_rate": 1.1340865267452841e-05, + "loss": 0.5642, + "step": 33357 + }, + { + "epoch": 0.9159253157605711, + "grad_norm": 0.43173718452453613, + "learning_rate": 1.1340437272952083e-05, + "loss": 0.5793, + "step": 33358 + }, + { + "epoch": 0.9159527732015376, + "grad_norm": 0.4314613938331604, + "learning_rate": 1.1340009275950982e-05, + "loss": 0.574, + "step": 33359 + }, + { + "epoch": 0.9159802306425041, + "grad_norm": 0.4476182162761688, + "learning_rate": 1.133958127645034e-05, + "loss": 0.4807, + "step": 33360 + }, + { + "epoch": 0.9160076880834707, + "grad_norm": 0.3915746808052063, + "learning_rate": 1.1339153274450945e-05, + "loss": 0.546, + "step": 33361 + }, + { + "epoch": 0.9160351455244371, + "grad_norm": 0.3582809865474701, + "learning_rate": 1.1338725269953605e-05, + "loss": 0.4877, + "step": 33362 + }, + { + "epoch": 0.9160626029654036, + "grad_norm": 0.43572449684143066, + "learning_rate": 1.1338297262959115e-05, + "loss": 0.4609, + "step": 33363 + }, + { + "epoch": 0.9160900604063701, + "grad_norm": 0.3788011968135834, + "learning_rate": 1.1337869253468274e-05, + "loss": 0.5154, + "step": 33364 + }, + { + "epoch": 0.9161175178473366, + "grad_norm": 0.31946220993995667, + "learning_rate": 1.133744124148188e-05, + "loss": 0.4591, + "step": 33365 + }, + { + "epoch": 0.9161449752883031, + "grad_norm": 0.4189729690551758, + "learning_rate": 1.1337013227000727e-05, + "loss": 0.5374, + "step": 33366 + }, + { + "epoch": 0.9161724327292696, + "grad_norm": 0.4191599488258362, + "learning_rate": 1.1336585210025624e-05, + "loss": 0.4532, + "step": 33367 + }, + { + "epoch": 0.9161998901702362, + "grad_norm": 0.3796944320201874, + "learning_rate": 1.1336157190557357e-05, + "loss": 0.4485, + "step": 33368 + }, + { + "epoch": 0.9162273476112026, + "grad_norm": 0.34947705268859863, + "learning_rate": 1.1335729168596736e-05, + "loss": 0.4781, + "step": 33369 + }, + { + "epoch": 0.9162548050521692, + "grad_norm": 0.3946007788181305, + "learning_rate": 1.1335301144144551e-05, + "loss": 0.4554, + "step": 33370 + }, + { + "epoch": 0.9162822624931356, + "grad_norm": 0.4053274393081665, + "learning_rate": 1.1334873117201601e-05, + "loss": 0.4609, + "step": 33371 + }, + { + "epoch": 0.9163097199341022, + "grad_norm": 0.39245977997779846, + "learning_rate": 1.1334445087768691e-05, + "loss": 0.4162, + "step": 33372 + }, + { + "epoch": 0.9163371773750686, + "grad_norm": 0.41766420006752014, + "learning_rate": 1.1334017055846614e-05, + "loss": 0.5275, + "step": 33373 + }, + { + "epoch": 0.9163646348160351, + "grad_norm": 0.417843759059906, + "learning_rate": 1.1333589021436167e-05, + "loss": 0.4748, + "step": 33374 + }, + { + "epoch": 0.9163920922570017, + "grad_norm": 0.3954229950904846, + "learning_rate": 1.1333160984538152e-05, + "loss": 0.4482, + "step": 33375 + }, + { + "epoch": 0.9164195496979681, + "grad_norm": 0.3804294466972351, + "learning_rate": 1.1332732945153366e-05, + "loss": 0.427, + "step": 33376 + }, + { + "epoch": 0.9164470071389347, + "grad_norm": 0.4269979000091553, + "learning_rate": 1.1332304903282609e-05, + "loss": 0.4944, + "step": 33377 + }, + { + "epoch": 0.9164744645799011, + "grad_norm": 0.43120691180229187, + "learning_rate": 1.1331876858926677e-05, + "loss": 0.5462, + "step": 33378 + }, + { + "epoch": 0.9165019220208677, + "grad_norm": 0.4055705666542053, + "learning_rate": 1.133144881208637e-05, + "loss": 0.498, + "step": 33379 + }, + { + "epoch": 0.9165293794618341, + "grad_norm": 0.3698061406612396, + "learning_rate": 1.1331020762762487e-05, + "loss": 0.4683, + "step": 33380 + }, + { + "epoch": 0.9165568369028007, + "grad_norm": 0.4034044146537781, + "learning_rate": 1.1330592710955823e-05, + "loss": 0.4935, + "step": 33381 + }, + { + "epoch": 0.9165842943437672, + "grad_norm": 0.5911890268325806, + "learning_rate": 1.133016465666718e-05, + "loss": 0.4225, + "step": 33382 + }, + { + "epoch": 0.9166117517847336, + "grad_norm": 0.35995280742645264, + "learning_rate": 1.1329736599897356e-05, + "loss": 0.4425, + "step": 33383 + }, + { + "epoch": 0.9166392092257002, + "grad_norm": 0.46822601556777954, + "learning_rate": 1.1329308540647148e-05, + "loss": 0.4848, + "step": 33384 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.44043850898742676, + "learning_rate": 1.1328880478917356e-05, + "loss": 0.4833, + "step": 33385 + }, + { + "epoch": 0.9166941241076332, + "grad_norm": 0.384834885597229, + "learning_rate": 1.1328452414708776e-05, + "loss": 0.5434, + "step": 33386 + }, + { + "epoch": 0.9167215815485996, + "grad_norm": 0.34816452860832214, + "learning_rate": 1.132802434802221e-05, + "loss": 0.4968, + "step": 33387 + }, + { + "epoch": 0.9167490389895662, + "grad_norm": 0.4711938500404358, + "learning_rate": 1.1327596278858458e-05, + "loss": 0.5106, + "step": 33388 + }, + { + "epoch": 0.9167764964305327, + "grad_norm": 0.3905113935470581, + "learning_rate": 1.132716820721831e-05, + "loss": 0.5243, + "step": 33389 + }, + { + "epoch": 0.9168039538714992, + "grad_norm": 0.5038570165634155, + "learning_rate": 1.1326740133102572e-05, + "loss": 0.5167, + "step": 33390 + }, + { + "epoch": 0.9168314113124657, + "grad_norm": 0.3778712749481201, + "learning_rate": 1.132631205651204e-05, + "loss": 0.4911, + "step": 33391 + }, + { + "epoch": 0.9168588687534321, + "grad_norm": 0.37944290041923523, + "learning_rate": 1.1325883977447511e-05, + "loss": 0.4005, + "step": 33392 + }, + { + "epoch": 0.9168863261943987, + "grad_norm": 0.39735177159309387, + "learning_rate": 1.1325455895909788e-05, + "loss": 0.5391, + "step": 33393 + }, + { + "epoch": 0.9169137836353651, + "grad_norm": 0.4089530110359192, + "learning_rate": 1.1325027811899665e-05, + "loss": 0.4677, + "step": 33394 + }, + { + "epoch": 0.9169412410763317, + "grad_norm": 0.36829906702041626, + "learning_rate": 1.1324599725417941e-05, + "loss": 0.4666, + "step": 33395 + }, + { + "epoch": 0.9169686985172982, + "grad_norm": 0.37514519691467285, + "learning_rate": 1.132417163646542e-05, + "loss": 0.4571, + "step": 33396 + }, + { + "epoch": 0.9169961559582647, + "grad_norm": 0.37303340435028076, + "learning_rate": 1.1323743545042892e-05, + "loss": 0.4497, + "step": 33397 + }, + { + "epoch": 0.9170236133992312, + "grad_norm": 0.40318402647972107, + "learning_rate": 1.1323315451151162e-05, + "loss": 0.4872, + "step": 33398 + }, + { + "epoch": 0.9170510708401977, + "grad_norm": 0.408075749874115, + "learning_rate": 1.1322887354791023e-05, + "loss": 0.4356, + "step": 33399 + }, + { + "epoch": 0.9170785282811642, + "grad_norm": 0.36907851696014404, + "learning_rate": 1.1322459255963284e-05, + "loss": 0.5218, + "step": 33400 + }, + { + "epoch": 0.9171059857221306, + "grad_norm": 0.4453272223472595, + "learning_rate": 1.132203115466873e-05, + "loss": 0.5288, + "step": 33401 + }, + { + "epoch": 0.9171334431630972, + "grad_norm": 0.41873544454574585, + "learning_rate": 1.1321603050908168e-05, + "loss": 0.4758, + "step": 33402 + }, + { + "epoch": 0.9171609006040637, + "grad_norm": 0.3583988845348358, + "learning_rate": 1.1321174944682393e-05, + "loss": 0.4866, + "step": 33403 + }, + { + "epoch": 0.9171883580450302, + "grad_norm": 0.39775407314300537, + "learning_rate": 1.1320746835992209e-05, + "loss": 0.4739, + "step": 33404 + }, + { + "epoch": 0.9172158154859967, + "grad_norm": 0.3745875358581543, + "learning_rate": 1.1320318724838406e-05, + "loss": 0.4637, + "step": 33405 + }, + { + "epoch": 0.9172432729269632, + "grad_norm": 0.45228666067123413, + "learning_rate": 1.1319890611221791e-05, + "loss": 0.5564, + "step": 33406 + }, + { + "epoch": 0.9172707303679297, + "grad_norm": 0.42812275886535645, + "learning_rate": 1.1319462495143157e-05, + "loss": 0.5017, + "step": 33407 + }, + { + "epoch": 0.9172981878088962, + "grad_norm": 0.3478068709373474, + "learning_rate": 1.1319034376603304e-05, + "loss": 0.5026, + "step": 33408 + }, + { + "epoch": 0.9173256452498627, + "grad_norm": 0.3622744083404541, + "learning_rate": 1.131860625560303e-05, + "loss": 0.4602, + "step": 33409 + }, + { + "epoch": 0.9173531026908293, + "grad_norm": 0.3829619586467743, + "learning_rate": 1.1318178132143136e-05, + "loss": 0.5416, + "step": 33410 + }, + { + "epoch": 0.9173805601317957, + "grad_norm": 0.5032811760902405, + "learning_rate": 1.131775000622442e-05, + "loss": 0.4844, + "step": 33411 + }, + { + "epoch": 0.9174080175727622, + "grad_norm": 0.4044209420681, + "learning_rate": 1.1317321877847677e-05, + "loss": 0.4953, + "step": 33412 + }, + { + "epoch": 0.9174354750137287, + "grad_norm": 0.36343812942504883, + "learning_rate": 1.131689374701371e-05, + "loss": 0.492, + "step": 33413 + }, + { + "epoch": 0.9174629324546952, + "grad_norm": 0.39200693368911743, + "learning_rate": 1.1316465613723318e-05, + "loss": 0.535, + "step": 33414 + }, + { + "epoch": 0.9174903898956617, + "grad_norm": 0.4196426570415497, + "learning_rate": 1.1316037477977293e-05, + "loss": 0.4337, + "step": 33415 + }, + { + "epoch": 0.9175178473366282, + "grad_norm": 0.3903834819793701, + "learning_rate": 1.1315609339776442e-05, + "loss": 0.4928, + "step": 33416 + }, + { + "epoch": 0.9175453047775948, + "grad_norm": 0.4253719449043274, + "learning_rate": 1.1315181199121555e-05, + "loss": 0.5044, + "step": 33417 + }, + { + "epoch": 0.9175727622185612, + "grad_norm": 0.3729749023914337, + "learning_rate": 1.1314753056013439e-05, + "loss": 0.452, + "step": 33418 + }, + { + "epoch": 0.9176002196595278, + "grad_norm": 0.3898993730545044, + "learning_rate": 1.131432491045289e-05, + "loss": 0.5351, + "step": 33419 + }, + { + "epoch": 0.9176276771004942, + "grad_norm": 0.3984692394733429, + "learning_rate": 1.1313896762440701e-05, + "loss": 0.5305, + "step": 33420 + }, + { + "epoch": 0.9176551345414607, + "grad_norm": 0.41409561038017273, + "learning_rate": 1.1313468611977678e-05, + "loss": 0.491, + "step": 33421 + }, + { + "epoch": 0.9176825919824272, + "grad_norm": 0.3614175319671631, + "learning_rate": 1.1313040459064614e-05, + "loss": 0.4762, + "step": 33422 + }, + { + "epoch": 0.9177100494233937, + "grad_norm": 0.4203396737575531, + "learning_rate": 1.1312612303702315e-05, + "loss": 0.4798, + "step": 33423 + }, + { + "epoch": 0.9177375068643603, + "grad_norm": 0.3898731768131256, + "learning_rate": 1.1312184145891575e-05, + "loss": 0.5486, + "step": 33424 + }, + { + "epoch": 0.9177649643053267, + "grad_norm": 0.39737266302108765, + "learning_rate": 1.131175598563319e-05, + "loss": 0.5299, + "step": 33425 + }, + { + "epoch": 0.9177924217462933, + "grad_norm": 0.3494638204574585, + "learning_rate": 1.1311327822927962e-05, + "loss": 0.4162, + "step": 33426 + }, + { + "epoch": 0.9178198791872597, + "grad_norm": 0.40365323424339294, + "learning_rate": 1.1310899657776689e-05, + "loss": 0.5354, + "step": 33427 + }, + { + "epoch": 0.9178473366282263, + "grad_norm": 0.42206481099128723, + "learning_rate": 1.131047149018017e-05, + "loss": 0.5162, + "step": 33428 + }, + { + "epoch": 0.9178747940691927, + "grad_norm": 0.41986334323883057, + "learning_rate": 1.1310043320139205e-05, + "loss": 0.4878, + "step": 33429 + }, + { + "epoch": 0.9179022515101593, + "grad_norm": 0.3610078692436218, + "learning_rate": 1.1309615147654589e-05, + "loss": 0.473, + "step": 33430 + }, + { + "epoch": 0.9179297089511258, + "grad_norm": 0.38934507966041565, + "learning_rate": 1.1309186972727124e-05, + "loss": 0.4662, + "step": 33431 + }, + { + "epoch": 0.9179571663920922, + "grad_norm": 0.40797901153564453, + "learning_rate": 1.1308758795357606e-05, + "loss": 0.5193, + "step": 33432 + }, + { + "epoch": 0.9179846238330588, + "grad_norm": 0.4180936813354492, + "learning_rate": 1.1308330615546837e-05, + "loss": 0.513, + "step": 33433 + }, + { + "epoch": 0.9180120812740252, + "grad_norm": 0.49750569462776184, + "learning_rate": 1.1307902433295614e-05, + "loss": 0.517, + "step": 33434 + }, + { + "epoch": 0.9180395387149918, + "grad_norm": 0.3997799754142761, + "learning_rate": 1.1307474248604733e-05, + "loss": 0.5241, + "step": 33435 + }, + { + "epoch": 0.9180669961559582, + "grad_norm": 0.372850626707077, + "learning_rate": 1.1307046061474998e-05, + "loss": 0.4812, + "step": 33436 + }, + { + "epoch": 0.9180944535969248, + "grad_norm": 0.4008029103279114, + "learning_rate": 1.1306617871907202e-05, + "loss": 0.5255, + "step": 33437 + }, + { + "epoch": 0.9181219110378913, + "grad_norm": 0.459622859954834, + "learning_rate": 1.130618967990215e-05, + "loss": 0.547, + "step": 33438 + }, + { + "epoch": 0.9181493684788578, + "grad_norm": 0.3851218521595001, + "learning_rate": 1.1305761485460633e-05, + "loss": 0.4473, + "step": 33439 + }, + { + "epoch": 0.9181768259198243, + "grad_norm": 0.41296154260635376, + "learning_rate": 1.1305333288583458e-05, + "loss": 0.4465, + "step": 33440 + }, + { + "epoch": 0.9182042833607907, + "grad_norm": 0.41113805770874023, + "learning_rate": 1.1304905089271419e-05, + "loss": 0.5198, + "step": 33441 + }, + { + "epoch": 0.9182317408017573, + "grad_norm": 0.39154544472694397, + "learning_rate": 1.1304476887525313e-05, + "loss": 0.4781, + "step": 33442 + }, + { + "epoch": 0.9182591982427237, + "grad_norm": 0.38461583852767944, + "learning_rate": 1.1304048683345945e-05, + "loss": 0.4653, + "step": 33443 + }, + { + "epoch": 0.9182866556836903, + "grad_norm": 0.3371267020702362, + "learning_rate": 1.1303620476734108e-05, + "loss": 0.5302, + "step": 33444 + }, + { + "epoch": 0.9183141131246568, + "grad_norm": 0.35716357827186584, + "learning_rate": 1.1303192267690604e-05, + "loss": 0.4777, + "step": 33445 + }, + { + "epoch": 0.9183415705656233, + "grad_norm": 0.43677666783332825, + "learning_rate": 1.130276405621623e-05, + "loss": 0.4496, + "step": 33446 + }, + { + "epoch": 0.9183690280065898, + "grad_norm": 0.5293024778366089, + "learning_rate": 1.1302335842311783e-05, + "loss": 0.5157, + "step": 33447 + }, + { + "epoch": 0.9183964854475563, + "grad_norm": 0.47139036655426025, + "learning_rate": 1.1301907625978067e-05, + "loss": 0.5269, + "step": 33448 + }, + { + "epoch": 0.9184239428885228, + "grad_norm": 0.46659836173057556, + "learning_rate": 1.1301479407215874e-05, + "loss": 0.5411, + "step": 33449 + }, + { + "epoch": 0.9184514003294892, + "grad_norm": 0.3640977442264557, + "learning_rate": 1.1301051186026011e-05, + "loss": 0.4557, + "step": 33450 + }, + { + "epoch": 0.9184788577704558, + "grad_norm": 0.37758591771125793, + "learning_rate": 1.1300622962409272e-05, + "loss": 0.5494, + "step": 33451 + }, + { + "epoch": 0.9185063152114223, + "grad_norm": 0.41483551263809204, + "learning_rate": 1.1300194736366452e-05, + "loss": 0.4968, + "step": 33452 + }, + { + "epoch": 0.9185337726523888, + "grad_norm": 0.38981249928474426, + "learning_rate": 1.1299766507898358e-05, + "loss": 0.4954, + "step": 33453 + }, + { + "epoch": 0.9185612300933553, + "grad_norm": 0.3855620324611664, + "learning_rate": 1.129933827700578e-05, + "loss": 0.5208, + "step": 33454 + }, + { + "epoch": 0.9185886875343218, + "grad_norm": 0.4112050235271454, + "learning_rate": 1.1298910043689526e-05, + "loss": 0.4459, + "step": 33455 + }, + { + "epoch": 0.9186161449752883, + "grad_norm": 0.5465002655982971, + "learning_rate": 1.1298481807950386e-05, + "loss": 0.5222, + "step": 33456 + }, + { + "epoch": 0.9186436024162548, + "grad_norm": 0.413661390542984, + "learning_rate": 1.1298053569789166e-05, + "loss": 0.4244, + "step": 33457 + }, + { + "epoch": 0.9186710598572213, + "grad_norm": 0.4459654986858368, + "learning_rate": 1.1297625329206663e-05, + "loss": 0.4601, + "step": 33458 + }, + { + "epoch": 0.9186985172981879, + "grad_norm": 0.40080907940864563, + "learning_rate": 1.129719708620367e-05, + "loss": 0.5517, + "step": 33459 + }, + { + "epoch": 0.9187259747391543, + "grad_norm": 0.38507068157196045, + "learning_rate": 1.1296768840780996e-05, + "loss": 0.5813, + "step": 33460 + }, + { + "epoch": 0.9187534321801208, + "grad_norm": 0.3660943806171417, + "learning_rate": 1.129634059293943e-05, + "loss": 0.4672, + "step": 33461 + }, + { + "epoch": 0.9187808896210873, + "grad_norm": 0.4039769172668457, + "learning_rate": 1.1295912342679778e-05, + "loss": 0.5615, + "step": 33462 + }, + { + "epoch": 0.9188083470620538, + "grad_norm": 0.4475967586040497, + "learning_rate": 1.1295484090002834e-05, + "loss": 0.5334, + "step": 33463 + }, + { + "epoch": 0.9188358045030203, + "grad_norm": 0.4221562445163727, + "learning_rate": 1.1295055834909398e-05, + "loss": 0.5382, + "step": 33464 + }, + { + "epoch": 0.9188632619439868, + "grad_norm": 0.46099525690078735, + "learning_rate": 1.1294627577400272e-05, + "loss": 0.5086, + "step": 33465 + }, + { + "epoch": 0.9188907193849534, + "grad_norm": 0.40127599239349365, + "learning_rate": 1.129419931747625e-05, + "loss": 0.4864, + "step": 33466 + }, + { + "epoch": 0.9189181768259198, + "grad_norm": 0.3171643316745758, + "learning_rate": 1.1293771055138136e-05, + "loss": 0.3604, + "step": 33467 + }, + { + "epoch": 0.9189456342668864, + "grad_norm": 0.40399375557899475, + "learning_rate": 1.1293342790386723e-05, + "loss": 0.5067, + "step": 33468 + }, + { + "epoch": 0.9189730917078528, + "grad_norm": 0.4128081202507019, + "learning_rate": 1.1292914523222814e-05, + "loss": 0.5054, + "step": 33469 + }, + { + "epoch": 0.9190005491488193, + "grad_norm": 0.36236825585365295, + "learning_rate": 1.1292486253647208e-05, + "loss": 0.4375, + "step": 33470 + }, + { + "epoch": 0.9190280065897858, + "grad_norm": 0.3873863220214844, + "learning_rate": 1.1292057981660702e-05, + "loss": 0.4811, + "step": 33471 + }, + { + "epoch": 0.9190554640307523, + "grad_norm": 0.42211177945137024, + "learning_rate": 1.1291629707264093e-05, + "loss": 0.5194, + "step": 33472 + }, + { + "epoch": 0.9190829214717189, + "grad_norm": 0.4072783589363098, + "learning_rate": 1.1291201430458187e-05, + "loss": 0.6063, + "step": 33473 + }, + { + "epoch": 0.9191103789126853, + "grad_norm": 0.34950846433639526, + "learning_rate": 1.1290773151243775e-05, + "loss": 0.482, + "step": 33474 + }, + { + "epoch": 0.9191378363536519, + "grad_norm": 0.38999101519584656, + "learning_rate": 1.1290344869621659e-05, + "loss": 0.4876, + "step": 33475 + }, + { + "epoch": 0.9191652937946183, + "grad_norm": 0.38662028312683105, + "learning_rate": 1.1289916585592639e-05, + "loss": 0.4591, + "step": 33476 + }, + { + "epoch": 0.9191927512355849, + "grad_norm": 0.359781414270401, + "learning_rate": 1.1289488299157512e-05, + "loss": 0.4328, + "step": 33477 + }, + { + "epoch": 0.9192202086765513, + "grad_norm": 0.39228641986846924, + "learning_rate": 1.128906001031708e-05, + "loss": 0.496, + "step": 33478 + }, + { + "epoch": 0.9192476661175178, + "grad_norm": 0.4122947156429291, + "learning_rate": 1.1288631719072136e-05, + "loss": 0.5148, + "step": 33479 + }, + { + "epoch": 0.9192751235584844, + "grad_norm": 0.4801008999347687, + "learning_rate": 1.1288203425423487e-05, + "loss": 0.5098, + "step": 33480 + }, + { + "epoch": 0.9193025809994508, + "grad_norm": 0.3764784038066864, + "learning_rate": 1.1287775129371925e-05, + "loss": 0.4949, + "step": 33481 + }, + { + "epoch": 0.9193300384404174, + "grad_norm": 0.4128127694129944, + "learning_rate": 1.128734683091825e-05, + "loss": 0.5328, + "step": 33482 + }, + { + "epoch": 0.9193574958813838, + "grad_norm": 0.4790257215499878, + "learning_rate": 1.1286918530063264e-05, + "loss": 0.4603, + "step": 33483 + }, + { + "epoch": 0.9193849533223504, + "grad_norm": 0.4688652455806732, + "learning_rate": 1.1286490226807766e-05, + "loss": 0.5365, + "step": 33484 + }, + { + "epoch": 0.9194124107633168, + "grad_norm": 0.39019814133644104, + "learning_rate": 1.1286061921152549e-05, + "loss": 0.414, + "step": 33485 + }, + { + "epoch": 0.9194398682042834, + "grad_norm": 0.36712315678596497, + "learning_rate": 1.1285633613098418e-05, + "loss": 0.4494, + "step": 33486 + }, + { + "epoch": 0.9194673256452499, + "grad_norm": 0.4300134778022766, + "learning_rate": 1.1285205302646172e-05, + "loss": 0.5066, + "step": 33487 + }, + { + "epoch": 0.9194947830862163, + "grad_norm": 0.34062299132347107, + "learning_rate": 1.1284776989796607e-05, + "loss": 0.3781, + "step": 33488 + }, + { + "epoch": 0.9195222405271829, + "grad_norm": 0.4115528166294098, + "learning_rate": 1.1284348674550523e-05, + "loss": 0.5562, + "step": 33489 + }, + { + "epoch": 0.9195496979681493, + "grad_norm": 0.37328290939331055, + "learning_rate": 1.1283920356908715e-05, + "loss": 0.4737, + "step": 33490 + }, + { + "epoch": 0.9195771554091159, + "grad_norm": 0.3767518699169159, + "learning_rate": 1.128349203687199e-05, + "loss": 0.4985, + "step": 33491 + }, + { + "epoch": 0.9196046128500823, + "grad_norm": 0.4659891128540039, + "learning_rate": 1.128306371444114e-05, + "loss": 0.5271, + "step": 33492 + }, + { + "epoch": 0.9196320702910489, + "grad_norm": 0.427915096282959, + "learning_rate": 1.1282635389616967e-05, + "loss": 0.5196, + "step": 33493 + }, + { + "epoch": 0.9196595277320154, + "grad_norm": 0.409264475107193, + "learning_rate": 1.1282207062400273e-05, + "loss": 0.5095, + "step": 33494 + }, + { + "epoch": 0.9196869851729819, + "grad_norm": 0.36344513297080994, + "learning_rate": 1.1281778732791848e-05, + "loss": 0.4092, + "step": 33495 + }, + { + "epoch": 0.9197144426139484, + "grad_norm": 0.3781796395778656, + "learning_rate": 1.12813504007925e-05, + "loss": 0.468, + "step": 33496 + }, + { + "epoch": 0.9197419000549149, + "grad_norm": 0.3973129093647003, + "learning_rate": 1.1280922066403023e-05, + "loss": 0.5297, + "step": 33497 + }, + { + "epoch": 0.9197693574958814, + "grad_norm": 0.35214295983314514, + "learning_rate": 1.1280493729624222e-05, + "loss": 0.4751, + "step": 33498 + }, + { + "epoch": 0.9197968149368478, + "grad_norm": 0.349069744348526, + "learning_rate": 1.128006539045689e-05, + "loss": 0.4378, + "step": 33499 + }, + { + "epoch": 0.9198242723778144, + "grad_norm": 0.382144033908844, + "learning_rate": 1.1279637048901822e-05, + "loss": 0.4571, + "step": 33500 + }, + { + "epoch": 0.9198517298187809, + "grad_norm": 0.46528345346450806, + "learning_rate": 1.1279208704959827e-05, + "loss": 0.5267, + "step": 33501 + }, + { + "epoch": 0.9198791872597474, + "grad_norm": 0.37248528003692627, + "learning_rate": 1.12787803586317e-05, + "loss": 0.5013, + "step": 33502 + }, + { + "epoch": 0.9199066447007139, + "grad_norm": 0.41320520639419556, + "learning_rate": 1.1278352009918236e-05, + "loss": 0.5511, + "step": 33503 + }, + { + "epoch": 0.9199341021416804, + "grad_norm": 0.38980379700660706, + "learning_rate": 1.1277923658820243e-05, + "loss": 0.52, + "step": 33504 + }, + { + "epoch": 0.9199615595826469, + "grad_norm": 0.40667998790740967, + "learning_rate": 1.1277495305338508e-05, + "loss": 0.4823, + "step": 33505 + }, + { + "epoch": 0.9199890170236134, + "grad_norm": 0.3956132233142853, + "learning_rate": 1.127706694947384e-05, + "loss": 0.5076, + "step": 33506 + }, + { + "epoch": 0.9200164744645799, + "grad_norm": 0.4420919120311737, + "learning_rate": 1.1276638591227036e-05, + "loss": 0.4476, + "step": 33507 + }, + { + "epoch": 0.9200439319055465, + "grad_norm": 0.42444223165512085, + "learning_rate": 1.1276210230598892e-05, + "loss": 0.483, + "step": 33508 + }, + { + "epoch": 0.9200713893465129, + "grad_norm": 0.3794093430042267, + "learning_rate": 1.1275781867590208e-05, + "loss": 0.5105, + "step": 33509 + }, + { + "epoch": 0.9200988467874794, + "grad_norm": 0.3961133658885956, + "learning_rate": 1.1275353502201782e-05, + "loss": 0.5031, + "step": 33510 + }, + { + "epoch": 0.9201263042284459, + "grad_norm": 0.406255304813385, + "learning_rate": 1.1274925134434418e-05, + "loss": 0.4303, + "step": 33511 + }, + { + "epoch": 0.9201537616694124, + "grad_norm": 0.3567756116390228, + "learning_rate": 1.1274496764288912e-05, + "loss": 0.4447, + "step": 33512 + }, + { + "epoch": 0.9201812191103789, + "grad_norm": 0.38023263216018677, + "learning_rate": 1.127406839176606e-05, + "loss": 0.4664, + "step": 33513 + }, + { + "epoch": 0.9202086765513454, + "grad_norm": 0.39149847626686096, + "learning_rate": 1.1273640016866665e-05, + "loss": 0.4465, + "step": 33514 + }, + { + "epoch": 0.920236133992312, + "grad_norm": 0.4221875071525574, + "learning_rate": 1.1273211639591527e-05, + "loss": 0.4736, + "step": 33515 + }, + { + "epoch": 0.9202635914332784, + "grad_norm": 0.38551434874534607, + "learning_rate": 1.127278325994144e-05, + "loss": 0.4871, + "step": 33516 + }, + { + "epoch": 0.920291048874245, + "grad_norm": 0.39533379673957825, + "learning_rate": 1.1272354877917207e-05, + "loss": 0.5403, + "step": 33517 + }, + { + "epoch": 0.9203185063152114, + "grad_norm": 0.39006808400154114, + "learning_rate": 1.1271926493519624e-05, + "loss": 0.552, + "step": 33518 + }, + { + "epoch": 0.9203459637561779, + "grad_norm": 0.4117191433906555, + "learning_rate": 1.1271498106749497e-05, + "loss": 0.4498, + "step": 33519 + }, + { + "epoch": 0.9203734211971444, + "grad_norm": 0.4306970536708832, + "learning_rate": 1.1271069717607617e-05, + "loss": 0.5553, + "step": 33520 + }, + { + "epoch": 0.9204008786381109, + "grad_norm": 0.3882336914539337, + "learning_rate": 1.1270641326094785e-05, + "loss": 0.4652, + "step": 33521 + }, + { + "epoch": 0.9204283360790775, + "grad_norm": 0.4410727620124817, + "learning_rate": 1.1270212932211804e-05, + "loss": 0.4949, + "step": 33522 + }, + { + "epoch": 0.9204557935200439, + "grad_norm": 0.366113543510437, + "learning_rate": 1.1269784535959467e-05, + "loss": 0.5247, + "step": 33523 + }, + { + "epoch": 0.9204832509610105, + "grad_norm": 0.4300064742565155, + "learning_rate": 1.126935613733858e-05, + "loss": 0.546, + "step": 33524 + }, + { + "epoch": 0.9205107084019769, + "grad_norm": 0.4883978068828583, + "learning_rate": 1.126892773634994e-05, + "loss": 0.5061, + "step": 33525 + }, + { + "epoch": 0.9205381658429435, + "grad_norm": 1.0996953248977661, + "learning_rate": 1.126849933299434e-05, + "loss": 0.5013, + "step": 33526 + }, + { + "epoch": 0.9205656232839099, + "grad_norm": 0.36610347032546997, + "learning_rate": 1.1268070927272588e-05, + "loss": 0.5125, + "step": 33527 + }, + { + "epoch": 0.9205930807248764, + "grad_norm": 0.4561777412891388, + "learning_rate": 1.1267642519185476e-05, + "loss": 0.5618, + "step": 33528 + }, + { + "epoch": 0.920620538165843, + "grad_norm": 0.3861973285675049, + "learning_rate": 1.1267214108733809e-05, + "loss": 0.5092, + "step": 33529 + }, + { + "epoch": 0.9206479956068094, + "grad_norm": 0.3433125913143158, + "learning_rate": 1.126678569591838e-05, + "loss": 0.4671, + "step": 33530 + }, + { + "epoch": 0.920675453047776, + "grad_norm": 0.4042072594165802, + "learning_rate": 1.1266357280739992e-05, + "loss": 0.5009, + "step": 33531 + }, + { + "epoch": 0.9207029104887424, + "grad_norm": 0.4350067973136902, + "learning_rate": 1.1265928863199447e-05, + "loss": 0.4858, + "step": 33532 + }, + { + "epoch": 0.920730367929709, + "grad_norm": 0.49120238423347473, + "learning_rate": 1.126550044329754e-05, + "loss": 0.4963, + "step": 33533 + }, + { + "epoch": 0.9207578253706754, + "grad_norm": 0.36439359188079834, + "learning_rate": 1.1265072021035065e-05, + "loss": 0.5197, + "step": 33534 + }, + { + "epoch": 0.920785282811642, + "grad_norm": 0.3887249231338501, + "learning_rate": 1.1264643596412834e-05, + "loss": 0.4786, + "step": 33535 + }, + { + "epoch": 0.9208127402526085, + "grad_norm": 0.39717593789100647, + "learning_rate": 1.1264215169431634e-05, + "loss": 0.5339, + "step": 33536 + }, + { + "epoch": 0.920840197693575, + "grad_norm": 0.3658789396286011, + "learning_rate": 1.126378674009227e-05, + "loss": 0.5187, + "step": 33537 + }, + { + "epoch": 0.9208676551345415, + "grad_norm": 0.3783651888370514, + "learning_rate": 1.1263358308395544e-05, + "loss": 0.5213, + "step": 33538 + }, + { + "epoch": 0.9208951125755079, + "grad_norm": 0.3881324529647827, + "learning_rate": 1.1262929874342248e-05, + "loss": 0.4964, + "step": 33539 + }, + { + "epoch": 0.9209225700164745, + "grad_norm": 0.5824635624885559, + "learning_rate": 1.1262501437933187e-05, + "loss": 0.4346, + "step": 33540 + }, + { + "epoch": 0.9209500274574409, + "grad_norm": 0.41080838441848755, + "learning_rate": 1.1262072999169155e-05, + "loss": 0.5333, + "step": 33541 + }, + { + "epoch": 0.9209774848984075, + "grad_norm": 0.43444502353668213, + "learning_rate": 1.1261644558050957e-05, + "loss": 0.6005, + "step": 33542 + }, + { + "epoch": 0.921004942339374, + "grad_norm": 0.3883773386478424, + "learning_rate": 1.1261216114579387e-05, + "loss": 0.5352, + "step": 33543 + }, + { + "epoch": 0.9210323997803405, + "grad_norm": 0.4077460467815399, + "learning_rate": 1.1260787668755246e-05, + "loss": 0.5099, + "step": 33544 + }, + { + "epoch": 0.921059857221307, + "grad_norm": 0.37841638922691345, + "learning_rate": 1.1260359220579337e-05, + "loss": 0.4623, + "step": 33545 + }, + { + "epoch": 0.9210873146622734, + "grad_norm": 0.3361000716686249, + "learning_rate": 1.1259930770052454e-05, + "loss": 0.3612, + "step": 33546 + }, + { + "epoch": 0.92111477210324, + "grad_norm": 0.4301963150501251, + "learning_rate": 1.1259502317175397e-05, + "loss": 0.6004, + "step": 33547 + }, + { + "epoch": 0.9211422295442064, + "grad_norm": 0.39785677194595337, + "learning_rate": 1.1259073861948969e-05, + "loss": 0.5126, + "step": 33548 + }, + { + "epoch": 0.921169686985173, + "grad_norm": 0.39541199803352356, + "learning_rate": 1.1258645404373962e-05, + "loss": 0.4653, + "step": 33549 + }, + { + "epoch": 0.9211971444261395, + "grad_norm": 0.4626021683216095, + "learning_rate": 1.1258216944451183e-05, + "loss": 0.5293, + "step": 33550 + }, + { + "epoch": 0.921224601867106, + "grad_norm": 0.4504675567150116, + "learning_rate": 1.1257788482181426e-05, + "loss": 0.3701, + "step": 33551 + }, + { + "epoch": 0.9212520593080725, + "grad_norm": 0.3415599763393402, + "learning_rate": 1.1257360017565494e-05, + "loss": 0.4642, + "step": 33552 + }, + { + "epoch": 0.921279516749039, + "grad_norm": 0.34440863132476807, + "learning_rate": 1.1256931550604183e-05, + "loss": 0.3549, + "step": 33553 + }, + { + "epoch": 0.9213069741900055, + "grad_norm": 0.42272457480430603, + "learning_rate": 1.1256503081298291e-05, + "loss": 0.5225, + "step": 33554 + }, + { + "epoch": 0.921334431630972, + "grad_norm": 0.37755006551742554, + "learning_rate": 1.1256074609648624e-05, + "loss": 0.4905, + "step": 33555 + }, + { + "epoch": 0.9213618890719385, + "grad_norm": 0.4045681357383728, + "learning_rate": 1.1255646135655976e-05, + "loss": 0.5122, + "step": 33556 + }, + { + "epoch": 0.921389346512905, + "grad_norm": 0.39848488569259644, + "learning_rate": 1.1255217659321144e-05, + "loss": 0.4962, + "step": 33557 + }, + { + "epoch": 0.9214168039538715, + "grad_norm": 0.40182968974113464, + "learning_rate": 1.1254789180644935e-05, + "loss": 0.47, + "step": 33558 + }, + { + "epoch": 0.921444261394838, + "grad_norm": 0.3683414161205292, + "learning_rate": 1.125436069962814e-05, + "loss": 0.4621, + "step": 33559 + }, + { + "epoch": 0.9214717188358045, + "grad_norm": 0.34530675411224365, + "learning_rate": 1.1253932216271565e-05, + "loss": 0.5285, + "step": 33560 + }, + { + "epoch": 0.921499176276771, + "grad_norm": 0.4015193283557892, + "learning_rate": 1.1253503730576005e-05, + "loss": 0.4516, + "step": 33561 + }, + { + "epoch": 0.9215266337177375, + "grad_norm": 0.4156322479248047, + "learning_rate": 1.1253075242542259e-05, + "loss": 0.4732, + "step": 33562 + }, + { + "epoch": 0.921554091158704, + "grad_norm": 0.38956162333488464, + "learning_rate": 1.1252646752171131e-05, + "loss": 0.474, + "step": 33563 + }, + { + "epoch": 0.9215815485996706, + "grad_norm": 0.40862521529197693, + "learning_rate": 1.1252218259463415e-05, + "loss": 0.4503, + "step": 33564 + }, + { + "epoch": 0.921609006040637, + "grad_norm": 0.40014010667800903, + "learning_rate": 1.125178976441991e-05, + "loss": 0.5492, + "step": 33565 + }, + { + "epoch": 0.9216364634816036, + "grad_norm": 0.416425496339798, + "learning_rate": 1.1251361267041421e-05, + "loss": 0.513, + "step": 33566 + }, + { + "epoch": 0.92166392092257, + "grad_norm": 0.4537571370601654, + "learning_rate": 1.1250932767328741e-05, + "loss": 0.5258, + "step": 33567 + }, + { + "epoch": 0.9216913783635365, + "grad_norm": 0.40234842896461487, + "learning_rate": 1.1250504265282675e-05, + "loss": 0.3747, + "step": 33568 + }, + { + "epoch": 0.921718835804503, + "grad_norm": 0.4015073776245117, + "learning_rate": 1.125007576090402e-05, + "loss": 0.4262, + "step": 33569 + }, + { + "epoch": 0.9217462932454695, + "grad_norm": 0.40984046459198, + "learning_rate": 1.1249647254193573e-05, + "loss": 0.5113, + "step": 33570 + }, + { + "epoch": 0.9217737506864361, + "grad_norm": 0.48574188351631165, + "learning_rate": 1.1249218745152134e-05, + "loss": 0.606, + "step": 33571 + }, + { + "epoch": 0.9218012081274025, + "grad_norm": 0.39738237857818604, + "learning_rate": 1.1248790233780503e-05, + "loss": 0.4859, + "step": 33572 + }, + { + "epoch": 0.9218286655683691, + "grad_norm": 0.39110249280929565, + "learning_rate": 1.1248361720079482e-05, + "loss": 0.5017, + "step": 33573 + }, + { + "epoch": 0.9218561230093355, + "grad_norm": 5.463809013366699, + "learning_rate": 1.1247933204049867e-05, + "loss": 0.3788, + "step": 33574 + }, + { + "epoch": 0.921883580450302, + "grad_norm": 0.3050614297389984, + "learning_rate": 1.1247504685692459e-05, + "loss": 0.4238, + "step": 33575 + }, + { + "epoch": 0.9219110378912685, + "grad_norm": 0.3856457769870758, + "learning_rate": 1.1247076165008055e-05, + "loss": 0.3857, + "step": 33576 + }, + { + "epoch": 0.921938495332235, + "grad_norm": 0.41746076941490173, + "learning_rate": 1.1246647641997457e-05, + "loss": 0.5095, + "step": 33577 + }, + { + "epoch": 0.9219659527732016, + "grad_norm": 0.38316890597343445, + "learning_rate": 1.1246219116661463e-05, + "loss": 0.4982, + "step": 33578 + }, + { + "epoch": 0.921993410214168, + "grad_norm": 0.3629477918148041, + "learning_rate": 1.1245790589000873e-05, + "loss": 0.4627, + "step": 33579 + }, + { + "epoch": 0.9220208676551346, + "grad_norm": 0.38064074516296387, + "learning_rate": 1.1245362059016485e-05, + "loss": 0.4444, + "step": 33580 + }, + { + "epoch": 0.922048325096101, + "grad_norm": 0.3856874704360962, + "learning_rate": 1.1244933526709102e-05, + "loss": 0.4453, + "step": 33581 + }, + { + "epoch": 0.9220757825370676, + "grad_norm": 0.5371312499046326, + "learning_rate": 1.1244504992079516e-05, + "loss": 0.5265, + "step": 33582 + }, + { + "epoch": 0.922103239978034, + "grad_norm": 0.3926154375076294, + "learning_rate": 1.1244076455128535e-05, + "loss": 0.5041, + "step": 33583 + }, + { + "epoch": 0.9221306974190006, + "grad_norm": 0.40004971623420715, + "learning_rate": 1.1243647915856953e-05, + "loss": 0.5302, + "step": 33584 + }, + { + "epoch": 0.9221581548599671, + "grad_norm": 0.40102419257164, + "learning_rate": 1.1243219374265568e-05, + "loss": 0.5301, + "step": 33585 + }, + { + "epoch": 0.9221856123009335, + "grad_norm": 0.3785547912120819, + "learning_rate": 1.1242790830355186e-05, + "loss": 0.5245, + "step": 33586 + }, + { + "epoch": 0.9222130697419001, + "grad_norm": 0.4194906949996948, + "learning_rate": 1.1242362284126603e-05, + "loss": 0.5051, + "step": 33587 + }, + { + "epoch": 0.9222405271828665, + "grad_norm": 0.4297010898590088, + "learning_rate": 1.1241933735580613e-05, + "loss": 0.4791, + "step": 33588 + }, + { + "epoch": 0.9222679846238331, + "grad_norm": 0.3865632712841034, + "learning_rate": 1.1241505184718024e-05, + "loss": 0.4313, + "step": 33589 + }, + { + "epoch": 0.9222954420647995, + "grad_norm": 0.38926079869270325, + "learning_rate": 1.124107663153963e-05, + "loss": 0.6159, + "step": 33590 + }, + { + "epoch": 0.9223228995057661, + "grad_norm": 0.3743076026439667, + "learning_rate": 1.1240648076046232e-05, + "loss": 0.5165, + "step": 33591 + }, + { + "epoch": 0.9223503569467326, + "grad_norm": 0.3619472086429596, + "learning_rate": 1.1240219518238631e-05, + "loss": 0.4953, + "step": 33592 + }, + { + "epoch": 0.9223778143876991, + "grad_norm": 0.40082281827926636, + "learning_rate": 1.1239790958117622e-05, + "loss": 0.5064, + "step": 33593 + }, + { + "epoch": 0.9224052718286656, + "grad_norm": 0.44288012385368347, + "learning_rate": 1.123936239568401e-05, + "loss": 0.5128, + "step": 33594 + }, + { + "epoch": 0.922432729269632, + "grad_norm": 0.3568223714828491, + "learning_rate": 1.1238933830938591e-05, + "loss": 0.4861, + "step": 33595 + }, + { + "epoch": 0.9224601867105986, + "grad_norm": 0.40344366431236267, + "learning_rate": 1.1238505263882164e-05, + "loss": 0.4914, + "step": 33596 + }, + { + "epoch": 0.922487644151565, + "grad_norm": 0.35338926315307617, + "learning_rate": 1.1238076694515532e-05, + "loss": 0.4207, + "step": 33597 + }, + { + "epoch": 0.9225151015925316, + "grad_norm": 0.41320326924324036, + "learning_rate": 1.1237648122839488e-05, + "loss": 0.5127, + "step": 33598 + }, + { + "epoch": 0.9225425590334981, + "grad_norm": 0.38725799322128296, + "learning_rate": 1.1237219548854839e-05, + "loss": 0.5229, + "step": 33599 + }, + { + "epoch": 0.9225700164744646, + "grad_norm": 0.3916696012020111, + "learning_rate": 1.123679097256238e-05, + "loss": 0.4373, + "step": 33600 + }, + { + "epoch": 0.9225974739154311, + "grad_norm": 0.39490580558776855, + "learning_rate": 1.1236362393962907e-05, + "loss": 0.4639, + "step": 33601 + }, + { + "epoch": 0.9226249313563976, + "grad_norm": 0.5567611455917358, + "learning_rate": 1.1235933813057226e-05, + "loss": 0.5732, + "step": 33602 + }, + { + "epoch": 0.9226523887973641, + "grad_norm": 0.34825384616851807, + "learning_rate": 1.1235505229846134e-05, + "loss": 0.416, + "step": 33603 + }, + { + "epoch": 0.9226798462383305, + "grad_norm": 0.39929965138435364, + "learning_rate": 1.123507664433043e-05, + "loss": 0.5219, + "step": 33604 + }, + { + "epoch": 0.9227073036792971, + "grad_norm": 0.39863914251327515, + "learning_rate": 1.1234648056510917e-05, + "loss": 0.5215, + "step": 33605 + }, + { + "epoch": 0.9227347611202636, + "grad_norm": 0.4329266846179962, + "learning_rate": 1.1234219466388388e-05, + "loss": 0.3729, + "step": 33606 + }, + { + "epoch": 0.9227622185612301, + "grad_norm": 0.42079782485961914, + "learning_rate": 1.1233790873963647e-05, + "loss": 0.5132, + "step": 33607 + }, + { + "epoch": 0.9227896760021966, + "grad_norm": 0.4082830846309662, + "learning_rate": 1.123336227923749e-05, + "loss": 0.5151, + "step": 33608 + }, + { + "epoch": 0.9228171334431631, + "grad_norm": 0.4498313367366791, + "learning_rate": 1.1232933682210721e-05, + "loss": 0.549, + "step": 33609 + }, + { + "epoch": 0.9228445908841296, + "grad_norm": 0.4211365580558777, + "learning_rate": 1.1232505082884137e-05, + "loss": 0.5022, + "step": 33610 + }, + { + "epoch": 0.9228720483250961, + "grad_norm": 0.4276675283908844, + "learning_rate": 1.1232076481258537e-05, + "loss": 0.4958, + "step": 33611 + }, + { + "epoch": 0.9228995057660626, + "grad_norm": 0.47678443789482117, + "learning_rate": 1.1231647877334722e-05, + "loss": 0.4522, + "step": 33612 + }, + { + "epoch": 0.922926963207029, + "grad_norm": 0.3725253641605377, + "learning_rate": 1.123121927111349e-05, + "loss": 0.5073, + "step": 33613 + }, + { + "epoch": 0.9229544206479956, + "grad_norm": 0.374092161655426, + "learning_rate": 1.1230790662595642e-05, + "loss": 0.5128, + "step": 33614 + }, + { + "epoch": 0.9229818780889621, + "grad_norm": 0.37868228554725647, + "learning_rate": 1.1230362051781976e-05, + "loss": 0.5053, + "step": 33615 + }, + { + "epoch": 0.9230093355299286, + "grad_norm": 0.40199270844459534, + "learning_rate": 1.122993343867329e-05, + "loss": 0.5052, + "step": 33616 + }, + { + "epoch": 0.9230367929708951, + "grad_norm": 0.391030877828598, + "learning_rate": 1.1229504823270389e-05, + "loss": 0.5132, + "step": 33617 + }, + { + "epoch": 0.9230642504118616, + "grad_norm": 0.3649752736091614, + "learning_rate": 1.1229076205574066e-05, + "loss": 0.4619, + "step": 33618 + }, + { + "epoch": 0.9230917078528281, + "grad_norm": 0.3879531919956207, + "learning_rate": 1.1228647585585127e-05, + "loss": 0.4552, + "step": 33619 + }, + { + "epoch": 0.9231191652937946, + "grad_norm": 0.3533819317817688, + "learning_rate": 1.1228218963304365e-05, + "loss": 0.4025, + "step": 33620 + }, + { + "epoch": 0.9231466227347611, + "grad_norm": 0.38680699467658997, + "learning_rate": 1.1227790338732584e-05, + "loss": 0.5288, + "step": 33621 + }, + { + "epoch": 0.9231740801757277, + "grad_norm": 0.4144706428050995, + "learning_rate": 1.1227361711870584e-05, + "loss": 0.5602, + "step": 33622 + }, + { + "epoch": 0.9232015376166941, + "grad_norm": 0.36380189657211304, + "learning_rate": 1.1226933082719158e-05, + "loss": 0.4701, + "step": 33623 + }, + { + "epoch": 0.9232289950576607, + "grad_norm": 0.3585013747215271, + "learning_rate": 1.1226504451279114e-05, + "loss": 0.3908, + "step": 33624 + }, + { + "epoch": 0.9232564524986271, + "grad_norm": 0.36456671357154846, + "learning_rate": 1.1226075817551245e-05, + "loss": 0.4361, + "step": 33625 + }, + { + "epoch": 0.9232839099395936, + "grad_norm": 0.38512855768203735, + "learning_rate": 1.1225647181536357e-05, + "loss": 0.427, + "step": 33626 + }, + { + "epoch": 0.9233113673805601, + "grad_norm": 0.42477795481681824, + "learning_rate": 1.1225218543235243e-05, + "loss": 0.4212, + "step": 33627 + }, + { + "epoch": 0.9233388248215266, + "grad_norm": 0.3935146629810333, + "learning_rate": 1.1224789902648705e-05, + "loss": 0.5103, + "step": 33628 + }, + { + "epoch": 0.9233662822624932, + "grad_norm": 0.3841167390346527, + "learning_rate": 1.1224361259777545e-05, + "loss": 0.4974, + "step": 33629 + }, + { + "epoch": 0.9233937397034596, + "grad_norm": 0.5502886176109314, + "learning_rate": 1.1223932614622558e-05, + "loss": 0.4713, + "step": 33630 + }, + { + "epoch": 0.9234211971444262, + "grad_norm": 0.4251832664012909, + "learning_rate": 1.122350396718455e-05, + "loss": 0.5076, + "step": 33631 + }, + { + "epoch": 0.9234486545853926, + "grad_norm": 0.3963448703289032, + "learning_rate": 1.1223075317464316e-05, + "loss": 0.4181, + "step": 33632 + }, + { + "epoch": 0.9234761120263592, + "grad_norm": 0.3985423147678375, + "learning_rate": 1.122264666546265e-05, + "loss": 0.4625, + "step": 33633 + }, + { + "epoch": 0.9235035694673256, + "grad_norm": 0.3526234030723572, + "learning_rate": 1.1222218011180366e-05, + "loss": 0.3928, + "step": 33634 + }, + { + "epoch": 0.9235310269082921, + "grad_norm": 0.39430487155914307, + "learning_rate": 1.122178935461825e-05, + "loss": 0.5714, + "step": 33635 + }, + { + "epoch": 0.9235584843492587, + "grad_norm": 0.3298690617084503, + "learning_rate": 1.1221360695777108e-05, + "loss": 0.4407, + "step": 33636 + }, + { + "epoch": 0.9235859417902251, + "grad_norm": 0.45379438996315, + "learning_rate": 1.1220932034657736e-05, + "loss": 0.5513, + "step": 33637 + }, + { + "epoch": 0.9236133992311917, + "grad_norm": 0.385770320892334, + "learning_rate": 1.1220503371260943e-05, + "loss": 0.527, + "step": 33638 + }, + { + "epoch": 0.9236408566721581, + "grad_norm": 0.3812829852104187, + "learning_rate": 1.1220074705587519e-05, + "loss": 0.5202, + "step": 33639 + }, + { + "epoch": 0.9236683141131247, + "grad_norm": 0.3865065276622772, + "learning_rate": 1.121964603763826e-05, + "loss": 0.5284, + "step": 33640 + }, + { + "epoch": 0.9236957715540911, + "grad_norm": 0.4120789170265198, + "learning_rate": 1.1219217367413979e-05, + "loss": 0.4651, + "step": 33641 + }, + { + "epoch": 0.9237232289950577, + "grad_norm": 0.3702388405799866, + "learning_rate": 1.1218788694915466e-05, + "loss": 0.4325, + "step": 33642 + }, + { + "epoch": 0.9237506864360242, + "grad_norm": 0.46189385652542114, + "learning_rate": 1.1218360020143524e-05, + "loss": 0.4568, + "step": 33643 + }, + { + "epoch": 0.9237781438769906, + "grad_norm": 0.4478963017463684, + "learning_rate": 1.1217931343098952e-05, + "loss": 0.4875, + "step": 33644 + }, + { + "epoch": 0.9238056013179572, + "grad_norm": 0.4301338791847229, + "learning_rate": 1.1217502663782547e-05, + "loss": 0.4901, + "step": 33645 + }, + { + "epoch": 0.9238330587589236, + "grad_norm": 0.35821956396102905, + "learning_rate": 1.1217073982195112e-05, + "loss": 0.4788, + "step": 33646 + }, + { + "epoch": 0.9238605161998902, + "grad_norm": 0.3755975067615509, + "learning_rate": 1.1216645298337445e-05, + "loss": 0.5667, + "step": 33647 + }, + { + "epoch": 0.9238879736408566, + "grad_norm": 0.4175121784210205, + "learning_rate": 1.1216216612210349e-05, + "loss": 0.4209, + "step": 33648 + }, + { + "epoch": 0.9239154310818232, + "grad_norm": 0.3758852481842041, + "learning_rate": 1.1215787923814618e-05, + "loss": 0.4178, + "step": 33649 + }, + { + "epoch": 0.9239428885227897, + "grad_norm": 0.39671725034713745, + "learning_rate": 1.1215359233151056e-05, + "loss": 0.6035, + "step": 33650 + }, + { + "epoch": 0.9239703459637562, + "grad_norm": 0.4684582054615021, + "learning_rate": 1.1214930540220462e-05, + "loss": 0.4977, + "step": 33651 + }, + { + "epoch": 0.9239978034047227, + "grad_norm": 0.4517892301082611, + "learning_rate": 1.1214501845023631e-05, + "loss": 0.555, + "step": 33652 + }, + { + "epoch": 0.9240252608456891, + "grad_norm": 0.5156253576278687, + "learning_rate": 1.1214073147561369e-05, + "loss": 0.5429, + "step": 33653 + }, + { + "epoch": 0.9240527182866557, + "grad_norm": 0.46190550923347473, + "learning_rate": 1.1213644447834474e-05, + "loss": 0.5137, + "step": 33654 + }, + { + "epoch": 0.9240801757276221, + "grad_norm": 0.4488618075847626, + "learning_rate": 1.1213215745843743e-05, + "loss": 0.511, + "step": 33655 + }, + { + "epoch": 0.9241076331685887, + "grad_norm": 0.3764214515686035, + "learning_rate": 1.1212787041589978e-05, + "loss": 0.5374, + "step": 33656 + }, + { + "epoch": 0.9241350906095552, + "grad_norm": 0.40162402391433716, + "learning_rate": 1.1212358335073979e-05, + "loss": 0.4172, + "step": 33657 + }, + { + "epoch": 0.9241625480505217, + "grad_norm": 0.3412168622016907, + "learning_rate": 1.1211929626296542e-05, + "loss": 0.4621, + "step": 33658 + }, + { + "epoch": 0.9241900054914882, + "grad_norm": 0.38725045323371887, + "learning_rate": 1.1211500915258472e-05, + "loss": 0.4513, + "step": 33659 + }, + { + "epoch": 0.9242174629324547, + "grad_norm": 0.4163564443588257, + "learning_rate": 1.1211072201960565e-05, + "loss": 0.5021, + "step": 33660 + }, + { + "epoch": 0.9242449203734212, + "grad_norm": 0.38413307070732117, + "learning_rate": 1.1210643486403623e-05, + "loss": 0.4716, + "step": 33661 + }, + { + "epoch": 0.9242723778143876, + "grad_norm": 0.40503302216529846, + "learning_rate": 1.1210214768588445e-05, + "loss": 0.6095, + "step": 33662 + }, + { + "epoch": 0.9242998352553542, + "grad_norm": 0.38034528493881226, + "learning_rate": 1.1209786048515827e-05, + "loss": 0.5355, + "step": 33663 + }, + { + "epoch": 0.9243272926963207, + "grad_norm": 0.46727293729782104, + "learning_rate": 1.1209357326186576e-05, + "loss": 0.5375, + "step": 33664 + }, + { + "epoch": 0.9243547501372872, + "grad_norm": 0.34815046191215515, + "learning_rate": 1.1208928601601483e-05, + "loss": 0.4239, + "step": 33665 + }, + { + "epoch": 0.9243822075782537, + "grad_norm": 0.3737133741378784, + "learning_rate": 1.1208499874761355e-05, + "loss": 0.4795, + "step": 33666 + }, + { + "epoch": 0.9244096650192202, + "grad_norm": 0.5055474042892456, + "learning_rate": 1.1208071145666993e-05, + "loss": 0.514, + "step": 33667 + }, + { + "epoch": 0.9244371224601867, + "grad_norm": 0.38395246863365173, + "learning_rate": 1.1207642414319186e-05, + "loss": 0.4731, + "step": 33668 + }, + { + "epoch": 0.9244645799011532, + "grad_norm": 0.44666358828544617, + "learning_rate": 1.1207213680718742e-05, + "loss": 0.5773, + "step": 33669 + }, + { + "epoch": 0.9244920373421197, + "grad_norm": 0.38091903924942017, + "learning_rate": 1.1206784944866463e-05, + "loss": 0.4236, + "step": 33670 + }, + { + "epoch": 0.9245194947830863, + "grad_norm": 0.5094394683837891, + "learning_rate": 1.120635620676314e-05, + "loss": 0.418, + "step": 33671 + }, + { + "epoch": 0.9245469522240527, + "grad_norm": 0.4583567678928375, + "learning_rate": 1.1205927466409581e-05, + "loss": 0.5887, + "step": 33672 + }, + { + "epoch": 0.9245744096650192, + "grad_norm": 0.42334020137786865, + "learning_rate": 1.120549872380658e-05, + "loss": 0.6005, + "step": 33673 + }, + { + "epoch": 0.9246018671059857, + "grad_norm": 0.43318885564804077, + "learning_rate": 1.1205069978954941e-05, + "loss": 0.5211, + "step": 33674 + }, + { + "epoch": 0.9246293245469522, + "grad_norm": 0.3873750865459442, + "learning_rate": 1.1204641231855462e-05, + "loss": 0.4964, + "step": 33675 + }, + { + "epoch": 0.9246567819879187, + "grad_norm": 0.39056259393692017, + "learning_rate": 1.120421248250894e-05, + "loss": 0.4828, + "step": 33676 + }, + { + "epoch": 0.9246842394288852, + "grad_norm": 0.3860107958316803, + "learning_rate": 1.120378373091618e-05, + "loss": 0.5072, + "step": 33677 + }, + { + "epoch": 0.9247116968698518, + "grad_norm": 0.3684338331222534, + "learning_rate": 1.1203354977077978e-05, + "loss": 0.4948, + "step": 33678 + }, + { + "epoch": 0.9247391543108182, + "grad_norm": 0.5751407146453857, + "learning_rate": 1.1202926220995136e-05, + "loss": 0.529, + "step": 33679 + }, + { + "epoch": 0.9247666117517848, + "grad_norm": 0.3608757257461548, + "learning_rate": 1.1202497462668454e-05, + "loss": 0.4246, + "step": 33680 + }, + { + "epoch": 0.9247940691927512, + "grad_norm": 0.41896677017211914, + "learning_rate": 1.1202068702098725e-05, + "loss": 0.4965, + "step": 33681 + }, + { + "epoch": 0.9248215266337177, + "grad_norm": 0.3959046006202698, + "learning_rate": 1.120163993928676e-05, + "loss": 0.5508, + "step": 33682 + }, + { + "epoch": 0.9248489840746842, + "grad_norm": 0.42785173654556274, + "learning_rate": 1.1201211174233348e-05, + "loss": 0.5252, + "step": 33683 + }, + { + "epoch": 0.9248764415156507, + "grad_norm": 0.41327404975891113, + "learning_rate": 1.1200782406939297e-05, + "loss": 0.4862, + "step": 33684 + }, + { + "epoch": 0.9249038989566173, + "grad_norm": 0.3773987293243408, + "learning_rate": 1.1200353637405405e-05, + "loss": 0.4606, + "step": 33685 + }, + { + "epoch": 0.9249313563975837, + "grad_norm": 0.4350607693195343, + "learning_rate": 1.1199924865632466e-05, + "loss": 0.5487, + "step": 33686 + }, + { + "epoch": 0.9249588138385503, + "grad_norm": 0.36752796173095703, + "learning_rate": 1.1199496091621286e-05, + "loss": 0.5359, + "step": 33687 + }, + { + "epoch": 0.9249862712795167, + "grad_norm": 0.4487992525100708, + "learning_rate": 1.1199067315372668e-05, + "loss": 0.497, + "step": 33688 + }, + { + "epoch": 0.9250137287204833, + "grad_norm": 0.40307527780532837, + "learning_rate": 1.1198638536887399e-05, + "loss": 0.4288, + "step": 33689 + }, + { + "epoch": 0.9250411861614497, + "grad_norm": 1.5735903978347778, + "learning_rate": 1.1198209756166289e-05, + "loss": 0.52, + "step": 33690 + }, + { + "epoch": 0.9250686436024163, + "grad_norm": 0.35398319363594055, + "learning_rate": 1.1197780973210138e-05, + "loss": 0.4699, + "step": 33691 + }, + { + "epoch": 0.9250961010433828, + "grad_norm": 0.3859114944934845, + "learning_rate": 1.1197352188019741e-05, + "loss": 0.4535, + "step": 33692 + }, + { + "epoch": 0.9251235584843492, + "grad_norm": 0.3988330662250519, + "learning_rate": 1.11969234005959e-05, + "loss": 0.4977, + "step": 33693 + }, + { + "epoch": 0.9251510159253158, + "grad_norm": 0.39542096853256226, + "learning_rate": 1.1196494610939414e-05, + "loss": 0.5189, + "step": 33694 + }, + { + "epoch": 0.9251784733662822, + "grad_norm": 0.35293254256248474, + "learning_rate": 1.1196065819051085e-05, + "loss": 0.5145, + "step": 33695 + }, + { + "epoch": 0.9252059308072488, + "grad_norm": 0.37011709809303284, + "learning_rate": 1.1195637024931711e-05, + "loss": 0.3979, + "step": 33696 + }, + { + "epoch": 0.9252333882482152, + "grad_norm": 0.3985292315483093, + "learning_rate": 1.1195208228582093e-05, + "loss": 0.5932, + "step": 33697 + }, + { + "epoch": 0.9252608456891818, + "grad_norm": 0.3876800835132599, + "learning_rate": 1.119477943000303e-05, + "loss": 0.5276, + "step": 33698 + }, + { + "epoch": 0.9252883031301483, + "grad_norm": 0.3846152126789093, + "learning_rate": 1.119435062919532e-05, + "loss": 0.5021, + "step": 33699 + }, + { + "epoch": 0.9253157605711148, + "grad_norm": 0.3558667302131653, + "learning_rate": 1.119392182615977e-05, + "loss": 0.4635, + "step": 33700 + }, + { + "epoch": 0.9253432180120813, + "grad_norm": 0.442457914352417, + "learning_rate": 1.1193493020897173e-05, + "loss": 0.5351, + "step": 33701 + }, + { + "epoch": 0.9253706754530477, + "grad_norm": 0.4747838079929352, + "learning_rate": 1.1193064213408326e-05, + "loss": 0.4875, + "step": 33702 + }, + { + "epoch": 0.9253981328940143, + "grad_norm": 0.41236746311187744, + "learning_rate": 1.1192635403694038e-05, + "loss": 0.5509, + "step": 33703 + }, + { + "epoch": 0.9254255903349807, + "grad_norm": 0.386768102645874, + "learning_rate": 1.1192206591755102e-05, + "loss": 0.3994, + "step": 33704 + }, + { + "epoch": 0.9254530477759473, + "grad_norm": 0.3642443120479584, + "learning_rate": 1.1191777777592321e-05, + "loss": 0.4538, + "step": 33705 + }, + { + "epoch": 0.9254805052169138, + "grad_norm": 0.3774529993534088, + "learning_rate": 1.1191348961206496e-05, + "loss": 0.4885, + "step": 33706 + }, + { + "epoch": 0.9255079626578803, + "grad_norm": 0.3954784870147705, + "learning_rate": 1.119092014259842e-05, + "loss": 0.4429, + "step": 33707 + }, + { + "epoch": 0.9255354200988468, + "grad_norm": 0.3751177489757538, + "learning_rate": 1.1190491321768903e-05, + "loss": 0.5509, + "step": 33708 + }, + { + "epoch": 0.9255628775398133, + "grad_norm": 0.38177812099456787, + "learning_rate": 1.1190062498718736e-05, + "loss": 0.4489, + "step": 33709 + }, + { + "epoch": 0.9255903349807798, + "grad_norm": 0.3579184114933014, + "learning_rate": 1.1189633673448726e-05, + "loss": 0.4666, + "step": 33710 + }, + { + "epoch": 0.9256177924217462, + "grad_norm": 0.37936633825302124, + "learning_rate": 1.1189204845959669e-05, + "loss": 0.456, + "step": 33711 + }, + { + "epoch": 0.9256452498627128, + "grad_norm": 0.35279977321624756, + "learning_rate": 1.1188776016252363e-05, + "loss": 0.4518, + "step": 33712 + }, + { + "epoch": 0.9256727073036793, + "grad_norm": 0.654310941696167, + "learning_rate": 1.1188347184327613e-05, + "loss": 0.4764, + "step": 33713 + }, + { + "epoch": 0.9257001647446458, + "grad_norm": 0.37166836857795715, + "learning_rate": 1.1187918350186213e-05, + "loss": 0.5291, + "step": 33714 + }, + { + "epoch": 0.9257276221856123, + "grad_norm": 0.5453556180000305, + "learning_rate": 1.1187489513828969e-05, + "loss": 0.5095, + "step": 33715 + }, + { + "epoch": 0.9257550796265788, + "grad_norm": 0.4025064706802368, + "learning_rate": 1.1187060675256679e-05, + "loss": 0.506, + "step": 33716 + }, + { + "epoch": 0.9257825370675453, + "grad_norm": 0.40015554428100586, + "learning_rate": 1.1186631834470136e-05, + "loss": 0.3935, + "step": 33717 + }, + { + "epoch": 0.9258099945085118, + "grad_norm": 0.3717177212238312, + "learning_rate": 1.1186202991470153e-05, + "loss": 0.4678, + "step": 33718 + }, + { + "epoch": 0.9258374519494783, + "grad_norm": 0.4700607657432556, + "learning_rate": 1.118577414625752e-05, + "loss": 0.4678, + "step": 33719 + }, + { + "epoch": 0.9258649093904449, + "grad_norm": 0.3869458734989166, + "learning_rate": 1.1185345298833037e-05, + "loss": 0.573, + "step": 33720 + }, + { + "epoch": 0.9258923668314113, + "grad_norm": 0.43616998195648193, + "learning_rate": 1.1184916449197509e-05, + "loss": 0.5071, + "step": 33721 + }, + { + "epoch": 0.9259198242723778, + "grad_norm": 0.40419280529022217, + "learning_rate": 1.1184487597351735e-05, + "loss": 0.4879, + "step": 33722 + }, + { + "epoch": 0.9259472817133443, + "grad_norm": 0.45413118600845337, + "learning_rate": 1.118405874329651e-05, + "loss": 0.5514, + "step": 33723 + }, + { + "epoch": 0.9259747391543108, + "grad_norm": 0.3713375926017761, + "learning_rate": 1.1183629887032643e-05, + "loss": 0.4473, + "step": 33724 + }, + { + "epoch": 0.9260021965952773, + "grad_norm": 0.3848397433757782, + "learning_rate": 1.1183201028560924e-05, + "loss": 0.488, + "step": 33725 + }, + { + "epoch": 0.9260296540362438, + "grad_norm": 0.38258564472198486, + "learning_rate": 1.1182772167882158e-05, + "loss": 0.4143, + "step": 33726 + }, + { + "epoch": 0.9260571114772104, + "grad_norm": 0.36616215109825134, + "learning_rate": 1.1182343304997146e-05, + "loss": 0.5755, + "step": 33727 + }, + { + "epoch": 0.9260845689181768, + "grad_norm": 0.379401296377182, + "learning_rate": 1.1181914439906685e-05, + "loss": 0.5442, + "step": 33728 + }, + { + "epoch": 0.9261120263591434, + "grad_norm": 0.35111039876937866, + "learning_rate": 1.1181485572611579e-05, + "loss": 0.4614, + "step": 33729 + }, + { + "epoch": 0.9261394838001098, + "grad_norm": 0.38678061962127686, + "learning_rate": 1.118105670311262e-05, + "loss": 0.4746, + "step": 33730 + }, + { + "epoch": 0.9261669412410763, + "grad_norm": 0.34825825691223145, + "learning_rate": 1.118062783141062e-05, + "loss": 0.4912, + "step": 33731 + }, + { + "epoch": 0.9261943986820428, + "grad_norm": 0.4604443907737732, + "learning_rate": 1.1180198957506367e-05, + "loss": 0.4953, + "step": 33732 + }, + { + "epoch": 0.9262218561230093, + "grad_norm": 0.37800613045692444, + "learning_rate": 1.1179770081400666e-05, + "loss": 0.4462, + "step": 33733 + }, + { + "epoch": 0.9262493135639759, + "grad_norm": 0.40627744793891907, + "learning_rate": 1.1179341203094323e-05, + "loss": 0.5028, + "step": 33734 + }, + { + "epoch": 0.9262767710049423, + "grad_norm": 0.3819639980792999, + "learning_rate": 1.1178912322588127e-05, + "loss": 0.4146, + "step": 33735 + }, + { + "epoch": 0.9263042284459089, + "grad_norm": 0.48233404755592346, + "learning_rate": 1.1178483439882885e-05, + "loss": 0.5592, + "step": 33736 + }, + { + "epoch": 0.9263316858868753, + "grad_norm": 0.39878585934638977, + "learning_rate": 1.1178054554979397e-05, + "loss": 0.4754, + "step": 33737 + }, + { + "epoch": 0.9263591433278419, + "grad_norm": 0.4042608141899109, + "learning_rate": 1.1177625667878458e-05, + "loss": 0.4607, + "step": 33738 + }, + { + "epoch": 0.9263866007688083, + "grad_norm": 0.41842836141586304, + "learning_rate": 1.1177196778580872e-05, + "loss": 0.5571, + "step": 33739 + }, + { + "epoch": 0.9264140582097748, + "grad_norm": 0.40282154083251953, + "learning_rate": 1.1176767887087439e-05, + "loss": 0.466, + "step": 33740 + }, + { + "epoch": 0.9264415156507414, + "grad_norm": 0.4075777232646942, + "learning_rate": 1.1176338993398959e-05, + "loss": 0.4561, + "step": 33741 + }, + { + "epoch": 0.9264689730917078, + "grad_norm": 0.39593324065208435, + "learning_rate": 1.1175910097516232e-05, + "loss": 0.4649, + "step": 33742 + }, + { + "epoch": 0.9264964305326744, + "grad_norm": 0.3583887815475464, + "learning_rate": 1.1175481199440055e-05, + "loss": 0.4336, + "step": 33743 + }, + { + "epoch": 0.9265238879736408, + "grad_norm": 0.36977893114089966, + "learning_rate": 1.1175052299171232e-05, + "loss": 0.4772, + "step": 33744 + }, + { + "epoch": 0.9265513454146074, + "grad_norm": 0.348395973443985, + "learning_rate": 1.117462339671056e-05, + "loss": 0.4498, + "step": 33745 + }, + { + "epoch": 0.9265788028555738, + "grad_norm": 0.40515995025634766, + "learning_rate": 1.1174194492058841e-05, + "loss": 0.4831, + "step": 33746 + }, + { + "epoch": 0.9266062602965404, + "grad_norm": 0.43619462847709656, + "learning_rate": 1.1173765585216876e-05, + "loss": 0.4644, + "step": 33747 + }, + { + "epoch": 0.9266337177375069, + "grad_norm": 0.39459624886512756, + "learning_rate": 1.117333667618546e-05, + "loss": 0.4698, + "step": 33748 + }, + { + "epoch": 0.9266611751784734, + "grad_norm": 0.5384092926979065, + "learning_rate": 1.1172907764965402e-05, + "loss": 0.439, + "step": 33749 + }, + { + "epoch": 0.9266886326194399, + "grad_norm": 0.4248936176300049, + "learning_rate": 1.1172478851557494e-05, + "loss": 0.4991, + "step": 33750 + }, + { + "epoch": 0.9267160900604063, + "grad_norm": 0.5084558725357056, + "learning_rate": 1.1172049935962536e-05, + "loss": 0.5107, + "step": 33751 + }, + { + "epoch": 0.9267435475013729, + "grad_norm": 0.5645139813423157, + "learning_rate": 1.1171621018181334e-05, + "loss": 0.5247, + "step": 33752 + }, + { + "epoch": 0.9267710049423393, + "grad_norm": 0.37131965160369873, + "learning_rate": 1.1171192098214682e-05, + "loss": 0.4313, + "step": 33753 + }, + { + "epoch": 0.9267984623833059, + "grad_norm": 0.4022738039493561, + "learning_rate": 1.1170763176063386e-05, + "loss": 0.4684, + "step": 33754 + }, + { + "epoch": 0.9268259198242724, + "grad_norm": 0.403987854719162, + "learning_rate": 1.1170334251728243e-05, + "loss": 0.5059, + "step": 33755 + }, + { + "epoch": 0.9268533772652389, + "grad_norm": 0.3477700650691986, + "learning_rate": 1.1169905325210052e-05, + "loss": 0.4283, + "step": 33756 + }, + { + "epoch": 0.9268808347062054, + "grad_norm": 0.3964722454547882, + "learning_rate": 1.1169476396509614e-05, + "loss": 0.4483, + "step": 33757 + }, + { + "epoch": 0.9269082921471719, + "grad_norm": 0.3661794662475586, + "learning_rate": 1.116904746562773e-05, + "loss": 0.4664, + "step": 33758 + }, + { + "epoch": 0.9269357495881384, + "grad_norm": 0.4023735225200653, + "learning_rate": 1.1168618532565199e-05, + "loss": 0.5469, + "step": 33759 + }, + { + "epoch": 0.9269632070291048, + "grad_norm": 0.36157602071762085, + "learning_rate": 1.116818959732282e-05, + "loss": 0.4258, + "step": 33760 + }, + { + "epoch": 0.9269906644700714, + "grad_norm": 0.4121299982070923, + "learning_rate": 1.1167760659901396e-05, + "loss": 0.5207, + "step": 33761 + }, + { + "epoch": 0.9270181219110379, + "grad_norm": 0.38283249735832214, + "learning_rate": 1.1167331720301726e-05, + "loss": 0.5539, + "step": 33762 + }, + { + "epoch": 0.9270455793520044, + "grad_norm": 0.4154175817966461, + "learning_rate": 1.1166902778524611e-05, + "loss": 0.5222, + "step": 33763 + }, + { + "epoch": 0.9270730367929709, + "grad_norm": 0.4225829541683197, + "learning_rate": 1.1166473834570846e-05, + "loss": 0.5536, + "step": 33764 + }, + { + "epoch": 0.9271004942339374, + "grad_norm": 0.4028390347957611, + "learning_rate": 1.1166044888441239e-05, + "loss": 0.4487, + "step": 33765 + }, + { + "epoch": 0.9271279516749039, + "grad_norm": 0.41038772463798523, + "learning_rate": 1.1165615940136582e-05, + "loss": 0.5245, + "step": 33766 + }, + { + "epoch": 0.9271554091158704, + "grad_norm": 0.4044465720653534, + "learning_rate": 1.1165186989657684e-05, + "loss": 0.5709, + "step": 33767 + }, + { + "epoch": 0.9271828665568369, + "grad_norm": 0.4244092106819153, + "learning_rate": 1.1164758037005338e-05, + "loss": 0.5198, + "step": 33768 + }, + { + "epoch": 0.9272103239978035, + "grad_norm": 0.4254436194896698, + "learning_rate": 1.1164329082180345e-05, + "loss": 0.5044, + "step": 33769 + }, + { + "epoch": 0.9272377814387699, + "grad_norm": 0.3905060589313507, + "learning_rate": 1.1163900125183509e-05, + "loss": 0.3832, + "step": 33770 + }, + { + "epoch": 0.9272652388797364, + "grad_norm": 0.3727901577949524, + "learning_rate": 1.1163471166015625e-05, + "loss": 0.5081, + "step": 33771 + }, + { + "epoch": 0.9272926963207029, + "grad_norm": 0.3684048354625702, + "learning_rate": 1.1163042204677498e-05, + "loss": 0.5439, + "step": 33772 + }, + { + "epoch": 0.9273201537616694, + "grad_norm": 0.3536275029182434, + "learning_rate": 1.1162613241169929e-05, + "loss": 0.4175, + "step": 33773 + }, + { + "epoch": 0.9273476112026359, + "grad_norm": 0.36262378096580505, + "learning_rate": 1.1162184275493711e-05, + "loss": 0.5014, + "step": 33774 + }, + { + "epoch": 0.9273750686436024, + "grad_norm": 0.4231962263584137, + "learning_rate": 1.1161755307649648e-05, + "loss": 0.5499, + "step": 33775 + }, + { + "epoch": 0.927402526084569, + "grad_norm": 0.39009636640548706, + "learning_rate": 1.1161326337638543e-05, + "loss": 0.5345, + "step": 33776 + }, + { + "epoch": 0.9274299835255354, + "grad_norm": 0.3861329257488251, + "learning_rate": 1.1160897365461195e-05, + "loss": 0.4999, + "step": 33777 + }, + { + "epoch": 0.927457440966502, + "grad_norm": 0.43086180090904236, + "learning_rate": 1.11604683911184e-05, + "loss": 0.5434, + "step": 33778 + }, + { + "epoch": 0.9274848984074684, + "grad_norm": 0.4597713053226471, + "learning_rate": 1.1160039414610959e-05, + "loss": 0.5626, + "step": 33779 + }, + { + "epoch": 0.9275123558484349, + "grad_norm": 0.39543578028678894, + "learning_rate": 1.1159610435939679e-05, + "loss": 0.5107, + "step": 33780 + }, + { + "epoch": 0.9275398132894014, + "grad_norm": 0.3632591962814331, + "learning_rate": 1.1159181455105354e-05, + "loss": 0.4892, + "step": 33781 + }, + { + "epoch": 0.9275672707303679, + "grad_norm": 0.39018383622169495, + "learning_rate": 1.1158752472108785e-05, + "loss": 0.5837, + "step": 33782 + }, + { + "epoch": 0.9275947281713345, + "grad_norm": 0.3927360475063324, + "learning_rate": 1.1158323486950774e-05, + "loss": 0.4742, + "step": 33783 + }, + { + "epoch": 0.9276221856123009, + "grad_norm": 0.39533814787864685, + "learning_rate": 1.1157894499632115e-05, + "loss": 0.5651, + "step": 33784 + }, + { + "epoch": 0.9276496430532675, + "grad_norm": 0.5675268173217773, + "learning_rate": 1.115746551015362e-05, + "loss": 0.5023, + "step": 33785 + }, + { + "epoch": 0.9276771004942339, + "grad_norm": 0.3743692636489868, + "learning_rate": 1.1157036518516081e-05, + "loss": 0.4384, + "step": 33786 + }, + { + "epoch": 0.9277045579352005, + "grad_norm": 0.36857888102531433, + "learning_rate": 1.1156607524720295e-05, + "loss": 0.4245, + "step": 33787 + }, + { + "epoch": 0.9277320153761669, + "grad_norm": 0.4698542654514313, + "learning_rate": 1.1156178528767071e-05, + "loss": 0.5483, + "step": 33788 + }, + { + "epoch": 0.9277594728171334, + "grad_norm": 0.42110148072242737, + "learning_rate": 1.1155749530657203e-05, + "loss": 0.4558, + "step": 33789 + }, + { + "epoch": 0.9277869302581, + "grad_norm": 0.35499307513237, + "learning_rate": 1.1155320530391496e-05, + "loss": 0.4726, + "step": 33790 + }, + { + "epoch": 0.9278143876990664, + "grad_norm": 0.44440871477127075, + "learning_rate": 1.1154891527970746e-05, + "loss": 0.5533, + "step": 33791 + }, + { + "epoch": 0.927841845140033, + "grad_norm": 0.34942129254341125, + "learning_rate": 1.1154462523395753e-05, + "loss": 0.5547, + "step": 33792 + }, + { + "epoch": 0.9278693025809994, + "grad_norm": 0.45745357871055603, + "learning_rate": 1.1154033516667322e-05, + "loss": 0.6138, + "step": 33793 + }, + { + "epoch": 0.927896760021966, + "grad_norm": 0.9136677980422974, + "learning_rate": 1.1153604507786247e-05, + "loss": 0.4854, + "step": 33794 + }, + { + "epoch": 0.9279242174629324, + "grad_norm": 0.44959521293640137, + "learning_rate": 1.1153175496753334e-05, + "loss": 0.5357, + "step": 33795 + }, + { + "epoch": 0.927951674903899, + "grad_norm": 0.36974024772644043, + "learning_rate": 1.1152746483569382e-05, + "loss": 0.4779, + "step": 33796 + }, + { + "epoch": 0.9279791323448655, + "grad_norm": 0.4513631761074066, + "learning_rate": 1.1152317468235185e-05, + "loss": 0.5342, + "step": 33797 + }, + { + "epoch": 0.928006589785832, + "grad_norm": 0.3882017135620117, + "learning_rate": 1.115188845075155e-05, + "loss": 0.4443, + "step": 33798 + }, + { + "epoch": 0.9280340472267985, + "grad_norm": 0.46073058247566223, + "learning_rate": 1.1151459431119276e-05, + "loss": 0.4752, + "step": 33799 + }, + { + "epoch": 0.9280615046677649, + "grad_norm": 0.3850788176059723, + "learning_rate": 1.1151030409339162e-05, + "loss": 0.4809, + "step": 33800 + }, + { + "epoch": 0.9280889621087315, + "grad_norm": 0.3836579918861389, + "learning_rate": 1.115060138541201e-05, + "loss": 0.4349, + "step": 33801 + }, + { + "epoch": 0.9281164195496979, + "grad_norm": 0.41672343015670776, + "learning_rate": 1.115017235933862e-05, + "loss": 0.4594, + "step": 33802 + }, + { + "epoch": 0.9281438769906645, + "grad_norm": 0.3871751129627228, + "learning_rate": 1.114974333111979e-05, + "loss": 0.4863, + "step": 33803 + }, + { + "epoch": 0.928171334431631, + "grad_norm": 0.49239087104797363, + "learning_rate": 1.1149314300756322e-05, + "loss": 0.4479, + "step": 33804 + }, + { + "epoch": 0.9281987918725975, + "grad_norm": 0.5685789585113525, + "learning_rate": 1.1148885268249017e-05, + "loss": 0.5479, + "step": 33805 + }, + { + "epoch": 0.928226249313564, + "grad_norm": 0.40041565895080566, + "learning_rate": 1.114845623359867e-05, + "loss": 0.475, + "step": 33806 + }, + { + "epoch": 0.9282537067545304, + "grad_norm": 0.43159371614456177, + "learning_rate": 1.114802719680609e-05, + "loss": 0.5594, + "step": 33807 + }, + { + "epoch": 0.928281164195497, + "grad_norm": 0.38588353991508484, + "learning_rate": 1.1147598157872072e-05, + "loss": 0.5371, + "step": 33808 + }, + { + "epoch": 0.9283086216364634, + "grad_norm": 0.4354581832885742, + "learning_rate": 1.1147169116797416e-05, + "loss": 0.5771, + "step": 33809 + }, + { + "epoch": 0.92833607907743, + "grad_norm": 0.4921262860298157, + "learning_rate": 1.1146740073582927e-05, + "loss": 0.4894, + "step": 33810 + }, + { + "epoch": 0.9283635365183965, + "grad_norm": 0.4122196137905121, + "learning_rate": 1.1146311028229398e-05, + "loss": 0.5269, + "step": 33811 + }, + { + "epoch": 0.928390993959363, + "grad_norm": 0.3391551077365875, + "learning_rate": 1.1145881980737634e-05, + "loss": 0.4253, + "step": 33812 + }, + { + "epoch": 0.9284184514003295, + "grad_norm": 0.4376925528049469, + "learning_rate": 1.1145452931108435e-05, + "loss": 0.5566, + "step": 33813 + }, + { + "epoch": 0.928445908841296, + "grad_norm": 0.36611905694007874, + "learning_rate": 1.1145023879342601e-05, + "loss": 0.4264, + "step": 33814 + }, + { + "epoch": 0.9284733662822625, + "grad_norm": 0.3687635362148285, + "learning_rate": 1.114459482544093e-05, + "loss": 0.3909, + "step": 33815 + }, + { + "epoch": 0.928500823723229, + "grad_norm": 0.39441466331481934, + "learning_rate": 1.1144165769404224e-05, + "loss": 0.5287, + "step": 33816 + }, + { + "epoch": 0.9285282811641955, + "grad_norm": 0.39531370997428894, + "learning_rate": 1.1143736711233288e-05, + "loss": 0.4482, + "step": 33817 + }, + { + "epoch": 0.928555738605162, + "grad_norm": 0.36510592699050903, + "learning_rate": 1.1143307650928914e-05, + "loss": 0.4813, + "step": 33818 + }, + { + "epoch": 0.9285831960461285, + "grad_norm": 0.44977667927742004, + "learning_rate": 1.1142878588491908e-05, + "loss": 0.5056, + "step": 33819 + }, + { + "epoch": 0.928610653487095, + "grad_norm": 0.4156414568424225, + "learning_rate": 1.1142449523923069e-05, + "loss": 0.5385, + "step": 33820 + }, + { + "epoch": 0.9286381109280615, + "grad_norm": 0.35752302408218384, + "learning_rate": 1.1142020457223195e-05, + "loss": 0.4431, + "step": 33821 + }, + { + "epoch": 0.928665568369028, + "grad_norm": 0.5257189869880676, + "learning_rate": 1.1141591388393091e-05, + "loss": 0.4534, + "step": 33822 + }, + { + "epoch": 0.9286930258099945, + "grad_norm": 0.39845407009124756, + "learning_rate": 1.1141162317433551e-05, + "loss": 0.5232, + "step": 33823 + }, + { + "epoch": 0.928720483250961, + "grad_norm": 0.49510690569877625, + "learning_rate": 1.1140733244345383e-05, + "loss": 0.5124, + "step": 33824 + }, + { + "epoch": 0.9287479406919276, + "grad_norm": 0.43215081095695496, + "learning_rate": 1.1140304169129383e-05, + "loss": 0.4653, + "step": 33825 + }, + { + "epoch": 0.928775398132894, + "grad_norm": 0.34577497839927673, + "learning_rate": 1.113987509178635e-05, + "loss": 0.4415, + "step": 33826 + }, + { + "epoch": 0.9288028555738606, + "grad_norm": 0.46081218123435974, + "learning_rate": 1.1139446012317085e-05, + "loss": 0.5002, + "step": 33827 + }, + { + "epoch": 0.928830313014827, + "grad_norm": 0.41249343752861023, + "learning_rate": 1.1139016930722391e-05, + "loss": 0.4091, + "step": 33828 + }, + { + "epoch": 0.9288577704557935, + "grad_norm": 0.4276910424232483, + "learning_rate": 1.1138587847003069e-05, + "loss": 0.521, + "step": 33829 + }, + { + "epoch": 0.92888522789676, + "grad_norm": 0.4002867341041565, + "learning_rate": 1.1138158761159917e-05, + "loss": 0.5203, + "step": 33830 + }, + { + "epoch": 0.9289126853377265, + "grad_norm": 0.3752381205558777, + "learning_rate": 1.113772967319373e-05, + "loss": 0.484, + "step": 33831 + }, + { + "epoch": 0.9289401427786931, + "grad_norm": 0.42566466331481934, + "learning_rate": 1.1137300583105322e-05, + "loss": 0.5363, + "step": 33832 + }, + { + "epoch": 0.9289676002196595, + "grad_norm": 0.41964268684387207, + "learning_rate": 1.113687149089548e-05, + "loss": 0.4418, + "step": 33833 + }, + { + "epoch": 0.9289950576606261, + "grad_norm": 0.4044785499572754, + "learning_rate": 1.1136442396565012e-05, + "loss": 0.4846, + "step": 33834 + }, + { + "epoch": 0.9290225151015925, + "grad_norm": 0.441012442111969, + "learning_rate": 1.1136013300114717e-05, + "loss": 0.4762, + "step": 33835 + }, + { + "epoch": 0.929049972542559, + "grad_norm": 0.397538423538208, + "learning_rate": 1.1135584201545391e-05, + "loss": 0.5129, + "step": 33836 + }, + { + "epoch": 0.9290774299835255, + "grad_norm": 0.4115763008594513, + "learning_rate": 1.1135155100857841e-05, + "loss": 0.4733, + "step": 33837 + }, + { + "epoch": 0.929104887424492, + "grad_norm": 0.39494460821151733, + "learning_rate": 1.1134725998052862e-05, + "loss": 0.4582, + "step": 33838 + }, + { + "epoch": 0.9291323448654586, + "grad_norm": 0.4547971189022064, + "learning_rate": 1.113429689313126e-05, + "loss": 0.4234, + "step": 33839 + }, + { + "epoch": 0.929159802306425, + "grad_norm": 0.487589567899704, + "learning_rate": 1.1133867786093833e-05, + "loss": 0.4136, + "step": 33840 + }, + { + "epoch": 0.9291872597473916, + "grad_norm": 0.4620743691921234, + "learning_rate": 1.1133438676941376e-05, + "loss": 0.5002, + "step": 33841 + }, + { + "epoch": 0.929214717188358, + "grad_norm": 0.3997475802898407, + "learning_rate": 1.1133009565674697e-05, + "loss": 0.4955, + "step": 33842 + }, + { + "epoch": 0.9292421746293246, + "grad_norm": 0.3674972653388977, + "learning_rate": 1.1132580452294595e-05, + "loss": 0.4933, + "step": 33843 + }, + { + "epoch": 0.929269632070291, + "grad_norm": 0.46377331018447876, + "learning_rate": 1.1132151336801866e-05, + "loss": 0.5189, + "step": 33844 + }, + { + "epoch": 0.9292970895112576, + "grad_norm": 0.5363406538963318, + "learning_rate": 1.1131722219197315e-05, + "loss": 0.5663, + "step": 33845 + }, + { + "epoch": 0.9293245469522241, + "grad_norm": 0.40679609775543213, + "learning_rate": 1.113129309948174e-05, + "loss": 0.5147, + "step": 33846 + }, + { + "epoch": 0.9293520043931905, + "grad_norm": 0.34440159797668457, + "learning_rate": 1.1130863977655944e-05, + "loss": 0.4205, + "step": 33847 + }, + { + "epoch": 0.9293794618341571, + "grad_norm": 0.39028841257095337, + "learning_rate": 1.1130434853720724e-05, + "loss": 0.4779, + "step": 33848 + }, + { + "epoch": 0.9294069192751235, + "grad_norm": 0.368453711271286, + "learning_rate": 1.113000572767688e-05, + "loss": 0.4592, + "step": 33849 + }, + { + "epoch": 0.9294343767160901, + "grad_norm": 0.3854160010814667, + "learning_rate": 1.1129576599525221e-05, + "loss": 0.4851, + "step": 33850 + }, + { + "epoch": 0.9294618341570565, + "grad_norm": 0.44401976466178894, + "learning_rate": 1.1129147469266534e-05, + "loss": 0.5356, + "step": 33851 + }, + { + "epoch": 0.9294892915980231, + "grad_norm": 0.4123310148715973, + "learning_rate": 1.1128718336901632e-05, + "loss": 0.4402, + "step": 33852 + }, + { + "epoch": 0.9295167490389896, + "grad_norm": 0.4242653250694275, + "learning_rate": 1.1128289202431308e-05, + "loss": 0.4488, + "step": 33853 + }, + { + "epoch": 0.9295442064799561, + "grad_norm": 0.3958868384361267, + "learning_rate": 1.1127860065856364e-05, + "loss": 0.4932, + "step": 33854 + }, + { + "epoch": 0.9295716639209226, + "grad_norm": 0.33216503262519836, + "learning_rate": 1.1127430927177602e-05, + "loss": 0.3584, + "step": 33855 + }, + { + "epoch": 0.929599121361889, + "grad_norm": 0.36972710490226746, + "learning_rate": 1.1127001786395822e-05, + "loss": 0.5546, + "step": 33856 + }, + { + "epoch": 0.9296265788028556, + "grad_norm": 0.37614285945892334, + "learning_rate": 1.112657264351182e-05, + "loss": 0.493, + "step": 33857 + }, + { + "epoch": 0.929654036243822, + "grad_norm": 0.38602781295776367, + "learning_rate": 1.1126143498526407e-05, + "loss": 0.4987, + "step": 33858 + }, + { + "epoch": 0.9296814936847886, + "grad_norm": 0.38770607113838196, + "learning_rate": 1.1125714351440373e-05, + "loss": 0.5444, + "step": 33859 + }, + { + "epoch": 0.9297089511257551, + "grad_norm": 0.46298202872276306, + "learning_rate": 1.1125285202254523e-05, + "loss": 0.5073, + "step": 33860 + }, + { + "epoch": 0.9297364085667216, + "grad_norm": 0.5163595676422119, + "learning_rate": 1.1124856050969658e-05, + "loss": 0.4022, + "step": 33861 + }, + { + "epoch": 0.9297638660076881, + "grad_norm": 0.41576531529426575, + "learning_rate": 1.1124426897586575e-05, + "loss": 0.5659, + "step": 33862 + }, + { + "epoch": 0.9297913234486546, + "grad_norm": 0.3902250826358795, + "learning_rate": 1.112399774210608e-05, + "loss": 0.4423, + "step": 33863 + }, + { + "epoch": 0.9298187808896211, + "grad_norm": 0.3809581995010376, + "learning_rate": 1.1123568584528967e-05, + "loss": 0.4702, + "step": 33864 + }, + { + "epoch": 0.9298462383305875, + "grad_norm": 0.4223405420780182, + "learning_rate": 1.1123139424856041e-05, + "loss": 0.5005, + "step": 33865 + }, + { + "epoch": 0.9298736957715541, + "grad_norm": 0.4074265658855438, + "learning_rate": 1.1122710263088103e-05, + "loss": 0.5337, + "step": 33866 + }, + { + "epoch": 0.9299011532125206, + "grad_norm": 0.4387895464897156, + "learning_rate": 1.112228109922595e-05, + "loss": 0.5343, + "step": 33867 + }, + { + "epoch": 0.9299286106534871, + "grad_norm": 0.3715851306915283, + "learning_rate": 1.1121851933270388e-05, + "loss": 0.4628, + "step": 33868 + }, + { + "epoch": 0.9299560680944536, + "grad_norm": 0.38258302211761475, + "learning_rate": 1.1121422765222213e-05, + "loss": 0.5047, + "step": 33869 + }, + { + "epoch": 0.9299835255354201, + "grad_norm": 0.449955016374588, + "learning_rate": 1.1120993595082223e-05, + "loss": 0.5602, + "step": 33870 + }, + { + "epoch": 0.9300109829763866, + "grad_norm": 0.38227182626724243, + "learning_rate": 1.1120564422851227e-05, + "loss": 0.4758, + "step": 33871 + }, + { + "epoch": 0.9300384404173531, + "grad_norm": 0.38537880778312683, + "learning_rate": 1.1120135248530017e-05, + "loss": 0.4663, + "step": 33872 + }, + { + "epoch": 0.9300658978583196, + "grad_norm": 0.39640843868255615, + "learning_rate": 1.11197060721194e-05, + "loss": 0.5013, + "step": 33873 + }, + { + "epoch": 0.9300933552992862, + "grad_norm": 0.37811675667762756, + "learning_rate": 1.1119276893620174e-05, + "loss": 0.5478, + "step": 33874 + }, + { + "epoch": 0.9301208127402526, + "grad_norm": 0.3421507477760315, + "learning_rate": 1.1118847713033136e-05, + "loss": 0.4359, + "step": 33875 + }, + { + "epoch": 0.9301482701812192, + "grad_norm": 0.37096068263053894, + "learning_rate": 1.1118418530359091e-05, + "loss": 0.4946, + "step": 33876 + }, + { + "epoch": 0.9301757276221856, + "grad_norm": 0.3866284489631653, + "learning_rate": 1.111798934559884e-05, + "loss": 0.4707, + "step": 33877 + }, + { + "epoch": 0.9302031850631521, + "grad_norm": 0.5095987915992737, + "learning_rate": 1.1117560158753182e-05, + "loss": 0.533, + "step": 33878 + }, + { + "epoch": 0.9302306425041186, + "grad_norm": 0.37969622015953064, + "learning_rate": 1.1117130969822919e-05, + "loss": 0.453, + "step": 33879 + }, + { + "epoch": 0.9302580999450851, + "grad_norm": 0.42900756001472473, + "learning_rate": 1.1116701778808845e-05, + "loss": 0.4888, + "step": 33880 + }, + { + "epoch": 0.9302855573860516, + "grad_norm": 0.5101460218429565, + "learning_rate": 1.111627258571177e-05, + "loss": 0.5881, + "step": 33881 + }, + { + "epoch": 0.9303130148270181, + "grad_norm": 0.39419567584991455, + "learning_rate": 1.1115843390532488e-05, + "loss": 0.5334, + "step": 33882 + }, + { + "epoch": 0.9303404722679847, + "grad_norm": 0.39638853073120117, + "learning_rate": 1.1115414193271806e-05, + "loss": 0.4994, + "step": 33883 + }, + { + "epoch": 0.9303679297089511, + "grad_norm": 0.41686609387397766, + "learning_rate": 1.1114984993930518e-05, + "loss": 0.4689, + "step": 33884 + }, + { + "epoch": 0.9303953871499177, + "grad_norm": 0.3739151656627655, + "learning_rate": 1.1114555792509425e-05, + "loss": 0.4686, + "step": 33885 + }, + { + "epoch": 0.9304228445908841, + "grad_norm": 0.40521782636642456, + "learning_rate": 1.1114126589009332e-05, + "loss": 0.5613, + "step": 33886 + }, + { + "epoch": 0.9304503020318506, + "grad_norm": 0.42222103476524353, + "learning_rate": 1.1113697383431038e-05, + "loss": 0.4861, + "step": 33887 + }, + { + "epoch": 0.9304777594728171, + "grad_norm": 26.999393463134766, + "learning_rate": 1.111326817577534e-05, + "loss": 0.5188, + "step": 33888 + }, + { + "epoch": 0.9305052169137836, + "grad_norm": 0.4805575907230377, + "learning_rate": 1.1112838966043045e-05, + "loss": 0.5503, + "step": 33889 + }, + { + "epoch": 0.9305326743547502, + "grad_norm": 0.37578314542770386, + "learning_rate": 1.1112409754234947e-05, + "loss": 0.4546, + "step": 33890 + }, + { + "epoch": 0.9305601317957166, + "grad_norm": 0.4563201069831848, + "learning_rate": 1.1111980540351851e-05, + "loss": 0.4951, + "step": 33891 + }, + { + "epoch": 0.9305875892366832, + "grad_norm": 0.4154123365879059, + "learning_rate": 1.1111551324394559e-05, + "loss": 0.5337, + "step": 33892 + }, + { + "epoch": 0.9306150466776496, + "grad_norm": 0.891053318977356, + "learning_rate": 1.1111122106363865e-05, + "loss": 0.445, + "step": 33893 + }, + { + "epoch": 0.9306425041186162, + "grad_norm": 0.3605031669139862, + "learning_rate": 1.1110692886260576e-05, + "loss": 0.4976, + "step": 33894 + }, + { + "epoch": 0.9306699615595826, + "grad_norm": 0.40836673974990845, + "learning_rate": 1.111026366408549e-05, + "loss": 0.4951, + "step": 33895 + }, + { + "epoch": 0.9306974190005491, + "grad_norm": 0.41237586736679077, + "learning_rate": 1.1109834439839408e-05, + "loss": 0.4365, + "step": 33896 + }, + { + "epoch": 0.9307248764415157, + "grad_norm": 0.45696115493774414, + "learning_rate": 1.110940521352313e-05, + "loss": 0.5467, + "step": 33897 + }, + { + "epoch": 0.9307523338824821, + "grad_norm": 0.3778960704803467, + "learning_rate": 1.1108975985137458e-05, + "loss": 0.4981, + "step": 33898 + }, + { + "epoch": 0.9307797913234487, + "grad_norm": 0.3816961348056793, + "learning_rate": 1.1108546754683191e-05, + "loss": 0.5561, + "step": 33899 + }, + { + "epoch": 0.9308072487644151, + "grad_norm": 0.47894003987312317, + "learning_rate": 1.1108117522161129e-05, + "loss": 0.4996, + "step": 33900 + }, + { + "epoch": 0.9308347062053817, + "grad_norm": 0.44234177470207214, + "learning_rate": 1.1107688287572076e-05, + "loss": 0.5434, + "step": 33901 + }, + { + "epoch": 0.9308621636463481, + "grad_norm": 0.38688674569129944, + "learning_rate": 1.1107259050916832e-05, + "loss": 0.4291, + "step": 33902 + }, + { + "epoch": 0.9308896210873147, + "grad_norm": 0.3314630687236786, + "learning_rate": 1.1106829812196194e-05, + "loss": 0.3991, + "step": 33903 + }, + { + "epoch": 0.9309170785282812, + "grad_norm": 0.3693886399269104, + "learning_rate": 1.1106400571410967e-05, + "loss": 0.4634, + "step": 33904 + }, + { + "epoch": 0.9309445359692476, + "grad_norm": 0.49525579810142517, + "learning_rate": 1.1105971328561952e-05, + "loss": 0.442, + "step": 33905 + }, + { + "epoch": 0.9309719934102142, + "grad_norm": 0.3619783818721771, + "learning_rate": 1.110554208364994e-05, + "loss": 0.3978, + "step": 33906 + }, + { + "epoch": 0.9309994508511806, + "grad_norm": 0.48886600136756897, + "learning_rate": 1.1105112836675746e-05, + "loss": 0.4939, + "step": 33907 + }, + { + "epoch": 0.9310269082921472, + "grad_norm": 0.3621227443218231, + "learning_rate": 1.110468358764016e-05, + "loss": 0.482, + "step": 33908 + }, + { + "epoch": 0.9310543657331136, + "grad_norm": 0.3662101924419403, + "learning_rate": 1.110425433654399e-05, + "loss": 0.4986, + "step": 33909 + }, + { + "epoch": 0.9310818231740802, + "grad_norm": 0.3719158172607422, + "learning_rate": 1.110382508338803e-05, + "loss": 0.5122, + "step": 33910 + }, + { + "epoch": 0.9311092806150467, + "grad_norm": 0.3949300944805145, + "learning_rate": 1.1103395828173085e-05, + "loss": 0.5311, + "step": 33911 + }, + { + "epoch": 0.9311367380560132, + "grad_norm": 0.35517632961273193, + "learning_rate": 1.1102966570899956e-05, + "loss": 0.4186, + "step": 33912 + }, + { + "epoch": 0.9311641954969797, + "grad_norm": 0.4252881407737732, + "learning_rate": 1.1102537311569439e-05, + "loss": 0.4513, + "step": 33913 + }, + { + "epoch": 0.9311916529379461, + "grad_norm": 1.6019890308380127, + "learning_rate": 1.1102108050182343e-05, + "loss": 0.4907, + "step": 33914 + }, + { + "epoch": 0.9312191103789127, + "grad_norm": 0.4416545629501343, + "learning_rate": 1.110167878673946e-05, + "loss": 0.4711, + "step": 33915 + }, + { + "epoch": 0.9312465678198791, + "grad_norm": 0.9622458219528198, + "learning_rate": 1.1101249521241594e-05, + "loss": 0.493, + "step": 33916 + }, + { + "epoch": 0.9312740252608457, + "grad_norm": 0.36465930938720703, + "learning_rate": 1.1100820253689548e-05, + "loss": 0.4796, + "step": 33917 + }, + { + "epoch": 0.9313014827018122, + "grad_norm": 0.4122917354106903, + "learning_rate": 1.1100390984084119e-05, + "loss": 0.4735, + "step": 33918 + }, + { + "epoch": 0.9313289401427787, + "grad_norm": 0.43964335322380066, + "learning_rate": 1.1099961712426112e-05, + "loss": 0.5396, + "step": 33919 + }, + { + "epoch": 0.9313563975837452, + "grad_norm": 0.4035888910293579, + "learning_rate": 1.1099532438716328e-05, + "loss": 0.4925, + "step": 33920 + }, + { + "epoch": 0.9313838550247117, + "grad_norm": 0.41078150272369385, + "learning_rate": 1.1099103162955558e-05, + "loss": 0.4373, + "step": 33921 + }, + { + "epoch": 0.9314113124656782, + "grad_norm": 0.38426533341407776, + "learning_rate": 1.1098673885144616e-05, + "loss": 0.4941, + "step": 33922 + }, + { + "epoch": 0.9314387699066446, + "grad_norm": 0.38748496770858765, + "learning_rate": 1.1098244605284295e-05, + "loss": 0.5191, + "step": 33923 + }, + { + "epoch": 0.9314662273476112, + "grad_norm": 0.5053889751434326, + "learning_rate": 1.1097815323375394e-05, + "loss": 0.485, + "step": 33924 + }, + { + "epoch": 0.9314936847885777, + "grad_norm": 0.40390557050704956, + "learning_rate": 1.1097386039418718e-05, + "loss": 0.5811, + "step": 33925 + }, + { + "epoch": 0.9315211422295442, + "grad_norm": 0.4088670611381531, + "learning_rate": 1.1096956753415066e-05, + "loss": 0.5318, + "step": 33926 + }, + { + "epoch": 0.9315485996705107, + "grad_norm": 0.4980161190032959, + "learning_rate": 1.1096527465365243e-05, + "loss": 0.5016, + "step": 33927 + }, + { + "epoch": 0.9315760571114772, + "grad_norm": 0.36377063393592834, + "learning_rate": 1.1096098175270046e-05, + "loss": 0.511, + "step": 33928 + }, + { + "epoch": 0.9316035145524437, + "grad_norm": 0.513690710067749, + "learning_rate": 1.1095668883130275e-05, + "loss": 0.4711, + "step": 33929 + }, + { + "epoch": 0.9316309719934102, + "grad_norm": 0.3738226592540741, + "learning_rate": 1.1095239588946731e-05, + "loss": 0.4798, + "step": 33930 + }, + { + "epoch": 0.9316584294343767, + "grad_norm": 0.37531641125679016, + "learning_rate": 1.1094810292720216e-05, + "loss": 0.4493, + "step": 33931 + }, + { + "epoch": 0.9316858868753433, + "grad_norm": 0.3708914518356323, + "learning_rate": 1.1094380994451532e-05, + "loss": 0.4586, + "step": 33932 + }, + { + "epoch": 0.9317133443163097, + "grad_norm": 0.39712953567504883, + "learning_rate": 1.1093951694141478e-05, + "loss": 0.5125, + "step": 33933 + }, + { + "epoch": 0.9317408017572762, + "grad_norm": 0.37745749950408936, + "learning_rate": 1.1093522391790851e-05, + "loss": 0.4628, + "step": 33934 + }, + { + "epoch": 0.9317682591982427, + "grad_norm": 0.3886372745037079, + "learning_rate": 1.1093093087400459e-05, + "loss": 0.4504, + "step": 33935 + }, + { + "epoch": 0.9317957166392092, + "grad_norm": 0.4136604070663452, + "learning_rate": 1.10926637809711e-05, + "loss": 0.4652, + "step": 33936 + }, + { + "epoch": 0.9318231740801757, + "grad_norm": 0.4370853900909424, + "learning_rate": 1.1092234472503574e-05, + "loss": 0.4487, + "step": 33937 + }, + { + "epoch": 0.9318506315211422, + "grad_norm": 0.39789655804634094, + "learning_rate": 1.1091805161998681e-05, + "loss": 0.5612, + "step": 33938 + }, + { + "epoch": 0.9318780889621088, + "grad_norm": 0.5252246260643005, + "learning_rate": 1.1091375849457222e-05, + "loss": 0.5116, + "step": 33939 + }, + { + "epoch": 0.9319055464030752, + "grad_norm": 0.3528762459754944, + "learning_rate": 1.109094653488e-05, + "loss": 0.3937, + "step": 33940 + }, + { + "epoch": 0.9319330038440418, + "grad_norm": 0.4359288513660431, + "learning_rate": 1.1090517218267819e-05, + "loss": 0.4634, + "step": 33941 + }, + { + "epoch": 0.9319604612850082, + "grad_norm": 0.395525187253952, + "learning_rate": 1.1090087899621467e-05, + "loss": 0.4477, + "step": 33942 + }, + { + "epoch": 0.9319879187259748, + "grad_norm": 0.4318186640739441, + "learning_rate": 1.1089658578941758e-05, + "loss": 0.4712, + "step": 33943 + }, + { + "epoch": 0.9320153761669412, + "grad_norm": 0.3591901957988739, + "learning_rate": 1.1089229256229486e-05, + "loss": 0.4313, + "step": 33944 + }, + { + "epoch": 0.9320428336079077, + "grad_norm": 0.4308105707168579, + "learning_rate": 1.1088799931485456e-05, + "loss": 0.5312, + "step": 33945 + }, + { + "epoch": 0.9320702910488743, + "grad_norm": 0.43616336584091187, + "learning_rate": 1.1088370604710465e-05, + "loss": 0.4165, + "step": 33946 + }, + { + "epoch": 0.9320977484898407, + "grad_norm": 0.3756295442581177, + "learning_rate": 1.1087941275905316e-05, + "loss": 0.5139, + "step": 33947 + }, + { + "epoch": 0.9321252059308073, + "grad_norm": 0.394901305437088, + "learning_rate": 1.1087511945070811e-05, + "loss": 0.513, + "step": 33948 + }, + { + "epoch": 0.9321526633717737, + "grad_norm": 0.36664795875549316, + "learning_rate": 1.1087082612207745e-05, + "loss": 0.5158, + "step": 33949 + }, + { + "epoch": 0.9321801208127403, + "grad_norm": 0.4760986268520355, + "learning_rate": 1.1086653277316925e-05, + "loss": 0.5935, + "step": 33950 + }, + { + "epoch": 0.9322075782537067, + "grad_norm": 0.398243248462677, + "learning_rate": 1.1086223940399153e-05, + "loss": 0.4619, + "step": 33951 + }, + { + "epoch": 0.9322350356946733, + "grad_norm": 0.3957809507846832, + "learning_rate": 1.1085794601455223e-05, + "loss": 0.4999, + "step": 33952 + }, + { + "epoch": 0.9322624931356398, + "grad_norm": 0.3805885314941406, + "learning_rate": 1.1085365260485941e-05, + "loss": 0.4863, + "step": 33953 + }, + { + "epoch": 0.9322899505766062, + "grad_norm": 0.449421226978302, + "learning_rate": 1.1084935917492106e-05, + "loss": 0.4666, + "step": 33954 + }, + { + "epoch": 0.9323174080175728, + "grad_norm": 0.40944716334342957, + "learning_rate": 1.1084506572474516e-05, + "loss": 0.5331, + "step": 33955 + }, + { + "epoch": 0.9323448654585392, + "grad_norm": 0.39124244451522827, + "learning_rate": 1.108407722543398e-05, + "loss": 0.4781, + "step": 33956 + }, + { + "epoch": 0.9323723228995058, + "grad_norm": 0.40173661708831787, + "learning_rate": 1.108364787637129e-05, + "loss": 0.4633, + "step": 33957 + }, + { + "epoch": 0.9323997803404722, + "grad_norm": 0.37273404002189636, + "learning_rate": 1.1083218525287254e-05, + "loss": 0.4449, + "step": 33958 + }, + { + "epoch": 0.9324272377814388, + "grad_norm": 0.3965570628643036, + "learning_rate": 1.108278917218267e-05, + "loss": 0.4455, + "step": 33959 + }, + { + "epoch": 0.9324546952224053, + "grad_norm": 0.39474308490753174, + "learning_rate": 1.1082359817058335e-05, + "loss": 0.5068, + "step": 33960 + }, + { + "epoch": 0.9324821526633718, + "grad_norm": 0.3441825211048126, + "learning_rate": 1.1081930459915058e-05, + "loss": 0.4438, + "step": 33961 + }, + { + "epoch": 0.9325096101043383, + "grad_norm": 0.343740314245224, + "learning_rate": 1.108150110075363e-05, + "loss": 0.4, + "step": 33962 + }, + { + "epoch": 0.9325370675453047, + "grad_norm": 0.41003623604774475, + "learning_rate": 1.1081071739574863e-05, + "loss": 0.5445, + "step": 33963 + }, + { + "epoch": 0.9325645249862713, + "grad_norm": 0.3499546945095062, + "learning_rate": 1.108064237637955e-05, + "loss": 0.4333, + "step": 33964 + }, + { + "epoch": 0.9325919824272377, + "grad_norm": 0.4043712019920349, + "learning_rate": 1.1080213011168492e-05, + "loss": 0.5794, + "step": 33965 + }, + { + "epoch": 0.9326194398682043, + "grad_norm": 0.4591403901576996, + "learning_rate": 1.1079783643942494e-05, + "loss": 0.4861, + "step": 33966 + }, + { + "epoch": 0.9326468973091708, + "grad_norm": 0.32576507329940796, + "learning_rate": 1.1079354274702356e-05, + "loss": 0.4081, + "step": 33967 + }, + { + "epoch": 0.9326743547501373, + "grad_norm": 0.3941132426261902, + "learning_rate": 1.1078924903448876e-05, + "loss": 0.446, + "step": 33968 + }, + { + "epoch": 0.9327018121911038, + "grad_norm": 0.3657786250114441, + "learning_rate": 1.1078495530182855e-05, + "loss": 0.4306, + "step": 33969 + }, + { + "epoch": 0.9327292696320703, + "grad_norm": 0.39685970544815063, + "learning_rate": 1.1078066154905098e-05, + "loss": 0.4987, + "step": 33970 + }, + { + "epoch": 0.9327567270730368, + "grad_norm": 0.4271356463432312, + "learning_rate": 1.1077636777616402e-05, + "loss": 0.5007, + "step": 33971 + }, + { + "epoch": 0.9327841845140032, + "grad_norm": 0.4209103286266327, + "learning_rate": 1.1077207398317573e-05, + "loss": 0.4904, + "step": 33972 + }, + { + "epoch": 0.9328116419549698, + "grad_norm": 0.40894943475723267, + "learning_rate": 1.1076778017009404e-05, + "loss": 0.453, + "step": 33973 + }, + { + "epoch": 0.9328390993959363, + "grad_norm": 0.5085077285766602, + "learning_rate": 1.1076348633692704e-05, + "loss": 0.4341, + "step": 33974 + }, + { + "epoch": 0.9328665568369028, + "grad_norm": 0.37718498706817627, + "learning_rate": 1.1075919248368268e-05, + "loss": 0.4726, + "step": 33975 + }, + { + "epoch": 0.9328940142778693, + "grad_norm": 0.43744465708732605, + "learning_rate": 1.1075489861036898e-05, + "loss": 0.4749, + "step": 33976 + }, + { + "epoch": 0.9329214717188358, + "grad_norm": 0.3458517789840698, + "learning_rate": 1.1075060471699401e-05, + "loss": 0.4704, + "step": 33977 + }, + { + "epoch": 0.9329489291598023, + "grad_norm": 0.4000091552734375, + "learning_rate": 1.1074631080356568e-05, + "loss": 0.4959, + "step": 33978 + }, + { + "epoch": 0.9329763866007688, + "grad_norm": 1.368048906326294, + "learning_rate": 1.1074201687009207e-05, + "loss": 0.5002, + "step": 33979 + }, + { + "epoch": 0.9330038440417353, + "grad_norm": 0.4064599573612213, + "learning_rate": 1.1073772291658118e-05, + "loss": 0.553, + "step": 33980 + }, + { + "epoch": 0.9330313014827019, + "grad_norm": 0.4002901315689087, + "learning_rate": 1.10733428943041e-05, + "loss": 0.5453, + "step": 33981 + }, + { + "epoch": 0.9330587589236683, + "grad_norm": 0.3875720500946045, + "learning_rate": 1.1072913494947954e-05, + "loss": 0.455, + "step": 33982 + }, + { + "epoch": 0.9330862163646348, + "grad_norm": 0.3715759217739105, + "learning_rate": 1.1072484093590484e-05, + "loss": 0.4912, + "step": 33983 + }, + { + "epoch": 0.9331136738056013, + "grad_norm": 0.47581928968429565, + "learning_rate": 1.1072054690232488e-05, + "loss": 0.5731, + "step": 33984 + }, + { + "epoch": 0.9331411312465678, + "grad_norm": 0.371591180562973, + "learning_rate": 1.1071625284874765e-05, + "loss": 0.4458, + "step": 33985 + }, + { + "epoch": 0.9331685886875343, + "grad_norm": 0.3524874448776245, + "learning_rate": 1.1071195877518123e-05, + "loss": 0.5685, + "step": 33986 + }, + { + "epoch": 0.9331960461285008, + "grad_norm": 0.3871762752532959, + "learning_rate": 1.1070766468163356e-05, + "loss": 0.4401, + "step": 33987 + }, + { + "epoch": 0.9332235035694674, + "grad_norm": 0.38296979665756226, + "learning_rate": 1.1070337056811272e-05, + "loss": 0.5333, + "step": 33988 + }, + { + "epoch": 0.9332509610104338, + "grad_norm": 0.3662991523742676, + "learning_rate": 1.1069907643462662e-05, + "loss": 0.5472, + "step": 33989 + }, + { + "epoch": 0.9332784184514004, + "grad_norm": 0.3898472487926483, + "learning_rate": 1.1069478228118335e-05, + "loss": 0.4795, + "step": 33990 + }, + { + "epoch": 0.9333058758923668, + "grad_norm": 0.4285678267478943, + "learning_rate": 1.1069048810779093e-05, + "loss": 0.4859, + "step": 33991 + }, + { + "epoch": 0.9333333333333333, + "grad_norm": 0.38592728972435, + "learning_rate": 1.1068619391445729e-05, + "loss": 0.4416, + "step": 33992 + }, + { + "epoch": 0.9333607907742998, + "grad_norm": 0.39011332392692566, + "learning_rate": 1.1068189970119052e-05, + "loss": 0.4918, + "step": 33993 + }, + { + "epoch": 0.9333882482152663, + "grad_norm": 0.39472246170043945, + "learning_rate": 1.1067760546799857e-05, + "loss": 0.4644, + "step": 33994 + }, + { + "epoch": 0.9334157056562329, + "grad_norm": 0.400867223739624, + "learning_rate": 1.1067331121488951e-05, + "loss": 0.4224, + "step": 33995 + }, + { + "epoch": 0.9334431630971993, + "grad_norm": 1.5188970565795898, + "learning_rate": 1.1066901694187131e-05, + "loss": 0.4582, + "step": 33996 + }, + { + "epoch": 0.9334706205381659, + "grad_norm": 0.3935699760913849, + "learning_rate": 1.1066472264895195e-05, + "loss": 0.496, + "step": 33997 + }, + { + "epoch": 0.9334980779791323, + "grad_norm": 0.34767046570777893, + "learning_rate": 1.1066042833613952e-05, + "loss": 0.5268, + "step": 33998 + }, + { + "epoch": 0.9335255354200989, + "grad_norm": 0.4188593626022339, + "learning_rate": 1.1065613400344197e-05, + "loss": 0.4764, + "step": 33999 + }, + { + "epoch": 0.9335529928610653, + "grad_norm": 0.41617467999458313, + "learning_rate": 1.1065183965086735e-05, + "loss": 0.4645, + "step": 34000 + }, + { + "epoch": 0.9335804503020319, + "grad_norm": 0.3855729401111603, + "learning_rate": 1.1064754527842364e-05, + "loss": 0.5222, + "step": 34001 + }, + { + "epoch": 0.9336079077429984, + "grad_norm": 0.41145092248916626, + "learning_rate": 1.1064325088611884e-05, + "loss": 0.5324, + "step": 34002 + }, + { + "epoch": 0.9336353651839648, + "grad_norm": 0.3413155674934387, + "learning_rate": 1.1063895647396101e-05, + "loss": 0.4994, + "step": 34003 + }, + { + "epoch": 0.9336628226249314, + "grad_norm": 0.3970710039138794, + "learning_rate": 1.106346620419581e-05, + "loss": 0.528, + "step": 34004 + }, + { + "epoch": 0.9336902800658978, + "grad_norm": 0.39236852526664734, + "learning_rate": 1.1063036759011818e-05, + "loss": 0.4915, + "step": 34005 + }, + { + "epoch": 0.9337177375068644, + "grad_norm": 0.3829016387462616, + "learning_rate": 1.1062607311844924e-05, + "loss": 0.4606, + "step": 34006 + }, + { + "epoch": 0.9337451949478308, + "grad_norm": 0.39378589391708374, + "learning_rate": 1.1062177862695924e-05, + "loss": 0.4749, + "step": 34007 + }, + { + "epoch": 0.9337726523887974, + "grad_norm": 0.42544811964035034, + "learning_rate": 1.1061748411565627e-05, + "loss": 0.4165, + "step": 34008 + }, + { + "epoch": 0.9338001098297639, + "grad_norm": 0.6946243643760681, + "learning_rate": 1.1061318958454827e-05, + "loss": 0.4912, + "step": 34009 + }, + { + "epoch": 0.9338275672707304, + "grad_norm": 0.3871946334838867, + "learning_rate": 1.1060889503364331e-05, + "loss": 0.582, + "step": 34010 + }, + { + "epoch": 0.9338550247116969, + "grad_norm": 0.5361049771308899, + "learning_rate": 1.1060460046294934e-05, + "loss": 0.4617, + "step": 34011 + }, + { + "epoch": 0.9338824821526633, + "grad_norm": 0.5163166522979736, + "learning_rate": 1.1060030587247446e-05, + "loss": 0.5956, + "step": 34012 + }, + { + "epoch": 0.9339099395936299, + "grad_norm": 0.3849034011363983, + "learning_rate": 1.1059601126222659e-05, + "loss": 0.5147, + "step": 34013 + }, + { + "epoch": 0.9339373970345963, + "grad_norm": 0.43492400646209717, + "learning_rate": 1.1059171663221378e-05, + "loss": 0.6157, + "step": 34014 + }, + { + "epoch": 0.9339648544755629, + "grad_norm": 0.3841484785079956, + "learning_rate": 1.1058742198244405e-05, + "loss": 0.5126, + "step": 34015 + }, + { + "epoch": 0.9339923119165294, + "grad_norm": 0.363029420375824, + "learning_rate": 1.1058312731292539e-05, + "loss": 0.4801, + "step": 34016 + }, + { + "epoch": 0.9340197693574959, + "grad_norm": 0.36999085545539856, + "learning_rate": 1.1057883262366582e-05, + "loss": 0.4638, + "step": 34017 + }, + { + "epoch": 0.9340472267984624, + "grad_norm": 0.39883914589881897, + "learning_rate": 1.1057453791467337e-05, + "loss": 0.5429, + "step": 34018 + }, + { + "epoch": 0.9340746842394289, + "grad_norm": 0.4001128673553467, + "learning_rate": 1.10570243185956e-05, + "loss": 0.4807, + "step": 34019 + }, + { + "epoch": 0.9341021416803954, + "grad_norm": 0.3562025725841522, + "learning_rate": 1.1056594843752177e-05, + "loss": 0.4578, + "step": 34020 + }, + { + "epoch": 0.9341295991213618, + "grad_norm": 0.37068548798561096, + "learning_rate": 1.1056165366937868e-05, + "loss": 0.5466, + "step": 34021 + }, + { + "epoch": 0.9341570565623284, + "grad_norm": 0.3746839761734009, + "learning_rate": 1.1055735888153472e-05, + "loss": 0.4901, + "step": 34022 + }, + { + "epoch": 0.9341845140032949, + "grad_norm": 0.4139995872974396, + "learning_rate": 1.1055306407399794e-05, + "loss": 0.5109, + "step": 34023 + }, + { + "epoch": 0.9342119714442614, + "grad_norm": 0.4165017902851105, + "learning_rate": 1.105487692467763e-05, + "loss": 0.5985, + "step": 34024 + }, + { + "epoch": 0.9342394288852279, + "grad_norm": 0.38847458362579346, + "learning_rate": 1.1054447439987785e-05, + "loss": 0.5263, + "step": 34025 + }, + { + "epoch": 0.9342668863261944, + "grad_norm": 0.4455024302005768, + "learning_rate": 1.105401795333106e-05, + "loss": 0.5066, + "step": 34026 + }, + { + "epoch": 0.9342943437671609, + "grad_norm": 0.38331758975982666, + "learning_rate": 1.105358846470825e-05, + "loss": 0.4463, + "step": 34027 + }, + { + "epoch": 0.9343218012081274, + "grad_norm": 0.3850729763507843, + "learning_rate": 1.1053158974120168e-05, + "loss": 0.4737, + "step": 34028 + }, + { + "epoch": 0.9343492586490939, + "grad_norm": 0.41613122820854187, + "learning_rate": 1.1052729481567605e-05, + "loss": 0.4437, + "step": 34029 + }, + { + "epoch": 0.9343767160900605, + "grad_norm": 0.41709765791893005, + "learning_rate": 1.1052299987051368e-05, + "loss": 0.4271, + "step": 34030 + }, + { + "epoch": 0.9344041735310269, + "grad_norm": 0.35880815982818604, + "learning_rate": 1.1051870490572253e-05, + "loss": 0.4031, + "step": 34031 + }, + { + "epoch": 0.9344316309719934, + "grad_norm": 0.46162381768226624, + "learning_rate": 1.1051440992131063e-05, + "loss": 0.5287, + "step": 34032 + }, + { + "epoch": 0.9344590884129599, + "grad_norm": 0.42325034737586975, + "learning_rate": 1.1051011491728603e-05, + "loss": 0.4789, + "step": 34033 + }, + { + "epoch": 0.9344865458539264, + "grad_norm": 0.38202816247940063, + "learning_rate": 1.105058198936567e-05, + "loss": 0.4761, + "step": 34034 + }, + { + "epoch": 0.9345140032948929, + "grad_norm": 0.4627980887889862, + "learning_rate": 1.1050152485043065e-05, + "loss": 0.5546, + "step": 34035 + }, + { + "epoch": 0.9345414607358594, + "grad_norm": 0.3907657265663147, + "learning_rate": 1.1049722978761592e-05, + "loss": 0.5408, + "step": 34036 + }, + { + "epoch": 0.934568918176826, + "grad_norm": 0.387103796005249, + "learning_rate": 1.1049293470522049e-05, + "loss": 0.531, + "step": 34037 + }, + { + "epoch": 0.9345963756177924, + "grad_norm": 0.3866257667541504, + "learning_rate": 1.104886396032524e-05, + "loss": 0.452, + "step": 34038 + }, + { + "epoch": 0.934623833058759, + "grad_norm": 0.35728907585144043, + "learning_rate": 1.1048434448171967e-05, + "loss": 0.4624, + "step": 34039 + }, + { + "epoch": 0.9346512904997254, + "grad_norm": 0.3859559893608093, + "learning_rate": 1.1048004934063025e-05, + "loss": 0.4516, + "step": 34040 + }, + { + "epoch": 0.934678747940692, + "grad_norm": 0.4023617208003998, + "learning_rate": 1.1047575417999222e-05, + "loss": 0.5199, + "step": 34041 + }, + { + "epoch": 0.9347062053816584, + "grad_norm": 0.38834381103515625, + "learning_rate": 1.1047145899981356e-05, + "loss": 0.4903, + "step": 34042 + }, + { + "epoch": 0.9347336628226249, + "grad_norm": 0.35221409797668457, + "learning_rate": 1.1046716380010228e-05, + "loss": 0.5066, + "step": 34043 + }, + { + "epoch": 0.9347611202635915, + "grad_norm": 0.37664228677749634, + "learning_rate": 1.104628685808664e-05, + "loss": 0.4035, + "step": 34044 + }, + { + "epoch": 0.9347885777045579, + "grad_norm": 0.42759084701538086, + "learning_rate": 1.1045857334211394e-05, + "loss": 0.5176, + "step": 34045 + }, + { + "epoch": 0.9348160351455245, + "grad_norm": 0.4385153353214264, + "learning_rate": 1.104542780838529e-05, + "loss": 0.5129, + "step": 34046 + }, + { + "epoch": 0.9348434925864909, + "grad_norm": 0.36124637722969055, + "learning_rate": 1.104499828060913e-05, + "loss": 0.3727, + "step": 34047 + }, + { + "epoch": 0.9348709500274575, + "grad_norm": 0.44429200887680054, + "learning_rate": 1.1044568750883713e-05, + "loss": 0.5255, + "step": 34048 + }, + { + "epoch": 0.9348984074684239, + "grad_norm": 0.43668457865715027, + "learning_rate": 1.1044139219209841e-05, + "loss": 0.4886, + "step": 34049 + }, + { + "epoch": 0.9349258649093904, + "grad_norm": 0.40713199973106384, + "learning_rate": 1.1043709685588317e-05, + "loss": 0.4534, + "step": 34050 + }, + { + "epoch": 0.934953322350357, + "grad_norm": 0.5649691820144653, + "learning_rate": 1.1043280150019943e-05, + "loss": 0.4552, + "step": 34051 + }, + { + "epoch": 0.9349807797913234, + "grad_norm": 0.4544326364994049, + "learning_rate": 1.104285061250552e-05, + "loss": 0.5296, + "step": 34052 + }, + { + "epoch": 0.93500823723229, + "grad_norm": 0.3777252733707428, + "learning_rate": 1.1042421073045843e-05, + "loss": 0.4184, + "step": 34053 + }, + { + "epoch": 0.9350356946732564, + "grad_norm": 0.37462204694747925, + "learning_rate": 1.1041991531641723e-05, + "loss": 0.4721, + "step": 34054 + }, + { + "epoch": 0.935063152114223, + "grad_norm": 0.43971070647239685, + "learning_rate": 1.1041561988293951e-05, + "loss": 0.4915, + "step": 34055 + }, + { + "epoch": 0.9350906095551894, + "grad_norm": 0.4311192035675049, + "learning_rate": 1.1041132443003337e-05, + "loss": 0.4813, + "step": 34056 + }, + { + "epoch": 0.935118066996156, + "grad_norm": 0.41088420152664185, + "learning_rate": 1.1040702895770679e-05, + "loss": 0.4913, + "step": 34057 + }, + { + "epoch": 0.9351455244371225, + "grad_norm": 0.38271889090538025, + "learning_rate": 1.1040273346596774e-05, + "loss": 0.4999, + "step": 34058 + }, + { + "epoch": 0.935172981878089, + "grad_norm": 0.47614309191703796, + "learning_rate": 1.1039843795482432e-05, + "loss": 0.4872, + "step": 34059 + }, + { + "epoch": 0.9352004393190555, + "grad_norm": 0.3648003041744232, + "learning_rate": 1.1039414242428447e-05, + "loss": 0.4188, + "step": 34060 + }, + { + "epoch": 0.9352278967600219, + "grad_norm": 0.360645592212677, + "learning_rate": 1.1038984687435623e-05, + "loss": 0.4178, + "step": 34061 + }, + { + "epoch": 0.9352553542009885, + "grad_norm": 0.3796738088130951, + "learning_rate": 1.1038555130504761e-05, + "loss": 0.4733, + "step": 34062 + }, + { + "epoch": 0.9352828116419549, + "grad_norm": 0.3753422498703003, + "learning_rate": 1.1038125571636661e-05, + "loss": 0.4421, + "step": 34063 + }, + { + "epoch": 0.9353102690829215, + "grad_norm": 0.381559818983078, + "learning_rate": 1.1037696010832128e-05, + "loss": 0.4723, + "step": 34064 + }, + { + "epoch": 0.935337726523888, + "grad_norm": 0.40872710943222046, + "learning_rate": 1.1037266448091961e-05, + "loss": 0.4775, + "step": 34065 + }, + { + "epoch": 0.9353651839648545, + "grad_norm": 0.41046789288520813, + "learning_rate": 1.1036836883416959e-05, + "loss": 0.5231, + "step": 34066 + }, + { + "epoch": 0.935392641405821, + "grad_norm": 0.37612971663475037, + "learning_rate": 1.1036407316807926e-05, + "loss": 0.3919, + "step": 34067 + }, + { + "epoch": 0.9354200988467875, + "grad_norm": 0.3987826108932495, + "learning_rate": 1.1035977748265661e-05, + "loss": 0.4033, + "step": 34068 + }, + { + "epoch": 0.935447556287754, + "grad_norm": 0.3781321942806244, + "learning_rate": 1.103554817779097e-05, + "loss": 0.4447, + "step": 34069 + }, + { + "epoch": 0.9354750137287204, + "grad_norm": 0.41998839378356934, + "learning_rate": 1.1035118605384649e-05, + "loss": 0.5554, + "step": 34070 + }, + { + "epoch": 0.935502471169687, + "grad_norm": 0.36286255717277527, + "learning_rate": 1.1034689031047501e-05, + "loss": 0.3956, + "step": 34071 + }, + { + "epoch": 0.9355299286106535, + "grad_norm": 0.3930697441101074, + "learning_rate": 1.103425945478033e-05, + "loss": 0.4851, + "step": 34072 + }, + { + "epoch": 0.93555738605162, + "grad_norm": 0.37034595012664795, + "learning_rate": 1.1033829876583933e-05, + "loss": 0.5221, + "step": 34073 + }, + { + "epoch": 0.9355848434925865, + "grad_norm": 0.3654637932777405, + "learning_rate": 1.1033400296459114e-05, + "loss": 0.5642, + "step": 34074 + }, + { + "epoch": 0.935612300933553, + "grad_norm": 0.38869455456733704, + "learning_rate": 1.1032970714406675e-05, + "loss": 0.4977, + "step": 34075 + }, + { + "epoch": 0.9356397583745195, + "grad_norm": 0.36387914419174194, + "learning_rate": 1.1032541130427413e-05, + "loss": 0.4434, + "step": 34076 + }, + { + "epoch": 0.935667215815486, + "grad_norm": 0.377834290266037, + "learning_rate": 1.1032111544522132e-05, + "loss": 0.4759, + "step": 34077 + }, + { + "epoch": 0.9356946732564525, + "grad_norm": 0.34992319345474243, + "learning_rate": 1.1031681956691638e-05, + "loss": 0.503, + "step": 34078 + }, + { + "epoch": 0.935722130697419, + "grad_norm": 0.3653177320957184, + "learning_rate": 1.1031252366936722e-05, + "loss": 0.4447, + "step": 34079 + }, + { + "epoch": 0.9357495881383855, + "grad_norm": 0.41903015971183777, + "learning_rate": 1.1030822775258197e-05, + "loss": 0.5556, + "step": 34080 + }, + { + "epoch": 0.935777045579352, + "grad_norm": 0.3756762742996216, + "learning_rate": 1.1030393181656854e-05, + "loss": 0.5458, + "step": 34081 + }, + { + "epoch": 0.9358045030203185, + "grad_norm": 0.4192802906036377, + "learning_rate": 1.1029963586133501e-05, + "loss": 0.4368, + "step": 34082 + }, + { + "epoch": 0.935831960461285, + "grad_norm": 0.36061546206474304, + "learning_rate": 1.1029533988688939e-05, + "loss": 0.4756, + "step": 34083 + }, + { + "epoch": 0.9358594179022515, + "grad_norm": 0.3724822402000427, + "learning_rate": 1.1029104389323963e-05, + "loss": 0.5181, + "step": 34084 + }, + { + "epoch": 0.935886875343218, + "grad_norm": 0.4200810194015503, + "learning_rate": 1.1028674788039382e-05, + "loss": 0.4702, + "step": 34085 + }, + { + "epoch": 0.9359143327841846, + "grad_norm": 0.3883519172668457, + "learning_rate": 1.1028245184835993e-05, + "loss": 0.4691, + "step": 34086 + }, + { + "epoch": 0.935941790225151, + "grad_norm": 0.3525243401527405, + "learning_rate": 1.10278155797146e-05, + "loss": 0.4656, + "step": 34087 + }, + { + "epoch": 0.9359692476661176, + "grad_norm": 0.38445210456848145, + "learning_rate": 1.1027385972676002e-05, + "loss": 0.4809, + "step": 34088 + }, + { + "epoch": 0.935996705107084, + "grad_norm": 0.4525030255317688, + "learning_rate": 1.1026956363721e-05, + "loss": 0.5228, + "step": 34089 + }, + { + "epoch": 0.9360241625480505, + "grad_norm": 0.38695308566093445, + "learning_rate": 1.10265267528504e-05, + "loss": 0.5057, + "step": 34090 + }, + { + "epoch": 0.936051619989017, + "grad_norm": 0.4670450985431671, + "learning_rate": 1.1026097140064997e-05, + "loss": 0.549, + "step": 34091 + }, + { + "epoch": 0.9360790774299835, + "grad_norm": 0.35982105135917664, + "learning_rate": 1.1025667525365596e-05, + "loss": 0.4496, + "step": 34092 + }, + { + "epoch": 0.9361065348709501, + "grad_norm": 0.4590197205543518, + "learning_rate": 1.1025237908752998e-05, + "loss": 0.5536, + "step": 34093 + }, + { + "epoch": 0.9361339923119165, + "grad_norm": 0.4020620584487915, + "learning_rate": 1.1024808290228001e-05, + "loss": 0.527, + "step": 34094 + }, + { + "epoch": 0.9361614497528831, + "grad_norm": 0.3984156548976898, + "learning_rate": 1.1024378669791414e-05, + "loss": 0.5438, + "step": 34095 + }, + { + "epoch": 0.9361889071938495, + "grad_norm": 0.4083051383495331, + "learning_rate": 1.1023949047444033e-05, + "loss": 0.558, + "step": 34096 + }, + { + "epoch": 0.9362163646348161, + "grad_norm": 0.41695886850357056, + "learning_rate": 1.102351942318666e-05, + "loss": 0.431, + "step": 34097 + }, + { + "epoch": 0.9362438220757825, + "grad_norm": 0.3727676570415497, + "learning_rate": 1.1023089797020095e-05, + "loss": 0.4698, + "step": 34098 + }, + { + "epoch": 0.936271279516749, + "grad_norm": 0.4032125771045685, + "learning_rate": 1.102266016894514e-05, + "loss": 0.4804, + "step": 34099 + }, + { + "epoch": 0.9362987369577156, + "grad_norm": 0.38922083377838135, + "learning_rate": 1.1022230538962602e-05, + "loss": 0.473, + "step": 34100 + }, + { + "epoch": 0.936326194398682, + "grad_norm": 0.48115548491477966, + "learning_rate": 1.1021800907073275e-05, + "loss": 0.4194, + "step": 34101 + }, + { + "epoch": 0.9363536518396486, + "grad_norm": 0.3662664592266083, + "learning_rate": 1.1021371273277964e-05, + "loss": 0.4936, + "step": 34102 + }, + { + "epoch": 0.936381109280615, + "grad_norm": 0.4099118411540985, + "learning_rate": 1.1020941637577466e-05, + "loss": 0.4077, + "step": 34103 + }, + { + "epoch": 0.9364085667215816, + "grad_norm": 0.41471174359321594, + "learning_rate": 1.102051199997259e-05, + "loss": 0.5543, + "step": 34104 + }, + { + "epoch": 0.936436024162548, + "grad_norm": 0.3946733772754669, + "learning_rate": 1.1020082360464133e-05, + "loss": 0.4788, + "step": 34105 + }, + { + "epoch": 0.9364634816035146, + "grad_norm": 0.3789057433605194, + "learning_rate": 1.1019652719052899e-05, + "loss": 0.4923, + "step": 34106 + }, + { + "epoch": 0.9364909390444811, + "grad_norm": 0.37840285897254944, + "learning_rate": 1.1019223075739683e-05, + "loss": 0.5341, + "step": 34107 + }, + { + "epoch": 0.9365183964854475, + "grad_norm": 0.38380861282348633, + "learning_rate": 1.1018793430525291e-05, + "loss": 0.4722, + "step": 34108 + }, + { + "epoch": 0.9365458539264141, + "grad_norm": 0.3561320900917053, + "learning_rate": 1.1018363783410525e-05, + "loss": 0.4453, + "step": 34109 + }, + { + "epoch": 0.9365733113673805, + "grad_norm": 0.4056204557418823, + "learning_rate": 1.1017934134396186e-05, + "loss": 0.4526, + "step": 34110 + }, + { + "epoch": 0.9366007688083471, + "grad_norm": 0.4097292423248291, + "learning_rate": 1.1017504483483074e-05, + "loss": 0.5305, + "step": 34111 + }, + { + "epoch": 0.9366282262493135, + "grad_norm": 0.47967031598091125, + "learning_rate": 1.1017074830671991e-05, + "loss": 0.4992, + "step": 34112 + }, + { + "epoch": 0.9366556836902801, + "grad_norm": 0.3612501621246338, + "learning_rate": 1.1016645175963743e-05, + "loss": 0.3997, + "step": 34113 + }, + { + "epoch": 0.9366831411312466, + "grad_norm": 0.3712849020957947, + "learning_rate": 1.1016215519359123e-05, + "loss": 0.5492, + "step": 34114 + }, + { + "epoch": 0.9367105985722131, + "grad_norm": 0.40944692492485046, + "learning_rate": 1.1015785860858937e-05, + "loss": 0.4759, + "step": 34115 + }, + { + "epoch": 0.9367380560131796, + "grad_norm": 0.36818262934684753, + "learning_rate": 1.1015356200463989e-05, + "loss": 0.5602, + "step": 34116 + }, + { + "epoch": 0.936765513454146, + "grad_norm": 0.4300045073032379, + "learning_rate": 1.1014926538175073e-05, + "loss": 0.5892, + "step": 34117 + }, + { + "epoch": 0.9367929708951126, + "grad_norm": 0.41327106952667236, + "learning_rate": 1.1014496873993e-05, + "loss": 0.4864, + "step": 34118 + }, + { + "epoch": 0.936820428336079, + "grad_norm": 0.4379746615886688, + "learning_rate": 1.1014067207918563e-05, + "loss": 0.5722, + "step": 34119 + }, + { + "epoch": 0.9368478857770456, + "grad_norm": 0.4393318295478821, + "learning_rate": 1.1013637539952569e-05, + "loss": 0.4582, + "step": 34120 + }, + { + "epoch": 0.9368753432180121, + "grad_norm": 0.389230877161026, + "learning_rate": 1.1013207870095817e-05, + "loss": 0.5665, + "step": 34121 + }, + { + "epoch": 0.9369028006589786, + "grad_norm": 0.3755532503128052, + "learning_rate": 1.101277819834911e-05, + "loss": 0.5639, + "step": 34122 + }, + { + "epoch": 0.9369302580999451, + "grad_norm": 0.41278600692749023, + "learning_rate": 1.1012348524713246e-05, + "loss": 0.4977, + "step": 34123 + }, + { + "epoch": 0.9369577155409116, + "grad_norm": 0.46407657861709595, + "learning_rate": 1.1011918849189029e-05, + "loss": 0.4606, + "step": 34124 + }, + { + "epoch": 0.9369851729818781, + "grad_norm": 0.40687209367752075, + "learning_rate": 1.1011489171777261e-05, + "loss": 0.5541, + "step": 34125 + }, + { + "epoch": 0.9370126304228446, + "grad_norm": 0.3833810091018677, + "learning_rate": 1.1011059492478743e-05, + "loss": 0.4878, + "step": 34126 + }, + { + "epoch": 0.9370400878638111, + "grad_norm": 0.4015711545944214, + "learning_rate": 1.1010629811294278e-05, + "loss": 0.5272, + "step": 34127 + }, + { + "epoch": 0.9370675453047776, + "grad_norm": 0.37204477190971375, + "learning_rate": 1.1010200128224664e-05, + "loss": 0.5155, + "step": 34128 + }, + { + "epoch": 0.9370950027457441, + "grad_norm": 0.4277205169200897, + "learning_rate": 1.1009770443270707e-05, + "loss": 0.5034, + "step": 34129 + }, + { + "epoch": 0.9371224601867106, + "grad_norm": 0.4123181402683258, + "learning_rate": 1.10093407564332e-05, + "loss": 0.4962, + "step": 34130 + }, + { + "epoch": 0.9371499176276771, + "grad_norm": 0.4069671034812927, + "learning_rate": 1.1008911067712955e-05, + "loss": 0.5003, + "step": 34131 + }, + { + "epoch": 0.9371773750686436, + "grad_norm": 0.4363509714603424, + "learning_rate": 1.1008481377110768e-05, + "loss": 0.5021, + "step": 34132 + }, + { + "epoch": 0.9372048325096101, + "grad_norm": 0.39486226439476013, + "learning_rate": 1.100805168462744e-05, + "loss": 0.5632, + "step": 34133 + }, + { + "epoch": 0.9372322899505766, + "grad_norm": 0.456407368183136, + "learning_rate": 1.1007621990263778e-05, + "loss": 0.4583, + "step": 34134 + }, + { + "epoch": 0.9372597473915432, + "grad_norm": 0.40210938453674316, + "learning_rate": 1.1007192294020574e-05, + "loss": 0.5345, + "step": 34135 + }, + { + "epoch": 0.9372872048325096, + "grad_norm": 0.40387752652168274, + "learning_rate": 1.100676259589864e-05, + "loss": 0.5218, + "step": 34136 + }, + { + "epoch": 0.9373146622734762, + "grad_norm": 0.40913301706314087, + "learning_rate": 1.100633289589877e-05, + "loss": 0.5435, + "step": 34137 + }, + { + "epoch": 0.9373421197144426, + "grad_norm": 0.396953284740448, + "learning_rate": 1.1005903194021766e-05, + "loss": 0.4255, + "step": 34138 + }, + { + "epoch": 0.9373695771554091, + "grad_norm": 0.43009939789772034, + "learning_rate": 1.1005473490268434e-05, + "loss": 0.4633, + "step": 34139 + }, + { + "epoch": 0.9373970345963756, + "grad_norm": 0.3662225604057312, + "learning_rate": 1.1005043784639573e-05, + "loss": 0.5021, + "step": 34140 + }, + { + "epoch": 0.9374244920373421, + "grad_norm": 0.435918390750885, + "learning_rate": 1.1004614077135982e-05, + "loss": 0.508, + "step": 34141 + }, + { + "epoch": 0.9374519494783087, + "grad_norm": 0.4059765040874481, + "learning_rate": 1.1004184367758468e-05, + "loss": 0.5146, + "step": 34142 + }, + { + "epoch": 0.9374794069192751, + "grad_norm": 0.4313197731971741, + "learning_rate": 1.1003754656507828e-05, + "loss": 0.5451, + "step": 34143 + }, + { + "epoch": 0.9375068643602417, + "grad_norm": 0.3622271418571472, + "learning_rate": 1.1003324943384865e-05, + "loss": 0.3988, + "step": 34144 + }, + { + "epoch": 0.9375343218012081, + "grad_norm": 0.358955055475235, + "learning_rate": 1.1002895228390385e-05, + "loss": 0.464, + "step": 34145 + }, + { + "epoch": 0.9375617792421747, + "grad_norm": 0.3763884902000427, + "learning_rate": 1.100246551152518e-05, + "loss": 0.468, + "step": 34146 + }, + { + "epoch": 0.9375892366831411, + "grad_norm": 0.40837815403938293, + "learning_rate": 1.100203579279006e-05, + "loss": 0.4669, + "step": 34147 + }, + { + "epoch": 0.9376166941241076, + "grad_norm": 0.3665204346179962, + "learning_rate": 1.100160607218582e-05, + "loss": 0.4162, + "step": 34148 + }, + { + "epoch": 0.9376441515650741, + "grad_norm": 0.3869040906429291, + "learning_rate": 1.1001176349713268e-05, + "loss": 0.4353, + "step": 34149 + }, + { + "epoch": 0.9376716090060406, + "grad_norm": 0.39645808935165405, + "learning_rate": 1.1000746625373202e-05, + "loss": 0.5253, + "step": 34150 + }, + { + "epoch": 0.9376990664470072, + "grad_norm": 0.44957008957862854, + "learning_rate": 1.1000316899166423e-05, + "loss": 0.5459, + "step": 34151 + }, + { + "epoch": 0.9377265238879736, + "grad_norm": 0.39621973037719727, + "learning_rate": 1.0999887171093736e-05, + "loss": 0.4841, + "step": 34152 + }, + { + "epoch": 0.9377539813289402, + "grad_norm": 0.4622866213321686, + "learning_rate": 1.0999457441155941e-05, + "loss": 0.5601, + "step": 34153 + }, + { + "epoch": 0.9377814387699066, + "grad_norm": 0.42490309476852417, + "learning_rate": 1.0999027709353833e-05, + "loss": 0.4931, + "step": 34154 + }, + { + "epoch": 0.9378088962108732, + "grad_norm": 0.640728235244751, + "learning_rate": 1.0998597975688226e-05, + "loss": 0.5319, + "step": 34155 + }, + { + "epoch": 0.9378363536518396, + "grad_norm": 0.5038220882415771, + "learning_rate": 1.099816824015991e-05, + "loss": 0.5998, + "step": 34156 + }, + { + "epoch": 0.9378638110928061, + "grad_norm": 0.42235735058784485, + "learning_rate": 1.0997738502769694e-05, + "loss": 0.421, + "step": 34157 + }, + { + "epoch": 0.9378912685337727, + "grad_norm": 0.3779776692390442, + "learning_rate": 1.0997308763518378e-05, + "loss": 0.4672, + "step": 34158 + }, + { + "epoch": 0.9379187259747391, + "grad_norm": 0.3780769109725952, + "learning_rate": 1.0996879022406763e-05, + "loss": 0.4722, + "step": 34159 + }, + { + "epoch": 0.9379461834157057, + "grad_norm": 0.6078518033027649, + "learning_rate": 1.099644927943565e-05, + "loss": 0.4404, + "step": 34160 + }, + { + "epoch": 0.9379736408566721, + "grad_norm": 0.4144550561904907, + "learning_rate": 1.0996019534605839e-05, + "loss": 0.5264, + "step": 34161 + }, + { + "epoch": 0.9380010982976387, + "grad_norm": 0.4366423785686493, + "learning_rate": 1.0995589787918137e-05, + "loss": 0.4385, + "step": 34162 + }, + { + "epoch": 0.9380285557386051, + "grad_norm": 0.43204960227012634, + "learning_rate": 1.0995160039373339e-05, + "loss": 0.4902, + "step": 34163 + }, + { + "epoch": 0.9380560131795717, + "grad_norm": 0.3818499743938446, + "learning_rate": 1.0994730288972253e-05, + "loss": 0.4383, + "step": 34164 + }, + { + "epoch": 0.9380834706205382, + "grad_norm": 0.3784787058830261, + "learning_rate": 1.0994300536715677e-05, + "loss": 0.5286, + "step": 34165 + }, + { + "epoch": 0.9381109280615046, + "grad_norm": 0.38721731305122375, + "learning_rate": 1.099387078260441e-05, + "loss": 0.489, + "step": 34166 + }, + { + "epoch": 0.9381383855024712, + "grad_norm": 0.3442370295524597, + "learning_rate": 1.099344102663926e-05, + "loss": 0.4599, + "step": 34167 + }, + { + "epoch": 0.9381658429434376, + "grad_norm": 0.3921426236629486, + "learning_rate": 1.0993011268821023e-05, + "loss": 0.5967, + "step": 34168 + }, + { + "epoch": 0.9381933003844042, + "grad_norm": 0.39721623063087463, + "learning_rate": 1.0992581509150507e-05, + "loss": 0.5402, + "step": 34169 + }, + { + "epoch": 0.9382207578253706, + "grad_norm": 0.4162483513355255, + "learning_rate": 1.0992151747628505e-05, + "loss": 0.5028, + "step": 34170 + }, + { + "epoch": 0.9382482152663372, + "grad_norm": 0.3767414689064026, + "learning_rate": 1.0991721984255825e-05, + "loss": 0.5194, + "step": 34171 + }, + { + "epoch": 0.9382756727073037, + "grad_norm": 0.39086925983428955, + "learning_rate": 1.099129221903327e-05, + "loss": 0.5738, + "step": 34172 + }, + { + "epoch": 0.9383031301482702, + "grad_norm": 0.39010483026504517, + "learning_rate": 1.0990862451961632e-05, + "loss": 0.3546, + "step": 34173 + }, + { + "epoch": 0.9383305875892367, + "grad_norm": 0.3959309756755829, + "learning_rate": 1.0990432683041726e-05, + "loss": 0.4614, + "step": 34174 + }, + { + "epoch": 0.9383580450302031, + "grad_norm": 0.38558048009872437, + "learning_rate": 1.0990002912274342e-05, + "loss": 0.478, + "step": 34175 + }, + { + "epoch": 0.9383855024711697, + "grad_norm": 0.41956856846809387, + "learning_rate": 1.098957313966029e-05, + "loss": 0.508, + "step": 34176 + }, + { + "epoch": 0.9384129599121361, + "grad_norm": 0.3705075979232788, + "learning_rate": 1.0989143365200368e-05, + "loss": 0.4939, + "step": 34177 + }, + { + "epoch": 0.9384404173531027, + "grad_norm": 0.38121771812438965, + "learning_rate": 1.0988713588895377e-05, + "loss": 0.5324, + "step": 34178 + }, + { + "epoch": 0.9384678747940692, + "grad_norm": 0.40335068106651306, + "learning_rate": 1.0988283810746118e-05, + "loss": 0.4643, + "step": 34179 + }, + { + "epoch": 0.9384953322350357, + "grad_norm": 0.9911664128303528, + "learning_rate": 1.0987854030753396e-05, + "loss": 0.4532, + "step": 34180 + }, + { + "epoch": 0.9385227896760022, + "grad_norm": 0.42643287777900696, + "learning_rate": 1.0987424248918013e-05, + "loss": 0.5873, + "step": 34181 + }, + { + "epoch": 0.9385502471169687, + "grad_norm": 0.3522125482559204, + "learning_rate": 1.0986994465240766e-05, + "loss": 0.4877, + "step": 34182 + }, + { + "epoch": 0.9385777045579352, + "grad_norm": 0.38926488161087036, + "learning_rate": 1.098656467972246e-05, + "loss": 0.4501, + "step": 34183 + }, + { + "epoch": 0.9386051619989016, + "grad_norm": 0.3781294524669647, + "learning_rate": 1.0986134892363895e-05, + "loss": 0.5551, + "step": 34184 + }, + { + "epoch": 0.9386326194398682, + "grad_norm": 0.4107251763343811, + "learning_rate": 1.0985705103165873e-05, + "loss": 0.5202, + "step": 34185 + }, + { + "epoch": 0.9386600768808347, + "grad_norm": 0.3944930136203766, + "learning_rate": 1.09852753121292e-05, + "loss": 0.4275, + "step": 34186 + }, + { + "epoch": 0.9386875343218012, + "grad_norm": 0.37033870816230774, + "learning_rate": 1.0984845519254671e-05, + "loss": 0.5067, + "step": 34187 + }, + { + "epoch": 0.9387149917627677, + "grad_norm": 0.4689740538597107, + "learning_rate": 1.0984415724543091e-05, + "loss": 0.5684, + "step": 34188 + }, + { + "epoch": 0.9387424492037342, + "grad_norm": 0.4010498523712158, + "learning_rate": 1.0983985927995264e-05, + "loss": 0.5387, + "step": 34189 + }, + { + "epoch": 0.9387699066447007, + "grad_norm": 0.41086992621421814, + "learning_rate": 1.0983556129611984e-05, + "loss": 0.4914, + "step": 34190 + }, + { + "epoch": 0.9387973640856672, + "grad_norm": 0.38165196776390076, + "learning_rate": 1.0983126329394065e-05, + "loss": 0.4495, + "step": 34191 + }, + { + "epoch": 0.9388248215266337, + "grad_norm": 0.432559072971344, + "learning_rate": 1.0982696527342296e-05, + "loss": 0.4522, + "step": 34192 + }, + { + "epoch": 0.9388522789676003, + "grad_norm": 0.3728601336479187, + "learning_rate": 1.0982266723457486e-05, + "loss": 0.3997, + "step": 34193 + }, + { + "epoch": 0.9388797364085667, + "grad_norm": 0.38037121295928955, + "learning_rate": 1.0981836917740438e-05, + "loss": 0.4486, + "step": 34194 + }, + { + "epoch": 0.9389071938495333, + "grad_norm": 0.379643976688385, + "learning_rate": 1.0981407110191946e-05, + "loss": 0.4863, + "step": 34195 + }, + { + "epoch": 0.9389346512904997, + "grad_norm": 0.3734078109264374, + "learning_rate": 1.098097730081282e-05, + "loss": 0.4143, + "step": 34196 + }, + { + "epoch": 0.9389621087314662, + "grad_norm": 0.3700469136238098, + "learning_rate": 1.0980547489603854e-05, + "loss": 0.46, + "step": 34197 + }, + { + "epoch": 0.9389895661724327, + "grad_norm": 0.40587592124938965, + "learning_rate": 1.0980117676565857e-05, + "loss": 0.56, + "step": 34198 + }, + { + "epoch": 0.9390170236133992, + "grad_norm": 0.45718255639076233, + "learning_rate": 1.097968786169963e-05, + "loss": 0.3858, + "step": 34199 + }, + { + "epoch": 0.9390444810543658, + "grad_norm": 0.4867539405822754, + "learning_rate": 1.097925804500597e-05, + "loss": 0.5575, + "step": 34200 + }, + { + "epoch": 0.9390719384953322, + "grad_norm": 0.46798643469810486, + "learning_rate": 1.097882822648568e-05, + "loss": 0.4891, + "step": 34201 + }, + { + "epoch": 0.9390993959362988, + "grad_norm": 0.4035952389240265, + "learning_rate": 1.0978398406139565e-05, + "loss": 0.541, + "step": 34202 + }, + { + "epoch": 0.9391268533772652, + "grad_norm": 0.383949339389801, + "learning_rate": 1.0977968583968424e-05, + "loss": 0.518, + "step": 34203 + }, + { + "epoch": 0.9391543108182318, + "grad_norm": 0.39391234517097473, + "learning_rate": 1.0977538759973061e-05, + "loss": 0.4706, + "step": 34204 + }, + { + "epoch": 0.9391817682591982, + "grad_norm": 0.42308664321899414, + "learning_rate": 1.0977108934154273e-05, + "loss": 0.4839, + "step": 34205 + }, + { + "epoch": 0.9392092257001647, + "grad_norm": 0.4556795060634613, + "learning_rate": 1.0976679106512867e-05, + "loss": 0.5768, + "step": 34206 + }, + { + "epoch": 0.9392366831411313, + "grad_norm": 0.5138306021690369, + "learning_rate": 1.0976249277049643e-05, + "loss": 0.5105, + "step": 34207 + }, + { + "epoch": 0.9392641405820977, + "grad_norm": 0.47561928629875183, + "learning_rate": 1.0975819445765401e-05, + "loss": 0.4394, + "step": 34208 + }, + { + "epoch": 0.9392915980230643, + "grad_norm": 0.3677525818347931, + "learning_rate": 1.0975389612660945e-05, + "loss": 0.4439, + "step": 34209 + }, + { + "epoch": 0.9393190554640307, + "grad_norm": 0.3644654154777527, + "learning_rate": 1.0974959777737075e-05, + "loss": 0.472, + "step": 34210 + }, + { + "epoch": 0.9393465129049973, + "grad_norm": 0.42126184701919556, + "learning_rate": 1.0974529940994596e-05, + "loss": 0.4983, + "step": 34211 + }, + { + "epoch": 0.9393739703459637, + "grad_norm": 0.4833011031150818, + "learning_rate": 1.0974100102434309e-05, + "loss": 0.4433, + "step": 34212 + }, + { + "epoch": 0.9394014277869303, + "grad_norm": 0.42344996333122253, + "learning_rate": 1.097367026205701e-05, + "loss": 0.558, + "step": 34213 + }, + { + "epoch": 0.9394288852278968, + "grad_norm": 0.3769623041152954, + "learning_rate": 1.0973240419863506e-05, + "loss": 0.4736, + "step": 34214 + }, + { + "epoch": 0.9394563426688632, + "grad_norm": 0.42467862367630005, + "learning_rate": 1.09728105758546e-05, + "loss": 0.5147, + "step": 34215 + }, + { + "epoch": 0.9394838001098298, + "grad_norm": 0.36711591482162476, + "learning_rate": 1.097238073003109e-05, + "loss": 0.5192, + "step": 34216 + }, + { + "epoch": 0.9395112575507962, + "grad_norm": 0.38382554054260254, + "learning_rate": 1.0971950882393782e-05, + "loss": 0.4458, + "step": 34217 + }, + { + "epoch": 0.9395387149917628, + "grad_norm": 0.3765406608581543, + "learning_rate": 1.0971521032943474e-05, + "loss": 0.4324, + "step": 34218 + }, + { + "epoch": 0.9395661724327292, + "grad_norm": 0.39666980504989624, + "learning_rate": 1.097109118168097e-05, + "loss": 0.448, + "step": 34219 + }, + { + "epoch": 0.9395936298736958, + "grad_norm": 0.3724711239337921, + "learning_rate": 1.097066132860707e-05, + "loss": 0.4523, + "step": 34220 + }, + { + "epoch": 0.9396210873146623, + "grad_norm": 0.4666835069656372, + "learning_rate": 1.0970231473722576e-05, + "loss": 0.488, + "step": 34221 + }, + { + "epoch": 0.9396485447556288, + "grad_norm": 0.5439835786819458, + "learning_rate": 1.0969801617028293e-05, + "loss": 0.5327, + "step": 34222 + }, + { + "epoch": 0.9396760021965953, + "grad_norm": 0.45710933208465576, + "learning_rate": 1.0969371758525019e-05, + "loss": 0.5808, + "step": 34223 + }, + { + "epoch": 0.9397034596375617, + "grad_norm": 0.747260332107544, + "learning_rate": 1.0968941898213556e-05, + "loss": 0.5605, + "step": 34224 + }, + { + "epoch": 0.9397309170785283, + "grad_norm": 0.39793887734413147, + "learning_rate": 1.0968512036094711e-05, + "loss": 0.4759, + "step": 34225 + }, + { + "epoch": 0.9397583745194947, + "grad_norm": 0.4846106767654419, + "learning_rate": 1.0968082172169279e-05, + "loss": 0.504, + "step": 34226 + }, + { + "epoch": 0.9397858319604613, + "grad_norm": 0.49286824464797974, + "learning_rate": 1.0967652306438066e-05, + "loss": 0.5315, + "step": 34227 + }, + { + "epoch": 0.9398132894014278, + "grad_norm": 0.43346303701400757, + "learning_rate": 1.096722243890187e-05, + "loss": 0.5133, + "step": 34228 + }, + { + "epoch": 0.9398407468423943, + "grad_norm": 0.4159921705722809, + "learning_rate": 1.0966792569561498e-05, + "loss": 0.5071, + "step": 34229 + }, + { + "epoch": 0.9398682042833608, + "grad_norm": 0.3812239170074463, + "learning_rate": 1.0966362698417749e-05, + "loss": 0.5256, + "step": 34230 + }, + { + "epoch": 0.9398956617243273, + "grad_norm": 0.34175023436546326, + "learning_rate": 1.0965932825471424e-05, + "loss": 0.4965, + "step": 34231 + }, + { + "epoch": 0.9399231191652938, + "grad_norm": 0.4412270784378052, + "learning_rate": 1.0965502950723328e-05, + "loss": 0.505, + "step": 34232 + }, + { + "epoch": 0.9399505766062602, + "grad_norm": 0.3378070890903473, + "learning_rate": 1.0965073074174259e-05, + "loss": 0.3658, + "step": 34233 + }, + { + "epoch": 0.9399780340472268, + "grad_norm": 0.3936670124530792, + "learning_rate": 1.096464319582502e-05, + "loss": 0.535, + "step": 34234 + }, + { + "epoch": 0.9400054914881933, + "grad_norm": 0.38661515712738037, + "learning_rate": 1.0964213315676418e-05, + "loss": 0.4207, + "step": 34235 + }, + { + "epoch": 0.9400329489291598, + "grad_norm": 0.42031341791152954, + "learning_rate": 1.0963783433729245e-05, + "loss": 0.4327, + "step": 34236 + }, + { + "epoch": 0.9400604063701263, + "grad_norm": 0.3859327733516693, + "learning_rate": 1.096335354998431e-05, + "loss": 0.4522, + "step": 34237 + }, + { + "epoch": 0.9400878638110928, + "grad_norm": 0.41897261142730713, + "learning_rate": 1.0962923664442415e-05, + "loss": 0.4092, + "step": 34238 + }, + { + "epoch": 0.9401153212520593, + "grad_norm": 0.370349645614624, + "learning_rate": 1.0962493777104356e-05, + "loss": 0.4446, + "step": 34239 + }, + { + "epoch": 0.9401427786930258, + "grad_norm": 0.4193271994590759, + "learning_rate": 1.0962063887970944e-05, + "loss": 0.4652, + "step": 34240 + }, + { + "epoch": 0.9401702361339923, + "grad_norm": 0.43900418281555176, + "learning_rate": 1.0961633997042972e-05, + "loss": 0.562, + "step": 34241 + }, + { + "epoch": 0.9401976935749589, + "grad_norm": 0.3703499138355255, + "learning_rate": 1.0961204104321247e-05, + "loss": 0.447, + "step": 34242 + }, + { + "epoch": 0.9402251510159253, + "grad_norm": 0.4241028130054474, + "learning_rate": 1.0960774209806572e-05, + "loss": 0.5169, + "step": 34243 + }, + { + "epoch": 0.9402526084568918, + "grad_norm": 0.3693733215332031, + "learning_rate": 1.0960344313499743e-05, + "loss": 0.4872, + "step": 34244 + }, + { + "epoch": 0.9402800658978583, + "grad_norm": 0.5232754945755005, + "learning_rate": 1.0959914415401567e-05, + "loss": 0.4656, + "step": 34245 + }, + { + "epoch": 0.9403075233388248, + "grad_norm": 0.3382304012775421, + "learning_rate": 1.0959484515512844e-05, + "loss": 0.5292, + "step": 34246 + }, + { + "epoch": 0.9403349807797913, + "grad_norm": 0.4224871098995209, + "learning_rate": 1.0959054613834374e-05, + "loss": 0.5505, + "step": 34247 + }, + { + "epoch": 0.9403624382207578, + "grad_norm": 0.40477797389030457, + "learning_rate": 1.0958624710366966e-05, + "loss": 0.5574, + "step": 34248 + }, + { + "epoch": 0.9403898956617244, + "grad_norm": 0.4357987642288208, + "learning_rate": 1.0958194805111413e-05, + "loss": 0.5254, + "step": 34249 + }, + { + "epoch": 0.9404173531026908, + "grad_norm": 0.3562454581260681, + "learning_rate": 1.095776489806852e-05, + "loss": 0.4557, + "step": 34250 + }, + { + "epoch": 0.9404448105436574, + "grad_norm": 0.371247261762619, + "learning_rate": 1.0957334989239095e-05, + "loss": 0.4838, + "step": 34251 + }, + { + "epoch": 0.9404722679846238, + "grad_norm": 0.42819562554359436, + "learning_rate": 1.095690507862393e-05, + "loss": 0.4581, + "step": 34252 + }, + { + "epoch": 0.9404997254255903, + "grad_norm": 0.3985109329223633, + "learning_rate": 1.0956475166223832e-05, + "loss": 0.5126, + "step": 34253 + }, + { + "epoch": 0.9405271828665568, + "grad_norm": 0.3684120178222656, + "learning_rate": 1.0956045252039601e-05, + "loss": 0.4032, + "step": 34254 + }, + { + "epoch": 0.9405546403075233, + "grad_norm": 0.47738027572631836, + "learning_rate": 1.0955615336072045e-05, + "loss": 0.4891, + "step": 34255 + }, + { + "epoch": 0.9405820977484899, + "grad_norm": 0.4095081090927124, + "learning_rate": 1.0955185418321961e-05, + "loss": 0.495, + "step": 34256 + }, + { + "epoch": 0.9406095551894563, + "grad_norm": 0.3756501078605652, + "learning_rate": 1.0954755498790148e-05, + "loss": 0.5013, + "step": 34257 + }, + { + "epoch": 0.9406370126304229, + "grad_norm": 0.36128130555152893, + "learning_rate": 1.0954325577477413e-05, + "loss": 0.4673, + "step": 34258 + }, + { + "epoch": 0.9406644700713893, + "grad_norm": 0.3968930244445801, + "learning_rate": 1.0953895654384556e-05, + "loss": 0.4882, + "step": 34259 + }, + { + "epoch": 0.9406919275123559, + "grad_norm": 0.3544952869415283, + "learning_rate": 1.095346572951238e-05, + "loss": 0.4747, + "step": 34260 + }, + { + "epoch": 0.9407193849533223, + "grad_norm": 0.42824578285217285, + "learning_rate": 1.0953035802861688e-05, + "loss": 0.5112, + "step": 34261 + }, + { + "epoch": 0.9407468423942889, + "grad_norm": 0.39310553669929504, + "learning_rate": 1.0952605874433275e-05, + "loss": 0.5193, + "step": 34262 + }, + { + "epoch": 0.9407742998352554, + "grad_norm": 0.6414165496826172, + "learning_rate": 1.095217594422795e-05, + "loss": 0.4967, + "step": 34263 + }, + { + "epoch": 0.9408017572762218, + "grad_norm": 0.39318719506263733, + "learning_rate": 1.0951746012246516e-05, + "loss": 0.5287, + "step": 34264 + }, + { + "epoch": 0.9408292147171884, + "grad_norm": 0.3813847303390503, + "learning_rate": 1.0951316078489766e-05, + "loss": 0.5626, + "step": 34265 + }, + { + "epoch": 0.9408566721581548, + "grad_norm": 0.4017113745212555, + "learning_rate": 1.0950886142958513e-05, + "loss": 0.5897, + "step": 34266 + }, + { + "epoch": 0.9408841295991214, + "grad_norm": 0.4142880141735077, + "learning_rate": 1.0950456205653551e-05, + "loss": 0.5404, + "step": 34267 + }, + { + "epoch": 0.9409115870400878, + "grad_norm": 0.4135507345199585, + "learning_rate": 1.0950026266575686e-05, + "loss": 0.4316, + "step": 34268 + }, + { + "epoch": 0.9409390444810544, + "grad_norm": 0.38454189896583557, + "learning_rate": 1.094959632572572e-05, + "loss": 0.4793, + "step": 34269 + }, + { + "epoch": 0.9409665019220209, + "grad_norm": 0.46356332302093506, + "learning_rate": 1.0949166383104451e-05, + "loss": 0.4791, + "step": 34270 + }, + { + "epoch": 0.9409939593629874, + "grad_norm": 0.38309699296951294, + "learning_rate": 1.0948736438712686e-05, + "loss": 0.5198, + "step": 34271 + }, + { + "epoch": 0.9410214168039539, + "grad_norm": 0.38631248474121094, + "learning_rate": 1.0948306492551224e-05, + "loss": 0.4869, + "step": 34272 + }, + { + "epoch": 0.9410488742449203, + "grad_norm": 0.49224069714546204, + "learning_rate": 1.0947876544620869e-05, + "loss": 0.4848, + "step": 34273 + }, + { + "epoch": 0.9410763316858869, + "grad_norm": 0.3964882493019104, + "learning_rate": 1.094744659492242e-05, + "loss": 0.4255, + "step": 34274 + }, + { + "epoch": 0.9411037891268533, + "grad_norm": 0.4052979350090027, + "learning_rate": 1.094701664345668e-05, + "loss": 0.461, + "step": 34275 + }, + { + "epoch": 0.9411312465678199, + "grad_norm": 0.33843037486076355, + "learning_rate": 1.0946586690224455e-05, + "loss": 0.4049, + "step": 34276 + }, + { + "epoch": 0.9411587040087864, + "grad_norm": 0.37387216091156006, + "learning_rate": 1.0946156735226538e-05, + "loss": 0.5377, + "step": 34277 + }, + { + "epoch": 0.9411861614497529, + "grad_norm": 0.3806493580341339, + "learning_rate": 1.0945726778463742e-05, + "loss": 0.4464, + "step": 34278 + }, + { + "epoch": 0.9412136188907194, + "grad_norm": 0.4058409333229065, + "learning_rate": 1.0945296819936862e-05, + "loss": 0.5457, + "step": 34279 + }, + { + "epoch": 0.9412410763316859, + "grad_norm": 0.4173944592475891, + "learning_rate": 1.09448668596467e-05, + "loss": 0.4558, + "step": 34280 + }, + { + "epoch": 0.9412685337726524, + "grad_norm": 0.39042699337005615, + "learning_rate": 1.0944436897594063e-05, + "loss": 0.5066, + "step": 34281 + }, + { + "epoch": 0.9412959912136188, + "grad_norm": 0.40072834491729736, + "learning_rate": 1.094400693377975e-05, + "loss": 0.4732, + "step": 34282 + }, + { + "epoch": 0.9413234486545854, + "grad_norm": 0.3889920711517334, + "learning_rate": 1.0943576968204558e-05, + "loss": 0.4397, + "step": 34283 + }, + { + "epoch": 0.9413509060955519, + "grad_norm": 0.3991536498069763, + "learning_rate": 1.0943147000869299e-05, + "loss": 0.4608, + "step": 34284 + }, + { + "epoch": 0.9413783635365184, + "grad_norm": 0.4085448682308197, + "learning_rate": 1.0942717031774765e-05, + "loss": 0.4417, + "step": 34285 + }, + { + "epoch": 0.9414058209774849, + "grad_norm": 0.3954102694988251, + "learning_rate": 1.0942287060921769e-05, + "loss": 0.4841, + "step": 34286 + }, + { + "epoch": 0.9414332784184514, + "grad_norm": 0.38158246874809265, + "learning_rate": 1.0941857088311103e-05, + "loss": 0.4609, + "step": 34287 + }, + { + "epoch": 0.9414607358594179, + "grad_norm": 0.3868821859359741, + "learning_rate": 1.0941427113943572e-05, + "loss": 0.5054, + "step": 34288 + }, + { + "epoch": 0.9414881933003844, + "grad_norm": 0.5771820545196533, + "learning_rate": 1.094099713781998e-05, + "loss": 0.4897, + "step": 34289 + }, + { + "epoch": 0.9415156507413509, + "grad_norm": 0.3376457095146179, + "learning_rate": 1.0940567159941126e-05, + "loss": 0.418, + "step": 34290 + }, + { + "epoch": 0.9415431081823175, + "grad_norm": 0.5123295783996582, + "learning_rate": 1.0940137180307819e-05, + "loss": 0.5313, + "step": 34291 + }, + { + "epoch": 0.9415705656232839, + "grad_norm": 0.3634682893753052, + "learning_rate": 1.0939707198920855e-05, + "loss": 0.4437, + "step": 34292 + }, + { + "epoch": 0.9415980230642504, + "grad_norm": 0.4365377426147461, + "learning_rate": 1.0939277215781033e-05, + "loss": 0.5429, + "step": 34293 + }, + { + "epoch": 0.9416254805052169, + "grad_norm": 0.4119139015674591, + "learning_rate": 1.0938847230889162e-05, + "loss": 0.5333, + "step": 34294 + }, + { + "epoch": 0.9416529379461834, + "grad_norm": 0.41945919394493103, + "learning_rate": 1.0938417244246042e-05, + "loss": 0.5041, + "step": 34295 + }, + { + "epoch": 0.9416803953871499, + "grad_norm": 0.40743187069892883, + "learning_rate": 1.0937987255852474e-05, + "loss": 0.465, + "step": 34296 + }, + { + "epoch": 0.9417078528281164, + "grad_norm": 0.37929290533065796, + "learning_rate": 1.093755726570926e-05, + "loss": 0.4893, + "step": 34297 + }, + { + "epoch": 0.941735310269083, + "grad_norm": 0.43689969182014465, + "learning_rate": 1.0937127273817201e-05, + "loss": 0.5104, + "step": 34298 + }, + { + "epoch": 0.9417627677100494, + "grad_norm": 0.3844442069530487, + "learning_rate": 1.0936697280177103e-05, + "loss": 0.4626, + "step": 34299 + }, + { + "epoch": 0.941790225151016, + "grad_norm": 0.4615590274333954, + "learning_rate": 1.0936267284789763e-05, + "loss": 0.5386, + "step": 34300 + }, + { + "epoch": 0.9418176825919824, + "grad_norm": 0.46896442770957947, + "learning_rate": 1.0935837287655986e-05, + "loss": 0.5425, + "step": 34301 + }, + { + "epoch": 0.941845140032949, + "grad_norm": 0.4365326166152954, + "learning_rate": 1.0935407288776576e-05, + "loss": 0.4838, + "step": 34302 + }, + { + "epoch": 0.9418725974739154, + "grad_norm": 0.359025776386261, + "learning_rate": 1.0934977288152331e-05, + "loss": 0.4457, + "step": 34303 + }, + { + "epoch": 0.9419000549148819, + "grad_norm": 0.37281250953674316, + "learning_rate": 1.0934547285784056e-05, + "loss": 0.463, + "step": 34304 + }, + { + "epoch": 0.9419275123558485, + "grad_norm": 0.43920400738716125, + "learning_rate": 1.0934117281672554e-05, + "loss": 0.5551, + "step": 34305 + }, + { + "epoch": 0.9419549697968149, + "grad_norm": 0.36720436811447144, + "learning_rate": 1.0933687275818621e-05, + "loss": 0.4559, + "step": 34306 + }, + { + "epoch": 0.9419824272377815, + "grad_norm": 0.4490230977535248, + "learning_rate": 1.0933257268223066e-05, + "loss": 0.5115, + "step": 34307 + }, + { + "epoch": 0.9420098846787479, + "grad_norm": 0.37182432413101196, + "learning_rate": 1.0932827258886684e-05, + "loss": 0.4932, + "step": 34308 + }, + { + "epoch": 0.9420373421197145, + "grad_norm": 0.3384484648704529, + "learning_rate": 1.0932397247810287e-05, + "loss": 0.4854, + "step": 34309 + }, + { + "epoch": 0.9420647995606809, + "grad_norm": 0.45778846740722656, + "learning_rate": 1.0931967234994669e-05, + "loss": 0.4618, + "step": 34310 + }, + { + "epoch": 0.9420922570016474, + "grad_norm": 0.396276593208313, + "learning_rate": 1.0931537220440636e-05, + "loss": 0.5317, + "step": 34311 + }, + { + "epoch": 0.942119714442614, + "grad_norm": 0.35565200448036194, + "learning_rate": 1.0931107204148988e-05, + "loss": 0.4509, + "step": 34312 + }, + { + "epoch": 0.9421471718835804, + "grad_norm": 0.3821839690208435, + "learning_rate": 1.0930677186120529e-05, + "loss": 0.4731, + "step": 34313 + }, + { + "epoch": 0.942174629324547, + "grad_norm": 0.4325721859931946, + "learning_rate": 1.0930247166356056e-05, + "loss": 0.5405, + "step": 34314 + }, + { + "epoch": 0.9422020867655134, + "grad_norm": 0.36549898982048035, + "learning_rate": 1.0929817144856378e-05, + "loss": 0.4802, + "step": 34315 + }, + { + "epoch": 0.94222954420648, + "grad_norm": 0.41403937339782715, + "learning_rate": 1.0929387121622295e-05, + "loss": 0.5059, + "step": 34316 + }, + { + "epoch": 0.9422570016474464, + "grad_norm": 0.3875843584537506, + "learning_rate": 1.0928957096654607e-05, + "loss": 0.5124, + "step": 34317 + }, + { + "epoch": 0.942284459088413, + "grad_norm": 0.49516770243644714, + "learning_rate": 1.092852706995412e-05, + "loss": 0.4322, + "step": 34318 + }, + { + "epoch": 0.9423119165293795, + "grad_norm": 0.4186944365501404, + "learning_rate": 1.0928097041521631e-05, + "loss": 0.473, + "step": 34319 + }, + { + "epoch": 0.942339373970346, + "grad_norm": 0.42165496945381165, + "learning_rate": 1.0927667011357945e-05, + "loss": 0.516, + "step": 34320 + }, + { + "epoch": 0.9423668314113125, + "grad_norm": 0.37026122212409973, + "learning_rate": 1.0927236979463863e-05, + "loss": 0.4752, + "step": 34321 + }, + { + "epoch": 0.9423942888522789, + "grad_norm": 0.4103066027164459, + "learning_rate": 1.0926806945840191e-05, + "loss": 0.4909, + "step": 34322 + }, + { + "epoch": 0.9424217462932455, + "grad_norm": 0.35498419404029846, + "learning_rate": 1.0926376910487728e-05, + "loss": 0.4712, + "step": 34323 + }, + { + "epoch": 0.9424492037342119, + "grad_norm": 0.3811432719230652, + "learning_rate": 1.0925946873407274e-05, + "loss": 0.5211, + "step": 34324 + }, + { + "epoch": 0.9424766611751785, + "grad_norm": 0.4976426661014557, + "learning_rate": 1.0925516834599635e-05, + "loss": 0.4688, + "step": 34325 + }, + { + "epoch": 0.942504118616145, + "grad_norm": 0.3707168698310852, + "learning_rate": 1.0925086794065612e-05, + "loss": 0.45, + "step": 34326 + }, + { + "epoch": 0.9425315760571115, + "grad_norm": 0.41980940103530884, + "learning_rate": 1.0924656751806004e-05, + "loss": 0.492, + "step": 34327 + }, + { + "epoch": 0.942559033498078, + "grad_norm": 0.39333900809288025, + "learning_rate": 1.092422670782162e-05, + "loss": 0.4943, + "step": 34328 + }, + { + "epoch": 0.9425864909390445, + "grad_norm": 0.3975180387496948, + "learning_rate": 1.0923796662113255e-05, + "loss": 0.5146, + "step": 34329 + }, + { + "epoch": 0.942613948380011, + "grad_norm": 0.39977720379829407, + "learning_rate": 1.0923366614681716e-05, + "loss": 0.4842, + "step": 34330 + }, + { + "epoch": 0.9426414058209774, + "grad_norm": 0.3974045515060425, + "learning_rate": 1.0922936565527807e-05, + "loss": 0.4486, + "step": 34331 + }, + { + "epoch": 0.942668863261944, + "grad_norm": 0.4272530972957611, + "learning_rate": 1.092250651465232e-05, + "loss": 0.5364, + "step": 34332 + }, + { + "epoch": 0.9426963207029105, + "grad_norm": 0.42066338658332825, + "learning_rate": 1.0922076462056069e-05, + "loss": 0.5035, + "step": 34333 + }, + { + "epoch": 0.942723778143877, + "grad_norm": 0.35441818833351135, + "learning_rate": 1.0921646407739848e-05, + "loss": 0.4076, + "step": 34334 + }, + { + "epoch": 0.9427512355848435, + "grad_norm": 0.40083596110343933, + "learning_rate": 1.0921216351704465e-05, + "loss": 0.4796, + "step": 34335 + }, + { + "epoch": 0.94277869302581, + "grad_norm": 0.3732988238334656, + "learning_rate": 1.0920786293950718e-05, + "loss": 0.468, + "step": 34336 + }, + { + "epoch": 0.9428061504667765, + "grad_norm": 0.46196866035461426, + "learning_rate": 1.092035623447941e-05, + "loss": 0.5268, + "step": 34337 + }, + { + "epoch": 0.942833607907743, + "grad_norm": 0.387746661901474, + "learning_rate": 1.0919926173291345e-05, + "loss": 0.4524, + "step": 34338 + }, + { + "epoch": 0.9428610653487095, + "grad_norm": 0.3928948938846588, + "learning_rate": 1.0919496110387323e-05, + "loss": 0.5138, + "step": 34339 + }, + { + "epoch": 0.942888522789676, + "grad_norm": 0.3738824129104614, + "learning_rate": 1.0919066045768147e-05, + "loss": 0.4436, + "step": 34340 + }, + { + "epoch": 0.9429159802306425, + "grad_norm": 0.35403409600257874, + "learning_rate": 1.0918635979434622e-05, + "loss": 0.512, + "step": 34341 + }, + { + "epoch": 0.942943437671609, + "grad_norm": 0.48494502902030945, + "learning_rate": 1.0918205911387545e-05, + "loss": 0.5569, + "step": 34342 + }, + { + "epoch": 0.9429708951125755, + "grad_norm": 0.4632241129875183, + "learning_rate": 1.0917775841627722e-05, + "loss": 0.5595, + "step": 34343 + }, + { + "epoch": 0.942998352553542, + "grad_norm": 0.3741208612918854, + "learning_rate": 1.0917345770155953e-05, + "loss": 0.5038, + "step": 34344 + }, + { + "epoch": 0.9430258099945085, + "grad_norm": 0.45713791251182556, + "learning_rate": 1.0916915696973044e-05, + "loss": 0.4361, + "step": 34345 + }, + { + "epoch": 0.943053267435475, + "grad_norm": 0.43308812379837036, + "learning_rate": 1.0916485622079794e-05, + "loss": 0.458, + "step": 34346 + }, + { + "epoch": 0.9430807248764416, + "grad_norm": 0.35965099930763245, + "learning_rate": 1.0916055545477004e-05, + "loss": 0.4716, + "step": 34347 + }, + { + "epoch": 0.943108182317408, + "grad_norm": 0.3790360987186432, + "learning_rate": 1.091562546716548e-05, + "loss": 0.4769, + "step": 34348 + }, + { + "epoch": 0.9431356397583746, + "grad_norm": 0.464787095785141, + "learning_rate": 1.091519538714602e-05, + "loss": 0.558, + "step": 34349 + }, + { + "epoch": 0.943163097199341, + "grad_norm": 0.3460358679294586, + "learning_rate": 1.091476530541943e-05, + "loss": 0.502, + "step": 34350 + }, + { + "epoch": 0.9431905546403075, + "grad_norm": 0.4164578914642334, + "learning_rate": 1.0914335221986511e-05, + "loss": 0.4873, + "step": 34351 + }, + { + "epoch": 0.943218012081274, + "grad_norm": 0.4739196300506592, + "learning_rate": 1.0913905136848064e-05, + "loss": 0.6225, + "step": 34352 + }, + { + "epoch": 0.9432454695222405, + "grad_norm": 0.4238758981227875, + "learning_rate": 1.0913475050004893e-05, + "loss": 0.5059, + "step": 34353 + }, + { + "epoch": 0.9432729269632071, + "grad_norm": 0.36139002442359924, + "learning_rate": 1.0913044961457799e-05, + "loss": 0.4647, + "step": 34354 + }, + { + "epoch": 0.9433003844041735, + "grad_norm": 0.38505256175994873, + "learning_rate": 1.0912614871207586e-05, + "loss": 0.5057, + "step": 34355 + }, + { + "epoch": 0.9433278418451401, + "grad_norm": 0.3610764443874359, + "learning_rate": 1.0912184779255053e-05, + "loss": 0.5094, + "step": 34356 + }, + { + "epoch": 0.9433552992861065, + "grad_norm": 0.4792734384536743, + "learning_rate": 1.0911754685601006e-05, + "loss": 0.5365, + "step": 34357 + }, + { + "epoch": 0.9433827567270731, + "grad_norm": 0.3834724724292755, + "learning_rate": 1.0911324590246243e-05, + "loss": 0.4327, + "step": 34358 + }, + { + "epoch": 0.9434102141680395, + "grad_norm": 0.4602283537387848, + "learning_rate": 1.091089449319157e-05, + "loss": 0.4416, + "step": 34359 + }, + { + "epoch": 0.943437671609006, + "grad_norm": 0.3879586160182953, + "learning_rate": 1.091046439443779e-05, + "loss": 0.5875, + "step": 34360 + }, + { + "epoch": 0.9434651290499726, + "grad_norm": 0.3724091947078705, + "learning_rate": 1.09100342939857e-05, + "loss": 0.4451, + "step": 34361 + }, + { + "epoch": 0.943492586490939, + "grad_norm": 0.45191749930381775, + "learning_rate": 1.0909604191836109e-05, + "loss": 0.4995, + "step": 34362 + }, + { + "epoch": 0.9435200439319056, + "grad_norm": 0.3331415057182312, + "learning_rate": 1.0909174087989816e-05, + "loss": 0.3892, + "step": 34363 + }, + { + "epoch": 0.943547501372872, + "grad_norm": 0.3681741952896118, + "learning_rate": 1.090874398244762e-05, + "loss": 0.4977, + "step": 34364 + }, + { + "epoch": 0.9435749588138386, + "grad_norm": 0.4593007266521454, + "learning_rate": 1.0908313875210327e-05, + "loss": 0.5419, + "step": 34365 + }, + { + "epoch": 0.943602416254805, + "grad_norm": 0.4323198199272156, + "learning_rate": 1.090788376627874e-05, + "loss": 0.4281, + "step": 34366 + }, + { + "epoch": 0.9436298736957716, + "grad_norm": 0.3505270183086395, + "learning_rate": 1.0907453655653659e-05, + "loss": 0.4224, + "step": 34367 + }, + { + "epoch": 0.9436573311367381, + "grad_norm": 0.3718193769454956, + "learning_rate": 1.0907023543335891e-05, + "loss": 0.4709, + "step": 34368 + }, + { + "epoch": 0.9436847885777045, + "grad_norm": 0.41437041759490967, + "learning_rate": 1.090659342932623e-05, + "loss": 0.5635, + "step": 34369 + }, + { + "epoch": 0.9437122460186711, + "grad_norm": 0.4005882740020752, + "learning_rate": 1.0906163313625485e-05, + "loss": 0.4437, + "step": 34370 + }, + { + "epoch": 0.9437397034596375, + "grad_norm": 0.3827400505542755, + "learning_rate": 1.0905733196234454e-05, + "loss": 0.4833, + "step": 34371 + }, + { + "epoch": 0.9437671609006041, + "grad_norm": 0.4193068742752075, + "learning_rate": 1.0905303077153944e-05, + "loss": 0.469, + "step": 34372 + }, + { + "epoch": 0.9437946183415705, + "grad_norm": 0.3999626636505127, + "learning_rate": 1.0904872956384753e-05, + "loss": 0.4577, + "step": 34373 + }, + { + "epoch": 0.9438220757825371, + "grad_norm": 0.3991954028606415, + "learning_rate": 1.0904442833927686e-05, + "loss": 0.5321, + "step": 34374 + }, + { + "epoch": 0.9438495332235036, + "grad_norm": 0.4415748417377472, + "learning_rate": 1.0904012709783547e-05, + "loss": 0.5468, + "step": 34375 + }, + { + "epoch": 0.9438769906644701, + "grad_norm": 0.5460970997810364, + "learning_rate": 1.0903582583953132e-05, + "loss": 0.5767, + "step": 34376 + }, + { + "epoch": 0.9439044481054366, + "grad_norm": 0.3361993730068207, + "learning_rate": 1.0903152456437249e-05, + "loss": 0.493, + "step": 34377 + }, + { + "epoch": 0.943931905546403, + "grad_norm": 0.48174232244491577, + "learning_rate": 1.0902722327236697e-05, + "loss": 0.5659, + "step": 34378 + }, + { + "epoch": 0.9439593629873696, + "grad_norm": 0.6249967813491821, + "learning_rate": 1.090229219635228e-05, + "loss": 0.4965, + "step": 34379 + }, + { + "epoch": 0.943986820428336, + "grad_norm": 0.36034998297691345, + "learning_rate": 1.0901862063784802e-05, + "loss": 0.4334, + "step": 34380 + }, + { + "epoch": 0.9440142778693026, + "grad_norm": 0.38194742798805237, + "learning_rate": 1.090143192953506e-05, + "loss": 0.4557, + "step": 34381 + }, + { + "epoch": 0.9440417353102691, + "grad_norm": 0.38510337471961975, + "learning_rate": 1.0901001793603863e-05, + "loss": 0.5041, + "step": 34382 + }, + { + "epoch": 0.9440691927512356, + "grad_norm": 0.38397330045700073, + "learning_rate": 1.0900571655992007e-05, + "loss": 0.4619, + "step": 34383 + }, + { + "epoch": 0.9440966501922021, + "grad_norm": 0.3415156900882721, + "learning_rate": 1.09001415167003e-05, + "loss": 0.4541, + "step": 34384 + }, + { + "epoch": 0.9441241076331686, + "grad_norm": 0.37674885988235474, + "learning_rate": 1.0899711375729543e-05, + "loss": 0.4509, + "step": 34385 + }, + { + "epoch": 0.9441515650741351, + "grad_norm": 0.40644222497940063, + "learning_rate": 1.0899281233080535e-05, + "loss": 0.4911, + "step": 34386 + }, + { + "epoch": 0.9441790225151016, + "grad_norm": 0.5093401074409485, + "learning_rate": 1.0898851088754082e-05, + "loss": 0.5369, + "step": 34387 + }, + { + "epoch": 0.9442064799560681, + "grad_norm": 0.41910773515701294, + "learning_rate": 1.0898420942750982e-05, + "loss": 0.5758, + "step": 34388 + }, + { + "epoch": 0.9442339373970347, + "grad_norm": 0.4840419590473175, + "learning_rate": 1.0897990795072042e-05, + "loss": 0.5092, + "step": 34389 + }, + { + "epoch": 0.9442613948380011, + "grad_norm": 0.443649023771286, + "learning_rate": 1.0897560645718064e-05, + "loss": 0.4409, + "step": 34390 + }, + { + "epoch": 0.9442888522789676, + "grad_norm": 0.4136585593223572, + "learning_rate": 1.0897130494689845e-05, + "loss": 0.5634, + "step": 34391 + }, + { + "epoch": 0.9443163097199341, + "grad_norm": 0.37525373697280884, + "learning_rate": 1.0896700341988194e-05, + "loss": 0.4666, + "step": 34392 + }, + { + "epoch": 0.9443437671609006, + "grad_norm": 0.43561896681785583, + "learning_rate": 1.089627018761391e-05, + "loss": 0.5693, + "step": 34393 + }, + { + "epoch": 0.9443712246018671, + "grad_norm": 0.3583577871322632, + "learning_rate": 1.0895840031567798e-05, + "loss": 0.5314, + "step": 34394 + }, + { + "epoch": 0.9443986820428336, + "grad_norm": 0.4119987189769745, + "learning_rate": 1.0895409873850654e-05, + "loss": 0.4923, + "step": 34395 + }, + { + "epoch": 0.9444261394838002, + "grad_norm": 0.40650293231010437, + "learning_rate": 1.0894979714463288e-05, + "loss": 0.4935, + "step": 34396 + }, + { + "epoch": 0.9444535969247666, + "grad_norm": 0.3826306462287903, + "learning_rate": 1.0894549553406499e-05, + "loss": 0.5094, + "step": 34397 + }, + { + "epoch": 0.9444810543657332, + "grad_norm": 0.42364734411239624, + "learning_rate": 1.089411939068109e-05, + "loss": 0.5203, + "step": 34398 + }, + { + "epoch": 0.9445085118066996, + "grad_norm": 0.42609867453575134, + "learning_rate": 1.0893689226287863e-05, + "loss": 0.4878, + "step": 34399 + }, + { + "epoch": 0.9445359692476661, + "grad_norm": 0.39209553599357605, + "learning_rate": 1.089325906022762e-05, + "loss": 0.5345, + "step": 34400 + }, + { + "epoch": 0.9445634266886326, + "grad_norm": 0.4364708960056305, + "learning_rate": 1.0892828892501161e-05, + "loss": 0.4793, + "step": 34401 + }, + { + "epoch": 0.9445908841295991, + "grad_norm": 0.41138479113578796, + "learning_rate": 1.0892398723109295e-05, + "loss": 0.5339, + "step": 34402 + }, + { + "epoch": 0.9446183415705657, + "grad_norm": 0.49692976474761963, + "learning_rate": 1.0891968552052821e-05, + "loss": 0.5082, + "step": 34403 + }, + { + "epoch": 0.9446457990115321, + "grad_norm": 0.393375039100647, + "learning_rate": 1.0891538379332536e-05, + "loss": 0.4356, + "step": 34404 + }, + { + "epoch": 0.9446732564524987, + "grad_norm": 0.39509040117263794, + "learning_rate": 1.0891108204949252e-05, + "loss": 0.4949, + "step": 34405 + }, + { + "epoch": 0.9447007138934651, + "grad_norm": 0.36306121945381165, + "learning_rate": 1.0890678028903765e-05, + "loss": 0.4716, + "step": 34406 + }, + { + "epoch": 0.9447281713344317, + "grad_norm": 0.4500408470630646, + "learning_rate": 1.0890247851196878e-05, + "loss": 0.4417, + "step": 34407 + }, + { + "epoch": 0.9447556287753981, + "grad_norm": 0.4679945707321167, + "learning_rate": 1.0889817671829397e-05, + "loss": 0.5257, + "step": 34408 + }, + { + "epoch": 0.9447830862163646, + "grad_norm": 0.4572781026363373, + "learning_rate": 1.0889387490802122e-05, + "loss": 0.514, + "step": 34409 + }, + { + "epoch": 0.9448105436573312, + "grad_norm": 0.42408308386802673, + "learning_rate": 1.0888957308115855e-05, + "loss": 0.5051, + "step": 34410 + }, + { + "epoch": 0.9448380010982976, + "grad_norm": 0.41350463032722473, + "learning_rate": 1.0888527123771398e-05, + "loss": 0.5055, + "step": 34411 + }, + { + "epoch": 0.9448654585392642, + "grad_norm": 0.41754159331321716, + "learning_rate": 1.0888096937769555e-05, + "loss": 0.4847, + "step": 34412 + }, + { + "epoch": 0.9448929159802306, + "grad_norm": 0.41462400555610657, + "learning_rate": 1.0887666750111127e-05, + "loss": 0.5183, + "step": 34413 + }, + { + "epoch": 0.9449203734211972, + "grad_norm": 0.4070112109184265, + "learning_rate": 1.0887236560796916e-05, + "loss": 0.4307, + "step": 34414 + }, + { + "epoch": 0.9449478308621636, + "grad_norm": 0.40161454677581787, + "learning_rate": 1.088680636982773e-05, + "loss": 0.5281, + "step": 34415 + }, + { + "epoch": 0.9449752883031302, + "grad_norm": 0.3873355984687805, + "learning_rate": 1.0886376177204365e-05, + "loss": 0.5526, + "step": 34416 + }, + { + "epoch": 0.9450027457440966, + "grad_norm": 0.3865630030632019, + "learning_rate": 1.0885945982927623e-05, + "loss": 0.4048, + "step": 34417 + }, + { + "epoch": 0.9450302031850631, + "grad_norm": 0.4034617245197296, + "learning_rate": 1.088551578699831e-05, + "loss": 0.5144, + "step": 34418 + }, + { + "epoch": 0.9450576606260297, + "grad_norm": 0.38899025321006775, + "learning_rate": 1.088508558941723e-05, + "loss": 0.447, + "step": 34419 + }, + { + "epoch": 0.9450851180669961, + "grad_norm": 0.43000245094299316, + "learning_rate": 1.0884655390185181e-05, + "loss": 0.4331, + "step": 34420 + }, + { + "epoch": 0.9451125755079627, + "grad_norm": 0.4323467016220093, + "learning_rate": 1.0884225189302968e-05, + "loss": 0.4963, + "step": 34421 + }, + { + "epoch": 0.9451400329489291, + "grad_norm": 0.4045164883136749, + "learning_rate": 1.0883794986771392e-05, + "loss": 0.4927, + "step": 34422 + }, + { + "epoch": 0.9451674903898957, + "grad_norm": 0.3759559690952301, + "learning_rate": 1.0883364782591256e-05, + "loss": 0.4532, + "step": 34423 + }, + { + "epoch": 0.9451949478308621, + "grad_norm": 0.3807232975959778, + "learning_rate": 1.0882934576763367e-05, + "loss": 0.5352, + "step": 34424 + }, + { + "epoch": 0.9452224052718287, + "grad_norm": 0.42324236035346985, + "learning_rate": 1.0882504369288519e-05, + "loss": 0.5165, + "step": 34425 + }, + { + "epoch": 0.9452498627127952, + "grad_norm": 0.4306406080722809, + "learning_rate": 1.0882074160167518e-05, + "loss": 0.483, + "step": 34426 + }, + { + "epoch": 0.9452773201537616, + "grad_norm": 0.5811260938644409, + "learning_rate": 1.0881643949401169e-05, + "loss": 0.4887, + "step": 34427 + }, + { + "epoch": 0.9453047775947282, + "grad_norm": 0.39248019456863403, + "learning_rate": 1.088121373699027e-05, + "loss": 0.5391, + "step": 34428 + }, + { + "epoch": 0.9453322350356946, + "grad_norm": 0.3769646883010864, + "learning_rate": 1.0880783522935632e-05, + "loss": 0.475, + "step": 34429 + }, + { + "epoch": 0.9453596924766612, + "grad_norm": 0.43171361088752747, + "learning_rate": 1.0880353307238045e-05, + "loss": 0.4915, + "step": 34430 + }, + { + "epoch": 0.9453871499176276, + "grad_norm": 0.40790534019470215, + "learning_rate": 1.0879923089898322e-05, + "loss": 0.4766, + "step": 34431 + }, + { + "epoch": 0.9454146073585942, + "grad_norm": 0.43836453557014465, + "learning_rate": 1.0879492870917258e-05, + "loss": 0.4892, + "step": 34432 + }, + { + "epoch": 0.9454420647995607, + "grad_norm": 0.3763732612133026, + "learning_rate": 1.0879062650295664e-05, + "loss": 0.4055, + "step": 34433 + }, + { + "epoch": 0.9454695222405272, + "grad_norm": 0.39247700572013855, + "learning_rate": 1.0878632428034338e-05, + "loss": 0.5451, + "step": 34434 + }, + { + "epoch": 0.9454969796814937, + "grad_norm": 0.36050668358802795, + "learning_rate": 1.0878202204134075e-05, + "loss": 0.4928, + "step": 34435 + }, + { + "epoch": 0.9455244371224601, + "grad_norm": 0.38116422295570374, + "learning_rate": 1.0877771978595692e-05, + "loss": 0.4784, + "step": 34436 + }, + { + "epoch": 0.9455518945634267, + "grad_norm": 0.4020107090473175, + "learning_rate": 1.087734175141998e-05, + "loss": 0.4782, + "step": 34437 + }, + { + "epoch": 0.9455793520043931, + "grad_norm": 0.35953962802886963, + "learning_rate": 1.0876911522607747e-05, + "loss": 0.4318, + "step": 34438 + }, + { + "epoch": 0.9456068094453597, + "grad_norm": 0.38134926557540894, + "learning_rate": 1.0876481292159795e-05, + "loss": 0.4545, + "step": 34439 + }, + { + "epoch": 0.9456342668863262, + "grad_norm": 0.36404499411582947, + "learning_rate": 1.0876051060076921e-05, + "loss": 0.433, + "step": 34440 + }, + { + "epoch": 0.9456617243272927, + "grad_norm": 0.3868614137172699, + "learning_rate": 1.0875620826359939e-05, + "loss": 0.5758, + "step": 34441 + }, + { + "epoch": 0.9456891817682592, + "grad_norm": 0.39338114857673645, + "learning_rate": 1.0875190591009641e-05, + "loss": 0.5147, + "step": 34442 + }, + { + "epoch": 0.9457166392092257, + "grad_norm": 0.36203157901763916, + "learning_rate": 1.0874760354026833e-05, + "loss": 0.5015, + "step": 34443 + }, + { + "epoch": 0.9457440966501922, + "grad_norm": 0.412622332572937, + "learning_rate": 1.0874330115412319e-05, + "loss": 0.5225, + "step": 34444 + }, + { + "epoch": 0.9457715540911587, + "grad_norm": 0.4001764953136444, + "learning_rate": 1.0873899875166898e-05, + "loss": 0.5224, + "step": 34445 + }, + { + "epoch": 0.9457990115321252, + "grad_norm": 0.4492122232913971, + "learning_rate": 1.0873469633291378e-05, + "loss": 0.4715, + "step": 34446 + }, + { + "epoch": 0.9458264689730917, + "grad_norm": 0.3963572680950165, + "learning_rate": 1.0873039389786557e-05, + "loss": 0.5476, + "step": 34447 + }, + { + "epoch": 0.9458539264140582, + "grad_norm": 0.38389208912849426, + "learning_rate": 1.0872609144653239e-05, + "loss": 0.4593, + "step": 34448 + }, + { + "epoch": 0.9458813838550247, + "grad_norm": 0.4157109260559082, + "learning_rate": 1.0872178897892228e-05, + "loss": 0.505, + "step": 34449 + }, + { + "epoch": 0.9459088412959912, + "grad_norm": 0.35179412364959717, + "learning_rate": 1.0871748649504321e-05, + "loss": 0.5026, + "step": 34450 + }, + { + "epoch": 0.9459362987369577, + "grad_norm": 0.3608151078224182, + "learning_rate": 1.0871318399490328e-05, + "loss": 0.4851, + "step": 34451 + }, + { + "epoch": 0.9459637561779242, + "grad_norm": 0.38519468903541565, + "learning_rate": 1.0870888147851046e-05, + "loss": 0.5254, + "step": 34452 + }, + { + "epoch": 0.9459912136188907, + "grad_norm": 0.3617476224899292, + "learning_rate": 1.0870457894587279e-05, + "loss": 0.5086, + "step": 34453 + }, + { + "epoch": 0.9460186710598573, + "grad_norm": 0.44527456164360046, + "learning_rate": 1.0870027639699834e-05, + "loss": 0.5188, + "step": 34454 + }, + { + "epoch": 0.9460461285008237, + "grad_norm": 0.39935028553009033, + "learning_rate": 1.0869597383189508e-05, + "loss": 0.4663, + "step": 34455 + }, + { + "epoch": 0.9460735859417903, + "grad_norm": 0.3997553884983063, + "learning_rate": 1.0869167125057104e-05, + "loss": 0.4284, + "step": 34456 + }, + { + "epoch": 0.9461010433827567, + "grad_norm": 0.4336923658847809, + "learning_rate": 1.0868736865303426e-05, + "loss": 0.4941, + "step": 34457 + }, + { + "epoch": 0.9461285008237232, + "grad_norm": 0.3632863461971283, + "learning_rate": 1.0868306603929276e-05, + "loss": 0.5155, + "step": 34458 + }, + { + "epoch": 0.9461559582646897, + "grad_norm": 0.4428729712963104, + "learning_rate": 1.086787634093546e-05, + "loss": 0.4743, + "step": 34459 + }, + { + "epoch": 0.9461834157056562, + "grad_norm": 0.4982559382915497, + "learning_rate": 1.0867446076322776e-05, + "loss": 0.5912, + "step": 34460 + }, + { + "epoch": 0.9462108731466228, + "grad_norm": 0.4042717516422272, + "learning_rate": 1.0867015810092025e-05, + "loss": 0.4501, + "step": 34461 + }, + { + "epoch": 0.9462383305875892, + "grad_norm": 0.3389650881290436, + "learning_rate": 1.0866585542244017e-05, + "loss": 0.4894, + "step": 34462 + }, + { + "epoch": 0.9462657880285558, + "grad_norm": 0.41241028904914856, + "learning_rate": 1.0866155272779549e-05, + "loss": 0.5685, + "step": 34463 + }, + { + "epoch": 0.9462932454695222, + "grad_norm": 0.5016523003578186, + "learning_rate": 1.0865725001699426e-05, + "loss": 0.5448, + "step": 34464 + }, + { + "epoch": 0.9463207029104888, + "grad_norm": 0.4708002209663391, + "learning_rate": 1.086529472900445e-05, + "loss": 0.5423, + "step": 34465 + }, + { + "epoch": 0.9463481603514552, + "grad_norm": 0.39178112149238586, + "learning_rate": 1.086486445469542e-05, + "loss": 0.4177, + "step": 34466 + }, + { + "epoch": 0.9463756177924217, + "grad_norm": 0.4410799443721771, + "learning_rate": 1.0864434178773144e-05, + "loss": 0.5192, + "step": 34467 + }, + { + "epoch": 0.9464030752333883, + "grad_norm": 0.37554121017456055, + "learning_rate": 1.0864003901238422e-05, + "loss": 0.4589, + "step": 34468 + }, + { + "epoch": 0.9464305326743547, + "grad_norm": 0.4408629536628723, + "learning_rate": 1.0863573622092055e-05, + "loss": 0.5092, + "step": 34469 + }, + { + "epoch": 0.9464579901153213, + "grad_norm": 0.3760436773300171, + "learning_rate": 1.0863143341334852e-05, + "loss": 0.5242, + "step": 34470 + }, + { + "epoch": 0.9464854475562877, + "grad_norm": 0.3909115493297577, + "learning_rate": 1.0862713058967609e-05, + "loss": 0.5372, + "step": 34471 + }, + { + "epoch": 0.9465129049972543, + "grad_norm": 0.391248881816864, + "learning_rate": 1.0862282774991132e-05, + "loss": 0.5147, + "step": 34472 + }, + { + "epoch": 0.9465403624382207, + "grad_norm": 0.3821565806865692, + "learning_rate": 1.086185248940622e-05, + "loss": 0.4907, + "step": 34473 + }, + { + "epoch": 0.9465678198791873, + "grad_norm": 0.3716680109500885, + "learning_rate": 1.0861422202213679e-05, + "loss": 0.4957, + "step": 34474 + }, + { + "epoch": 0.9465952773201538, + "grad_norm": 0.37615591287612915, + "learning_rate": 1.086099191341431e-05, + "loss": 0.4607, + "step": 34475 + }, + { + "epoch": 0.9466227347611202, + "grad_norm": 0.39856359362602234, + "learning_rate": 1.0860561623008917e-05, + "loss": 0.4648, + "step": 34476 + }, + { + "epoch": 0.9466501922020868, + "grad_norm": 0.37461385130882263, + "learning_rate": 1.0860131330998303e-05, + "loss": 0.4865, + "step": 34477 + }, + { + "epoch": 0.9466776496430532, + "grad_norm": 0.39435747265815735, + "learning_rate": 1.0859701037383269e-05, + "loss": 0.5698, + "step": 34478 + }, + { + "epoch": 0.9467051070840198, + "grad_norm": 0.4208971858024597, + "learning_rate": 1.0859270742164616e-05, + "loss": 0.4537, + "step": 34479 + }, + { + "epoch": 0.9467325645249862, + "grad_norm": 0.37801435589790344, + "learning_rate": 1.085884044534315e-05, + "loss": 0.4694, + "step": 34480 + }, + { + "epoch": 0.9467600219659528, + "grad_norm": 0.3790411949157715, + "learning_rate": 1.0858410146919674e-05, + "loss": 0.4308, + "step": 34481 + }, + { + "epoch": 0.9467874794069193, + "grad_norm": 0.37484246492385864, + "learning_rate": 1.0857979846894985e-05, + "loss": 0.478, + "step": 34482 + }, + { + "epoch": 0.9468149368478858, + "grad_norm": 0.3938000202178955, + "learning_rate": 1.0857549545269897e-05, + "loss": 0.4706, + "step": 34483 + }, + { + "epoch": 0.9468423942888523, + "grad_norm": 0.37731337547302246, + "learning_rate": 1.0857119242045198e-05, + "loss": 0.5506, + "step": 34484 + }, + { + "epoch": 0.9468698517298187, + "grad_norm": 0.481862872838974, + "learning_rate": 1.0856688937221701e-05, + "loss": 0.4481, + "step": 34485 + }, + { + "epoch": 0.9468973091707853, + "grad_norm": 0.3912605941295624, + "learning_rate": 1.0856258630800208e-05, + "loss": 0.5244, + "step": 34486 + }, + { + "epoch": 0.9469247666117517, + "grad_norm": 0.35409078001976013, + "learning_rate": 1.0855828322781517e-05, + "loss": 0.4708, + "step": 34487 + }, + { + "epoch": 0.9469522240527183, + "grad_norm": 0.37956178188323975, + "learning_rate": 1.0855398013166433e-05, + "loss": 0.4735, + "step": 34488 + }, + { + "epoch": 0.9469796814936848, + "grad_norm": 0.35397493839263916, + "learning_rate": 1.0854967701955758e-05, + "loss": 0.467, + "step": 34489 + }, + { + "epoch": 0.9470071389346513, + "grad_norm": 0.3804638087749481, + "learning_rate": 1.0854537389150298e-05, + "loss": 0.359, + "step": 34490 + }, + { + "epoch": 0.9470345963756178, + "grad_norm": 0.3835681080818176, + "learning_rate": 1.085410707475085e-05, + "loss": 0.4271, + "step": 34491 + }, + { + "epoch": 0.9470620538165843, + "grad_norm": 0.37310591340065, + "learning_rate": 1.085367675875822e-05, + "loss": 0.471, + "step": 34492 + }, + { + "epoch": 0.9470895112575508, + "grad_norm": 0.39213475584983826, + "learning_rate": 1.0853246441173213e-05, + "loss": 0.5181, + "step": 34493 + }, + { + "epoch": 0.9471169686985172, + "grad_norm": 0.4634478688240051, + "learning_rate": 1.0852816121996625e-05, + "loss": 0.5628, + "step": 34494 + }, + { + "epoch": 0.9471444261394838, + "grad_norm": 0.37715181708335876, + "learning_rate": 1.0852385801229268e-05, + "loss": 0.5188, + "step": 34495 + }, + { + "epoch": 0.9471718835804503, + "grad_norm": 0.37696731090545654, + "learning_rate": 1.0851955478871936e-05, + "loss": 0.4275, + "step": 34496 + }, + { + "epoch": 0.9471993410214168, + "grad_norm": 0.35155847668647766, + "learning_rate": 1.0851525154925436e-05, + "loss": 0.4431, + "step": 34497 + }, + { + "epoch": 0.9472267984623833, + "grad_norm": 0.3785460889339447, + "learning_rate": 1.085109482939057e-05, + "loss": 0.4827, + "step": 34498 + }, + { + "epoch": 0.9472542559033498, + "grad_norm": 0.4514460563659668, + "learning_rate": 1.085066450226814e-05, + "loss": 0.5107, + "step": 34499 + }, + { + "epoch": 0.9472817133443163, + "grad_norm": 0.3865106403827667, + "learning_rate": 1.085023417355895e-05, + "loss": 0.4409, + "step": 34500 + }, + { + "epoch": 0.9473091707852828, + "grad_norm": 0.40282949805259705, + "learning_rate": 1.0849803843263802e-05, + "loss": 0.5318, + "step": 34501 + }, + { + "epoch": 0.9473366282262493, + "grad_norm": 0.5125645399093628, + "learning_rate": 1.0849373511383497e-05, + "loss": 0.5474, + "step": 34502 + }, + { + "epoch": 0.9473640856672159, + "grad_norm": 0.3417424261569977, + "learning_rate": 1.084894317791884e-05, + "loss": 0.4402, + "step": 34503 + }, + { + "epoch": 0.9473915431081823, + "grad_norm": 0.3710900843143463, + "learning_rate": 1.0848512842870633e-05, + "loss": 0.4719, + "step": 34504 + }, + { + "epoch": 0.9474190005491488, + "grad_norm": 0.42811399698257446, + "learning_rate": 1.084808250623968e-05, + "loss": 0.5406, + "step": 34505 + }, + { + "epoch": 0.9474464579901153, + "grad_norm": 0.37875255942344666, + "learning_rate": 1.0847652168026781e-05, + "loss": 0.4712, + "step": 34506 + }, + { + "epoch": 0.9474739154310818, + "grad_norm": 0.4103841483592987, + "learning_rate": 1.0847221828232738e-05, + "loss": 0.5172, + "step": 34507 + }, + { + "epoch": 0.9475013728720483, + "grad_norm": 0.4241507053375244, + "learning_rate": 1.084679148685836e-05, + "loss": 0.5843, + "step": 34508 + }, + { + "epoch": 0.9475288303130148, + "grad_norm": 0.46203652024269104, + "learning_rate": 1.0846361143904445e-05, + "loss": 0.4832, + "step": 34509 + }, + { + "epoch": 0.9475562877539814, + "grad_norm": 0.36751148104667664, + "learning_rate": 1.0845930799371793e-05, + "loss": 0.5307, + "step": 34510 + }, + { + "epoch": 0.9475837451949478, + "grad_norm": 0.3751312792301178, + "learning_rate": 1.0845500453261213e-05, + "loss": 0.5214, + "step": 34511 + }, + { + "epoch": 0.9476112026359144, + "grad_norm": 0.3261478543281555, + "learning_rate": 1.0845070105573502e-05, + "loss": 0.3886, + "step": 34512 + }, + { + "epoch": 0.9476386600768808, + "grad_norm": 0.5468853116035461, + "learning_rate": 1.084463975630947e-05, + "loss": 0.5087, + "step": 34513 + }, + { + "epoch": 0.9476661175178474, + "grad_norm": 0.4350268840789795, + "learning_rate": 1.0844209405469915e-05, + "loss": 0.5036, + "step": 34514 + }, + { + "epoch": 0.9476935749588138, + "grad_norm": 0.5203311443328857, + "learning_rate": 1.0843779053055637e-05, + "loss": 0.4999, + "step": 34515 + }, + { + "epoch": 0.9477210323997803, + "grad_norm": 0.41822293400764465, + "learning_rate": 1.0843348699067441e-05, + "loss": 0.4987, + "step": 34516 + }, + { + "epoch": 0.9477484898407469, + "grad_norm": 0.39376115798950195, + "learning_rate": 1.0842918343506135e-05, + "loss": 0.4959, + "step": 34517 + }, + { + "epoch": 0.9477759472817133, + "grad_norm": 0.3882836699485779, + "learning_rate": 1.0842487986372514e-05, + "loss": 0.4225, + "step": 34518 + }, + { + "epoch": 0.9478034047226799, + "grad_norm": 0.4086126387119293, + "learning_rate": 1.0842057627667384e-05, + "loss": 0.5251, + "step": 34519 + }, + { + "epoch": 0.9478308621636463, + "grad_norm": 0.41174447536468506, + "learning_rate": 1.0841627267391547e-05, + "loss": 0.5245, + "step": 34520 + }, + { + "epoch": 0.9478583196046129, + "grad_norm": 0.40575921535491943, + "learning_rate": 1.084119690554581e-05, + "loss": 0.4772, + "step": 34521 + }, + { + "epoch": 0.9478857770455793, + "grad_norm": 0.3806719481945038, + "learning_rate": 1.0840766542130971e-05, + "loss": 0.5257, + "step": 34522 + }, + { + "epoch": 0.9479132344865459, + "grad_norm": 0.42496222257614136, + "learning_rate": 1.0840336177147832e-05, + "loss": 0.5608, + "step": 34523 + }, + { + "epoch": 0.9479406919275124, + "grad_norm": 0.3672117590904236, + "learning_rate": 1.08399058105972e-05, + "loss": 0.4846, + "step": 34524 + }, + { + "epoch": 0.9479681493684788, + "grad_norm": 0.42602258920669556, + "learning_rate": 1.0839475442479873e-05, + "loss": 0.5212, + "step": 34525 + }, + { + "epoch": 0.9479956068094454, + "grad_norm": 0.5399554371833801, + "learning_rate": 1.0839045072796658e-05, + "loss": 0.4987, + "step": 34526 + }, + { + "epoch": 0.9480230642504118, + "grad_norm": 0.926740288734436, + "learning_rate": 1.0838614701548357e-05, + "loss": 0.541, + "step": 34527 + }, + { + "epoch": 0.9480505216913784, + "grad_norm": 0.411208838224411, + "learning_rate": 1.083818432873577e-05, + "loss": 0.4973, + "step": 34528 + }, + { + "epoch": 0.9480779791323448, + "grad_norm": 0.4116266965866089, + "learning_rate": 1.0837753954359704e-05, + "loss": 0.4954, + "step": 34529 + }, + { + "epoch": 0.9481054365733114, + "grad_norm": 0.4084867835044861, + "learning_rate": 1.0837323578420958e-05, + "loss": 0.5325, + "step": 34530 + }, + { + "epoch": 0.9481328940142779, + "grad_norm": 0.3790270686149597, + "learning_rate": 1.0836893200920337e-05, + "loss": 0.4421, + "step": 34531 + }, + { + "epoch": 0.9481603514552444, + "grad_norm": 0.33989396691322327, + "learning_rate": 1.0836462821858645e-05, + "loss": 0.5026, + "step": 34532 + }, + { + "epoch": 0.9481878088962109, + "grad_norm": 0.4065544307231903, + "learning_rate": 1.0836032441236677e-05, + "loss": 0.4216, + "step": 34533 + }, + { + "epoch": 0.9482152663371773, + "grad_norm": 0.388295978307724, + "learning_rate": 1.0835602059055248e-05, + "loss": 0.5044, + "step": 34534 + }, + { + "epoch": 0.9482427237781439, + "grad_norm": 0.35684844851493835, + "learning_rate": 1.083517167531515e-05, + "loss": 0.3939, + "step": 34535 + }, + { + "epoch": 0.9482701812191103, + "grad_norm": 0.38088706135749817, + "learning_rate": 1.0834741290017192e-05, + "loss": 0.5185, + "step": 34536 + }, + { + "epoch": 0.9482976386600769, + "grad_norm": 0.4406750500202179, + "learning_rate": 1.0834310903162173e-05, + "loss": 0.4802, + "step": 34537 + }, + { + "epoch": 0.9483250961010434, + "grad_norm": 0.35414063930511475, + "learning_rate": 1.08338805147509e-05, + "loss": 0.3912, + "step": 34538 + }, + { + "epoch": 0.9483525535420099, + "grad_norm": 0.393359899520874, + "learning_rate": 1.0833450124784174e-05, + "loss": 0.468, + "step": 34539 + }, + { + "epoch": 0.9483800109829764, + "grad_norm": 0.4828825294971466, + "learning_rate": 1.0833019733262797e-05, + "loss": 0.4854, + "step": 34540 + }, + { + "epoch": 0.9484074684239429, + "grad_norm": 0.4260897934436798, + "learning_rate": 1.0832589340187573e-05, + "loss": 0.497, + "step": 34541 + }, + { + "epoch": 0.9484349258649094, + "grad_norm": 0.41364359855651855, + "learning_rate": 1.0832158945559304e-05, + "loss": 0.4826, + "step": 34542 + }, + { + "epoch": 0.9484623833058758, + "grad_norm": 0.401533305644989, + "learning_rate": 1.0831728549378793e-05, + "loss": 0.4692, + "step": 34543 + }, + { + "epoch": 0.9484898407468424, + "grad_norm": 0.512458324432373, + "learning_rate": 1.0831298151646843e-05, + "loss": 0.4722, + "step": 34544 + }, + { + "epoch": 0.9485172981878089, + "grad_norm": 0.39818015694618225, + "learning_rate": 1.0830867752364255e-05, + "loss": 0.5466, + "step": 34545 + }, + { + "epoch": 0.9485447556287754, + "grad_norm": 0.4256613850593567, + "learning_rate": 1.0830437351531834e-05, + "loss": 0.594, + "step": 34546 + }, + { + "epoch": 0.9485722130697419, + "grad_norm": 0.36860036849975586, + "learning_rate": 1.0830006949150382e-05, + "loss": 0.4862, + "step": 34547 + }, + { + "epoch": 0.9485996705107084, + "grad_norm": 0.43739053606987, + "learning_rate": 1.0829576545220703e-05, + "loss": 0.5095, + "step": 34548 + }, + { + "epoch": 0.9486271279516749, + "grad_norm": 0.37709423899650574, + "learning_rate": 1.0829146139743601e-05, + "loss": 0.5352, + "step": 34549 + }, + { + "epoch": 0.9486545853926414, + "grad_norm": 0.4190336763858795, + "learning_rate": 1.0828715732719874e-05, + "loss": 0.4322, + "step": 34550 + }, + { + "epoch": 0.9486820428336079, + "grad_norm": 0.34288254380226135, + "learning_rate": 1.0828285324150329e-05, + "loss": 0.4426, + "step": 34551 + }, + { + "epoch": 0.9487095002745745, + "grad_norm": 0.38621872663497925, + "learning_rate": 1.0827854914035765e-05, + "loss": 0.5352, + "step": 34552 + }, + { + "epoch": 0.9487369577155409, + "grad_norm": 0.3894032835960388, + "learning_rate": 1.082742450237699e-05, + "loss": 0.5133, + "step": 34553 + }, + { + "epoch": 0.9487644151565074, + "grad_norm": 0.34158602356910706, + "learning_rate": 1.0826994089174802e-05, + "loss": 0.4923, + "step": 34554 + }, + { + "epoch": 0.9487918725974739, + "grad_norm": 0.41160640120506287, + "learning_rate": 1.0826563674430009e-05, + "loss": 0.4881, + "step": 34555 + }, + { + "epoch": 0.9488193300384404, + "grad_norm": 0.40888187289237976, + "learning_rate": 1.082613325814341e-05, + "loss": 0.5437, + "step": 34556 + }, + { + "epoch": 0.9488467874794069, + "grad_norm": 0.39908212423324585, + "learning_rate": 1.0825702840315807e-05, + "loss": 0.5945, + "step": 34557 + }, + { + "epoch": 0.9488742449203734, + "grad_norm": 0.3669831454753876, + "learning_rate": 1.0825272420948007e-05, + "loss": 0.4953, + "step": 34558 + }, + { + "epoch": 0.94890170236134, + "grad_norm": 0.38587436079978943, + "learning_rate": 1.0824842000040807e-05, + "loss": 0.4642, + "step": 34559 + }, + { + "epoch": 0.9489291598023064, + "grad_norm": 1.354231834411621, + "learning_rate": 1.0824411577595017e-05, + "loss": 0.5105, + "step": 34560 + }, + { + "epoch": 0.948956617243273, + "grad_norm": 0.4161090850830078, + "learning_rate": 1.0823981153611437e-05, + "loss": 0.5473, + "step": 34561 + }, + { + "epoch": 0.9489840746842394, + "grad_norm": 0.5999234914779663, + "learning_rate": 1.0823550728090865e-05, + "loss": 0.5215, + "step": 34562 + }, + { + "epoch": 0.949011532125206, + "grad_norm": 0.4099833071231842, + "learning_rate": 1.0823120301034111e-05, + "loss": 0.5335, + "step": 34563 + }, + { + "epoch": 0.9490389895661724, + "grad_norm": 0.4114846885204315, + "learning_rate": 1.0822689872441974e-05, + "loss": 0.4973, + "step": 34564 + }, + { + "epoch": 0.9490664470071389, + "grad_norm": 0.42816853523254395, + "learning_rate": 1.082225944231526e-05, + "loss": 0.4918, + "step": 34565 + }, + { + "epoch": 0.9490939044481055, + "grad_norm": 0.4268355369567871, + "learning_rate": 1.0821829010654768e-05, + "loss": 0.5144, + "step": 34566 + }, + { + "epoch": 0.9491213618890719, + "grad_norm": 0.37574535608291626, + "learning_rate": 1.0821398577461302e-05, + "loss": 0.4993, + "step": 34567 + }, + { + "epoch": 0.9491488193300385, + "grad_norm": 0.37180137634277344, + "learning_rate": 1.0820968142735666e-05, + "loss": 0.519, + "step": 34568 + }, + { + "epoch": 0.9491762767710049, + "grad_norm": 0.37404707074165344, + "learning_rate": 1.082053770647866e-05, + "loss": 0.4734, + "step": 34569 + }, + { + "epoch": 0.9492037342119715, + "grad_norm": 0.4269319772720337, + "learning_rate": 1.0820107268691093e-05, + "loss": 0.5173, + "step": 34570 + }, + { + "epoch": 0.9492311916529379, + "grad_norm": 0.37533214688301086, + "learning_rate": 1.0819676829373764e-05, + "loss": 0.5355, + "step": 34571 + }, + { + "epoch": 0.9492586490939044, + "grad_norm": 0.3804558217525482, + "learning_rate": 1.0819246388527473e-05, + "loss": 0.4381, + "step": 34572 + }, + { + "epoch": 0.949286106534871, + "grad_norm": 0.5545331239700317, + "learning_rate": 1.0818815946153029e-05, + "loss": 0.5632, + "step": 34573 + }, + { + "epoch": 0.9493135639758374, + "grad_norm": 0.3963298499584198, + "learning_rate": 1.0818385502251229e-05, + "loss": 0.4793, + "step": 34574 + }, + { + "epoch": 0.949341021416804, + "grad_norm": 0.40409693121910095, + "learning_rate": 1.0817955056822882e-05, + "loss": 0.5401, + "step": 34575 + }, + { + "epoch": 0.9493684788577704, + "grad_norm": 0.38733068108558655, + "learning_rate": 1.0817524609868787e-05, + "loss": 0.489, + "step": 34576 + }, + { + "epoch": 0.949395936298737, + "grad_norm": 0.3964945375919342, + "learning_rate": 1.0817094161389747e-05, + "loss": 0.5126, + "step": 34577 + }, + { + "epoch": 0.9494233937397034, + "grad_norm": 0.4345715045928955, + "learning_rate": 1.0816663711386565e-05, + "loss": 0.5729, + "step": 34578 + }, + { + "epoch": 0.94945085118067, + "grad_norm": 0.41696643829345703, + "learning_rate": 1.0816233259860047e-05, + "loss": 0.5911, + "step": 34579 + }, + { + "epoch": 0.9494783086216365, + "grad_norm": 0.4535013735294342, + "learning_rate": 1.0815802806810991e-05, + "loss": 0.5009, + "step": 34580 + }, + { + "epoch": 0.949505766062603, + "grad_norm": 0.35992076992988586, + "learning_rate": 1.0815372352240203e-05, + "loss": 0.4447, + "step": 34581 + }, + { + "epoch": 0.9495332235035695, + "grad_norm": 0.3586016893386841, + "learning_rate": 1.0814941896148486e-05, + "loss": 0.4528, + "step": 34582 + }, + { + "epoch": 0.9495606809445359, + "grad_norm": 0.40384766459465027, + "learning_rate": 1.0814511438536641e-05, + "loss": 0.4937, + "step": 34583 + }, + { + "epoch": 0.9495881383855025, + "grad_norm": 0.4180490970611572, + "learning_rate": 1.0814080979405475e-05, + "loss": 0.5094, + "step": 34584 + }, + { + "epoch": 0.9496155958264689, + "grad_norm": 0.5167869329452515, + "learning_rate": 1.0813650518755784e-05, + "loss": 0.511, + "step": 34585 + }, + { + "epoch": 0.9496430532674355, + "grad_norm": 0.40917059779167175, + "learning_rate": 1.0813220056588379e-05, + "loss": 0.5148, + "step": 34586 + }, + { + "epoch": 0.949670510708402, + "grad_norm": 0.44848203659057617, + "learning_rate": 1.0812789592904053e-05, + "loss": 0.4817, + "step": 34587 + }, + { + "epoch": 0.9496979681493685, + "grad_norm": 0.4109098017215729, + "learning_rate": 1.0812359127703621e-05, + "loss": 0.527, + "step": 34588 + }, + { + "epoch": 0.949725425590335, + "grad_norm": 0.4482218325138092, + "learning_rate": 1.081192866098788e-05, + "loss": 0.4546, + "step": 34589 + }, + { + "epoch": 0.9497528830313015, + "grad_norm": 0.38739490509033203, + "learning_rate": 1.081149819275763e-05, + "loss": 0.5531, + "step": 34590 + }, + { + "epoch": 0.949780340472268, + "grad_norm": 0.43590620160102844, + "learning_rate": 1.0811067723013677e-05, + "loss": 0.5458, + "step": 34591 + }, + { + "epoch": 0.9498077979132344, + "grad_norm": 0.41062605381011963, + "learning_rate": 1.0810637251756826e-05, + "loss": 0.5591, + "step": 34592 + }, + { + "epoch": 0.949835255354201, + "grad_norm": 0.3849201500415802, + "learning_rate": 1.0810206778987875e-05, + "loss": 0.4577, + "step": 34593 + }, + { + "epoch": 0.9498627127951675, + "grad_norm": 0.37505054473876953, + "learning_rate": 1.0809776304707633e-05, + "loss": 0.4927, + "step": 34594 + }, + { + "epoch": 0.949890170236134, + "grad_norm": 0.3688153922557831, + "learning_rate": 1.0809345828916898e-05, + "loss": 0.4504, + "step": 34595 + }, + { + "epoch": 0.9499176276771005, + "grad_norm": 0.3523881137371063, + "learning_rate": 1.0808915351616473e-05, + "loss": 0.4086, + "step": 34596 + }, + { + "epoch": 0.949945085118067, + "grad_norm": 0.6429381370544434, + "learning_rate": 1.0808484872807165e-05, + "loss": 0.5521, + "step": 34597 + }, + { + "epoch": 0.9499725425590335, + "grad_norm": 0.40183520317077637, + "learning_rate": 1.0808054392489775e-05, + "loss": 0.4554, + "step": 34598 + }, + { + "epoch": 0.95, + "grad_norm": 0.42293375730514526, + "learning_rate": 1.0807623910665106e-05, + "loss": 0.521, + "step": 34599 + }, + { + "epoch": 0.9500274574409665, + "grad_norm": 0.5955321192741394, + "learning_rate": 1.0807193427333958e-05, + "loss": 0.5488, + "step": 34600 + }, + { + "epoch": 0.950054914881933, + "grad_norm": 0.36651739478111267, + "learning_rate": 1.0806762942497137e-05, + "loss": 0.5514, + "step": 34601 + }, + { + "epoch": 0.9500823723228995, + "grad_norm": 0.41866442561149597, + "learning_rate": 1.0806332456155448e-05, + "loss": 0.4887, + "step": 34602 + }, + { + "epoch": 0.950109829763866, + "grad_norm": 0.4281187355518341, + "learning_rate": 1.0805901968309688e-05, + "loss": 0.5025, + "step": 34603 + }, + { + "epoch": 0.9501372872048325, + "grad_norm": 0.4019439220428467, + "learning_rate": 1.0805471478960668e-05, + "loss": 0.5312, + "step": 34604 + }, + { + "epoch": 0.950164744645799, + "grad_norm": 0.35646089911460876, + "learning_rate": 1.0805040988109183e-05, + "loss": 0.3947, + "step": 34605 + }, + { + "epoch": 0.9501922020867655, + "grad_norm": 0.367939829826355, + "learning_rate": 1.0804610495756043e-05, + "loss": 0.4399, + "step": 34606 + }, + { + "epoch": 0.950219659527732, + "grad_norm": 0.4174533188343048, + "learning_rate": 1.0804180001902047e-05, + "loss": 0.4466, + "step": 34607 + }, + { + "epoch": 0.9502471169686986, + "grad_norm": 0.3951793909072876, + "learning_rate": 1.0803749506547997e-05, + "loss": 0.5332, + "step": 34608 + }, + { + "epoch": 0.950274574409665, + "grad_norm": 0.44432249665260315, + "learning_rate": 1.0803319009694698e-05, + "loss": 0.5475, + "step": 34609 + }, + { + "epoch": 0.9503020318506316, + "grad_norm": 0.37314918637275696, + "learning_rate": 1.0802888511342953e-05, + "loss": 0.4531, + "step": 34610 + }, + { + "epoch": 0.950329489291598, + "grad_norm": 0.4050174653530121, + "learning_rate": 1.0802458011493563e-05, + "loss": 0.5284, + "step": 34611 + }, + { + "epoch": 0.9503569467325645, + "grad_norm": 0.41504377126693726, + "learning_rate": 1.0802027510147336e-05, + "loss": 0.477, + "step": 34612 + }, + { + "epoch": 0.950384404173531, + "grad_norm": 0.6143400073051453, + "learning_rate": 1.0801597007305066e-05, + "loss": 0.5294, + "step": 34613 + }, + { + "epoch": 0.9504118616144975, + "grad_norm": 0.3837282061576843, + "learning_rate": 1.0801166502967569e-05, + "loss": 0.4892, + "step": 34614 + }, + { + "epoch": 0.9504393190554641, + "grad_norm": 0.38294512033462524, + "learning_rate": 1.0800735997135636e-05, + "loss": 0.4197, + "step": 34615 + }, + { + "epoch": 0.9504667764964305, + "grad_norm": 0.37327584624290466, + "learning_rate": 1.0800305489810077e-05, + "loss": 0.5724, + "step": 34616 + }, + { + "epoch": 0.9504942339373971, + "grad_norm": 0.380982905626297, + "learning_rate": 1.0799874980991691e-05, + "loss": 0.4297, + "step": 34617 + }, + { + "epoch": 0.9505216913783635, + "grad_norm": 0.3949768543243408, + "learning_rate": 1.0799444470681285e-05, + "loss": 0.4403, + "step": 34618 + }, + { + "epoch": 0.9505491488193301, + "grad_norm": 0.46822884678840637, + "learning_rate": 1.0799013958879658e-05, + "loss": 0.4757, + "step": 34619 + }, + { + "epoch": 0.9505766062602965, + "grad_norm": 0.3878385126590729, + "learning_rate": 1.0798583445587617e-05, + "loss": 0.5409, + "step": 34620 + }, + { + "epoch": 0.950604063701263, + "grad_norm": 0.3857860863208771, + "learning_rate": 1.0798152930805959e-05, + "loss": 0.4436, + "step": 34621 + }, + { + "epoch": 0.9506315211422296, + "grad_norm": 0.40292465686798096, + "learning_rate": 1.0797722414535494e-05, + "loss": 0.483, + "step": 34622 + }, + { + "epoch": 0.950658978583196, + "grad_norm": 0.40507784485816956, + "learning_rate": 1.0797291896777025e-05, + "loss": 0.5562, + "step": 34623 + }, + { + "epoch": 0.9506864360241626, + "grad_norm": 0.5070155262947083, + "learning_rate": 1.0796861377531346e-05, + "loss": 0.4017, + "step": 34624 + }, + { + "epoch": 0.950713893465129, + "grad_norm": 0.41590598225593567, + "learning_rate": 1.0796430856799272e-05, + "loss": 0.5457, + "step": 34625 + }, + { + "epoch": 0.9507413509060956, + "grad_norm": 0.38884925842285156, + "learning_rate": 1.0796000334581595e-05, + "loss": 0.4489, + "step": 34626 + }, + { + "epoch": 0.950768808347062, + "grad_norm": 0.3932308256626129, + "learning_rate": 1.0795569810879125e-05, + "loss": 0.5175, + "step": 34627 + }, + { + "epoch": 0.9507962657880286, + "grad_norm": 0.3416912257671356, + "learning_rate": 1.0795139285692666e-05, + "loss": 0.4722, + "step": 34628 + }, + { + "epoch": 0.9508237232289951, + "grad_norm": 0.36683398485183716, + "learning_rate": 1.0794708759023016e-05, + "loss": 0.4641, + "step": 34629 + }, + { + "epoch": 0.9508511806699615, + "grad_norm": 0.42926549911499023, + "learning_rate": 1.079427823087098e-05, + "loss": 0.5298, + "step": 34630 + }, + { + "epoch": 0.9508786381109281, + "grad_norm": 0.38897180557250977, + "learning_rate": 1.0793847701237364e-05, + "loss": 0.4809, + "step": 34631 + }, + { + "epoch": 0.9509060955518945, + "grad_norm": 0.4393659830093384, + "learning_rate": 1.0793417170122966e-05, + "loss": 0.5766, + "step": 34632 + }, + { + "epoch": 0.9509335529928611, + "grad_norm": 0.38501331210136414, + "learning_rate": 1.0792986637528596e-05, + "loss": 0.4315, + "step": 34633 + }, + { + "epoch": 0.9509610104338275, + "grad_norm": 0.4359462559223175, + "learning_rate": 1.0792556103455048e-05, + "loss": 0.5829, + "step": 34634 + }, + { + "epoch": 0.9509884678747941, + "grad_norm": 0.40449580550193787, + "learning_rate": 1.079212556790313e-05, + "loss": 0.5016, + "step": 34635 + }, + { + "epoch": 0.9510159253157606, + "grad_norm": 0.38746634125709534, + "learning_rate": 1.0791695030873648e-05, + "loss": 0.4953, + "step": 34636 + }, + { + "epoch": 0.9510433827567271, + "grad_norm": 0.37471598386764526, + "learning_rate": 1.0791264492367402e-05, + "loss": 0.478, + "step": 34637 + }, + { + "epoch": 0.9510708401976936, + "grad_norm": 0.4485347867012024, + "learning_rate": 1.0790833952385196e-05, + "loss": 0.505, + "step": 34638 + }, + { + "epoch": 0.95109829763866, + "grad_norm": 0.40403518080711365, + "learning_rate": 1.0790403410927828e-05, + "loss": 0.4795, + "step": 34639 + }, + { + "epoch": 0.9511257550796266, + "grad_norm": 0.4250563085079193, + "learning_rate": 1.0789972867996108e-05, + "loss": 0.5395, + "step": 34640 + }, + { + "epoch": 0.951153212520593, + "grad_norm": 0.647186815738678, + "learning_rate": 1.0789542323590839e-05, + "loss": 0.5047, + "step": 34641 + }, + { + "epoch": 0.9511806699615596, + "grad_norm": 0.5865503549575806, + "learning_rate": 1.0789111777712816e-05, + "loss": 0.4569, + "step": 34642 + }, + { + "epoch": 0.9512081274025261, + "grad_norm": 0.6084676384925842, + "learning_rate": 1.0788681230362852e-05, + "loss": 0.4093, + "step": 34643 + }, + { + "epoch": 0.9512355848434926, + "grad_norm": 0.4614333510398865, + "learning_rate": 1.0788250681541743e-05, + "loss": 0.4772, + "step": 34644 + }, + { + "epoch": 0.9512630422844591, + "grad_norm": 3.5255706310272217, + "learning_rate": 1.0787820131250298e-05, + "loss": 0.5459, + "step": 34645 + }, + { + "epoch": 0.9512904997254256, + "grad_norm": 0.3893234431743622, + "learning_rate": 1.0787389579489313e-05, + "loss": 0.462, + "step": 34646 + }, + { + "epoch": 0.9513179571663921, + "grad_norm": 0.4093390107154846, + "learning_rate": 1.0786959026259598e-05, + "loss": 0.4802, + "step": 34647 + }, + { + "epoch": 0.9513454146073586, + "grad_norm": 0.3936988115310669, + "learning_rate": 1.0786528471561952e-05, + "loss": 0.5192, + "step": 34648 + }, + { + "epoch": 0.9513728720483251, + "grad_norm": 0.40760713815689087, + "learning_rate": 1.078609791539718e-05, + "loss": 0.5061, + "step": 34649 + }, + { + "epoch": 0.9514003294892917, + "grad_norm": 0.41084179282188416, + "learning_rate": 1.0785667357766085e-05, + "loss": 0.4805, + "step": 34650 + }, + { + "epoch": 0.9514277869302581, + "grad_norm": 0.46504950523376465, + "learning_rate": 1.078523679866947e-05, + "loss": 0.5393, + "step": 34651 + }, + { + "epoch": 0.9514552443712246, + "grad_norm": 0.3490103483200073, + "learning_rate": 1.0784806238108134e-05, + "loss": 0.4689, + "step": 34652 + }, + { + "epoch": 0.9514827018121911, + "grad_norm": 0.4992573857307434, + "learning_rate": 1.0784375676082887e-05, + "loss": 0.4846, + "step": 34653 + }, + { + "epoch": 0.9515101592531576, + "grad_norm": 0.45588934421539307, + "learning_rate": 1.0783945112594531e-05, + "loss": 0.5361, + "step": 34654 + }, + { + "epoch": 0.9515376166941241, + "grad_norm": 0.6914562582969666, + "learning_rate": 1.0783514547643862e-05, + "loss": 0.5089, + "step": 34655 + }, + { + "epoch": 0.9515650741350906, + "grad_norm": 0.4092187285423279, + "learning_rate": 1.0783083981231694e-05, + "loss": 0.546, + "step": 34656 + }, + { + "epoch": 0.9515925315760572, + "grad_norm": 0.42216941714286804, + "learning_rate": 1.078265341335882e-05, + "loss": 0.5202, + "step": 34657 + }, + { + "epoch": 0.9516199890170236, + "grad_norm": 0.42972487211227417, + "learning_rate": 1.078222284402605e-05, + "loss": 0.4927, + "step": 34658 + }, + { + "epoch": 0.9516474464579902, + "grad_norm": 0.4035871922969818, + "learning_rate": 1.0781792273234184e-05, + "loss": 0.5409, + "step": 34659 + }, + { + "epoch": 0.9516749038989566, + "grad_norm": 0.4005240797996521, + "learning_rate": 1.0781361700984025e-05, + "loss": 0.544, + "step": 34660 + }, + { + "epoch": 0.9517023613399231, + "grad_norm": 0.38408926129341125, + "learning_rate": 1.0780931127276379e-05, + "loss": 0.3526, + "step": 34661 + }, + { + "epoch": 0.9517298187808896, + "grad_norm": 0.4006398022174835, + "learning_rate": 1.0780500552112046e-05, + "loss": 0.4808, + "step": 34662 + }, + { + "epoch": 0.9517572762218561, + "grad_norm": 0.4167468249797821, + "learning_rate": 1.0780069975491832e-05, + "loss": 0.5772, + "step": 34663 + }, + { + "epoch": 0.9517847336628227, + "grad_norm": 0.40460366010665894, + "learning_rate": 1.0779639397416537e-05, + "loss": 0.4344, + "step": 34664 + }, + { + "epoch": 0.9518121911037891, + "grad_norm": 0.3931076228618622, + "learning_rate": 1.0779208817886964e-05, + "loss": 0.4909, + "step": 34665 + }, + { + "epoch": 0.9518396485447557, + "grad_norm": 0.37698519229888916, + "learning_rate": 1.077877823690392e-05, + "loss": 0.5221, + "step": 34666 + }, + { + "epoch": 0.9518671059857221, + "grad_norm": 0.39005914330482483, + "learning_rate": 1.0778347654468204e-05, + "loss": 0.4571, + "step": 34667 + }, + { + "epoch": 0.9518945634266887, + "grad_norm": 0.45187491178512573, + "learning_rate": 1.0777917070580623e-05, + "loss": 0.4705, + "step": 34668 + }, + { + "epoch": 0.9519220208676551, + "grad_norm": 0.362453430891037, + "learning_rate": 1.0777486485241978e-05, + "loss": 0.4763, + "step": 34669 + }, + { + "epoch": 0.9519494783086216, + "grad_norm": 0.8331220746040344, + "learning_rate": 1.0777055898453072e-05, + "loss": 0.5448, + "step": 34670 + }, + { + "epoch": 0.9519769357495882, + "grad_norm": 0.38890767097473145, + "learning_rate": 1.0776625310214712e-05, + "loss": 0.4775, + "step": 34671 + }, + { + "epoch": 0.9520043931905546, + "grad_norm": 0.3732020854949951, + "learning_rate": 1.0776194720527694e-05, + "loss": 0.4215, + "step": 34672 + }, + { + "epoch": 0.9520318506315212, + "grad_norm": 0.357217937707901, + "learning_rate": 1.0775764129392827e-05, + "loss": 0.4668, + "step": 34673 + }, + { + "epoch": 0.9520593080724876, + "grad_norm": 0.3584648668766022, + "learning_rate": 1.0775333536810912e-05, + "loss": 0.455, + "step": 34674 + }, + { + "epoch": 0.9520867655134542, + "grad_norm": 0.5152673125267029, + "learning_rate": 1.0774902942782752e-05, + "loss": 0.5568, + "step": 34675 + }, + { + "epoch": 0.9521142229544206, + "grad_norm": 0.40125662088394165, + "learning_rate": 1.077447234730915e-05, + "loss": 0.5002, + "step": 34676 + }, + { + "epoch": 0.9521416803953872, + "grad_norm": 0.3741495609283447, + "learning_rate": 1.0774041750390914e-05, + "loss": 0.5062, + "step": 34677 + }, + { + "epoch": 0.9521691378363537, + "grad_norm": 0.3984069526195526, + "learning_rate": 1.0773611152028838e-05, + "loss": 0.4728, + "step": 34678 + }, + { + "epoch": 0.9521965952773201, + "grad_norm": 0.3916851282119751, + "learning_rate": 1.0773180552223732e-05, + "loss": 0.5101, + "step": 34679 + }, + { + "epoch": 0.9522240527182867, + "grad_norm": 0.39156991243362427, + "learning_rate": 1.0772749950976397e-05, + "loss": 0.4236, + "step": 34680 + }, + { + "epoch": 0.9522515101592531, + "grad_norm": 0.39904576539993286, + "learning_rate": 1.077231934828764e-05, + "loss": 0.5385, + "step": 34681 + }, + { + "epoch": 0.9522789676002197, + "grad_norm": 0.36566048860549927, + "learning_rate": 1.0771888744158256e-05, + "loss": 0.4054, + "step": 34682 + }, + { + "epoch": 0.9523064250411861, + "grad_norm": 0.4323814809322357, + "learning_rate": 1.0771458138589054e-05, + "loss": 0.5066, + "step": 34683 + }, + { + "epoch": 0.9523338824821527, + "grad_norm": 0.3970227539539337, + "learning_rate": 1.077102753158084e-05, + "loss": 0.5013, + "step": 34684 + }, + { + "epoch": 0.9523613399231191, + "grad_norm": 0.3950727880001068, + "learning_rate": 1.0770596923134413e-05, + "loss": 0.4348, + "step": 34685 + }, + { + "epoch": 0.9523887973640857, + "grad_norm": 0.3963001072406769, + "learning_rate": 1.0770166313250574e-05, + "loss": 0.5456, + "step": 34686 + }, + { + "epoch": 0.9524162548050522, + "grad_norm": 0.40517473220825195, + "learning_rate": 1.076973570193013e-05, + "loss": 0.6088, + "step": 34687 + }, + { + "epoch": 0.9524437122460186, + "grad_norm": 0.35962149500846863, + "learning_rate": 1.0769305089173883e-05, + "loss": 0.4579, + "step": 34688 + }, + { + "epoch": 0.9524711696869852, + "grad_norm": 0.3653494417667389, + "learning_rate": 1.0768874474982638e-05, + "loss": 0.4521, + "step": 34689 + }, + { + "epoch": 0.9524986271279516, + "grad_norm": 0.3759037256240845, + "learning_rate": 1.0768443859357196e-05, + "loss": 0.4815, + "step": 34690 + }, + { + "epoch": 0.9525260845689182, + "grad_norm": 0.37586599588394165, + "learning_rate": 1.076801324229836e-05, + "loss": 0.4908, + "step": 34691 + }, + { + "epoch": 0.9525535420098846, + "grad_norm": 0.4187792241573334, + "learning_rate": 1.0767582623806935e-05, + "loss": 0.4505, + "step": 34692 + }, + { + "epoch": 0.9525809994508512, + "grad_norm": 0.3643229007720947, + "learning_rate": 1.0767152003883724e-05, + "loss": 0.4856, + "step": 34693 + }, + { + "epoch": 0.9526084568918177, + "grad_norm": 0.3992065489292145, + "learning_rate": 1.0766721382529527e-05, + "loss": 0.5307, + "step": 34694 + }, + { + "epoch": 0.9526359143327842, + "grad_norm": 0.3604598343372345, + "learning_rate": 1.0766290759745153e-05, + "loss": 0.4063, + "step": 34695 + }, + { + "epoch": 0.9526633717737507, + "grad_norm": 0.3935020864009857, + "learning_rate": 1.0765860135531401e-05, + "loss": 0.5994, + "step": 34696 + }, + { + "epoch": 0.9526908292147171, + "grad_norm": 0.41195449233055115, + "learning_rate": 1.0765429509889077e-05, + "loss": 0.4959, + "step": 34697 + }, + { + "epoch": 0.9527182866556837, + "grad_norm": 0.39934781193733215, + "learning_rate": 1.0764998882818977e-05, + "loss": 0.5226, + "step": 34698 + }, + { + "epoch": 0.9527457440966501, + "grad_norm": 0.40157195925712585, + "learning_rate": 1.0764568254321916e-05, + "loss": 0.4989, + "step": 34699 + }, + { + "epoch": 0.9527732015376167, + "grad_norm": 0.36365458369255066, + "learning_rate": 1.076413762439869e-05, + "loss": 0.3707, + "step": 34700 + }, + { + "epoch": 0.9528006589785832, + "grad_norm": 0.38507965207099915, + "learning_rate": 1.07637069930501e-05, + "loss": 0.5191, + "step": 34701 + }, + { + "epoch": 0.9528281164195497, + "grad_norm": 0.37922927737236023, + "learning_rate": 1.0763276360276957e-05, + "loss": 0.4894, + "step": 34702 + }, + { + "epoch": 0.9528555738605162, + "grad_norm": 0.4038503170013428, + "learning_rate": 1.076284572608006e-05, + "loss": 0.446, + "step": 34703 + }, + { + "epoch": 0.9528830313014827, + "grad_norm": 0.3981234133243561, + "learning_rate": 1.076241509046021e-05, + "loss": 0.4322, + "step": 34704 + }, + { + "epoch": 0.9529104887424492, + "grad_norm": 0.36783644556999207, + "learning_rate": 1.0761984453418213e-05, + "loss": 0.4191, + "step": 34705 + }, + { + "epoch": 0.9529379461834157, + "grad_norm": 0.3733910024166107, + "learning_rate": 1.0761553814954871e-05, + "loss": 0.3462, + "step": 34706 + }, + { + "epoch": 0.9529654036243822, + "grad_norm": 0.3762595057487488, + "learning_rate": 1.0761123175070989e-05, + "loss": 0.4605, + "step": 34707 + }, + { + "epoch": 0.9529928610653488, + "grad_norm": 0.4503692090511322, + "learning_rate": 1.076069253376737e-05, + "loss": 0.5273, + "step": 34708 + }, + { + "epoch": 0.9530203185063152, + "grad_norm": 0.38910171389579773, + "learning_rate": 1.0760261891044814e-05, + "loss": 0.465, + "step": 34709 + }, + { + "epoch": 0.9530477759472817, + "grad_norm": 0.4605788290500641, + "learning_rate": 1.075983124690413e-05, + "loss": 0.5668, + "step": 34710 + }, + { + "epoch": 0.9530752333882482, + "grad_norm": 0.38245296478271484, + "learning_rate": 1.0759400601346117e-05, + "loss": 0.4487, + "step": 34711 + }, + { + "epoch": 0.9531026908292147, + "grad_norm": 0.38746464252471924, + "learning_rate": 1.0758969954371578e-05, + "loss": 0.3899, + "step": 34712 + }, + { + "epoch": 0.9531301482701812, + "grad_norm": 0.3762000799179077, + "learning_rate": 1.0758539305981322e-05, + "loss": 0.4369, + "step": 34713 + }, + { + "epoch": 0.9531576057111477, + "grad_norm": 0.4149077236652374, + "learning_rate": 1.0758108656176143e-05, + "loss": 0.4594, + "step": 34714 + }, + { + "epoch": 0.9531850631521143, + "grad_norm": 0.39852505922317505, + "learning_rate": 1.0757678004956852e-05, + "loss": 0.4671, + "step": 34715 + }, + { + "epoch": 0.9532125205930807, + "grad_norm": 0.3510734438896179, + "learning_rate": 1.075724735232425e-05, + "loss": 0.4414, + "step": 34716 + }, + { + "epoch": 0.9532399780340473, + "grad_norm": 0.42359659075737, + "learning_rate": 1.075681669827914e-05, + "loss": 0.5653, + "step": 34717 + }, + { + "epoch": 0.9532674354750137, + "grad_norm": 0.44597098231315613, + "learning_rate": 1.0756386042822324e-05, + "loss": 0.5209, + "step": 34718 + }, + { + "epoch": 0.9532948929159802, + "grad_norm": 0.41036367416381836, + "learning_rate": 1.0755955385954608e-05, + "loss": 0.5195, + "step": 34719 + }, + { + "epoch": 0.9533223503569467, + "grad_norm": 0.3762610852718353, + "learning_rate": 1.0755524727676794e-05, + "loss": 0.4536, + "step": 34720 + }, + { + "epoch": 0.9533498077979132, + "grad_norm": 0.4061617851257324, + "learning_rate": 1.0755094067989685e-05, + "loss": 0.5099, + "step": 34721 + }, + { + "epoch": 0.9533772652388798, + "grad_norm": 0.3931017518043518, + "learning_rate": 1.0754663406894083e-05, + "loss": 0.4892, + "step": 34722 + }, + { + "epoch": 0.9534047226798462, + "grad_norm": 0.3837074339389801, + "learning_rate": 1.0754232744390793e-05, + "loss": 0.4752, + "step": 34723 + }, + { + "epoch": 0.9534321801208128, + "grad_norm": 0.5086297988891602, + "learning_rate": 1.075380208048062e-05, + "loss": 0.494, + "step": 34724 + }, + { + "epoch": 0.9534596375617792, + "grad_norm": 0.4013034701347351, + "learning_rate": 1.0753371415164366e-05, + "loss": 0.4861, + "step": 34725 + }, + { + "epoch": 0.9534870950027458, + "grad_norm": 0.41513222455978394, + "learning_rate": 1.0752940748442833e-05, + "loss": 0.5575, + "step": 34726 + }, + { + "epoch": 0.9535145524437122, + "grad_norm": 0.4832100570201874, + "learning_rate": 1.0752510080316824e-05, + "loss": 0.4796, + "step": 34727 + }, + { + "epoch": 0.9535420098846787, + "grad_norm": 0.3570649325847626, + "learning_rate": 1.0752079410787145e-05, + "loss": 0.4733, + "step": 34728 + }, + { + "epoch": 0.9535694673256453, + "grad_norm": 0.4189216196537018, + "learning_rate": 1.0751648739854597e-05, + "loss": 0.4836, + "step": 34729 + }, + { + "epoch": 0.9535969247666117, + "grad_norm": 0.47783076763153076, + "learning_rate": 1.0751218067519983e-05, + "loss": 0.449, + "step": 34730 + }, + { + "epoch": 0.9536243822075783, + "grad_norm": 0.6438360214233398, + "learning_rate": 1.075078739378411e-05, + "loss": 0.5261, + "step": 34731 + }, + { + "epoch": 0.9536518396485447, + "grad_norm": 0.37879130244255066, + "learning_rate": 1.0750356718647778e-05, + "loss": 0.523, + "step": 34732 + }, + { + "epoch": 0.9536792970895113, + "grad_norm": 0.37147605419158936, + "learning_rate": 1.0749926042111791e-05, + "loss": 0.5151, + "step": 34733 + }, + { + "epoch": 0.9537067545304777, + "grad_norm": 0.4015268087387085, + "learning_rate": 1.0749495364176954e-05, + "loss": 0.4552, + "step": 34734 + }, + { + "epoch": 0.9537342119714443, + "grad_norm": 0.4174935221672058, + "learning_rate": 1.0749064684844066e-05, + "loss": 0.4657, + "step": 34735 + }, + { + "epoch": 0.9537616694124108, + "grad_norm": 0.38166555762290955, + "learning_rate": 1.0748634004113936e-05, + "loss": 0.5, + "step": 34736 + }, + { + "epoch": 0.9537891268533772, + "grad_norm": 0.40984830260276794, + "learning_rate": 1.0748203321987365e-05, + "loss": 0.4693, + "step": 34737 + }, + { + "epoch": 0.9538165842943438, + "grad_norm": 0.41083550453186035, + "learning_rate": 1.0747772638465152e-05, + "loss": 0.5232, + "step": 34738 + }, + { + "epoch": 0.9538440417353102, + "grad_norm": 0.4378269612789154, + "learning_rate": 1.074734195354811e-05, + "loss": 0.4738, + "step": 34739 + }, + { + "epoch": 0.9538714991762768, + "grad_norm": 0.3730698823928833, + "learning_rate": 1.074691126723703e-05, + "loss": 0.4676, + "step": 34740 + }, + { + "epoch": 0.9538989566172432, + "grad_norm": 0.372438907623291, + "learning_rate": 1.0746480579532727e-05, + "loss": 0.5168, + "step": 34741 + }, + { + "epoch": 0.9539264140582098, + "grad_norm": 0.4215037226676941, + "learning_rate": 1.0746049890435999e-05, + "loss": 0.4673, + "step": 34742 + }, + { + "epoch": 0.9539538714991763, + "grad_norm": 0.3262975513935089, + "learning_rate": 1.0745619199947648e-05, + "loss": 0.4282, + "step": 34743 + }, + { + "epoch": 0.9539813289401428, + "grad_norm": 0.3862096667289734, + "learning_rate": 1.0745188508068482e-05, + "loss": 0.4854, + "step": 34744 + }, + { + "epoch": 0.9540087863811093, + "grad_norm": 0.37320539355278015, + "learning_rate": 1.0744757814799298e-05, + "loss": 0.476, + "step": 34745 + }, + { + "epoch": 0.9540362438220757, + "grad_norm": 0.39405685663223267, + "learning_rate": 1.0744327120140907e-05, + "loss": 0.4945, + "step": 34746 + }, + { + "epoch": 0.9540637012630423, + "grad_norm": 0.3867015242576599, + "learning_rate": 1.0743896424094107e-05, + "loss": 0.4906, + "step": 34747 + }, + { + "epoch": 0.9540911587040087, + "grad_norm": 0.37037187814712524, + "learning_rate": 1.0743465726659702e-05, + "loss": 0.5063, + "step": 34748 + }, + { + "epoch": 0.9541186161449753, + "grad_norm": 0.35702237486839294, + "learning_rate": 1.0743035027838496e-05, + "loss": 0.4612, + "step": 34749 + }, + { + "epoch": 0.9541460735859418, + "grad_norm": 0.3745707869529724, + "learning_rate": 1.0742604327631289e-05, + "loss": 0.3941, + "step": 34750 + }, + { + "epoch": 0.9541735310269083, + "grad_norm": 0.4496639668941498, + "learning_rate": 1.0742173626038895e-05, + "loss": 0.5241, + "step": 34751 + }, + { + "epoch": 0.9542009884678748, + "grad_norm": 0.38597381114959717, + "learning_rate": 1.0741742923062107e-05, + "loss": 0.455, + "step": 34752 + }, + { + "epoch": 0.9542284459088413, + "grad_norm": 0.4154333174228668, + "learning_rate": 1.074131221870173e-05, + "loss": 0.571, + "step": 34753 + }, + { + "epoch": 0.9542559033498078, + "grad_norm": 0.41185128688812256, + "learning_rate": 1.0740881512958573e-05, + "loss": 0.4914, + "step": 34754 + }, + { + "epoch": 0.9542833607907742, + "grad_norm": 0.3850076198577881, + "learning_rate": 1.074045080583343e-05, + "loss": 0.4266, + "step": 34755 + }, + { + "epoch": 0.9543108182317408, + "grad_norm": 0.5379679203033447, + "learning_rate": 1.0740020097327115e-05, + "loss": 0.4525, + "step": 34756 + }, + { + "epoch": 0.9543382756727073, + "grad_norm": 0.43927571177482605, + "learning_rate": 1.0739589387440425e-05, + "loss": 0.5382, + "step": 34757 + }, + { + "epoch": 0.9543657331136738, + "grad_norm": 0.3820250928401947, + "learning_rate": 1.0739158676174161e-05, + "loss": 0.5428, + "step": 34758 + }, + { + "epoch": 0.9543931905546403, + "grad_norm": 0.4412820637226105, + "learning_rate": 1.0738727963529136e-05, + "loss": 0.4866, + "step": 34759 + }, + { + "epoch": 0.9544206479956068, + "grad_norm": 0.3923195004463196, + "learning_rate": 1.0738297249506145e-05, + "loss": 0.4643, + "step": 34760 + }, + { + "epoch": 0.9544481054365733, + "grad_norm": 0.41386234760284424, + "learning_rate": 1.0737866534105994e-05, + "loss": 0.4449, + "step": 34761 + }, + { + "epoch": 0.9544755628775398, + "grad_norm": 0.38445335626602173, + "learning_rate": 1.0737435817329486e-05, + "loss": 0.4712, + "step": 34762 + }, + { + "epoch": 0.9545030203185063, + "grad_norm": 0.3935084939002991, + "learning_rate": 1.0737005099177424e-05, + "loss": 0.5247, + "step": 34763 + }, + { + "epoch": 0.9545304777594729, + "grad_norm": 0.43227770924568176, + "learning_rate": 1.0736574379650615e-05, + "loss": 0.5012, + "step": 34764 + }, + { + "epoch": 0.9545579352004393, + "grad_norm": 0.37367716431617737, + "learning_rate": 1.073614365874986e-05, + "loss": 0.4458, + "step": 34765 + }, + { + "epoch": 0.9545853926414058, + "grad_norm": 0.3809497356414795, + "learning_rate": 1.0735712936475958e-05, + "loss": 0.5056, + "step": 34766 + }, + { + "epoch": 0.9546128500823723, + "grad_norm": 0.3848358392715454, + "learning_rate": 1.0735282212829721e-05, + "loss": 0.5126, + "step": 34767 + }, + { + "epoch": 0.9546403075233388, + "grad_norm": 0.42073187232017517, + "learning_rate": 1.0734851487811943e-05, + "loss": 0.4787, + "step": 34768 + }, + { + "epoch": 0.9546677649643053, + "grad_norm": 0.4400480389595032, + "learning_rate": 1.0734420761423437e-05, + "loss": 0.4682, + "step": 34769 + }, + { + "epoch": 0.9546952224052718, + "grad_norm": 0.3849635124206543, + "learning_rate": 1.0733990033665001e-05, + "loss": 0.5302, + "step": 34770 + }, + { + "epoch": 0.9547226798462384, + "grad_norm": 0.3995858132839203, + "learning_rate": 1.0733559304537438e-05, + "loss": 0.5, + "step": 34771 + }, + { + "epoch": 0.9547501372872048, + "grad_norm": 0.4570026993751526, + "learning_rate": 1.0733128574041554e-05, + "loss": 0.5658, + "step": 34772 + }, + { + "epoch": 0.9547775947281714, + "grad_norm": 0.4237518906593323, + "learning_rate": 1.0732697842178147e-05, + "loss": 0.5569, + "step": 34773 + }, + { + "epoch": 0.9548050521691378, + "grad_norm": 0.3687550723552704, + "learning_rate": 1.0732267108948031e-05, + "loss": 0.4124, + "step": 34774 + }, + { + "epoch": 0.9548325096101044, + "grad_norm": 0.4449837803840637, + "learning_rate": 1.0731836374352001e-05, + "loss": 0.5949, + "step": 34775 + }, + { + "epoch": 0.9548599670510708, + "grad_norm": 0.3826906383037567, + "learning_rate": 1.0731405638390859e-05, + "loss": 0.4732, + "step": 34776 + }, + { + "epoch": 0.9548874244920373, + "grad_norm": 0.4545523524284363, + "learning_rate": 1.0730974901065416e-05, + "loss": 0.4883, + "step": 34777 + }, + { + "epoch": 0.9549148819330039, + "grad_norm": 0.3914506733417511, + "learning_rate": 1.0730544162376472e-05, + "loss": 0.5674, + "step": 34778 + }, + { + "epoch": 0.9549423393739703, + "grad_norm": 0.41449588537216187, + "learning_rate": 1.0730113422324828e-05, + "loss": 0.4166, + "step": 34779 + }, + { + "epoch": 0.9549697968149369, + "grad_norm": 0.3721112012863159, + "learning_rate": 1.0729682680911291e-05, + "loss": 0.4684, + "step": 34780 + }, + { + "epoch": 0.9549972542559033, + "grad_norm": 0.3512680232524872, + "learning_rate": 1.072925193813666e-05, + "loss": 0.4774, + "step": 34781 + }, + { + "epoch": 0.9550247116968699, + "grad_norm": 0.412949800491333, + "learning_rate": 1.0728821194001743e-05, + "loss": 0.4603, + "step": 34782 + }, + { + "epoch": 0.9550521691378363, + "grad_norm": 0.41856303811073303, + "learning_rate": 1.0728390448507344e-05, + "loss": 0.5, + "step": 34783 + }, + { + "epoch": 0.9550796265788029, + "grad_norm": 0.3839171826839447, + "learning_rate": 1.072795970165426e-05, + "loss": 0.4395, + "step": 34784 + }, + { + "epoch": 0.9551070840197694, + "grad_norm": 0.38706550002098083, + "learning_rate": 1.0727528953443303e-05, + "loss": 0.4543, + "step": 34785 + }, + { + "epoch": 0.9551345414607358, + "grad_norm": 0.36836111545562744, + "learning_rate": 1.0727098203875268e-05, + "loss": 0.4543, + "step": 34786 + }, + { + "epoch": 0.9551619989017024, + "grad_norm": 0.401315301656723, + "learning_rate": 1.0726667452950967e-05, + "loss": 0.4767, + "step": 34787 + }, + { + "epoch": 0.9551894563426688, + "grad_norm": 0.41283655166625977, + "learning_rate": 1.0726236700671198e-05, + "loss": 0.4993, + "step": 34788 + }, + { + "epoch": 0.9552169137836354, + "grad_norm": 0.396470308303833, + "learning_rate": 1.0725805947036764e-05, + "loss": 0.4736, + "step": 34789 + }, + { + "epoch": 0.9552443712246018, + "grad_norm": 0.41168782114982605, + "learning_rate": 1.0725375192048472e-05, + "loss": 0.4901, + "step": 34790 + }, + { + "epoch": 0.9552718286655684, + "grad_norm": 0.4027193784713745, + "learning_rate": 1.0724944435707123e-05, + "loss": 0.5488, + "step": 34791 + }, + { + "epoch": 0.9552992861065349, + "grad_norm": 0.3457355201244354, + "learning_rate": 1.0724513678013522e-05, + "loss": 0.4108, + "step": 34792 + }, + { + "epoch": 0.9553267435475014, + "grad_norm": 0.35651129484176636, + "learning_rate": 1.0724082918968473e-05, + "loss": 0.4476, + "step": 34793 + }, + { + "epoch": 0.9553542009884679, + "grad_norm": 0.43909981846809387, + "learning_rate": 1.0723652158572774e-05, + "loss": 0.4891, + "step": 34794 + }, + { + "epoch": 0.9553816584294343, + "grad_norm": 0.3919883072376251, + "learning_rate": 1.0723221396827236e-05, + "loss": 0.4685, + "step": 34795 + }, + { + "epoch": 0.9554091158704009, + "grad_norm": 0.3805553615093231, + "learning_rate": 1.0722790633732661e-05, + "loss": 0.4198, + "step": 34796 + }, + { + "epoch": 0.9554365733113673, + "grad_norm": 0.4319494962692261, + "learning_rate": 1.0722359869289846e-05, + "loss": 0.4865, + "step": 34797 + }, + { + "epoch": 0.9554640307523339, + "grad_norm": 0.4117768406867981, + "learning_rate": 1.0721929103499604e-05, + "loss": 0.5326, + "step": 34798 + }, + { + "epoch": 0.9554914881933004, + "grad_norm": 0.3864593505859375, + "learning_rate": 1.072149833636273e-05, + "loss": 0.4621, + "step": 34799 + }, + { + "epoch": 0.9555189456342669, + "grad_norm": 0.4172755777835846, + "learning_rate": 1.0721067567880032e-05, + "loss": 0.4923, + "step": 34800 + }, + { + "epoch": 0.9555464030752334, + "grad_norm": 0.3540259897708893, + "learning_rate": 1.0720636798052315e-05, + "loss": 0.3603, + "step": 34801 + }, + { + "epoch": 0.9555738605161999, + "grad_norm": 0.4306371808052063, + "learning_rate": 1.0720206026880378e-05, + "loss": 0.4626, + "step": 34802 + }, + { + "epoch": 0.9556013179571664, + "grad_norm": 0.3914659917354584, + "learning_rate": 1.0719775254365031e-05, + "loss": 0.5021, + "step": 34803 + }, + { + "epoch": 0.9556287753981328, + "grad_norm": 0.3891967236995697, + "learning_rate": 1.0719344480507067e-05, + "loss": 0.4491, + "step": 34804 + }, + { + "epoch": 0.9556562328390994, + "grad_norm": 0.38523900508880615, + "learning_rate": 1.0718913705307302e-05, + "loss": 0.5565, + "step": 34805 + }, + { + "epoch": 0.955683690280066, + "grad_norm": 0.4280299246311188, + "learning_rate": 1.0718482928766529e-05, + "loss": 0.5774, + "step": 34806 + }, + { + "epoch": 0.9557111477210324, + "grad_norm": 0.38938504457473755, + "learning_rate": 1.0718052150885559e-05, + "loss": 0.52, + "step": 34807 + }, + { + "epoch": 0.9557386051619989, + "grad_norm": 0.5108791589736938, + "learning_rate": 1.071762137166519e-05, + "loss": 0.5308, + "step": 34808 + }, + { + "epoch": 0.9557660626029654, + "grad_norm": 0.4434411823749542, + "learning_rate": 1.0717190591106232e-05, + "loss": 0.4761, + "step": 34809 + }, + { + "epoch": 0.9557935200439319, + "grad_norm": 0.42142194509506226, + "learning_rate": 1.071675980920948e-05, + "loss": 0.4864, + "step": 34810 + }, + { + "epoch": 0.9558209774848984, + "grad_norm": 0.3614460527896881, + "learning_rate": 1.0716329025975745e-05, + "loss": 0.453, + "step": 34811 + }, + { + "epoch": 0.9558484349258649, + "grad_norm": 0.5181878209114075, + "learning_rate": 1.0715898241405826e-05, + "loss": 0.4137, + "step": 34812 + }, + { + "epoch": 0.9558758923668315, + "grad_norm": 0.48544925451278687, + "learning_rate": 1.071546745550053e-05, + "loss": 0.481, + "step": 34813 + }, + { + "epoch": 0.9559033498077979, + "grad_norm": 0.40290987491607666, + "learning_rate": 1.0715036668260659e-05, + "loss": 0.5476, + "step": 34814 + }, + { + "epoch": 0.9559308072487644, + "grad_norm": 0.4516204297542572, + "learning_rate": 1.0714605879687014e-05, + "loss": 0.4418, + "step": 34815 + }, + { + "epoch": 0.9559582646897309, + "grad_norm": 0.387788325548172, + "learning_rate": 1.0714175089780405e-05, + "loss": 0.4073, + "step": 34816 + }, + { + "epoch": 0.9559857221306974, + "grad_norm": 0.37849318981170654, + "learning_rate": 1.0713744298541627e-05, + "loss": 0.5072, + "step": 34817 + }, + { + "epoch": 0.9560131795716639, + "grad_norm": 0.4947929084300995, + "learning_rate": 1.0713313505971492e-05, + "loss": 0.4438, + "step": 34818 + }, + { + "epoch": 0.9560406370126304, + "grad_norm": 0.4259732663631439, + "learning_rate": 1.0712882712070798e-05, + "loss": 0.459, + "step": 34819 + }, + { + "epoch": 0.956068094453597, + "grad_norm": 0.4463706612586975, + "learning_rate": 1.071245191684035e-05, + "loss": 0.5479, + "step": 34820 + }, + { + "epoch": 0.9560955518945634, + "grad_norm": 0.401046484708786, + "learning_rate": 1.0712021120280951e-05, + "loss": 0.4624, + "step": 34821 + }, + { + "epoch": 0.95612300933553, + "grad_norm": 0.3881068825721741, + "learning_rate": 1.0711590322393406e-05, + "loss": 0.4595, + "step": 34822 + }, + { + "epoch": 0.9561504667764964, + "grad_norm": 0.434304416179657, + "learning_rate": 1.0711159523178519e-05, + "loss": 0.4379, + "step": 34823 + }, + { + "epoch": 0.956177924217463, + "grad_norm": 0.42218950390815735, + "learning_rate": 1.0710728722637091e-05, + "loss": 0.4977, + "step": 34824 + }, + { + "epoch": 0.9562053816584294, + "grad_norm": 0.3998181223869324, + "learning_rate": 1.0710297920769928e-05, + "loss": 0.4608, + "step": 34825 + }, + { + "epoch": 0.9562328390993959, + "grad_norm": 0.3457197844982147, + "learning_rate": 1.0709867117577833e-05, + "loss": 0.5315, + "step": 34826 + }, + { + "epoch": 0.9562602965403625, + "grad_norm": 0.4187474250793457, + "learning_rate": 1.070943631306161e-05, + "loss": 0.6175, + "step": 34827 + }, + { + "epoch": 0.9562877539813289, + "grad_norm": 0.4373100697994232, + "learning_rate": 1.0709005507222059e-05, + "loss": 0.5093, + "step": 34828 + }, + { + "epoch": 0.9563152114222955, + "grad_norm": 0.3753771185874939, + "learning_rate": 1.0708574700059988e-05, + "loss": 0.4248, + "step": 34829 + }, + { + "epoch": 0.9563426688632619, + "grad_norm": 0.3930647671222687, + "learning_rate": 1.0708143891576197e-05, + "loss": 0.4954, + "step": 34830 + }, + { + "epoch": 0.9563701263042285, + "grad_norm": 0.4742387533187866, + "learning_rate": 1.0707713081771497e-05, + "loss": 0.506, + "step": 34831 + }, + { + "epoch": 0.9563975837451949, + "grad_norm": 0.388192355632782, + "learning_rate": 1.0707282270646682e-05, + "loss": 0.4136, + "step": 34832 + }, + { + "epoch": 0.9564250411861615, + "grad_norm": 0.4137936234474182, + "learning_rate": 1.070685145820256e-05, + "loss": 0.4913, + "step": 34833 + }, + { + "epoch": 0.956452498627128, + "grad_norm": 0.33610060811042786, + "learning_rate": 1.0706420644439935e-05, + "loss": 0.4579, + "step": 34834 + }, + { + "epoch": 0.9564799560680944, + "grad_norm": 2.745361566543579, + "learning_rate": 1.0705989829359609e-05, + "loss": 0.4532, + "step": 34835 + }, + { + "epoch": 0.956507413509061, + "grad_norm": 0.36817431449890137, + "learning_rate": 1.0705559012962388e-05, + "loss": 0.4346, + "step": 34836 + }, + { + "epoch": 0.9565348709500274, + "grad_norm": 0.3892417550086975, + "learning_rate": 1.0705128195249075e-05, + "loss": 0.5056, + "step": 34837 + }, + { + "epoch": 0.956562328390994, + "grad_norm": 0.4678867757320404, + "learning_rate": 1.0704697376220472e-05, + "loss": 0.4908, + "step": 34838 + }, + { + "epoch": 0.9565897858319604, + "grad_norm": 0.438326358795166, + "learning_rate": 1.0704266555877383e-05, + "loss": 0.5581, + "step": 34839 + }, + { + "epoch": 0.956617243272927, + "grad_norm": 0.40344902873039246, + "learning_rate": 1.0703835734220613e-05, + "loss": 0.4517, + "step": 34840 + }, + { + "epoch": 0.9566447007138935, + "grad_norm": 0.43591317534446716, + "learning_rate": 1.0703404911250963e-05, + "loss": 0.512, + "step": 34841 + }, + { + "epoch": 0.95667215815486, + "grad_norm": 0.39156022667884827, + "learning_rate": 1.0702974086969243e-05, + "loss": 0.5534, + "step": 34842 + }, + { + "epoch": 0.9566996155958265, + "grad_norm": 0.40702155232429504, + "learning_rate": 1.0702543261376247e-05, + "loss": 0.5487, + "step": 34843 + }, + { + "epoch": 0.9567270730367929, + "grad_norm": 0.5510063767433167, + "learning_rate": 1.0702112434472784e-05, + "loss": 0.5157, + "step": 34844 + }, + { + "epoch": 0.9567545304777595, + "grad_norm": 0.5046184659004211, + "learning_rate": 1.0701681606259659e-05, + "loss": 0.5603, + "step": 34845 + }, + { + "epoch": 0.9567819879187259, + "grad_norm": 0.4307788610458374, + "learning_rate": 1.0701250776737673e-05, + "loss": 0.5312, + "step": 34846 + }, + { + "epoch": 0.9568094453596925, + "grad_norm": 0.37545645236968994, + "learning_rate": 1.070081994590763e-05, + "loss": 0.5139, + "step": 34847 + }, + { + "epoch": 0.956836902800659, + "grad_norm": 0.39865291118621826, + "learning_rate": 1.0700389113770335e-05, + "loss": 0.4783, + "step": 34848 + }, + { + "epoch": 0.9568643602416255, + "grad_norm": 0.3642352819442749, + "learning_rate": 1.069995828032659e-05, + "loss": 0.4745, + "step": 34849 + }, + { + "epoch": 0.956891817682592, + "grad_norm": 0.43077394366264343, + "learning_rate": 1.0699527445577203e-05, + "loss": 0.5855, + "step": 34850 + }, + { + "epoch": 0.9569192751235585, + "grad_norm": 0.3709624707698822, + "learning_rate": 1.0699096609522967e-05, + "loss": 0.5048, + "step": 34851 + }, + { + "epoch": 0.956946732564525, + "grad_norm": 0.35273241996765137, + "learning_rate": 1.0698665772164698e-05, + "loss": 0.4708, + "step": 34852 + }, + { + "epoch": 0.9569741900054914, + "grad_norm": 0.3982519805431366, + "learning_rate": 1.0698234933503193e-05, + "loss": 0.5059, + "step": 34853 + }, + { + "epoch": 0.957001647446458, + "grad_norm": 0.3662751019001007, + "learning_rate": 1.0697804093539257e-05, + "loss": 0.4868, + "step": 34854 + }, + { + "epoch": 0.9570291048874245, + "grad_norm": 0.37330344319343567, + "learning_rate": 1.0697373252273694e-05, + "loss": 0.5255, + "step": 34855 + }, + { + "epoch": 0.957056562328391, + "grad_norm": 0.34841397404670715, + "learning_rate": 1.0696942409707306e-05, + "loss": 0.4455, + "step": 34856 + }, + { + "epoch": 0.9570840197693575, + "grad_norm": 0.45505964756011963, + "learning_rate": 1.06965115658409e-05, + "loss": 0.5799, + "step": 34857 + }, + { + "epoch": 0.957111477210324, + "grad_norm": 0.3779584765434265, + "learning_rate": 1.0696080720675279e-05, + "loss": 0.5103, + "step": 34858 + }, + { + "epoch": 0.9571389346512905, + "grad_norm": 0.38087788224220276, + "learning_rate": 1.069564987421124e-05, + "loss": 0.4767, + "step": 34859 + }, + { + "epoch": 0.957166392092257, + "grad_norm": 0.6527154445648193, + "learning_rate": 1.0695219026449597e-05, + "loss": 0.4805, + "step": 34860 + }, + { + "epoch": 0.9571938495332235, + "grad_norm": 0.4667859673500061, + "learning_rate": 1.0694788177391144e-05, + "loss": 0.4675, + "step": 34861 + }, + { + "epoch": 0.9572213069741901, + "grad_norm": 0.4071952998638153, + "learning_rate": 1.0694357327036693e-05, + "loss": 0.4492, + "step": 34862 + }, + { + "epoch": 0.9572487644151565, + "grad_norm": 0.3688203990459442, + "learning_rate": 1.0693926475387044e-05, + "loss": 0.4331, + "step": 34863 + }, + { + "epoch": 0.957276221856123, + "grad_norm": 0.35221922397613525, + "learning_rate": 1.0693495622442998e-05, + "loss": 0.4752, + "step": 34864 + }, + { + "epoch": 0.9573036792970895, + "grad_norm": 0.4402889609336853, + "learning_rate": 1.0693064768205363e-05, + "loss": 0.4076, + "step": 34865 + }, + { + "epoch": 0.957331136738056, + "grad_norm": 0.43967124819755554, + "learning_rate": 1.0692633912674941e-05, + "loss": 0.5201, + "step": 34866 + }, + { + "epoch": 0.9573585941790225, + "grad_norm": 0.3597395420074463, + "learning_rate": 1.0692203055852535e-05, + "loss": 0.5286, + "step": 34867 + }, + { + "epoch": 0.957386051619989, + "grad_norm": 0.4124549329280853, + "learning_rate": 1.0691772197738952e-05, + "loss": 0.4498, + "step": 34868 + }, + { + "epoch": 0.9574135090609556, + "grad_norm": 0.4075019955635071, + "learning_rate": 1.0691341338334989e-05, + "loss": 0.5651, + "step": 34869 + }, + { + "epoch": 0.957440966501922, + "grad_norm": 0.4862573742866516, + "learning_rate": 1.0690910477641456e-05, + "loss": 0.5161, + "step": 34870 + }, + { + "epoch": 0.9574684239428886, + "grad_norm": 0.34486445784568787, + "learning_rate": 1.0690479615659153e-05, + "loss": 0.4252, + "step": 34871 + }, + { + "epoch": 0.957495881383855, + "grad_norm": 0.3771526515483856, + "learning_rate": 1.0690048752388886e-05, + "loss": 0.5074, + "step": 34872 + }, + { + "epoch": 0.9575233388248215, + "grad_norm": 0.34537914395332336, + "learning_rate": 1.068961788783146e-05, + "loss": 0.4176, + "step": 34873 + }, + { + "epoch": 0.957550796265788, + "grad_norm": 0.39992812275886536, + "learning_rate": 1.0689187021987673e-05, + "loss": 0.457, + "step": 34874 + }, + { + "epoch": 0.9575782537067545, + "grad_norm": 0.43850424885749817, + "learning_rate": 1.0688756154858336e-05, + "loss": 0.5059, + "step": 34875 + }, + { + "epoch": 0.9576057111477211, + "grad_norm": 0.42774802446365356, + "learning_rate": 1.0688325286444248e-05, + "loss": 0.4925, + "step": 34876 + }, + { + "epoch": 0.9576331685886875, + "grad_norm": 0.37670019268989563, + "learning_rate": 1.0687894416746209e-05, + "loss": 0.4339, + "step": 34877 + }, + { + "epoch": 0.9576606260296541, + "grad_norm": 0.5222710967063904, + "learning_rate": 1.0687463545765033e-05, + "loss": 0.3915, + "step": 34878 + }, + { + "epoch": 0.9576880834706205, + "grad_norm": 0.35696977376937866, + "learning_rate": 1.0687032673501514e-05, + "loss": 0.5217, + "step": 34879 + }, + { + "epoch": 0.9577155409115871, + "grad_norm": 0.3915060758590698, + "learning_rate": 1.0686601799956462e-05, + "loss": 0.4881, + "step": 34880 + }, + { + "epoch": 0.9577429983525535, + "grad_norm": 0.4251434803009033, + "learning_rate": 1.0686170925130678e-05, + "loss": 0.4153, + "step": 34881 + }, + { + "epoch": 0.95777045579352, + "grad_norm": 0.3931671679019928, + "learning_rate": 1.0685740049024967e-05, + "loss": 0.496, + "step": 34882 + }, + { + "epoch": 0.9577979132344866, + "grad_norm": 0.44885900616645813, + "learning_rate": 1.0685309171640133e-05, + "loss": 0.4612, + "step": 34883 + }, + { + "epoch": 0.957825370675453, + "grad_norm": 0.4375171959400177, + "learning_rate": 1.0684878292976975e-05, + "loss": 0.4914, + "step": 34884 + }, + { + "epoch": 0.9578528281164196, + "grad_norm": 0.370837926864624, + "learning_rate": 1.0684447413036303e-05, + "loss": 0.4798, + "step": 34885 + }, + { + "epoch": 0.957880285557386, + "grad_norm": 0.41526880860328674, + "learning_rate": 1.068401653181892e-05, + "loss": 0.4982, + "step": 34886 + }, + { + "epoch": 0.9579077429983526, + "grad_norm": 0.4293258488178253, + "learning_rate": 1.0683585649325623e-05, + "loss": 0.5682, + "step": 34887 + }, + { + "epoch": 0.957935200439319, + "grad_norm": 0.357089638710022, + "learning_rate": 1.0683154765557223e-05, + "loss": 0.488, + "step": 34888 + }, + { + "epoch": 0.9579626578802856, + "grad_norm": 0.43192946910858154, + "learning_rate": 1.0682723880514523e-05, + "loss": 0.4487, + "step": 34889 + }, + { + "epoch": 0.9579901153212521, + "grad_norm": 0.3651573657989502, + "learning_rate": 1.0682292994198323e-05, + "loss": 0.4809, + "step": 34890 + }, + { + "epoch": 0.9580175727622185, + "grad_norm": 0.3752998411655426, + "learning_rate": 1.0681862106609428e-05, + "loss": 0.5925, + "step": 34891 + }, + { + "epoch": 0.9580450302031851, + "grad_norm": 0.4575667977333069, + "learning_rate": 1.0681431217748644e-05, + "loss": 0.4078, + "step": 34892 + }, + { + "epoch": 0.9580724876441515, + "grad_norm": 0.3816748559474945, + "learning_rate": 1.0681000327616772e-05, + "loss": 0.4645, + "step": 34893 + }, + { + "epoch": 0.9580999450851181, + "grad_norm": 0.4466944634914398, + "learning_rate": 1.0680569436214618e-05, + "loss": 0.51, + "step": 34894 + }, + { + "epoch": 0.9581274025260845, + "grad_norm": 0.370303750038147, + "learning_rate": 1.0680138543542986e-05, + "loss": 0.4647, + "step": 34895 + }, + { + "epoch": 0.9581548599670511, + "grad_norm": 0.5035239458084106, + "learning_rate": 1.0679707649602677e-05, + "loss": 0.4758, + "step": 34896 + }, + { + "epoch": 0.9581823174080176, + "grad_norm": 0.3818584084510803, + "learning_rate": 1.0679276754394496e-05, + "loss": 0.4845, + "step": 34897 + }, + { + "epoch": 0.9582097748489841, + "grad_norm": 0.3622732162475586, + "learning_rate": 1.0678845857919248e-05, + "loss": 0.5461, + "step": 34898 + }, + { + "epoch": 0.9582372322899506, + "grad_norm": 0.3862021267414093, + "learning_rate": 1.0678414960177734e-05, + "loss": 0.4556, + "step": 34899 + }, + { + "epoch": 0.958264689730917, + "grad_norm": 0.4323843717575073, + "learning_rate": 1.0677984061170762e-05, + "loss": 0.6047, + "step": 34900 + }, + { + "epoch": 0.9582921471718836, + "grad_norm": 0.49893391132354736, + "learning_rate": 1.0677553160899135e-05, + "loss": 0.5005, + "step": 34901 + }, + { + "epoch": 0.95831960461285, + "grad_norm": 0.406418114900589, + "learning_rate": 1.067712225936365e-05, + "loss": 0.4594, + "step": 34902 + }, + { + "epoch": 0.9583470620538166, + "grad_norm": 0.39285388588905334, + "learning_rate": 1.0676691356565119e-05, + "loss": 0.4661, + "step": 34903 + }, + { + "epoch": 0.9583745194947831, + "grad_norm": 0.4982675313949585, + "learning_rate": 1.0676260452504342e-05, + "loss": 0.4923, + "step": 34904 + }, + { + "epoch": 0.9584019769357496, + "grad_norm": 0.44003966450691223, + "learning_rate": 1.0675829547182123e-05, + "loss": 0.5194, + "step": 34905 + }, + { + "epoch": 0.9584294343767161, + "grad_norm": 0.4342060983181, + "learning_rate": 1.0675398640599269e-05, + "loss": 0.4561, + "step": 34906 + }, + { + "epoch": 0.9584568918176826, + "grad_norm": 0.3819846212863922, + "learning_rate": 1.0674967732756576e-05, + "loss": 0.5379, + "step": 34907 + }, + { + "epoch": 0.9584843492586491, + "grad_norm": 0.36857354640960693, + "learning_rate": 1.0674536823654856e-05, + "loss": 0.459, + "step": 34908 + }, + { + "epoch": 0.9585118066996156, + "grad_norm": 0.47545304894447327, + "learning_rate": 1.0674105913294907e-05, + "loss": 0.5873, + "step": 34909 + }, + { + "epoch": 0.9585392641405821, + "grad_norm": 0.4128669202327728, + "learning_rate": 1.067367500167754e-05, + "loss": 0.4599, + "step": 34910 + }, + { + "epoch": 0.9585667215815487, + "grad_norm": 0.36019137501716614, + "learning_rate": 1.067324408880355e-05, + "loss": 0.4215, + "step": 34911 + }, + { + "epoch": 0.9585941790225151, + "grad_norm": 0.4185422658920288, + "learning_rate": 1.0672813174673746e-05, + "loss": 0.4546, + "step": 34912 + }, + { + "epoch": 0.9586216364634816, + "grad_norm": 0.4084666073322296, + "learning_rate": 1.0672382259288932e-05, + "loss": 0.5096, + "step": 34913 + }, + { + "epoch": 0.9586490939044481, + "grad_norm": 0.4329037666320801, + "learning_rate": 1.067195134264991e-05, + "loss": 0.5479, + "step": 34914 + }, + { + "epoch": 0.9586765513454146, + "grad_norm": 0.39354780316352844, + "learning_rate": 1.0671520424757483e-05, + "loss": 0.4783, + "step": 34915 + }, + { + "epoch": 0.9587040087863811, + "grad_norm": 0.38188549876213074, + "learning_rate": 1.0671089505612455e-05, + "loss": 0.4903, + "step": 34916 + }, + { + "epoch": 0.9587314662273476, + "grad_norm": 0.4154590666294098, + "learning_rate": 1.0670658585215634e-05, + "loss": 0.4761, + "step": 34917 + }, + { + "epoch": 0.9587589236683142, + "grad_norm": 0.39028167724609375, + "learning_rate": 1.067022766356782e-05, + "loss": 0.5124, + "step": 34918 + }, + { + "epoch": 0.9587863811092806, + "grad_norm": 0.3643805980682373, + "learning_rate": 1.0669796740669815e-05, + "loss": 0.4813, + "step": 34919 + }, + { + "epoch": 0.9588138385502472, + "grad_norm": 0.3959442973136902, + "learning_rate": 1.0669365816522428e-05, + "loss": 0.516, + "step": 34920 + }, + { + "epoch": 0.9588412959912136, + "grad_norm": 0.41326841711997986, + "learning_rate": 1.0668934891126459e-05, + "loss": 0.501, + "step": 34921 + }, + { + "epoch": 0.9588687534321801, + "grad_norm": 0.4199479818344116, + "learning_rate": 1.0668503964482715e-05, + "loss": 0.5567, + "step": 34922 + }, + { + "epoch": 0.9588962108731466, + "grad_norm": 0.3919658660888672, + "learning_rate": 1.0668073036591994e-05, + "loss": 0.4702, + "step": 34923 + }, + { + "epoch": 0.9589236683141131, + "grad_norm": 0.3695708215236664, + "learning_rate": 1.0667642107455105e-05, + "loss": 0.4946, + "step": 34924 + }, + { + "epoch": 0.9589511257550797, + "grad_norm": 0.40789997577667236, + "learning_rate": 1.0667211177072854e-05, + "loss": 0.5156, + "step": 34925 + }, + { + "epoch": 0.9589785831960461, + "grad_norm": 0.38275912404060364, + "learning_rate": 1.0666780245446036e-05, + "loss": 0.4477, + "step": 34926 + }, + { + "epoch": 0.9590060406370127, + "grad_norm": 0.5019537806510925, + "learning_rate": 1.0666349312575463e-05, + "loss": 0.5935, + "step": 34927 + }, + { + "epoch": 0.9590334980779791, + "grad_norm": 0.42169657349586487, + "learning_rate": 1.0665918378461932e-05, + "loss": 0.4904, + "step": 34928 + }, + { + "epoch": 0.9590609555189457, + "grad_norm": 0.39031538367271423, + "learning_rate": 1.0665487443106254e-05, + "loss": 0.4719, + "step": 34929 + }, + { + "epoch": 0.9590884129599121, + "grad_norm": 0.40948596596717834, + "learning_rate": 1.0665056506509233e-05, + "loss": 0.3821, + "step": 34930 + }, + { + "epoch": 0.9591158704008786, + "grad_norm": 0.45648789405822754, + "learning_rate": 1.0664625568671662e-05, + "loss": 0.5657, + "step": 34931 + }, + { + "epoch": 0.9591433278418452, + "grad_norm": 0.3506152927875519, + "learning_rate": 1.0664194629594357e-05, + "loss": 0.506, + "step": 34932 + }, + { + "epoch": 0.9591707852828116, + "grad_norm": 0.47944191098213196, + "learning_rate": 1.0663763689278114e-05, + "loss": 0.4945, + "step": 34933 + }, + { + "epoch": 0.9591982427237782, + "grad_norm": 0.4017898738384247, + "learning_rate": 1.0663332747723744e-05, + "loss": 0.4702, + "step": 34934 + }, + { + "epoch": 0.9592257001647446, + "grad_norm": 0.3807298243045807, + "learning_rate": 1.0662901804932044e-05, + "loss": 0.4459, + "step": 34935 + }, + { + "epoch": 0.9592531576057112, + "grad_norm": 0.4639369547367096, + "learning_rate": 1.0662470860903821e-05, + "loss": 0.4881, + "step": 34936 + }, + { + "epoch": 0.9592806150466776, + "grad_norm": 0.3568728566169739, + "learning_rate": 1.0662039915639879e-05, + "loss": 0.4172, + "step": 34937 + }, + { + "epoch": 0.9593080724876442, + "grad_norm": 0.36428168416023254, + "learning_rate": 1.0661608969141022e-05, + "loss": 0.4858, + "step": 34938 + }, + { + "epoch": 0.9593355299286107, + "grad_norm": 0.39018502831459045, + "learning_rate": 1.066117802140805e-05, + "loss": 0.4766, + "step": 34939 + }, + { + "epoch": 0.9593629873695771, + "grad_norm": 0.4842870533466339, + "learning_rate": 1.0660747072441772e-05, + "loss": 0.4549, + "step": 34940 + }, + { + "epoch": 0.9593904448105437, + "grad_norm": 0.4068223834037781, + "learning_rate": 1.066031612224299e-05, + "loss": 0.4995, + "step": 34941 + }, + { + "epoch": 0.9594179022515101, + "grad_norm": 0.4166308641433716, + "learning_rate": 1.0659885170812507e-05, + "loss": 0.5024, + "step": 34942 + }, + { + "epoch": 0.9594453596924767, + "grad_norm": 0.4557074010372162, + "learning_rate": 1.065945421815113e-05, + "loss": 0.4655, + "step": 34943 + }, + { + "epoch": 0.9594728171334431, + "grad_norm": 0.38663241267204285, + "learning_rate": 1.0659023264259655e-05, + "loss": 0.4498, + "step": 34944 + }, + { + "epoch": 0.9595002745744097, + "grad_norm": 0.36388587951660156, + "learning_rate": 1.0658592309138895e-05, + "loss": 0.4899, + "step": 34945 + }, + { + "epoch": 0.9595277320153762, + "grad_norm": 0.40665486454963684, + "learning_rate": 1.0658161352789648e-05, + "loss": 0.5525, + "step": 34946 + }, + { + "epoch": 0.9595551894563427, + "grad_norm": 0.4209216833114624, + "learning_rate": 1.0657730395212722e-05, + "loss": 0.5322, + "step": 34947 + }, + { + "epoch": 0.9595826468973092, + "grad_norm": 0.431682288646698, + "learning_rate": 1.0657299436408921e-05, + "loss": 0.4703, + "step": 34948 + }, + { + "epoch": 0.9596101043382756, + "grad_norm": 0.33075401186943054, + "learning_rate": 1.0656868476379043e-05, + "loss": 0.4127, + "step": 34949 + }, + { + "epoch": 0.9596375617792422, + "grad_norm": 0.7805789113044739, + "learning_rate": 1.0656437515123896e-05, + "loss": 0.5576, + "step": 34950 + }, + { + "epoch": 0.9596650192202086, + "grad_norm": 0.36675503849983215, + "learning_rate": 1.0656006552644287e-05, + "loss": 0.4508, + "step": 34951 + }, + { + "epoch": 0.9596924766611752, + "grad_norm": 0.398968368768692, + "learning_rate": 1.0655575588941012e-05, + "loss": 0.5475, + "step": 34952 + }, + { + "epoch": 0.9597199341021416, + "grad_norm": 0.3925941288471222, + "learning_rate": 1.0655144624014882e-05, + "loss": 0.4807, + "step": 34953 + }, + { + "epoch": 0.9597473915431082, + "grad_norm": 0.37263157963752747, + "learning_rate": 1.0654713657866696e-05, + "loss": 0.4413, + "step": 34954 + }, + { + "epoch": 0.9597748489840747, + "grad_norm": 0.35503727197647095, + "learning_rate": 1.0654282690497262e-05, + "loss": 0.4794, + "step": 34955 + }, + { + "epoch": 0.9598023064250412, + "grad_norm": 0.34454530477523804, + "learning_rate": 1.0653851721907383e-05, + "loss": 0.4516, + "step": 34956 + }, + { + "epoch": 0.9598297638660077, + "grad_norm": 0.5871736407279968, + "learning_rate": 1.0653420752097859e-05, + "loss": 0.4686, + "step": 34957 + }, + { + "epoch": 0.9598572213069742, + "grad_norm": 0.3943098783493042, + "learning_rate": 1.0652989781069499e-05, + "loss": 0.469, + "step": 34958 + }, + { + "epoch": 0.9598846787479407, + "grad_norm": 0.36203649640083313, + "learning_rate": 1.0652558808823102e-05, + "loss": 0.4276, + "step": 34959 + }, + { + "epoch": 0.9599121361889071, + "grad_norm": 0.4105449318885803, + "learning_rate": 1.0652127835359476e-05, + "loss": 0.5127, + "step": 34960 + }, + { + "epoch": 0.9599395936298737, + "grad_norm": 0.45714834332466125, + "learning_rate": 1.0651696860679426e-05, + "loss": 0.5721, + "step": 34961 + }, + { + "epoch": 0.9599670510708402, + "grad_norm": 0.42296817898750305, + "learning_rate": 1.0651265884783747e-05, + "loss": 0.5012, + "step": 34962 + }, + { + "epoch": 0.9599945085118067, + "grad_norm": 0.35250380635261536, + "learning_rate": 1.0650834907673255e-05, + "loss": 0.45, + "step": 34963 + }, + { + "epoch": 0.9600219659527732, + "grad_norm": 0.36401161551475525, + "learning_rate": 1.0650403929348745e-05, + "loss": 0.4194, + "step": 34964 + }, + { + "epoch": 0.9600494233937397, + "grad_norm": 0.42181822657585144, + "learning_rate": 1.0649972949811027e-05, + "loss": 0.4733, + "step": 34965 + }, + { + "epoch": 0.9600768808347062, + "grad_norm": 0.4121161103248596, + "learning_rate": 1.0649541969060902e-05, + "loss": 0.4471, + "step": 34966 + }, + { + "epoch": 0.9601043382756727, + "grad_norm": 0.41662853956222534, + "learning_rate": 1.064911098709917e-05, + "loss": 0.5157, + "step": 34967 + }, + { + "epoch": 0.9601317957166392, + "grad_norm": 0.7949236631393433, + "learning_rate": 1.0648680003926642e-05, + "loss": 0.4322, + "step": 34968 + }, + { + "epoch": 0.9601592531576058, + "grad_norm": 0.38606762886047363, + "learning_rate": 1.064824901954412e-05, + "loss": 0.5529, + "step": 34969 + }, + { + "epoch": 0.9601867105985722, + "grad_norm": 0.43026822805404663, + "learning_rate": 1.0647818033952402e-05, + "loss": 0.5097, + "step": 34970 + }, + { + "epoch": 0.9602141680395387, + "grad_norm": 0.37150654196739197, + "learning_rate": 1.06473870471523e-05, + "loss": 0.5578, + "step": 34971 + }, + { + "epoch": 0.9602416254805052, + "grad_norm": 0.5877676606178284, + "learning_rate": 1.0646956059144614e-05, + "loss": 0.5176, + "step": 34972 + }, + { + "epoch": 0.9602690829214717, + "grad_norm": 0.41161447763442993, + "learning_rate": 1.0646525069930146e-05, + "loss": 0.46, + "step": 34973 + }, + { + "epoch": 0.9602965403624382, + "grad_norm": 0.9766450524330139, + "learning_rate": 1.0646094079509709e-05, + "loss": 0.5327, + "step": 34974 + }, + { + "epoch": 0.9603239978034047, + "grad_norm": 0.38749200105667114, + "learning_rate": 1.0645663087884092e-05, + "loss": 0.4959, + "step": 34975 + }, + { + "epoch": 0.9603514552443713, + "grad_norm": 0.47836920619010925, + "learning_rate": 1.0645232095054114e-05, + "loss": 0.471, + "step": 34976 + }, + { + "epoch": 0.9603789126853377, + "grad_norm": 0.354869544506073, + "learning_rate": 1.0644801101020567e-05, + "loss": 0.4321, + "step": 34977 + }, + { + "epoch": 0.9604063701263043, + "grad_norm": 0.45461058616638184, + "learning_rate": 1.0644370105784263e-05, + "loss": 0.4931, + "step": 34978 + }, + { + "epoch": 0.9604338275672707, + "grad_norm": 0.4363061785697937, + "learning_rate": 1.0643939109346004e-05, + "loss": 0.5794, + "step": 34979 + }, + { + "epoch": 0.9604612850082372, + "grad_norm": 0.39782387018203735, + "learning_rate": 1.064350811170659e-05, + "loss": 0.4233, + "step": 34980 + }, + { + "epoch": 0.9604887424492037, + "grad_norm": 0.4188481867313385, + "learning_rate": 1.064307711286683e-05, + "loss": 0.544, + "step": 34981 + }, + { + "epoch": 0.9605161998901702, + "grad_norm": 0.47666648030281067, + "learning_rate": 1.0642646112827527e-05, + "loss": 0.4657, + "step": 34982 + }, + { + "epoch": 0.9605436573311368, + "grad_norm": 0.4199239909648895, + "learning_rate": 1.0642215111589483e-05, + "loss": 0.5658, + "step": 34983 + }, + { + "epoch": 0.9605711147721032, + "grad_norm": 0.3540232479572296, + "learning_rate": 1.0641784109153504e-05, + "loss": 0.4458, + "step": 34984 + }, + { + "epoch": 0.9605985722130698, + "grad_norm": 0.3639153838157654, + "learning_rate": 1.0641353105520389e-05, + "loss": 0.4236, + "step": 34985 + }, + { + "epoch": 0.9606260296540362, + "grad_norm": 0.4380652904510498, + "learning_rate": 1.0640922100690949e-05, + "loss": 0.409, + "step": 34986 + }, + { + "epoch": 0.9606534870950028, + "grad_norm": 0.3747994005680084, + "learning_rate": 1.0640491094665984e-05, + "loss": 0.4352, + "step": 34987 + }, + { + "epoch": 0.9606809445359692, + "grad_norm": 0.43306323885917664, + "learning_rate": 1.0640060087446298e-05, + "loss": 0.5169, + "step": 34988 + }, + { + "epoch": 0.9607084019769357, + "grad_norm": 0.43436717987060547, + "learning_rate": 1.0639629079032695e-05, + "loss": 0.4293, + "step": 34989 + }, + { + "epoch": 0.9607358594179023, + "grad_norm": 0.40504080057144165, + "learning_rate": 1.063919806942598e-05, + "loss": 0.5101, + "step": 34990 + }, + { + "epoch": 0.9607633168588687, + "grad_norm": 0.40714162588119507, + "learning_rate": 1.0638767058626957e-05, + "loss": 0.4652, + "step": 34991 + }, + { + "epoch": 0.9607907742998353, + "grad_norm": 0.4359044134616852, + "learning_rate": 1.0638336046636433e-05, + "loss": 0.4639, + "step": 34992 + }, + { + "epoch": 0.9608182317408017, + "grad_norm": 0.4037801921367645, + "learning_rate": 1.0637905033455203e-05, + "loss": 0.4759, + "step": 34993 + }, + { + "epoch": 0.9608456891817683, + "grad_norm": 0.4182799458503723, + "learning_rate": 1.0637474019084079e-05, + "loss": 0.3785, + "step": 34994 + }, + { + "epoch": 0.9608731466227347, + "grad_norm": 0.40907973051071167, + "learning_rate": 1.0637043003523861e-05, + "loss": 0.5232, + "step": 34995 + }, + { + "epoch": 0.9609006040637013, + "grad_norm": 0.3561069965362549, + "learning_rate": 1.0636611986775358e-05, + "loss": 0.4887, + "step": 34996 + }, + { + "epoch": 0.9609280615046678, + "grad_norm": 0.35056272149086, + "learning_rate": 1.0636180968839368e-05, + "loss": 0.4124, + "step": 34997 + }, + { + "epoch": 0.9609555189456342, + "grad_norm": 0.45574283599853516, + "learning_rate": 1.0635749949716698e-05, + "loss": 0.5439, + "step": 34998 + }, + { + "epoch": 0.9609829763866008, + "grad_norm": 0.40283820033073425, + "learning_rate": 1.0635318929408152e-05, + "loss": 0.4661, + "step": 34999 + }, + { + "epoch": 0.9610104338275672, + "grad_norm": 0.38372161984443665, + "learning_rate": 1.0634887907914533e-05, + "loss": 0.4803, + "step": 35000 + }, + { + "epoch": 0.9610378912685338, + "grad_norm": 0.37232908606529236, + "learning_rate": 1.0634456885236643e-05, + "loss": 0.4327, + "step": 35001 + }, + { + "epoch": 0.9610653487095002, + "grad_norm": 0.4131854176521301, + "learning_rate": 1.0634025861375292e-05, + "loss": 0.5196, + "step": 35002 + }, + { + "epoch": 0.9610928061504668, + "grad_norm": 0.38301974534988403, + "learning_rate": 1.0633594836331278e-05, + "loss": 0.4645, + "step": 35003 + }, + { + "epoch": 0.9611202635914333, + "grad_norm": 0.3789653778076172, + "learning_rate": 1.063316381010541e-05, + "loss": 0.5257, + "step": 35004 + }, + { + "epoch": 0.9611477210323998, + "grad_norm": 0.4055803120136261, + "learning_rate": 1.0632732782698487e-05, + "loss": 0.5063, + "step": 35005 + }, + { + "epoch": 0.9611751784733663, + "grad_norm": 0.3278353810310364, + "learning_rate": 1.0632301754111315e-05, + "loss": 0.4184, + "step": 35006 + }, + { + "epoch": 0.9612026359143327, + "grad_norm": 0.3692885935306549, + "learning_rate": 1.0631870724344699e-05, + "loss": 0.4538, + "step": 35007 + }, + { + "epoch": 0.9612300933552993, + "grad_norm": 0.37450674176216125, + "learning_rate": 1.0631439693399444e-05, + "loss": 0.5461, + "step": 35008 + }, + { + "epoch": 0.9612575507962657, + "grad_norm": 0.42019468545913696, + "learning_rate": 1.0631008661276352e-05, + "loss": 0.5064, + "step": 35009 + }, + { + "epoch": 0.9612850082372323, + "grad_norm": 0.3656025826931, + "learning_rate": 1.0630577627976226e-05, + "loss": 0.4422, + "step": 35010 + }, + { + "epoch": 0.9613124656781988, + "grad_norm": 0.3657659590244293, + "learning_rate": 1.0630146593499873e-05, + "loss": 0.3879, + "step": 35011 + }, + { + "epoch": 0.9613399231191653, + "grad_norm": 0.37266483902931213, + "learning_rate": 1.0629715557848095e-05, + "loss": 0.3717, + "step": 35012 + }, + { + "epoch": 0.9613673805601318, + "grad_norm": 0.3925817608833313, + "learning_rate": 1.0629284521021697e-05, + "loss": 0.5479, + "step": 35013 + }, + { + "epoch": 0.9613948380010983, + "grad_norm": 0.39141324162483215, + "learning_rate": 1.0628853483021479e-05, + "loss": 0.455, + "step": 35014 + }, + { + "epoch": 0.9614222954420648, + "grad_norm": 0.44831138849258423, + "learning_rate": 1.0628422443848254e-05, + "loss": 0.4602, + "step": 35015 + }, + { + "epoch": 0.9614497528830313, + "grad_norm": 0.4011302590370178, + "learning_rate": 1.0627991403502816e-05, + "loss": 0.5449, + "step": 35016 + }, + { + "epoch": 0.9614772103239978, + "grad_norm": 0.39860227704048157, + "learning_rate": 1.0627560361985976e-05, + "loss": 0.4135, + "step": 35017 + }, + { + "epoch": 0.9615046677649643, + "grad_norm": 0.4384708106517792, + "learning_rate": 1.0627129319298537e-05, + "loss": 0.5327, + "step": 35018 + }, + { + "epoch": 0.9615321252059308, + "grad_norm": 0.42406749725341797, + "learning_rate": 1.0626698275441298e-05, + "loss": 0.5896, + "step": 35019 + }, + { + "epoch": 0.9615595826468973, + "grad_norm": 0.5301697850227356, + "learning_rate": 1.0626267230415071e-05, + "loss": 0.5204, + "step": 35020 + }, + { + "epoch": 0.9615870400878638, + "grad_norm": 0.3955535590648651, + "learning_rate": 1.062583618422065e-05, + "loss": 0.576, + "step": 35021 + }, + { + "epoch": 0.9616144975288303, + "grad_norm": 0.4350195527076721, + "learning_rate": 1.0625405136858849e-05, + "loss": 0.4935, + "step": 35022 + }, + { + "epoch": 0.9616419549697968, + "grad_norm": 0.4172796905040741, + "learning_rate": 1.0624974088330469e-05, + "loss": 0.4573, + "step": 35023 + }, + { + "epoch": 0.9616694124107633, + "grad_norm": 0.3821723461151123, + "learning_rate": 1.0624543038636307e-05, + "loss": 0.4419, + "step": 35024 + }, + { + "epoch": 0.9616968698517299, + "grad_norm": 0.4648793935775757, + "learning_rate": 1.0624111987777179e-05, + "loss": 0.5604, + "step": 35025 + }, + { + "epoch": 0.9617243272926963, + "grad_norm": 0.3987381160259247, + "learning_rate": 1.062368093575388e-05, + "loss": 0.5309, + "step": 35026 + }, + { + "epoch": 0.9617517847336629, + "grad_norm": 0.36720743775367737, + "learning_rate": 1.0623249882567217e-05, + "loss": 0.4276, + "step": 35027 + }, + { + "epoch": 0.9617792421746293, + "grad_norm": 0.4243626296520233, + "learning_rate": 1.0622818828217996e-05, + "loss": 0.5121, + "step": 35028 + }, + { + "epoch": 0.9618066996155958, + "grad_norm": 0.4060686528682709, + "learning_rate": 1.0622387772707017e-05, + "loss": 0.4381, + "step": 35029 + }, + { + "epoch": 0.9618341570565623, + "grad_norm": 0.38944101333618164, + "learning_rate": 1.0621956716035086e-05, + "loss": 0.4591, + "step": 35030 + }, + { + "epoch": 0.9618616144975288, + "grad_norm": 0.4655141830444336, + "learning_rate": 1.0621525658203011e-05, + "loss": 0.4465, + "step": 35031 + }, + { + "epoch": 0.9618890719384954, + "grad_norm": 0.40796688199043274, + "learning_rate": 1.0621094599211587e-05, + "loss": 0.5322, + "step": 35032 + }, + { + "epoch": 0.9619165293794618, + "grad_norm": 0.3869318962097168, + "learning_rate": 1.0620663539061626e-05, + "loss": 0.454, + "step": 35033 + }, + { + "epoch": 0.9619439868204284, + "grad_norm": 0.352305144071579, + "learning_rate": 1.0620232477753929e-05, + "loss": 0.5454, + "step": 35034 + }, + { + "epoch": 0.9619714442613948, + "grad_norm": 0.429841548204422, + "learning_rate": 1.06198014152893e-05, + "loss": 0.5174, + "step": 35035 + }, + { + "epoch": 0.9619989017023614, + "grad_norm": 0.39434313774108887, + "learning_rate": 1.0619370351668545e-05, + "loss": 0.5338, + "step": 35036 + }, + { + "epoch": 0.9620263591433278, + "grad_norm": 0.41424140334129333, + "learning_rate": 1.0618939286892464e-05, + "loss": 0.5293, + "step": 35037 + }, + { + "epoch": 0.9620538165842943, + "grad_norm": 0.40168675780296326, + "learning_rate": 1.0618508220961867e-05, + "loss": 0.5343, + "step": 35038 + }, + { + "epoch": 0.9620812740252609, + "grad_norm": 0.4102764427661896, + "learning_rate": 1.0618077153877553e-05, + "loss": 0.5074, + "step": 35039 + }, + { + "epoch": 0.9621087314662273, + "grad_norm": 0.34754207730293274, + "learning_rate": 1.0617646085640329e-05, + "loss": 0.3947, + "step": 35040 + }, + { + "epoch": 0.9621361889071939, + "grad_norm": 0.40536993741989136, + "learning_rate": 1.0617215016250996e-05, + "loss": 0.4384, + "step": 35041 + }, + { + "epoch": 0.9621636463481603, + "grad_norm": 0.429420530796051, + "learning_rate": 1.0616783945710361e-05, + "loss": 0.4603, + "step": 35042 + }, + { + "epoch": 0.9621911037891269, + "grad_norm": 0.37821823358535767, + "learning_rate": 1.061635287401923e-05, + "loss": 0.49, + "step": 35043 + }, + { + "epoch": 0.9622185612300933, + "grad_norm": 0.4219171702861786, + "learning_rate": 1.0615921801178399e-05, + "loss": 0.5034, + "step": 35044 + }, + { + "epoch": 0.9622460186710599, + "grad_norm": 0.5050997138023376, + "learning_rate": 1.061549072718868e-05, + "loss": 0.5042, + "step": 35045 + }, + { + "epoch": 0.9622734761120264, + "grad_norm": 0.41063323616981506, + "learning_rate": 1.0615059652050874e-05, + "loss": 0.5285, + "step": 35046 + }, + { + "epoch": 0.9623009335529928, + "grad_norm": 0.3942725658416748, + "learning_rate": 1.0614628575765784e-05, + "loss": 0.4923, + "step": 35047 + }, + { + "epoch": 0.9623283909939594, + "grad_norm": 0.37701573967933655, + "learning_rate": 1.0614197498334217e-05, + "loss": 0.3617, + "step": 35048 + }, + { + "epoch": 0.9623558484349258, + "grad_norm": 0.38888129591941833, + "learning_rate": 1.0613766419756977e-05, + "loss": 0.4445, + "step": 35049 + }, + { + "epoch": 0.9623833058758924, + "grad_norm": 0.39639461040496826, + "learning_rate": 1.0613335340034864e-05, + "loss": 0.4896, + "step": 35050 + }, + { + "epoch": 0.9624107633168588, + "grad_norm": 0.46504074335098267, + "learning_rate": 1.0612904259168686e-05, + "loss": 0.5968, + "step": 35051 + }, + { + "epoch": 0.9624382207578254, + "grad_norm": 0.3956160545349121, + "learning_rate": 1.0612473177159246e-05, + "loss": 0.5043, + "step": 35052 + }, + { + "epoch": 0.9624656781987919, + "grad_norm": 0.35144513845443726, + "learning_rate": 1.061204209400735e-05, + "loss": 0.3847, + "step": 35053 + }, + { + "epoch": 0.9624931356397584, + "grad_norm": 0.38380151987075806, + "learning_rate": 1.0611611009713798e-05, + "loss": 0.5894, + "step": 35054 + }, + { + "epoch": 0.9625205930807249, + "grad_norm": 0.395063579082489, + "learning_rate": 1.0611179924279396e-05, + "loss": 0.517, + "step": 35055 + }, + { + "epoch": 0.9625480505216913, + "grad_norm": 0.3883573114871979, + "learning_rate": 1.0610748837704947e-05, + "loss": 0.5131, + "step": 35056 + }, + { + "epoch": 0.9625755079626579, + "grad_norm": 0.36344966292381287, + "learning_rate": 1.061031774999126e-05, + "loss": 0.4635, + "step": 35057 + }, + { + "epoch": 0.9626029654036243, + "grad_norm": 0.6213597059249878, + "learning_rate": 1.0609886661139132e-05, + "loss": 0.5232, + "step": 35058 + }, + { + "epoch": 0.9626304228445909, + "grad_norm": 0.39458954334259033, + "learning_rate": 1.0609455571149374e-05, + "loss": 0.5304, + "step": 35059 + }, + { + "epoch": 0.9626578802855574, + "grad_norm": 0.39464592933654785, + "learning_rate": 1.0609024480022785e-05, + "loss": 0.4731, + "step": 35060 + }, + { + "epoch": 0.9626853377265239, + "grad_norm": 0.36968302726745605, + "learning_rate": 1.0608593387760172e-05, + "loss": 0.4203, + "step": 35061 + }, + { + "epoch": 0.9627127951674904, + "grad_norm": 0.3899702727794647, + "learning_rate": 1.0608162294362339e-05, + "loss": 0.5298, + "step": 35062 + }, + { + "epoch": 0.9627402526084569, + "grad_norm": 0.5001720786094666, + "learning_rate": 1.0607731199830087e-05, + "loss": 0.4226, + "step": 35063 + }, + { + "epoch": 0.9627677100494234, + "grad_norm": 0.40031319856643677, + "learning_rate": 1.0607300104164221e-05, + "loss": 0.5354, + "step": 35064 + }, + { + "epoch": 0.9627951674903898, + "grad_norm": 0.38719120621681213, + "learning_rate": 1.060686900736555e-05, + "loss": 0.4466, + "step": 35065 + }, + { + "epoch": 0.9628226249313564, + "grad_norm": 0.46716395020484924, + "learning_rate": 1.0606437909434873e-05, + "loss": 0.5345, + "step": 35066 + }, + { + "epoch": 0.962850082372323, + "grad_norm": 0.39573055505752563, + "learning_rate": 1.0606006810372997e-05, + "loss": 0.4312, + "step": 35067 + }, + { + "epoch": 0.9628775398132894, + "grad_norm": 0.35435113310813904, + "learning_rate": 1.0605575710180723e-05, + "loss": 0.4688, + "step": 35068 + }, + { + "epoch": 0.9629049972542559, + "grad_norm": 0.3999537527561188, + "learning_rate": 1.0605144608858858e-05, + "loss": 0.4927, + "step": 35069 + }, + { + "epoch": 0.9629324546952224, + "grad_norm": 0.37166735529899597, + "learning_rate": 1.0604713506408205e-05, + "loss": 0.471, + "step": 35070 + }, + { + "epoch": 0.9629599121361889, + "grad_norm": 0.35434702038764954, + "learning_rate": 1.060428240282957e-05, + "loss": 0.4971, + "step": 35071 + }, + { + "epoch": 0.9629873695771554, + "grad_norm": 0.3562745153903961, + "learning_rate": 1.0603851298123754e-05, + "loss": 0.4535, + "step": 35072 + }, + { + "epoch": 0.9630148270181219, + "grad_norm": 0.3910257816314697, + "learning_rate": 1.060342019229156e-05, + "loss": 0.4734, + "step": 35073 + }, + { + "epoch": 0.9630422844590885, + "grad_norm": 0.38433828949928284, + "learning_rate": 1.0602989085333797e-05, + "loss": 0.4947, + "step": 35074 + }, + { + "epoch": 0.9630697419000549, + "grad_norm": 0.3929502069950104, + "learning_rate": 1.0602557977251266e-05, + "loss": 0.4886, + "step": 35075 + }, + { + "epoch": 0.9630971993410214, + "grad_norm": 0.6152403354644775, + "learning_rate": 1.0602126868044776e-05, + "loss": 0.5205, + "step": 35076 + }, + { + "epoch": 0.9631246567819879, + "grad_norm": 0.3721361458301544, + "learning_rate": 1.0601695757715122e-05, + "loss": 0.436, + "step": 35077 + }, + { + "epoch": 0.9631521142229544, + "grad_norm": 0.3764816224575043, + "learning_rate": 1.0601264646263116e-05, + "loss": 0.5671, + "step": 35078 + }, + { + "epoch": 0.9631795716639209, + "grad_norm": 0.4254549443721771, + "learning_rate": 1.060083353368956e-05, + "loss": 0.5363, + "step": 35079 + }, + { + "epoch": 0.9632070291048874, + "grad_norm": 0.5082731246948242, + "learning_rate": 1.0600402419995254e-05, + "loss": 0.4796, + "step": 35080 + }, + { + "epoch": 0.963234486545854, + "grad_norm": 0.3443826735019684, + "learning_rate": 1.0599971305181012e-05, + "loss": 0.3866, + "step": 35081 + }, + { + "epoch": 0.9632619439868204, + "grad_norm": 0.43061211705207825, + "learning_rate": 1.0599540189247628e-05, + "loss": 0.4774, + "step": 35082 + }, + { + "epoch": 0.963289401427787, + "grad_norm": 0.7771103978157043, + "learning_rate": 1.0599109072195911e-05, + "loss": 0.5787, + "step": 35083 + }, + { + "epoch": 0.9633168588687534, + "grad_norm": 0.428404837846756, + "learning_rate": 1.0598677954026664e-05, + "loss": 0.5589, + "step": 35084 + }, + { + "epoch": 0.96334431630972, + "grad_norm": 0.3395729064941406, + "learning_rate": 1.059824683474069e-05, + "loss": 0.3543, + "step": 35085 + }, + { + "epoch": 0.9633717737506864, + "grad_norm": 0.4284113645553589, + "learning_rate": 1.05978157143388e-05, + "loss": 0.5227, + "step": 35086 + }, + { + "epoch": 0.9633992311916529, + "grad_norm": 0.4060446619987488, + "learning_rate": 1.059738459282179e-05, + "loss": 0.5498, + "step": 35087 + }, + { + "epoch": 0.9634266886326195, + "grad_norm": 0.3957080543041229, + "learning_rate": 1.0596953470190466e-05, + "loss": 0.535, + "step": 35088 + }, + { + "epoch": 0.9634541460735859, + "grad_norm": 0.3488420844078064, + "learning_rate": 1.0596522346445633e-05, + "loss": 0.4056, + "step": 35089 + }, + { + "epoch": 0.9634816035145525, + "grad_norm": 0.3663260042667389, + "learning_rate": 1.0596091221588094e-05, + "loss": 0.411, + "step": 35090 + }, + { + "epoch": 0.9635090609555189, + "grad_norm": 0.3592241108417511, + "learning_rate": 1.0595660095618659e-05, + "loss": 0.4567, + "step": 35091 + }, + { + "epoch": 0.9635365183964855, + "grad_norm": 0.4094405472278595, + "learning_rate": 1.0595228968538124e-05, + "loss": 0.5345, + "step": 35092 + }, + { + "epoch": 0.9635639758374519, + "grad_norm": 0.39761242270469666, + "learning_rate": 1.05947978403473e-05, + "loss": 0.5087, + "step": 35093 + }, + { + "epoch": 0.9635914332784185, + "grad_norm": 0.41071638464927673, + "learning_rate": 1.0594366711046988e-05, + "loss": 0.5127, + "step": 35094 + }, + { + "epoch": 0.963618890719385, + "grad_norm": 0.40036240220069885, + "learning_rate": 1.0593935580637988e-05, + "loss": 0.4668, + "step": 35095 + }, + { + "epoch": 0.9636463481603514, + "grad_norm": 0.3595679998397827, + "learning_rate": 1.0593504449121115e-05, + "loss": 0.4405, + "step": 35096 + }, + { + "epoch": 0.963673805601318, + "grad_norm": 0.3773963153362274, + "learning_rate": 1.0593073316497162e-05, + "loss": 0.4912, + "step": 35097 + }, + { + "epoch": 0.9637012630422844, + "grad_norm": 0.43004322052001953, + "learning_rate": 1.0592642182766939e-05, + "loss": 0.5251, + "step": 35098 + }, + { + "epoch": 0.963728720483251, + "grad_norm": 0.40358099341392517, + "learning_rate": 1.0592211047931252e-05, + "loss": 0.5691, + "step": 35099 + }, + { + "epoch": 0.9637561779242174, + "grad_norm": 0.39737117290496826, + "learning_rate": 1.0591779911990898e-05, + "loss": 0.508, + "step": 35100 + }, + { + "epoch": 0.963783635365184, + "grad_norm": 0.44219550490379333, + "learning_rate": 1.0591348774946689e-05, + "loss": 0.5063, + "step": 35101 + }, + { + "epoch": 0.9638110928061505, + "grad_norm": 0.45346418023109436, + "learning_rate": 1.0590917636799423e-05, + "loss": 0.5213, + "step": 35102 + }, + { + "epoch": 0.963838550247117, + "grad_norm": 0.41193434596061707, + "learning_rate": 1.059048649754991e-05, + "loss": 0.5503, + "step": 35103 + }, + { + "epoch": 0.9638660076880835, + "grad_norm": 0.40078970789909363, + "learning_rate": 1.059005535719895e-05, + "loss": 0.4897, + "step": 35104 + }, + { + "epoch": 0.9638934651290499, + "grad_norm": 0.4443824589252472, + "learning_rate": 1.0589624215747347e-05, + "loss": 0.5466, + "step": 35105 + }, + { + "epoch": 0.9639209225700165, + "grad_norm": 0.39751601219177246, + "learning_rate": 1.058919307319591e-05, + "loss": 0.4454, + "step": 35106 + }, + { + "epoch": 0.9639483800109829, + "grad_norm": 0.41033726930618286, + "learning_rate": 1.0588761929545435e-05, + "loss": 0.5007, + "step": 35107 + }, + { + "epoch": 0.9639758374519495, + "grad_norm": 0.8612298965454102, + "learning_rate": 1.0588330784796734e-05, + "loss": 0.5292, + "step": 35108 + }, + { + "epoch": 0.964003294892916, + "grad_norm": 0.36748945713043213, + "learning_rate": 1.0587899638950608e-05, + "loss": 0.4952, + "step": 35109 + }, + { + "epoch": 0.9640307523338825, + "grad_norm": 0.35057544708251953, + "learning_rate": 1.0587468492007862e-05, + "loss": 0.4472, + "step": 35110 + }, + { + "epoch": 0.964058209774849, + "grad_norm": 0.4168172776699066, + "learning_rate": 1.05870373439693e-05, + "loss": 0.5443, + "step": 35111 + }, + { + "epoch": 0.9640856672158155, + "grad_norm": 0.5152775049209595, + "learning_rate": 1.0586606194835724e-05, + "loss": 0.5767, + "step": 35112 + }, + { + "epoch": 0.964113124656782, + "grad_norm": 0.35513848066329956, + "learning_rate": 1.0586175044607942e-05, + "loss": 0.5042, + "step": 35113 + }, + { + "epoch": 0.9641405820977484, + "grad_norm": 0.5160991549491882, + "learning_rate": 1.0585743893286755e-05, + "loss": 0.5442, + "step": 35114 + }, + { + "epoch": 0.964168039538715, + "grad_norm": 0.4300702214241028, + "learning_rate": 1.0585312740872973e-05, + "loss": 0.4969, + "step": 35115 + }, + { + "epoch": 0.9641954969796815, + "grad_norm": 0.41512665152549744, + "learning_rate": 1.0584881587367393e-05, + "loss": 0.5251, + "step": 35116 + }, + { + "epoch": 0.964222954420648, + "grad_norm": 0.4042361080646515, + "learning_rate": 1.058445043277082e-05, + "loss": 0.5582, + "step": 35117 + }, + { + "epoch": 0.9642504118616145, + "grad_norm": 0.5207343101501465, + "learning_rate": 1.0584019277084063e-05, + "loss": 0.443, + "step": 35118 + }, + { + "epoch": 0.964277869302581, + "grad_norm": 0.40139713883399963, + "learning_rate": 1.0583588120307922e-05, + "loss": 0.4428, + "step": 35119 + }, + { + "epoch": 0.9643053267435475, + "grad_norm": 0.38012877106666565, + "learning_rate": 1.0583156962443204e-05, + "loss": 0.4526, + "step": 35120 + }, + { + "epoch": 0.964332784184514, + "grad_norm": 0.3876039385795593, + "learning_rate": 1.0582725803490715e-05, + "loss": 0.5092, + "step": 35121 + }, + { + "epoch": 0.9643602416254805, + "grad_norm": 0.36622312664985657, + "learning_rate": 1.058229464345125e-05, + "loss": 0.4824, + "step": 35122 + }, + { + "epoch": 0.9643876990664471, + "grad_norm": 0.39014801383018494, + "learning_rate": 1.0581863482325625e-05, + "loss": 0.4996, + "step": 35123 + }, + { + "epoch": 0.9644151565074135, + "grad_norm": 0.3894929885864258, + "learning_rate": 1.0581432320114638e-05, + "loss": 0.4653, + "step": 35124 + }, + { + "epoch": 0.96444261394838, + "grad_norm": 0.3953203856945038, + "learning_rate": 1.058100115681909e-05, + "loss": 0.5198, + "step": 35125 + }, + { + "epoch": 0.9644700713893465, + "grad_norm": 0.4546075463294983, + "learning_rate": 1.0580569992439795e-05, + "loss": 0.5523, + "step": 35126 + }, + { + "epoch": 0.964497528830313, + "grad_norm": 0.3914923071861267, + "learning_rate": 1.0580138826977546e-05, + "loss": 0.47, + "step": 35127 + }, + { + "epoch": 0.9645249862712795, + "grad_norm": 0.3735159635543823, + "learning_rate": 1.0579707660433158e-05, + "loss": 0.4768, + "step": 35128 + }, + { + "epoch": 0.964552443712246, + "grad_norm": 0.4204539656639099, + "learning_rate": 1.057927649280743e-05, + "loss": 0.4671, + "step": 35129 + }, + { + "epoch": 0.9645799011532126, + "grad_norm": 0.40397655963897705, + "learning_rate": 1.0578845324101163e-05, + "loss": 0.493, + "step": 35130 + }, + { + "epoch": 0.964607358594179, + "grad_norm": 0.4602471590042114, + "learning_rate": 1.0578414154315166e-05, + "loss": 0.5096, + "step": 35131 + }, + { + "epoch": 0.9646348160351456, + "grad_norm": 0.4990037977695465, + "learning_rate": 1.0577982983450241e-05, + "loss": 0.4683, + "step": 35132 + }, + { + "epoch": 0.964662273476112, + "grad_norm": 0.4145471751689911, + "learning_rate": 1.0577551811507196e-05, + "loss": 0.4563, + "step": 35133 + }, + { + "epoch": 0.9646897309170785, + "grad_norm": 0.4233947992324829, + "learning_rate": 1.0577120638486833e-05, + "loss": 0.4518, + "step": 35134 + }, + { + "epoch": 0.964717188358045, + "grad_norm": 0.39893364906311035, + "learning_rate": 1.0576689464389953e-05, + "loss": 0.4232, + "step": 35135 + }, + { + "epoch": 0.9647446457990115, + "grad_norm": 0.3843309283256531, + "learning_rate": 1.0576258289217364e-05, + "loss": 0.4624, + "step": 35136 + }, + { + "epoch": 0.9647721032399781, + "grad_norm": 0.4080811142921448, + "learning_rate": 1.0575827112969872e-05, + "loss": 0.5289, + "step": 35137 + }, + { + "epoch": 0.9647995606809445, + "grad_norm": 0.7142015695571899, + "learning_rate": 1.0575395935648274e-05, + "loss": 0.4281, + "step": 35138 + }, + { + "epoch": 0.9648270181219111, + "grad_norm": 0.33257949352264404, + "learning_rate": 1.057496475725338e-05, + "loss": 0.3195, + "step": 35139 + }, + { + "epoch": 0.9648544755628775, + "grad_norm": 0.44068193435668945, + "learning_rate": 1.0574533577785995e-05, + "loss": 0.5046, + "step": 35140 + }, + { + "epoch": 0.9648819330038441, + "grad_norm": 0.43139567971229553, + "learning_rate": 1.0574102397246922e-05, + "loss": 0.517, + "step": 35141 + }, + { + "epoch": 0.9649093904448105, + "grad_norm": 0.4764642119407654, + "learning_rate": 1.0573671215636964e-05, + "loss": 0.4783, + "step": 35142 + }, + { + "epoch": 0.964936847885777, + "grad_norm": 0.4366532862186432, + "learning_rate": 1.0573240032956924e-05, + "loss": 0.5374, + "step": 35143 + }, + { + "epoch": 0.9649643053267436, + "grad_norm": 0.4292571544647217, + "learning_rate": 1.0572808849207611e-05, + "loss": 0.5128, + "step": 35144 + }, + { + "epoch": 0.96499176276771, + "grad_norm": 0.40761685371398926, + "learning_rate": 1.0572377664389826e-05, + "loss": 0.4853, + "step": 35145 + }, + { + "epoch": 0.9650192202086766, + "grad_norm": 0.3948383033275604, + "learning_rate": 1.0571946478504374e-05, + "loss": 0.5176, + "step": 35146 + }, + { + "epoch": 0.965046677649643, + "grad_norm": 0.452436625957489, + "learning_rate": 1.0571515291552059e-05, + "loss": 0.5091, + "step": 35147 + }, + { + "epoch": 0.9650741350906096, + "grad_norm": 0.398105263710022, + "learning_rate": 1.0571084103533685e-05, + "loss": 0.4739, + "step": 35148 + }, + { + "epoch": 0.965101592531576, + "grad_norm": 0.3942793905735016, + "learning_rate": 1.0570652914450059e-05, + "loss": 0.5113, + "step": 35149 + }, + { + "epoch": 0.9651290499725426, + "grad_norm": 0.43880605697631836, + "learning_rate": 1.057022172430198e-05, + "loss": 0.502, + "step": 35150 + }, + { + "epoch": 0.9651565074135091, + "grad_norm": 0.3824721872806549, + "learning_rate": 1.0569790533090258e-05, + "loss": 0.4859, + "step": 35151 + }, + { + "epoch": 0.9651839648544756, + "grad_norm": 0.40516743063926697, + "learning_rate": 1.0569359340815694e-05, + "loss": 0.4933, + "step": 35152 + }, + { + "epoch": 0.9652114222954421, + "grad_norm": 0.3819863200187683, + "learning_rate": 1.0568928147479091e-05, + "loss": 0.5001, + "step": 35153 + }, + { + "epoch": 0.9652388797364085, + "grad_norm": 0.39252305030822754, + "learning_rate": 1.056849695308126e-05, + "loss": 0.4421, + "step": 35154 + }, + { + "epoch": 0.9652663371773751, + "grad_norm": 0.35672587156295776, + "learning_rate": 1.0568065757622998e-05, + "loss": 0.4695, + "step": 35155 + }, + { + "epoch": 0.9652937946183415, + "grad_norm": 0.4153802990913391, + "learning_rate": 1.056763456110511e-05, + "loss": 0.4696, + "step": 35156 + }, + { + "epoch": 0.9653212520593081, + "grad_norm": 0.4669193625450134, + "learning_rate": 1.0567203363528407e-05, + "loss": 0.5195, + "step": 35157 + }, + { + "epoch": 0.9653487095002746, + "grad_norm": 0.4475369155406952, + "learning_rate": 1.0566772164893686e-05, + "loss": 0.4907, + "step": 35158 + }, + { + "epoch": 0.9653761669412411, + "grad_norm": 0.42069876194000244, + "learning_rate": 1.0566340965201753e-05, + "loss": 0.4239, + "step": 35159 + }, + { + "epoch": 0.9654036243822076, + "grad_norm": 0.41112130880355835, + "learning_rate": 1.0565909764453418e-05, + "loss": 0.4425, + "step": 35160 + }, + { + "epoch": 0.965431081823174, + "grad_norm": 0.3801395893096924, + "learning_rate": 1.0565478562649476e-05, + "loss": 0.4993, + "step": 35161 + }, + { + "epoch": 0.9654585392641406, + "grad_norm": 0.392825186252594, + "learning_rate": 1.0565047359790736e-05, + "loss": 0.4294, + "step": 35162 + }, + { + "epoch": 0.965485996705107, + "grad_norm": 0.38481423258781433, + "learning_rate": 1.0564616155878005e-05, + "loss": 0.4969, + "step": 35163 + }, + { + "epoch": 0.9655134541460736, + "grad_norm": 0.33402401208877563, + "learning_rate": 1.0564184950912084e-05, + "loss": 0.4137, + "step": 35164 + }, + { + "epoch": 0.9655409115870401, + "grad_norm": 0.37079188227653503, + "learning_rate": 1.0563753744893776e-05, + "loss": 0.4682, + "step": 35165 + }, + { + "epoch": 0.9655683690280066, + "grad_norm": 0.4097210764884949, + "learning_rate": 1.0563322537823888e-05, + "loss": 0.5084, + "step": 35166 + }, + { + "epoch": 0.9655958264689731, + "grad_norm": 0.36722564697265625, + "learning_rate": 1.0562891329703227e-05, + "loss": 0.496, + "step": 35167 + }, + { + "epoch": 0.9656232839099396, + "grad_norm": 0.4203089773654938, + "learning_rate": 1.0562460120532592e-05, + "loss": 0.4919, + "step": 35168 + }, + { + "epoch": 0.9656507413509061, + "grad_norm": 0.4010378420352936, + "learning_rate": 1.0562028910312786e-05, + "loss": 0.5215, + "step": 35169 + }, + { + "epoch": 0.9656781987918726, + "grad_norm": 0.3728638291358948, + "learning_rate": 1.0561597699044622e-05, + "loss": 0.5531, + "step": 35170 + }, + { + "epoch": 0.9657056562328391, + "grad_norm": 0.3910488784313202, + "learning_rate": 1.0561166486728896e-05, + "loss": 0.5017, + "step": 35171 + }, + { + "epoch": 0.9657331136738057, + "grad_norm": 0.4086301624774933, + "learning_rate": 1.0560735273366415e-05, + "loss": 0.4937, + "step": 35172 + }, + { + "epoch": 0.9657605711147721, + "grad_norm": 0.3600023686885834, + "learning_rate": 1.0560304058957986e-05, + "loss": 0.386, + "step": 35173 + }, + { + "epoch": 0.9657880285557386, + "grad_norm": 0.4839244484901428, + "learning_rate": 1.0559872843504408e-05, + "loss": 0.4936, + "step": 35174 + }, + { + "epoch": 0.9658154859967051, + "grad_norm": 0.42023321986198425, + "learning_rate": 1.0559441627006493e-05, + "loss": 0.5811, + "step": 35175 + }, + { + "epoch": 0.9658429434376716, + "grad_norm": 0.45257657766342163, + "learning_rate": 1.0559010409465036e-05, + "loss": 0.4665, + "step": 35176 + }, + { + "epoch": 0.9658704008786381, + "grad_norm": 0.36065351963043213, + "learning_rate": 1.0558579190880848e-05, + "loss": 0.4051, + "step": 35177 + }, + { + "epoch": 0.9658978583196046, + "grad_norm": 0.4357685446739197, + "learning_rate": 1.0558147971254734e-05, + "loss": 0.4756, + "step": 35178 + }, + { + "epoch": 0.9659253157605712, + "grad_norm": 0.48981696367263794, + "learning_rate": 1.0557716750587492e-05, + "loss": 0.4548, + "step": 35179 + }, + { + "epoch": 0.9659527732015376, + "grad_norm": 0.4535842537879944, + "learning_rate": 1.0557285528879932e-05, + "loss": 0.4248, + "step": 35180 + }, + { + "epoch": 0.9659802306425042, + "grad_norm": 0.3603714108467102, + "learning_rate": 1.0556854306132855e-05, + "loss": 0.4412, + "step": 35181 + }, + { + "epoch": 0.9660076880834706, + "grad_norm": 0.3603188395500183, + "learning_rate": 1.055642308234707e-05, + "loss": 0.4841, + "step": 35182 + }, + { + "epoch": 0.9660351455244371, + "grad_norm": 0.40588152408599854, + "learning_rate": 1.0555991857523378e-05, + "loss": 0.4654, + "step": 35183 + }, + { + "epoch": 0.9660626029654036, + "grad_norm": 0.47059527039527893, + "learning_rate": 1.055556063166258e-05, + "loss": 0.5162, + "step": 35184 + }, + { + "epoch": 0.9660900604063701, + "grad_norm": 0.4862455129623413, + "learning_rate": 1.0555129404765488e-05, + "loss": 0.5763, + "step": 35185 + }, + { + "epoch": 0.9661175178473367, + "grad_norm": 0.35452792048454285, + "learning_rate": 1.0554698176832902e-05, + "loss": 0.47, + "step": 35186 + }, + { + "epoch": 0.9661449752883031, + "grad_norm": 0.4169101417064667, + "learning_rate": 1.0554266947865625e-05, + "loss": 0.5494, + "step": 35187 + }, + { + "epoch": 0.9661724327292697, + "grad_norm": 0.4525775611400604, + "learning_rate": 1.0553835717864464e-05, + "loss": 0.495, + "step": 35188 + }, + { + "epoch": 0.9661998901702361, + "grad_norm": 0.3601551651954651, + "learning_rate": 1.0553404486830224e-05, + "loss": 0.4646, + "step": 35189 + }, + { + "epoch": 0.9662273476112027, + "grad_norm": 0.38217419385910034, + "learning_rate": 1.0552973254763707e-05, + "loss": 0.5612, + "step": 35190 + }, + { + "epoch": 0.9662548050521691, + "grad_norm": 0.40226560831069946, + "learning_rate": 1.055254202166572e-05, + "loss": 0.468, + "step": 35191 + }, + { + "epoch": 0.9662822624931356, + "grad_norm": 0.4273045063018799, + "learning_rate": 1.0552110787537063e-05, + "loss": 0.4992, + "step": 35192 + }, + { + "epoch": 0.9663097199341022, + "grad_norm": 0.4824993908405304, + "learning_rate": 1.0551679552378544e-05, + "loss": 0.4656, + "step": 35193 + }, + { + "epoch": 0.9663371773750686, + "grad_norm": 0.35287633538246155, + "learning_rate": 1.0551248316190965e-05, + "loss": 0.4669, + "step": 35194 + }, + { + "epoch": 0.9663646348160352, + "grad_norm": 0.4394061863422394, + "learning_rate": 1.0550817078975135e-05, + "loss": 0.447, + "step": 35195 + }, + { + "epoch": 0.9663920922570016, + "grad_norm": 0.4234198033809662, + "learning_rate": 1.0550385840731856e-05, + "loss": 0.5361, + "step": 35196 + }, + { + "epoch": 0.9664195496979682, + "grad_norm": 0.3381689488887787, + "learning_rate": 1.0549954601461927e-05, + "loss": 0.5038, + "step": 35197 + }, + { + "epoch": 0.9664470071389346, + "grad_norm": 0.38648736476898193, + "learning_rate": 1.054952336116616e-05, + "loss": 0.5509, + "step": 35198 + }, + { + "epoch": 0.9664744645799012, + "grad_norm": 0.3836428225040436, + "learning_rate": 1.0549092119845359e-05, + "loss": 0.4694, + "step": 35199 + }, + { + "epoch": 0.9665019220208677, + "grad_norm": 0.3683060109615326, + "learning_rate": 1.0548660877500323e-05, + "loss": 0.5108, + "step": 35200 + }, + { + "epoch": 0.9665293794618341, + "grad_norm": 0.40123680233955383, + "learning_rate": 1.0548229634131859e-05, + "loss": 0.5055, + "step": 35201 + }, + { + "epoch": 0.9665568369028007, + "grad_norm": 0.42846211791038513, + "learning_rate": 1.0547798389740773e-05, + "loss": 0.5492, + "step": 35202 + }, + { + "epoch": 0.9665842943437671, + "grad_norm": 0.4050486981868744, + "learning_rate": 1.0547367144327866e-05, + "loss": 0.4686, + "step": 35203 + }, + { + "epoch": 0.9666117517847337, + "grad_norm": 0.547946035861969, + "learning_rate": 1.0546935897893948e-05, + "loss": 0.597, + "step": 35204 + }, + { + "epoch": 0.9666392092257001, + "grad_norm": 0.39013683795928955, + "learning_rate": 1.0546504650439818e-05, + "loss": 0.5224, + "step": 35205 + }, + { + "epoch": 0.9666666666666667, + "grad_norm": 0.3873266279697418, + "learning_rate": 1.0546073401966283e-05, + "loss": 0.4872, + "step": 35206 + }, + { + "epoch": 0.9666941241076332, + "grad_norm": 0.8980872631072998, + "learning_rate": 1.0545642152474148e-05, + "loss": 0.5151, + "step": 35207 + }, + { + "epoch": 0.9667215815485997, + "grad_norm": 0.48618969321250916, + "learning_rate": 1.0545210901964214e-05, + "loss": 0.4614, + "step": 35208 + }, + { + "epoch": 0.9667490389895662, + "grad_norm": 0.37211450934410095, + "learning_rate": 1.0544779650437288e-05, + "loss": 0.4552, + "step": 35209 + }, + { + "epoch": 0.9667764964305327, + "grad_norm": 0.42884454131126404, + "learning_rate": 1.0544348397894176e-05, + "loss": 0.5103, + "step": 35210 + }, + { + "epoch": 0.9668039538714992, + "grad_norm": 0.42203205823898315, + "learning_rate": 1.0543917144335679e-05, + "loss": 0.4653, + "step": 35211 + }, + { + "epoch": 0.9668314113124656, + "grad_norm": 0.38505497574806213, + "learning_rate": 1.05434858897626e-05, + "loss": 0.481, + "step": 35212 + }, + { + "epoch": 0.9668588687534322, + "grad_norm": 0.36655324697494507, + "learning_rate": 1.0543054634175751e-05, + "loss": 0.497, + "step": 35213 + }, + { + "epoch": 0.9668863261943987, + "grad_norm": 0.39480558037757874, + "learning_rate": 1.0542623377575932e-05, + "loss": 0.4578, + "step": 35214 + }, + { + "epoch": 0.9669137836353652, + "grad_norm": 0.41965165734291077, + "learning_rate": 1.0542192119963943e-05, + "loss": 0.5423, + "step": 35215 + }, + { + "epoch": 0.9669412410763317, + "grad_norm": 0.382213294506073, + "learning_rate": 1.0541760861340596e-05, + "loss": 0.5494, + "step": 35216 + }, + { + "epoch": 0.9669686985172982, + "grad_norm": 0.4118514657020569, + "learning_rate": 1.0541329601706691e-05, + "loss": 0.4941, + "step": 35217 + }, + { + "epoch": 0.9669961559582647, + "grad_norm": 0.39539626240730286, + "learning_rate": 1.0540898341063033e-05, + "loss": 0.4422, + "step": 35218 + }, + { + "epoch": 0.9670236133992312, + "grad_norm": 0.35536989569664, + "learning_rate": 1.0540467079410428e-05, + "loss": 0.4879, + "step": 35219 + }, + { + "epoch": 0.9670510708401977, + "grad_norm": 0.4473486542701721, + "learning_rate": 1.0540035816749678e-05, + "loss": 0.5038, + "step": 35220 + }, + { + "epoch": 0.9670785282811641, + "grad_norm": 0.4342859983444214, + "learning_rate": 1.053960455308159e-05, + "loss": 0.5208, + "step": 35221 + }, + { + "epoch": 0.9671059857221307, + "grad_norm": 0.39077940583229065, + "learning_rate": 1.053917328840697e-05, + "loss": 0.4907, + "step": 35222 + }, + { + "epoch": 0.9671334431630972, + "grad_norm": 0.3444693684577942, + "learning_rate": 1.0538742022726613e-05, + "loss": 0.4775, + "step": 35223 + }, + { + "epoch": 0.9671609006040637, + "grad_norm": 0.3865779638290405, + "learning_rate": 1.0538310756041337e-05, + "loss": 0.4334, + "step": 35224 + }, + { + "epoch": 0.9671883580450302, + "grad_norm": 0.5065836906433105, + "learning_rate": 1.0537879488351933e-05, + "loss": 0.4743, + "step": 35225 + }, + { + "epoch": 0.9672158154859967, + "grad_norm": 0.3807596266269684, + "learning_rate": 1.053744821965922e-05, + "loss": 0.4907, + "step": 35226 + }, + { + "epoch": 0.9672432729269632, + "grad_norm": 0.3936893343925476, + "learning_rate": 1.053701694996399e-05, + "loss": 0.4417, + "step": 35227 + }, + { + "epoch": 0.9672707303679297, + "grad_norm": 0.36001309752464294, + "learning_rate": 1.0536585679267051e-05, + "loss": 0.3984, + "step": 35228 + }, + { + "epoch": 0.9672981878088962, + "grad_norm": 0.3549315929412842, + "learning_rate": 1.053615440756921e-05, + "loss": 0.4323, + "step": 35229 + }, + { + "epoch": 0.9673256452498628, + "grad_norm": 0.434817910194397, + "learning_rate": 1.0535723134871267e-05, + "loss": 0.5069, + "step": 35230 + }, + { + "epoch": 0.9673531026908292, + "grad_norm": 0.47934338450431824, + "learning_rate": 1.0535291861174033e-05, + "loss": 0.4308, + "step": 35231 + }, + { + "epoch": 0.9673805601317957, + "grad_norm": 0.344530314207077, + "learning_rate": 1.053486058647831e-05, + "loss": 0.3591, + "step": 35232 + }, + { + "epoch": 0.9674080175727622, + "grad_norm": 0.4028472900390625, + "learning_rate": 1.0534429310784897e-05, + "loss": 0.5576, + "step": 35233 + }, + { + "epoch": 0.9674354750137287, + "grad_norm": 0.38141193985939026, + "learning_rate": 1.0533998034094607e-05, + "loss": 0.5124, + "step": 35234 + }, + { + "epoch": 0.9674629324546952, + "grad_norm": 0.42573490738868713, + "learning_rate": 1.0533566756408237e-05, + "loss": 0.4404, + "step": 35235 + }, + { + "epoch": 0.9674903898956617, + "grad_norm": 0.4179020822048187, + "learning_rate": 1.0533135477726594e-05, + "loss": 0.4503, + "step": 35236 + }, + { + "epoch": 0.9675178473366283, + "grad_norm": 0.35687190294265747, + "learning_rate": 1.0532704198050487e-05, + "loss": 0.4617, + "step": 35237 + }, + { + "epoch": 0.9675453047775947, + "grad_norm": 0.3918001651763916, + "learning_rate": 1.0532272917380713e-05, + "loss": 0.5092, + "step": 35238 + }, + { + "epoch": 0.9675727622185613, + "grad_norm": 0.4563337564468384, + "learning_rate": 1.0531841635718083e-05, + "loss": 0.453, + "step": 35239 + }, + { + "epoch": 0.9676002196595277, + "grad_norm": 0.36510682106018066, + "learning_rate": 1.0531410353063398e-05, + "loss": 0.5322, + "step": 35240 + }, + { + "epoch": 0.9676276771004942, + "grad_norm": 0.3872332274913788, + "learning_rate": 1.0530979069417463e-05, + "loss": 0.5008, + "step": 35241 + }, + { + "epoch": 0.9676551345414607, + "grad_norm": 0.45500314235687256, + "learning_rate": 1.0530547784781083e-05, + "loss": 0.5574, + "step": 35242 + }, + { + "epoch": 0.9676825919824272, + "grad_norm": 0.37396931648254395, + "learning_rate": 1.0530116499155059e-05, + "loss": 0.4345, + "step": 35243 + }, + { + "epoch": 0.9677100494233938, + "grad_norm": 0.327628493309021, + "learning_rate": 1.0529685212540202e-05, + "loss": 0.4362, + "step": 35244 + }, + { + "epoch": 0.9677375068643602, + "grad_norm": 0.42485496401786804, + "learning_rate": 1.0529253924937311e-05, + "loss": 0.4356, + "step": 35245 + }, + { + "epoch": 0.9677649643053268, + "grad_norm": 0.3608970046043396, + "learning_rate": 1.0528822636347191e-05, + "loss": 0.4904, + "step": 35246 + }, + { + "epoch": 0.9677924217462932, + "grad_norm": 0.44611889123916626, + "learning_rate": 1.0528391346770653e-05, + "loss": 0.6327, + "step": 35247 + }, + { + "epoch": 0.9678198791872598, + "grad_norm": 0.45783281326293945, + "learning_rate": 1.0527960056208494e-05, + "loss": 0.5318, + "step": 35248 + }, + { + "epoch": 0.9678473366282262, + "grad_norm": 0.34609296917915344, + "learning_rate": 1.052752876466152e-05, + "loss": 0.4791, + "step": 35249 + }, + { + "epoch": 0.9678747940691927, + "grad_norm": 0.4316384196281433, + "learning_rate": 1.0527097472130537e-05, + "loss": 0.4989, + "step": 35250 + }, + { + "epoch": 0.9679022515101593, + "grad_norm": 0.4224710166454315, + "learning_rate": 1.0526666178616347e-05, + "loss": 0.4596, + "step": 35251 + }, + { + "epoch": 0.9679297089511257, + "grad_norm": 0.41421663761138916, + "learning_rate": 1.0526234884119759e-05, + "loss": 0.423, + "step": 35252 + }, + { + "epoch": 0.9679571663920923, + "grad_norm": 0.5299590826034546, + "learning_rate": 1.0525803588641576e-05, + "loss": 0.4575, + "step": 35253 + }, + { + "epoch": 0.9679846238330587, + "grad_norm": 0.3687589764595032, + "learning_rate": 1.0525372292182598e-05, + "loss": 0.4962, + "step": 35254 + }, + { + "epoch": 0.9680120812740253, + "grad_norm": 0.39064204692840576, + "learning_rate": 1.0524940994743637e-05, + "loss": 0.4771, + "step": 35255 + }, + { + "epoch": 0.9680395387149917, + "grad_norm": 0.3631851375102997, + "learning_rate": 1.0524509696325491e-05, + "loss": 0.4643, + "step": 35256 + }, + { + "epoch": 0.9680669961559583, + "grad_norm": 0.5421507358551025, + "learning_rate": 1.0524078396928968e-05, + "loss": 0.4116, + "step": 35257 + }, + { + "epoch": 0.9680944535969248, + "grad_norm": 0.46866410970687866, + "learning_rate": 1.0523647096554872e-05, + "loss": 0.4194, + "step": 35258 + }, + { + "epoch": 0.9681219110378912, + "grad_norm": 0.5175915956497192, + "learning_rate": 1.0523215795204005e-05, + "loss": 0.4652, + "step": 35259 + }, + { + "epoch": 0.9681493684788578, + "grad_norm": 0.356082558631897, + "learning_rate": 1.0522784492877176e-05, + "loss": 0.5102, + "step": 35260 + }, + { + "epoch": 0.9681768259198242, + "grad_norm": 0.44077247381210327, + "learning_rate": 1.0522353189575183e-05, + "loss": 0.4829, + "step": 35261 + }, + { + "epoch": 0.9682042833607908, + "grad_norm": 0.4087403118610382, + "learning_rate": 1.0521921885298839e-05, + "loss": 0.5354, + "step": 35262 + }, + { + "epoch": 0.9682317408017572, + "grad_norm": 0.4179949462413788, + "learning_rate": 1.0521490580048943e-05, + "loss": 0.4846, + "step": 35263 + }, + { + "epoch": 0.9682591982427238, + "grad_norm": 0.36364561319351196, + "learning_rate": 1.05210592738263e-05, + "loss": 0.4447, + "step": 35264 + }, + { + "epoch": 0.9682866556836903, + "grad_norm": 0.4151965081691742, + "learning_rate": 1.0520627966631715e-05, + "loss": 0.4686, + "step": 35265 + }, + { + "epoch": 0.9683141131246568, + "grad_norm": 0.37374067306518555, + "learning_rate": 1.052019665846599e-05, + "loss": 0.4936, + "step": 35266 + }, + { + "epoch": 0.9683415705656233, + "grad_norm": 0.49310266971588135, + "learning_rate": 1.0519765349329938e-05, + "loss": 0.5591, + "step": 35267 + }, + { + "epoch": 0.9683690280065897, + "grad_norm": 0.37933409214019775, + "learning_rate": 1.0519334039224357e-05, + "loss": 0.4465, + "step": 35268 + }, + { + "epoch": 0.9683964854475563, + "grad_norm": 0.3648313581943512, + "learning_rate": 1.0518902728150046e-05, + "loss": 0.439, + "step": 35269 + }, + { + "epoch": 0.9684239428885227, + "grad_norm": 0.38375306129455566, + "learning_rate": 1.0518471416107822e-05, + "loss": 0.6307, + "step": 35270 + }, + { + "epoch": 0.9684514003294893, + "grad_norm": 0.3378183841705322, + "learning_rate": 1.051804010309848e-05, + "loss": 0.4507, + "step": 35271 + }, + { + "epoch": 0.9684788577704558, + "grad_norm": 0.4398214817047119, + "learning_rate": 1.0517608789122829e-05, + "loss": 0.4443, + "step": 35272 + }, + { + "epoch": 0.9685063152114223, + "grad_norm": 0.3931901156902313, + "learning_rate": 1.0517177474181672e-05, + "loss": 0.4875, + "step": 35273 + }, + { + "epoch": 0.9685337726523888, + "grad_norm": 0.37506967782974243, + "learning_rate": 1.0516746158275814e-05, + "loss": 0.4567, + "step": 35274 + }, + { + "epoch": 0.9685612300933553, + "grad_norm": 0.38987135887145996, + "learning_rate": 1.0516314841406061e-05, + "loss": 0.5107, + "step": 35275 + }, + { + "epoch": 0.9685886875343218, + "grad_norm": 0.3943865895271301, + "learning_rate": 1.0515883523573213e-05, + "loss": 0.4887, + "step": 35276 + }, + { + "epoch": 0.9686161449752883, + "grad_norm": 0.39142268896102905, + "learning_rate": 1.0515452204778082e-05, + "loss": 0.4862, + "step": 35277 + }, + { + "epoch": 0.9686436024162548, + "grad_norm": 0.4039900302886963, + "learning_rate": 1.0515020885021465e-05, + "loss": 0.5247, + "step": 35278 + }, + { + "epoch": 0.9686710598572214, + "grad_norm": 0.3889152407646179, + "learning_rate": 1.051458956430417e-05, + "loss": 0.5058, + "step": 35279 + }, + { + "epoch": 0.9686985172981878, + "grad_norm": 0.5194513201713562, + "learning_rate": 1.0514158242627004e-05, + "loss": 0.5264, + "step": 35280 + }, + { + "epoch": 0.9687259747391543, + "grad_norm": 0.4398387372493744, + "learning_rate": 1.0513726919990763e-05, + "loss": 0.5597, + "step": 35281 + }, + { + "epoch": 0.9687534321801208, + "grad_norm": 0.3662051856517792, + "learning_rate": 1.0513295596396262e-05, + "loss": 0.4112, + "step": 35282 + }, + { + "epoch": 0.9687808896210873, + "grad_norm": 0.37442800402641296, + "learning_rate": 1.0512864271844298e-05, + "loss": 0.4814, + "step": 35283 + }, + { + "epoch": 0.9688083470620538, + "grad_norm": 0.40492838621139526, + "learning_rate": 1.0512432946335682e-05, + "loss": 0.4686, + "step": 35284 + }, + { + "epoch": 0.9688358045030203, + "grad_norm": 0.3571101427078247, + "learning_rate": 1.0512001619871213e-05, + "loss": 0.4343, + "step": 35285 + }, + { + "epoch": 0.9688632619439869, + "grad_norm": 0.41798287630081177, + "learning_rate": 1.0511570292451695e-05, + "loss": 0.6321, + "step": 35286 + }, + { + "epoch": 0.9688907193849533, + "grad_norm": 0.3878006637096405, + "learning_rate": 1.0511138964077941e-05, + "loss": 0.456, + "step": 35287 + }, + { + "epoch": 0.9689181768259199, + "grad_norm": 0.4293641746044159, + "learning_rate": 1.0510707634750746e-05, + "loss": 0.4226, + "step": 35288 + }, + { + "epoch": 0.9689456342668863, + "grad_norm": 0.38315150141716003, + "learning_rate": 1.0510276304470919e-05, + "loss": 0.5005, + "step": 35289 + }, + { + "epoch": 0.9689730917078528, + "grad_norm": 0.3696073889732361, + "learning_rate": 1.050984497323926e-05, + "loss": 0.4421, + "step": 35290 + }, + { + "epoch": 0.9690005491488193, + "grad_norm": 0.3762882947921753, + "learning_rate": 1.0509413641056583e-05, + "loss": 0.5326, + "step": 35291 + }, + { + "epoch": 0.9690280065897858, + "grad_norm": 0.3689131736755371, + "learning_rate": 1.0508982307923686e-05, + "loss": 0.4537, + "step": 35292 + }, + { + "epoch": 0.9690554640307524, + "grad_norm": 0.46575266122817993, + "learning_rate": 1.0508550973841372e-05, + "loss": 0.6473, + "step": 35293 + }, + { + "epoch": 0.9690829214717188, + "grad_norm": 0.40208199620246887, + "learning_rate": 1.050811963881045e-05, + "loss": 0.5051, + "step": 35294 + }, + { + "epoch": 0.9691103789126854, + "grad_norm": 0.35573604702949524, + "learning_rate": 1.050768830283172e-05, + "loss": 0.4128, + "step": 35295 + }, + { + "epoch": 0.9691378363536518, + "grad_norm": 0.5084167718887329, + "learning_rate": 1.0507256965905995e-05, + "loss": 0.4701, + "step": 35296 + }, + { + "epoch": 0.9691652937946184, + "grad_norm": 0.37014779448509216, + "learning_rate": 1.050682562803407e-05, + "loss": 0.5334, + "step": 35297 + }, + { + "epoch": 0.9691927512355848, + "grad_norm": 0.3922448754310608, + "learning_rate": 1.0506394289216752e-05, + "loss": 0.4568, + "step": 35298 + }, + { + "epoch": 0.9692202086765513, + "grad_norm": 0.40157178044319153, + "learning_rate": 1.0505962949454852e-05, + "loss": 0.4894, + "step": 35299 + }, + { + "epoch": 0.9692476661175179, + "grad_norm": 0.3696901798248291, + "learning_rate": 1.0505531608749162e-05, + "loss": 0.4434, + "step": 35300 + }, + { + "epoch": 0.9692751235584843, + "grad_norm": 0.3998410105705261, + "learning_rate": 1.05051002671005e-05, + "loss": 0.41, + "step": 35301 + }, + { + "epoch": 0.9693025809994509, + "grad_norm": 0.40089818835258484, + "learning_rate": 1.0504668924509664e-05, + "loss": 0.524, + "step": 35302 + }, + { + "epoch": 0.9693300384404173, + "grad_norm": 0.4077780246734619, + "learning_rate": 1.0504237580977459e-05, + "loss": 0.532, + "step": 35303 + }, + { + "epoch": 0.9693574958813839, + "grad_norm": 0.571808397769928, + "learning_rate": 1.050380623650469e-05, + "loss": 0.5187, + "step": 35304 + }, + { + "epoch": 0.9693849533223503, + "grad_norm": 0.39623314142227173, + "learning_rate": 1.050337489109216e-05, + "loss": 0.4226, + "step": 35305 + }, + { + "epoch": 0.9694124107633169, + "grad_norm": 0.41927003860473633, + "learning_rate": 1.0502943544740678e-05, + "loss": 0.5073, + "step": 35306 + }, + { + "epoch": 0.9694398682042834, + "grad_norm": 0.42365169525146484, + "learning_rate": 1.0502512197451045e-05, + "loss": 0.4597, + "step": 35307 + }, + { + "epoch": 0.9694673256452498, + "grad_norm": 0.39856570959091187, + "learning_rate": 1.0502080849224063e-05, + "loss": 0.483, + "step": 35308 + }, + { + "epoch": 0.9694947830862164, + "grad_norm": 0.4248426556587219, + "learning_rate": 1.0501649500060544e-05, + "loss": 0.4632, + "step": 35309 + }, + { + "epoch": 0.9695222405271828, + "grad_norm": 0.3828819692134857, + "learning_rate": 1.050121814996129e-05, + "loss": 0.4544, + "step": 35310 + }, + { + "epoch": 0.9695496979681494, + "grad_norm": 0.4011395275592804, + "learning_rate": 1.0500786798927098e-05, + "loss": 0.5924, + "step": 35311 + }, + { + "epoch": 0.9695771554091158, + "grad_norm": 0.46283507347106934, + "learning_rate": 1.0500355446958784e-05, + "loss": 0.6021, + "step": 35312 + }, + { + "epoch": 0.9696046128500824, + "grad_norm": 0.5221994519233704, + "learning_rate": 1.0499924094057144e-05, + "loss": 0.4555, + "step": 35313 + }, + { + "epoch": 0.9696320702910489, + "grad_norm": 0.45823419094085693, + "learning_rate": 1.049949274022299e-05, + "loss": 0.4055, + "step": 35314 + }, + { + "epoch": 0.9696595277320154, + "grad_norm": 0.36934179067611694, + "learning_rate": 1.049906138545712e-05, + "loss": 0.4038, + "step": 35315 + }, + { + "epoch": 0.9696869851729819, + "grad_norm": 0.3776303827762604, + "learning_rate": 1.049863002976034e-05, + "loss": 0.5189, + "step": 35316 + }, + { + "epoch": 0.9697144426139483, + "grad_norm": 0.42835167050361633, + "learning_rate": 1.049819867313346e-05, + "loss": 0.4966, + "step": 35317 + }, + { + "epoch": 0.9697419000549149, + "grad_norm": 0.377970814704895, + "learning_rate": 1.0497767315577274e-05, + "loss": 0.4941, + "step": 35318 + }, + { + "epoch": 0.9697693574958813, + "grad_norm": 0.4862080216407776, + "learning_rate": 1.0497335957092598e-05, + "loss": 0.5093, + "step": 35319 + }, + { + "epoch": 0.9697968149368479, + "grad_norm": 0.3646729290485382, + "learning_rate": 1.0496904597680233e-05, + "loss": 0.465, + "step": 35320 + }, + { + "epoch": 0.9698242723778144, + "grad_norm": 0.4391828179359436, + "learning_rate": 1.0496473237340978e-05, + "loss": 0.483, + "step": 35321 + }, + { + "epoch": 0.9698517298187809, + "grad_norm": 0.38260188698768616, + "learning_rate": 1.0496041876075645e-05, + "loss": 0.5498, + "step": 35322 + }, + { + "epoch": 0.9698791872597474, + "grad_norm": 0.3941715359687805, + "learning_rate": 1.0495610513885034e-05, + "loss": 0.4861, + "step": 35323 + }, + { + "epoch": 0.9699066447007139, + "grad_norm": 0.39455848932266235, + "learning_rate": 1.0495179150769952e-05, + "loss": 0.4654, + "step": 35324 + }, + { + "epoch": 0.9699341021416804, + "grad_norm": 0.38729339838027954, + "learning_rate": 1.0494747786731203e-05, + "loss": 0.547, + "step": 35325 + }, + { + "epoch": 0.9699615595826468, + "grad_norm": 0.41798803210258484, + "learning_rate": 1.0494316421769591e-05, + "loss": 0.5022, + "step": 35326 + }, + { + "epoch": 0.9699890170236134, + "grad_norm": 0.4365840256214142, + "learning_rate": 1.049388505588592e-05, + "loss": 0.5822, + "step": 35327 + }, + { + "epoch": 0.97001647446458, + "grad_norm": 0.38248059153556824, + "learning_rate": 1.0493453689081e-05, + "loss": 0.4805, + "step": 35328 + }, + { + "epoch": 0.9700439319055464, + "grad_norm": 0.3681471049785614, + "learning_rate": 1.0493022321355624e-05, + "loss": 0.4746, + "step": 35329 + }, + { + "epoch": 0.9700713893465129, + "grad_norm": 0.353360652923584, + "learning_rate": 1.049259095271061e-05, + "loss": 0.4517, + "step": 35330 + }, + { + "epoch": 0.9700988467874794, + "grad_norm": 0.40918052196502686, + "learning_rate": 1.0492159583146753e-05, + "loss": 0.4803, + "step": 35331 + }, + { + "epoch": 0.9701263042284459, + "grad_norm": 0.447299063205719, + "learning_rate": 1.0491728212664865e-05, + "loss": 0.5057, + "step": 35332 + }, + { + "epoch": 0.9701537616694124, + "grad_norm": 0.37964752316474915, + "learning_rate": 1.0491296841265745e-05, + "loss": 0.4548, + "step": 35333 + }, + { + "epoch": 0.9701812191103789, + "grad_norm": 0.9088616967201233, + "learning_rate": 1.04908654689502e-05, + "loss": 0.5351, + "step": 35334 + }, + { + "epoch": 0.9702086765513455, + "grad_norm": 0.43339815735816956, + "learning_rate": 1.0490434095719033e-05, + "loss": 0.5219, + "step": 35335 + }, + { + "epoch": 0.9702361339923119, + "grad_norm": 0.3695591986179352, + "learning_rate": 1.049000272157305e-05, + "loss": 0.3895, + "step": 35336 + }, + { + "epoch": 0.9702635914332784, + "grad_norm": 0.35835427045822144, + "learning_rate": 1.0489571346513057e-05, + "loss": 0.4565, + "step": 35337 + }, + { + "epoch": 0.9702910488742449, + "grad_norm": 0.4142257571220398, + "learning_rate": 1.0489139970539859e-05, + "loss": 0.4117, + "step": 35338 + }, + { + "epoch": 0.9703185063152114, + "grad_norm": 0.44836097955703735, + "learning_rate": 1.0488708593654253e-05, + "loss": 0.5355, + "step": 35339 + }, + { + "epoch": 0.9703459637561779, + "grad_norm": 0.39956575632095337, + "learning_rate": 1.0488277215857054e-05, + "loss": 0.4669, + "step": 35340 + }, + { + "epoch": 0.9703734211971444, + "grad_norm": 0.3683744966983795, + "learning_rate": 1.0487845837149062e-05, + "loss": 0.4581, + "step": 35341 + }, + { + "epoch": 0.970400878638111, + "grad_norm": 0.4864336848258972, + "learning_rate": 1.048741445753108e-05, + "loss": 0.5039, + "step": 35342 + }, + { + "epoch": 0.9704283360790774, + "grad_norm": 0.37478581070899963, + "learning_rate": 1.0486983077003916e-05, + "loss": 0.4951, + "step": 35343 + }, + { + "epoch": 0.970455793520044, + "grad_norm": 0.3930501937866211, + "learning_rate": 1.0486551695568372e-05, + "loss": 0.4844, + "step": 35344 + }, + { + "epoch": 0.9704832509610104, + "grad_norm": 0.4828488826751709, + "learning_rate": 1.0486120313225255e-05, + "loss": 0.4417, + "step": 35345 + }, + { + "epoch": 0.970510708401977, + "grad_norm": 0.44286757707595825, + "learning_rate": 1.0485688929975368e-05, + "loss": 0.5165, + "step": 35346 + }, + { + "epoch": 0.9705381658429434, + "grad_norm": 0.5261597037315369, + "learning_rate": 1.0485257545819514e-05, + "loss": 0.4865, + "step": 35347 + }, + { + "epoch": 0.9705656232839099, + "grad_norm": 0.4111712574958801, + "learning_rate": 1.0484826160758503e-05, + "loss": 0.5037, + "step": 35348 + }, + { + "epoch": 0.9705930807248765, + "grad_norm": 0.5316330194473267, + "learning_rate": 1.0484394774793134e-05, + "loss": 0.5375, + "step": 35349 + }, + { + "epoch": 0.9706205381658429, + "grad_norm": 0.35026755928993225, + "learning_rate": 1.0483963387924216e-05, + "loss": 0.4055, + "step": 35350 + }, + { + "epoch": 0.9706479956068095, + "grad_norm": 0.41622406244277954, + "learning_rate": 1.0483532000152552e-05, + "loss": 0.5322, + "step": 35351 + }, + { + "epoch": 0.9706754530477759, + "grad_norm": 0.41895249485969543, + "learning_rate": 1.0483100611478944e-05, + "loss": 0.5284, + "step": 35352 + }, + { + "epoch": 0.9707029104887425, + "grad_norm": 0.3625195324420929, + "learning_rate": 1.0482669221904203e-05, + "loss": 0.415, + "step": 35353 + }, + { + "epoch": 0.9707303679297089, + "grad_norm": 0.4268939793109894, + "learning_rate": 1.0482237831429125e-05, + "loss": 0.4894, + "step": 35354 + }, + { + "epoch": 0.9707578253706755, + "grad_norm": 0.3716507852077484, + "learning_rate": 1.0481806440054522e-05, + "loss": 0.4924, + "step": 35355 + }, + { + "epoch": 0.970785282811642, + "grad_norm": 0.8084063529968262, + "learning_rate": 1.0481375047781199e-05, + "loss": 0.576, + "step": 35356 + }, + { + "epoch": 0.9708127402526084, + "grad_norm": 0.48432406783103943, + "learning_rate": 1.0480943654609954e-05, + "loss": 0.4939, + "step": 35357 + }, + { + "epoch": 0.970840197693575, + "grad_norm": 0.41810891032218933, + "learning_rate": 1.0480512260541597e-05, + "loss": 0.4876, + "step": 35358 + }, + { + "epoch": 0.9708676551345414, + "grad_norm": 0.37597423791885376, + "learning_rate": 1.0480080865576931e-05, + "loss": 0.4973, + "step": 35359 + }, + { + "epoch": 0.970895112575508, + "grad_norm": 0.390659362077713, + "learning_rate": 1.0479649469716762e-05, + "loss": 0.4367, + "step": 35360 + }, + { + "epoch": 0.9709225700164744, + "grad_norm": 0.3939586281776428, + "learning_rate": 1.0479218072961892e-05, + "loss": 0.4013, + "step": 35361 + }, + { + "epoch": 0.970950027457441, + "grad_norm": 0.4134536683559418, + "learning_rate": 1.0478786675313128e-05, + "loss": 0.4821, + "step": 35362 + }, + { + "epoch": 0.9709774848984075, + "grad_norm": 0.40027040243148804, + "learning_rate": 1.0478355276771275e-05, + "loss": 0.4297, + "step": 35363 + }, + { + "epoch": 0.971004942339374, + "grad_norm": 0.4115735590457916, + "learning_rate": 1.0477923877337135e-05, + "loss": 0.4577, + "step": 35364 + }, + { + "epoch": 0.9710323997803405, + "grad_norm": 0.427608847618103, + "learning_rate": 1.0477492477011514e-05, + "loss": 0.5014, + "step": 35365 + }, + { + "epoch": 0.9710598572213069, + "grad_norm": 0.36481326818466187, + "learning_rate": 1.047706107579522e-05, + "loss": 0.5228, + "step": 35366 + }, + { + "epoch": 0.9710873146622735, + "grad_norm": 0.3926599621772766, + "learning_rate": 1.0476629673689052e-05, + "loss": 0.5393, + "step": 35367 + }, + { + "epoch": 0.9711147721032399, + "grad_norm": 0.37156835198402405, + "learning_rate": 1.047619827069382e-05, + "loss": 0.5211, + "step": 35368 + }, + { + "epoch": 0.9711422295442065, + "grad_norm": 0.3369171917438507, + "learning_rate": 1.0475766866810326e-05, + "loss": 0.4539, + "step": 35369 + }, + { + "epoch": 0.971169686985173, + "grad_norm": 0.3951667547225952, + "learning_rate": 1.0475335462039373e-05, + "loss": 0.4646, + "step": 35370 + }, + { + "epoch": 0.9711971444261395, + "grad_norm": 0.46727484464645386, + "learning_rate": 1.047490405638177e-05, + "loss": 0.4513, + "step": 35371 + }, + { + "epoch": 0.971224601867106, + "grad_norm": 0.44600722193717957, + "learning_rate": 1.0474472649838318e-05, + "loss": 0.46, + "step": 35372 + }, + { + "epoch": 0.9712520593080725, + "grad_norm": 0.3673432171344757, + "learning_rate": 1.047404124240982e-05, + "loss": 0.4174, + "step": 35373 + }, + { + "epoch": 0.971279516749039, + "grad_norm": 0.44221073389053345, + "learning_rate": 1.047360983409709e-05, + "loss": 0.5845, + "step": 35374 + }, + { + "epoch": 0.9713069741900054, + "grad_norm": 0.39678990840911865, + "learning_rate": 1.047317842490092e-05, + "loss": 0.4527, + "step": 35375 + }, + { + "epoch": 0.971334431630972, + "grad_norm": 0.4277998208999634, + "learning_rate": 1.0472747014822127e-05, + "loss": 0.4347, + "step": 35376 + }, + { + "epoch": 0.9713618890719385, + "grad_norm": 0.3877805769443512, + "learning_rate": 1.0472315603861507e-05, + "loss": 0.4699, + "step": 35377 + }, + { + "epoch": 0.971389346512905, + "grad_norm": 0.36032816767692566, + "learning_rate": 1.0471884192019867e-05, + "loss": 0.4557, + "step": 35378 + }, + { + "epoch": 0.9714168039538715, + "grad_norm": 0.40084442496299744, + "learning_rate": 1.0471452779298015e-05, + "loss": 0.4652, + "step": 35379 + }, + { + "epoch": 0.971444261394838, + "grad_norm": 0.3588332533836365, + "learning_rate": 1.047102136569675e-05, + "loss": 0.4735, + "step": 35380 + }, + { + "epoch": 0.9714717188358045, + "grad_norm": 0.3769461512565613, + "learning_rate": 1.0470589951216883e-05, + "loss": 0.4502, + "step": 35381 + }, + { + "epoch": 0.971499176276771, + "grad_norm": 0.41019707918167114, + "learning_rate": 1.0470158535859214e-05, + "loss": 0.4452, + "step": 35382 + }, + { + "epoch": 0.9715266337177375, + "grad_norm": 0.3764742910861969, + "learning_rate": 1.0469727119624548e-05, + "loss": 0.4747, + "step": 35383 + }, + { + "epoch": 0.9715540911587041, + "grad_norm": 0.38040587306022644, + "learning_rate": 1.0469295702513693e-05, + "loss": 0.4518, + "step": 35384 + }, + { + "epoch": 0.9715815485996705, + "grad_norm": 0.38592129945755005, + "learning_rate": 1.0468864284527451e-05, + "loss": 0.4655, + "step": 35385 + }, + { + "epoch": 0.971609006040637, + "grad_norm": 0.35410210490226746, + "learning_rate": 1.0468432865666627e-05, + "loss": 0.4987, + "step": 35386 + }, + { + "epoch": 0.9716364634816035, + "grad_norm": 0.621239960193634, + "learning_rate": 1.0468001445932028e-05, + "loss": 0.4679, + "step": 35387 + }, + { + "epoch": 0.97166392092257, + "grad_norm": 0.40800392627716064, + "learning_rate": 1.0467570025324455e-05, + "loss": 0.4619, + "step": 35388 + }, + { + "epoch": 0.9716913783635365, + "grad_norm": 0.4141329824924469, + "learning_rate": 1.0467138603844715e-05, + "loss": 0.468, + "step": 35389 + }, + { + "epoch": 0.971718835804503, + "grad_norm": 0.41259297728538513, + "learning_rate": 1.0466707181493614e-05, + "loss": 0.4847, + "step": 35390 + }, + { + "epoch": 0.9717462932454696, + "grad_norm": 0.41452136635780334, + "learning_rate": 1.0466275758271953e-05, + "loss": 0.5279, + "step": 35391 + }, + { + "epoch": 0.971773750686436, + "grad_norm": 0.3620988726615906, + "learning_rate": 1.046584433418054e-05, + "loss": 0.4825, + "step": 35392 + }, + { + "epoch": 0.9718012081274026, + "grad_norm": 0.4282592833042145, + "learning_rate": 1.0465412909220176e-05, + "loss": 0.4838, + "step": 35393 + }, + { + "epoch": 0.971828665568369, + "grad_norm": 0.3774849474430084, + "learning_rate": 1.0464981483391671e-05, + "loss": 0.4821, + "step": 35394 + }, + { + "epoch": 0.9718561230093355, + "grad_norm": 0.42511358857154846, + "learning_rate": 1.046455005669583e-05, + "loss": 0.5358, + "step": 35395 + }, + { + "epoch": 0.971883580450302, + "grad_norm": 0.4402100741863251, + "learning_rate": 1.046411862913345e-05, + "loss": 0.5471, + "step": 35396 + }, + { + "epoch": 0.9719110378912685, + "grad_norm": 0.4291347563266754, + "learning_rate": 1.0463687200705343e-05, + "loss": 0.4387, + "step": 35397 + }, + { + "epoch": 0.9719384953322351, + "grad_norm": 0.3888815939426422, + "learning_rate": 1.0463255771412311e-05, + "loss": 0.5291, + "step": 35398 + }, + { + "epoch": 0.9719659527732015, + "grad_norm": 0.3810955286026001, + "learning_rate": 1.0462824341255158e-05, + "loss": 0.5237, + "step": 35399 + }, + { + "epoch": 0.9719934102141681, + "grad_norm": 0.38539084792137146, + "learning_rate": 1.0462392910234692e-05, + "loss": 0.5618, + "step": 35400 + }, + { + "epoch": 0.9720208676551345, + "grad_norm": 0.4059462249279022, + "learning_rate": 1.0461961478351712e-05, + "loss": 0.4944, + "step": 35401 + }, + { + "epoch": 0.9720483250961011, + "grad_norm": 0.40650659799575806, + "learning_rate": 1.046153004560703e-05, + "loss": 0.5987, + "step": 35402 + }, + { + "epoch": 0.9720757825370675, + "grad_norm": 0.44722166657447815, + "learning_rate": 1.0461098612001449e-05, + "loss": 0.4993, + "step": 35403 + }, + { + "epoch": 0.972103239978034, + "grad_norm": 0.4423178732395172, + "learning_rate": 1.0460667177535766e-05, + "loss": 0.4215, + "step": 35404 + }, + { + "epoch": 0.9721306974190006, + "grad_norm": 0.47665658593177795, + "learning_rate": 1.0460235742210797e-05, + "loss": 0.4136, + "step": 35405 + }, + { + "epoch": 0.972158154859967, + "grad_norm": 0.4222513437271118, + "learning_rate": 1.0459804306027338e-05, + "loss": 0.5032, + "step": 35406 + }, + { + "epoch": 0.9721856123009336, + "grad_norm": 0.4050971567630768, + "learning_rate": 1.0459372868986197e-05, + "loss": 0.4598, + "step": 35407 + }, + { + "epoch": 0.9722130697419, + "grad_norm": 0.391973614692688, + "learning_rate": 1.045894143108818e-05, + "loss": 0.5774, + "step": 35408 + }, + { + "epoch": 0.9722405271828666, + "grad_norm": 0.5274940133094788, + "learning_rate": 1.0458509992334092e-05, + "loss": 0.5961, + "step": 35409 + }, + { + "epoch": 0.972267984623833, + "grad_norm": 0.5191110372543335, + "learning_rate": 1.0458078552724735e-05, + "loss": 0.4117, + "step": 35410 + }, + { + "epoch": 0.9722954420647996, + "grad_norm": 0.382641464471817, + "learning_rate": 1.0457647112260915e-05, + "loss": 0.4512, + "step": 35411 + }, + { + "epoch": 0.9723228995057661, + "grad_norm": 0.4201924502849579, + "learning_rate": 1.0457215670943439e-05, + "loss": 0.4548, + "step": 35412 + }, + { + "epoch": 0.9723503569467326, + "grad_norm": 0.398602694272995, + "learning_rate": 1.0456784228773109e-05, + "loss": 0.4804, + "step": 35413 + }, + { + "epoch": 0.9723778143876991, + "grad_norm": 0.3306635320186615, + "learning_rate": 1.045635278575073e-05, + "loss": 0.4602, + "step": 35414 + }, + { + "epoch": 0.9724052718286655, + "grad_norm": 0.3934793174266815, + "learning_rate": 1.0455921341877109e-05, + "loss": 0.5047, + "step": 35415 + }, + { + "epoch": 0.9724327292696321, + "grad_norm": 0.3212246298789978, + "learning_rate": 1.0455489897153045e-05, + "loss": 0.3287, + "step": 35416 + }, + { + "epoch": 0.9724601867105985, + "grad_norm": 0.4272630214691162, + "learning_rate": 1.0455058451579352e-05, + "loss": 0.4695, + "step": 35417 + }, + { + "epoch": 0.9724876441515651, + "grad_norm": 0.38015344738960266, + "learning_rate": 1.0454627005156828e-05, + "loss": 0.5063, + "step": 35418 + }, + { + "epoch": 0.9725151015925316, + "grad_norm": 0.41025152802467346, + "learning_rate": 1.0454195557886277e-05, + "loss": 0.4949, + "step": 35419 + }, + { + "epoch": 0.9725425590334981, + "grad_norm": 0.3468673825263977, + "learning_rate": 1.0453764109768509e-05, + "loss": 0.4876, + "step": 35420 + }, + { + "epoch": 0.9725700164744646, + "grad_norm": 0.41677606105804443, + "learning_rate": 1.0453332660804327e-05, + "loss": 0.5154, + "step": 35421 + }, + { + "epoch": 0.972597473915431, + "grad_norm": 0.4540795385837555, + "learning_rate": 1.0452901210994532e-05, + "loss": 0.6024, + "step": 35422 + }, + { + "epoch": 0.9726249313563976, + "grad_norm": 0.35282301902770996, + "learning_rate": 1.0452469760339935e-05, + "loss": 0.4304, + "step": 35423 + }, + { + "epoch": 0.972652388797364, + "grad_norm": 0.3482348322868347, + "learning_rate": 1.0452038308841335e-05, + "loss": 0.5325, + "step": 35424 + }, + { + "epoch": 0.9726798462383306, + "grad_norm": 0.4009857177734375, + "learning_rate": 1.0451606856499541e-05, + "loss": 0.4892, + "step": 35425 + }, + { + "epoch": 0.9727073036792971, + "grad_norm": 0.3899390399456024, + "learning_rate": 1.0451175403315355e-05, + "loss": 0.5077, + "step": 35426 + }, + { + "epoch": 0.9727347611202636, + "grad_norm": 0.38577014207839966, + "learning_rate": 1.0450743949289581e-05, + "loss": 0.5133, + "step": 35427 + }, + { + "epoch": 0.9727622185612301, + "grad_norm": 0.5092644095420837, + "learning_rate": 1.045031249442303e-05, + "loss": 0.5407, + "step": 35428 + }, + { + "epoch": 0.9727896760021966, + "grad_norm": 0.3680839240550995, + "learning_rate": 1.0449881038716498e-05, + "loss": 0.4855, + "step": 35429 + }, + { + "epoch": 0.9728171334431631, + "grad_norm": 0.34423157572746277, + "learning_rate": 1.0449449582170796e-05, + "loss": 0.4315, + "step": 35430 + }, + { + "epoch": 0.9728445908841296, + "grad_norm": 0.4277671277523041, + "learning_rate": 1.0449018124786729e-05, + "loss": 0.4939, + "step": 35431 + }, + { + "epoch": 0.9728720483250961, + "grad_norm": 0.37755700945854187, + "learning_rate": 1.0448586666565097e-05, + "loss": 0.3719, + "step": 35432 + }, + { + "epoch": 0.9728995057660627, + "grad_norm": 0.45846495032310486, + "learning_rate": 1.0448155207506709e-05, + "loss": 0.4881, + "step": 35433 + }, + { + "epoch": 0.9729269632070291, + "grad_norm": 0.4090428054332733, + "learning_rate": 1.044772374761237e-05, + "loss": 0.4847, + "step": 35434 + }, + { + "epoch": 0.9729544206479956, + "grad_norm": 0.39559710025787354, + "learning_rate": 1.0447292286882882e-05, + "loss": 0.4499, + "step": 35435 + }, + { + "epoch": 0.9729818780889621, + "grad_norm": 0.3551698327064514, + "learning_rate": 1.0446860825319051e-05, + "loss": 0.4736, + "step": 35436 + }, + { + "epoch": 0.9730093355299286, + "grad_norm": 0.3916212022304535, + "learning_rate": 1.0446429362921681e-05, + "loss": 0.4328, + "step": 35437 + }, + { + "epoch": 0.9730367929708951, + "grad_norm": 0.37326064705848694, + "learning_rate": 1.044599789969158e-05, + "loss": 0.472, + "step": 35438 + }, + { + "epoch": 0.9730642504118616, + "grad_norm": 0.42463308572769165, + "learning_rate": 1.044556643562955e-05, + "loss": 0.4637, + "step": 35439 + }, + { + "epoch": 0.9730917078528282, + "grad_norm": 4.449949264526367, + "learning_rate": 1.0445134970736394e-05, + "loss": 0.4832, + "step": 35440 + }, + { + "epoch": 0.9731191652937946, + "grad_norm": 0.4110054075717926, + "learning_rate": 1.044470350501292e-05, + "loss": 0.4848, + "step": 35441 + }, + { + "epoch": 0.9731466227347612, + "grad_norm": 0.40774405002593994, + "learning_rate": 1.0444272038459933e-05, + "loss": 0.5706, + "step": 35442 + }, + { + "epoch": 0.9731740801757276, + "grad_norm": 0.3579520285129547, + "learning_rate": 1.0443840571078238e-05, + "loss": 0.4399, + "step": 35443 + }, + { + "epoch": 0.9732015376166941, + "grad_norm": 0.41052699089050293, + "learning_rate": 1.0443409102868638e-05, + "loss": 0.4693, + "step": 35444 + }, + { + "epoch": 0.9732289950576606, + "grad_norm": 0.5138821601867676, + "learning_rate": 1.0442977633831937e-05, + "loss": 0.5236, + "step": 35445 + }, + { + "epoch": 0.9732564524986271, + "grad_norm": 0.4262307584285736, + "learning_rate": 1.0442546163968942e-05, + "loss": 0.531, + "step": 35446 + }, + { + "epoch": 0.9732839099395937, + "grad_norm": 0.3664458990097046, + "learning_rate": 1.0442114693280456e-05, + "loss": 0.4321, + "step": 35447 + }, + { + "epoch": 0.9733113673805601, + "grad_norm": 0.4016006886959076, + "learning_rate": 1.0441683221767287e-05, + "loss": 0.4855, + "step": 35448 + }, + { + "epoch": 0.9733388248215267, + "grad_norm": 0.3579981029033661, + "learning_rate": 1.0441251749430239e-05, + "loss": 0.4541, + "step": 35449 + }, + { + "epoch": 0.9733662822624931, + "grad_norm": 0.3864275813102722, + "learning_rate": 1.0440820276270112e-05, + "loss": 0.4886, + "step": 35450 + }, + { + "epoch": 0.9733937397034597, + "grad_norm": 0.3712902069091797, + "learning_rate": 1.0440388802287718e-05, + "loss": 0.5047, + "step": 35451 + }, + { + "epoch": 0.9734211971444261, + "grad_norm": 0.4008288085460663, + "learning_rate": 1.0439957327483855e-05, + "loss": 0.5125, + "step": 35452 + }, + { + "epoch": 0.9734486545853926, + "grad_norm": 0.3411776125431061, + "learning_rate": 1.0439525851859333e-05, + "loss": 0.4019, + "step": 35453 + }, + { + "epoch": 0.9734761120263592, + "grad_norm": 0.41069066524505615, + "learning_rate": 1.0439094375414953e-05, + "loss": 0.5073, + "step": 35454 + }, + { + "epoch": 0.9735035694673256, + "grad_norm": 0.3839914798736572, + "learning_rate": 1.0438662898151526e-05, + "loss": 0.4277, + "step": 35455 + }, + { + "epoch": 0.9735310269082922, + "grad_norm": 0.4457632601261139, + "learning_rate": 1.0438231420069852e-05, + "loss": 0.5826, + "step": 35456 + }, + { + "epoch": 0.9735584843492586, + "grad_norm": 0.369057297706604, + "learning_rate": 1.0437799941170731e-05, + "loss": 0.4477, + "step": 35457 + }, + { + "epoch": 0.9735859417902252, + "grad_norm": 0.3898696303367615, + "learning_rate": 1.043736846145498e-05, + "loss": 0.4929, + "step": 35458 + }, + { + "epoch": 0.9736133992311916, + "grad_norm": 0.4229927361011505, + "learning_rate": 1.0436936980923393e-05, + "loss": 0.4832, + "step": 35459 + }, + { + "epoch": 0.9736408566721582, + "grad_norm": 0.4249531030654907, + "learning_rate": 1.043650549957678e-05, + "loss": 0.6034, + "step": 35460 + }, + { + "epoch": 0.9736683141131247, + "grad_norm": 0.4411903917789459, + "learning_rate": 1.0436074017415947e-05, + "loss": 0.4941, + "step": 35461 + }, + { + "epoch": 0.9736957715540911, + "grad_norm": 0.6471553444862366, + "learning_rate": 1.0435642534441695e-05, + "loss": 0.5333, + "step": 35462 + }, + { + "epoch": 0.9737232289950577, + "grad_norm": 0.4284100830554962, + "learning_rate": 1.0435211050654832e-05, + "loss": 0.5512, + "step": 35463 + }, + { + "epoch": 0.9737506864360241, + "grad_norm": 0.3816041052341461, + "learning_rate": 1.0434779566056159e-05, + "loss": 0.5308, + "step": 35464 + }, + { + "epoch": 0.9737781438769907, + "grad_norm": 0.4088088572025299, + "learning_rate": 1.0434348080646485e-05, + "loss": 0.5234, + "step": 35465 + }, + { + "epoch": 0.9738056013179571, + "grad_norm": 0.3623411953449249, + "learning_rate": 1.0433916594426612e-05, + "loss": 0.4233, + "step": 35466 + }, + { + "epoch": 0.9738330587589237, + "grad_norm": 0.3964916467666626, + "learning_rate": 1.0433485107397347e-05, + "loss": 0.4772, + "step": 35467 + }, + { + "epoch": 0.9738605161998902, + "grad_norm": 0.3861945569515228, + "learning_rate": 1.0433053619559496e-05, + "loss": 0.5036, + "step": 35468 + }, + { + "epoch": 0.9738879736408567, + "grad_norm": 1.1954463720321655, + "learning_rate": 1.0432622130913859e-05, + "loss": 0.5544, + "step": 35469 + }, + { + "epoch": 0.9739154310818232, + "grad_norm": 0.3810594081878662, + "learning_rate": 1.0432190641461243e-05, + "loss": 0.4936, + "step": 35470 + }, + { + "epoch": 0.9739428885227897, + "grad_norm": 1.0837926864624023, + "learning_rate": 1.0431759151202454e-05, + "loss": 0.4146, + "step": 35471 + }, + { + "epoch": 0.9739703459637562, + "grad_norm": 0.4835358262062073, + "learning_rate": 1.0431327660138299e-05, + "loss": 0.5289, + "step": 35472 + }, + { + "epoch": 0.9739978034047226, + "grad_norm": 0.6596868634223938, + "learning_rate": 1.043089616826958e-05, + "loss": 0.499, + "step": 35473 + }, + { + "epoch": 0.9740252608456892, + "grad_norm": 0.37806734442710876, + "learning_rate": 1.0430464675597098e-05, + "loss": 0.4721, + "step": 35474 + }, + { + "epoch": 0.9740527182866557, + "grad_norm": 0.4226413071155548, + "learning_rate": 1.0430033182121666e-05, + "loss": 0.4962, + "step": 35475 + }, + { + "epoch": 0.9740801757276222, + "grad_norm": 0.35174471139907837, + "learning_rate": 1.0429601687844083e-05, + "loss": 0.4699, + "step": 35476 + }, + { + "epoch": 0.9741076331685887, + "grad_norm": 0.36453482508659363, + "learning_rate": 1.0429170192765157e-05, + "loss": 0.4962, + "step": 35477 + }, + { + "epoch": 0.9741350906095552, + "grad_norm": 0.4542052745819092, + "learning_rate": 1.0428738696885691e-05, + "loss": 0.5278, + "step": 35478 + }, + { + "epoch": 0.9741625480505217, + "grad_norm": 0.4061007499694824, + "learning_rate": 1.0428307200206488e-05, + "loss": 0.5051, + "step": 35479 + }, + { + "epoch": 0.9741900054914882, + "grad_norm": 0.46456584334373474, + "learning_rate": 1.0427875702728361e-05, + "loss": 0.4781, + "step": 35480 + }, + { + "epoch": 0.9742174629324547, + "grad_norm": 0.3900653123855591, + "learning_rate": 1.0427444204452105e-05, + "loss": 0.5493, + "step": 35481 + }, + { + "epoch": 0.9742449203734213, + "grad_norm": 0.39813268184661865, + "learning_rate": 1.042701270537853e-05, + "loss": 0.6131, + "step": 35482 + }, + { + "epoch": 0.9742723778143877, + "grad_norm": 0.40616950392723083, + "learning_rate": 1.0426581205508442e-05, + "loss": 0.489, + "step": 35483 + }, + { + "epoch": 0.9742998352553542, + "grad_norm": 0.3851638436317444, + "learning_rate": 1.0426149704842638e-05, + "loss": 0.4657, + "step": 35484 + }, + { + "epoch": 0.9743272926963207, + "grad_norm": 0.3745310604572296, + "learning_rate": 1.0425718203381937e-05, + "loss": 0.5329, + "step": 35485 + }, + { + "epoch": 0.9743547501372872, + "grad_norm": 0.4325345456600189, + "learning_rate": 1.042528670112713e-05, + "loss": 0.4653, + "step": 35486 + }, + { + "epoch": 0.9743822075782537, + "grad_norm": 0.40713319182395935, + "learning_rate": 1.0424855198079028e-05, + "loss": 0.5605, + "step": 35487 + }, + { + "epoch": 0.9744096650192202, + "grad_norm": 0.45896539092063904, + "learning_rate": 1.0424423694238437e-05, + "loss": 0.5148, + "step": 35488 + }, + { + "epoch": 0.9744371224601867, + "grad_norm": 0.4434948265552521, + "learning_rate": 1.042399218960616e-05, + "loss": 0.5338, + "step": 35489 + }, + { + "epoch": 0.9744645799011532, + "grad_norm": 0.4000347852706909, + "learning_rate": 1.0423560684183003e-05, + "loss": 0.466, + "step": 35490 + }, + { + "epoch": 0.9744920373421198, + "grad_norm": 0.46256157755851746, + "learning_rate": 1.0423129177969767e-05, + "loss": 0.5897, + "step": 35491 + }, + { + "epoch": 0.9745194947830862, + "grad_norm": 0.44732221961021423, + "learning_rate": 1.0422697670967264e-05, + "loss": 0.5327, + "step": 35492 + }, + { + "epoch": 0.9745469522240527, + "grad_norm": 0.3578735888004303, + "learning_rate": 1.0422266163176293e-05, + "loss": 0.4635, + "step": 35493 + }, + { + "epoch": 0.9745744096650192, + "grad_norm": 0.42395085096359253, + "learning_rate": 1.042183465459766e-05, + "loss": 0.4467, + "step": 35494 + }, + { + "epoch": 0.9746018671059857, + "grad_norm": 0.35897156596183777, + "learning_rate": 1.0421403145232172e-05, + "loss": 0.3549, + "step": 35495 + }, + { + "epoch": 0.9746293245469522, + "grad_norm": 0.39451852440834045, + "learning_rate": 1.0420971635080635e-05, + "loss": 0.4561, + "step": 35496 + }, + { + "epoch": 0.9746567819879187, + "grad_norm": 0.39227989315986633, + "learning_rate": 1.0420540124143848e-05, + "loss": 0.5153, + "step": 35497 + }, + { + "epoch": 0.9746842394288853, + "grad_norm": 0.4376016855239868, + "learning_rate": 1.042010861242262e-05, + "loss": 0.4188, + "step": 35498 + }, + { + "epoch": 0.9747116968698517, + "grad_norm": 0.4387200176715851, + "learning_rate": 1.0419677099917755e-05, + "loss": 0.5042, + "step": 35499 + }, + { + "epoch": 0.9747391543108183, + "grad_norm": 0.3767997920513153, + "learning_rate": 1.041924558663006e-05, + "loss": 0.4181, + "step": 35500 + }, + { + "epoch": 0.9747666117517847, + "grad_norm": 0.3659318685531616, + "learning_rate": 1.0418814072560336e-05, + "loss": 0.4667, + "step": 35501 + }, + { + "epoch": 0.9747940691927512, + "grad_norm": 0.4739714562892914, + "learning_rate": 1.0418382557709392e-05, + "loss": 0.5227, + "step": 35502 + }, + { + "epoch": 0.9748215266337177, + "grad_norm": 0.392075777053833, + "learning_rate": 1.0417951042078032e-05, + "loss": 0.5335, + "step": 35503 + }, + { + "epoch": 0.9748489840746842, + "grad_norm": 0.37919655442237854, + "learning_rate": 1.0417519525667055e-05, + "loss": 0.4502, + "step": 35504 + }, + { + "epoch": 0.9748764415156508, + "grad_norm": 0.40832674503326416, + "learning_rate": 1.0417088008477276e-05, + "loss": 0.5082, + "step": 35505 + }, + { + "epoch": 0.9749038989566172, + "grad_norm": 0.40492209792137146, + "learning_rate": 1.0416656490509493e-05, + "loss": 0.4666, + "step": 35506 + }, + { + "epoch": 0.9749313563975838, + "grad_norm": 0.37874260544776917, + "learning_rate": 1.0416224971764511e-05, + "loss": 0.4996, + "step": 35507 + }, + { + "epoch": 0.9749588138385502, + "grad_norm": 0.4097778797149658, + "learning_rate": 1.0415793452243139e-05, + "loss": 0.4006, + "step": 35508 + }, + { + "epoch": 0.9749862712795168, + "grad_norm": 0.36957693099975586, + "learning_rate": 1.0415361931946178e-05, + "loss": 0.4718, + "step": 35509 + }, + { + "epoch": 0.9750137287204832, + "grad_norm": 0.4080786406993866, + "learning_rate": 1.0414930410874435e-05, + "loss": 0.4539, + "step": 35510 + }, + { + "epoch": 0.9750411861614497, + "grad_norm": 0.38348156213760376, + "learning_rate": 1.0414498889028715e-05, + "loss": 0.4384, + "step": 35511 + }, + { + "epoch": 0.9750686436024163, + "grad_norm": 0.43673014640808105, + "learning_rate": 1.0414067366409821e-05, + "loss": 0.5203, + "step": 35512 + }, + { + "epoch": 0.9750961010433827, + "grad_norm": 0.7365438342094421, + "learning_rate": 1.0413635843018562e-05, + "loss": 0.4814, + "step": 35513 + }, + { + "epoch": 0.9751235584843493, + "grad_norm": 0.3693685829639435, + "learning_rate": 1.0413204318855736e-05, + "loss": 0.4945, + "step": 35514 + }, + { + "epoch": 0.9751510159253157, + "grad_norm": 0.3943171203136444, + "learning_rate": 1.0412772793922155e-05, + "loss": 0.4798, + "step": 35515 + }, + { + "epoch": 0.9751784733662823, + "grad_norm": 0.3828733265399933, + "learning_rate": 1.041234126821862e-05, + "loss": 0.514, + "step": 35516 + }, + { + "epoch": 0.9752059308072487, + "grad_norm": 0.3972199559211731, + "learning_rate": 1.0411909741745936e-05, + "loss": 0.4664, + "step": 35517 + }, + { + "epoch": 0.9752333882482153, + "grad_norm": 0.367360919713974, + "learning_rate": 1.041147821450491e-05, + "loss": 0.5323, + "step": 35518 + }, + { + "epoch": 0.9752608456891818, + "grad_norm": 0.35214126110076904, + "learning_rate": 1.0411046686496346e-05, + "loss": 0.3837, + "step": 35519 + }, + { + "epoch": 0.9752883031301482, + "grad_norm": 0.356715589761734, + "learning_rate": 1.0410615157721048e-05, + "loss": 0.4585, + "step": 35520 + }, + { + "epoch": 0.9753157605711148, + "grad_norm": 0.41109955310821533, + "learning_rate": 1.0410183628179821e-05, + "loss": 0.4898, + "step": 35521 + }, + { + "epoch": 0.9753432180120812, + "grad_norm": 0.3209027349948883, + "learning_rate": 1.0409752097873471e-05, + "loss": 0.4247, + "step": 35522 + }, + { + "epoch": 0.9753706754530478, + "grad_norm": 0.42218342423439026, + "learning_rate": 1.0409320566802805e-05, + "loss": 0.5165, + "step": 35523 + }, + { + "epoch": 0.9753981328940142, + "grad_norm": 0.4721704125404358, + "learning_rate": 1.0408889034968625e-05, + "loss": 0.5376, + "step": 35524 + }, + { + "epoch": 0.9754255903349808, + "grad_norm": 0.37771743535995483, + "learning_rate": 1.0408457502371734e-05, + "loss": 0.4214, + "step": 35525 + }, + { + "epoch": 0.9754530477759473, + "grad_norm": 0.3869120478630066, + "learning_rate": 1.040802596901294e-05, + "loss": 0.5191, + "step": 35526 + }, + { + "epoch": 0.9754805052169138, + "grad_norm": 0.3876940608024597, + "learning_rate": 1.040759443489305e-05, + "loss": 0.515, + "step": 35527 + }, + { + "epoch": 0.9755079626578803, + "grad_norm": 0.3877614438533783, + "learning_rate": 1.0407162900012861e-05, + "loss": 0.4541, + "step": 35528 + }, + { + "epoch": 0.9755354200988468, + "grad_norm": 0.4480344355106354, + "learning_rate": 1.0406731364373189e-05, + "loss": 0.4391, + "step": 35529 + }, + { + "epoch": 0.9755628775398133, + "grad_norm": 0.4219900667667389, + "learning_rate": 1.040629982797483e-05, + "loss": 0.4997, + "step": 35530 + }, + { + "epoch": 0.9755903349807797, + "grad_norm": 0.3863013684749603, + "learning_rate": 1.0405868290818592e-05, + "loss": 0.5303, + "step": 35531 + }, + { + "epoch": 0.9756177924217463, + "grad_norm": 0.39891764521598816, + "learning_rate": 1.0405436752905283e-05, + "loss": 0.5131, + "step": 35532 + }, + { + "epoch": 0.9756452498627128, + "grad_norm": 0.3717018663883209, + "learning_rate": 1.0405005214235701e-05, + "loss": 0.5029, + "step": 35533 + }, + { + "epoch": 0.9756727073036793, + "grad_norm": 0.3987565338611603, + "learning_rate": 1.0404573674810659e-05, + "loss": 0.4924, + "step": 35534 + }, + { + "epoch": 0.9757001647446458, + "grad_norm": 0.3658978044986725, + "learning_rate": 1.0404142134630953e-05, + "loss": 0.5103, + "step": 35535 + }, + { + "epoch": 0.9757276221856123, + "grad_norm": 0.3738052248954773, + "learning_rate": 1.0403710593697396e-05, + "loss": 0.4949, + "step": 35536 + }, + { + "epoch": 0.9757550796265788, + "grad_norm": 0.3871084153652191, + "learning_rate": 1.0403279052010791e-05, + "loss": 0.5325, + "step": 35537 + }, + { + "epoch": 0.9757825370675453, + "grad_norm": 0.45978736877441406, + "learning_rate": 1.0402847509571941e-05, + "loss": 0.4831, + "step": 35538 + }, + { + "epoch": 0.9758099945085118, + "grad_norm": 0.4115150272846222, + "learning_rate": 1.0402415966381651e-05, + "loss": 0.4754, + "step": 35539 + }, + { + "epoch": 0.9758374519494784, + "grad_norm": 0.375669926404953, + "learning_rate": 1.0401984422440726e-05, + "loss": 0.4943, + "step": 35540 + }, + { + "epoch": 0.9758649093904448, + "grad_norm": 0.43864935636520386, + "learning_rate": 1.0401552877749975e-05, + "loss": 0.4812, + "step": 35541 + }, + { + "epoch": 0.9758923668314113, + "grad_norm": 0.4595482647418976, + "learning_rate": 1.0401121332310197e-05, + "loss": 0.4688, + "step": 35542 + }, + { + "epoch": 0.9759198242723778, + "grad_norm": 0.3689318001270294, + "learning_rate": 1.04006897861222e-05, + "loss": 0.4288, + "step": 35543 + }, + { + "epoch": 0.9759472817133443, + "grad_norm": 0.3703960180282593, + "learning_rate": 1.040025823918679e-05, + "loss": 0.4995, + "step": 35544 + }, + { + "epoch": 0.9759747391543108, + "grad_norm": 0.4074922204017639, + "learning_rate": 1.039982669150477e-05, + "loss": 0.4971, + "step": 35545 + }, + { + "epoch": 0.9760021965952773, + "grad_norm": 0.416729599237442, + "learning_rate": 1.0399395143076942e-05, + "loss": 0.531, + "step": 35546 + }, + { + "epoch": 0.9760296540362439, + "grad_norm": 0.43740129470825195, + "learning_rate": 1.039896359390412e-05, + "loss": 0.5094, + "step": 35547 + }, + { + "epoch": 0.9760571114772103, + "grad_norm": 0.4539346396923065, + "learning_rate": 1.03985320439871e-05, + "loss": 0.4478, + "step": 35548 + }, + { + "epoch": 0.9760845689181769, + "grad_norm": 0.3982706367969513, + "learning_rate": 1.0398100493326693e-05, + "loss": 0.5407, + "step": 35549 + }, + { + "epoch": 0.9761120263591433, + "grad_norm": 0.3888145983219147, + "learning_rate": 1.0397668941923702e-05, + "loss": 0.4899, + "step": 35550 + }, + { + "epoch": 0.9761394838001098, + "grad_norm": 0.4596226215362549, + "learning_rate": 1.039723738977893e-05, + "loss": 0.5656, + "step": 35551 + }, + { + "epoch": 0.9761669412410763, + "grad_norm": 0.4190945625305176, + "learning_rate": 1.0396805836893182e-05, + "loss": 0.4811, + "step": 35552 + }, + { + "epoch": 0.9761943986820428, + "grad_norm": 0.3958438038825989, + "learning_rate": 1.0396374283267265e-05, + "loss": 0.533, + "step": 35553 + }, + { + "epoch": 0.9762218561230094, + "grad_norm": 0.5106979608535767, + "learning_rate": 1.0395942728901985e-05, + "loss": 0.4534, + "step": 35554 + }, + { + "epoch": 0.9762493135639758, + "grad_norm": 0.4133162796497345, + "learning_rate": 1.0395511173798144e-05, + "loss": 0.4746, + "step": 35555 + }, + { + "epoch": 0.9762767710049424, + "grad_norm": 0.38522419333457947, + "learning_rate": 1.039507961795655e-05, + "loss": 0.4734, + "step": 35556 + }, + { + "epoch": 0.9763042284459088, + "grad_norm": 0.4371401071548462, + "learning_rate": 1.0394648061378005e-05, + "loss": 0.5741, + "step": 35557 + }, + { + "epoch": 0.9763316858868754, + "grad_norm": 0.351757675409317, + "learning_rate": 1.0394216504063318e-05, + "loss": 0.4048, + "step": 35558 + }, + { + "epoch": 0.9763591433278418, + "grad_norm": 0.42419517040252686, + "learning_rate": 1.0393784946013287e-05, + "loss": 0.5217, + "step": 35559 + }, + { + "epoch": 0.9763866007688083, + "grad_norm": 0.3822602927684784, + "learning_rate": 1.0393353387228726e-05, + "loss": 0.4415, + "step": 35560 + }, + { + "epoch": 0.9764140582097749, + "grad_norm": 0.4374276101589203, + "learning_rate": 1.039292182771043e-05, + "loss": 0.6365, + "step": 35561 + }, + { + "epoch": 0.9764415156507413, + "grad_norm": 0.3865448832511902, + "learning_rate": 1.0392490267459217e-05, + "loss": 0.4044, + "step": 35562 + }, + { + "epoch": 0.9764689730917079, + "grad_norm": 0.4057079553604126, + "learning_rate": 1.039205870647588e-05, + "loss": 0.5168, + "step": 35563 + }, + { + "epoch": 0.9764964305326743, + "grad_norm": 0.4008456766605377, + "learning_rate": 1.0391627144761227e-05, + "loss": 0.6055, + "step": 35564 + }, + { + "epoch": 0.9765238879736409, + "grad_norm": 0.369427353143692, + "learning_rate": 1.0391195582316067e-05, + "loss": 0.474, + "step": 35565 + }, + { + "epoch": 0.9765513454146073, + "grad_norm": 0.41058972477912903, + "learning_rate": 1.03907640191412e-05, + "loss": 0.5387, + "step": 35566 + }, + { + "epoch": 0.9765788028555739, + "grad_norm": 0.4455486238002777, + "learning_rate": 1.0390332455237436e-05, + "loss": 0.4876, + "step": 35567 + }, + { + "epoch": 0.9766062602965404, + "grad_norm": 0.5007911920547485, + "learning_rate": 1.0389900890605576e-05, + "loss": 0.543, + "step": 35568 + }, + { + "epoch": 0.9766337177375068, + "grad_norm": 0.38961565494537354, + "learning_rate": 1.0389469325246425e-05, + "loss": 0.4965, + "step": 35569 + }, + { + "epoch": 0.9766611751784734, + "grad_norm": 0.37068209052085876, + "learning_rate": 1.0389037759160792e-05, + "loss": 0.4495, + "step": 35570 + }, + { + "epoch": 0.9766886326194398, + "grad_norm": 0.48533377051353455, + "learning_rate": 1.0388606192349475e-05, + "loss": 0.4638, + "step": 35571 + }, + { + "epoch": 0.9767160900604064, + "grad_norm": 0.3933269679546356, + "learning_rate": 1.0388174624813288e-05, + "loss": 0.5139, + "step": 35572 + }, + { + "epoch": 0.9767435475013728, + "grad_norm": 0.441095769405365, + "learning_rate": 1.0387743056553032e-05, + "loss": 0.4521, + "step": 35573 + }, + { + "epoch": 0.9767710049423394, + "grad_norm": 0.40632206201553345, + "learning_rate": 1.0387311487569506e-05, + "loss": 0.5453, + "step": 35574 + }, + { + "epoch": 0.9767984623833059, + "grad_norm": 0.35889536142349243, + "learning_rate": 1.0386879917863526e-05, + "loss": 0.4354, + "step": 35575 + }, + { + "epoch": 0.9768259198242724, + "grad_norm": 0.4335116744041443, + "learning_rate": 1.0386448347435892e-05, + "loss": 0.4763, + "step": 35576 + }, + { + "epoch": 0.9768533772652389, + "grad_norm": 0.3621806502342224, + "learning_rate": 1.0386016776287403e-05, + "loss": 0.4217, + "step": 35577 + }, + { + "epoch": 0.9768808347062053, + "grad_norm": 0.3836539685726166, + "learning_rate": 1.0385585204418872e-05, + "loss": 0.4908, + "step": 35578 + }, + { + "epoch": 0.9769082921471719, + "grad_norm": 0.42796656489372253, + "learning_rate": 1.0385153631831101e-05, + "loss": 0.4895, + "step": 35579 + }, + { + "epoch": 0.9769357495881383, + "grad_norm": 0.5622440576553345, + "learning_rate": 1.0384722058524899e-05, + "loss": 0.4463, + "step": 35580 + }, + { + "epoch": 0.9769632070291049, + "grad_norm": 0.48616674542427063, + "learning_rate": 1.0384290484501064e-05, + "loss": 0.4955, + "step": 35581 + }, + { + "epoch": 0.9769906644700714, + "grad_norm": 0.43931734561920166, + "learning_rate": 1.0383858909760405e-05, + "loss": 0.4869, + "step": 35582 + }, + { + "epoch": 0.9770181219110379, + "grad_norm": 0.40451472997665405, + "learning_rate": 1.0383427334303728e-05, + "loss": 0.4634, + "step": 35583 + }, + { + "epoch": 0.9770455793520044, + "grad_norm": 0.4192011058330536, + "learning_rate": 1.0382995758131835e-05, + "loss": 0.496, + "step": 35584 + }, + { + "epoch": 0.9770730367929709, + "grad_norm": 0.3665729761123657, + "learning_rate": 1.0382564181245536e-05, + "loss": 0.4304, + "step": 35585 + }, + { + "epoch": 0.9771004942339374, + "grad_norm": 0.39660534262657166, + "learning_rate": 1.0382132603645629e-05, + "loss": 0.3989, + "step": 35586 + }, + { + "epoch": 0.9771279516749038, + "grad_norm": 0.50513756275177, + "learning_rate": 1.0381701025332924e-05, + "loss": 0.455, + "step": 35587 + }, + { + "epoch": 0.9771554091158704, + "grad_norm": 0.7616722583770752, + "learning_rate": 1.0381269446308226e-05, + "loss": 0.5142, + "step": 35588 + }, + { + "epoch": 0.977182866556837, + "grad_norm": 0.4265362024307251, + "learning_rate": 1.0380837866572336e-05, + "loss": 0.4531, + "step": 35589 + }, + { + "epoch": 0.9772103239978034, + "grad_norm": 0.37749770283699036, + "learning_rate": 1.0380406286126064e-05, + "loss": 0.4863, + "step": 35590 + }, + { + "epoch": 0.9772377814387699, + "grad_norm": 0.37879884243011475, + "learning_rate": 1.0379974704970212e-05, + "loss": 0.5376, + "step": 35591 + }, + { + "epoch": 0.9772652388797364, + "grad_norm": 0.5268937945365906, + "learning_rate": 1.0379543123105587e-05, + "loss": 0.4774, + "step": 35592 + }, + { + "epoch": 0.9772926963207029, + "grad_norm": 0.4176996350288391, + "learning_rate": 1.0379111540532991e-05, + "loss": 0.4892, + "step": 35593 + }, + { + "epoch": 0.9773201537616694, + "grad_norm": 0.4020625054836273, + "learning_rate": 1.0378679957253235e-05, + "loss": 0.5698, + "step": 35594 + }, + { + "epoch": 0.9773476112026359, + "grad_norm": 0.38314497470855713, + "learning_rate": 1.0378248373267114e-05, + "loss": 0.4778, + "step": 35595 + }, + { + "epoch": 0.9773750686436025, + "grad_norm": 0.46012088656425476, + "learning_rate": 1.0377816788575444e-05, + "loss": 0.4958, + "step": 35596 + }, + { + "epoch": 0.9774025260845689, + "grad_norm": 0.3905937969684601, + "learning_rate": 1.0377385203179022e-05, + "loss": 0.4959, + "step": 35597 + }, + { + "epoch": 0.9774299835255355, + "grad_norm": 0.4023892283439636, + "learning_rate": 1.0376953617078658e-05, + "loss": 0.4735, + "step": 35598 + }, + { + "epoch": 0.9774574409665019, + "grad_norm": 0.36518847942352295, + "learning_rate": 1.0376522030275156e-05, + "loss": 0.4581, + "step": 35599 + }, + { + "epoch": 0.9774848984074684, + "grad_norm": 0.4208669662475586, + "learning_rate": 1.0376090442769317e-05, + "loss": 0.4651, + "step": 35600 + }, + { + "epoch": 0.9775123558484349, + "grad_norm": 0.6033281087875366, + "learning_rate": 1.0375658854561952e-05, + "loss": 0.5903, + "step": 35601 + }, + { + "epoch": 0.9775398132894014, + "grad_norm": 0.4088839888572693, + "learning_rate": 1.0375227265653861e-05, + "loss": 0.4659, + "step": 35602 + }, + { + "epoch": 0.977567270730368, + "grad_norm": 0.48979827761650085, + "learning_rate": 1.0374795676045854e-05, + "loss": 0.4842, + "step": 35603 + }, + { + "epoch": 0.9775947281713344, + "grad_norm": 0.37280383706092834, + "learning_rate": 1.0374364085738733e-05, + "loss": 0.4397, + "step": 35604 + }, + { + "epoch": 0.977622185612301, + "grad_norm": 0.4246499538421631, + "learning_rate": 1.0373932494733301e-05, + "loss": 0.5531, + "step": 35605 + }, + { + "epoch": 0.9776496430532674, + "grad_norm": 0.3848366141319275, + "learning_rate": 1.037350090303037e-05, + "loss": 0.4805, + "step": 35606 + }, + { + "epoch": 0.977677100494234, + "grad_norm": 0.4599844217300415, + "learning_rate": 1.0373069310630737e-05, + "loss": 0.55, + "step": 35607 + }, + { + "epoch": 0.9777045579352004, + "grad_norm": 0.3777739107608795, + "learning_rate": 1.037263771753521e-05, + "loss": 0.4257, + "step": 35608 + }, + { + "epoch": 0.9777320153761669, + "grad_norm": 0.37875357270240784, + "learning_rate": 1.0372206123744599e-05, + "loss": 0.5005, + "step": 35609 + }, + { + "epoch": 0.9777594728171335, + "grad_norm": 0.7079125046730042, + "learning_rate": 1.03717745292597e-05, + "loss": 0.5929, + "step": 35610 + }, + { + "epoch": 0.9777869302580999, + "grad_norm": 0.40653476119041443, + "learning_rate": 1.0371342934081326e-05, + "loss": 0.4161, + "step": 35611 + }, + { + "epoch": 0.9778143876990665, + "grad_norm": 0.39858266711235046, + "learning_rate": 1.0370911338210279e-05, + "loss": 0.5063, + "step": 35612 + }, + { + "epoch": 0.9778418451400329, + "grad_norm": 0.4415059983730316, + "learning_rate": 1.0370479741647362e-05, + "loss": 0.476, + "step": 35613 + }, + { + "epoch": 0.9778693025809995, + "grad_norm": 0.37019920349121094, + "learning_rate": 1.0370048144393383e-05, + "loss": 0.4135, + "step": 35614 + }, + { + "epoch": 0.9778967600219659, + "grad_norm": 0.3642041087150574, + "learning_rate": 1.0369616546449144e-05, + "loss": 0.4655, + "step": 35615 + }, + { + "epoch": 0.9779242174629325, + "grad_norm": 0.3798244595527649, + "learning_rate": 1.0369184947815457e-05, + "loss": 0.4259, + "step": 35616 + }, + { + "epoch": 0.977951674903899, + "grad_norm": 0.43585699796676636, + "learning_rate": 1.0368753348493118e-05, + "loss": 0.5485, + "step": 35617 + }, + { + "epoch": 0.9779791323448654, + "grad_norm": 0.3948533535003662, + "learning_rate": 1.0368321748482937e-05, + "loss": 0.5907, + "step": 35618 + }, + { + "epoch": 0.978006589785832, + "grad_norm": 0.39542654156684875, + "learning_rate": 1.036789014778572e-05, + "loss": 0.5484, + "step": 35619 + }, + { + "epoch": 0.9780340472267984, + "grad_norm": 0.42235615849494934, + "learning_rate": 1.0367458546402269e-05, + "loss": 0.4998, + "step": 35620 + }, + { + "epoch": 0.978061504667765, + "grad_norm": 0.3504272401332855, + "learning_rate": 1.036702694433339e-05, + "loss": 0.4655, + "step": 35621 + }, + { + "epoch": 0.9780889621087314, + "grad_norm": 0.39316079020500183, + "learning_rate": 1.0366595341579893e-05, + "loss": 0.5527, + "step": 35622 + }, + { + "epoch": 0.978116419549698, + "grad_norm": 0.35629552602767944, + "learning_rate": 1.0366163738142574e-05, + "loss": 0.3595, + "step": 35623 + }, + { + "epoch": 0.9781438769906645, + "grad_norm": 0.36042025685310364, + "learning_rate": 1.0365732134022244e-05, + "loss": 0.5045, + "step": 35624 + }, + { + "epoch": 0.978171334431631, + "grad_norm": 0.420318603515625, + "learning_rate": 1.0365300529219708e-05, + "loss": 0.542, + "step": 35625 + }, + { + "epoch": 0.9781987918725975, + "grad_norm": 0.40186813473701477, + "learning_rate": 1.0364868923735769e-05, + "loss": 0.5343, + "step": 35626 + }, + { + "epoch": 0.978226249313564, + "grad_norm": 0.3955312967300415, + "learning_rate": 1.0364437317571232e-05, + "loss": 0.5413, + "step": 35627 + }, + { + "epoch": 0.9782537067545305, + "grad_norm": 0.6639621257781982, + "learning_rate": 1.0364005710726902e-05, + "loss": 0.5103, + "step": 35628 + }, + { + "epoch": 0.9782811641954969, + "grad_norm": 0.3867313861846924, + "learning_rate": 1.0363574103203591e-05, + "loss": 0.506, + "step": 35629 + }, + { + "epoch": 0.9783086216364635, + "grad_norm": 0.3850685954093933, + "learning_rate": 1.0363142495002093e-05, + "loss": 0.5106, + "step": 35630 + }, + { + "epoch": 0.97833607907743, + "grad_norm": 0.40667724609375, + "learning_rate": 1.036271088612322e-05, + "loss": 0.5129, + "step": 35631 + }, + { + "epoch": 0.9783635365183965, + "grad_norm": 0.3990814685821533, + "learning_rate": 1.0362279276567775e-05, + "loss": 0.5063, + "step": 35632 + }, + { + "epoch": 0.978390993959363, + "grad_norm": 0.44331759214401245, + "learning_rate": 1.0361847666336564e-05, + "loss": 0.4797, + "step": 35633 + }, + { + "epoch": 0.9784184514003295, + "grad_norm": 0.357075572013855, + "learning_rate": 1.0361416055430391e-05, + "loss": 0.4954, + "step": 35634 + }, + { + "epoch": 0.978445908841296, + "grad_norm": 0.40576261281967163, + "learning_rate": 1.0360984443850061e-05, + "loss": 0.4499, + "step": 35635 + }, + { + "epoch": 0.9784733662822624, + "grad_norm": 0.3910224139690399, + "learning_rate": 1.0360552831596382e-05, + "loss": 0.5163, + "step": 35636 + }, + { + "epoch": 0.978500823723229, + "grad_norm": 0.3894016742706299, + "learning_rate": 1.0360121218670156e-05, + "loss": 0.5221, + "step": 35637 + }, + { + "epoch": 0.9785282811641955, + "grad_norm": 0.3835654854774475, + "learning_rate": 1.0359689605072187e-05, + "loss": 0.5276, + "step": 35638 + }, + { + "epoch": 0.978555738605162, + "grad_norm": 0.40052521228790283, + "learning_rate": 1.0359257990803287e-05, + "loss": 0.5099, + "step": 35639 + }, + { + "epoch": 0.9785831960461285, + "grad_norm": 0.44491758942604065, + "learning_rate": 1.0358826375864251e-05, + "loss": 0.4935, + "step": 35640 + }, + { + "epoch": 0.978610653487095, + "grad_norm": 0.40891605615615845, + "learning_rate": 1.0358394760255891e-05, + "loss": 0.5685, + "step": 35641 + }, + { + "epoch": 0.9786381109280615, + "grad_norm": 0.5101825594902039, + "learning_rate": 1.0357963143979013e-05, + "loss": 0.5444, + "step": 35642 + }, + { + "epoch": 0.978665568369028, + "grad_norm": 0.43782755732536316, + "learning_rate": 1.0357531527034415e-05, + "loss": 0.492, + "step": 35643 + }, + { + "epoch": 0.9786930258099945, + "grad_norm": 0.40683406591415405, + "learning_rate": 1.0357099909422909e-05, + "loss": 0.5724, + "step": 35644 + }, + { + "epoch": 0.9787204832509611, + "grad_norm": 0.41675204038619995, + "learning_rate": 1.0356668291145298e-05, + "loss": 0.4927, + "step": 35645 + }, + { + "epoch": 0.9787479406919275, + "grad_norm": 0.38973888754844666, + "learning_rate": 1.0356236672202387e-05, + "loss": 0.4508, + "step": 35646 + }, + { + "epoch": 0.978775398132894, + "grad_norm": 0.48215413093566895, + "learning_rate": 1.0355805052594978e-05, + "loss": 0.5227, + "step": 35647 + }, + { + "epoch": 0.9788028555738605, + "grad_norm": 0.3846275806427002, + "learning_rate": 1.0355373432323884e-05, + "loss": 0.4509, + "step": 35648 + }, + { + "epoch": 0.978830313014827, + "grad_norm": 0.3776712119579315, + "learning_rate": 1.0354941811389903e-05, + "loss": 0.5, + "step": 35649 + }, + { + "epoch": 0.9788577704557935, + "grad_norm": 0.5428509712219238, + "learning_rate": 1.0354510189793839e-05, + "loss": 0.5673, + "step": 35650 + }, + { + "epoch": 0.97888522789676, + "grad_norm": 0.4153428077697754, + "learning_rate": 1.0354078567536503e-05, + "loss": 0.5188, + "step": 35651 + }, + { + "epoch": 0.9789126853377266, + "grad_norm": 0.40773656964302063, + "learning_rate": 1.0353646944618696e-05, + "loss": 0.5352, + "step": 35652 + }, + { + "epoch": 0.978940142778693, + "grad_norm": 0.37280815839767456, + "learning_rate": 1.0353215321041227e-05, + "loss": 0.452, + "step": 35653 + }, + { + "epoch": 0.9789676002196596, + "grad_norm": 0.3427037000656128, + "learning_rate": 1.03527836968049e-05, + "loss": 0.4406, + "step": 35654 + }, + { + "epoch": 0.978995057660626, + "grad_norm": 0.38776543736457825, + "learning_rate": 1.0352352071910513e-05, + "loss": 0.3819, + "step": 35655 + }, + { + "epoch": 0.9790225151015925, + "grad_norm": 0.37816688418388367, + "learning_rate": 1.0351920446358883e-05, + "loss": 0.4466, + "step": 35656 + }, + { + "epoch": 0.979049972542559, + "grad_norm": 0.8049741983413696, + "learning_rate": 1.0351488820150806e-05, + "loss": 0.4766, + "step": 35657 + }, + { + "epoch": 0.9790774299835255, + "grad_norm": 0.41739746928215027, + "learning_rate": 1.035105719328709e-05, + "loss": 0.5074, + "step": 35658 + }, + { + "epoch": 0.9791048874244921, + "grad_norm": 0.5432574152946472, + "learning_rate": 1.0350625565768541e-05, + "loss": 0.4318, + "step": 35659 + }, + { + "epoch": 0.9791323448654585, + "grad_norm": 0.36295273900032043, + "learning_rate": 1.0350193937595962e-05, + "loss": 0.4025, + "step": 35660 + }, + { + "epoch": 0.9791598023064251, + "grad_norm": 0.36984434723854065, + "learning_rate": 1.0349762308770163e-05, + "loss": 0.5202, + "step": 35661 + }, + { + "epoch": 0.9791872597473915, + "grad_norm": 0.3596680164337158, + "learning_rate": 1.0349330679291943e-05, + "loss": 0.4842, + "step": 35662 + }, + { + "epoch": 0.9792147171883581, + "grad_norm": 0.4076487123966217, + "learning_rate": 1.0348899049162111e-05, + "loss": 0.5257, + "step": 35663 + }, + { + "epoch": 0.9792421746293245, + "grad_norm": 0.4103064239025116, + "learning_rate": 1.0348467418381468e-05, + "loss": 0.4525, + "step": 35664 + }, + { + "epoch": 0.979269632070291, + "grad_norm": 0.37104830145835876, + "learning_rate": 1.0348035786950826e-05, + "loss": 0.4073, + "step": 35665 + }, + { + "epoch": 0.9792970895112576, + "grad_norm": 0.40959396958351135, + "learning_rate": 1.0347604154870986e-05, + "loss": 0.5085, + "step": 35666 + }, + { + "epoch": 0.979324546952224, + "grad_norm": 0.420642226934433, + "learning_rate": 1.034717252214275e-05, + "loss": 0.5385, + "step": 35667 + }, + { + "epoch": 0.9793520043931906, + "grad_norm": 0.385292649269104, + "learning_rate": 1.0346740888766928e-05, + "loss": 0.4741, + "step": 35668 + }, + { + "epoch": 0.979379461834157, + "grad_norm": 0.4030839800834656, + "learning_rate": 1.0346309254744325e-05, + "loss": 0.4411, + "step": 35669 + }, + { + "epoch": 0.9794069192751236, + "grad_norm": 0.37368035316467285, + "learning_rate": 1.0345877620075742e-05, + "loss": 0.4393, + "step": 35670 + }, + { + "epoch": 0.97943437671609, + "grad_norm": 0.39844584465026855, + "learning_rate": 1.0345445984761987e-05, + "loss": 0.46, + "step": 35671 + }, + { + "epoch": 0.9794618341570566, + "grad_norm": 0.4679358899593353, + "learning_rate": 1.0345014348803868e-05, + "loss": 0.4443, + "step": 35672 + }, + { + "epoch": 0.9794892915980231, + "grad_norm": 0.4076031744480133, + "learning_rate": 1.0344582712202185e-05, + "loss": 0.5208, + "step": 35673 + }, + { + "epoch": 0.9795167490389896, + "grad_norm": 0.4593210220336914, + "learning_rate": 1.0344151074957746e-05, + "loss": 0.5319, + "step": 35674 + }, + { + "epoch": 0.9795442064799561, + "grad_norm": 0.3903653621673584, + "learning_rate": 1.0343719437071356e-05, + "loss": 0.4399, + "step": 35675 + }, + { + "epoch": 0.9795716639209225, + "grad_norm": 0.3877254128456116, + "learning_rate": 1.0343287798543817e-05, + "loss": 0.5079, + "step": 35676 + }, + { + "epoch": 0.9795991213618891, + "grad_norm": 0.4152837097644806, + "learning_rate": 1.0342856159375937e-05, + "loss": 0.5958, + "step": 35677 + }, + { + "epoch": 0.9796265788028555, + "grad_norm": 0.39961931109428406, + "learning_rate": 1.0342424519568524e-05, + "loss": 0.4691, + "step": 35678 + }, + { + "epoch": 0.9796540362438221, + "grad_norm": 0.3945414423942566, + "learning_rate": 1.034199287912238e-05, + "loss": 0.4748, + "step": 35679 + }, + { + "epoch": 0.9796814936847886, + "grad_norm": 0.38891908526420593, + "learning_rate": 1.0341561238038307e-05, + "loss": 0.5076, + "step": 35680 + }, + { + "epoch": 0.9797089511257551, + "grad_norm": 0.3681289255619049, + "learning_rate": 1.0341129596317115e-05, + "loss": 0.4399, + "step": 35681 + }, + { + "epoch": 0.9797364085667216, + "grad_norm": 0.35155782103538513, + "learning_rate": 1.0340697953959606e-05, + "loss": 0.4553, + "step": 35682 + }, + { + "epoch": 0.9797638660076881, + "grad_norm": 0.4182716906070709, + "learning_rate": 1.034026631096659e-05, + "loss": 0.4122, + "step": 35683 + }, + { + "epoch": 0.9797913234486546, + "grad_norm": 0.37505003809928894, + "learning_rate": 1.0339834667338867e-05, + "loss": 0.4164, + "step": 35684 + }, + { + "epoch": 0.979818780889621, + "grad_norm": 0.4123368263244629, + "learning_rate": 1.033940302307724e-05, + "loss": 0.5581, + "step": 35685 + }, + { + "epoch": 0.9798462383305876, + "grad_norm": 0.38026320934295654, + "learning_rate": 1.0338971378182523e-05, + "loss": 0.4095, + "step": 35686 + }, + { + "epoch": 0.9798736957715541, + "grad_norm": 0.41206711530685425, + "learning_rate": 1.0338539732655514e-05, + "loss": 0.499, + "step": 35687 + }, + { + "epoch": 0.9799011532125206, + "grad_norm": 0.3769885003566742, + "learning_rate": 1.0338108086497021e-05, + "loss": 0.4718, + "step": 35688 + }, + { + "epoch": 0.9799286106534871, + "grad_norm": 0.3917432129383087, + "learning_rate": 1.033767643970785e-05, + "loss": 0.4843, + "step": 35689 + }, + { + "epoch": 0.9799560680944536, + "grad_norm": 0.38277876377105713, + "learning_rate": 1.0337244792288802e-05, + "loss": 0.4921, + "step": 35690 + }, + { + "epoch": 0.9799835255354201, + "grad_norm": 0.4479775130748749, + "learning_rate": 1.0336813144240685e-05, + "loss": 0.528, + "step": 35691 + }, + { + "epoch": 0.9800109829763866, + "grad_norm": 0.3771226108074188, + "learning_rate": 1.0336381495564305e-05, + "loss": 0.4445, + "step": 35692 + }, + { + "epoch": 0.9800384404173531, + "grad_norm": 0.42708665132522583, + "learning_rate": 1.0335949846260465e-05, + "loss": 0.5931, + "step": 35693 + }, + { + "epoch": 0.9800658978583197, + "grad_norm": 0.3876267969608307, + "learning_rate": 1.0335518196329973e-05, + "loss": 0.4353, + "step": 35694 + }, + { + "epoch": 0.9800933552992861, + "grad_norm": 0.43144622445106506, + "learning_rate": 1.033508654577363e-05, + "loss": 0.4513, + "step": 35695 + }, + { + "epoch": 0.9801208127402526, + "grad_norm": 0.3793286681175232, + "learning_rate": 1.0334654894592247e-05, + "loss": 0.5133, + "step": 35696 + }, + { + "epoch": 0.9801482701812191, + "grad_norm": 0.44197043776512146, + "learning_rate": 1.0334223242786625e-05, + "loss": 0.4542, + "step": 35697 + }, + { + "epoch": 0.9801757276221856, + "grad_norm": 0.42545467615127563, + "learning_rate": 1.0333791590357568e-05, + "loss": 0.4293, + "step": 35698 + }, + { + "epoch": 0.9802031850631521, + "grad_norm": 0.3774081766605377, + "learning_rate": 1.0333359937305885e-05, + "loss": 0.5179, + "step": 35699 + }, + { + "epoch": 0.9802306425041186, + "grad_norm": 0.5611757040023804, + "learning_rate": 1.033292828363238e-05, + "loss": 0.5272, + "step": 35700 + }, + { + "epoch": 0.9802580999450852, + "grad_norm": 0.41780737042427063, + "learning_rate": 1.0332496629337854e-05, + "loss": 0.4865, + "step": 35701 + }, + { + "epoch": 0.9802855573860516, + "grad_norm": 0.351351797580719, + "learning_rate": 1.033206497442312e-05, + "loss": 0.422, + "step": 35702 + }, + { + "epoch": 0.9803130148270182, + "grad_norm": 0.46179863810539246, + "learning_rate": 1.0331633318888973e-05, + "loss": 0.5166, + "step": 35703 + }, + { + "epoch": 0.9803404722679846, + "grad_norm": 0.3404046893119812, + "learning_rate": 1.033120166273623e-05, + "loss": 0.4469, + "step": 35704 + }, + { + "epoch": 0.9803679297089511, + "grad_norm": 0.5359817743301392, + "learning_rate": 1.0330770005965688e-05, + "loss": 0.5063, + "step": 35705 + }, + { + "epoch": 0.9803953871499176, + "grad_norm": 0.44622230529785156, + "learning_rate": 1.0330338348578153e-05, + "loss": 0.5078, + "step": 35706 + }, + { + "epoch": 0.9804228445908841, + "grad_norm": 0.40354377031326294, + "learning_rate": 1.0329906690574434e-05, + "loss": 0.5105, + "step": 35707 + }, + { + "epoch": 0.9804503020318507, + "grad_norm": 0.3503013253211975, + "learning_rate": 1.032947503195533e-05, + "loss": 0.4871, + "step": 35708 + }, + { + "epoch": 0.9804777594728171, + "grad_norm": 0.35856467485427856, + "learning_rate": 1.0329043372721654e-05, + "loss": 0.4272, + "step": 35709 + }, + { + "epoch": 0.9805052169137837, + "grad_norm": 0.41298797726631165, + "learning_rate": 1.0328611712874206e-05, + "loss": 0.4742, + "step": 35710 + }, + { + "epoch": 0.9805326743547501, + "grad_norm": 0.38325366377830505, + "learning_rate": 1.032818005241379e-05, + "loss": 0.4976, + "step": 35711 + }, + { + "epoch": 0.9805601317957167, + "grad_norm": 0.4280915856361389, + "learning_rate": 1.0327748391341216e-05, + "loss": 0.5781, + "step": 35712 + }, + { + "epoch": 0.9805875892366831, + "grad_norm": 0.41334372758865356, + "learning_rate": 1.0327316729657283e-05, + "loss": 0.4822, + "step": 35713 + }, + { + "epoch": 0.9806150466776496, + "grad_norm": 0.4031146466732025, + "learning_rate": 1.0326885067362806e-05, + "loss": 0.4859, + "step": 35714 + }, + { + "epoch": 0.9806425041186162, + "grad_norm": 0.3922255337238312, + "learning_rate": 1.0326453404458581e-05, + "loss": 0.5559, + "step": 35715 + }, + { + "epoch": 0.9806699615595826, + "grad_norm": 0.4018302857875824, + "learning_rate": 1.0326021740945416e-05, + "loss": 0.472, + "step": 35716 + }, + { + "epoch": 0.9806974190005492, + "grad_norm": 0.42052239179611206, + "learning_rate": 1.0325590076824117e-05, + "loss": 0.4971, + "step": 35717 + }, + { + "epoch": 0.9807248764415156, + "grad_norm": 0.36463338136672974, + "learning_rate": 1.0325158412095489e-05, + "loss": 0.4983, + "step": 35718 + }, + { + "epoch": 0.9807523338824822, + "grad_norm": 0.3915511965751648, + "learning_rate": 1.0324726746760334e-05, + "loss": 0.5701, + "step": 35719 + }, + { + "epoch": 0.9807797913234486, + "grad_norm": 0.40413761138916016, + "learning_rate": 1.0324295080819462e-05, + "loss": 0.4923, + "step": 35720 + }, + { + "epoch": 0.9808072487644152, + "grad_norm": 0.39829888939857483, + "learning_rate": 1.0323863414273674e-05, + "loss": 0.491, + "step": 35721 + }, + { + "epoch": 0.9808347062053817, + "grad_norm": 0.3947107195854187, + "learning_rate": 1.032343174712378e-05, + "loss": 0.5053, + "step": 35722 + }, + { + "epoch": 0.9808621636463482, + "grad_norm": 0.4124845862388611, + "learning_rate": 1.0323000079370581e-05, + "loss": 0.5228, + "step": 35723 + }, + { + "epoch": 0.9808896210873147, + "grad_norm": 0.4729115664958954, + "learning_rate": 1.0322568411014887e-05, + "loss": 0.5812, + "step": 35724 + }, + { + "epoch": 0.9809170785282811, + "grad_norm": 0.43015727400779724, + "learning_rate": 1.0322136742057495e-05, + "loss": 0.4521, + "step": 35725 + }, + { + "epoch": 0.9809445359692477, + "grad_norm": 0.4306129515171051, + "learning_rate": 1.0321705072499217e-05, + "loss": 0.4745, + "step": 35726 + }, + { + "epoch": 0.9809719934102141, + "grad_norm": 0.41342827677726746, + "learning_rate": 1.0321273402340858e-05, + "loss": 0.5033, + "step": 35727 + }, + { + "epoch": 0.9809994508511807, + "grad_norm": 0.4113723039627075, + "learning_rate": 1.0320841731583222e-05, + "loss": 0.5204, + "step": 35728 + }, + { + "epoch": 0.9810269082921472, + "grad_norm": 0.3630935251712799, + "learning_rate": 1.032041006022711e-05, + "loss": 0.4625, + "step": 35729 + }, + { + "epoch": 0.9810543657331137, + "grad_norm": 0.34441471099853516, + "learning_rate": 1.0319978388273334e-05, + "loss": 0.4239, + "step": 35730 + }, + { + "epoch": 0.9810818231740802, + "grad_norm": 0.4040548503398895, + "learning_rate": 1.0319546715722697e-05, + "loss": 0.5595, + "step": 35731 + }, + { + "epoch": 0.9811092806150467, + "grad_norm": 0.39866748452186584, + "learning_rate": 1.0319115042576003e-05, + "loss": 0.4412, + "step": 35732 + }, + { + "epoch": 0.9811367380560132, + "grad_norm": 0.3829689621925354, + "learning_rate": 1.0318683368834055e-05, + "loss": 0.459, + "step": 35733 + }, + { + "epoch": 0.9811641954969796, + "grad_norm": 0.37972235679626465, + "learning_rate": 1.031825169449766e-05, + "loss": 0.401, + "step": 35734 + }, + { + "epoch": 0.9811916529379462, + "grad_norm": 0.4352414309978485, + "learning_rate": 1.0317820019567628e-05, + "loss": 0.5672, + "step": 35735 + }, + { + "epoch": 0.9812191103789127, + "grad_norm": 0.4058671295642853, + "learning_rate": 1.031738834404476e-05, + "loss": 0.4709, + "step": 35736 + }, + { + "epoch": 0.9812465678198792, + "grad_norm": 0.5926852226257324, + "learning_rate": 1.0316956667929858e-05, + "loss": 0.4901, + "step": 35737 + }, + { + "epoch": 0.9812740252608457, + "grad_norm": 0.3611038029193878, + "learning_rate": 1.0316524991223736e-05, + "loss": 0.4567, + "step": 35738 + }, + { + "epoch": 0.9813014827018122, + "grad_norm": 0.40065011382102966, + "learning_rate": 1.0316093313927187e-05, + "loss": 0.523, + "step": 35739 + }, + { + "epoch": 0.9813289401427787, + "grad_norm": 0.5626040101051331, + "learning_rate": 1.0315661636041028e-05, + "loss": 0.4703, + "step": 35740 + }, + { + "epoch": 0.9813563975837452, + "grad_norm": 0.3746773302555084, + "learning_rate": 1.031522995756606e-05, + "loss": 0.4648, + "step": 35741 + }, + { + "epoch": 0.9813838550247117, + "grad_norm": 0.3581002354621887, + "learning_rate": 1.0314798278503082e-05, + "loss": 0.463, + "step": 35742 + }, + { + "epoch": 0.9814113124656783, + "grad_norm": 0.41637828946113586, + "learning_rate": 1.031436659885291e-05, + "loss": 0.4215, + "step": 35743 + }, + { + "epoch": 0.9814387699066447, + "grad_norm": 0.5147737860679626, + "learning_rate": 1.031393491861634e-05, + "loss": 0.5295, + "step": 35744 + }, + { + "epoch": 0.9814662273476112, + "grad_norm": 0.35682910680770874, + "learning_rate": 1.0313503237794185e-05, + "loss": 0.4571, + "step": 35745 + }, + { + "epoch": 0.9814936847885777, + "grad_norm": 0.4223479926586151, + "learning_rate": 1.0313071556387246e-05, + "loss": 0.5104, + "step": 35746 + }, + { + "epoch": 0.9815211422295442, + "grad_norm": 0.3948824405670166, + "learning_rate": 1.0312639874396325e-05, + "loss": 0.4993, + "step": 35747 + }, + { + "epoch": 0.9815485996705107, + "grad_norm": 0.47979676723480225, + "learning_rate": 1.0312208191822235e-05, + "loss": 0.4884, + "step": 35748 + }, + { + "epoch": 0.9815760571114772, + "grad_norm": 0.38390734791755676, + "learning_rate": 1.0311776508665776e-05, + "loss": 0.4779, + "step": 35749 + }, + { + "epoch": 0.9816035145524438, + "grad_norm": 0.3709476888179779, + "learning_rate": 1.0311344824927752e-05, + "loss": 0.4721, + "step": 35750 + }, + { + "epoch": 0.9816309719934102, + "grad_norm": 0.42721185088157654, + "learning_rate": 1.0310913140608974e-05, + "loss": 0.5721, + "step": 35751 + }, + { + "epoch": 0.9816584294343768, + "grad_norm": 0.40163007378578186, + "learning_rate": 1.031048145571024e-05, + "loss": 0.4355, + "step": 35752 + }, + { + "epoch": 0.9816858868753432, + "grad_norm": 0.4352487027645111, + "learning_rate": 1.031004977023236e-05, + "loss": 0.5441, + "step": 35753 + }, + { + "epoch": 0.9817133443163097, + "grad_norm": 0.40151605010032654, + "learning_rate": 1.030961808417614e-05, + "loss": 0.5592, + "step": 35754 + }, + { + "epoch": 0.9817408017572762, + "grad_norm": 0.37884339690208435, + "learning_rate": 1.030918639754238e-05, + "loss": 0.4996, + "step": 35755 + }, + { + "epoch": 0.9817682591982427, + "grad_norm": 0.37323030829429626, + "learning_rate": 1.0308754710331893e-05, + "loss": 0.5175, + "step": 35756 + }, + { + "epoch": 0.9817957166392092, + "grad_norm": 0.6492051482200623, + "learning_rate": 1.0308323022545476e-05, + "loss": 0.4916, + "step": 35757 + }, + { + "epoch": 0.9818231740801757, + "grad_norm": 0.40114280581474304, + "learning_rate": 1.0307891334183944e-05, + "loss": 0.4317, + "step": 35758 + }, + { + "epoch": 0.9818506315211423, + "grad_norm": 0.44006869196891785, + "learning_rate": 1.0307459645248092e-05, + "loss": 0.5343, + "step": 35759 + }, + { + "epoch": 0.9818780889621087, + "grad_norm": 0.35076451301574707, + "learning_rate": 1.0307027955738728e-05, + "loss": 0.5254, + "step": 35760 + }, + { + "epoch": 0.9819055464030753, + "grad_norm": 0.3872314989566803, + "learning_rate": 1.0306596265656662e-05, + "loss": 0.3372, + "step": 35761 + }, + { + "epoch": 0.9819330038440417, + "grad_norm": 0.4234462082386017, + "learning_rate": 1.0306164575002697e-05, + "loss": 0.4356, + "step": 35762 + }, + { + "epoch": 0.9819604612850082, + "grad_norm": 0.4075971245765686, + "learning_rate": 1.0305732883777634e-05, + "loss": 0.5204, + "step": 35763 + }, + { + "epoch": 0.9819879187259747, + "grad_norm": 0.3958650827407837, + "learning_rate": 1.0305301191982285e-05, + "loss": 0.4831, + "step": 35764 + }, + { + "epoch": 0.9820153761669412, + "grad_norm": 0.38426199555397034, + "learning_rate": 1.030486949961745e-05, + "loss": 0.5056, + "step": 35765 + }, + { + "epoch": 0.9820428336079078, + "grad_norm": 0.3639121651649475, + "learning_rate": 1.0304437806683935e-05, + "loss": 0.452, + "step": 35766 + }, + { + "epoch": 0.9820702910488742, + "grad_norm": 0.3581767976284027, + "learning_rate": 1.030400611318255e-05, + "loss": 0.388, + "step": 35767 + }, + { + "epoch": 0.9820977484898408, + "grad_norm": 0.3984135687351227, + "learning_rate": 1.030357441911409e-05, + "loss": 0.4601, + "step": 35768 + }, + { + "epoch": 0.9821252059308072, + "grad_norm": 0.39617398381233215, + "learning_rate": 1.0303142724479373e-05, + "loss": 0.4566, + "step": 35769 + }, + { + "epoch": 0.9821526633717738, + "grad_norm": 0.3833697736263275, + "learning_rate": 1.0302711029279195e-05, + "loss": 0.4872, + "step": 35770 + }, + { + "epoch": 0.9821801208127402, + "grad_norm": 0.3621375560760498, + "learning_rate": 1.0302279333514367e-05, + "loss": 0.4709, + "step": 35771 + }, + { + "epoch": 0.9822075782537067, + "grad_norm": 0.42509233951568604, + "learning_rate": 1.0301847637185688e-05, + "loss": 0.4968, + "step": 35772 + }, + { + "epoch": 0.9822350356946733, + "grad_norm": 0.507575273513794, + "learning_rate": 1.0301415940293969e-05, + "loss": 0.6043, + "step": 35773 + }, + { + "epoch": 0.9822624931356397, + "grad_norm": 0.399944931268692, + "learning_rate": 1.0300984242840012e-05, + "loss": 0.4707, + "step": 35774 + }, + { + "epoch": 0.9822899505766063, + "grad_norm": 0.400417685508728, + "learning_rate": 1.0300552544824625e-05, + "loss": 0.515, + "step": 35775 + }, + { + "epoch": 0.9823174080175727, + "grad_norm": 0.38797062635421753, + "learning_rate": 1.0300120846248611e-05, + "loss": 0.4972, + "step": 35776 + }, + { + "epoch": 0.9823448654585393, + "grad_norm": 0.38659703731536865, + "learning_rate": 1.0299689147112778e-05, + "loss": 0.5407, + "step": 35777 + }, + { + "epoch": 0.9823723228995057, + "grad_norm": 0.36106589436531067, + "learning_rate": 1.0299257447417923e-05, + "loss": 0.4499, + "step": 35778 + }, + { + "epoch": 0.9823997803404723, + "grad_norm": 0.39751997590065, + "learning_rate": 1.0298825747164863e-05, + "loss": 0.4729, + "step": 35779 + }, + { + "epoch": 0.9824272377814388, + "grad_norm": 0.3831535577774048, + "learning_rate": 1.0298394046354396e-05, + "loss": 0.5205, + "step": 35780 + }, + { + "epoch": 0.9824546952224052, + "grad_norm": 0.39097270369529724, + "learning_rate": 1.0297962344987327e-05, + "loss": 0.5616, + "step": 35781 + }, + { + "epoch": 0.9824821526633718, + "grad_norm": 0.36214596033096313, + "learning_rate": 1.0297530643064464e-05, + "loss": 0.4269, + "step": 35782 + }, + { + "epoch": 0.9825096101043382, + "grad_norm": 0.39185577630996704, + "learning_rate": 1.0297098940586611e-05, + "loss": 0.4696, + "step": 35783 + }, + { + "epoch": 0.9825370675453048, + "grad_norm": 0.45833882689476013, + "learning_rate": 1.0296667237554577e-05, + "loss": 0.5139, + "step": 35784 + }, + { + "epoch": 0.9825645249862712, + "grad_norm": 0.3687548339366913, + "learning_rate": 1.029623553396916e-05, + "loss": 0.5075, + "step": 35785 + }, + { + "epoch": 0.9825919824272378, + "grad_norm": 0.4375058710575104, + "learning_rate": 1.029580382983117e-05, + "loss": 0.4792, + "step": 35786 + }, + { + "epoch": 0.9826194398682043, + "grad_norm": 0.3713165521621704, + "learning_rate": 1.0295372125141415e-05, + "loss": 0.4913, + "step": 35787 + }, + { + "epoch": 0.9826468973091708, + "grad_norm": 0.41743677854537964, + "learning_rate": 1.029494041990069e-05, + "loss": 0.5358, + "step": 35788 + }, + { + "epoch": 0.9826743547501373, + "grad_norm": 0.550894021987915, + "learning_rate": 1.0294508714109811e-05, + "loss": 0.5423, + "step": 35789 + }, + { + "epoch": 0.9827018121911038, + "grad_norm": 0.4720502495765686, + "learning_rate": 1.029407700776958e-05, + "loss": 0.4958, + "step": 35790 + }, + { + "epoch": 0.9827292696320703, + "grad_norm": 0.4038867652416229, + "learning_rate": 1.0293645300880801e-05, + "loss": 0.4347, + "step": 35791 + }, + { + "epoch": 0.9827567270730367, + "grad_norm": 0.39271071553230286, + "learning_rate": 1.029321359344428e-05, + "loss": 0.4234, + "step": 35792 + }, + { + "epoch": 0.9827841845140033, + "grad_norm": 0.41609352827072144, + "learning_rate": 1.029278188546082e-05, + "loss": 0.5538, + "step": 35793 + }, + { + "epoch": 0.9828116419549698, + "grad_norm": 0.5017848610877991, + "learning_rate": 1.029235017693123e-05, + "loss": 0.5566, + "step": 35794 + }, + { + "epoch": 0.9828390993959363, + "grad_norm": 0.3779248893260956, + "learning_rate": 1.0291918467856314e-05, + "loss": 0.4961, + "step": 35795 + }, + { + "epoch": 0.9828665568369028, + "grad_norm": 0.4280809462070465, + "learning_rate": 1.0291486758236875e-05, + "loss": 0.4466, + "step": 35796 + }, + { + "epoch": 0.9828940142778693, + "grad_norm": 0.3619506359100342, + "learning_rate": 1.0291055048073722e-05, + "loss": 0.4035, + "step": 35797 + }, + { + "epoch": 0.9829214717188358, + "grad_norm": 0.356212317943573, + "learning_rate": 1.029062333736766e-05, + "loss": 0.4435, + "step": 35798 + }, + { + "epoch": 0.9829489291598023, + "grad_norm": 0.4051038920879364, + "learning_rate": 1.0290191626119488e-05, + "loss": 0.4586, + "step": 35799 + }, + { + "epoch": 0.9829763866007688, + "grad_norm": 0.42294174432754517, + "learning_rate": 1.028975991433002e-05, + "loss": 0.5095, + "step": 35800 + }, + { + "epoch": 0.9830038440417354, + "grad_norm": 0.4230377674102783, + "learning_rate": 1.0289328202000055e-05, + "loss": 0.4434, + "step": 35801 + }, + { + "epoch": 0.9830313014827018, + "grad_norm": 0.40009137988090515, + "learning_rate": 1.0288896489130402e-05, + "loss": 0.5154, + "step": 35802 + }, + { + "epoch": 0.9830587589236683, + "grad_norm": 0.42640841007232666, + "learning_rate": 1.0288464775721865e-05, + "loss": 0.5104, + "step": 35803 + }, + { + "epoch": 0.9830862163646348, + "grad_norm": 0.44533705711364746, + "learning_rate": 1.0288033061775247e-05, + "loss": 0.5079, + "step": 35804 + }, + { + "epoch": 0.9831136738056013, + "grad_norm": 0.40413016080856323, + "learning_rate": 1.0287601347291358e-05, + "loss": 0.3989, + "step": 35805 + }, + { + "epoch": 0.9831411312465678, + "grad_norm": 0.38781237602233887, + "learning_rate": 1.0287169632270997e-05, + "loss": 0.5816, + "step": 35806 + }, + { + "epoch": 0.9831685886875343, + "grad_norm": 0.3577538728713989, + "learning_rate": 1.0286737916714977e-05, + "loss": 0.366, + "step": 35807 + }, + { + "epoch": 0.9831960461285009, + "grad_norm": 0.4122081398963928, + "learning_rate": 1.0286306200624099e-05, + "loss": 0.4865, + "step": 35808 + }, + { + "epoch": 0.9832235035694673, + "grad_norm": 0.41420477628707886, + "learning_rate": 1.0285874483999166e-05, + "loss": 0.4973, + "step": 35809 + }, + { + "epoch": 0.9832509610104339, + "grad_norm": 0.4152459502220154, + "learning_rate": 1.0285442766840987e-05, + "loss": 0.5331, + "step": 35810 + }, + { + "epoch": 0.9832784184514003, + "grad_norm": 0.4097081124782562, + "learning_rate": 1.0285011049150367e-05, + "loss": 0.4536, + "step": 35811 + }, + { + "epoch": 0.9833058758923668, + "grad_norm": 0.6296291947364807, + "learning_rate": 1.0284579330928108e-05, + "loss": 0.4552, + "step": 35812 + }, + { + "epoch": 0.9833333333333333, + "grad_norm": 0.4035436809062958, + "learning_rate": 1.0284147612175021e-05, + "loss": 0.5453, + "step": 35813 + }, + { + "epoch": 0.9833607907742998, + "grad_norm": 0.3731517493724823, + "learning_rate": 1.0283715892891906e-05, + "loss": 0.4227, + "step": 35814 + }, + { + "epoch": 0.9833882482152664, + "grad_norm": 0.36442381143569946, + "learning_rate": 1.0283284173079571e-05, + "loss": 0.3977, + "step": 35815 + }, + { + "epoch": 0.9834157056562328, + "grad_norm": 0.42816275358200073, + "learning_rate": 1.0282852452738819e-05, + "loss": 0.5991, + "step": 35816 + }, + { + "epoch": 0.9834431630971994, + "grad_norm": 0.3720617890357971, + "learning_rate": 1.028242073187046e-05, + "loss": 0.4241, + "step": 35817 + }, + { + "epoch": 0.9834706205381658, + "grad_norm": 0.4244568943977356, + "learning_rate": 1.0281989010475294e-05, + "loss": 0.4836, + "step": 35818 + }, + { + "epoch": 0.9834980779791324, + "grad_norm": 0.3819500505924225, + "learning_rate": 1.0281557288554127e-05, + "loss": 0.4071, + "step": 35819 + }, + { + "epoch": 0.9835255354200988, + "grad_norm": 0.4255686104297638, + "learning_rate": 1.0281125566107771e-05, + "loss": 0.5734, + "step": 35820 + }, + { + "epoch": 0.9835529928610653, + "grad_norm": 0.4852124750614166, + "learning_rate": 1.028069384313702e-05, + "loss": 0.522, + "step": 35821 + }, + { + "epoch": 0.9835804503020319, + "grad_norm": 0.4581727087497711, + "learning_rate": 1.028026211964269e-05, + "loss": 0.5145, + "step": 35822 + }, + { + "epoch": 0.9836079077429983, + "grad_norm": 0.39095133543014526, + "learning_rate": 1.0279830395625581e-05, + "loss": 0.4989, + "step": 35823 + }, + { + "epoch": 0.9836353651839649, + "grad_norm": 0.43285563588142395, + "learning_rate": 1.0279398671086496e-05, + "loss": 0.5299, + "step": 35824 + }, + { + "epoch": 0.9836628226249313, + "grad_norm": 0.5313538908958435, + "learning_rate": 1.0278966946026245e-05, + "loss": 0.5356, + "step": 35825 + }, + { + "epoch": 0.9836902800658979, + "grad_norm": 0.3843991160392761, + "learning_rate": 1.0278535220445631e-05, + "loss": 0.532, + "step": 35826 + }, + { + "epoch": 0.9837177375068643, + "grad_norm": 0.4050960838794708, + "learning_rate": 1.027810349434546e-05, + "loss": 0.5208, + "step": 35827 + }, + { + "epoch": 0.9837451949478309, + "grad_norm": 0.9955090880393982, + "learning_rate": 1.0277671767726539e-05, + "loss": 0.5487, + "step": 35828 + }, + { + "epoch": 0.9837726523887974, + "grad_norm": 0.3841996192932129, + "learning_rate": 1.0277240040589671e-05, + "loss": 0.4299, + "step": 35829 + }, + { + "epoch": 0.9838001098297638, + "grad_norm": 0.41949111223220825, + "learning_rate": 1.0276808312935661e-05, + "loss": 0.4351, + "step": 35830 + }, + { + "epoch": 0.9838275672707304, + "grad_norm": 0.3774500787258148, + "learning_rate": 1.0276376584765316e-05, + "loss": 0.5082, + "step": 35831 + }, + { + "epoch": 0.9838550247116968, + "grad_norm": 0.48335134983062744, + "learning_rate": 1.027594485607944e-05, + "loss": 0.4915, + "step": 35832 + }, + { + "epoch": 0.9838824821526634, + "grad_norm": 0.5616464614868164, + "learning_rate": 1.0275513126878838e-05, + "loss": 0.576, + "step": 35833 + }, + { + "epoch": 0.9839099395936298, + "grad_norm": 0.40466129779815674, + "learning_rate": 1.0275081397164317e-05, + "loss": 0.4787, + "step": 35834 + }, + { + "epoch": 0.9839373970345964, + "grad_norm": 0.3814612329006195, + "learning_rate": 1.0274649666936682e-05, + "loss": 0.4833, + "step": 35835 + }, + { + "epoch": 0.9839648544755629, + "grad_norm": 0.3728581368923187, + "learning_rate": 1.0274217936196735e-05, + "loss": 0.3849, + "step": 35836 + }, + { + "epoch": 0.9839923119165294, + "grad_norm": 0.42403820157051086, + "learning_rate": 1.0273786204945286e-05, + "loss": 0.4774, + "step": 35837 + }, + { + "epoch": 0.9840197693574959, + "grad_norm": 0.4510997235774994, + "learning_rate": 1.0273354473183136e-05, + "loss": 0.462, + "step": 35838 + }, + { + "epoch": 0.9840472267984623, + "grad_norm": 0.38059067726135254, + "learning_rate": 1.0272922740911098e-05, + "loss": 0.4805, + "step": 35839 + }, + { + "epoch": 0.9840746842394289, + "grad_norm": 0.43444588780403137, + "learning_rate": 1.027249100812997e-05, + "loss": 0.4237, + "step": 35840 + }, + { + "epoch": 0.9841021416803953, + "grad_norm": 0.3787662088871002, + "learning_rate": 1.0272059274840555e-05, + "loss": 0.3966, + "step": 35841 + }, + { + "epoch": 0.9841295991213619, + "grad_norm": 0.4093223214149475, + "learning_rate": 1.0271627541043668e-05, + "loss": 0.5684, + "step": 35842 + }, + { + "epoch": 0.9841570565623284, + "grad_norm": 0.43822282552719116, + "learning_rate": 1.0271195806740105e-05, + "loss": 0.4352, + "step": 35843 + }, + { + "epoch": 0.9841845140032949, + "grad_norm": 0.34668025374412537, + "learning_rate": 1.027076407193068e-05, + "loss": 0.387, + "step": 35844 + }, + { + "epoch": 0.9842119714442614, + "grad_norm": 0.36072438955307007, + "learning_rate": 1.0270332336616189e-05, + "loss": 0.5027, + "step": 35845 + }, + { + "epoch": 0.9842394288852279, + "grad_norm": 0.5118063688278198, + "learning_rate": 1.0269900600797445e-05, + "loss": 0.4277, + "step": 35846 + }, + { + "epoch": 0.9842668863261944, + "grad_norm": 0.39052069187164307, + "learning_rate": 1.0269468864475248e-05, + "loss": 0.484, + "step": 35847 + }, + { + "epoch": 0.9842943437671609, + "grad_norm": 0.4438621699810028, + "learning_rate": 1.0269037127650405e-05, + "loss": 0.5006, + "step": 35848 + }, + { + "epoch": 0.9843218012081274, + "grad_norm": 0.3682904541492462, + "learning_rate": 1.0268605390323725e-05, + "loss": 0.5157, + "step": 35849 + }, + { + "epoch": 0.984349258649094, + "grad_norm": 0.4207058250904083, + "learning_rate": 1.0268173652496007e-05, + "loss": 0.4588, + "step": 35850 + }, + { + "epoch": 0.9843767160900604, + "grad_norm": 0.4060781002044678, + "learning_rate": 1.026774191416806e-05, + "loss": 0.4721, + "step": 35851 + }, + { + "epoch": 0.9844041735310269, + "grad_norm": 0.4139321446418762, + "learning_rate": 1.0267310175340692e-05, + "loss": 0.4531, + "step": 35852 + }, + { + "epoch": 0.9844316309719934, + "grad_norm": 0.46698352694511414, + "learning_rate": 1.0266878436014702e-05, + "loss": 0.5048, + "step": 35853 + }, + { + "epoch": 0.9844590884129599, + "grad_norm": 0.40057873725891113, + "learning_rate": 1.0266446696190899e-05, + "loss": 0.5507, + "step": 35854 + }, + { + "epoch": 0.9844865458539264, + "grad_norm": 0.36569857597351074, + "learning_rate": 1.0266014955870092e-05, + "loss": 0.509, + "step": 35855 + }, + { + "epoch": 0.9845140032948929, + "grad_norm": 0.46380218863487244, + "learning_rate": 1.0265583215053077e-05, + "loss": 0.5641, + "step": 35856 + }, + { + "epoch": 0.9845414607358595, + "grad_norm": 0.40535977482795715, + "learning_rate": 1.0265151473740666e-05, + "loss": 0.4355, + "step": 35857 + }, + { + "epoch": 0.9845689181768259, + "grad_norm": 0.4088630676269531, + "learning_rate": 1.026471973193366e-05, + "loss": 0.4834, + "step": 35858 + }, + { + "epoch": 0.9845963756177925, + "grad_norm": 0.3284760117530823, + "learning_rate": 1.0264287989632872e-05, + "loss": 0.3934, + "step": 35859 + }, + { + "epoch": 0.9846238330587589, + "grad_norm": 0.4551207423210144, + "learning_rate": 1.0263856246839104e-05, + "loss": 0.4903, + "step": 35860 + }, + { + "epoch": 0.9846512904997254, + "grad_norm": 0.4031596779823303, + "learning_rate": 1.0263424503553154e-05, + "loss": 0.4841, + "step": 35861 + }, + { + "epoch": 0.9846787479406919, + "grad_norm": 0.4002750813961029, + "learning_rate": 1.0262992759775838e-05, + "loss": 0.5253, + "step": 35862 + }, + { + "epoch": 0.9847062053816584, + "grad_norm": 0.46921223402023315, + "learning_rate": 1.0262561015507952e-05, + "loss": 0.5081, + "step": 35863 + }, + { + "epoch": 0.984733662822625, + "grad_norm": 0.3614066243171692, + "learning_rate": 1.026212927075031e-05, + "loss": 0.4878, + "step": 35864 + }, + { + "epoch": 0.9847611202635914, + "grad_norm": 0.37360450625419617, + "learning_rate": 1.0261697525503713e-05, + "loss": 0.4975, + "step": 35865 + }, + { + "epoch": 0.984788577704558, + "grad_norm": 0.4042102098464966, + "learning_rate": 1.0261265779768964e-05, + "loss": 0.5485, + "step": 35866 + }, + { + "epoch": 0.9848160351455244, + "grad_norm": 0.43489503860473633, + "learning_rate": 1.0260834033546872e-05, + "loss": 0.4649, + "step": 35867 + }, + { + "epoch": 0.984843492586491, + "grad_norm": 0.3597380816936493, + "learning_rate": 1.026040228683824e-05, + "loss": 0.4583, + "step": 35868 + }, + { + "epoch": 0.9848709500274574, + "grad_norm": 0.42621827125549316, + "learning_rate": 1.0259970539643877e-05, + "loss": 0.4676, + "step": 35869 + }, + { + "epoch": 0.9848984074684239, + "grad_norm": 0.4464735686779022, + "learning_rate": 1.0259538791964587e-05, + "loss": 0.5105, + "step": 35870 + }, + { + "epoch": 0.9849258649093905, + "grad_norm": 0.4546196758747101, + "learning_rate": 1.025910704380117e-05, + "loss": 0.5113, + "step": 35871 + }, + { + "epoch": 0.9849533223503569, + "grad_norm": 0.3431130349636078, + "learning_rate": 1.0258675295154438e-05, + "loss": 0.4771, + "step": 35872 + }, + { + "epoch": 0.9849807797913235, + "grad_norm": 0.4267568588256836, + "learning_rate": 1.0258243546025197e-05, + "loss": 0.4783, + "step": 35873 + }, + { + "epoch": 0.9850082372322899, + "grad_norm": 0.402377724647522, + "learning_rate": 1.0257811796414245e-05, + "loss": 0.5318, + "step": 35874 + }, + { + "epoch": 0.9850356946732565, + "grad_norm": 0.3922879099845886, + "learning_rate": 1.0257380046322394e-05, + "loss": 0.4415, + "step": 35875 + }, + { + "epoch": 0.9850631521142229, + "grad_norm": 0.4758348762989044, + "learning_rate": 1.0256948295750444e-05, + "loss": 0.4104, + "step": 35876 + }, + { + "epoch": 0.9850906095551895, + "grad_norm": 0.3673476576805115, + "learning_rate": 1.0256516544699207e-05, + "loss": 0.4512, + "step": 35877 + }, + { + "epoch": 0.985118066996156, + "grad_norm": 0.36103764176368713, + "learning_rate": 1.0256084793169485e-05, + "loss": 0.4784, + "step": 35878 + }, + { + "epoch": 0.9851455244371224, + "grad_norm": 0.4713972508907318, + "learning_rate": 1.025565304116208e-05, + "loss": 0.5344, + "step": 35879 + }, + { + "epoch": 0.985172981878089, + "grad_norm": 0.430062472820282, + "learning_rate": 1.0255221288677804e-05, + "loss": 0.5182, + "step": 35880 + }, + { + "epoch": 0.9852004393190554, + "grad_norm": 0.4730035662651062, + "learning_rate": 1.0254789535717455e-05, + "loss": 0.5348, + "step": 35881 + }, + { + "epoch": 0.985227896760022, + "grad_norm": 0.3583771586418152, + "learning_rate": 1.0254357782281846e-05, + "loss": 0.4436, + "step": 35882 + }, + { + "epoch": 0.9852553542009884, + "grad_norm": 0.3776409327983856, + "learning_rate": 1.0253926028371776e-05, + "loss": 0.4663, + "step": 35883 + }, + { + "epoch": 0.985282811641955, + "grad_norm": 0.3553338348865509, + "learning_rate": 1.0253494273988051e-05, + "loss": 0.4745, + "step": 35884 + }, + { + "epoch": 0.9853102690829215, + "grad_norm": 0.39739060401916504, + "learning_rate": 1.0253062519131482e-05, + "loss": 0.4404, + "step": 35885 + }, + { + "epoch": 0.985337726523888, + "grad_norm": 0.39368781447410583, + "learning_rate": 1.0252630763802871e-05, + "loss": 0.4405, + "step": 35886 + }, + { + "epoch": 0.9853651839648545, + "grad_norm": 0.46305498480796814, + "learning_rate": 1.025219900800302e-05, + "loss": 0.5276, + "step": 35887 + }, + { + "epoch": 0.985392641405821, + "grad_norm": 0.37801069021224976, + "learning_rate": 1.025176725173274e-05, + "loss": 0.4145, + "step": 35888 + }, + { + "epoch": 0.9854200988467875, + "grad_norm": 0.4386086165904999, + "learning_rate": 1.025133549499283e-05, + "loss": 0.4771, + "step": 35889 + }, + { + "epoch": 0.9854475562877539, + "grad_norm": 0.429776132106781, + "learning_rate": 1.0250903737784103e-05, + "loss": 0.5094, + "step": 35890 + }, + { + "epoch": 0.9854750137287205, + "grad_norm": 0.4200189411640167, + "learning_rate": 1.0250471980107361e-05, + "loss": 0.4858, + "step": 35891 + }, + { + "epoch": 0.985502471169687, + "grad_norm": 0.37305933237075806, + "learning_rate": 1.0250040221963406e-05, + "loss": 0.4428, + "step": 35892 + }, + { + "epoch": 0.9855299286106535, + "grad_norm": 0.37603890895843506, + "learning_rate": 1.0249608463353048e-05, + "loss": 0.3943, + "step": 35893 + }, + { + "epoch": 0.98555738605162, + "grad_norm": 0.41333645582199097, + "learning_rate": 1.0249176704277088e-05, + "loss": 0.5184, + "step": 35894 + }, + { + "epoch": 0.9855848434925865, + "grad_norm": 0.3858993649482727, + "learning_rate": 1.0248744944736335e-05, + "loss": 0.5299, + "step": 35895 + }, + { + "epoch": 0.985612300933553, + "grad_norm": 0.41077783703804016, + "learning_rate": 1.0248313184731596e-05, + "loss": 0.4569, + "step": 35896 + }, + { + "epoch": 0.9856397583745194, + "grad_norm": 0.4076900780200958, + "learning_rate": 1.024788142426367e-05, + "loss": 0.467, + "step": 35897 + }, + { + "epoch": 0.985667215815486, + "grad_norm": 0.40137913823127747, + "learning_rate": 1.0247449663333368e-05, + "loss": 0.517, + "step": 35898 + }, + { + "epoch": 0.9856946732564525, + "grad_norm": 0.3849411606788635, + "learning_rate": 1.0247017901941492e-05, + "loss": 0.4351, + "step": 35899 + }, + { + "epoch": 0.985722130697419, + "grad_norm": 0.3792320787906647, + "learning_rate": 1.024658614008885e-05, + "loss": 0.456, + "step": 35900 + }, + { + "epoch": 0.9857495881383855, + "grad_norm": 0.4087684452533722, + "learning_rate": 1.0246154377776247e-05, + "loss": 0.4733, + "step": 35901 + }, + { + "epoch": 0.985777045579352, + "grad_norm": 0.39333704113960266, + "learning_rate": 1.0245722615004485e-05, + "loss": 0.4731, + "step": 35902 + }, + { + "epoch": 0.9858045030203185, + "grad_norm": 0.3806110918521881, + "learning_rate": 1.0245290851774374e-05, + "loss": 0.4651, + "step": 35903 + }, + { + "epoch": 0.985831960461285, + "grad_norm": 0.3945881724357605, + "learning_rate": 1.0244859088086719e-05, + "loss": 0.4748, + "step": 35904 + }, + { + "epoch": 0.9858594179022515, + "grad_norm": 0.47852659225463867, + "learning_rate": 1.0244427323942318e-05, + "loss": 0.4901, + "step": 35905 + }, + { + "epoch": 0.9858868753432181, + "grad_norm": 0.4202409088611603, + "learning_rate": 1.0243995559341986e-05, + "loss": 0.4864, + "step": 35906 + }, + { + "epoch": 0.9859143327841845, + "grad_norm": 0.439973384141922, + "learning_rate": 1.0243563794286524e-05, + "loss": 0.4348, + "step": 35907 + }, + { + "epoch": 0.985941790225151, + "grad_norm": 0.39073365926742554, + "learning_rate": 1.0243132028776736e-05, + "loss": 0.5319, + "step": 35908 + }, + { + "epoch": 0.9859692476661175, + "grad_norm": 0.43287861347198486, + "learning_rate": 1.0242700262813432e-05, + "loss": 0.502, + "step": 35909 + }, + { + "epoch": 0.985996705107084, + "grad_norm": 0.4187278747558594, + "learning_rate": 1.0242268496397412e-05, + "loss": 0.4923, + "step": 35910 + }, + { + "epoch": 0.9860241625480505, + "grad_norm": 0.3760363757610321, + "learning_rate": 1.0241836729529487e-05, + "loss": 0.4299, + "step": 35911 + }, + { + "epoch": 0.986051619989017, + "grad_norm": 0.41000309586524963, + "learning_rate": 1.0241404962210455e-05, + "loss": 0.4652, + "step": 35912 + }, + { + "epoch": 0.9860790774299836, + "grad_norm": 0.38554006814956665, + "learning_rate": 1.0240973194441131e-05, + "loss": 0.4703, + "step": 35913 + }, + { + "epoch": 0.98610653487095, + "grad_norm": 0.3600612282752991, + "learning_rate": 1.0240541426222312e-05, + "loss": 0.4266, + "step": 35914 + }, + { + "epoch": 0.9861339923119166, + "grad_norm": 0.46548303961753845, + "learning_rate": 1.0240109657554804e-05, + "loss": 0.5273, + "step": 35915 + }, + { + "epoch": 0.986161449752883, + "grad_norm": 0.4601188898086548, + "learning_rate": 1.023967788843942e-05, + "loss": 0.5782, + "step": 35916 + }, + { + "epoch": 0.9861889071938496, + "grad_norm": 0.41570940613746643, + "learning_rate": 1.0239246118876957e-05, + "loss": 0.4601, + "step": 35917 + }, + { + "epoch": 0.986216364634816, + "grad_norm": 0.4140207767486572, + "learning_rate": 1.0238814348868225e-05, + "loss": 0.4929, + "step": 35918 + }, + { + "epoch": 0.9862438220757825, + "grad_norm": 0.3649998605251312, + "learning_rate": 1.0238382578414028e-05, + "loss": 0.4412, + "step": 35919 + }, + { + "epoch": 0.9862712795167491, + "grad_norm": 0.3667534291744232, + "learning_rate": 1.023795080751517e-05, + "loss": 0.4639, + "step": 35920 + }, + { + "epoch": 0.9862987369577155, + "grad_norm": 0.41861391067504883, + "learning_rate": 1.0237519036172459e-05, + "loss": 0.5043, + "step": 35921 + }, + { + "epoch": 0.9863261943986821, + "grad_norm": 0.3923702538013458, + "learning_rate": 1.0237087264386698e-05, + "loss": 0.5248, + "step": 35922 + }, + { + "epoch": 0.9863536518396485, + "grad_norm": 0.39076361060142517, + "learning_rate": 1.0236655492158694e-05, + "loss": 0.5654, + "step": 35923 + }, + { + "epoch": 0.9863811092806151, + "grad_norm": 0.3658798635005951, + "learning_rate": 1.0236223719489254e-05, + "loss": 0.5016, + "step": 35924 + }, + { + "epoch": 0.9864085667215815, + "grad_norm": 0.37370914220809937, + "learning_rate": 1.0235791946379177e-05, + "loss": 0.5301, + "step": 35925 + }, + { + "epoch": 0.986436024162548, + "grad_norm": 0.41162198781967163, + "learning_rate": 1.0235360172829276e-05, + "loss": 0.4456, + "step": 35926 + }, + { + "epoch": 0.9864634816035146, + "grad_norm": 0.35184410214424133, + "learning_rate": 1.0234928398840353e-05, + "loss": 0.3996, + "step": 35927 + }, + { + "epoch": 0.986490939044481, + "grad_norm": 0.41203826665878296, + "learning_rate": 1.0234496624413213e-05, + "loss": 0.4319, + "step": 35928 + }, + { + "epoch": 0.9865183964854476, + "grad_norm": 0.38572144508361816, + "learning_rate": 1.0234064849548665e-05, + "loss": 0.4465, + "step": 35929 + }, + { + "epoch": 0.986545853926414, + "grad_norm": 0.3804638981819153, + "learning_rate": 1.0233633074247505e-05, + "loss": 0.4923, + "step": 35930 + }, + { + "epoch": 0.9865733113673806, + "grad_norm": 0.3943641781806946, + "learning_rate": 1.0233201298510548e-05, + "loss": 0.5037, + "step": 35931 + }, + { + "epoch": 0.986600768808347, + "grad_norm": 0.399070680141449, + "learning_rate": 1.02327695223386e-05, + "loss": 0.5261, + "step": 35932 + }, + { + "epoch": 0.9866282262493136, + "grad_norm": 0.38374871015548706, + "learning_rate": 1.0232337745732457e-05, + "loss": 0.3803, + "step": 35933 + }, + { + "epoch": 0.9866556836902801, + "grad_norm": 0.3979797661304474, + "learning_rate": 1.0231905968692933e-05, + "loss": 0.4631, + "step": 35934 + }, + { + "epoch": 0.9866831411312466, + "grad_norm": 0.3888411223888397, + "learning_rate": 1.023147419122083e-05, + "loss": 0.4597, + "step": 35935 + }, + { + "epoch": 0.9867105985722131, + "grad_norm": 0.41087114810943604, + "learning_rate": 1.0231042413316953e-05, + "loss": 0.4773, + "step": 35936 + }, + { + "epoch": 0.9867380560131795, + "grad_norm": 0.46334725618362427, + "learning_rate": 1.0230610634982107e-05, + "loss": 0.5158, + "step": 35937 + }, + { + "epoch": 0.9867655134541461, + "grad_norm": 0.3888024091720581, + "learning_rate": 1.0230178856217099e-05, + "loss": 0.3919, + "step": 35938 + }, + { + "epoch": 0.9867929708951125, + "grad_norm": 0.4669295847415924, + "learning_rate": 1.0229747077022734e-05, + "loss": 0.3931, + "step": 35939 + }, + { + "epoch": 0.9868204283360791, + "grad_norm": 0.3998545706272125, + "learning_rate": 1.022931529739982e-05, + "loss": 0.4804, + "step": 35940 + }, + { + "epoch": 0.9868478857770456, + "grad_norm": 0.3952127695083618, + "learning_rate": 1.0228883517349155e-05, + "loss": 0.5434, + "step": 35941 + }, + { + "epoch": 0.9868753432180121, + "grad_norm": 0.3617648780345917, + "learning_rate": 1.0228451736871554e-05, + "loss": 0.4526, + "step": 35942 + }, + { + "epoch": 0.9869028006589786, + "grad_norm": 0.40099674463272095, + "learning_rate": 1.0228019955967815e-05, + "loss": 0.5058, + "step": 35943 + }, + { + "epoch": 0.9869302580999451, + "grad_norm": 0.44349610805511475, + "learning_rate": 1.0227588174638746e-05, + "loss": 0.4988, + "step": 35944 + }, + { + "epoch": 0.9869577155409116, + "grad_norm": 0.3487011790275574, + "learning_rate": 1.0227156392885154e-05, + "loss": 0.5074, + "step": 35945 + }, + { + "epoch": 0.986985172981878, + "grad_norm": 0.40472981333732605, + "learning_rate": 1.0226724610707842e-05, + "loss": 0.4751, + "step": 35946 + }, + { + "epoch": 0.9870126304228446, + "grad_norm": 0.3828586935997009, + "learning_rate": 1.0226292828107616e-05, + "loss": 0.4945, + "step": 35947 + }, + { + "epoch": 0.9870400878638111, + "grad_norm": 0.4858611822128296, + "learning_rate": 1.0225861045085282e-05, + "loss": 0.4764, + "step": 35948 + }, + { + "epoch": 0.9870675453047776, + "grad_norm": 0.362066388130188, + "learning_rate": 1.0225429261641643e-05, + "loss": 0.409, + "step": 35949 + }, + { + "epoch": 0.9870950027457441, + "grad_norm": 0.43630310893058777, + "learning_rate": 1.0224997477777509e-05, + "loss": 0.507, + "step": 35950 + }, + { + "epoch": 0.9871224601867106, + "grad_norm": 0.4106808602809906, + "learning_rate": 1.0224565693493682e-05, + "loss": 0.5008, + "step": 35951 + }, + { + "epoch": 0.9871499176276771, + "grad_norm": 0.3993169665336609, + "learning_rate": 1.022413390879097e-05, + "loss": 0.4903, + "step": 35952 + }, + { + "epoch": 0.9871773750686436, + "grad_norm": 0.4855341911315918, + "learning_rate": 1.0223702123670177e-05, + "loss": 0.4535, + "step": 35953 + }, + { + "epoch": 0.9872048325096101, + "grad_norm": 0.401519238948822, + "learning_rate": 1.0223270338132102e-05, + "loss": 0.5815, + "step": 35954 + }, + { + "epoch": 0.9872322899505767, + "grad_norm": 0.46157002449035645, + "learning_rate": 1.0222838552177563e-05, + "loss": 0.4753, + "step": 35955 + }, + { + "epoch": 0.9872597473915431, + "grad_norm": 0.3718825876712799, + "learning_rate": 1.0222406765807355e-05, + "loss": 0.5589, + "step": 35956 + }, + { + "epoch": 0.9872872048325096, + "grad_norm": 0.41220787167549133, + "learning_rate": 1.0221974979022292e-05, + "loss": 0.4417, + "step": 35957 + }, + { + "epoch": 0.9873146622734761, + "grad_norm": 0.42461729049682617, + "learning_rate": 1.0221543191823174e-05, + "loss": 0.4668, + "step": 35958 + }, + { + "epoch": 0.9873421197144426, + "grad_norm": 0.4331369996070862, + "learning_rate": 1.0221111404210802e-05, + "loss": 0.4885, + "step": 35959 + }, + { + "epoch": 0.9873695771554091, + "grad_norm": 0.396995484828949, + "learning_rate": 1.022067961618599e-05, + "loss": 0.4971, + "step": 35960 + }, + { + "epoch": 0.9873970345963756, + "grad_norm": 0.39297986030578613, + "learning_rate": 1.022024782774954e-05, + "loss": 0.383, + "step": 35961 + }, + { + "epoch": 0.9874244920373422, + "grad_norm": 0.38801243901252747, + "learning_rate": 1.0219816038902259e-05, + "loss": 0.5059, + "step": 35962 + }, + { + "epoch": 0.9874519494783086, + "grad_norm": 0.3468981087207794, + "learning_rate": 1.021938424964495e-05, + "loss": 0.4669, + "step": 35963 + }, + { + "epoch": 0.9874794069192752, + "grad_norm": 0.3361624777317047, + "learning_rate": 1.0218952459978417e-05, + "loss": 0.4266, + "step": 35964 + }, + { + "epoch": 0.9875068643602416, + "grad_norm": 0.39264172315597534, + "learning_rate": 1.021852066990347e-05, + "loss": 0.5182, + "step": 35965 + }, + { + "epoch": 0.9875343218012081, + "grad_norm": 0.45283371210098267, + "learning_rate": 1.0218088879420914e-05, + "loss": 0.5123, + "step": 35966 + }, + { + "epoch": 0.9875617792421746, + "grad_norm": 0.39580830931663513, + "learning_rate": 1.021765708853155e-05, + "loss": 0.4768, + "step": 35967 + }, + { + "epoch": 0.9875892366831411, + "grad_norm": 0.43125712871551514, + "learning_rate": 1.0217225297236187e-05, + "loss": 0.5307, + "step": 35968 + }, + { + "epoch": 0.9876166941241077, + "grad_norm": 0.3786207437515259, + "learning_rate": 1.0216793505535627e-05, + "loss": 0.4942, + "step": 35969 + }, + { + "epoch": 0.9876441515650741, + "grad_norm": 0.41749677062034607, + "learning_rate": 1.021636171343068e-05, + "loss": 0.5383, + "step": 35970 + }, + { + "epoch": 0.9876716090060407, + "grad_norm": 0.3435963988304138, + "learning_rate": 1.0215929920922151e-05, + "loss": 0.3971, + "step": 35971 + }, + { + "epoch": 0.9876990664470071, + "grad_norm": 0.42613476514816284, + "learning_rate": 1.0215498128010841e-05, + "loss": 0.5299, + "step": 35972 + }, + { + "epoch": 0.9877265238879737, + "grad_norm": 0.3777581751346588, + "learning_rate": 1.0215066334697561e-05, + "loss": 0.4164, + "step": 35973 + }, + { + "epoch": 0.9877539813289401, + "grad_norm": 0.3604992628097534, + "learning_rate": 1.0214634540983111e-05, + "loss": 0.4832, + "step": 35974 + }, + { + "epoch": 0.9877814387699067, + "grad_norm": 0.46952196955680847, + "learning_rate": 1.0214202746868301e-05, + "loss": 0.5319, + "step": 35975 + }, + { + "epoch": 0.9878088962108732, + "grad_norm": 0.3638250529766083, + "learning_rate": 1.0213770952353934e-05, + "loss": 0.468, + "step": 35976 + }, + { + "epoch": 0.9878363536518396, + "grad_norm": 0.4293268918991089, + "learning_rate": 1.0213339157440813e-05, + "loss": 0.4913, + "step": 35977 + }, + { + "epoch": 0.9878638110928062, + "grad_norm": 0.353071391582489, + "learning_rate": 1.0212907362129752e-05, + "loss": 0.4686, + "step": 35978 + }, + { + "epoch": 0.9878912685337726, + "grad_norm": 0.5254124402999878, + "learning_rate": 1.0212475566421544e-05, + "loss": 0.4155, + "step": 35979 + }, + { + "epoch": 0.9879187259747392, + "grad_norm": 0.44358932971954346, + "learning_rate": 1.0212043770317006e-05, + "loss": 0.4711, + "step": 35980 + }, + { + "epoch": 0.9879461834157056, + "grad_norm": 0.3561739921569824, + "learning_rate": 1.021161197381694e-05, + "loss": 0.4329, + "step": 35981 + }, + { + "epoch": 0.9879736408566722, + "grad_norm": 0.368450790643692, + "learning_rate": 1.0211180176922147e-05, + "loss": 0.4202, + "step": 35982 + }, + { + "epoch": 0.9880010982976387, + "grad_norm": 0.35711756348609924, + "learning_rate": 1.0210748379633435e-05, + "loss": 0.5028, + "step": 35983 + }, + { + "epoch": 0.9880285557386052, + "grad_norm": 0.38752081990242004, + "learning_rate": 1.0210316581951613e-05, + "loss": 0.4793, + "step": 35984 + }, + { + "epoch": 0.9880560131795717, + "grad_norm": 0.4879627525806427, + "learning_rate": 1.0209884783877481e-05, + "loss": 0.4908, + "step": 35985 + }, + { + "epoch": 0.9880834706205381, + "grad_norm": 0.4289221167564392, + "learning_rate": 1.0209452985411847e-05, + "loss": 0.4995, + "step": 35986 + }, + { + "epoch": 0.9881109280615047, + "grad_norm": 0.3656303882598877, + "learning_rate": 1.0209021186555516e-05, + "loss": 0.423, + "step": 35987 + }, + { + "epoch": 0.9881383855024711, + "grad_norm": 0.38101592659950256, + "learning_rate": 1.0208589387309295e-05, + "loss": 0.41, + "step": 35988 + }, + { + "epoch": 0.9881658429434377, + "grad_norm": 0.39417845010757446, + "learning_rate": 1.020815758767399e-05, + "loss": 0.5106, + "step": 35989 + }, + { + "epoch": 0.9881933003844042, + "grad_norm": 0.39943158626556396, + "learning_rate": 1.0207725787650402e-05, + "loss": 0.446, + "step": 35990 + }, + { + "epoch": 0.9882207578253707, + "grad_norm": 0.4329339861869812, + "learning_rate": 1.020729398723934e-05, + "loss": 0.4945, + "step": 35991 + }, + { + "epoch": 0.9882482152663372, + "grad_norm": 0.45335114002227783, + "learning_rate": 1.0206862186441608e-05, + "loss": 0.5441, + "step": 35992 + }, + { + "epoch": 0.9882756727073037, + "grad_norm": 0.36752524971961975, + "learning_rate": 1.0206430385258013e-05, + "loss": 0.4126, + "step": 35993 + }, + { + "epoch": 0.9883031301482702, + "grad_norm": 0.41001251339912415, + "learning_rate": 1.020599858368936e-05, + "loss": 0.4513, + "step": 35994 + }, + { + "epoch": 0.9883305875892366, + "grad_norm": 0.42791658639907837, + "learning_rate": 1.0205566781736451e-05, + "loss": 0.4485, + "step": 35995 + }, + { + "epoch": 0.9883580450302032, + "grad_norm": 0.42211830615997314, + "learning_rate": 1.0205134979400097e-05, + "loss": 0.4363, + "step": 35996 + }, + { + "epoch": 0.9883855024711697, + "grad_norm": 0.43387001752853394, + "learning_rate": 1.0204703176681099e-05, + "loss": 0.4416, + "step": 35997 + }, + { + "epoch": 0.9884129599121362, + "grad_norm": 0.42837923765182495, + "learning_rate": 1.0204271373580265e-05, + "loss": 0.5469, + "step": 35998 + }, + { + "epoch": 0.9884404173531027, + "grad_norm": 0.40141788125038147, + "learning_rate": 1.0203839570098402e-05, + "loss": 0.4596, + "step": 35999 + }, + { + "epoch": 0.9884678747940692, + "grad_norm": 0.38804134726524353, + "learning_rate": 1.0203407766236308e-05, + "loss": 0.4534, + "step": 36000 + }, + { + "epoch": 0.9884953322350357, + "grad_norm": 0.4501250088214874, + "learning_rate": 1.0202975961994799e-05, + "loss": 0.5149, + "step": 36001 + }, + { + "epoch": 0.9885227896760022, + "grad_norm": 0.45680081844329834, + "learning_rate": 1.0202544157374669e-05, + "loss": 0.4822, + "step": 36002 + }, + { + "epoch": 0.9885502471169687, + "grad_norm": 0.40002503991127014, + "learning_rate": 1.0202112352376734e-05, + "loss": 0.5048, + "step": 36003 + }, + { + "epoch": 0.9885777045579353, + "grad_norm": 0.3943614363670349, + "learning_rate": 1.0201680547001795e-05, + "loss": 0.4682, + "step": 36004 + }, + { + "epoch": 0.9886051619989017, + "grad_norm": 0.3875051736831665, + "learning_rate": 1.0201248741250656e-05, + "loss": 0.4312, + "step": 36005 + }, + { + "epoch": 0.9886326194398682, + "grad_norm": 0.4051513969898224, + "learning_rate": 1.0200816935124124e-05, + "loss": 0.5095, + "step": 36006 + }, + { + "epoch": 0.9886600768808347, + "grad_norm": 0.3891288638114929, + "learning_rate": 1.0200385128623005e-05, + "loss": 0.4834, + "step": 36007 + }, + { + "epoch": 0.9886875343218012, + "grad_norm": 0.42494919896125793, + "learning_rate": 1.0199953321748105e-05, + "loss": 0.5856, + "step": 36008 + }, + { + "epoch": 0.9887149917627677, + "grad_norm": 0.40577074885368347, + "learning_rate": 1.0199521514500223e-05, + "loss": 0.5108, + "step": 36009 + }, + { + "epoch": 0.9887424492037342, + "grad_norm": 0.3763245642185211, + "learning_rate": 1.0199089706880174e-05, + "loss": 0.4856, + "step": 36010 + }, + { + "epoch": 0.9887699066447008, + "grad_norm": 0.37040647864341736, + "learning_rate": 1.019865789888876e-05, + "loss": 0.4615, + "step": 36011 + }, + { + "epoch": 0.9887973640856672, + "grad_norm": 0.4971982538700104, + "learning_rate": 1.0198226090526785e-05, + "loss": 0.4984, + "step": 36012 + }, + { + "epoch": 0.9888248215266338, + "grad_norm": 0.3937513828277588, + "learning_rate": 1.0197794281795052e-05, + "loss": 0.538, + "step": 36013 + }, + { + "epoch": 0.9888522789676002, + "grad_norm": 0.5317143201828003, + "learning_rate": 1.0197362472694372e-05, + "loss": 0.6008, + "step": 36014 + }, + { + "epoch": 0.9888797364085667, + "grad_norm": 0.4030527174472809, + "learning_rate": 1.0196930663225548e-05, + "loss": 0.5253, + "step": 36015 + }, + { + "epoch": 0.9889071938495332, + "grad_norm": 0.46623173356056213, + "learning_rate": 1.0196498853389387e-05, + "loss": 0.6136, + "step": 36016 + }, + { + "epoch": 0.9889346512904997, + "grad_norm": 0.448650985956192, + "learning_rate": 1.0196067043186688e-05, + "loss": 0.5188, + "step": 36017 + }, + { + "epoch": 0.9889621087314663, + "grad_norm": 0.3829805552959442, + "learning_rate": 1.0195635232618266e-05, + "loss": 0.4285, + "step": 36018 + }, + { + "epoch": 0.9889895661724327, + "grad_norm": 0.40322497487068176, + "learning_rate": 1.019520342168492e-05, + "loss": 0.3968, + "step": 36019 + }, + { + "epoch": 0.9890170236133993, + "grad_norm": 0.4361368715763092, + "learning_rate": 1.0194771610387458e-05, + "loss": 0.4896, + "step": 36020 + }, + { + "epoch": 0.9890444810543657, + "grad_norm": 0.43198761343955994, + "learning_rate": 1.0194339798726685e-05, + "loss": 0.5252, + "step": 36021 + }, + { + "epoch": 0.9890719384953323, + "grad_norm": 0.40842097997665405, + "learning_rate": 1.0193907986703405e-05, + "loss": 0.5189, + "step": 36022 + }, + { + "epoch": 0.9890993959362987, + "grad_norm": 0.43640220165252686, + "learning_rate": 1.0193476174318426e-05, + "loss": 0.505, + "step": 36023 + }, + { + "epoch": 0.9891268533772652, + "grad_norm": 0.4063473045825958, + "learning_rate": 1.019304436157255e-05, + "loss": 0.4753, + "step": 36024 + }, + { + "epoch": 0.9891543108182317, + "grad_norm": 0.49594804644584656, + "learning_rate": 1.0192612548466587e-05, + "loss": 0.514, + "step": 36025 + }, + { + "epoch": 0.9891817682591982, + "grad_norm": 0.4195566177368164, + "learning_rate": 1.0192180735001338e-05, + "loss": 0.5445, + "step": 36026 + }, + { + "epoch": 0.9892092257001648, + "grad_norm": 0.4353272616863251, + "learning_rate": 1.0191748921177613e-05, + "loss": 0.5492, + "step": 36027 + }, + { + "epoch": 0.9892366831411312, + "grad_norm": 0.40935418009757996, + "learning_rate": 1.0191317106996214e-05, + "loss": 0.5512, + "step": 36028 + }, + { + "epoch": 0.9892641405820978, + "grad_norm": 0.43198245763778687, + "learning_rate": 1.0190885292457949e-05, + "loss": 0.5127, + "step": 36029 + }, + { + "epoch": 0.9892915980230642, + "grad_norm": 0.44679170846939087, + "learning_rate": 1.019045347756362e-05, + "loss": 0.483, + "step": 36030 + }, + { + "epoch": 0.9893190554640308, + "grad_norm": 0.4192979037761688, + "learning_rate": 1.0190021662314033e-05, + "loss": 0.4273, + "step": 36031 + }, + { + "epoch": 0.9893465129049972, + "grad_norm": 0.36598774790763855, + "learning_rate": 1.0189589846709997e-05, + "loss": 0.4903, + "step": 36032 + }, + { + "epoch": 0.9893739703459637, + "grad_norm": 0.41282570362091064, + "learning_rate": 1.0189158030752318e-05, + "loss": 0.5171, + "step": 36033 + }, + { + "epoch": 0.9894014277869303, + "grad_norm": 0.3772784471511841, + "learning_rate": 1.0188726214441795e-05, + "loss": 0.5392, + "step": 36034 + }, + { + "epoch": 0.9894288852278967, + "grad_norm": 0.4146813452243805, + "learning_rate": 1.018829439777924e-05, + "loss": 0.4822, + "step": 36035 + }, + { + "epoch": 0.9894563426688633, + "grad_norm": 0.3918597102165222, + "learning_rate": 1.0187862580765452e-05, + "loss": 0.5157, + "step": 36036 + }, + { + "epoch": 0.9894838001098297, + "grad_norm": 0.4126170873641968, + "learning_rate": 1.0187430763401245e-05, + "loss": 0.4771, + "step": 36037 + }, + { + "epoch": 0.9895112575507963, + "grad_norm": 0.4012683928012848, + "learning_rate": 1.0186998945687417e-05, + "loss": 0.5143, + "step": 36038 + }, + { + "epoch": 0.9895387149917627, + "grad_norm": 0.39591649174690247, + "learning_rate": 1.0186567127624776e-05, + "loss": 0.4913, + "step": 36039 + }, + { + "epoch": 0.9895661724327293, + "grad_norm": 0.40440794825553894, + "learning_rate": 1.0186135309214129e-05, + "loss": 0.4801, + "step": 36040 + }, + { + "epoch": 0.9895936298736958, + "grad_norm": 0.39094114303588867, + "learning_rate": 1.0185703490456281e-05, + "loss": 0.6031, + "step": 36041 + }, + { + "epoch": 0.9896210873146623, + "grad_norm": 0.42453572154045105, + "learning_rate": 1.0185271671352035e-05, + "loss": 0.5239, + "step": 36042 + }, + { + "epoch": 0.9896485447556288, + "grad_norm": 0.37585943937301636, + "learning_rate": 1.01848398519022e-05, + "loss": 0.4269, + "step": 36043 + }, + { + "epoch": 0.9896760021965952, + "grad_norm": 0.3933779299259186, + "learning_rate": 1.0184408032107577e-05, + "loss": 0.4831, + "step": 36044 + }, + { + "epoch": 0.9897034596375618, + "grad_norm": 0.4277130663394928, + "learning_rate": 1.0183976211968976e-05, + "loss": 0.51, + "step": 36045 + }, + { + "epoch": 0.9897309170785282, + "grad_norm": 0.37007811665534973, + "learning_rate": 1.01835443914872e-05, + "loss": 0.4479, + "step": 36046 + }, + { + "epoch": 0.9897583745194948, + "grad_norm": 0.4101639688014984, + "learning_rate": 1.0183112570663056e-05, + "loss": 0.4381, + "step": 36047 + }, + { + "epoch": 0.9897858319604613, + "grad_norm": 0.33831095695495605, + "learning_rate": 1.018268074949735e-05, + "loss": 0.4583, + "step": 36048 + }, + { + "epoch": 0.9898132894014278, + "grad_norm": 0.5151986479759216, + "learning_rate": 1.0182248927990881e-05, + "loss": 0.4936, + "step": 36049 + }, + { + "epoch": 0.9898407468423943, + "grad_norm": 0.41054007411003113, + "learning_rate": 1.0181817106144465e-05, + "loss": 0.5571, + "step": 36050 + }, + { + "epoch": 0.9898682042833608, + "grad_norm": 0.39487960934638977, + "learning_rate": 1.01813852839589e-05, + "loss": 0.4481, + "step": 36051 + }, + { + "epoch": 0.9898956617243273, + "grad_norm": 0.3563891351222992, + "learning_rate": 1.0180953461434992e-05, + "loss": 0.416, + "step": 36052 + }, + { + "epoch": 0.9899231191652937, + "grad_norm": 0.39248061180114746, + "learning_rate": 1.018052163857355e-05, + "loss": 0.5432, + "step": 36053 + }, + { + "epoch": 0.9899505766062603, + "grad_norm": 0.43844056129455566, + "learning_rate": 1.0180089815375376e-05, + "loss": 0.5391, + "step": 36054 + }, + { + "epoch": 0.9899780340472268, + "grad_norm": 0.42017874121665955, + "learning_rate": 1.0179657991841279e-05, + "loss": 0.5749, + "step": 36055 + }, + { + "epoch": 0.9900054914881933, + "grad_norm": 0.45329874753952026, + "learning_rate": 1.0179226167972063e-05, + "loss": 0.4759, + "step": 36056 + }, + { + "epoch": 0.9900329489291598, + "grad_norm": 0.43620213866233826, + "learning_rate": 1.017879434376853e-05, + "loss": 0.4586, + "step": 36057 + }, + { + "epoch": 0.9900604063701263, + "grad_norm": 0.4587477445602417, + "learning_rate": 1.017836251923149e-05, + "loss": 0.4763, + "step": 36058 + }, + { + "epoch": 0.9900878638110928, + "grad_norm": 0.9359285831451416, + "learning_rate": 1.0177930694361748e-05, + "loss": 0.5253, + "step": 36059 + }, + { + "epoch": 0.9901153212520593, + "grad_norm": 0.3340023458003998, + "learning_rate": 1.0177498869160106e-05, + "loss": 0.379, + "step": 36060 + }, + { + "epoch": 0.9901427786930258, + "grad_norm": 0.4080970287322998, + "learning_rate": 1.0177067043627375e-05, + "loss": 0.4721, + "step": 36061 + }, + { + "epoch": 0.9901702361339924, + "grad_norm": 0.3415207266807556, + "learning_rate": 1.0176635217764355e-05, + "loss": 0.4366, + "step": 36062 + }, + { + "epoch": 0.9901976935749588, + "grad_norm": 0.4239104390144348, + "learning_rate": 1.0176203391571855e-05, + "loss": 0.5726, + "step": 36063 + }, + { + "epoch": 0.9902251510159253, + "grad_norm": 0.40568044781684875, + "learning_rate": 1.017577156505068e-05, + "loss": 0.4791, + "step": 36064 + }, + { + "epoch": 0.9902526084568918, + "grad_norm": 0.49049901962280273, + "learning_rate": 1.0175339738201633e-05, + "loss": 0.5048, + "step": 36065 + }, + { + "epoch": 0.9902800658978583, + "grad_norm": 0.4074021875858307, + "learning_rate": 1.0174907911025523e-05, + "loss": 0.4839, + "step": 36066 + }, + { + "epoch": 0.9903075233388248, + "grad_norm": 0.37671926617622375, + "learning_rate": 1.0174476083523152e-05, + "loss": 0.4702, + "step": 36067 + }, + { + "epoch": 0.9903349807797913, + "grad_norm": 0.4358291029930115, + "learning_rate": 1.017404425569533e-05, + "loss": 0.5155, + "step": 36068 + }, + { + "epoch": 0.9903624382207579, + "grad_norm": 0.38325536251068115, + "learning_rate": 1.017361242754286e-05, + "loss": 0.3921, + "step": 36069 + }, + { + "epoch": 0.9903898956617243, + "grad_norm": 0.4048998951911926, + "learning_rate": 1.0173180599066544e-05, + "loss": 0.416, + "step": 36070 + }, + { + "epoch": 0.9904173531026909, + "grad_norm": 0.4578258991241455, + "learning_rate": 1.0172748770267193e-05, + "loss": 0.5334, + "step": 36071 + }, + { + "epoch": 0.9904448105436573, + "grad_norm": 0.3807884454727173, + "learning_rate": 1.017231694114561e-05, + "loss": 0.4225, + "step": 36072 + }, + { + "epoch": 0.9904722679846238, + "grad_norm": 0.39607977867126465, + "learning_rate": 1.0171885111702604e-05, + "loss": 0.4918, + "step": 36073 + }, + { + "epoch": 0.9904997254255903, + "grad_norm": 0.41109538078308105, + "learning_rate": 1.0171453281938974e-05, + "loss": 0.5076, + "step": 36074 + }, + { + "epoch": 0.9905271828665568, + "grad_norm": 0.370983749628067, + "learning_rate": 1.017102145185553e-05, + "loss": 0.4498, + "step": 36075 + }, + { + "epoch": 0.9905546403075234, + "grad_norm": 0.35659804940223694, + "learning_rate": 1.0170589621453075e-05, + "loss": 0.4799, + "step": 36076 + }, + { + "epoch": 0.9905820977484898, + "grad_norm": 0.36830225586891174, + "learning_rate": 1.017015779073242e-05, + "loss": 0.4212, + "step": 36077 + }, + { + "epoch": 0.9906095551894564, + "grad_norm": 0.416180819272995, + "learning_rate": 1.016972595969436e-05, + "loss": 0.5222, + "step": 36078 + }, + { + "epoch": 0.9906370126304228, + "grad_norm": 0.4396577477455139, + "learning_rate": 1.0169294128339713e-05, + "loss": 0.5227, + "step": 36079 + }, + { + "epoch": 0.9906644700713894, + "grad_norm": 0.4329797029495239, + "learning_rate": 1.0168862296669274e-05, + "loss": 0.5099, + "step": 36080 + }, + { + "epoch": 0.9906919275123558, + "grad_norm": 0.4380686283111572, + "learning_rate": 1.0168430464683857e-05, + "loss": 0.5503, + "step": 36081 + }, + { + "epoch": 0.9907193849533223, + "grad_norm": 0.3878006935119629, + "learning_rate": 1.0167998632384262e-05, + "loss": 0.4948, + "step": 36082 + }, + { + "epoch": 0.9907468423942889, + "grad_norm": 0.4229143261909485, + "learning_rate": 1.0167566799771293e-05, + "loss": 0.4606, + "step": 36083 + }, + { + "epoch": 0.9907742998352553, + "grad_norm": 0.43099620938301086, + "learning_rate": 1.0167134966845762e-05, + "loss": 0.5353, + "step": 36084 + }, + { + "epoch": 0.9908017572762219, + "grad_norm": 0.7011162042617798, + "learning_rate": 1.0166703133608467e-05, + "loss": 0.5187, + "step": 36085 + }, + { + "epoch": 0.9908292147171883, + "grad_norm": 0.36763209104537964, + "learning_rate": 1.0166271300060222e-05, + "loss": 0.4524, + "step": 36086 + }, + { + "epoch": 0.9908566721581549, + "grad_norm": 0.42112892866134644, + "learning_rate": 1.0165839466201825e-05, + "loss": 0.5149, + "step": 36087 + }, + { + "epoch": 0.9908841295991213, + "grad_norm": 0.34965500235557556, + "learning_rate": 1.0165407632034086e-05, + "loss": 0.3704, + "step": 36088 + }, + { + "epoch": 0.9909115870400879, + "grad_norm": 0.38446009159088135, + "learning_rate": 1.0164975797557807e-05, + "loss": 0.4725, + "step": 36089 + }, + { + "epoch": 0.9909390444810544, + "grad_norm": 0.4125695824623108, + "learning_rate": 1.01645439627738e-05, + "loss": 0.4815, + "step": 36090 + }, + { + "epoch": 0.9909665019220208, + "grad_norm": 0.397901326417923, + "learning_rate": 1.0164112127682862e-05, + "loss": 0.4501, + "step": 36091 + }, + { + "epoch": 0.9909939593629874, + "grad_norm": 0.3927576541900635, + "learning_rate": 1.0163680292285802e-05, + "loss": 0.4966, + "step": 36092 + }, + { + "epoch": 0.9910214168039538, + "grad_norm": 0.36749890446662903, + "learning_rate": 1.0163248456583426e-05, + "loss": 0.4357, + "step": 36093 + }, + { + "epoch": 0.9910488742449204, + "grad_norm": 0.40724971890449524, + "learning_rate": 1.0162816620576543e-05, + "loss": 0.4818, + "step": 36094 + }, + { + "epoch": 0.9910763316858868, + "grad_norm": 0.4261913001537323, + "learning_rate": 1.0162384784265953e-05, + "loss": 0.5018, + "step": 36095 + }, + { + "epoch": 0.9911037891268534, + "grad_norm": 0.41237130761146545, + "learning_rate": 1.0161952947652463e-05, + "loss": 0.5238, + "step": 36096 + }, + { + "epoch": 0.9911312465678199, + "grad_norm": 0.43863511085510254, + "learning_rate": 1.0161521110736881e-05, + "loss": 0.4836, + "step": 36097 + }, + { + "epoch": 0.9911587040087864, + "grad_norm": 0.39005979895591736, + "learning_rate": 1.0161089273520007e-05, + "loss": 0.5476, + "step": 36098 + }, + { + "epoch": 0.9911861614497529, + "grad_norm": 0.39152541756629944, + "learning_rate": 1.0160657436002652e-05, + "loss": 0.501, + "step": 36099 + }, + { + "epoch": 0.9912136188907194, + "grad_norm": 0.38891759514808655, + "learning_rate": 1.0160225598185621e-05, + "loss": 0.4682, + "step": 36100 + }, + { + "epoch": 0.9912410763316859, + "grad_norm": 0.33485615253448486, + "learning_rate": 1.0159793760069716e-05, + "loss": 0.3817, + "step": 36101 + }, + { + "epoch": 0.9912685337726523, + "grad_norm": 0.4379116892814636, + "learning_rate": 1.0159361921655747e-05, + "loss": 0.5356, + "step": 36102 + }, + { + "epoch": 0.9912959912136189, + "grad_norm": 0.36743149161338806, + "learning_rate": 1.0158930082944514e-05, + "loss": 0.4416, + "step": 36103 + }, + { + "epoch": 0.9913234486545854, + "grad_norm": 0.39001327753067017, + "learning_rate": 1.0158498243936828e-05, + "loss": 0.4378, + "step": 36104 + }, + { + "epoch": 0.9913509060955519, + "grad_norm": 0.36384493112564087, + "learning_rate": 1.0158066404633492e-05, + "loss": 0.494, + "step": 36105 + }, + { + "epoch": 0.9913783635365184, + "grad_norm": 0.39939558506011963, + "learning_rate": 1.015763456503531e-05, + "loss": 0.4873, + "step": 36106 + }, + { + "epoch": 0.9914058209774849, + "grad_norm": 0.40751340985298157, + "learning_rate": 1.0157202725143092e-05, + "loss": 0.5455, + "step": 36107 + }, + { + "epoch": 0.9914332784184514, + "grad_norm": 0.4201103448867798, + "learning_rate": 1.0156770884957638e-05, + "loss": 0.4509, + "step": 36108 + }, + { + "epoch": 0.9914607358594179, + "grad_norm": 0.4375629723072052, + "learning_rate": 1.0156339044479756e-05, + "loss": 0.5482, + "step": 36109 + }, + { + "epoch": 0.9914881933003844, + "grad_norm": 0.35609275102615356, + "learning_rate": 1.0155907203710256e-05, + "loss": 0.42, + "step": 36110 + }, + { + "epoch": 0.991515650741351, + "grad_norm": 0.4169420003890991, + "learning_rate": 1.0155475362649936e-05, + "loss": 0.548, + "step": 36111 + }, + { + "epoch": 0.9915431081823174, + "grad_norm": 0.3904082179069519, + "learning_rate": 1.0155043521299605e-05, + "loss": 0.4788, + "step": 36112 + }, + { + "epoch": 0.9915705656232839, + "grad_norm": 0.39562007784843445, + "learning_rate": 1.015461167966007e-05, + "loss": 0.524, + "step": 36113 + }, + { + "epoch": 0.9915980230642504, + "grad_norm": 0.42130163311958313, + "learning_rate": 1.0154179837732132e-05, + "loss": 0.4606, + "step": 36114 + }, + { + "epoch": 0.9916254805052169, + "grad_norm": 0.4702787697315216, + "learning_rate": 1.0153747995516602e-05, + "loss": 0.486, + "step": 36115 + }, + { + "epoch": 0.9916529379461834, + "grad_norm": 0.41987180709838867, + "learning_rate": 1.0153316153014281e-05, + "loss": 0.5096, + "step": 36116 + }, + { + "epoch": 0.9916803953871499, + "grad_norm": 0.39047518372535706, + "learning_rate": 1.0152884310225978e-05, + "loss": 0.5243, + "step": 36117 + }, + { + "epoch": 0.9917078528281165, + "grad_norm": 0.3860674202442169, + "learning_rate": 1.0152452467152498e-05, + "loss": 0.4906, + "step": 36118 + }, + { + "epoch": 0.9917353102690829, + "grad_norm": 0.38789063692092896, + "learning_rate": 1.0152020623794643e-05, + "loss": 0.4972, + "step": 36119 + }, + { + "epoch": 0.9917627677100495, + "grad_norm": 0.4610002338886261, + "learning_rate": 1.0151588780153221e-05, + "loss": 0.5421, + "step": 36120 + }, + { + "epoch": 0.9917902251510159, + "grad_norm": 0.3975159525871277, + "learning_rate": 1.015115693622904e-05, + "loss": 0.4954, + "step": 36121 + }, + { + "epoch": 0.9918176825919824, + "grad_norm": 0.4000074565410614, + "learning_rate": 1.0150725092022899e-05, + "loss": 0.477, + "step": 36122 + }, + { + "epoch": 0.9918451400329489, + "grad_norm": 0.3887966275215149, + "learning_rate": 1.0150293247535612e-05, + "loss": 0.4871, + "step": 36123 + }, + { + "epoch": 0.9918725974739154, + "grad_norm": 0.797031819820404, + "learning_rate": 1.0149861402767977e-05, + "loss": 0.4577, + "step": 36124 + }, + { + "epoch": 0.991900054914882, + "grad_norm": 0.38122567534446716, + "learning_rate": 1.0149429557720805e-05, + "loss": 0.4537, + "step": 36125 + }, + { + "epoch": 0.9919275123558484, + "grad_norm": 0.5100933313369751, + "learning_rate": 1.01489977123949e-05, + "loss": 0.5788, + "step": 36126 + }, + { + "epoch": 0.991954969796815, + "grad_norm": 0.42949825525283813, + "learning_rate": 1.0148565866791062e-05, + "loss": 0.4646, + "step": 36127 + }, + { + "epoch": 0.9919824272377814, + "grad_norm": 0.43114349246025085, + "learning_rate": 1.0148134020910106e-05, + "loss": 0.5511, + "step": 36128 + }, + { + "epoch": 0.992009884678748, + "grad_norm": 0.43313583731651306, + "learning_rate": 1.014770217475283e-05, + "loss": 0.4891, + "step": 36129 + }, + { + "epoch": 0.9920373421197144, + "grad_norm": 0.3908849358558655, + "learning_rate": 1.0147270328320043e-05, + "loss": 0.4475, + "step": 36130 + }, + { + "epoch": 0.9920647995606809, + "grad_norm": 0.3966287672519684, + "learning_rate": 1.014683848161255e-05, + "loss": 0.5204, + "step": 36131 + }, + { + "epoch": 0.9920922570016475, + "grad_norm": 0.5394250750541687, + "learning_rate": 1.0146406634631155e-05, + "loss": 0.5024, + "step": 36132 + }, + { + "epoch": 0.9921197144426139, + "grad_norm": 0.3619430959224701, + "learning_rate": 1.0145974787376667e-05, + "loss": 0.4722, + "step": 36133 + }, + { + "epoch": 0.9921471718835805, + "grad_norm": 0.4126207232475281, + "learning_rate": 1.0145542939849885e-05, + "loss": 0.496, + "step": 36134 + }, + { + "epoch": 0.9921746293245469, + "grad_norm": 0.3828662931919098, + "learning_rate": 1.0145111092051625e-05, + "loss": 0.482, + "step": 36135 + }, + { + "epoch": 0.9922020867655135, + "grad_norm": 0.3600865602493286, + "learning_rate": 1.0144679243982684e-05, + "loss": 0.484, + "step": 36136 + }, + { + "epoch": 0.9922295442064799, + "grad_norm": 0.4285546541213989, + "learning_rate": 1.014424739564387e-05, + "loss": 0.5035, + "step": 36137 + }, + { + "epoch": 0.9922570016474465, + "grad_norm": 0.47009748220443726, + "learning_rate": 1.0143815547035989e-05, + "loss": 0.4958, + "step": 36138 + }, + { + "epoch": 0.992284459088413, + "grad_norm": 0.45229417085647583, + "learning_rate": 1.0143383698159848e-05, + "loss": 0.4866, + "step": 36139 + }, + { + "epoch": 0.9923119165293794, + "grad_norm": 0.4015055000782013, + "learning_rate": 1.0142951849016246e-05, + "loss": 0.5419, + "step": 36140 + }, + { + "epoch": 0.992339373970346, + "grad_norm": 0.3707212209701538, + "learning_rate": 1.0142519999605997e-05, + "loss": 0.4325, + "step": 36141 + }, + { + "epoch": 0.9923668314113124, + "grad_norm": 0.3791162073612213, + "learning_rate": 1.01420881499299e-05, + "loss": 0.3845, + "step": 36142 + }, + { + "epoch": 0.992394288852279, + "grad_norm": 0.36106806993484497, + "learning_rate": 1.0141656299988764e-05, + "loss": 0.3895, + "step": 36143 + }, + { + "epoch": 0.9924217462932454, + "grad_norm": 0.4328170120716095, + "learning_rate": 1.0141224449783395e-05, + "loss": 0.5701, + "step": 36144 + }, + { + "epoch": 0.992449203734212, + "grad_norm": 0.3614431321620941, + "learning_rate": 1.0140792599314597e-05, + "loss": 0.4342, + "step": 36145 + }, + { + "epoch": 0.9924766611751785, + "grad_norm": 0.42663243412971497, + "learning_rate": 1.0140360748583176e-05, + "loss": 0.543, + "step": 36146 + }, + { + "epoch": 0.992504118616145, + "grad_norm": 0.42506906390190125, + "learning_rate": 1.0139928897589937e-05, + "loss": 0.6689, + "step": 36147 + }, + { + "epoch": 0.9925315760571115, + "grad_norm": 0.41736626625061035, + "learning_rate": 1.0139497046335687e-05, + "loss": 0.4992, + "step": 36148 + }, + { + "epoch": 0.992559033498078, + "grad_norm": 0.3969409465789795, + "learning_rate": 1.0139065194821229e-05, + "loss": 0.4601, + "step": 36149 + }, + { + "epoch": 0.9925864909390445, + "grad_norm": 0.3834052085876465, + "learning_rate": 1.0138633343047368e-05, + "loss": 0.518, + "step": 36150 + }, + { + "epoch": 0.9926139483800109, + "grad_norm": 0.36072593927383423, + "learning_rate": 1.0138201491014916e-05, + "loss": 0.4995, + "step": 36151 + }, + { + "epoch": 0.9926414058209775, + "grad_norm": 0.7352675199508667, + "learning_rate": 1.0137769638724672e-05, + "loss": 0.5274, + "step": 36152 + }, + { + "epoch": 0.992668863261944, + "grad_norm": 0.4256822168827057, + "learning_rate": 1.0137337786177443e-05, + "loss": 0.4725, + "step": 36153 + }, + { + "epoch": 0.9926963207029105, + "grad_norm": 0.4190169870853424, + "learning_rate": 1.0136905933374038e-05, + "loss": 0.4936, + "step": 36154 + }, + { + "epoch": 0.992723778143877, + "grad_norm": 0.3602798581123352, + "learning_rate": 1.0136474080315255e-05, + "loss": 0.4094, + "step": 36155 + }, + { + "epoch": 0.9927512355848435, + "grad_norm": 0.4666096568107605, + "learning_rate": 1.0136042227001908e-05, + "loss": 0.5055, + "step": 36156 + }, + { + "epoch": 0.99277869302581, + "grad_norm": 0.3989529311656952, + "learning_rate": 1.01356103734348e-05, + "loss": 0.5336, + "step": 36157 + }, + { + "epoch": 0.9928061504667764, + "grad_norm": 0.3854342997074127, + "learning_rate": 1.013517851961473e-05, + "loss": 0.4392, + "step": 36158 + }, + { + "epoch": 0.992833607907743, + "grad_norm": 0.46753984689712524, + "learning_rate": 1.0134746665542513e-05, + "loss": 0.5098, + "step": 36159 + }, + { + "epoch": 0.9928610653487095, + "grad_norm": 0.43145301938056946, + "learning_rate": 1.0134314811218948e-05, + "loss": 0.4731, + "step": 36160 + }, + { + "epoch": 0.992888522789676, + "grad_norm": 0.3265478312969208, + "learning_rate": 1.0133882956644847e-05, + "loss": 0.3979, + "step": 36161 + }, + { + "epoch": 0.9929159802306425, + "grad_norm": 0.4319513440132141, + "learning_rate": 1.013345110182101e-05, + "loss": 0.5442, + "step": 36162 + }, + { + "epoch": 0.992943437671609, + "grad_norm": 0.4263421297073364, + "learning_rate": 1.0133019246748241e-05, + "loss": 0.4466, + "step": 36163 + }, + { + "epoch": 0.9929708951125755, + "grad_norm": 0.32617318630218506, + "learning_rate": 1.0132587391427352e-05, + "loss": 0.3597, + "step": 36164 + }, + { + "epoch": 0.992998352553542, + "grad_norm": 0.37918949127197266, + "learning_rate": 1.0132155535859142e-05, + "loss": 0.4141, + "step": 36165 + }, + { + "epoch": 0.9930258099945085, + "grad_norm": 0.4149651527404785, + "learning_rate": 1.0131723680044423e-05, + "loss": 0.5364, + "step": 36166 + }, + { + "epoch": 0.9930532674354751, + "grad_norm": 0.39559900760650635, + "learning_rate": 1.0131291823983998e-05, + "loss": 0.4716, + "step": 36167 + }, + { + "epoch": 0.9930807248764415, + "grad_norm": 0.537512481212616, + "learning_rate": 1.0130859967678668e-05, + "loss": 0.4492, + "step": 36168 + }, + { + "epoch": 0.993108182317408, + "grad_norm": 0.47782811522483826, + "learning_rate": 1.0130428111129244e-05, + "loss": 0.4964, + "step": 36169 + }, + { + "epoch": 0.9931356397583745, + "grad_norm": 0.4243136942386627, + "learning_rate": 1.0129996254336532e-05, + "loss": 0.6227, + "step": 36170 + }, + { + "epoch": 0.993163097199341, + "grad_norm": 0.4034324884414673, + "learning_rate": 1.0129564397301333e-05, + "loss": 0.4822, + "step": 36171 + }, + { + "epoch": 0.9931905546403075, + "grad_norm": 0.4176284372806549, + "learning_rate": 1.0129132540024456e-05, + "loss": 0.4898, + "step": 36172 + }, + { + "epoch": 0.993218012081274, + "grad_norm": 0.33006346225738525, + "learning_rate": 1.0128700682506704e-05, + "loss": 0.402, + "step": 36173 + }, + { + "epoch": 0.9932454695222406, + "grad_norm": 0.4284411668777466, + "learning_rate": 1.0128268824748886e-05, + "loss": 0.4758, + "step": 36174 + }, + { + "epoch": 0.993272926963207, + "grad_norm": 0.46221107244491577, + "learning_rate": 1.0127836966751808e-05, + "loss": 0.5044, + "step": 36175 + }, + { + "epoch": 0.9933003844041736, + "grad_norm": 0.4254124164581299, + "learning_rate": 1.0127405108516268e-05, + "loss": 0.4615, + "step": 36176 + }, + { + "epoch": 0.99332784184514, + "grad_norm": 0.36295586824417114, + "learning_rate": 1.012697325004308e-05, + "loss": 0.4409, + "step": 36177 + }, + { + "epoch": 0.9933552992861066, + "grad_norm": 0.42843642830848694, + "learning_rate": 1.0126541391333044e-05, + "loss": 0.456, + "step": 36178 + }, + { + "epoch": 0.993382756727073, + "grad_norm": 0.41248202323913574, + "learning_rate": 1.0126109532386971e-05, + "loss": 0.5275, + "step": 36179 + }, + { + "epoch": 0.9934102141680395, + "grad_norm": 0.3455396890640259, + "learning_rate": 1.0125677673205663e-05, + "loss": 0.4798, + "step": 36180 + }, + { + "epoch": 0.9934376716090061, + "grad_norm": 0.36558184027671814, + "learning_rate": 1.0125245813789924e-05, + "loss": 0.5049, + "step": 36181 + }, + { + "epoch": 0.9934651290499725, + "grad_norm": 0.3933556079864502, + "learning_rate": 1.0124813954140564e-05, + "loss": 0.4592, + "step": 36182 + }, + { + "epoch": 0.9934925864909391, + "grad_norm": 0.44083645939826965, + "learning_rate": 1.0124382094258382e-05, + "loss": 0.5139, + "step": 36183 + }, + { + "epoch": 0.9935200439319055, + "grad_norm": 0.45462659001350403, + "learning_rate": 1.0123950234144193e-05, + "loss": 0.4965, + "step": 36184 + }, + { + "epoch": 0.9935475013728721, + "grad_norm": 0.39680859446525574, + "learning_rate": 1.0123518373798795e-05, + "loss": 0.4676, + "step": 36185 + }, + { + "epoch": 0.9935749588138385, + "grad_norm": 0.4053981304168701, + "learning_rate": 1.0123086513222995e-05, + "loss": 0.5513, + "step": 36186 + }, + { + "epoch": 0.993602416254805, + "grad_norm": 0.4204159677028656, + "learning_rate": 1.01226546524176e-05, + "loss": 0.4387, + "step": 36187 + }, + { + "epoch": 0.9936298736957716, + "grad_norm": 0.38572728633880615, + "learning_rate": 1.0122222791383414e-05, + "loss": 0.4709, + "step": 36188 + }, + { + "epoch": 0.993657331136738, + "grad_norm": 0.37752124667167664, + "learning_rate": 1.0121790930121244e-05, + "loss": 0.4311, + "step": 36189 + }, + { + "epoch": 0.9936847885777046, + "grad_norm": 0.3952014148235321, + "learning_rate": 1.0121359068631895e-05, + "loss": 0.4829, + "step": 36190 + }, + { + "epoch": 0.993712246018671, + "grad_norm": 0.4968498945236206, + "learning_rate": 1.0120927206916173e-05, + "loss": 0.5262, + "step": 36191 + }, + { + "epoch": 0.9937397034596376, + "grad_norm": 0.3493383824825287, + "learning_rate": 1.0120495344974885e-05, + "loss": 0.4727, + "step": 36192 + }, + { + "epoch": 0.993767160900604, + "grad_norm": 0.39503079652786255, + "learning_rate": 1.0120063482808831e-05, + "loss": 0.4455, + "step": 36193 + }, + { + "epoch": 0.9937946183415706, + "grad_norm": 0.3911696970462799, + "learning_rate": 1.0119631620418823e-05, + "loss": 0.4877, + "step": 36194 + }, + { + "epoch": 0.9938220757825371, + "grad_norm": 0.38590800762176514, + "learning_rate": 1.0119199757805662e-05, + "loss": 0.4623, + "step": 36195 + }, + { + "epoch": 0.9938495332235036, + "grad_norm": 0.4091864824295044, + "learning_rate": 1.0118767894970159e-05, + "loss": 0.5051, + "step": 36196 + }, + { + "epoch": 0.9938769906644701, + "grad_norm": 0.3638581335544586, + "learning_rate": 1.0118336031913113e-05, + "loss": 0.4463, + "step": 36197 + }, + { + "epoch": 0.9939044481054365, + "grad_norm": 0.40671268105506897, + "learning_rate": 1.0117904168635331e-05, + "loss": 0.5925, + "step": 36198 + }, + { + "epoch": 0.9939319055464031, + "grad_norm": 0.38568761944770813, + "learning_rate": 1.0117472305137623e-05, + "loss": 0.4989, + "step": 36199 + }, + { + "epoch": 0.9939593629873695, + "grad_norm": 0.434415340423584, + "learning_rate": 1.011704044142079e-05, + "loss": 0.4667, + "step": 36200 + }, + { + "epoch": 0.9939868204283361, + "grad_norm": 0.5220248699188232, + "learning_rate": 1.0116608577485641e-05, + "loss": 0.455, + "step": 36201 + }, + { + "epoch": 0.9940142778693026, + "grad_norm": 0.6947186589241028, + "learning_rate": 1.0116176713332977e-05, + "loss": 0.5103, + "step": 36202 + }, + { + "epoch": 0.9940417353102691, + "grad_norm": 0.4247843325138092, + "learning_rate": 1.0115744848963609e-05, + "loss": 0.4795, + "step": 36203 + }, + { + "epoch": 0.9940691927512356, + "grad_norm": 0.4232196807861328, + "learning_rate": 1.0115312984378341e-05, + "loss": 0.5186, + "step": 36204 + }, + { + "epoch": 0.9940966501922021, + "grad_norm": 0.41762575507164, + "learning_rate": 1.0114881119577972e-05, + "loss": 0.5122, + "step": 36205 + }, + { + "epoch": 0.9941241076331686, + "grad_norm": 0.4278082251548767, + "learning_rate": 1.011444925456332e-05, + "loss": 0.483, + "step": 36206 + }, + { + "epoch": 0.994151565074135, + "grad_norm": 0.3635543882846832, + "learning_rate": 1.0114017389335177e-05, + "loss": 0.5355, + "step": 36207 + }, + { + "epoch": 0.9941790225151016, + "grad_norm": 0.406921923160553, + "learning_rate": 1.0113585523894359e-05, + "loss": 0.5077, + "step": 36208 + }, + { + "epoch": 0.9942064799560681, + "grad_norm": 0.44388648867607117, + "learning_rate": 1.0113153658241668e-05, + "loss": 0.4214, + "step": 36209 + }, + { + "epoch": 0.9942339373970346, + "grad_norm": 0.3634577691555023, + "learning_rate": 1.0112721792377907e-05, + "loss": 0.4434, + "step": 36210 + }, + { + "epoch": 0.9942613948380011, + "grad_norm": 0.37547120451927185, + "learning_rate": 1.0112289926303887e-05, + "loss": 0.4175, + "step": 36211 + }, + { + "epoch": 0.9942888522789676, + "grad_norm": 0.3991980254650116, + "learning_rate": 1.0111858060020408e-05, + "loss": 0.5866, + "step": 36212 + }, + { + "epoch": 0.9943163097199341, + "grad_norm": 0.3588138520717621, + "learning_rate": 1.0111426193528277e-05, + "loss": 0.4717, + "step": 36213 + }, + { + "epoch": 0.9943437671609006, + "grad_norm": 0.40688592195510864, + "learning_rate": 1.0110994326828305e-05, + "loss": 0.5251, + "step": 36214 + }, + { + "epoch": 0.9943712246018671, + "grad_norm": 0.40234455466270447, + "learning_rate": 1.011056245992129e-05, + "loss": 0.4943, + "step": 36215 + }, + { + "epoch": 0.9943986820428337, + "grad_norm": 0.3514452874660492, + "learning_rate": 1.0110130592808041e-05, + "loss": 0.4255, + "step": 36216 + }, + { + "epoch": 0.9944261394838001, + "grad_norm": 0.4270048141479492, + "learning_rate": 1.0109698725489361e-05, + "loss": 0.4849, + "step": 36217 + }, + { + "epoch": 0.9944535969247666, + "grad_norm": 0.40148279070854187, + "learning_rate": 1.0109266857966063e-05, + "loss": 0.5599, + "step": 36218 + }, + { + "epoch": 0.9944810543657331, + "grad_norm": 0.41165629029273987, + "learning_rate": 1.0108834990238944e-05, + "loss": 0.5061, + "step": 36219 + }, + { + "epoch": 0.9945085118066996, + "grad_norm": 0.4055713415145874, + "learning_rate": 1.0108403122308813e-05, + "loss": 0.4016, + "step": 36220 + }, + { + "epoch": 0.9945359692476661, + "grad_norm": 0.4241214394569397, + "learning_rate": 1.0107971254176476e-05, + "loss": 0.4553, + "step": 36221 + }, + { + "epoch": 0.9945634266886326, + "grad_norm": 0.4091799259185791, + "learning_rate": 1.0107539385842738e-05, + "loss": 0.5131, + "step": 36222 + }, + { + "epoch": 0.9945908841295992, + "grad_norm": 0.4368007779121399, + "learning_rate": 1.0107107517308406e-05, + "loss": 0.5082, + "step": 36223 + }, + { + "epoch": 0.9946183415705656, + "grad_norm": 0.3887348473072052, + "learning_rate": 1.0106675648574284e-05, + "loss": 0.4956, + "step": 36224 + }, + { + "epoch": 0.9946457990115322, + "grad_norm": 0.4033813178539276, + "learning_rate": 1.0106243779641176e-05, + "loss": 0.5412, + "step": 36225 + }, + { + "epoch": 0.9946732564524986, + "grad_norm": 0.4595888555049896, + "learning_rate": 1.0105811910509894e-05, + "loss": 0.3616, + "step": 36226 + }, + { + "epoch": 0.9947007138934651, + "grad_norm": 0.34764382243156433, + "learning_rate": 1.0105380041181233e-05, + "loss": 0.4592, + "step": 36227 + }, + { + "epoch": 0.9947281713344316, + "grad_norm": 0.421186238527298, + "learning_rate": 1.0104948171656007e-05, + "loss": 0.55, + "step": 36228 + }, + { + "epoch": 0.9947556287753981, + "grad_norm": 0.4288618564605713, + "learning_rate": 1.0104516301935021e-05, + "loss": 0.5172, + "step": 36229 + }, + { + "epoch": 0.9947830862163647, + "grad_norm": 0.3651333749294281, + "learning_rate": 1.0104084432019077e-05, + "loss": 0.4927, + "step": 36230 + }, + { + "epoch": 0.9948105436573311, + "grad_norm": 0.4012386202812195, + "learning_rate": 1.0103652561908984e-05, + "loss": 0.4281, + "step": 36231 + }, + { + "epoch": 0.9948380010982977, + "grad_norm": 0.4299789071083069, + "learning_rate": 1.0103220691605544e-05, + "loss": 0.5351, + "step": 36232 + }, + { + "epoch": 0.9948654585392641, + "grad_norm": 0.36181849241256714, + "learning_rate": 1.0102788821109563e-05, + "loss": 0.4879, + "step": 36233 + }, + { + "epoch": 0.9948929159802307, + "grad_norm": 0.3969278633594513, + "learning_rate": 1.0102356950421853e-05, + "loss": 0.4628, + "step": 36234 + }, + { + "epoch": 0.9949203734211971, + "grad_norm": 0.44546040892601013, + "learning_rate": 1.010192507954321e-05, + "loss": 0.4661, + "step": 36235 + }, + { + "epoch": 0.9949478308621637, + "grad_norm": 0.438734769821167, + "learning_rate": 1.0101493208474447e-05, + "loss": 0.5115, + "step": 36236 + }, + { + "epoch": 0.9949752883031302, + "grad_norm": 0.40464189648628235, + "learning_rate": 1.0101061337216367e-05, + "loss": 0.4514, + "step": 36237 + }, + { + "epoch": 0.9950027457440966, + "grad_norm": 0.4120367169380188, + "learning_rate": 1.010062946576977e-05, + "loss": 0.4527, + "step": 36238 + }, + { + "epoch": 0.9950302031850632, + "grad_norm": 0.39846712350845337, + "learning_rate": 1.0100197594135473e-05, + "loss": 0.455, + "step": 36239 + }, + { + "epoch": 0.9950576606260296, + "grad_norm": 0.3654295802116394, + "learning_rate": 1.0099765722314271e-05, + "loss": 0.5512, + "step": 36240 + }, + { + "epoch": 0.9950851180669962, + "grad_norm": 0.3982309401035309, + "learning_rate": 1.0099333850306979e-05, + "loss": 0.5415, + "step": 36241 + }, + { + "epoch": 0.9951125755079626, + "grad_norm": 0.40375617146492004, + "learning_rate": 1.0098901978114395e-05, + "loss": 0.445, + "step": 36242 + }, + { + "epoch": 0.9951400329489292, + "grad_norm": 0.44609084725379944, + "learning_rate": 1.0098470105737326e-05, + "loss": 0.5173, + "step": 36243 + }, + { + "epoch": 0.9951674903898957, + "grad_norm": 0.3651413023471832, + "learning_rate": 1.009803823317658e-05, + "loss": 0.4068, + "step": 36244 + }, + { + "epoch": 0.9951949478308622, + "grad_norm": 0.40080156922340393, + "learning_rate": 1.0097606360432966e-05, + "loss": 0.465, + "step": 36245 + }, + { + "epoch": 0.9952224052718287, + "grad_norm": 0.39775973558425903, + "learning_rate": 1.0097174487507278e-05, + "loss": 0.4779, + "step": 36246 + }, + { + "epoch": 0.9952498627127951, + "grad_norm": 0.39224958419799805, + "learning_rate": 1.0096742614400332e-05, + "loss": 0.5699, + "step": 36247 + }, + { + "epoch": 0.9952773201537617, + "grad_norm": 0.40632638335227966, + "learning_rate": 1.0096310741112927e-05, + "loss": 0.4647, + "step": 36248 + }, + { + "epoch": 0.9953047775947281, + "grad_norm": 0.4188957214355469, + "learning_rate": 1.0095878867645876e-05, + "loss": 0.4871, + "step": 36249 + }, + { + "epoch": 0.9953322350356947, + "grad_norm": 0.40434014797210693, + "learning_rate": 1.009544699399998e-05, + "loss": 0.5657, + "step": 36250 + }, + { + "epoch": 0.9953596924766612, + "grad_norm": 0.37849530577659607, + "learning_rate": 1.0095015120176042e-05, + "loss": 0.5226, + "step": 36251 + }, + { + "epoch": 0.9953871499176277, + "grad_norm": 0.454843133687973, + "learning_rate": 1.0094583246174871e-05, + "loss": 0.5727, + "step": 36252 + }, + { + "epoch": 0.9954146073585942, + "grad_norm": 0.4583251178264618, + "learning_rate": 1.0094151371997272e-05, + "loss": 0.5807, + "step": 36253 + }, + { + "epoch": 0.9954420647995607, + "grad_norm": 0.4191831648349762, + "learning_rate": 1.0093719497644052e-05, + "loss": 0.5221, + "step": 36254 + }, + { + "epoch": 0.9954695222405272, + "grad_norm": 0.38216009736061096, + "learning_rate": 1.0093287623116017e-05, + "loss": 0.4617, + "step": 36255 + }, + { + "epoch": 0.9954969796814936, + "grad_norm": 0.45879700779914856, + "learning_rate": 1.0092855748413965e-05, + "loss": 0.4463, + "step": 36256 + }, + { + "epoch": 0.9955244371224602, + "grad_norm": 0.40815335512161255, + "learning_rate": 1.0092423873538712e-05, + "loss": 0.4191, + "step": 36257 + }, + { + "epoch": 0.9955518945634267, + "grad_norm": 0.4069272577762604, + "learning_rate": 1.0091991998491057e-05, + "loss": 0.4581, + "step": 36258 + }, + { + "epoch": 0.9955793520043932, + "grad_norm": 0.3833152949810028, + "learning_rate": 1.009156012327181e-05, + "loss": 0.483, + "step": 36259 + }, + { + "epoch": 0.9956068094453597, + "grad_norm": 0.47492069005966187, + "learning_rate": 1.0091128247881773e-05, + "loss": 0.4847, + "step": 36260 + }, + { + "epoch": 0.9956342668863262, + "grad_norm": 0.41485142707824707, + "learning_rate": 1.009069637232175e-05, + "loss": 0.5146, + "step": 36261 + }, + { + "epoch": 0.9956617243272927, + "grad_norm": 0.40766990184783936, + "learning_rate": 1.0090264496592554e-05, + "loss": 0.4598, + "step": 36262 + }, + { + "epoch": 0.9956891817682592, + "grad_norm": 0.32176411151885986, + "learning_rate": 1.0089832620694983e-05, + "loss": 0.4094, + "step": 36263 + }, + { + "epoch": 0.9957166392092257, + "grad_norm": 0.42441073060035706, + "learning_rate": 1.0089400744629844e-05, + "loss": 0.4891, + "step": 36264 + }, + { + "epoch": 0.9957440966501923, + "grad_norm": 0.38113123178482056, + "learning_rate": 1.0088968868397948e-05, + "loss": 0.465, + "step": 36265 + }, + { + "epoch": 0.9957715540911587, + "grad_norm": 0.4230332672595978, + "learning_rate": 1.0088536992000094e-05, + "loss": 0.4708, + "step": 36266 + }, + { + "epoch": 0.9957990115321252, + "grad_norm": 0.4343395233154297, + "learning_rate": 1.0088105115437092e-05, + "loss": 0.4956, + "step": 36267 + }, + { + "epoch": 0.9958264689730917, + "grad_norm": 0.4333374798297882, + "learning_rate": 1.0087673238709746e-05, + "loss": 0.4784, + "step": 36268 + }, + { + "epoch": 0.9958539264140582, + "grad_norm": 0.45585885643959045, + "learning_rate": 1.0087241361818857e-05, + "loss": 0.5911, + "step": 36269 + }, + { + "epoch": 0.9958813838550247, + "grad_norm": 0.4042471945285797, + "learning_rate": 1.008680948476524e-05, + "loss": 0.506, + "step": 36270 + }, + { + "epoch": 0.9959088412959912, + "grad_norm": 0.41347214579582214, + "learning_rate": 1.0086377607549693e-05, + "loss": 0.4123, + "step": 36271 + }, + { + "epoch": 0.9959362987369578, + "grad_norm": 0.414064884185791, + "learning_rate": 1.0085945730173025e-05, + "loss": 0.4407, + "step": 36272 + }, + { + "epoch": 0.9959637561779242, + "grad_norm": 0.438445508480072, + "learning_rate": 1.0085513852636044e-05, + "loss": 0.4554, + "step": 36273 + }, + { + "epoch": 0.9959912136188908, + "grad_norm": 0.3808782994747162, + "learning_rate": 1.0085081974939548e-05, + "loss": 0.457, + "step": 36274 + }, + { + "epoch": 0.9960186710598572, + "grad_norm": 0.40578538179397583, + "learning_rate": 1.0084650097084348e-05, + "loss": 0.4288, + "step": 36275 + }, + { + "epoch": 0.9960461285008237, + "grad_norm": 0.40050065517425537, + "learning_rate": 1.0084218219071248e-05, + "loss": 0.5175, + "step": 36276 + }, + { + "epoch": 0.9960735859417902, + "grad_norm": 0.3736494183540344, + "learning_rate": 1.0083786340901056e-05, + "loss": 0.5295, + "step": 36277 + }, + { + "epoch": 0.9961010433827567, + "grad_norm": 0.38550063967704773, + "learning_rate": 1.0083354462574576e-05, + "loss": 0.4673, + "step": 36278 + }, + { + "epoch": 0.9961285008237233, + "grad_norm": 0.3951812982559204, + "learning_rate": 1.0082922584092609e-05, + "loss": 0.4422, + "step": 36279 + }, + { + "epoch": 0.9961559582646897, + "grad_norm": 0.3639346659183502, + "learning_rate": 1.0082490705455969e-05, + "loss": 0.4215, + "step": 36280 + }, + { + "epoch": 0.9961834157056563, + "grad_norm": 0.5479905605316162, + "learning_rate": 1.0082058826665457e-05, + "loss": 0.5565, + "step": 36281 + }, + { + "epoch": 0.9962108731466227, + "grad_norm": 0.3765161633491516, + "learning_rate": 1.008162694772188e-05, + "loss": 0.4785, + "step": 36282 + }, + { + "epoch": 0.9962383305875893, + "grad_norm": 0.37133052945137024, + "learning_rate": 1.0081195068626039e-05, + "loss": 0.4974, + "step": 36283 + }, + { + "epoch": 0.9962657880285557, + "grad_norm": 0.40959665179252625, + "learning_rate": 1.0080763189378744e-05, + "loss": 0.4587, + "step": 36284 + }, + { + "epoch": 0.9962932454695222, + "grad_norm": 0.4250530004501343, + "learning_rate": 1.0080331309980801e-05, + "loss": 0.4794, + "step": 36285 + }, + { + "epoch": 0.9963207029104888, + "grad_norm": 0.38587844371795654, + "learning_rate": 1.0079899430433018e-05, + "loss": 0.4384, + "step": 36286 + }, + { + "epoch": 0.9963481603514552, + "grad_norm": 0.4024626612663269, + "learning_rate": 1.0079467550736193e-05, + "loss": 0.5182, + "step": 36287 + }, + { + "epoch": 0.9963756177924218, + "grad_norm": 0.40284988284111023, + "learning_rate": 1.0079035670891137e-05, + "loss": 0.4743, + "step": 36288 + }, + { + "epoch": 0.9964030752333882, + "grad_norm": 0.3785414397716522, + "learning_rate": 1.0078603790898651e-05, + "loss": 0.4408, + "step": 36289 + }, + { + "epoch": 0.9964305326743548, + "grad_norm": 0.34609872102737427, + "learning_rate": 1.0078171910759549e-05, + "loss": 0.3987, + "step": 36290 + }, + { + "epoch": 0.9964579901153212, + "grad_norm": 0.38752779364585876, + "learning_rate": 1.007774003047463e-05, + "loss": 0.4381, + "step": 36291 + }, + { + "epoch": 0.9964854475562878, + "grad_norm": 0.42407986521720886, + "learning_rate": 1.0077308150044698e-05, + "loss": 0.5102, + "step": 36292 + }, + { + "epoch": 0.9965129049972542, + "grad_norm": 0.3780902922153473, + "learning_rate": 1.0076876269470565e-05, + "loss": 0.4051, + "step": 36293 + }, + { + "epoch": 0.9965403624382208, + "grad_norm": 0.31430232524871826, + "learning_rate": 1.0076444388753033e-05, + "loss": 0.4139, + "step": 36294 + }, + { + "epoch": 0.9965678198791873, + "grad_norm": 0.3889758288860321, + "learning_rate": 1.0076012507892904e-05, + "loss": 0.5418, + "step": 36295 + }, + { + "epoch": 0.9965952773201537, + "grad_norm": 0.38234633207321167, + "learning_rate": 1.0075580626890993e-05, + "loss": 0.4656, + "step": 36296 + }, + { + "epoch": 0.9966227347611203, + "grad_norm": 0.5597728490829468, + "learning_rate": 1.0075148745748096e-05, + "loss": 0.4174, + "step": 36297 + }, + { + "epoch": 0.9966501922020867, + "grad_norm": 0.4654043912887573, + "learning_rate": 1.0074716864465023e-05, + "loss": 0.5544, + "step": 36298 + }, + { + "epoch": 0.9966776496430533, + "grad_norm": 0.3833499848842621, + "learning_rate": 1.0074284983042582e-05, + "loss": 0.4713, + "step": 36299 + }, + { + "epoch": 0.9967051070840197, + "grad_norm": 0.4033399224281311, + "learning_rate": 1.0073853101481572e-05, + "loss": 0.4941, + "step": 36300 + }, + { + "epoch": 0.9967325645249863, + "grad_norm": 0.4356842637062073, + "learning_rate": 1.0073421219782803e-05, + "loss": 0.5201, + "step": 36301 + }, + { + "epoch": 0.9967600219659528, + "grad_norm": 0.3896835148334503, + "learning_rate": 1.007298933794708e-05, + "loss": 0.4783, + "step": 36302 + }, + { + "epoch": 0.9967874794069193, + "grad_norm": 0.5673554539680481, + "learning_rate": 1.0072557455975211e-05, + "loss": 0.4994, + "step": 36303 + }, + { + "epoch": 0.9968149368478858, + "grad_norm": 0.484427809715271, + "learning_rate": 1.0072125573868e-05, + "loss": 0.5901, + "step": 36304 + }, + { + "epoch": 0.9968423942888522, + "grad_norm": 0.36587032675743103, + "learning_rate": 1.0071693691626247e-05, + "loss": 0.4668, + "step": 36305 + }, + { + "epoch": 0.9968698517298188, + "grad_norm": 0.5702548027038574, + "learning_rate": 1.0071261809250768e-05, + "loss": 0.4935, + "step": 36306 + }, + { + "epoch": 0.9968973091707852, + "grad_norm": 0.4154992401599884, + "learning_rate": 1.0070829926742359e-05, + "loss": 0.4088, + "step": 36307 + }, + { + "epoch": 0.9969247666117518, + "grad_norm": 0.37238091230392456, + "learning_rate": 1.007039804410183e-05, + "loss": 0.5457, + "step": 36308 + }, + { + "epoch": 0.9969522240527183, + "grad_norm": 0.3797541558742523, + "learning_rate": 1.0069966161329988e-05, + "loss": 0.519, + "step": 36309 + }, + { + "epoch": 0.9969796814936848, + "grad_norm": 0.3842678368091583, + "learning_rate": 1.0069534278427635e-05, + "loss": 0.4812, + "step": 36310 + }, + { + "epoch": 0.9970071389346513, + "grad_norm": 0.34366562962532043, + "learning_rate": 1.006910239539558e-05, + "loss": 0.4585, + "step": 36311 + }, + { + "epoch": 0.9970345963756178, + "grad_norm": 0.4334462583065033, + "learning_rate": 1.0068670512234627e-05, + "loss": 0.5304, + "step": 36312 + }, + { + "epoch": 0.9970620538165843, + "grad_norm": 0.4129803478717804, + "learning_rate": 1.0068238628945579e-05, + "loss": 0.5031, + "step": 36313 + }, + { + "epoch": 0.9970895112575507, + "grad_norm": 0.40476194024086, + "learning_rate": 1.0067806745529245e-05, + "loss": 0.5306, + "step": 36314 + }, + { + "epoch": 0.9971169686985173, + "grad_norm": 0.41483527421951294, + "learning_rate": 1.006737486198643e-05, + "loss": 0.5017, + "step": 36315 + }, + { + "epoch": 0.9971444261394838, + "grad_norm": 0.4459932744503021, + "learning_rate": 1.0066942978317941e-05, + "loss": 0.5264, + "step": 36316 + }, + { + "epoch": 0.9971718835804503, + "grad_norm": 0.37012356519699097, + "learning_rate": 1.0066511094524582e-05, + "loss": 0.4977, + "step": 36317 + }, + { + "epoch": 0.9971993410214168, + "grad_norm": 0.5116080045700073, + "learning_rate": 1.0066079210607156e-05, + "loss": 0.4271, + "step": 36318 + }, + { + "epoch": 0.9972267984623833, + "grad_norm": 0.35533830523490906, + "learning_rate": 1.0065647326566473e-05, + "loss": 0.5219, + "step": 36319 + }, + { + "epoch": 0.9972542559033498, + "grad_norm": 0.4461471736431122, + "learning_rate": 1.0065215442403336e-05, + "loss": 0.5806, + "step": 36320 + }, + { + "epoch": 0.9972817133443163, + "grad_norm": 0.4027935266494751, + "learning_rate": 1.0064783558118551e-05, + "loss": 0.5011, + "step": 36321 + }, + { + "epoch": 0.9973091707852828, + "grad_norm": 0.32781893014907837, + "learning_rate": 1.0064351673712927e-05, + "loss": 0.457, + "step": 36322 + }, + { + "epoch": 0.9973366282262494, + "grad_norm": 0.46176549792289734, + "learning_rate": 1.0063919789187262e-05, + "loss": 0.5338, + "step": 36323 + }, + { + "epoch": 0.9973640856672158, + "grad_norm": 0.38303858041763306, + "learning_rate": 1.006348790454237e-05, + "loss": 0.4587, + "step": 36324 + }, + { + "epoch": 0.9973915431081823, + "grad_norm": 0.39849427342414856, + "learning_rate": 1.0063056019779051e-05, + "loss": 0.4359, + "step": 36325 + }, + { + "epoch": 0.9974190005491488, + "grad_norm": 0.3855247497558594, + "learning_rate": 1.0062624134898113e-05, + "loss": 0.52, + "step": 36326 + }, + { + "epoch": 0.9974464579901153, + "grad_norm": 0.3778969645500183, + "learning_rate": 1.0062192249900362e-05, + "loss": 0.4372, + "step": 36327 + }, + { + "epoch": 0.9974739154310818, + "grad_norm": 0.4757137596607208, + "learning_rate": 1.0061760364786599e-05, + "loss": 0.4953, + "step": 36328 + }, + { + "epoch": 0.9975013728720483, + "grad_norm": 0.4316854178905487, + "learning_rate": 1.0061328479557637e-05, + "loss": 0.4554, + "step": 36329 + }, + { + "epoch": 0.9975288303130149, + "grad_norm": 0.3986954391002655, + "learning_rate": 1.0060896594214278e-05, + "loss": 0.4779, + "step": 36330 + }, + { + "epoch": 0.9975562877539813, + "grad_norm": 0.4001389741897583, + "learning_rate": 1.0060464708757322e-05, + "loss": 0.5237, + "step": 36331 + }, + { + "epoch": 0.9975837451949479, + "grad_norm": 0.37973204255104065, + "learning_rate": 1.0060032823187587e-05, + "loss": 0.4825, + "step": 36332 + }, + { + "epoch": 0.9976112026359143, + "grad_norm": 0.37401750683784485, + "learning_rate": 1.0059600937505867e-05, + "loss": 0.4802, + "step": 36333 + }, + { + "epoch": 0.9976386600768808, + "grad_norm": 0.39543402194976807, + "learning_rate": 1.0059169051712973e-05, + "loss": 0.53, + "step": 36334 + }, + { + "epoch": 0.9976661175178473, + "grad_norm": 0.44876113533973694, + "learning_rate": 1.0058737165809713e-05, + "loss": 0.532, + "step": 36335 + }, + { + "epoch": 0.9976935749588138, + "grad_norm": 0.36361831426620483, + "learning_rate": 1.0058305279796885e-05, + "loss": 0.4945, + "step": 36336 + }, + { + "epoch": 0.9977210323997804, + "grad_norm": 0.442708820104599, + "learning_rate": 1.0057873393675301e-05, + "loss": 0.4277, + "step": 36337 + }, + { + "epoch": 0.9977484898407468, + "grad_norm": 0.41339465975761414, + "learning_rate": 1.0057441507445765e-05, + "loss": 0.4916, + "step": 36338 + }, + { + "epoch": 0.9977759472817134, + "grad_norm": 0.38590776920318604, + "learning_rate": 1.0057009621109082e-05, + "loss": 0.4657, + "step": 36339 + }, + { + "epoch": 0.9978034047226798, + "grad_norm": 0.46026915311813354, + "learning_rate": 1.0056577734666058e-05, + "loss": 0.4524, + "step": 36340 + }, + { + "epoch": 0.9978308621636464, + "grad_norm": 0.4422514736652374, + "learning_rate": 1.0056145848117498e-05, + "loss": 0.3864, + "step": 36341 + }, + { + "epoch": 0.9978583196046128, + "grad_norm": 0.400076687335968, + "learning_rate": 1.0055713961464208e-05, + "loss": 0.4821, + "step": 36342 + }, + { + "epoch": 0.9978857770455793, + "grad_norm": 0.3751876950263977, + "learning_rate": 1.0055282074706996e-05, + "loss": 0.4784, + "step": 36343 + }, + { + "epoch": 0.9979132344865459, + "grad_norm": 0.4664754271507263, + "learning_rate": 1.005485018784666e-05, + "loss": 0.5275, + "step": 36344 + }, + { + "epoch": 0.9979406919275123, + "grad_norm": 0.44040030241012573, + "learning_rate": 1.0054418300884017e-05, + "loss": 0.5284, + "step": 36345 + }, + { + "epoch": 0.9979681493684789, + "grad_norm": 0.38531965017318726, + "learning_rate": 1.0053986413819861e-05, + "loss": 0.4901, + "step": 36346 + }, + { + "epoch": 0.9979956068094453, + "grad_norm": 0.5174368619918823, + "learning_rate": 1.0053554526655007e-05, + "loss": 0.5175, + "step": 36347 + }, + { + "epoch": 0.9980230642504119, + "grad_norm": 0.41888707876205444, + "learning_rate": 1.0053122639390256e-05, + "loss": 0.414, + "step": 36348 + }, + { + "epoch": 0.9980505216913783, + "grad_norm": 0.35921308398246765, + "learning_rate": 1.0052690752026412e-05, + "loss": 0.4401, + "step": 36349 + }, + { + "epoch": 0.9980779791323449, + "grad_norm": 0.38348913192749023, + "learning_rate": 1.0052258864564287e-05, + "loss": 0.4988, + "step": 36350 + }, + { + "epoch": 0.9981054365733114, + "grad_norm": 0.3870282769203186, + "learning_rate": 1.005182697700468e-05, + "loss": 0.4242, + "step": 36351 + }, + { + "epoch": 0.9981328940142778, + "grad_norm": 0.3701067864894867, + "learning_rate": 1.00513950893484e-05, + "loss": 0.4714, + "step": 36352 + }, + { + "epoch": 0.9981603514552444, + "grad_norm": 0.3389052748680115, + "learning_rate": 1.0050963201596252e-05, + "loss": 0.4141, + "step": 36353 + }, + { + "epoch": 0.9981878088962108, + "grad_norm": 0.48538637161254883, + "learning_rate": 1.005053131374904e-05, + "loss": 0.607, + "step": 36354 + }, + { + "epoch": 0.9982152663371774, + "grad_norm": 0.4162866175174713, + "learning_rate": 1.0050099425807572e-05, + "loss": 0.4125, + "step": 36355 + }, + { + "epoch": 0.9982427237781438, + "grad_norm": 0.41572287678718567, + "learning_rate": 1.0049667537772652e-05, + "loss": 0.4966, + "step": 36356 + }, + { + "epoch": 0.9982701812191104, + "grad_norm": 0.39643585681915283, + "learning_rate": 1.0049235649645085e-05, + "loss": 0.5507, + "step": 36357 + }, + { + "epoch": 0.9982976386600769, + "grad_norm": 0.38855525851249695, + "learning_rate": 1.0048803761425678e-05, + "loss": 0.5656, + "step": 36358 + }, + { + "epoch": 0.9983250961010434, + "grad_norm": 0.37052029371261597, + "learning_rate": 1.0048371873115237e-05, + "loss": 0.5124, + "step": 36359 + }, + { + "epoch": 0.9983525535420099, + "grad_norm": 0.43298953771591187, + "learning_rate": 1.0047939984714569e-05, + "loss": 0.4407, + "step": 36360 + }, + { + "epoch": 0.9983800109829764, + "grad_norm": 0.4165111482143402, + "learning_rate": 1.0047508096224476e-05, + "loss": 0.5203, + "step": 36361 + }, + { + "epoch": 0.9984074684239429, + "grad_norm": 0.3698457181453705, + "learning_rate": 1.0047076207645765e-05, + "loss": 0.4831, + "step": 36362 + }, + { + "epoch": 0.9984349258649093, + "grad_norm": 0.3959658443927765, + "learning_rate": 1.0046644318979243e-05, + "loss": 0.4941, + "step": 36363 + }, + { + "epoch": 0.9984623833058759, + "grad_norm": 0.36237844824790955, + "learning_rate": 1.0046212430225713e-05, + "loss": 0.4455, + "step": 36364 + }, + { + "epoch": 0.9984898407468424, + "grad_norm": 0.38887614011764526, + "learning_rate": 1.0045780541385984e-05, + "loss": 0.4703, + "step": 36365 + }, + { + "epoch": 0.9985172981878089, + "grad_norm": 1.2514432668685913, + "learning_rate": 1.0045348652460856e-05, + "loss": 0.4946, + "step": 36366 + }, + { + "epoch": 0.9985447556287754, + "grad_norm": 0.44012251496315, + "learning_rate": 1.0044916763451143e-05, + "loss": 0.5305, + "step": 36367 + }, + { + "epoch": 0.9985722130697419, + "grad_norm": 0.4713853597640991, + "learning_rate": 1.0044484874357644e-05, + "loss": 0.6037, + "step": 36368 + }, + { + "epoch": 0.9985996705107084, + "grad_norm": 0.3711102604866028, + "learning_rate": 1.0044052985181163e-05, + "loss": 0.4534, + "step": 36369 + }, + { + "epoch": 0.9986271279516749, + "grad_norm": 0.3874654173851013, + "learning_rate": 1.0043621095922513e-05, + "loss": 0.5175, + "step": 36370 + }, + { + "epoch": 0.9986545853926414, + "grad_norm": 0.46532925963401794, + "learning_rate": 1.0043189206582495e-05, + "loss": 0.4821, + "step": 36371 + }, + { + "epoch": 0.998682042833608, + "grad_norm": 0.4072956144809723, + "learning_rate": 1.0042757317161914e-05, + "loss": 0.5053, + "step": 36372 + }, + { + "epoch": 0.9987095002745744, + "grad_norm": 0.361330509185791, + "learning_rate": 1.004232542766158e-05, + "loss": 0.4117, + "step": 36373 + }, + { + "epoch": 0.9987369577155409, + "grad_norm": 0.36337369680404663, + "learning_rate": 1.0041893538082295e-05, + "loss": 0.4027, + "step": 36374 + }, + { + "epoch": 0.9987644151565074, + "grad_norm": 0.45273157954216003, + "learning_rate": 1.0041461648424863e-05, + "loss": 0.5203, + "step": 36375 + }, + { + "epoch": 0.9987918725974739, + "grad_norm": 0.40028098225593567, + "learning_rate": 1.0041029758690093e-05, + "loss": 0.4689, + "step": 36376 + }, + { + "epoch": 0.9988193300384404, + "grad_norm": 0.3817903399467468, + "learning_rate": 1.004059786887879e-05, + "loss": 0.4809, + "step": 36377 + }, + { + "epoch": 0.9988467874794069, + "grad_norm": 0.47052136063575745, + "learning_rate": 1.0040165978991758e-05, + "loss": 0.4983, + "step": 36378 + }, + { + "epoch": 0.9988742449203735, + "grad_norm": 0.3913911283016205, + "learning_rate": 1.0039734089029803e-05, + "loss": 0.4402, + "step": 36379 + }, + { + "epoch": 0.9989017023613399, + "grad_norm": 0.3843168616294861, + "learning_rate": 1.0039302198993733e-05, + "loss": 0.4578, + "step": 36380 + }, + { + "epoch": 0.9989291598023065, + "grad_norm": 0.4350765347480774, + "learning_rate": 1.003887030888435e-05, + "loss": 0.4994, + "step": 36381 + }, + { + "epoch": 0.9989566172432729, + "grad_norm": 0.3930009603500366, + "learning_rate": 1.0038438418702463e-05, + "loss": 0.4973, + "step": 36382 + }, + { + "epoch": 0.9989840746842394, + "grad_norm": 0.3666399121284485, + "learning_rate": 1.0038006528448874e-05, + "loss": 0.441, + "step": 36383 + }, + { + "epoch": 0.9990115321252059, + "grad_norm": 0.3789503276348114, + "learning_rate": 1.0037574638124393e-05, + "loss": 0.5825, + "step": 36384 + }, + { + "epoch": 0.9990389895661724, + "grad_norm": 0.44380834698677063, + "learning_rate": 1.0037142747729824e-05, + "loss": 0.5408, + "step": 36385 + }, + { + "epoch": 0.999066447007139, + "grad_norm": 0.3423822820186615, + "learning_rate": 1.0036710857265968e-05, + "loss": 0.4424, + "step": 36386 + }, + { + "epoch": 0.9990939044481054, + "grad_norm": 0.38936883211135864, + "learning_rate": 1.003627896673364e-05, + "loss": 0.476, + "step": 36387 + }, + { + "epoch": 0.999121361889072, + "grad_norm": 0.47787803411483765, + "learning_rate": 1.0035847076133634e-05, + "loss": 0.4628, + "step": 36388 + }, + { + "epoch": 0.9991488193300384, + "grad_norm": 0.41769999265670776, + "learning_rate": 1.0035415185466766e-05, + "loss": 0.5046, + "step": 36389 + }, + { + "epoch": 0.999176276771005, + "grad_norm": 0.4646648168563843, + "learning_rate": 1.0034983294733838e-05, + "loss": 0.4007, + "step": 36390 + }, + { + "epoch": 0.9992037342119714, + "grad_norm": 0.41069120168685913, + "learning_rate": 1.003455140393565e-05, + "loss": 0.4588, + "step": 36391 + }, + { + "epoch": 0.999231191652938, + "grad_norm": 0.3705097436904907, + "learning_rate": 1.0034119513073018e-05, + "loss": 0.4533, + "step": 36392 + }, + { + "epoch": 0.9992586490939045, + "grad_norm": 0.3991020917892456, + "learning_rate": 1.003368762214674e-05, + "loss": 0.4932, + "step": 36393 + }, + { + "epoch": 0.9992861065348709, + "grad_norm": 0.48547086119651794, + "learning_rate": 1.0033255731157623e-05, + "loss": 0.4951, + "step": 36394 + }, + { + "epoch": 0.9993135639758375, + "grad_norm": 0.3986724019050598, + "learning_rate": 1.0032823840106475e-05, + "loss": 0.493, + "step": 36395 + }, + { + "epoch": 0.9993410214168039, + "grad_norm": 0.3785673975944519, + "learning_rate": 1.0032391948994097e-05, + "loss": 0.5354, + "step": 36396 + }, + { + "epoch": 0.9993684788577705, + "grad_norm": 0.4470888078212738, + "learning_rate": 1.0031960057821302e-05, + "loss": 0.5193, + "step": 36397 + }, + { + "epoch": 0.9993959362987369, + "grad_norm": 0.4191124141216278, + "learning_rate": 1.0031528166588889e-05, + "loss": 0.5244, + "step": 36398 + }, + { + "epoch": 0.9994233937397035, + "grad_norm": 0.36923474073410034, + "learning_rate": 1.0031096275297667e-05, + "loss": 0.4779, + "step": 36399 + }, + { + "epoch": 0.99945085118067, + "grad_norm": 0.47648778557777405, + "learning_rate": 1.003066438394844e-05, + "loss": 0.5157, + "step": 36400 + }, + { + "epoch": 0.9994783086216364, + "grad_norm": 0.36764925718307495, + "learning_rate": 1.0030232492542014e-05, + "loss": 0.4237, + "step": 36401 + }, + { + "epoch": 0.999505766062603, + "grad_norm": 0.44600096344947815, + "learning_rate": 1.0029800601079194e-05, + "loss": 0.3947, + "step": 36402 + }, + { + "epoch": 0.9995332235035694, + "grad_norm": 0.43304529786109924, + "learning_rate": 1.0029368709560787e-05, + "loss": 0.4483, + "step": 36403 + }, + { + "epoch": 0.999560680944536, + "grad_norm": 0.36925169825553894, + "learning_rate": 1.0028936817987598e-05, + "loss": 0.4596, + "step": 36404 + }, + { + "epoch": 0.9995881383855024, + "grad_norm": 0.3833598792552948, + "learning_rate": 1.0028504926360434e-05, + "loss": 0.4519, + "step": 36405 + }, + { + "epoch": 0.999615595826469, + "grad_norm": 0.43140727281570435, + "learning_rate": 1.0028073034680096e-05, + "loss": 0.5082, + "step": 36406 + }, + { + "epoch": 0.9996430532674355, + "grad_norm": 0.43111392855644226, + "learning_rate": 1.0027641142947395e-05, + "loss": 0.5655, + "step": 36407 + }, + { + "epoch": 0.999670510708402, + "grad_norm": 0.48607900738716125, + "learning_rate": 1.0027209251163135e-05, + "loss": 0.524, + "step": 36408 + }, + { + "epoch": 0.9996979681493685, + "grad_norm": 0.448324978351593, + "learning_rate": 1.002677735932812e-05, + "loss": 0.4694, + "step": 36409 + }, + { + "epoch": 0.999725425590335, + "grad_norm": 0.44446954131126404, + "learning_rate": 1.0026345467443157e-05, + "loss": 0.5283, + "step": 36410 + }, + { + "epoch": 0.9997528830313015, + "grad_norm": 0.4085082709789276, + "learning_rate": 1.002591357550905e-05, + "loss": 0.51, + "step": 36411 + }, + { + "epoch": 0.9997803404722679, + "grad_norm": 0.4036116600036621, + "learning_rate": 1.0025481683526609e-05, + "loss": 0.5039, + "step": 36412 + }, + { + "epoch": 0.9998077979132345, + "grad_norm": 0.4005512297153473, + "learning_rate": 1.0025049791496633e-05, + "loss": 0.4695, + "step": 36413 + }, + { + "epoch": 0.999835255354201, + "grad_norm": 0.4023047387599945, + "learning_rate": 1.0024617899419933e-05, + "loss": 0.4625, + "step": 36414 + }, + { + "epoch": 0.9998627127951675, + "grad_norm": 0.40903687477111816, + "learning_rate": 1.0024186007297314e-05, + "loss": 0.424, + "step": 36415 + }, + { + "epoch": 0.999890170236134, + "grad_norm": 0.3974151313304901, + "learning_rate": 1.0023754115129578e-05, + "loss": 0.4182, + "step": 36416 + }, + { + "epoch": 0.9999176276771005, + "grad_norm": 0.4098784923553467, + "learning_rate": 1.0023322222917533e-05, + "loss": 0.4391, + "step": 36417 + }, + { + "epoch": 0.999945085118067, + "grad_norm": 0.36589181423187256, + "learning_rate": 1.0022890330661987e-05, + "loss": 0.3978, + "step": 36418 + }, + { + "epoch": 0.9999725425590335, + "grad_norm": 0.587020754814148, + "learning_rate": 1.0022458438363741e-05, + "loss": 0.4859, + "step": 36419 + }, + { + "epoch": 1.0, + "grad_norm": 0.4352031946182251, + "learning_rate": 1.0022026546023604e-05, + "loss": 0.5471, + "step": 36420 + }, + { + "epoch": 1.0, + "eval_loss": 0.22710177302360535, + "eval_runtime": 189.9737, + "eval_samples_per_second": 126.328, + "eval_steps_per_second": 15.792, + "step": 36420 + } + ], + "logging_steps": 1, + "max_steps": 72840, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.491414303730932e+20, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}