| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.9943851768669285, | |
| "eval_steps": 500, | |
| "global_step": 333, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.008983717012914094, | |
| "grad_norm": 5.839109932109842, | |
| "learning_rate": 1.663780588235294e-06, | |
| "loss": 0.851, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.017967434025828188, | |
| "grad_norm": 5.796557129672097, | |
| "learning_rate": 3.327561176470588e-06, | |
| "loss": 0.8589, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.02695115103874228, | |
| "grad_norm": 5.7589149287471075, | |
| "learning_rate": 4.991341764705883e-06, | |
| "loss": 0.8612, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.035934868051656375, | |
| "grad_norm": 4.220444230457979, | |
| "learning_rate": 6.655122352941176e-06, | |
| "loss": 0.8097, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.044918585064570464, | |
| "grad_norm": 2.2279497783130267, | |
| "learning_rate": 8.318902941176471e-06, | |
| "loss": 0.7672, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.05390230207748456, | |
| "grad_norm": 3.814533547249801, | |
| "learning_rate": 9.982683529411766e-06, | |
| "loss": 0.7561, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.06288601909039865, | |
| "grad_norm": 4.0811292825217835, | |
| "learning_rate": 1.1646464117647058e-05, | |
| "loss": 0.76, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.07186973610331275, | |
| "grad_norm": 3.672264516349975, | |
| "learning_rate": 1.3310244705882352e-05, | |
| "loss": 0.7142, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.08085345311622684, | |
| "grad_norm": 3.4788052125571918, | |
| "learning_rate": 1.4974025294117648e-05, | |
| "loss": 0.7167, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.08983717012914093, | |
| "grad_norm": 2.107646195472596, | |
| "learning_rate": 1.6637805882352943e-05, | |
| "loss": 0.6916, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.09882088714205503, | |
| "grad_norm": 2.3107123413459094, | |
| "learning_rate": 1.8301586470588235e-05, | |
| "loss": 0.6619, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.10780460415496912, | |
| "grad_norm": 1.97063494813433, | |
| "learning_rate": 1.996536705882353e-05, | |
| "loss": 0.651, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.11678832116788321, | |
| "grad_norm": 1.172538415212184, | |
| "learning_rate": 2.1629147647058824e-05, | |
| "loss": 0.6292, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.1257720381807973, | |
| "grad_norm": 1.425434087428503, | |
| "learning_rate": 2.3292928235294116e-05, | |
| "loss": 0.624, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.13475575519371139, | |
| "grad_norm": 1.0298093152751933, | |
| "learning_rate": 2.4956708823529412e-05, | |
| "loss": 0.6177, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.1437394722066255, | |
| "grad_norm": 0.9184264462076409, | |
| "learning_rate": 2.6620489411764705e-05, | |
| "loss": 0.6094, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.1527231892195396, | |
| "grad_norm": 0.9876447317203728, | |
| "learning_rate": 2.828427e-05, | |
| "loss": 0.5913, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.16170690623245368, | |
| "grad_norm": 0.9186614069420004, | |
| "learning_rate": 2.9948050588235297e-05, | |
| "loss": 0.5853, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.17069062324536777, | |
| "grad_norm": 0.9898980015818278, | |
| "learning_rate": 3.161183117647059e-05, | |
| "loss": 0.5837, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.17967434025828186, | |
| "grad_norm": 1.0813008236428436, | |
| "learning_rate": 3.3275611764705885e-05, | |
| "loss": 0.5826, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.18865805727119594, | |
| "grad_norm": 1.1477562316475312, | |
| "learning_rate": 3.493939235294118e-05, | |
| "loss": 0.5722, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.19764177428411006, | |
| "grad_norm": 0.8464148409927464, | |
| "learning_rate": 3.660317294117647e-05, | |
| "loss": 0.5673, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.20662549129702415, | |
| "grad_norm": 0.7702840400962941, | |
| "learning_rate": 3.826695352941177e-05, | |
| "loss": 0.5588, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.21560920830993824, | |
| "grad_norm": 1.1144594493299889, | |
| "learning_rate": 3.993073411764706e-05, | |
| "loss": 0.5584, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.22459292532285233, | |
| "grad_norm": 1.1476160682417311, | |
| "learning_rate": 4.1594514705882355e-05, | |
| "loss": 0.5569, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.23357664233576642, | |
| "grad_norm": 1.1535268292624339, | |
| "learning_rate": 4.325829529411765e-05, | |
| "loss": 0.5553, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.2425603593486805, | |
| "grad_norm": 1.5110809161997512, | |
| "learning_rate": 4.492207588235294e-05, | |
| "loss": 0.554, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.2515440763615946, | |
| "grad_norm": 1.1832182446871022, | |
| "learning_rate": 4.658585647058823e-05, | |
| "loss": 0.5421, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.2605277933745087, | |
| "grad_norm": 1.1859526315720494, | |
| "learning_rate": 4.824963705882353e-05, | |
| "loss": 0.5413, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.26951151038742277, | |
| "grad_norm": 1.5427900149672942, | |
| "learning_rate": 4.9913417647058825e-05, | |
| "loss": 0.5447, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.2784952274003369, | |
| "grad_norm": 1.1712836835670162, | |
| "learning_rate": 5.157719823529412e-05, | |
| "loss": 0.5346, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.287478944413251, | |
| "grad_norm": 1.3657285701127917, | |
| "learning_rate": 5.324097882352941e-05, | |
| "loss": 0.54, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.29646266142616506, | |
| "grad_norm": 1.401113516783269, | |
| "learning_rate": 5.490475941176471e-05, | |
| "loss": 0.5272, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.3054463784390792, | |
| "grad_norm": 1.283015659035394, | |
| "learning_rate": 5.656854e-05, | |
| "loss": 0.5269, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.31443009545199324, | |
| "grad_norm": 1.1625204652087118, | |
| "learning_rate": 5.65669787647368e-05, | |
| "loss": 0.5254, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.32341381246490736, | |
| "grad_norm": 1.9389962167077017, | |
| "learning_rate": 5.6562295231301345e-05, | |
| "loss": 0.5325, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.3323975294778215, | |
| "grad_norm": 1.2095439312643286, | |
| "learning_rate": 5.655448991673705e-05, | |
| "loss": 0.5247, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.34138124649073553, | |
| "grad_norm": 2.1581962123054512, | |
| "learning_rate": 5.6543563682719456e-05, | |
| "loss": 0.5301, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.35036496350364965, | |
| "grad_norm": 1.645168282852934, | |
| "learning_rate": 5.652951773546123e-05, | |
| "loss": 0.5251, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.3593486805165637, | |
| "grad_norm": 1.7810864857463629, | |
| "learning_rate": 5.651235362557887e-05, | |
| "loss": 0.529, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.36833239752947783, | |
| "grad_norm": 1.3721422433398012, | |
| "learning_rate": 5.649207324792161e-05, | |
| "loss": 0.5263, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.3773161145423919, | |
| "grad_norm": 1.7315681950487964, | |
| "learning_rate": 5.646867884136221e-05, | |
| "loss": 0.5233, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.386299831555306, | |
| "grad_norm": 1.2328449836692366, | |
| "learning_rate": 5.644217298854978e-05, | |
| "loss": 0.5121, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.3952835485682201, | |
| "grad_norm": 1.688017129833438, | |
| "learning_rate": 5.641255861562469e-05, | |
| "loss": 0.5076, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.4042672655811342, | |
| "grad_norm": 1.3793575135144216, | |
| "learning_rate": 5.637983899189553e-05, | |
| "loss": 0.5178, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.4132509825940483, | |
| "grad_norm": 1.1818050497678425, | |
| "learning_rate": 5.634401772947817e-05, | |
| "loss": 0.5099, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.42223469960696236, | |
| "grad_norm": 1.334664385713327, | |
| "learning_rate": 5.630509878289701e-05, | |
| "loss": 0.5125, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.4312184166198765, | |
| "grad_norm": 1.1824189144503987, | |
| "learning_rate": 5.626308644864844e-05, | |
| "loss": 0.5035, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.4402021336327906, | |
| "grad_norm": 1.8310997335960193, | |
| "learning_rate": 5.621798536472649e-05, | |
| "loss": 0.5129, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.44918585064570465, | |
| "grad_norm": 0.9498119984179156, | |
| "learning_rate": 5.616980051011082e-05, | |
| "loss": 0.4933, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.45816956765861877, | |
| "grad_norm": 1.3738927247826827, | |
| "learning_rate": 5.611853720421709e-05, | |
| "loss": 0.5114, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.46715328467153283, | |
| "grad_norm": 1.3239245607536279, | |
| "learning_rate": 5.606420110630966e-05, | |
| "loss": 0.5051, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.47613700168444695, | |
| "grad_norm": 1.3491292246681823, | |
| "learning_rate": 5.6006798214876905e-05, | |
| "loss": 0.4984, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.485120718697361, | |
| "grad_norm": 1.556526288137193, | |
| "learning_rate": 5.5946334866968935e-05, | |
| "loss": 0.5057, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.4941044357102751, | |
| "grad_norm": 0.8396609232112651, | |
| "learning_rate": 5.5882817737498054e-05, | |
| "loss": 0.5067, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.5030881527231892, | |
| "grad_norm": 1.3086549421559865, | |
| "learning_rate": 5.581625383850187e-05, | |
| "loss": 0.5016, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.5120718697361033, | |
| "grad_norm": 1.6438518323894828, | |
| "learning_rate": 5.5746650518369164e-05, | |
| "loss": 0.5105, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.5210555867490174, | |
| "grad_norm": 0.9704759451995054, | |
| "learning_rate": 5.5674015461028693e-05, | |
| "loss": 0.501, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.5300393037619315, | |
| "grad_norm": 1.520513804956881, | |
| "learning_rate": 5.559835668510092e-05, | |
| "loss": 0.5055, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.5390230207748455, | |
| "grad_norm": 1.4886814176870304, | |
| "learning_rate": 5.5519682543012745e-05, | |
| "loss": 0.4997, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.5480067377877597, | |
| "grad_norm": 0.9983783244132893, | |
| "learning_rate": 5.5438001720075464e-05, | |
| "loss": 0.4999, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.5569904548006738, | |
| "grad_norm": 1.8027514771648614, | |
| "learning_rate": 5.535332323352595e-05, | |
| "loss": 0.4973, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.5659741718135879, | |
| "grad_norm": 1.5234595633592394, | |
| "learning_rate": 5.526565643153117e-05, | |
| "loss": 0.5027, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.574957888826502, | |
| "grad_norm": 1.5395421556206257, | |
| "learning_rate": 5.517501099215618e-05, | |
| "loss": 0.4927, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.583941605839416, | |
| "grad_norm": 1.2783343984201738, | |
| "learning_rate": 5.5081396922295734e-05, | |
| "loss": 0.4927, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.5929253228523301, | |
| "grad_norm": 1.3855491947405612, | |
| "learning_rate": 5.498482455656953e-05, | |
| "loss": 0.4922, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.6019090398652442, | |
| "grad_norm": 1.1582514991567736, | |
| "learning_rate": 5.488530455618133e-05, | |
| "loss": 0.4911, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.6108927568781584, | |
| "grad_norm": 1.4059224870864222, | |
| "learning_rate": 5.4782847907741996e-05, | |
| "loss": 0.4947, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.6198764738910725, | |
| "grad_norm": 1.1973724677392805, | |
| "learning_rate": 5.4677465922056604e-05, | |
| "loss": 0.4891, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.6288601909039865, | |
| "grad_norm": 1.3369194820930967, | |
| "learning_rate": 5.456917023287581e-05, | |
| "loss": 0.4877, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.6378439079169006, | |
| "grad_norm": 1.2691892558171078, | |
| "learning_rate": 5.445797279561149e-05, | |
| "loss": 0.4895, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.6468276249298147, | |
| "grad_norm": 1.0922464334569515, | |
| "learning_rate": 5.434388588601693e-05, | |
| "loss": 0.4803, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.6558113419427288, | |
| "grad_norm": 0.8753013887933451, | |
| "learning_rate": 5.422692209883164e-05, | |
| "loss": 0.4914, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.664795058955643, | |
| "grad_norm": 0.9796697285671012, | |
| "learning_rate": 5.4107094346390925e-05, | |
| "loss": 0.4869, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.673778775968557, | |
| "grad_norm": 1.2844167323369493, | |
| "learning_rate": 5.398441585720041e-05, | |
| "loss": 0.4867, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.6827624929814711, | |
| "grad_norm": 1.4887584341619542, | |
| "learning_rate": 5.3858900174475716e-05, | |
| "loss": 0.4991, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.6917462099943852, | |
| "grad_norm": 0.8921558529768149, | |
| "learning_rate": 5.373056115464729e-05, | |
| "loss": 0.4867, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.7007299270072993, | |
| "grad_norm": 1.2859329068604128, | |
| "learning_rate": 5.359941296583069e-05, | |
| "loss": 0.4882, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.7097136440202133, | |
| "grad_norm": 0.7544133782391679, | |
| "learning_rate": 5.346547008626259e-05, | |
| "loss": 0.4804, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.7186973610331274, | |
| "grad_norm": 1.2158913130951334, | |
| "learning_rate": 5.332874730270231e-05, | |
| "loss": 0.4965, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.7276810780460415, | |
| "grad_norm": 1.5397019534433478, | |
| "learning_rate": 5.3189259708799525e-05, | |
| "loss": 0.4863, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.7366647950589557, | |
| "grad_norm": 1.0368691373073795, | |
| "learning_rate": 5.304702270342788e-05, | |
| "loss": 0.4709, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.7456485120718698, | |
| "grad_norm": 1.6327070593356623, | |
| "learning_rate": 5.290205198898512e-05, | |
| "loss": 0.4935, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.7546322290847838, | |
| "grad_norm": 0.7996850897364002, | |
| "learning_rate": 5.275436356965955e-05, | |
| "loss": 0.4833, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.7636159460976979, | |
| "grad_norm": 1.3138953152120028, | |
| "learning_rate": 5.260397374966324e-05, | |
| "loss": 0.4831, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.772599663110612, | |
| "grad_norm": 1.0413080501496228, | |
| "learning_rate": 5.245089913143211e-05, | |
| "loss": 0.4857, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.7815833801235261, | |
| "grad_norm": 1.2505485373744896, | |
| "learning_rate": 5.229515661379311e-05, | |
| "loss": 0.4783, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.7905670971364402, | |
| "grad_norm": 0.8250922322384497, | |
| "learning_rate": 5.213676339009861e-05, | |
| "loss": 0.4888, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.7995508141493542, | |
| "grad_norm": 0.930860026309023, | |
| "learning_rate": 5.197573694632837e-05, | |
| "loss": 0.4722, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.8085345311622684, | |
| "grad_norm": 1.1999339483974405, | |
| "learning_rate": 5.181209505915914e-05, | |
| "loss": 0.4844, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.8175182481751825, | |
| "grad_norm": 0.9824068299768655, | |
| "learning_rate": 5.164585579400215e-05, | |
| "loss": 0.4835, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.8265019651880966, | |
| "grad_norm": 1.506768206352777, | |
| "learning_rate": 5.1477037503008845e-05, | |
| "loss": 0.4795, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.8354856822010107, | |
| "grad_norm": 0.7471067724218583, | |
| "learning_rate": 5.130565882304478e-05, | |
| "loss": 0.4752, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.8444693992139247, | |
| "grad_norm": 1.231448677263746, | |
| "learning_rate": 5.113173867363228e-05, | |
| "loss": 0.4789, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.8534531162268388, | |
| "grad_norm": 1.3024759491093874, | |
| "learning_rate": 5.095529625486171e-05, | |
| "loss": 0.4805, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.862436833239753, | |
| "grad_norm": 0.9214289630598883, | |
| "learning_rate": 5.0776351045271936e-05, | |
| "loss": 0.486, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.8714205502526671, | |
| "grad_norm": 1.0404626244101485, | |
| "learning_rate": 5.0594922799699925e-05, | |
| "loss": 0.4845, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.8804042672655812, | |
| "grad_norm": 0.849414300185437, | |
| "learning_rate": 5.0411031547099916e-05, | |
| "loss": 0.467, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.8893879842784952, | |
| "grad_norm": 0.6937685475658341, | |
| "learning_rate": 5.0224697588332306e-05, | |
| "loss": 0.4808, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.8983717012914093, | |
| "grad_norm": 0.9214456229044827, | |
| "learning_rate": 5.003594149392247e-05, | |
| "loss": 0.4686, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.9073554183043234, | |
| "grad_norm": 0.8633442974924467, | |
| "learning_rate": 4.984478410178992e-05, | |
| "loss": 0.481, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.9163391353172375, | |
| "grad_norm": 1.0904707694598144, | |
| "learning_rate": 4.965124651494785e-05, | |
| "loss": 0.4677, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.9253228523301515, | |
| "grad_norm": 0.8481076014465915, | |
| "learning_rate": 4.9455350099173434e-05, | |
| "loss": 0.4802, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.9343065693430657, | |
| "grad_norm": 0.6006636997635961, | |
| "learning_rate": 4.925711648064916e-05, | |
| "loss": 0.4694, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.9432902863559798, | |
| "grad_norm": 0.7238908358412791, | |
| "learning_rate": 4.9056567543575374e-05, | |
| "loss": 0.4809, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.9522740033688939, | |
| "grad_norm": 0.719964931081471, | |
| "learning_rate": 4.885372542775435e-05, | |
| "loss": 0.4677, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.961257720381808, | |
| "grad_norm": 0.6462909305498641, | |
| "learning_rate": 4.864861252614612e-05, | |
| "loss": 0.47, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.970241437394722, | |
| "grad_norm": 0.605236737922692, | |
| "learning_rate": 4.844125148239645e-05, | |
| "loss": 0.4604, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.9792251544076361, | |
| "grad_norm": 0.6196855103029525, | |
| "learning_rate": 4.823166518833697e-05, | |
| "loss": 0.4686, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.9882088714205502, | |
| "grad_norm": 0.5910630181346482, | |
| "learning_rate": 4.801987678145811e-05, | |
| "loss": 0.4771, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.9971925884334644, | |
| "grad_norm": 0.5265183055483387, | |
| "learning_rate": 4.7805909642354734e-05, | |
| "loss": 0.4701, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 1.0075800112296462, | |
| "grad_norm": 0.7416855722204436, | |
| "learning_rate": 4.7589787392145085e-05, | |
| "loss": 0.8384, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 1.0165637282425604, | |
| "grad_norm": 0.9235671375653838, | |
| "learning_rate": 4.737153388986303e-05, | |
| "loss": 0.4427, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 1.0255474452554745, | |
| "grad_norm": 1.2985752147785057, | |
| "learning_rate": 4.7151173229824185e-05, | |
| "loss": 0.4674, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 1.0345311622683886, | |
| "grad_norm": 0.8686104223850358, | |
| "learning_rate": 4.6928729738965966e-05, | |
| "loss": 0.4585, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 1.0435148792813027, | |
| "grad_norm": 1.4248620564884518, | |
| "learning_rate": 4.6704227974162e-05, | |
| "loss": 0.4664, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 1.0524985962942168, | |
| "grad_norm": 0.8090345615734776, | |
| "learning_rate": 4.647769271951114e-05, | |
| "loss": 0.4526, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 1.0614823133071307, | |
| "grad_norm": 1.8942652592210458, | |
| "learning_rate": 4.624914898360141e-05, | |
| "loss": 0.4616, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 1.0704660303200448, | |
| "grad_norm": 1.176745396077979, | |
| "learning_rate": 4.601862199674913e-05, | |
| "loss": 0.4517, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 1.079449747332959, | |
| "grad_norm": 1.9084152203109792, | |
| "learning_rate": 4.5786137208213634e-05, | |
| "loss": 0.4512, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.088433464345873, | |
| "grad_norm": 1.837711635191509, | |
| "learning_rate": 4.555172028338775e-05, | |
| "loss": 0.4642, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 1.0974171813587872, | |
| "grad_norm": 0.8682381358770546, | |
| "learning_rate": 4.531539710096439e-05, | |
| "loss": 0.4458, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 1.1064008983717013, | |
| "grad_norm": 1.2949713196196633, | |
| "learning_rate": 4.507719375007978e-05, | |
| "loss": 0.4659, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 1.1153846153846154, | |
| "grad_norm": 0.6820031143942866, | |
| "learning_rate": 4.483713652743316e-05, | |
| "loss": 0.4422, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 1.1243683323975295, | |
| "grad_norm": 0.9015249547203725, | |
| "learning_rate": 4.459525193438388e-05, | |
| "loss": 0.4674, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 1.1333520494104437, | |
| "grad_norm": 0.7596159903360272, | |
| "learning_rate": 4.4351566674025625e-05, | |
| "loss": 0.4397, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 1.1423357664233578, | |
| "grad_norm": 0.8048846249914353, | |
| "learning_rate": 4.410610764823863e-05, | |
| "loss": 0.4537, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 1.1513194834362717, | |
| "grad_norm": 0.5674553882616571, | |
| "learning_rate": 4.3858901954719706e-05, | |
| "loss": 0.4431, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 1.1603032004491858, | |
| "grad_norm": 0.7662516848679161, | |
| "learning_rate": 4.3609976883990836e-05, | |
| "loss": 0.4564, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 1.1692869174621, | |
| "grad_norm": 0.5438455650436745, | |
| "learning_rate": 4.335935991638637e-05, | |
| "loss": 0.4461, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.178270634475014, | |
| "grad_norm": 0.659318129425458, | |
| "learning_rate": 4.310707871901933e-05, | |
| "loss": 0.4489, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 1.1872543514879281, | |
| "grad_norm": 0.603233928019015, | |
| "learning_rate": 4.2853161142727056e-05, | |
| "loss": 0.4463, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 1.1962380685008422, | |
| "grad_norm": 0.5437863962201774, | |
| "learning_rate": 4.25976352189966e-05, | |
| "loss": 0.4476, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 1.2052217855137564, | |
| "grad_norm": 0.44198003824815413, | |
| "learning_rate": 4.234052915687014e-05, | |
| "loss": 0.4417, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 1.2142055025266705, | |
| "grad_norm": 0.4940269180275878, | |
| "learning_rate": 4.208187133983084e-05, | |
| "loss": 0.4437, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 1.2231892195395846, | |
| "grad_norm": 0.4676186442195625, | |
| "learning_rate": 4.18216903226694e-05, | |
| "loss": 0.4429, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 1.2321729365524985, | |
| "grad_norm": 0.557497711331083, | |
| "learning_rate": 4.156001482833174e-05, | |
| "loss": 0.4464, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 1.2411566535654126, | |
| "grad_norm": 0.47586024576684477, | |
| "learning_rate": 4.1296873744748095e-05, | |
| "loss": 0.4426, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 1.2501403705783267, | |
| "grad_norm": 0.32603863165308805, | |
| "learning_rate": 4.103229612164391e-05, | |
| "loss": 0.4416, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 1.2591240875912408, | |
| "grad_norm": 0.4351165089036505, | |
| "learning_rate": 4.076631116733286e-05, | |
| "loss": 0.4401, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.268107804604155, | |
| "grad_norm": 0.4958952788514796, | |
| "learning_rate": 4.0498948245492365e-05, | |
| "loss": 0.4452, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 1.277091521617069, | |
| "grad_norm": 0.5509848964383834, | |
| "learning_rate": 4.023023687192194e-05, | |
| "loss": 0.4411, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 1.2860752386299832, | |
| "grad_norm": 0.3808831227241545, | |
| "learning_rate": 3.996020671128483e-05, | |
| "loss": 0.4443, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 1.2950589556428973, | |
| "grad_norm": 0.36241744805641257, | |
| "learning_rate": 3.9688887573833065e-05, | |
| "loss": 0.45, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 1.3040426726558114, | |
| "grad_norm": 0.36700641622974883, | |
| "learning_rate": 3.941630941211662e-05, | |
| "loss": 0.4307, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 1.3130263896687255, | |
| "grad_norm": 0.3136123844114524, | |
| "learning_rate": 3.914250231767668e-05, | |
| "loss": 0.4428, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 1.3220101066816397, | |
| "grad_norm": 0.30609996444955184, | |
| "learning_rate": 3.886749651772372e-05, | |
| "loss": 0.4433, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 1.3309938236945535, | |
| "grad_norm": 0.35148333104030377, | |
| "learning_rate": 3.8591322371800516e-05, | |
| "loss": 0.448, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 1.3399775407074677, | |
| "grad_norm": 0.3406382310678225, | |
| "learning_rate": 3.831401036843058e-05, | |
| "loss": 0.4356, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 1.3489612577203818, | |
| "grad_norm": 0.32870885630433616, | |
| "learning_rate": 3.8035591121752334e-05, | |
| "loss": 0.4433, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.357944974733296, | |
| "grad_norm": 0.40610208793950014, | |
| "learning_rate": 3.7756095368139454e-05, | |
| "loss": 0.4419, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 1.36692869174621, | |
| "grad_norm": 0.3410029435891093, | |
| "learning_rate": 3.747555396280769e-05, | |
| "loss": 0.4284, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 1.3759124087591241, | |
| "grad_norm": 0.30509070420240164, | |
| "learning_rate": 3.719399787640854e-05, | |
| "loss": 0.4346, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 1.3848961257720382, | |
| "grad_norm": 0.30952788069600484, | |
| "learning_rate": 3.691145819161026e-05, | |
| "loss": 0.4366, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 1.3938798427849521, | |
| "grad_norm": 0.28009014545926486, | |
| "learning_rate": 3.6627966099666466e-05, | |
| "loss": 0.4322, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 1.4028635597978663, | |
| "grad_norm": 0.255197994228949, | |
| "learning_rate": 3.6343552896972686e-05, | |
| "loss": 0.4297, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 1.4118472768107804, | |
| "grad_norm": 0.26775780849454006, | |
| "learning_rate": 3.605824998161141e-05, | |
| "loss": 0.4423, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 1.4208309938236945, | |
| "grad_norm": 0.2748112146974553, | |
| "learning_rate": 3.5772088849885886e-05, | |
| "loss": 0.4316, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 1.4298147108366086, | |
| "grad_norm": 0.272040591675239, | |
| "learning_rate": 3.548510109284296e-05, | |
| "loss": 0.4464, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 1.4387984278495227, | |
| "grad_norm": 0.23246881072840514, | |
| "learning_rate": 3.519731839278567e-05, | |
| "loss": 0.4409, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.4477821448624368, | |
| "grad_norm": 0.24504527703462542, | |
| "learning_rate": 3.4908772519775565e-05, | |
| "loss": 0.4419, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 1.456765861875351, | |
| "grad_norm": 0.24765234500656855, | |
| "learning_rate": 3.461949532812546e-05, | |
| "loss": 0.4478, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 1.465749578888265, | |
| "grad_norm": 0.327477057941263, | |
| "learning_rate": 3.43295187528828e-05, | |
| "loss": 0.4411, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 1.4747332959011792, | |
| "grad_norm": 0.27408220536723477, | |
| "learning_rate": 3.403887480630422e-05, | |
| "loss": 0.4372, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 1.4837170129140933, | |
| "grad_norm": 0.2479972152670547, | |
| "learning_rate": 3.374759557432146e-05, | |
| "loss": 0.44, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 1.4927007299270074, | |
| "grad_norm": 0.2576799538776205, | |
| "learning_rate": 3.345571321299926e-05, | |
| "loss": 0.4345, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 1.5016844469399215, | |
| "grad_norm": 0.2949577781981658, | |
| "learning_rate": 3.316325994498539e-05, | |
| "loss": 0.4426, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 1.5106681639528357, | |
| "grad_norm": 0.25387337797793086, | |
| "learning_rate": 3.28702680559535e-05, | |
| "loss": 0.4414, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 1.5196518809657495, | |
| "grad_norm": 0.29898671970481416, | |
| "learning_rate": 3.2576769891038794e-05, | |
| "loss": 0.4295, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 1.5286355979786637, | |
| "grad_norm": 0.22913788989001677, | |
| "learning_rate": 3.2282797851267353e-05, | |
| "loss": 0.4402, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.5376193149915778, | |
| "grad_norm": 0.3095695481139338, | |
| "learning_rate": 3.198838438997912e-05, | |
| "loss": 0.4367, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 1.546603032004492, | |
| "grad_norm": 0.2942100195557735, | |
| "learning_rate": 3.169356200924522e-05, | |
| "loss": 0.441, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 1.5555867490174058, | |
| "grad_norm": 0.2009760877564207, | |
| "learning_rate": 3.1398363256279894e-05, | |
| "loss": 0.4361, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 1.56457046603032, | |
| "grad_norm": 0.22271313757898464, | |
| "learning_rate": 3.110282071984731e-05, | |
| "loss": 0.429, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 1.573554183043234, | |
| "grad_norm": 0.23273324004009835, | |
| "learning_rate": 3.080696702666401e-05, | |
| "loss": 0.444, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 1.5825379000561481, | |
| "grad_norm": 0.23105576612308468, | |
| "learning_rate": 3.051083483779696e-05, | |
| "loss": 0.4415, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 1.5915216170690623, | |
| "grad_norm": 0.2707194719098357, | |
| "learning_rate": 3.0214456845057964e-05, | |
| "loss": 0.4337, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 1.6005053340819764, | |
| "grad_norm": 0.19401437496621246, | |
| "learning_rate": 2.9917865767394592e-05, | |
| "loss": 0.4293, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 1.6094890510948905, | |
| "grad_norm": 0.23447424010049614, | |
| "learning_rate": 2.9621094347278115e-05, | |
| "loss": 0.4381, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 1.6184727681078046, | |
| "grad_norm": 0.24721825324120247, | |
| "learning_rate": 2.9324175347088936e-05, | |
| "loss": 0.4347, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.6274564851207187, | |
| "grad_norm": 0.18188660812700427, | |
| "learning_rate": 2.9027141545499668e-05, | |
| "loss": 0.4393, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 1.6364402021336328, | |
| "grad_norm": 0.190718000775263, | |
| "learning_rate": 2.873002573385654e-05, | |
| "loss": 0.4302, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 1.645423919146547, | |
| "grad_norm": 0.20646822886164018, | |
| "learning_rate": 2.84328607125594e-05, | |
| "loss": 0.4265, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 1.654407636159461, | |
| "grad_norm": 0.18501972197639874, | |
| "learning_rate": 2.813567928744061e-05, | |
| "loss": 0.4264, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 1.6633913531723752, | |
| "grad_norm": 0.2131469646078243, | |
| "learning_rate": 2.7838514266143464e-05, | |
| "loss": 0.4353, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 1.6723750701852893, | |
| "grad_norm": 0.20765635355407858, | |
| "learning_rate": 2.754139845450034e-05, | |
| "loss": 0.4358, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 1.6813587871982034, | |
| "grad_norm": 0.21408480102506994, | |
| "learning_rate": 2.7244364652911073e-05, | |
| "loss": 0.436, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 1.6903425042111173, | |
| "grad_norm": 0.20394614955447768, | |
| "learning_rate": 2.6947445652721887e-05, | |
| "loss": 0.4317, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 1.6993262212240314, | |
| "grad_norm": 0.2462732253149224, | |
| "learning_rate": 2.6650674232605416e-05, | |
| "loss": 0.4381, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 1.7083099382369455, | |
| "grad_norm": 0.19402664296982586, | |
| "learning_rate": 2.635408315494204e-05, | |
| "loss": 0.4289, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.7172936552498597, | |
| "grad_norm": 0.25323884789633694, | |
| "learning_rate": 2.6057705162203045e-05, | |
| "loss": 0.4387, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 1.7262773722627736, | |
| "grad_norm": 0.17952016249190222, | |
| "learning_rate": 2.5761572973335996e-05, | |
| "loss": 0.4392, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 1.7352610892756877, | |
| "grad_norm": 0.21550956031191665, | |
| "learning_rate": 2.5465719280152693e-05, | |
| "loss": 0.4385, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 1.7442448062886018, | |
| "grad_norm": 0.2260946384930941, | |
| "learning_rate": 2.5170176743720114e-05, | |
| "loss": 0.4297, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 1.753228523301516, | |
| "grad_norm": 0.189354294580711, | |
| "learning_rate": 2.487497799075478e-05, | |
| "loss": 0.4402, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 1.76221224031443, | |
| "grad_norm": 0.18347107243203223, | |
| "learning_rate": 2.4580155610020893e-05, | |
| "loss": 0.4436, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 1.7711959573273441, | |
| "grad_norm": 0.23718361693873785, | |
| "learning_rate": 2.4285742148732662e-05, | |
| "loss": 0.431, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 1.7801796743402583, | |
| "grad_norm": 0.15875452261920012, | |
| "learning_rate": 2.399177010896121e-05, | |
| "loss": 0.4227, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 1.7891633913531724, | |
| "grad_norm": 0.20090739499698712, | |
| "learning_rate": 2.3698271944046514e-05, | |
| "loss": 0.4287, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 1.7981471083660865, | |
| "grad_norm": 0.15078862855529346, | |
| "learning_rate": 2.3405280055014613e-05, | |
| "loss": 0.4293, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.8071308253790006, | |
| "grad_norm": 0.16864941614937132, | |
| "learning_rate": 2.3112826787000755e-05, | |
| "loss": 0.433, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 1.8161145423919147, | |
| "grad_norm": 0.1640970390638266, | |
| "learning_rate": 2.2820944425678543e-05, | |
| "loss": 0.4465, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 1.8250982594048288, | |
| "grad_norm": 0.181930840073729, | |
| "learning_rate": 2.2529665193695787e-05, | |
| "loss": 0.4311, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 1.834081976417743, | |
| "grad_norm": 0.1618511339670659, | |
| "learning_rate": 2.2239021247117203e-05, | |
| "loss": 0.425, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 1.843065693430657, | |
| "grad_norm": 0.1608105810202212, | |
| "learning_rate": 2.1949044671874553e-05, | |
| "loss": 0.4372, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 1.8520494104435712, | |
| "grad_norm": 0.17614884968053077, | |
| "learning_rate": 2.165976748022444e-05, | |
| "loss": 0.4242, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 1.861033127456485, | |
| "grad_norm": 0.18688103883770055, | |
| "learning_rate": 2.1371221607214342e-05, | |
| "loss": 0.4329, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 1.8700168444693992, | |
| "grad_norm": 0.1888649411522682, | |
| "learning_rate": 2.108343890715705e-05, | |
| "loss": 0.4421, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 1.8790005614823133, | |
| "grad_norm": 0.19568546233660988, | |
| "learning_rate": 2.0796451150114122e-05, | |
| "loss": 0.4292, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 1.8879842784952274, | |
| "grad_norm": 0.20130506786491167, | |
| "learning_rate": 2.0510290018388582e-05, | |
| "loss": 0.4305, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.8969679955081415, | |
| "grad_norm": 0.16622134218898874, | |
| "learning_rate": 2.0224987103027312e-05, | |
| "loss": 0.4304, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 1.9059517125210554, | |
| "grad_norm": 0.1714373409659067, | |
| "learning_rate": 1.9940573900333532e-05, | |
| "loss": 0.4388, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 1.9149354295339696, | |
| "grad_norm": 0.16165865567699295, | |
| "learning_rate": 1.9657081808389732e-05, | |
| "loss": 0.4199, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 1.9239191465468837, | |
| "grad_norm": 0.1588217221931874, | |
| "learning_rate": 1.9374542123591462e-05, | |
| "loss": 0.4391, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 1.9329028635597978, | |
| "grad_norm": 0.16789101350762428, | |
| "learning_rate": 1.9092986037192315e-05, | |
| "loss": 0.429, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 1.941886580572712, | |
| "grad_norm": 0.14933213528792, | |
| "learning_rate": 1.881244463186054e-05, | |
| "loss": 0.4327, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 1.950870297585626, | |
| "grad_norm": 0.15634986800174527, | |
| "learning_rate": 1.8532948878247664e-05, | |
| "loss": 0.4222, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 1.9598540145985401, | |
| "grad_norm": 0.16965600317101812, | |
| "learning_rate": 1.825452963156942e-05, | |
| "loss": 0.4422, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 1.9688377316114543, | |
| "grad_norm": 0.13903361538978226, | |
| "learning_rate": 1.7977217628199486e-05, | |
| "loss": 0.4324, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 1.9778214486243684, | |
| "grad_norm": 0.18809988345643205, | |
| "learning_rate": 1.770104348227628e-05, | |
| "loss": 0.4284, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.9868051656372825, | |
| "grad_norm": 0.16274715282926117, | |
| "learning_rate": 1.742603768232333e-05, | |
| "loss": 0.417, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 1.9957888826501966, | |
| "grad_norm": 0.18587407946726342, | |
| "learning_rate": 1.7152230587883387e-05, | |
| "loss": 0.4419, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 2.0061763054463784, | |
| "grad_norm": 0.35271858704421305, | |
| "learning_rate": 1.6879652426166937e-05, | |
| "loss": 0.7668, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 2.0151600224592925, | |
| "grad_norm": 0.18863183688272459, | |
| "learning_rate": 1.660833328871518e-05, | |
| "loss": 0.3965, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 2.0241437394722066, | |
| "grad_norm": 0.2508766464598683, | |
| "learning_rate": 1.6338303128078067e-05, | |
| "loss": 0.4199, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 2.0331274564851207, | |
| "grad_norm": 0.1846517351611734, | |
| "learning_rate": 1.6069591754507644e-05, | |
| "loss": 0.4059, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 2.042111173498035, | |
| "grad_norm": 0.20660957505878264, | |
| "learning_rate": 1.5802228832667142e-05, | |
| "loss": 0.4115, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 2.051094890510949, | |
| "grad_norm": 0.20768143407603465, | |
| "learning_rate": 1.553624387835609e-05, | |
| "loss": 0.4033, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 2.060078607523863, | |
| "grad_norm": 0.23623799408278454, | |
| "learning_rate": 1.5271666255251907e-05, | |
| "loss": 0.419, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 2.069062324536777, | |
| "grad_norm": 0.19328378884342695, | |
| "learning_rate": 1.5008525171668266e-05, | |
| "loss": 0.4045, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 2.0780460415496913, | |
| "grad_norm": 0.20354843925554086, | |
| "learning_rate": 1.474684967733061e-05, | |
| "loss": 0.4063, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 2.0870297585626054, | |
| "grad_norm": 0.1962670346347417, | |
| "learning_rate": 1.4486668660169169e-05, | |
| "loss": 0.4098, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 2.0960134755755195, | |
| "grad_norm": 0.1697063402554285, | |
| "learning_rate": 1.4228010843129864e-05, | |
| "loss": 0.3947, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 2.1049971925884337, | |
| "grad_norm": 0.21767004866396167, | |
| "learning_rate": 1.3970904781003401e-05, | |
| "loss": 0.4114, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 2.1139809096013478, | |
| "grad_norm": 0.16628750228694827, | |
| "learning_rate": 1.3715378857272944e-05, | |
| "loss": 0.4089, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 2.1229646266142614, | |
| "grad_norm": 0.17079039606673918, | |
| "learning_rate": 1.3461461280980681e-05, | |
| "loss": 0.4047, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 2.1319483436271756, | |
| "grad_norm": 0.1984666308638168, | |
| "learning_rate": 1.3209180083613638e-05, | |
| "loss": 0.4126, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 2.1409320606400897, | |
| "grad_norm": 0.15844397108776676, | |
| "learning_rate": 1.2958563116009172e-05, | |
| "loss": 0.401, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 2.149915777653004, | |
| "grad_norm": 0.1682087305867551, | |
| "learning_rate": 1.27096380452803e-05, | |
| "loss": 0.404, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 2.158899494665918, | |
| "grad_norm": 0.15876022024115735, | |
| "learning_rate": 1.2462432351761374e-05, | |
| "loss": 0.406, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 2.167883211678832, | |
| "grad_norm": 0.13010884230934192, | |
| "learning_rate": 1.2216973325974375e-05, | |
| "loss": 0.3955, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 2.176866928691746, | |
| "grad_norm": 0.15194484720161236, | |
| "learning_rate": 1.1973288065616129e-05, | |
| "loss": 0.4204, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 2.1858506457046603, | |
| "grad_norm": 0.146927657510162, | |
| "learning_rate": 1.1731403472566841e-05, | |
| "loss": 0.4259, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 2.1948343627175744, | |
| "grad_norm": 0.1357980790994208, | |
| "learning_rate": 1.1491346249920226e-05, | |
| "loss": 0.3998, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 2.2038180797304885, | |
| "grad_norm": 0.14520203880695473, | |
| "learning_rate": 1.1253142899035609e-05, | |
| "loss": 0.4038, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 2.2128017967434026, | |
| "grad_norm": 0.12180527212805993, | |
| "learning_rate": 1.101681971661226e-05, | |
| "loss": 0.3995, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 2.2217855137563167, | |
| "grad_norm": 0.16678443391851008, | |
| "learning_rate": 1.0782402791786366e-05, | |
| "loss": 0.4093, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 2.230769230769231, | |
| "grad_norm": 0.14169624933871403, | |
| "learning_rate": 1.054991800325088e-05, | |
| "loss": 0.3945, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 2.239752947782145, | |
| "grad_norm": 0.12385040853111917, | |
| "learning_rate": 1.0319391016398607e-05, | |
| "loss": 0.4045, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 2.248736664795059, | |
| "grad_norm": 0.13012596569465296, | |
| "learning_rate": 1.009084728048887e-05, | |
| "loss": 0.4037, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.257720381807973, | |
| "grad_norm": 0.12678805353118078, | |
| "learning_rate": 9.864312025838009e-06, | |
| "loss": 0.4085, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 2.2667040988208873, | |
| "grad_norm": 0.13122317737854305, | |
| "learning_rate": 9.63981026103404e-06, | |
| "loss": 0.415, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 2.2756878158338014, | |
| "grad_norm": 0.1228277598641854, | |
| "learning_rate": 9.417366770175821e-06, | |
| "loss": 0.4017, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 2.2846715328467155, | |
| "grad_norm": 0.1285711649465565, | |
| "learning_rate": 9.197006110136977e-06, | |
| "loss": 0.422, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 2.293655249859629, | |
| "grad_norm": 0.13481029664375577, | |
| "learning_rate": 8.978752607854924e-06, | |
| "loss": 0.4046, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 2.3026389668725433, | |
| "grad_norm": 0.11543534305483437, | |
| "learning_rate": 8.762630357645268e-06, | |
| "loss": 0.414, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 2.3116226838854574, | |
| "grad_norm": 0.13900679103109587, | |
| "learning_rate": 8.548663218541897e-06, | |
| "loss": 0.4125, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 2.3206064008983716, | |
| "grad_norm": 0.1259428788035887, | |
| "learning_rate": 8.336874811663032e-06, | |
| "loss": 0.3983, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 2.3295901179112857, | |
| "grad_norm": 0.11085457797406689, | |
| "learning_rate": 8.127288517603557e-06, | |
| "loss": 0.4193, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 2.3385738349242, | |
| "grad_norm": 0.11622214274255102, | |
| "learning_rate": 7.919927473853877e-06, | |
| "loss": 0.3937, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 2.347557551937114, | |
| "grad_norm": 0.12462100200017505, | |
| "learning_rate": 7.714814572245652e-06, | |
| "loss": 0.4155, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 2.356541268950028, | |
| "grad_norm": 0.12614101513377815, | |
| "learning_rate": 7.511972456424624e-06, | |
| "loss": 0.4121, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 2.365524985962942, | |
| "grad_norm": 0.12461680064110435, | |
| "learning_rate": 7.31142351935084e-06, | |
| "loss": 0.4068, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 2.3745087029758563, | |
| "grad_norm": 0.11950703045498604, | |
| "learning_rate": 7.113189900826568e-06, | |
| "loss": 0.4117, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 2.3834924199887704, | |
| "grad_norm": 0.13056991708575583, | |
| "learning_rate": 6.917293485052153e-06, | |
| "loss": 0.4128, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 2.3924761370016845, | |
| "grad_norm": 0.13745171444337193, | |
| "learning_rate": 6.723755898210081e-06, | |
| "loss": 0.4151, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 2.4014598540145986, | |
| "grad_norm": 0.13033817774915013, | |
| "learning_rate": 6.53259850607753e-06, | |
| "loss": 0.4016, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 2.4104435710275127, | |
| "grad_norm": 0.10688098668609738, | |
| "learning_rate": 6.343842411667697e-06, | |
| "loss": 0.4134, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 2.419427288040427, | |
| "grad_norm": 0.1287888983585153, | |
| "learning_rate": 6.157508452900079e-06, | |
| "loss": 0.4174, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 2.428411005053341, | |
| "grad_norm": 0.1248679388883586, | |
| "learning_rate": 5.973617200300082e-06, | |
| "loss": 0.3956, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 2.437394722066255, | |
| "grad_norm": 0.11070702147294058, | |
| "learning_rate": 5.792188954728074e-06, | |
| "loss": 0.3993, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 2.446378439079169, | |
| "grad_norm": 0.12330276650877879, | |
| "learning_rate": 5.6132437451382956e-06, | |
| "loss": 0.4085, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 2.4553621560920833, | |
| "grad_norm": 0.1119180851449193, | |
| "learning_rate": 5.436801326367725e-06, | |
| "loss": 0.3998, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 2.464345873104997, | |
| "grad_norm": 0.1328994320544872, | |
| "learning_rate": 5.262881176955216e-06, | |
| "loss": 0.419, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 2.473329590117911, | |
| "grad_norm": 0.11619373835235447, | |
| "learning_rate": 5.09150249699116e-06, | |
| "loss": 0.4097, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 2.482313307130825, | |
| "grad_norm": 0.11879193778970075, | |
| "learning_rate": 4.92268420599785e-06, | |
| "loss": 0.4154, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 2.4912970241437393, | |
| "grad_norm": 0.11150882823633602, | |
| "learning_rate": 4.756444940840868e-06, | |
| "loss": 0.3938, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 2.5002807411566534, | |
| "grad_norm": 0.10688470139111161, | |
| "learning_rate": 4.5928030536716305e-06, | |
| "loss": 0.3951, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 2.5092644581695676, | |
| "grad_norm": 0.10966725943546395, | |
| "learning_rate": 4.431776609901392e-06, | |
| "loss": 0.4067, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 2.5182481751824817, | |
| "grad_norm": 0.10615758090408767, | |
| "learning_rate": 4.273383386206893e-06, | |
| "loss": 0.4171, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 2.527231892195396, | |
| "grad_norm": 0.10796581041782187, | |
| "learning_rate": 4.11764086856789e-06, | |
| "loss": 0.3986, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 2.53621560920831, | |
| "grad_norm": 0.10704980127868732, | |
| "learning_rate": 3.964566250336768e-06, | |
| "loss": 0.4025, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 2.545199326221224, | |
| "grad_norm": 0.11367865379296356, | |
| "learning_rate": 3.814176430340453e-06, | |
| "loss": 0.4189, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 2.554183043234138, | |
| "grad_norm": 0.10114216928540735, | |
| "learning_rate": 3.6664880110148826e-06, | |
| "loss": 0.3926, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 2.5631667602470523, | |
| "grad_norm": 0.11361742890474415, | |
| "learning_rate": 3.5215172965721247e-06, | |
| "loss": 0.4245, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 2.5721504772599664, | |
| "grad_norm": 0.1084518684866817, | |
| "learning_rate": 3.3792802912004827e-06, | |
| "loss": 0.4107, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 2.5811341942728805, | |
| "grad_norm": 0.10549395673412279, | |
| "learning_rate": 3.2397926972976876e-06, | |
| "loss": 0.4107, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 2.5901179112857946, | |
| "grad_norm": 0.09969645074776698, | |
| "learning_rate": 3.1030699137374146e-06, | |
| "loss": 0.3994, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 2.5991016282987087, | |
| "grad_norm": 0.1056666118347554, | |
| "learning_rate": 2.969127034169312e-06, | |
| "loss": 0.3998, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 2.608085345311623, | |
| "grad_norm": 0.1035766308650711, | |
| "learning_rate": 2.837978845352723e-06, | |
| "loss": 0.417, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 2.6170690623245365, | |
| "grad_norm": 0.10302107528120776, | |
| "learning_rate": 2.709639825524283e-06, | |
| "loss": 0.4109, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 2.626052779337451, | |
| "grad_norm": 0.10759853807512873, | |
| "learning_rate": 2.5841241427995886e-06, | |
| "loss": 0.3955, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 2.6350364963503647, | |
| "grad_norm": 0.10880577647272674, | |
| "learning_rate": 2.461445653609079e-06, | |
| "loss": 0.4147, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 2.6440202133632793, | |
| "grad_norm": 0.09689142380499235, | |
| "learning_rate": 2.3416179011683658e-06, | |
| "loss": 0.4106, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 2.653003930376193, | |
| "grad_norm": 0.10685593466563702, | |
| "learning_rate": 2.2246541139830715e-06, | |
| "loss": 0.4014, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 2.661987647389107, | |
| "grad_norm": 0.0983605426363825, | |
| "learning_rate": 2.1105672043885143e-06, | |
| "loss": 0.4116, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 2.670971364402021, | |
| "grad_norm": 0.09655008561783156, | |
| "learning_rate": 1.9993697671241945e-06, | |
| "loss": 0.4081, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 2.6799550814149353, | |
| "grad_norm": 0.11364035199790146, | |
| "learning_rate": 1.8910740779434006e-06, | |
| "loss": 0.4076, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 2.6889387984278494, | |
| "grad_norm": 0.10418804191385413, | |
| "learning_rate": 1.7856920922580113e-06, | |
| "loss": 0.4011, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 2.6979225154407636, | |
| "grad_norm": 0.09941508460951572, | |
| "learning_rate": 1.683235443818673e-06, | |
| "loss": 0.3984, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.7069062324536777, | |
| "grad_norm": 0.09866363300713496, | |
| "learning_rate": 1.5837154434304704e-06, | |
| "loss": 0.4088, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 2.715889949466592, | |
| "grad_norm": 0.10476593966969794, | |
| "learning_rate": 1.4871430777042698e-06, | |
| "loss": 0.4158, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 2.724873666479506, | |
| "grad_norm": 0.1003488280191521, | |
| "learning_rate": 1.3935290078438229e-06, | |
| "loss": 0.4023, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 2.73385738349242, | |
| "grad_norm": 0.09404490189909534, | |
| "learning_rate": 1.3028835684688378e-06, | |
| "loss": 0.3971, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 2.742841100505334, | |
| "grad_norm": 0.10211715235272904, | |
| "learning_rate": 1.2152167664740558e-06, | |
| "loss": 0.4075, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 2.7518248175182483, | |
| "grad_norm": 0.1049198084000181, | |
| "learning_rate": 1.1305382799245437e-06, | |
| "loss": 0.3984, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 2.7608085345311624, | |
| "grad_norm": 0.09652931457838564, | |
| "learning_rate": 1.0488574569872655e-06, | |
| "loss": 0.4098, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 2.7697922515440765, | |
| "grad_norm": 0.08905719728483828, | |
| "learning_rate": 9.70183314899084e-07, | |
| "loss": 0.3915, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 2.7787759685569906, | |
| "grad_norm": 0.0938697467560363, | |
| "learning_rate": 8.945245389713029e-07, | |
| "loss": 0.4248, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 2.7877596855699043, | |
| "grad_norm": 0.1012541455507019, | |
| "learning_rate": 8.218894816308398e-07, | |
| "loss": 0.3914, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 2.796743402582819, | |
| "grad_norm": 0.09264156371198151, | |
| "learning_rate": 7.522861614981325e-07, | |
| "loss": 0.4122, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 2.8057271195957325, | |
| "grad_norm": 0.0957623449129902, | |
| "learning_rate": 6.85722262501941e-07, | |
| "loss": 0.4102, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 2.814710836608647, | |
| "grad_norm": 0.09713743297082324, | |
| "learning_rate": 6.222051330310619e-07, | |
| "loss": 0.4026, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 2.8236945536215607, | |
| "grad_norm": 0.09186827414249998, | |
| "learning_rate": 5.617417851230931e-07, | |
| "loss": 0.4096, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 2.832678270634475, | |
| "grad_norm": 0.09713737535786383, | |
| "learning_rate": 5.043388936903404e-07, | |
| "loss": 0.4032, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 2.841661987647389, | |
| "grad_norm": 0.09342384537711217, | |
| "learning_rate": 4.500027957829165e-07, | |
| "loss": 0.4071, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 2.850645704660303, | |
| "grad_norm": 0.09314756742095372, | |
| "learning_rate": 3.987394898891804e-07, | |
| "loss": 0.4137, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 2.859629421673217, | |
| "grad_norm": 0.0887933631925566, | |
| "learning_rate": 3.50554635273513e-07, | |
| "loss": 0.4081, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 2.8686131386861313, | |
| "grad_norm": 0.09565485915542679, | |
| "learning_rate": 3.0545355135156165e-07, | |
| "loss": 0.4025, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 2.8775968556990454, | |
| "grad_norm": 0.0950394236491987, | |
| "learning_rate": 2.634412171029911e-07, | |
| "loss": 0.4118, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 2.8865805727119596, | |
| "grad_norm": 0.08929953352976103, | |
| "learning_rate": 2.2452227052183503e-07, | |
| "loss": 0.4051, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 2.8955642897248737, | |
| "grad_norm": 0.087031212892392, | |
| "learning_rate": 1.8870100810447e-07, | |
| "loss": 0.4037, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 2.904548006737788, | |
| "grad_norm": 0.09107399363880533, | |
| "learning_rate": 1.559813843753062e-07, | |
| "loss": 0.4089, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 2.913531723750702, | |
| "grad_norm": 0.09057801956608469, | |
| "learning_rate": 1.2636701145022035e-07, | |
| "loss": 0.4016, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 2.922515440763616, | |
| "grad_norm": 0.09636376248734924, | |
| "learning_rate": 9.986115863779269e-08, | |
| "loss": 0.4139, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 2.93149915777653, | |
| "grad_norm": 0.08743246679042953, | |
| "learning_rate": 7.646675207839336e-08, | |
| "loss": 0.3973, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 2.9404828747894443, | |
| "grad_norm": 0.08981696851089446, | |
| "learning_rate": 5.618637442113561e-08, | |
| "loss": 0.3952, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 2.9494665918023584, | |
| "grad_norm": 0.09021046410712788, | |
| "learning_rate": 3.9022264538775445e-08, | |
| "loss": 0.4095, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 2.958450308815272, | |
| "grad_norm": 0.09125597188841794, | |
| "learning_rate": 2.4976317280541322e-08, | |
| "loss": 0.404, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 2.9674340258281866, | |
| "grad_norm": 0.09031347282702284, | |
| "learning_rate": 1.4050083262957056e-08, | |
| "loss": 0.4065, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 2.9764177428411003, | |
| "grad_norm": 0.09548041676594968, | |
| "learning_rate": 6.24476869865149e-09, | |
| "loss": 0.4085, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 2.985401459854015, | |
| "grad_norm": 0.08990746175705872, | |
| "learning_rate": 1.5612352631989697e-09, | |
| "loss": 0.4029, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 2.9943851768669285, | |
| "grad_norm": 0.09349234362985716, | |
| "learning_rate": 0.0, | |
| "loss": 0.407, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 2.9943851768669285, | |
| "step": 333, | |
| "total_flos": 4607052149424128.0, | |
| "train_loss": 0.4632815238949773, | |
| "train_runtime": 115748.1353, | |
| "train_samples_per_second": 2.954, | |
| "train_steps_per_second": 0.003 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 333, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4607052149424128.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |