| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9997192588433464, |
| "eval_steps": 500, |
| "global_step": 1187, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0008422234699606962, |
| "grad_norm": 5.7768874168396, |
| "learning_rate": 8.403361344537815e-08, |
| "loss": 0.8113, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0016844469399213925, |
| "grad_norm": 5.731680393218994, |
| "learning_rate": 1.680672268907563e-07, |
| "loss": 0.818, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0025266704098820887, |
| "grad_norm": 5.816023349761963, |
| "learning_rate": 2.5210084033613445e-07, |
| "loss": 0.842, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.003368893879842785, |
| "grad_norm": 5.791347503662109, |
| "learning_rate": 3.361344537815126e-07, |
| "loss": 0.842, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.004211117349803481, |
| "grad_norm": 5.896840572357178, |
| "learning_rate": 4.201680672268908e-07, |
| "loss": 0.8589, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0050533408197641775, |
| "grad_norm": 5.944826602935791, |
| "learning_rate": 5.042016806722689e-07, |
| "loss": 0.8664, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.005895564289724873, |
| "grad_norm": 5.9079694747924805, |
| "learning_rate": 5.882352941176471e-07, |
| "loss": 0.8953, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.00673778775968557, |
| "grad_norm": 5.896533489227295, |
| "learning_rate": 6.722689075630252e-07, |
| "loss": 0.8605, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.007580011229646266, |
| "grad_norm": 5.51594352722168, |
| "learning_rate": 7.563025210084034e-07, |
| "loss": 0.8391, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.008422234699606962, |
| "grad_norm": 5.275195121765137, |
| "learning_rate": 8.403361344537816e-07, |
| "loss": 0.8306, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.009264458169567658, |
| "grad_norm": 5.213021278381348, |
| "learning_rate": 9.243697478991598e-07, |
| "loss": 0.8346, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.010106681639528355, |
| "grad_norm": 4.408111572265625, |
| "learning_rate": 1.0084033613445378e-06, |
| "loss": 0.8056, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.010948905109489052, |
| "grad_norm": 4.242059230804443, |
| "learning_rate": 1.092436974789916e-06, |
| "loss": 0.8157, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.011791128579449747, |
| "grad_norm": 4.183578014373779, |
| "learning_rate": 1.1764705882352942e-06, |
| "loss": 0.8271, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.012633352049410443, |
| "grad_norm": 3.4327707290649414, |
| "learning_rate": 1.2605042016806724e-06, |
| "loss": 0.7981, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.01347557551937114, |
| "grad_norm": 2.344697952270508, |
| "learning_rate": 1.3445378151260504e-06, |
| "loss": 0.7111, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.014317798989331837, |
| "grad_norm": 2.276667833328247, |
| "learning_rate": 1.4285714285714286e-06, |
| "loss": 0.7833, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.015160022459292532, |
| "grad_norm": 2.071214437484741, |
| "learning_rate": 1.5126050420168068e-06, |
| "loss": 0.7482, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.016002245929253228, |
| "grad_norm": 1.9965254068374634, |
| "learning_rate": 1.5966386554621848e-06, |
| "loss": 0.7637, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.016844469399213923, |
| "grad_norm": 1.9486117362976074, |
| "learning_rate": 1.6806722689075632e-06, |
| "loss": 0.7394, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01768669286917462, |
| "grad_norm": 2.220632314682007, |
| "learning_rate": 1.7647058823529414e-06, |
| "loss": 0.7298, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.018528916339135316, |
| "grad_norm": 2.720287799835205, |
| "learning_rate": 1.8487394957983196e-06, |
| "loss": 0.7119, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.019371139809096015, |
| "grad_norm": 2.9575459957122803, |
| "learning_rate": 1.932773109243698e-06, |
| "loss": 0.6989, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.02021336327905671, |
| "grad_norm": 3.2116012573242188, |
| "learning_rate": 2.0168067226890756e-06, |
| "loss": 0.7339, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.021055586749017405, |
| "grad_norm": 2.941859722137451, |
| "learning_rate": 2.100840336134454e-06, |
| "loss": 0.7393, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.021897810218978103, |
| "grad_norm": 2.7433841228485107, |
| "learning_rate": 2.184873949579832e-06, |
| "loss": 0.7494, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.022740033688938798, |
| "grad_norm": 2.205691337585449, |
| "learning_rate": 2.2689075630252102e-06, |
| "loss": 0.7106, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.023582257158899493, |
| "grad_norm": 1.7549865245819092, |
| "learning_rate": 2.3529411764705885e-06, |
| "loss": 0.6942, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.02442448062886019, |
| "grad_norm": 1.133491039276123, |
| "learning_rate": 2.4369747899159667e-06, |
| "loss": 0.6527, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.025266704098820886, |
| "grad_norm": 1.1498465538024902, |
| "learning_rate": 2.521008403361345e-06, |
| "loss": 0.7023, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.026108927568781585, |
| "grad_norm": 1.1065810918807983, |
| "learning_rate": 2.605042016806723e-06, |
| "loss": 0.6585, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.02695115103874228, |
| "grad_norm": 1.1229432821273804, |
| "learning_rate": 2.689075630252101e-06, |
| "loss": 0.6625, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.027793374508702975, |
| "grad_norm": 1.0526050329208374, |
| "learning_rate": 2.7731092436974795e-06, |
| "loss": 0.6456, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.028635597978663673, |
| "grad_norm": 1.0395145416259766, |
| "learning_rate": 2.8571428571428573e-06, |
| "loss": 0.629, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.029477821448624368, |
| "grad_norm": 1.0640065670013428, |
| "learning_rate": 2.9411764705882355e-06, |
| "loss": 0.661, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.030320044918585063, |
| "grad_norm": 0.8369417786598206, |
| "learning_rate": 3.0252100840336137e-06, |
| "loss": 0.6184, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.03116226838854576, |
| "grad_norm": 0.7107870578765869, |
| "learning_rate": 3.109243697478992e-06, |
| "loss": 0.6024, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.032004491858506456, |
| "grad_norm": 0.6917080283164978, |
| "learning_rate": 3.1932773109243696e-06, |
| "loss": 0.6407, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.032846715328467155, |
| "grad_norm": 0.7435740828514099, |
| "learning_rate": 3.2773109243697483e-06, |
| "loss": 0.642, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.033688938798427846, |
| "grad_norm": 0.7839305996894836, |
| "learning_rate": 3.3613445378151265e-06, |
| "loss": 0.6068, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.034531162268388545, |
| "grad_norm": 0.8303804993629456, |
| "learning_rate": 3.4453781512605043e-06, |
| "loss": 0.6189, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.03537338573834924, |
| "grad_norm": 0.7673403024673462, |
| "learning_rate": 3.529411764705883e-06, |
| "loss": 0.6036, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.03621560920830994, |
| "grad_norm": 0.6664486527442932, |
| "learning_rate": 3.6134453781512607e-06, |
| "loss": 0.614, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.03705783267827063, |
| "grad_norm": 0.5672026872634888, |
| "learning_rate": 3.6974789915966393e-06, |
| "loss": 0.5862, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.03790005614823133, |
| "grad_norm": 0.7759891152381897, |
| "learning_rate": 3.781512605042017e-06, |
| "loss": 0.6116, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.03874227961819203, |
| "grad_norm": 0.7109480500221252, |
| "learning_rate": 3.865546218487396e-06, |
| "loss": 0.5891, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.03958450308815272, |
| "grad_norm": 0.7215145826339722, |
| "learning_rate": 3.9495798319327735e-06, |
| "loss": 0.6177, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.04042672655811342, |
| "grad_norm": 0.5773827433586121, |
| "learning_rate": 4.033613445378151e-06, |
| "loss": 0.5719, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.04126895002807412, |
| "grad_norm": 0.6216230988502502, |
| "learning_rate": 4.11764705882353e-06, |
| "loss": 0.5907, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.04211117349803481, |
| "grad_norm": 0.5774328708648682, |
| "learning_rate": 4.201680672268908e-06, |
| "loss": 0.5462, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.04295339696799551, |
| "grad_norm": 0.592668890953064, |
| "learning_rate": 4.2857142857142855e-06, |
| "loss": 0.5845, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.043795620437956206, |
| "grad_norm": 0.6327844858169556, |
| "learning_rate": 4.369747899159664e-06, |
| "loss": 0.5814, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.0446378439079169, |
| "grad_norm": 0.603449285030365, |
| "learning_rate": 4.453781512605043e-06, |
| "loss": 0.6044, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.045480067377877596, |
| "grad_norm": 0.5610288977622986, |
| "learning_rate": 4.5378151260504205e-06, |
| "loss": 0.597, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.046322290847838295, |
| "grad_norm": 0.49728789925575256, |
| "learning_rate": 4.621848739495799e-06, |
| "loss": 0.6002, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.047164514317798986, |
| "grad_norm": 0.5607268214225769, |
| "learning_rate": 4.705882352941177e-06, |
| "loss": 0.5839, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.048006737787759685, |
| "grad_norm": 0.4914703965187073, |
| "learning_rate": 4.7899159663865555e-06, |
| "loss": 0.582, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.04884896125772038, |
| "grad_norm": 0.4450504183769226, |
| "learning_rate": 4.873949579831933e-06, |
| "loss": 0.5652, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.04969118472768108, |
| "grad_norm": 0.4177859127521515, |
| "learning_rate": 4.957983193277311e-06, |
| "loss": 0.5312, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.05053340819764177, |
| "grad_norm": 0.47395533323287964, |
| "learning_rate": 5.04201680672269e-06, |
| "loss": 0.6046, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.05137563166760247, |
| "grad_norm": 0.43919098377227783, |
| "learning_rate": 5.1260504201680675e-06, |
| "loss": 0.5607, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.05221785513756317, |
| "grad_norm": 0.47760307788848877, |
| "learning_rate": 5.210084033613446e-06, |
| "loss": 0.5549, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.05306007860752386, |
| "grad_norm": 0.4238983392715454, |
| "learning_rate": 5.294117647058824e-06, |
| "loss": 0.5652, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.05390230207748456, |
| "grad_norm": 0.4531624913215637, |
| "learning_rate": 5.378151260504202e-06, |
| "loss": 0.5299, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.05474452554744526, |
| "grad_norm": 0.4546626806259155, |
| "learning_rate": 5.4621848739495795e-06, |
| "loss": 0.5402, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.05558674901740595, |
| "grad_norm": 0.4843672811985016, |
| "learning_rate": 5.546218487394959e-06, |
| "loss": 0.5637, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.05642897248736665, |
| "grad_norm": 0.4193810522556305, |
| "learning_rate": 5.630252100840337e-06, |
| "loss": 0.5422, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.057271195957327346, |
| "grad_norm": 0.40377169847488403, |
| "learning_rate": 5.7142857142857145e-06, |
| "loss": 0.5283, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.05811341942728804, |
| "grad_norm": 0.4971763491630554, |
| "learning_rate": 5.798319327731093e-06, |
| "loss": 0.5506, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.058955642897248736, |
| "grad_norm": 0.5227189660072327, |
| "learning_rate": 5.882352941176471e-06, |
| "loss": 0.5351, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.059797866367209435, |
| "grad_norm": 0.4623558223247528, |
| "learning_rate": 5.9663865546218495e-06, |
| "loss": 0.5565, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.060640089837170126, |
| "grad_norm": 0.4405473470687866, |
| "learning_rate": 6.050420168067227e-06, |
| "loss": 0.5565, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.061482313307130824, |
| "grad_norm": 0.47143077850341797, |
| "learning_rate": 6.134453781512606e-06, |
| "loss": 0.5559, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.06232453677709152, |
| "grad_norm": 0.4616853892803192, |
| "learning_rate": 6.218487394957984e-06, |
| "loss": 0.5442, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.06316676024705221, |
| "grad_norm": 0.48797038197517395, |
| "learning_rate": 6.3025210084033615e-06, |
| "loss": 0.5266, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.06400898371701291, |
| "grad_norm": 0.47729045152664185, |
| "learning_rate": 6.386554621848739e-06, |
| "loss": 0.514, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.06485120718697361, |
| "grad_norm": 0.4070008099079132, |
| "learning_rate": 6.470588235294119e-06, |
| "loss": 0.5352, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.06569343065693431, |
| "grad_norm": 0.4604280889034271, |
| "learning_rate": 6.5546218487394966e-06, |
| "loss": 0.5761, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.06653565412689501, |
| "grad_norm": 0.41718626022338867, |
| "learning_rate": 6.638655462184874e-06, |
| "loss": 0.5269, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.06737787759685569, |
| "grad_norm": 0.44142675399780273, |
| "learning_rate": 6.722689075630253e-06, |
| "loss": 0.5572, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.06822010106681639, |
| "grad_norm": 0.4859495759010315, |
| "learning_rate": 6.806722689075631e-06, |
| "loss": 0.5411, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.06906232453677709, |
| "grad_norm": 0.4969702959060669, |
| "learning_rate": 6.8907563025210085e-06, |
| "loss": 0.5432, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.06990454800673779, |
| "grad_norm": 0.42125970125198364, |
| "learning_rate": 6.974789915966387e-06, |
| "loss": 0.5136, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.07074677147669849, |
| "grad_norm": 0.43726083636283875, |
| "learning_rate": 7.058823529411766e-06, |
| "loss": 0.5413, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.07158899494665918, |
| "grad_norm": 0.4877779483795166, |
| "learning_rate": 7.1428571428571436e-06, |
| "loss": 0.5187, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.07243121841661988, |
| "grad_norm": 0.44341692328453064, |
| "learning_rate": 7.226890756302521e-06, |
| "loss": 0.5633, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.07327344188658057, |
| "grad_norm": 0.473922997713089, |
| "learning_rate": 7.310924369747899e-06, |
| "loss": 0.539, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.07411566535654127, |
| "grad_norm": 0.49199458956718445, |
| "learning_rate": 7.394957983193279e-06, |
| "loss": 0.5605, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.07495788882650196, |
| "grad_norm": 0.44293299317359924, |
| "learning_rate": 7.478991596638656e-06, |
| "loss": 0.5469, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.07580011229646266, |
| "grad_norm": 0.4823598861694336, |
| "learning_rate": 7.563025210084034e-06, |
| "loss": 0.5655, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.07664233576642336, |
| "grad_norm": 0.49608561396598816, |
| "learning_rate": 7.647058823529411e-06, |
| "loss": 0.5285, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.07748455923638406, |
| "grad_norm": 0.40790101885795593, |
| "learning_rate": 7.731092436974791e-06, |
| "loss": 0.5254, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.07832678270634474, |
| "grad_norm": 0.5122684836387634, |
| "learning_rate": 7.815126050420168e-06, |
| "loss": 0.531, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.07916900617630544, |
| "grad_norm": 0.5086395740509033, |
| "learning_rate": 7.899159663865547e-06, |
| "loss": 0.5588, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.08001122964626614, |
| "grad_norm": 0.5407152771949768, |
| "learning_rate": 7.983193277310926e-06, |
| "loss": 0.5284, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.08085345311622684, |
| "grad_norm": 0.45712265372276306, |
| "learning_rate": 8.067226890756303e-06, |
| "loss": 0.5228, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.08169567658618754, |
| "grad_norm": 0.5024585127830505, |
| "learning_rate": 8.151260504201681e-06, |
| "loss": 0.5521, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.08253790005614824, |
| "grad_norm": 0.4624791741371155, |
| "learning_rate": 8.23529411764706e-06, |
| "loss": 0.5154, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.08338012352610892, |
| "grad_norm": 0.51868736743927, |
| "learning_rate": 8.319327731092438e-06, |
| "loss": 0.5123, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.08422234699606962, |
| "grad_norm": 0.4298367202281952, |
| "learning_rate": 8.403361344537815e-06, |
| "loss": 0.5507, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.08506457046603032, |
| "grad_norm": 0.5061134099960327, |
| "learning_rate": 8.487394957983194e-06, |
| "loss": 0.5317, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.08590679393599102, |
| "grad_norm": 0.45311102271080017, |
| "learning_rate": 8.571428571428571e-06, |
| "loss": 0.558, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.08674901740595171, |
| "grad_norm": 0.5305558443069458, |
| "learning_rate": 8.655462184873951e-06, |
| "loss": 0.5218, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.08759124087591241, |
| "grad_norm": 0.5467281341552734, |
| "learning_rate": 8.739495798319328e-06, |
| "loss": 0.5526, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.08843346434587311, |
| "grad_norm": 0.515506386756897, |
| "learning_rate": 8.823529411764707e-06, |
| "loss": 0.5335, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.0892756878158338, |
| "grad_norm": 0.5897248387336731, |
| "learning_rate": 8.907563025210085e-06, |
| "loss": 0.5093, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.0901179112857945, |
| "grad_norm": 0.5122184157371521, |
| "learning_rate": 8.991596638655462e-06, |
| "loss": 0.5161, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.09096013475575519, |
| "grad_norm": 0.49633896350860596, |
| "learning_rate": 9.075630252100841e-06, |
| "loss": 0.5306, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.09180235822571589, |
| "grad_norm": 0.479739785194397, |
| "learning_rate": 9.15966386554622e-06, |
| "loss": 0.5316, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.09264458169567659, |
| "grad_norm": 0.5709593296051025, |
| "learning_rate": 9.243697478991598e-06, |
| "loss": 0.5276, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.09348680516563729, |
| "grad_norm": 0.521088719367981, |
| "learning_rate": 9.327731092436975e-06, |
| "loss": 0.5465, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.09432902863559797, |
| "grad_norm": 0.5315929651260376, |
| "learning_rate": 9.411764705882354e-06, |
| "loss": 0.5103, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.09517125210555867, |
| "grad_norm": 0.4991461932659149, |
| "learning_rate": 9.49579831932773e-06, |
| "loss": 0.5311, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.09601347557551937, |
| "grad_norm": 0.5171940326690674, |
| "learning_rate": 9.579831932773111e-06, |
| "loss": 0.5332, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.09685569904548007, |
| "grad_norm": 0.5097129344940186, |
| "learning_rate": 9.663865546218488e-06, |
| "loss": 0.514, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.09769792251544077, |
| "grad_norm": 0.5047439336776733, |
| "learning_rate": 9.747899159663867e-06, |
| "loss": 0.5127, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.09854014598540146, |
| "grad_norm": 0.45830124616622925, |
| "learning_rate": 9.831932773109244e-06, |
| "loss": 0.5268, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.09938236945536216, |
| "grad_norm": 0.481951504945755, |
| "learning_rate": 9.915966386554622e-06, |
| "loss": 0.5223, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.10022459292532285, |
| "grad_norm": 0.530089795589447, |
| "learning_rate": 1e-05, |
| "loss": 0.5265, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.10106681639528355, |
| "grad_norm": 0.5048061013221741, |
| "learning_rate": 9.999978367986988e-06, |
| "loss": 0.5125, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.10190903986524424, |
| "grad_norm": 0.5036464333534241, |
| "learning_rate": 9.999913472135126e-06, |
| "loss": 0.5475, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.10275126333520494, |
| "grad_norm": 0.4843362867832184, |
| "learning_rate": 9.999805313005946e-06, |
| "loss": 0.5124, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.10359348680516564, |
| "grad_norm": 0.4779297411441803, |
| "learning_rate": 9.99965389153533e-06, |
| "loss": 0.5044, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.10443571027512634, |
| "grad_norm": 0.4498569071292877, |
| "learning_rate": 9.999459209033495e-06, |
| "loss": 0.488, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.10527793374508702, |
| "grad_norm": 0.4998040795326233, |
| "learning_rate": 9.999221267184993e-06, |
| "loss": 0.506, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.10612015721504772, |
| "grad_norm": 0.5822563171386719, |
| "learning_rate": 9.998940068048688e-06, |
| "loss": 0.5578, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.10696238068500842, |
| "grad_norm": 0.5187222957611084, |
| "learning_rate": 9.998615614057743e-06, |
| "loss": 0.519, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.10780460415496912, |
| "grad_norm": 0.44510042667388916, |
| "learning_rate": 9.998247908019594e-06, |
| "loss": 0.4965, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.10864682762492982, |
| "grad_norm": 0.5618104934692383, |
| "learning_rate": 9.997836953115927e-06, |
| "loss": 0.5108, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.10948905109489052, |
| "grad_norm": 0.4618164896965027, |
| "learning_rate": 9.997382752902658e-06, |
| "loss": 0.5406, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.1103312745648512, |
| "grad_norm": 0.4968518018722534, |
| "learning_rate": 9.996885311309892e-06, |
| "loss": 0.5188, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.1111734980348119, |
| "grad_norm": 0.49284565448760986, |
| "learning_rate": 9.996344632641895e-06, |
| "loss": 0.4996, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.1120157215047726, |
| "grad_norm": 0.5027459263801575, |
| "learning_rate": 9.995760721577053e-06, |
| "loss": 0.5256, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.1128579449747333, |
| "grad_norm": 0.5380898714065552, |
| "learning_rate": 9.995133583167833e-06, |
| "loss": 0.5252, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.113700168444694, |
| "grad_norm": 0.4836495816707611, |
| "learning_rate": 9.994463222840748e-06, |
| "loss": 0.4899, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.11454239191465469, |
| "grad_norm": 0.5266380906105042, |
| "learning_rate": 9.993749646396286e-06, |
| "loss": 0.5333, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.11538461538461539, |
| "grad_norm": 0.45768430829048157, |
| "learning_rate": 9.992992860008893e-06, |
| "loss": 0.4913, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.11622683885457608, |
| "grad_norm": 0.5433592200279236, |
| "learning_rate": 9.99219287022689e-06, |
| "loss": 0.5155, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.11706906232453677, |
| "grad_norm": 0.4485207796096802, |
| "learning_rate": 9.991349683972435e-06, |
| "loss": 0.4762, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.11791128579449747, |
| "grad_norm": 0.5021021366119385, |
| "learning_rate": 9.990463308541452e-06, |
| "loss": 0.5166, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.11875350926445817, |
| "grad_norm": 0.5295252799987793, |
| "learning_rate": 9.989533751603578e-06, |
| "loss": 0.5191, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.11959573273441887, |
| "grad_norm": 0.5277601480484009, |
| "learning_rate": 9.988561021202083e-06, |
| "loss": 0.5067, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.12043795620437957, |
| "grad_norm": 0.48189201951026917, |
| "learning_rate": 9.987545125753818e-06, |
| "loss": 0.4935, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.12128017967434025, |
| "grad_norm": 0.578239917755127, |
| "learning_rate": 9.986486074049131e-06, |
| "loss": 0.5292, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.12212240314430095, |
| "grad_norm": 0.5277495384216309, |
| "learning_rate": 9.985383875251783e-06, |
| "loss": 0.5124, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.12296462661426165, |
| "grad_norm": 0.5657446384429932, |
| "learning_rate": 9.98423853889889e-06, |
| "loss": 0.5164, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.12380685008422235, |
| "grad_norm": 0.4794774353504181, |
| "learning_rate": 9.983050074900824e-06, |
| "loss": 0.5111, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.12464907355418305, |
| "grad_norm": 0.5772200226783752, |
| "learning_rate": 9.98181849354113e-06, |
| "loss": 0.5066, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.12549129702414374, |
| "grad_norm": 0.5424597859382629, |
| "learning_rate": 9.980543805476447e-06, |
| "loss": 0.5137, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.12633352049410443, |
| "grad_norm": 0.6257909536361694, |
| "learning_rate": 9.979226021736396e-06, |
| "loss": 0.5385, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.12717574396406514, |
| "grad_norm": 0.5045788288116455, |
| "learning_rate": 9.977865153723508e-06, |
| "loss": 0.4789, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.12801796743402583, |
| "grad_norm": 0.5379694700241089, |
| "learning_rate": 9.976461213213104e-06, |
| "loss": 0.5066, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.12886019090398654, |
| "grad_norm": 0.6253272891044617, |
| "learning_rate": 9.975014212353212e-06, |
| "loss": 0.5247, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.12970241437394722, |
| "grad_norm": 0.4655895233154297, |
| "learning_rate": 9.973524163664447e-06, |
| "loss": 0.5167, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.1305446378439079, |
| "grad_norm": 0.5709823369979858, |
| "learning_rate": 9.971991080039912e-06, |
| "loss": 0.5379, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.13138686131386862, |
| "grad_norm": 0.5366809964179993, |
| "learning_rate": 9.970414974745077e-06, |
| "loss": 0.5017, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.1322290847838293, |
| "grad_norm": 0.5675758123397827, |
| "learning_rate": 9.968795861417676e-06, |
| "loss": 0.5339, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.13307130825379002, |
| "grad_norm": 0.5337751507759094, |
| "learning_rate": 9.967133754067581e-06, |
| "loss": 0.5008, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.1339135317237507, |
| "grad_norm": 0.6179817318916321, |
| "learning_rate": 9.965428667076687e-06, |
| "loss": 0.5308, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.13475575519371139, |
| "grad_norm": 0.5175896286964417, |
| "learning_rate": 9.963680615198774e-06, |
| "loss": 0.51, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.1355979786636721, |
| "grad_norm": 0.5223482847213745, |
| "learning_rate": 9.961889613559396e-06, |
| "loss": 0.5324, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.13644020213363278, |
| "grad_norm": 0.6074829697608948, |
| "learning_rate": 9.960055677655743e-06, |
| "loss": 0.4971, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.1372824256035935, |
| "grad_norm": 0.4789965748786926, |
| "learning_rate": 9.958178823356503e-06, |
| "loss": 0.4979, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.13812464907355418, |
| "grad_norm": 0.521506130695343, |
| "learning_rate": 9.956259066901733e-06, |
| "loss": 0.5463, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.1389668725435149, |
| "grad_norm": 0.5657532215118408, |
| "learning_rate": 9.954296424902709e-06, |
| "loss": 0.4963, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.13980909601347558, |
| "grad_norm": 0.513095498085022, |
| "learning_rate": 9.95229091434179e-06, |
| "loss": 0.538, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.14065131948343626, |
| "grad_norm": 0.5661138892173767, |
| "learning_rate": 9.950242552572272e-06, |
| "loss": 0.508, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.14149354295339697, |
| "grad_norm": 0.5491284132003784, |
| "learning_rate": 9.948151357318228e-06, |
| "loss": 0.5308, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.14233576642335766, |
| "grad_norm": 0.5399675369262695, |
| "learning_rate": 9.946017346674362e-06, |
| "loss": 0.5111, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.14317798989331837, |
| "grad_norm": 0.5326489210128784, |
| "learning_rate": 9.943840539105853e-06, |
| "loss": 0.5101, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.14402021336327905, |
| "grad_norm": 0.47655802965164185, |
| "learning_rate": 9.941620953448195e-06, |
| "loss": 0.4939, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.14486243683323977, |
| "grad_norm": 0.5954977869987488, |
| "learning_rate": 9.939358608907026e-06, |
| "loss": 0.5266, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.14570466030320045, |
| "grad_norm": 0.48619142174720764, |
| "learning_rate": 9.937053525057977e-06, |
| "loss": 0.5084, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.14654688377316114, |
| "grad_norm": 0.5034385323524475, |
| "learning_rate": 9.934705721846487e-06, |
| "loss": 0.4797, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.14738910724312185, |
| "grad_norm": 0.4761101305484772, |
| "learning_rate": 9.932315219587641e-06, |
| "loss": 0.5146, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.14823133071308253, |
| "grad_norm": 0.5504338145256042, |
| "learning_rate": 9.92988203896599e-06, |
| "loss": 0.4966, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.14907355418304324, |
| "grad_norm": 0.5048502087593079, |
| "learning_rate": 9.927406201035368e-06, |
| "loss": 0.5066, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.14991577765300393, |
| "grad_norm": 0.5819829702377319, |
| "learning_rate": 9.924887727218724e-06, |
| "loss": 0.5488, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.1507580011229646, |
| "grad_norm": 0.524554967880249, |
| "learning_rate": 9.922326639307918e-06, |
| "loss": 0.4962, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.15160022459292533, |
| "grad_norm": 0.4543086886405945, |
| "learning_rate": 9.919722959463545e-06, |
| "loss": 0.5099, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.152442448062886, |
| "grad_norm": 0.5056329965591431, |
| "learning_rate": 9.917076710214739e-06, |
| "loss": 0.4723, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.15328467153284672, |
| "grad_norm": 0.5620024800300598, |
| "learning_rate": 9.914387914458983e-06, |
| "loss": 0.5111, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.1541268950028074, |
| "grad_norm": 0.475263386964798, |
| "learning_rate": 9.911656595461899e-06, |
| "loss": 0.5034, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.15496911847276812, |
| "grad_norm": 0.5066370964050293, |
| "learning_rate": 9.908882776857057e-06, |
| "loss": 0.5212, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.1558113419427288, |
| "grad_norm": 0.594321608543396, |
| "learning_rate": 9.906066482645774e-06, |
| "loss": 0.5361, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.1566535654126895, |
| "grad_norm": 0.5244519114494324, |
| "learning_rate": 9.903207737196892e-06, |
| "loss": 0.4945, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.1574957888826502, |
| "grad_norm": 0.5021108984947205, |
| "learning_rate": 9.900306565246579e-06, |
| "loss": 0.4724, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.15833801235261089, |
| "grad_norm": 0.4762863516807556, |
| "learning_rate": 9.89736299189811e-06, |
| "loss": 0.4969, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.1591802358225716, |
| "grad_norm": 0.605094313621521, |
| "learning_rate": 9.894377042621654e-06, |
| "loss": 0.5432, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.16002245929253228, |
| "grad_norm": 0.5847768187522888, |
| "learning_rate": 9.891348743254046e-06, |
| "loss": 0.4992, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.160864682762493, |
| "grad_norm": 0.5650973320007324, |
| "learning_rate": 9.888278119998573e-06, |
| "loss": 0.4782, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.16170690623245368, |
| "grad_norm": 0.5127384066581726, |
| "learning_rate": 9.885165199424738e-06, |
| "loss": 0.4917, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.16254912970241436, |
| "grad_norm": 0.4501786231994629, |
| "learning_rate": 9.882010008468038e-06, |
| "loss": 0.4922, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.16339135317237508, |
| "grad_norm": 0.5227730870246887, |
| "learning_rate": 9.878812574429722e-06, |
| "loss": 0.4937, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.16423357664233576, |
| "grad_norm": 0.4444067180156708, |
| "learning_rate": 9.875572924976568e-06, |
| "loss": 0.4955, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.16507580011229647, |
| "grad_norm": 0.5843701362609863, |
| "learning_rate": 9.87229108814063e-06, |
| "loss": 0.4759, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.16591802358225716, |
| "grad_norm": 0.5321926474571228, |
| "learning_rate": 9.868967092319003e-06, |
| "loss": 0.5134, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.16676024705221784, |
| "grad_norm": 0.5984789133071899, |
| "learning_rate": 9.865600966273576e-06, |
| "loss": 0.516, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.16760247052217855, |
| "grad_norm": 0.4739929437637329, |
| "learning_rate": 9.86219273913078e-06, |
| "loss": 0.4972, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.16844469399213924, |
| "grad_norm": 0.6019454002380371, |
| "learning_rate": 9.858742440381343e-06, |
| "loss": 0.5177, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.16928691746209995, |
| "grad_norm": 0.5428659915924072, |
| "learning_rate": 9.855250099880026e-06, |
| "loss": 0.5024, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.17012914093206064, |
| "grad_norm": 0.5898825526237488, |
| "learning_rate": 9.851715747845372e-06, |
| "loss": 0.4778, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.17097136440202135, |
| "grad_norm": 0.46226394176483154, |
| "learning_rate": 9.848139414859441e-06, |
| "loss": 0.4869, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.17181358787198203, |
| "grad_norm": 0.5695976614952087, |
| "learning_rate": 9.844521131867546e-06, |
| "loss": 0.5295, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.17265581134194272, |
| "grad_norm": 0.5284704566001892, |
| "learning_rate": 9.840860930177984e-06, |
| "loss": 0.4868, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.17349803481190343, |
| "grad_norm": 0.49998217821121216, |
| "learning_rate": 9.837158841461767e-06, |
| "loss": 0.4867, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.1743402582818641, |
| "grad_norm": 0.5210819840431213, |
| "learning_rate": 9.833414897752346e-06, |
| "loss": 0.4872, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.17518248175182483, |
| "grad_norm": 0.5861724019050598, |
| "learning_rate": 9.829629131445342e-06, |
| "loss": 0.5168, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.1760247052217855, |
| "grad_norm": 0.5185627937316895, |
| "learning_rate": 9.825801575298248e-06, |
| "loss": 0.4957, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.17686692869174622, |
| "grad_norm": 0.5859359502792358, |
| "learning_rate": 9.821932262430164e-06, |
| "loss": 0.5051, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.1777091521617069, |
| "grad_norm": 0.5268411040306091, |
| "learning_rate": 9.818021226321502e-06, |
| "loss": 0.5064, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.1785513756316676, |
| "grad_norm": 0.48425450921058655, |
| "learning_rate": 9.814068500813692e-06, |
| "loss": 0.4873, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.1793935991016283, |
| "grad_norm": 0.5642316937446594, |
| "learning_rate": 9.8100741201089e-06, |
| "loss": 0.5047, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.180235822571589, |
| "grad_norm": 0.5157051682472229, |
| "learning_rate": 9.806038118769724e-06, |
| "loss": 0.5235, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.1810780460415497, |
| "grad_norm": 0.5594722032546997, |
| "learning_rate": 9.801960531718898e-06, |
| "loss": 0.5114, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.18192026951151039, |
| "grad_norm": 0.5461316704750061, |
| "learning_rate": 9.797841394238987e-06, |
| "loss": 0.5059, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.1827624929814711, |
| "grad_norm": 0.4522736072540283, |
| "learning_rate": 9.793680741972084e-06, |
| "loss": 0.4978, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.18360471645143178, |
| "grad_norm": 0.5327430963516235, |
| "learning_rate": 9.789478610919508e-06, |
| "loss": 0.4797, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.18444693992139247, |
| "grad_norm": 0.4836098551750183, |
| "learning_rate": 9.785235037441473e-06, |
| "loss": 0.5018, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.18528916339135318, |
| "grad_norm": 0.4598475396633148, |
| "learning_rate": 9.780950058256802e-06, |
| "loss": 0.4982, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.18613138686131386, |
| "grad_norm": 0.5103244781494141, |
| "learning_rate": 9.77662371044258e-06, |
| "loss": 0.4927, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.18697361033127458, |
| "grad_norm": 0.47890129685401917, |
| "learning_rate": 9.77225603143385e-06, |
| "loss": 0.5054, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.18781583380123526, |
| "grad_norm": 0.5484627485275269, |
| "learning_rate": 9.767847059023292e-06, |
| "loss": 0.495, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.18865805727119594, |
| "grad_norm": 0.5115911364555359, |
| "learning_rate": 9.763396831360884e-06, |
| "loss": 0.4868, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.18950028074115666, |
| "grad_norm": 0.6416915655136108, |
| "learning_rate": 9.75890538695358e-06, |
| "loss": 0.4732, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.19034250421111734, |
| "grad_norm": 0.5456217527389526, |
| "learning_rate": 9.75437276466497e-06, |
| "loss": 0.5113, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.19118472768107805, |
| "grad_norm": 0.6080687046051025, |
| "learning_rate": 9.749799003714954e-06, |
| "loss": 0.5086, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.19202695115103874, |
| "grad_norm": 0.5577587485313416, |
| "learning_rate": 9.745184143679398e-06, |
| "loss": 0.4832, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.19286917462099945, |
| "grad_norm": 0.588985025882721, |
| "learning_rate": 9.74052822448978e-06, |
| "loss": 0.4893, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.19371139809096014, |
| "grad_norm": 0.6427119970321655, |
| "learning_rate": 9.735831286432869e-06, |
| "loss": 0.5022, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.19455362156092082, |
| "grad_norm": 0.6053304672241211, |
| "learning_rate": 9.731093370150349e-06, |
| "loss": 0.4925, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.19539584503088153, |
| "grad_norm": 0.6324568390846252, |
| "learning_rate": 9.72631451663849e-06, |
| "loss": 0.4693, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.19623806850084222, |
| "grad_norm": 0.6193096041679382, |
| "learning_rate": 9.721494767247779e-06, |
| "loss": 0.5334, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.19708029197080293, |
| "grad_norm": 0.6784276366233826, |
| "learning_rate": 9.71663416368257e-06, |
| "loss": 0.5018, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.1979225154407636, |
| "grad_norm": 0.5089966058731079, |
| "learning_rate": 9.71173274800072e-06, |
| "loss": 0.4836, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.19876473891072433, |
| "grad_norm": 0.6575846672058105, |
| "learning_rate": 9.70679056261322e-06, |
| "loss": 0.4707, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.199606962380685, |
| "grad_norm": 0.6496636867523193, |
| "learning_rate": 9.70180765028384e-06, |
| "loss": 0.5029, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.2004491858506457, |
| "grad_norm": 0.5280755758285522, |
| "learning_rate": 9.696784054128749e-06, |
| "loss": 0.4846, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.2012914093206064, |
| "grad_norm": 0.5946359038352966, |
| "learning_rate": 9.691719817616148e-06, |
| "loss": 0.4812, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.2021336327905671, |
| "grad_norm": 0.5537473559379578, |
| "learning_rate": 9.686614984565888e-06, |
| "loss": 0.4942, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.2029758562605278, |
| "grad_norm": 0.4930758476257324, |
| "learning_rate": 9.681469599149093e-06, |
| "loss": 0.4953, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.2038180797304885, |
| "grad_norm": 0.6003844738006592, |
| "learning_rate": 9.676283705887783e-06, |
| "loss": 0.4892, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.20466030320044917, |
| "grad_norm": 0.5657713413238525, |
| "learning_rate": 9.671057349654481e-06, |
| "loss": 0.5321, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.20550252667040989, |
| "grad_norm": 0.49356991052627563, |
| "learning_rate": 9.66579057567183e-06, |
| "loss": 0.4886, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.20634475014037057, |
| "grad_norm": 0.5777039527893066, |
| "learning_rate": 9.660483429512198e-06, |
| "loss": 0.4814, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.20718697361033128, |
| "grad_norm": 0.5081586837768555, |
| "learning_rate": 9.65513595709729e-06, |
| "loss": 0.501, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.20802919708029197, |
| "grad_norm": 0.5518150329589844, |
| "learning_rate": 9.649748204697741e-06, |
| "loss": 0.5143, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.20887142055025268, |
| "grad_norm": 0.4836235046386719, |
| "learning_rate": 9.644320218932723e-06, |
| "loss": 0.4852, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.20971364402021336, |
| "grad_norm": 0.5687448978424072, |
| "learning_rate": 9.63885204676954e-06, |
| "loss": 0.4866, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.21055586749017405, |
| "grad_norm": 0.5448442697525024, |
| "learning_rate": 9.63334373552322e-06, |
| "loss": 0.4646, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.21139809096013476, |
| "grad_norm": 0.5512805581092834, |
| "learning_rate": 9.627795332856107e-06, |
| "loss": 0.4892, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.21224031443009544, |
| "grad_norm": 0.64194256067276, |
| "learning_rate": 9.622206886777448e-06, |
| "loss": 0.4869, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.21308253790005616, |
| "grad_norm": 0.5275607109069824, |
| "learning_rate": 9.616578445642982e-06, |
| "loss": 0.4919, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.21392476137001684, |
| "grad_norm": 0.5536665320396423, |
| "learning_rate": 9.61091005815451e-06, |
| "loss": 0.519, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.21476698483997755, |
| "grad_norm": 0.5774405002593994, |
| "learning_rate": 9.605201773359485e-06, |
| "loss": 0.4935, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.21560920830993824, |
| "grad_norm": 0.5153358578681946, |
| "learning_rate": 9.599453640650585e-06, |
| "loss": 0.5004, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.21645143177989892, |
| "grad_norm": 0.5064471960067749, |
| "learning_rate": 9.59366570976528e-06, |
| "loss": 0.494, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.21729365524985964, |
| "grad_norm": 0.48846712708473206, |
| "learning_rate": 9.587838030785413e-06, |
| "loss": 0.4753, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.21813587871982032, |
| "grad_norm": 0.4824623465538025, |
| "learning_rate": 9.581970654136752e-06, |
| "loss": 0.4837, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.21897810218978103, |
| "grad_norm": 0.5995156764984131, |
| "learning_rate": 9.576063630588563e-06, |
| "loss": 0.48, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.21982032565974172, |
| "grad_norm": 0.4862680733203888, |
| "learning_rate": 9.570117011253173e-06, |
| "loss": 0.4709, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.2206625491297024, |
| "grad_norm": 0.65143883228302, |
| "learning_rate": 9.56413084758552e-06, |
| "loss": 0.4809, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.2215047725996631, |
| "grad_norm": 0.5107113122940063, |
| "learning_rate": 9.55810519138271e-06, |
| "loss": 0.5325, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.2223469960696238, |
| "grad_norm": 0.5462081432342529, |
| "learning_rate": 9.552040094783575e-06, |
| "loss": 0.498, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.2231892195395845, |
| "grad_norm": 0.5198892951011658, |
| "learning_rate": 9.545935610268213e-06, |
| "loss": 0.4971, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.2240314430095452, |
| "grad_norm": 0.5494446754455566, |
| "learning_rate": 9.53979179065754e-06, |
| "loss": 0.4989, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.2248736664795059, |
| "grad_norm": 0.4715658724308014, |
| "learning_rate": 9.533608689112827e-06, |
| "loss": 0.5003, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.2257158899494666, |
| "grad_norm": 0.5305702686309814, |
| "learning_rate": 9.527386359135254e-06, |
| "loss": 0.477, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.22655811341942728, |
| "grad_norm": 0.5072541236877441, |
| "learning_rate": 9.521124854565425e-06, |
| "loss": 0.4649, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.227400336889388, |
| "grad_norm": 0.4624420702457428, |
| "learning_rate": 9.514824229582922e-06, |
| "loss": 0.4823, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.22824256035934867, |
| "grad_norm": 0.5773640275001526, |
| "learning_rate": 9.508484538705823e-06, |
| "loss": 0.5053, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.22908478382930939, |
| "grad_norm": 0.5840558409690857, |
| "learning_rate": 9.50210583679024e-06, |
| "loss": 0.5006, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.22992700729927007, |
| "grad_norm": 0.5893429517745972, |
| "learning_rate": 9.495688179029838e-06, |
| "loss": 0.5046, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.23076923076923078, |
| "grad_norm": 0.46160465478897095, |
| "learning_rate": 9.48923162095536e-06, |
| "loss": 0.4527, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.23161145423919147, |
| "grad_norm": 0.5931537747383118, |
| "learning_rate": 9.482736218434144e-06, |
| "loss": 0.4901, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.23245367770915215, |
| "grad_norm": 0.5156863927841187, |
| "learning_rate": 9.476202027669644e-06, |
| "loss": 0.5004, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.23329590117911286, |
| "grad_norm": 0.5780513286590576, |
| "learning_rate": 9.469629105200937e-06, |
| "loss": 0.4915, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.23413812464907355, |
| "grad_norm": 0.6066457629203796, |
| "learning_rate": 9.463017507902245e-06, |
| "loss": 0.5244, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.23498034811903426, |
| "grad_norm": 0.5100727081298828, |
| "learning_rate": 9.45636729298243e-06, |
| "loss": 0.4975, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.23582257158899494, |
| "grad_norm": 0.5455880761146545, |
| "learning_rate": 9.449678517984503e-06, |
| "loss": 0.4704, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.23666479505895563, |
| "grad_norm": 0.5221508741378784, |
| "learning_rate": 9.442951240785135e-06, |
| "loss": 0.5014, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.23750701852891634, |
| "grad_norm": 0.5977054834365845, |
| "learning_rate": 9.436185519594145e-06, |
| "loss": 0.4822, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.23834924199887703, |
| "grad_norm": 0.501565158367157, |
| "learning_rate": 9.429381412954e-06, |
| "loss": 0.4991, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.23919146546883774, |
| "grad_norm": 0.6670308709144592, |
| "learning_rate": 9.422538979739307e-06, |
| "loss": 0.4719, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.24003368893879842, |
| "grad_norm": 0.626640796661377, |
| "learning_rate": 9.415658279156312e-06, |
| "loss": 0.5209, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.24087591240875914, |
| "grad_norm": 0.6013544201850891, |
| "learning_rate": 9.408739370742372e-06, |
| "loss": 0.4626, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.24171813587871982, |
| "grad_norm": 0.572346568107605, |
| "learning_rate": 9.401782314365458e-06, |
| "loss": 0.4987, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.2425603593486805, |
| "grad_norm": 0.5629340410232544, |
| "learning_rate": 9.39478717022362e-06, |
| "loss": 0.4988, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.24340258281864122, |
| "grad_norm": 0.5211260914802551, |
| "learning_rate": 9.387753998844482e-06, |
| "loss": 0.4614, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.2442448062886019, |
| "grad_norm": 0.5111398100852966, |
| "learning_rate": 9.380682861084703e-06, |
| "loss": 0.4653, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.2450870297585626, |
| "grad_norm": 0.4929146468639374, |
| "learning_rate": 9.37357381812946e-06, |
| "loss": 0.505, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.2459292532285233, |
| "grad_norm": 0.3973555862903595, |
| "learning_rate": 9.366426931491917e-06, |
| "loss": 0.4561, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.246771476698484, |
| "grad_norm": 0.5543331503868103, |
| "learning_rate": 9.359242263012693e-06, |
| "loss": 0.4888, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.2476137001684447, |
| "grad_norm": 0.46625789999961853, |
| "learning_rate": 9.352019874859326e-06, |
| "loss": 0.4976, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.24845592363840538, |
| "grad_norm": 0.4668426811695099, |
| "learning_rate": 9.344759829525734e-06, |
| "loss": 0.4638, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.2492981471083661, |
| "grad_norm": 0.501638650894165, |
| "learning_rate": 9.33746218983167e-06, |
| "loss": 0.4863, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.2501403705783268, |
| "grad_norm": 0.445046067237854, |
| "learning_rate": 9.330127018922195e-06, |
| "loss": 0.4802, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.2509825940482875, |
| "grad_norm": 0.4827885627746582, |
| "learning_rate": 9.32275438026711e-06, |
| "loss": 0.4833, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.2518248175182482, |
| "grad_norm": 0.46409258246421814, |
| "learning_rate": 9.315344337660422e-06, |
| "loss": 0.4949, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.25266704098820886, |
| "grad_norm": 0.46458157896995544, |
| "learning_rate": 9.307896955219787e-06, |
| "loss": 0.4828, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.25350926445816957, |
| "grad_norm": 0.452510267496109, |
| "learning_rate": 9.300412297385954e-06, |
| "loss": 0.5037, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.2543514879281303, |
| "grad_norm": 0.4101113975048065, |
| "learning_rate": 9.29289042892221e-06, |
| "loss": 0.4539, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.25519371139809094, |
| "grad_norm": 0.5279428362846375, |
| "learning_rate": 9.285331414913816e-06, |
| "loss": 0.5, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.25603593486805165, |
| "grad_norm": 0.42487549781799316, |
| "learning_rate": 9.277735320767449e-06, |
| "loss": 0.492, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.25687815833801236, |
| "grad_norm": 0.47415146231651306, |
| "learning_rate": 9.270102212210632e-06, |
| "loss": 0.5013, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.2577203818079731, |
| "grad_norm": 0.4520109295845032, |
| "learning_rate": 9.262432155291167e-06, |
| "loss": 0.4561, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.25856260527793373, |
| "grad_norm": 0.45563986897468567, |
| "learning_rate": 9.254725216376562e-06, |
| "loss": 0.4953, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.25940482874789444, |
| "grad_norm": 0.4406437277793884, |
| "learning_rate": 9.246981462153456e-06, |
| "loss": 0.4741, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.26024705221785516, |
| "grad_norm": 0.46866127848625183, |
| "learning_rate": 9.239200959627048e-06, |
| "loss": 0.4782, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.2610892756878158, |
| "grad_norm": 0.526932954788208, |
| "learning_rate": 9.231383776120512e-06, |
| "loss": 0.4758, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.2619314991577765, |
| "grad_norm": 0.4274905323982239, |
| "learning_rate": 9.223529979274411e-06, |
| "loss": 0.4831, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.26277372262773724, |
| "grad_norm": 0.5935413241386414, |
| "learning_rate": 9.215639637046121e-06, |
| "loss": 0.5021, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.2636159460976979, |
| "grad_norm": 0.48600393533706665, |
| "learning_rate": 9.207712817709237e-06, |
| "loss": 0.497, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.2644581695676586, |
| "grad_norm": 0.4954933524131775, |
| "learning_rate": 9.19974958985298e-06, |
| "loss": 0.5099, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.2653003930376193, |
| "grad_norm": 0.6196016073226929, |
| "learning_rate": 9.191750022381613e-06, |
| "loss": 0.4668, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.26614261650758003, |
| "grad_norm": 0.5300858020782471, |
| "learning_rate": 9.183714184513832e-06, |
| "loss": 0.5054, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.2669848399775407, |
| "grad_norm": 0.5432302951812744, |
| "learning_rate": 9.175642145782179e-06, |
| "loss": 0.4651, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.2678270634475014, |
| "grad_norm": 0.5008415579795837, |
| "learning_rate": 9.16753397603243e-06, |
| "loss": 0.4882, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.2686692869174621, |
| "grad_norm": 0.6660073399543762, |
| "learning_rate": 9.159389745423003e-06, |
| "loss": 0.5076, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.26951151038742277, |
| "grad_norm": 0.4663107097148895, |
| "learning_rate": 9.151209524424333e-06, |
| "loss": 0.4694, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.2703537338573835, |
| "grad_norm": 0.5836421251296997, |
| "learning_rate": 9.142993383818284e-06, |
| "loss": 0.489, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.2711959573273442, |
| "grad_norm": 0.4592418372631073, |
| "learning_rate": 9.134741394697517e-06, |
| "loss": 0.486, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.2720381807973049, |
| "grad_norm": 0.4770454466342926, |
| "learning_rate": 9.126453628464889e-06, |
| "loss": 0.4597, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.27288040426726556, |
| "grad_norm": 0.5071132183074951, |
| "learning_rate": 9.118130156832823e-06, |
| "loss": 0.4714, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.2737226277372263, |
| "grad_norm": 0.49201327562332153, |
| "learning_rate": 9.109771051822702e-06, |
| "loss": 0.4799, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.274564851207187, |
| "grad_norm": 0.49194371700286865, |
| "learning_rate": 9.10137638576423e-06, |
| "loss": 0.4754, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.27540707467714765, |
| "grad_norm": 0.46792376041412354, |
| "learning_rate": 9.09294623129482e-06, |
| "loss": 0.484, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.27624929814710836, |
| "grad_norm": 0.458034873008728, |
| "learning_rate": 9.084480661358954e-06, |
| "loss": 0.4731, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.27709152161706907, |
| "grad_norm": 0.4599028527736664, |
| "learning_rate": 9.07597974920756e-06, |
| "loss": 0.4656, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.2779337450870298, |
| "grad_norm": 0.5501565337181091, |
| "learning_rate": 9.067443568397378e-06, |
| "loss": 0.4943, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.27877596855699044, |
| "grad_norm": 0.45656952261924744, |
| "learning_rate": 9.058872192790314e-06, |
| "loss": 0.496, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.27961819202695115, |
| "grad_norm": 0.5053873062133789, |
| "learning_rate": 9.05026569655281e-06, |
| "loss": 0.4582, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.28046041549691186, |
| "grad_norm": 0.48897361755371094, |
| "learning_rate": 9.041624154155208e-06, |
| "loss": 0.5159, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.2813026389668725, |
| "grad_norm": 0.4899885058403015, |
| "learning_rate": 9.032947640371086e-06, |
| "loss": 0.4979, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.28214486243683323, |
| "grad_norm": 0.5201389789581299, |
| "learning_rate": 9.02423623027663e-06, |
| "loss": 0.4887, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.28298708590679394, |
| "grad_norm": 0.6644883751869202, |
| "learning_rate": 9.01548999924997e-06, |
| "loss": 0.5219, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.28382930937675466, |
| "grad_norm": 0.4967377781867981, |
| "learning_rate": 9.006709022970547e-06, |
| "loss": 0.4924, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.2846715328467153, |
| "grad_norm": 0.5921658873558044, |
| "learning_rate": 8.997893377418432e-06, |
| "loss": 0.4556, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.285513756316676, |
| "grad_norm": 0.5479557514190674, |
| "learning_rate": 8.98904313887369e-06, |
| "loss": 0.4698, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.28635597978663674, |
| "grad_norm": 0.5691632032394409, |
| "learning_rate": 8.980158383915714e-06, |
| "loss": 0.5025, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.2871982032565974, |
| "grad_norm": 0.5059176087379456, |
| "learning_rate": 8.971239189422555e-06, |
| "loss": 0.4808, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.2880404267265581, |
| "grad_norm": 0.6245074272155762, |
| "learning_rate": 8.962285632570266e-06, |
| "loss": 0.4723, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.2888826501965188, |
| "grad_norm": 0.6249215006828308, |
| "learning_rate": 8.953297790832231e-06, |
| "loss": 0.506, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.28972487366647953, |
| "grad_norm": 0.4356972277164459, |
| "learning_rate": 8.944275741978495e-06, |
| "loss": 0.4961, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.2905670971364402, |
| "grad_norm": 0.6184642910957336, |
| "learning_rate": 8.935219564075087e-06, |
| "loss": 0.4913, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.2914093206064009, |
| "grad_norm": 0.48030075430870056, |
| "learning_rate": 8.92612933548335e-06, |
| "loss": 0.4517, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.2922515440763616, |
| "grad_norm": 0.4469691216945648, |
| "learning_rate": 8.917005134859263e-06, |
| "loss": 0.4582, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.29309376754632227, |
| "grad_norm": 0.4990270733833313, |
| "learning_rate": 8.907847041152757e-06, |
| "loss": 0.4647, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.293935991016283, |
| "grad_norm": 0.46036815643310547, |
| "learning_rate": 8.89865513360703e-06, |
| "loss": 0.4978, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.2947782144862437, |
| "grad_norm": 0.5019863843917847, |
| "learning_rate": 8.889429491757872e-06, |
| "loss": 0.4791, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.2956204379562044, |
| "grad_norm": 0.5118913054466248, |
| "learning_rate": 8.88017019543296e-06, |
| "loss": 0.4541, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.29646266142616506, |
| "grad_norm": 0.4879518151283264, |
| "learning_rate": 8.870877324751186e-06, |
| "loss": 0.4567, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.2973048848961258, |
| "grad_norm": 0.5687685012817383, |
| "learning_rate": 8.861550960121946e-06, |
| "loss": 0.5065, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.2981471083660865, |
| "grad_norm": 0.42508652806282043, |
| "learning_rate": 8.852191182244456e-06, |
| "loss": 0.4966, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.29898933183604715, |
| "grad_norm": 0.49280911684036255, |
| "learning_rate": 8.842798072107055e-06, |
| "loss": 0.4654, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.29983155530600786, |
| "grad_norm": 0.4535382091999054, |
| "learning_rate": 8.833371710986493e-06, |
| "loss": 0.4796, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.30067377877596857, |
| "grad_norm": 0.4359462261199951, |
| "learning_rate": 8.823912180447237e-06, |
| "loss": 0.4937, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.3015160022459292, |
| "grad_norm": 0.44173017144203186, |
| "learning_rate": 8.81441956234076e-06, |
| "loss": 0.4827, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.30235822571588994, |
| "grad_norm": 0.5167005658149719, |
| "learning_rate": 8.804893938804839e-06, |
| "loss": 0.501, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.30320044918585065, |
| "grad_norm": 0.46836942434310913, |
| "learning_rate": 8.795335392262841e-06, |
| "loss": 0.4475, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.30404267265581136, |
| "grad_norm": 0.5072566866874695, |
| "learning_rate": 8.785744005423003e-06, |
| "loss": 0.4709, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.304884896125772, |
| "grad_norm": 0.533694326877594, |
| "learning_rate": 8.77611986127773e-06, |
| "loss": 0.4942, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.30572711959573273, |
| "grad_norm": 0.4281693398952484, |
| "learning_rate": 8.766463043102864e-06, |
| "loss": 0.477, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.30656934306569344, |
| "grad_norm": 0.4821215867996216, |
| "learning_rate": 8.756773634456975e-06, |
| "loss": 0.4886, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.3074115665356541, |
| "grad_norm": 0.4269597828388214, |
| "learning_rate": 8.747051719180626e-06, |
| "loss": 0.4878, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.3082537900056148, |
| "grad_norm": 0.47855547070503235, |
| "learning_rate": 8.737297381395657e-06, |
| "loss": 0.4844, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.3090960134755755, |
| "grad_norm": 0.46225982904434204, |
| "learning_rate": 8.727510705504453e-06, |
| "loss": 0.4724, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.30993823694553624, |
| "grad_norm": 0.46247169375419617, |
| "learning_rate": 8.717691776189214e-06, |
| "loss": 0.4732, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.3107804604154969, |
| "grad_norm": 0.5116891860961914, |
| "learning_rate": 8.707840678411223e-06, |
| "loss": 0.4744, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.3116226838854576, |
| "grad_norm": 0.4474673867225647, |
| "learning_rate": 8.69795749741011e-06, |
| "loss": 0.4783, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.3124649073554183, |
| "grad_norm": 0.46301230788230896, |
| "learning_rate": 8.688042318703111e-06, |
| "loss": 0.4609, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.313307130825379, |
| "grad_norm": 0.5067939758300781, |
| "learning_rate": 8.678095228084343e-06, |
| "loss": 0.4971, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.3141493542953397, |
| "grad_norm": 0.5659363865852356, |
| "learning_rate": 8.66811631162404e-06, |
| "loss": 0.4724, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.3149915777653004, |
| "grad_norm": 0.42198798060417175, |
| "learning_rate": 8.65810565566782e-06, |
| "loss": 0.4802, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.3158338012352611, |
| "grad_norm": 0.546751081943512, |
| "learning_rate": 8.648063346835943e-06, |
| "loss": 0.4355, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.31667602470522177, |
| "grad_norm": 0.5956584215164185, |
| "learning_rate": 8.637989472022548e-06, |
| "loss": 0.4957, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.3175182481751825, |
| "grad_norm": 0.47946029901504517, |
| "learning_rate": 8.627884118394913e-06, |
| "loss": 0.4776, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.3183604716451432, |
| "grad_norm": 0.5487171411514282, |
| "learning_rate": 8.617747373392697e-06, |
| "loss": 0.4857, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.31920269511510385, |
| "grad_norm": 0.6688556671142578, |
| "learning_rate": 8.607579324727175e-06, |
| "loss": 0.4831, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.32004491858506456, |
| "grad_norm": 0.4539017081260681, |
| "learning_rate": 8.597380060380493e-06, |
| "loss": 0.4841, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.3208871420550253, |
| "grad_norm": 0.600853681564331, |
| "learning_rate": 8.5871496686049e-06, |
| "loss": 0.5023, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.321729365524986, |
| "grad_norm": 0.6517627835273743, |
| "learning_rate": 8.576888237921983e-06, |
| "loss": 0.4938, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.32257158899494665, |
| "grad_norm": 0.42329713702201843, |
| "learning_rate": 8.566595857121902e-06, |
| "loss": 0.4849, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.32341381246490736, |
| "grad_norm": 0.5802134275436401, |
| "learning_rate": 8.556272615262623e-06, |
| "loss": 0.5134, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.32425603593486807, |
| "grad_norm": 0.5750747323036194, |
| "learning_rate": 8.545918601669147e-06, |
| "loss": 0.4516, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.3250982594048287, |
| "grad_norm": 0.42843982577323914, |
| "learning_rate": 8.535533905932739e-06, |
| "loss": 0.486, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.32594048287478944, |
| "grad_norm": 0.4920358955860138, |
| "learning_rate": 8.525118617910144e-06, |
| "loss": 0.4864, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.32678270634475015, |
| "grad_norm": 0.5304857492446899, |
| "learning_rate": 8.514672827722824e-06, |
| "loss": 0.5064, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.32762492981471086, |
| "grad_norm": 0.5001351237297058, |
| "learning_rate": 8.504196625756166e-06, |
| "loss": 0.4711, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.3284671532846715, |
| "grad_norm": 0.46036016941070557, |
| "learning_rate": 8.493690102658703e-06, |
| "loss": 0.4413, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.32930937675463223, |
| "grad_norm": 0.5410290360450745, |
| "learning_rate": 8.483153349341336e-06, |
| "loss": 0.4736, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.33015160022459294, |
| "grad_norm": 0.4987352788448334, |
| "learning_rate": 8.472586456976534e-06, |
| "loss": 0.5136, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.3309938236945536, |
| "grad_norm": 0.5358459949493408, |
| "learning_rate": 8.461989516997565e-06, |
| "loss": 0.4932, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.3318360471645143, |
| "grad_norm": 0.5530543923377991, |
| "learning_rate": 8.45136262109768e-06, |
| "loss": 0.4795, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.332678270634475, |
| "grad_norm": 0.5860891938209534, |
| "learning_rate": 8.440705861229344e-06, |
| "loss": 0.4831, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.3335204941044357, |
| "grad_norm": 0.5428664684295654, |
| "learning_rate": 8.430019329603423e-06, |
| "loss": 0.4794, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.3343627175743964, |
| "grad_norm": 0.6753725409507751, |
| "learning_rate": 8.41930311868839e-06, |
| "loss": 0.4751, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.3352049410443571, |
| "grad_norm": 0.4751342535018921, |
| "learning_rate": 8.408557321209534e-06, |
| "loss": 0.5061, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.3360471645143178, |
| "grad_norm": 0.6972861289978027, |
| "learning_rate": 8.397782030148147e-06, |
| "loss": 0.5005, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.3368893879842785, |
| "grad_norm": 0.5487985014915466, |
| "learning_rate": 8.386977338740724e-06, |
| "loss": 0.4795, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.3377316114542392, |
| "grad_norm": 0.43178796768188477, |
| "learning_rate": 8.376143340478153e-06, |
| "loss": 0.4353, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.3385738349241999, |
| "grad_norm": 0.5557446479797363, |
| "learning_rate": 8.365280129104912e-06, |
| "loss": 0.5051, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.33941605839416056, |
| "grad_norm": 0.5225008726119995, |
| "learning_rate": 8.354387798618254e-06, |
| "loss": 0.4792, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.34025828186412127, |
| "grad_norm": 0.5050113797187805, |
| "learning_rate": 8.34346644326739e-06, |
| "loss": 0.4454, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.341100505334082, |
| "grad_norm": 0.45230233669281006, |
| "learning_rate": 8.332516157552684e-06, |
| "loss": 0.4878, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.3419427288040427, |
| "grad_norm": 0.49938130378723145, |
| "learning_rate": 8.321537036224822e-06, |
| "loss": 0.4974, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.34278495227400335, |
| "grad_norm": 0.4604662358760834, |
| "learning_rate": 8.310529174284004e-06, |
| "loss": 0.4912, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.34362717574396406, |
| "grad_norm": 0.42852020263671875, |
| "learning_rate": 8.299492666979114e-06, |
| "loss": 0.4733, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.3444693992139248, |
| "grad_norm": 0.5300406813621521, |
| "learning_rate": 8.288427609806899e-06, |
| "loss": 0.4936, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.34531162268388543, |
| "grad_norm": 0.46928277611732483, |
| "learning_rate": 8.277334098511147e-06, |
| "loss": 0.4815, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.34615384615384615, |
| "grad_norm": 0.46530747413635254, |
| "learning_rate": 8.266212229081846e-06, |
| "loss": 0.4928, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.34699606962380686, |
| "grad_norm": 0.44099971652030945, |
| "learning_rate": 8.255062097754371e-06, |
| "loss": 0.4854, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.34783829309376757, |
| "grad_norm": 0.4617399275302887, |
| "learning_rate": 8.243883801008632e-06, |
| "loss": 0.4707, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.3486805165637282, |
| "grad_norm": 0.5029078125953674, |
| "learning_rate": 8.232677435568252e-06, |
| "loss": 0.4753, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.34952274003368894, |
| "grad_norm": 0.46492263674736023, |
| "learning_rate": 8.221443098399733e-06, |
| "loss": 0.4455, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.35036496350364965, |
| "grad_norm": 0.4844779968261719, |
| "learning_rate": 8.210180886711603e-06, |
| "loss": 0.4819, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.3512071869736103, |
| "grad_norm": 0.4448031783103943, |
| "learning_rate": 8.198890897953586e-06, |
| "loss": 0.4637, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.352049410443571, |
| "grad_norm": 0.46674251556396484, |
| "learning_rate": 8.187573229815757e-06, |
| "loss": 0.4806, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.35289163391353173, |
| "grad_norm": 0.4910249412059784, |
| "learning_rate": 8.176227980227693e-06, |
| "loss": 0.4937, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.35373385738349244, |
| "grad_norm": 0.4387272894382477, |
| "learning_rate": 8.164855247357628e-06, |
| "loss": 0.4963, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.3545760808534531, |
| "grad_norm": 0.46658802032470703, |
| "learning_rate": 8.153455129611605e-06, |
| "loss": 0.484, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.3554183043234138, |
| "grad_norm": 0.41567468643188477, |
| "learning_rate": 8.142027725632622e-06, |
| "loss": 0.4565, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.3562605277933745, |
| "grad_norm": 0.5063756704330444, |
| "learning_rate": 8.130573134299782e-06, |
| "loss": 0.4982, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.3571027512633352, |
| "grad_norm": 0.48313266038894653, |
| "learning_rate": 8.119091454727427e-06, |
| "loss": 0.5071, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.3579449747332959, |
| "grad_norm": 0.40613314509391785, |
| "learning_rate": 8.107582786264299e-06, |
| "loss": 0.4645, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.3587871982032566, |
| "grad_norm": 0.5263804793357849, |
| "learning_rate": 8.09604722849266e-06, |
| "loss": 0.483, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.3596294216732173, |
| "grad_norm": 0.41154658794403076, |
| "learning_rate": 8.084484881227449e-06, |
| "loss": 0.4742, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.360471645143178, |
| "grad_norm": 0.45373862981796265, |
| "learning_rate": 8.072895844515398e-06, |
| "loss": 0.4975, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.3613138686131387, |
| "grad_norm": 0.5073238611221313, |
| "learning_rate": 8.061280218634192e-06, |
| "loss": 0.4869, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.3621560920830994, |
| "grad_norm": 0.44339868426322937, |
| "learning_rate": 8.049638104091575e-06, |
| "loss": 0.4695, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.36299831555306006, |
| "grad_norm": 0.4490543603897095, |
| "learning_rate": 8.037969601624495e-06, |
| "loss": 0.4818, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.36384053902302077, |
| "grad_norm": 0.4600644111633301, |
| "learning_rate": 8.026274812198235e-06, |
| "loss": 0.4678, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.3646827624929815, |
| "grad_norm": 0.553426206111908, |
| "learning_rate": 8.014553837005527e-06, |
| "loss": 0.4811, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.3655249859629422, |
| "grad_norm": 0.4366907477378845, |
| "learning_rate": 8.002806777465685e-06, |
| "loss": 0.4924, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.36636720943290285, |
| "grad_norm": 0.5031819343566895, |
| "learning_rate": 7.99103373522373e-06, |
| "loss": 0.4777, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.36720943290286356, |
| "grad_norm": 0.4188793897628784, |
| "learning_rate": 7.9792348121495e-06, |
| "loss": 0.4866, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.3680516563728243, |
| "grad_norm": 0.5696221590042114, |
| "learning_rate": 7.967410110336782e-06, |
| "loss": 0.4991, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.36889387984278493, |
| "grad_norm": 0.49628570675849915, |
| "learning_rate": 7.955559732102414e-06, |
| "loss": 0.4774, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.36973610331274565, |
| "grad_norm": 0.4882410168647766, |
| "learning_rate": 7.943683779985412e-06, |
| "loss": 0.449, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.37057832678270636, |
| "grad_norm": 0.48540210723876953, |
| "learning_rate": 7.931782356746076e-06, |
| "loss": 0.4842, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.371420550252667, |
| "grad_norm": 0.5185336470603943, |
| "learning_rate": 7.919855565365102e-06, |
| "loss": 0.4869, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.3722627737226277, |
| "grad_norm": 0.5434253811836243, |
| "learning_rate": 7.907903509042696e-06, |
| "loss": 0.4941, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.37310499719258844, |
| "grad_norm": 0.5163787007331848, |
| "learning_rate": 7.895926291197667e-06, |
| "loss": 0.4892, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.37394722066254915, |
| "grad_norm": 0.44851982593536377, |
| "learning_rate": 7.883924015466554e-06, |
| "loss": 0.4845, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.3747894441325098, |
| "grad_norm": 0.49744945764541626, |
| "learning_rate": 7.871896785702707e-06, |
| "loss": 0.4848, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.3756316676024705, |
| "grad_norm": 0.40333092212677, |
| "learning_rate": 7.859844705975405e-06, |
| "loss": 0.4714, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.37647389107243123, |
| "grad_norm": 0.4801202714443207, |
| "learning_rate": 7.847767880568944e-06, |
| "loss": 0.4677, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.3773161145423919, |
| "grad_norm": 0.469712495803833, |
| "learning_rate": 7.835666413981744e-06, |
| "loss": 0.4804, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.3781583380123526, |
| "grad_norm": 0.5278536677360535, |
| "learning_rate": 7.823540410925434e-06, |
| "loss": 0.4955, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.3790005614823133, |
| "grad_norm": 0.5002195239067078, |
| "learning_rate": 7.811389976323963e-06, |
| "loss": 0.5015, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.379842784952274, |
| "grad_norm": 0.5122157335281372, |
| "learning_rate": 7.799215215312667e-06, |
| "loss": 0.4618, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.3806850084222347, |
| "grad_norm": 0.4218645393848419, |
| "learning_rate": 7.787016233237387e-06, |
| "loss": 0.4651, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.3815272318921954, |
| "grad_norm": 0.4596264362335205, |
| "learning_rate": 7.774793135653537e-06, |
| "loss": 0.4818, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.3823694553621561, |
| "grad_norm": 0.5322352051734924, |
| "learning_rate": 7.7625460283252e-06, |
| "loss": 0.477, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.38321167883211676, |
| "grad_norm": 0.4300509989261627, |
| "learning_rate": 7.750275017224208e-06, |
| "loss": 0.4693, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.3840539023020775, |
| "grad_norm": 0.4399045407772064, |
| "learning_rate": 7.737980208529232e-06, |
| "loss": 0.4498, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.3848961257720382, |
| "grad_norm": 0.49768343567848206, |
| "learning_rate": 7.725661708624855e-06, |
| "loss": 0.4487, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.3857383492419989, |
| "grad_norm": 0.47401291131973267, |
| "learning_rate": 7.713319624100657e-06, |
| "loss": 0.4897, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.38658057271195956, |
| "grad_norm": 0.4233331084251404, |
| "learning_rate": 7.700954061750295e-06, |
| "loss": 0.4838, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.38742279618192027, |
| "grad_norm": 0.45259544253349304, |
| "learning_rate": 7.688565128570564e-06, |
| "loss": 0.4935, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.388265019651881, |
| "grad_norm": 0.5088585615158081, |
| "learning_rate": 7.676152931760496e-06, |
| "loss": 0.4579, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.38910724312184164, |
| "grad_norm": 0.3829163908958435, |
| "learning_rate": 7.663717578720412e-06, |
| "loss": 0.4354, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.38994946659180235, |
| "grad_norm": 0.46096691489219666, |
| "learning_rate": 7.651259177050996e-06, |
| "loss": 0.4781, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.39079169006176306, |
| "grad_norm": 0.4710644781589508, |
| "learning_rate": 7.638777834552372e-06, |
| "loss": 0.4683, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.3916339135317238, |
| "grad_norm": 0.4167614281177521, |
| "learning_rate": 7.626273659223166e-06, |
| "loss": 0.476, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.39247613700168443, |
| "grad_norm": 0.46090462803840637, |
| "learning_rate": 7.61374675925957e-06, |
| "loss": 0.4579, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.39331836047164515, |
| "grad_norm": 0.4632381498813629, |
| "learning_rate": 7.601197243054411e-06, |
| "loss": 0.484, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.39416058394160586, |
| "grad_norm": 0.3899933993816376, |
| "learning_rate": 7.588625219196208e-06, |
| "loss": 0.4761, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.3950028074115665, |
| "grad_norm": 0.424213171005249, |
| "learning_rate": 7.576030796468233e-06, |
| "loss": 0.4765, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.3958450308815272, |
| "grad_norm": 0.4018155336380005, |
| "learning_rate": 7.563414083847573e-06, |
| "loss": 0.4628, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.39668725435148794, |
| "grad_norm": 0.41502058506011963, |
| "learning_rate": 7.5507751905041885e-06, |
| "loss": 0.4882, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.39752947782144865, |
| "grad_norm": 0.39122501015663147, |
| "learning_rate": 7.538114225799955e-06, |
| "loss": 0.476, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.3983717012914093, |
| "grad_norm": 0.4315950274467468, |
| "learning_rate": 7.525431299287737e-06, |
| "loss": 0.4919, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.39921392476137, |
| "grad_norm": 0.43616175651550293, |
| "learning_rate": 7.512726520710429e-06, |
| "loss": 0.4607, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.40005614823133073, |
| "grad_norm": 0.4297219216823578, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 0.4841, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.4008983717012914, |
| "grad_norm": 0.4343617260456085, |
| "learning_rate": 7.4872518472765594e-06, |
| "loss": 0.4981, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.4017405951712521, |
| "grad_norm": 0.4350733757019043, |
| "learning_rate": 7.474482172847391e-06, |
| "loss": 0.4766, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.4025828186412128, |
| "grad_norm": 0.42052868008613586, |
| "learning_rate": 7.461691087205993e-06, |
| "loss": 0.4421, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.40342504211117347, |
| "grad_norm": 0.43227866291999817, |
| "learning_rate": 7.4488787010311425e-06, |
| "loss": 0.4643, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.4042672655811342, |
| "grad_norm": 0.4426368772983551, |
| "learning_rate": 7.436045125185923e-06, |
| "loss": 0.4906, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.4051094890510949, |
| "grad_norm": 0.436074823141098, |
| "learning_rate": 7.423190470716761e-06, |
| "loss": 0.4437, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.4059517125210556, |
| "grad_norm": 0.40371957421302795, |
| "learning_rate": 7.4103148488524824e-06, |
| "loss": 0.4843, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.40679393599101626, |
| "grad_norm": 0.47098270058631897, |
| "learning_rate": 7.3974183710033334e-06, |
| "loss": 0.4811, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.407636159460977, |
| "grad_norm": 0.4514760375022888, |
| "learning_rate": 7.384501148760024e-06, |
| "loss": 0.4919, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.4084783829309377, |
| "grad_norm": 0.4151240289211273, |
| "learning_rate": 7.371563293892761e-06, |
| "loss": 0.493, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.40932060640089835, |
| "grad_norm": 0.4454137980937958, |
| "learning_rate": 7.3586049183502875e-06, |
| "loss": 0.4652, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.41016282987085906, |
| "grad_norm": 0.4351678490638733, |
| "learning_rate": 7.345626134258897e-06, |
| "loss": 0.4761, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.41100505334081977, |
| "grad_norm": 0.4036249816417694, |
| "learning_rate": 7.3326270539214826e-06, |
| "loss": 0.4785, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.4118472768107805, |
| "grad_norm": 0.46877068281173706, |
| "learning_rate": 7.319607789816555e-06, |
| "loss": 0.4532, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.41268950028074114, |
| "grad_norm": 0.41278883814811707, |
| "learning_rate": 7.306568454597269e-06, |
| "loss": 0.4644, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.41353172375070185, |
| "grad_norm": 0.4356534779071808, |
| "learning_rate": 7.293509161090453e-06, |
| "loss": 0.488, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.41437394722066256, |
| "grad_norm": 0.45111823081970215, |
| "learning_rate": 7.28043002229563e-06, |
| "loss": 0.4584, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.4152161706906232, |
| "grad_norm": 0.510417103767395, |
| "learning_rate": 7.2673311513840395e-06, |
| "loss": 0.4709, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.41605839416058393, |
| "grad_norm": 0.46581393480300903, |
| "learning_rate": 7.2542126616976596e-06, |
| "loss": 0.4468, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.41690061763054465, |
| "grad_norm": 0.5376666784286499, |
| "learning_rate": 7.241074666748228e-06, |
| "loss": 0.4703, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.41774284110050536, |
| "grad_norm": 0.47817128896713257, |
| "learning_rate": 7.227917280216254e-06, |
| "loss": 0.5045, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.418585064570466, |
| "grad_norm": 0.5402735471725464, |
| "learning_rate": 7.214740615950041e-06, |
| "loss": 0.4845, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.4194272880404267, |
| "grad_norm": 0.4992601275444031, |
| "learning_rate": 7.201544787964698e-06, |
| "loss": 0.4657, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.42026951151038744, |
| "grad_norm": 0.45684367418289185, |
| "learning_rate": 7.188329910441154e-06, |
| "loss": 0.4952, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.4211117349803481, |
| "grad_norm": 0.519280731678009, |
| "learning_rate": 7.175096097725169e-06, |
| "loss": 0.4763, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.4219539584503088, |
| "grad_norm": 0.4592819809913635, |
| "learning_rate": 7.161843464326349e-06, |
| "loss": 0.484, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.4227961819202695, |
| "grad_norm": 0.4927172064781189, |
| "learning_rate": 7.148572124917148e-06, |
| "loss": 0.465, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.42363840539023023, |
| "grad_norm": 0.4536415934562683, |
| "learning_rate": 7.135282194331881e-06, |
| "loss": 0.4831, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.4244806288601909, |
| "grad_norm": 0.5175096392631531, |
| "learning_rate": 7.121973787565727e-06, |
| "loss": 0.4689, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.4253228523301516, |
| "grad_norm": 0.48967283964157104, |
| "learning_rate": 7.1086470197737405e-06, |
| "loss": 0.4808, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.4261650758001123, |
| "grad_norm": 0.4530904293060303, |
| "learning_rate": 7.095302006269842e-06, |
| "loss": 0.4601, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.42700729927007297, |
| "grad_norm": 0.48659196496009827, |
| "learning_rate": 7.0819388625258385e-06, |
| "loss": 0.4942, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.4278495227400337, |
| "grad_norm": 0.5468390583992004, |
| "learning_rate": 7.06855770417041e-06, |
| "loss": 0.4671, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.4286917462099944, |
| "grad_norm": 0.465054452419281, |
| "learning_rate": 7.05515864698811e-06, |
| "loss": 0.4608, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.4295339696799551, |
| "grad_norm": 0.4302669167518616, |
| "learning_rate": 7.041741806918372e-06, |
| "loss": 0.4582, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.43037619314991576, |
| "grad_norm": 0.4395880699157715, |
| "learning_rate": 7.028307300054499e-06, |
| "loss": 0.4585, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.4312184166198765, |
| "grad_norm": 0.4656515419483185, |
| "learning_rate": 7.014855242642662e-06, |
| "loss": 0.4557, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.4320606400898372, |
| "grad_norm": 0.4596274793148041, |
| "learning_rate": 7.0013857510808934e-06, |
| "loss": 0.483, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.43290286355979785, |
| "grad_norm": 0.5049313902854919, |
| "learning_rate": 6.987898941918082e-06, |
| "loss": 0.4748, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.43374508702975856, |
| "grad_norm": 0.4165242910385132, |
| "learning_rate": 6.974394931852957e-06, |
| "loss": 0.4917, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.43458731049971927, |
| "grad_norm": 0.402126282453537, |
| "learning_rate": 6.960873837733089e-06, |
| "loss": 0.4691, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.43542953396968, |
| "grad_norm": 0.444795697927475, |
| "learning_rate": 6.94733577655387e-06, |
| "loss": 0.4783, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.43627175743964064, |
| "grad_norm": 0.4488651752471924, |
| "learning_rate": 6.933780865457508e-06, |
| "loss": 0.4687, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.43711398090960135, |
| "grad_norm": 0.44755545258522034, |
| "learning_rate": 6.920209221732007e-06, |
| "loss": 0.4759, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.43795620437956206, |
| "grad_norm": 0.4631388187408447, |
| "learning_rate": 6.90662096281016e-06, |
| "loss": 0.4559, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.4387984278495227, |
| "grad_norm": 0.47367194294929504, |
| "learning_rate": 6.893016206268518e-06, |
| "loss": 0.4921, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.43964065131948343, |
| "grad_norm": 0.4509705603122711, |
| "learning_rate": 6.879395069826394e-06, |
| "loss": 0.488, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.44048287478944415, |
| "grad_norm": 0.4384632408618927, |
| "learning_rate": 6.865757671344827e-06, |
| "loss": 0.4634, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.4413250982594048, |
| "grad_norm": 0.43855151534080505, |
| "learning_rate": 6.85210412882557e-06, |
| "loss": 0.4577, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.4421673217293655, |
| "grad_norm": 0.4600560963153839, |
| "learning_rate": 6.838434560410064e-06, |
| "loss": 0.4548, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.4430095451993262, |
| "grad_norm": 0.4522815942764282, |
| "learning_rate": 6.824749084378428e-06, |
| "loss": 0.4455, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.44385176866928694, |
| "grad_norm": 0.44057661294937134, |
| "learning_rate": 6.811047819148413e-06, |
| "loss": 0.462, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.4446939921392476, |
| "grad_norm": 0.40139660239219666, |
| "learning_rate": 6.7973308832744035e-06, |
| "loss": 0.4661, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.4455362156092083, |
| "grad_norm": 0.48533400893211365, |
| "learning_rate": 6.783598395446371e-06, |
| "loss": 0.4767, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.446378439079169, |
| "grad_norm": 0.39077308773994446, |
| "learning_rate": 6.769850474488859e-06, |
| "loss": 0.4465, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.4472206625491297, |
| "grad_norm": 0.4498750865459442, |
| "learning_rate": 6.756087239359948e-06, |
| "loss": 0.467, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.4480628860190904, |
| "grad_norm": 0.4155936539173126, |
| "learning_rate": 6.742308809150232e-06, |
| "loss": 0.4667, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.4489051094890511, |
| "grad_norm": 0.44090476632118225, |
| "learning_rate": 6.728515303081782e-06, |
| "loss": 0.4641, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.4497473329590118, |
| "grad_norm": 0.43852943181991577, |
| "learning_rate": 6.714706840507122e-06, |
| "loss": 0.4667, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.45058955642897247, |
| "grad_norm": 0.42327818274497986, |
| "learning_rate": 6.700883540908185e-06, |
| "loss": 0.4574, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.4514317798989332, |
| "grad_norm": 0.4370694160461426, |
| "learning_rate": 6.687045523895292e-06, |
| "loss": 0.4699, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.4522740033688939, |
| "grad_norm": 0.45204928517341614, |
| "learning_rate": 6.673192909206109e-06, |
| "loss": 0.483, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.45311622683885455, |
| "grad_norm": 0.4360177218914032, |
| "learning_rate": 6.6593258167046115e-06, |
| "loss": 0.5015, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.45395845030881526, |
| "grad_norm": 0.5273513793945312, |
| "learning_rate": 6.64544436638005e-06, |
| "loss": 0.4976, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.454800673778776, |
| "grad_norm": 0.45911315083503723, |
| "learning_rate": 6.63154867834591e-06, |
| "loss": 0.4805, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.4556428972487367, |
| "grad_norm": 0.46684151887893677, |
| "learning_rate": 6.617638872838874e-06, |
| "loss": 0.4719, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.45648512071869735, |
| "grad_norm": 0.44970595836639404, |
| "learning_rate": 6.603715070217779e-06, |
| "loss": 0.467, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.45732734418865806, |
| "grad_norm": 0.45776426792144775, |
| "learning_rate": 6.589777390962575e-06, |
| "loss": 0.4645, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.45816956765861877, |
| "grad_norm": 0.4789178669452667, |
| "learning_rate": 6.5758259556732896e-06, |
| "loss": 0.4857, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.4590117911285794, |
| "grad_norm": 0.4421025514602661, |
| "learning_rate": 6.561860885068972e-06, |
| "loss": 0.4294, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.45985401459854014, |
| "grad_norm": 0.46660929918289185, |
| "learning_rate": 6.547882299986658e-06, |
| "loss": 0.4715, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.46069623806850085, |
| "grad_norm": 0.4705526530742645, |
| "learning_rate": 6.53389032138032e-06, |
| "loss": 0.4902, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.46153846153846156, |
| "grad_norm": 0.44154104590415955, |
| "learning_rate": 6.519885070319827e-06, |
| "loss": 0.4955, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.4623806850084222, |
| "grad_norm": 0.4737115502357483, |
| "learning_rate": 6.505866667989884e-06, |
| "loss": 0.4666, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.46322290847838293, |
| "grad_norm": 0.4516438841819763, |
| "learning_rate": 6.491835235688999e-06, |
| "loss": 0.4667, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.46406513194834365, |
| "grad_norm": 0.39548397064208984, |
| "learning_rate": 6.477790894828422e-06, |
| "loss": 0.4495, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.4649073554183043, |
| "grad_norm": 0.4581812620162964, |
| "learning_rate": 6.463733766931096e-06, |
| "loss": 0.4766, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.465749578888265, |
| "grad_norm": 0.5608298182487488, |
| "learning_rate": 6.449663973630613e-06, |
| "loss": 0.4651, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.4665918023582257, |
| "grad_norm": 0.433051735162735, |
| "learning_rate": 6.435581636670154e-06, |
| "loss": 0.4615, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.46743402582818644, |
| "grad_norm": 0.45779332518577576, |
| "learning_rate": 6.421486877901436e-06, |
| "loss": 0.4748, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.4682762492981471, |
| "grad_norm": 0.5338222980499268, |
| "learning_rate": 6.407379819283661e-06, |
| "loss": 0.4677, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.4691184727681078, |
| "grad_norm": 0.4301375150680542, |
| "learning_rate": 6.393260582882462e-06, |
| "loss": 0.4461, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.4699606962380685, |
| "grad_norm": 0.3875766694545746, |
| "learning_rate": 6.379129290868837e-06, |
| "loss": 0.4608, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.4708029197080292, |
| "grad_norm": 0.49393564462661743, |
| "learning_rate": 6.364986065518106e-06, |
| "loss": 0.454, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.4716451431779899, |
| "grad_norm": 0.433536559343338, |
| "learning_rate": 6.350831029208844e-06, |
| "loss": 0.4648, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.4724873666479506, |
| "grad_norm": 0.3517136871814728, |
| "learning_rate": 6.336664304421818e-06, |
| "loss": 0.4587, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.47332959011791126, |
| "grad_norm": 0.4940553307533264, |
| "learning_rate": 6.322486013738942e-06, |
| "loss": 0.4799, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.47417181358787197, |
| "grad_norm": 0.4068980813026428, |
| "learning_rate": 6.308296279842204e-06, |
| "loss": 0.4621, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.4750140370578327, |
| "grad_norm": 0.41587033867836, |
| "learning_rate": 6.294095225512604e-06, |
| "loss": 0.4695, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.4758562605277934, |
| "grad_norm": 0.4289080798625946, |
| "learning_rate": 6.279882973629101e-06, |
| "loss": 0.4606, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.47669848399775405, |
| "grad_norm": 0.3981984555721283, |
| "learning_rate": 6.265659647167542e-06, |
| "loss": 0.4778, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.47754070746771476, |
| "grad_norm": 0.41328713297843933, |
| "learning_rate": 6.2514253691996e-06, |
| "loss": 0.4698, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.4783829309376755, |
| "grad_norm": 0.509678304195404, |
| "learning_rate": 6.237180262891709e-06, |
| "loss": 0.4817, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.47922515440763613, |
| "grad_norm": 0.4245469272136688, |
| "learning_rate": 6.222924451504001e-06, |
| "loss": 0.4453, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.48006737787759685, |
| "grad_norm": 0.44388455152511597, |
| "learning_rate": 6.208658058389232e-06, |
| "loss": 0.4514, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.48090960134755756, |
| "grad_norm": 0.4333799481391907, |
| "learning_rate": 6.194381206991723e-06, |
| "loss": 0.4746, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.48175182481751827, |
| "grad_norm": 0.46454960107803345, |
| "learning_rate": 6.180094020846291e-06, |
| "loss": 0.4685, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.4825940482874789, |
| "grad_norm": 0.41998291015625, |
| "learning_rate": 6.165796623577171e-06, |
| "loss": 0.4747, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.48343627175743964, |
| "grad_norm": 0.3703177869319916, |
| "learning_rate": 6.15148913889696e-06, |
| "loss": 0.4871, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.48427849522740035, |
| "grad_norm": 0.5082182884216309, |
| "learning_rate": 6.1371716906055336e-06, |
| "loss": 0.4732, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.485120718697361, |
| "grad_norm": 0.4215005040168762, |
| "learning_rate": 6.122844402588982e-06, |
| "loss": 0.4494, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.4859629421673217, |
| "grad_norm": 0.42425084114074707, |
| "learning_rate": 6.10850739881854e-06, |
| "loss": 0.4969, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.48680516563728243, |
| "grad_norm": 0.4311264455318451, |
| "learning_rate": 6.094160803349508e-06, |
| "loss": 0.469, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.48764738910724315, |
| "grad_norm": 0.41804108023643494, |
| "learning_rate": 6.079804740320181e-06, |
| "loss": 0.4426, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.4884896125772038, |
| "grad_norm": 0.41517356038093567, |
| "learning_rate": 6.065439333950776e-06, |
| "loss": 0.4928, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.4893318360471645, |
| "grad_norm": 0.42737406492233276, |
| "learning_rate": 6.051064708542357e-06, |
| "loss": 0.4801, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.4901740595171252, |
| "grad_norm": 0.43230849504470825, |
| "learning_rate": 6.036680988475756e-06, |
| "loss": 0.4823, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.4910162829870859, |
| "grad_norm": 0.4388849437236786, |
| "learning_rate": 6.022288298210502e-06, |
| "loss": 0.4669, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.4918585064570466, |
| "grad_norm": 0.37067726254463196, |
| "learning_rate": 6.00788676228374e-06, |
| "loss": 0.4689, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.4927007299270073, |
| "grad_norm": 0.47136881947517395, |
| "learning_rate": 5.993476505309154e-06, |
| "loss": 0.4483, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.493542953396968, |
| "grad_norm": 0.42969149351119995, |
| "learning_rate": 5.979057651975893e-06, |
| "loss": 0.4769, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.4943851768669287, |
| "grad_norm": 0.40368854999542236, |
| "learning_rate": 5.964630327047485e-06, |
| "loss": 0.4696, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.4952274003368894, |
| "grad_norm": 0.41034096479415894, |
| "learning_rate": 5.9501946553607615e-06, |
| "loss": 0.4644, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.4960696238068501, |
| "grad_norm": 0.39626145362854004, |
| "learning_rate": 5.935750761824777e-06, |
| "loss": 0.4946, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.49691184727681076, |
| "grad_norm": 0.3816596567630768, |
| "learning_rate": 5.921298771419731e-06, |
| "loss": 0.4747, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.49775407074677147, |
| "grad_norm": 0.37927156686782837, |
| "learning_rate": 5.906838809195879e-06, |
| "loss": 0.4609, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.4985962942167322, |
| "grad_norm": 0.4023749828338623, |
| "learning_rate": 5.8923710002724595e-06, |
| "loss": 0.4696, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.4994385176866929, |
| "grad_norm": 0.39053875207901, |
| "learning_rate": 5.877895469836604e-06, |
| "loss": 0.4747, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.5002807411566536, |
| "grad_norm": 0.4169563353061676, |
| "learning_rate": 5.863412343142258e-06, |
| "loss": 0.4278, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.5011229646266142, |
| "grad_norm": 0.37836650013923645, |
| "learning_rate": 5.848921745509094e-06, |
| "loss": 0.4845, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.501965188096575, |
| "grad_norm": 0.3742503523826599, |
| "learning_rate": 5.8344238023214305e-06, |
| "loss": 0.4789, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.5028074115665356, |
| "grad_norm": 0.4057691693305969, |
| "learning_rate": 5.819918639027149e-06, |
| "loss": 0.46, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.5036496350364964, |
| "grad_norm": 0.41869857907295227, |
| "learning_rate": 5.805406381136598e-06, |
| "loss": 0.4445, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.5044918585064571, |
| "grad_norm": 0.4319513142108917, |
| "learning_rate": 5.790887154221521e-06, |
| "loss": 0.4922, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.5053340819764177, |
| "grad_norm": 0.4296809434890747, |
| "learning_rate": 5.776361083913959e-06, |
| "loss": 0.472, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.5061763054463785, |
| "grad_norm": 0.4201277494430542, |
| "learning_rate": 5.7618282959051685e-06, |
| "loss": 0.4929, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.5070185289163391, |
| "grad_norm": 0.4361591637134552, |
| "learning_rate": 5.747288915944533e-06, |
| "loss": 0.4883, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.5078607523862998, |
| "grad_norm": 0.44996178150177, |
| "learning_rate": 5.7327430698384775e-06, |
| "loss": 0.464, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.5087029758562606, |
| "grad_norm": 0.42448410391807556, |
| "learning_rate": 5.718190883449373e-06, |
| "loss": 0.4874, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.5095451993262212, |
| "grad_norm": 0.39895886182785034, |
| "learning_rate": 5.703632482694453e-06, |
| "loss": 0.4809, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.5103874227961819, |
| "grad_norm": 0.38959255814552307, |
| "learning_rate": 5.689067993544726e-06, |
| "loss": 0.4764, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.5112296462661426, |
| "grad_norm": 0.401683509349823, |
| "learning_rate": 5.674497542023875e-06, |
| "loss": 0.462, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.5120718697361033, |
| "grad_norm": 0.3931426405906677, |
| "learning_rate": 5.659921254207183e-06, |
| "loss": 0.4601, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.512914093206064, |
| "grad_norm": 0.40031349658966064, |
| "learning_rate": 5.645339256220427e-06, |
| "loss": 0.4754, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.5137563166760247, |
| "grad_norm": 0.3735891282558441, |
| "learning_rate": 5.630751674238796e-06, |
| "loss": 0.4692, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.5145985401459854, |
| "grad_norm": 0.38055193424224854, |
| "learning_rate": 5.616158634485793e-06, |
| "loss": 0.4608, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.5154407636159462, |
| "grad_norm": 0.3828696310520172, |
| "learning_rate": 5.601560263232153e-06, |
| "loss": 0.4503, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.5162829870859068, |
| "grad_norm": 0.4191468358039856, |
| "learning_rate": 5.5869566867947344e-06, |
| "loss": 0.4595, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.5171252105558675, |
| "grad_norm": 0.4003213346004486, |
| "learning_rate": 5.572348031535442e-06, |
| "loss": 0.4844, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.5179674340258282, |
| "grad_norm": 0.4294838309288025, |
| "learning_rate": 5.557734423860122e-06, |
| "loss": 0.4766, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.5188096574957889, |
| "grad_norm": 0.40257686376571655, |
| "learning_rate": 5.543115990217478e-06, |
| "loss": 0.4449, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.5196518809657495, |
| "grad_norm": 0.4232497811317444, |
| "learning_rate": 5.528492857097966e-06, |
| "loss": 0.4923, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.5204941044357103, |
| "grad_norm": 0.40218302607536316, |
| "learning_rate": 5.513865151032709e-06, |
| "loss": 0.4734, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.521336327905671, |
| "grad_norm": 0.44613218307495117, |
| "learning_rate": 5.499232998592399e-06, |
| "loss": 0.4776, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.5221785513756316, |
| "grad_norm": 0.387199342250824, |
| "learning_rate": 5.484596526386198e-06, |
| "loss": 0.4559, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.5230207748455924, |
| "grad_norm": 0.40792933106422424, |
| "learning_rate": 5.469955861060653e-06, |
| "loss": 0.4555, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.523862998315553, |
| "grad_norm": 0.43032124638557434, |
| "learning_rate": 5.455311129298586e-06, |
| "loss": 0.4751, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.5247052217855137, |
| "grad_norm": 0.4241279661655426, |
| "learning_rate": 5.44066245781801e-06, |
| "loss": 0.4617, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.5255474452554745, |
| "grad_norm": 0.496736615896225, |
| "learning_rate": 5.426009973371026e-06, |
| "loss": 0.4822, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.5263896687254351, |
| "grad_norm": 0.3980671465396881, |
| "learning_rate": 5.4113538027427245e-06, |
| "loss": 0.4444, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.5272318921953958, |
| "grad_norm": 0.4685482978820801, |
| "learning_rate": 5.396694072750099e-06, |
| "loss": 0.4616, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.5280741156653566, |
| "grad_norm": 0.5459217429161072, |
| "learning_rate": 5.382030910240936e-06, |
| "loss": 0.4992, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.5289163391353172, |
| "grad_norm": 0.4384770095348358, |
| "learning_rate": 5.367364442092724e-06, |
| "loss": 0.4957, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.529758562605278, |
| "grad_norm": 0.518413782119751, |
| "learning_rate": 5.352694795211555e-06, |
| "loss": 0.4672, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.5306007860752386, |
| "grad_norm": 0.4350719749927521, |
| "learning_rate": 5.338022096531028e-06, |
| "loss": 0.4752, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.5314430095451993, |
| "grad_norm": 0.39527785778045654, |
| "learning_rate": 5.3233464730111426e-06, |
| "loss": 0.4673, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.5322852330151601, |
| "grad_norm": 0.4205104410648346, |
| "learning_rate": 5.308668051637213e-06, |
| "loss": 0.472, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.5331274564851207, |
| "grad_norm": 0.4029710590839386, |
| "learning_rate": 5.29398695941876e-06, |
| "loss": 0.446, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.5339696799550814, |
| "grad_norm": 0.3991697430610657, |
| "learning_rate": 5.279303323388413e-06, |
| "loss": 0.4524, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.5348119034250421, |
| "grad_norm": 0.44701534509658813, |
| "learning_rate": 5.2646172706008154e-06, |
| "loss": 0.4895, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.5356541268950028, |
| "grad_norm": 0.38555386662483215, |
| "learning_rate": 5.249928928131523e-06, |
| "loss": 0.4863, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.5364963503649635, |
| "grad_norm": 0.41185519099235535, |
| "learning_rate": 5.235238423075899e-06, |
| "loss": 0.4537, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.5373385738349242, |
| "grad_norm": 0.42284438014030457, |
| "learning_rate": 5.220545882548024e-06, |
| "loss": 0.4314, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.5381807973048849, |
| "grad_norm": 0.4240425229072571, |
| "learning_rate": 5.20585143367959e-06, |
| "loss": 0.475, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.5390230207748455, |
| "grad_norm": 0.3929160535335541, |
| "learning_rate": 5.191155203618796e-06, |
| "loss": 0.4807, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.5398652442448063, |
| "grad_norm": 0.4253106713294983, |
| "learning_rate": 5.176457319529264e-06, |
| "loss": 0.4671, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.540707467714767, |
| "grad_norm": 0.41673335433006287, |
| "learning_rate": 5.161757908588917e-06, |
| "loss": 0.4632, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.5415496911847277, |
| "grad_norm": 0.39571234583854675, |
| "learning_rate": 5.147057097988898e-06, |
| "loss": 0.4997, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.5423919146546884, |
| "grad_norm": 0.4015507996082306, |
| "learning_rate": 5.132355014932455e-06, |
| "loss": 0.4616, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.543234138124649, |
| "grad_norm": 0.38526424765586853, |
| "learning_rate": 5.1176517866338495e-06, |
| "loss": 0.4742, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.5440763615946098, |
| "grad_norm": 0.41674524545669556, |
| "learning_rate": 5.102947540317254e-06, |
| "loss": 0.469, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.5449185850645705, |
| "grad_norm": 0.4026922881603241, |
| "learning_rate": 5.088242403215644e-06, |
| "loss": 0.4537, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.5457608085345311, |
| "grad_norm": 0.4339665472507477, |
| "learning_rate": 5.073536502569708e-06, |
| "loss": 0.4755, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.5466030320044919, |
| "grad_norm": 0.46872204542160034, |
| "learning_rate": 5.058829965626742e-06, |
| "loss": 0.4777, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.5474452554744526, |
| "grad_norm": 0.42406511306762695, |
| "learning_rate": 5.0441229196395416e-06, |
| "loss": 0.4523, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.5482874789444132, |
| "grad_norm": 0.4089900851249695, |
| "learning_rate": 5.029415491865311e-06, |
| "loss": 0.4713, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.549129702414374, |
| "grad_norm": 0.4266711175441742, |
| "learning_rate": 5.014707809564562e-06, |
| "loss": 0.457, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.5499719258843346, |
| "grad_norm": 0.39317575097084045, |
| "learning_rate": 5e-06, |
| "loss": 0.4486, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.5508141493542953, |
| "grad_norm": 0.35429295897483826, |
| "learning_rate": 4.98529219043544e-06, |
| "loss": 0.43, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.5516563728242561, |
| "grad_norm": 0.4003024101257324, |
| "learning_rate": 4.97058450813469e-06, |
| "loss": 0.4655, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.5524985962942167, |
| "grad_norm": 0.37687185406684875, |
| "learning_rate": 4.955877080360462e-06, |
| "loss": 0.4599, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.5533408197641775, |
| "grad_norm": 0.43657246232032776, |
| "learning_rate": 4.94117003437326e-06, |
| "loss": 0.4669, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.5541830432341381, |
| "grad_norm": 0.3669135570526123, |
| "learning_rate": 4.926463497430293e-06, |
| "loss": 0.4728, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.5550252667040988, |
| "grad_norm": 0.44429507851600647, |
| "learning_rate": 4.911757596784358e-06, |
| "loss": 0.4655, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.5558674901740596, |
| "grad_norm": 0.42411744594573975, |
| "learning_rate": 4.897052459682749e-06, |
| "loss": 0.4752, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.5567097136440202, |
| "grad_norm": 0.4312066435813904, |
| "learning_rate": 4.882348213366152e-06, |
| "loss": 0.498, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.5575519371139809, |
| "grad_norm": 0.3983328342437744, |
| "learning_rate": 4.867644985067548e-06, |
| "loss": 0.4738, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.5583941605839416, |
| "grad_norm": 0.39142411947250366, |
| "learning_rate": 4.8529429020111035e-06, |
| "loss": 0.4389, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.5592363840539023, |
| "grad_norm": 0.39371412992477417, |
| "learning_rate": 4.838242091411085e-06, |
| "loss": 0.4884, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.560078607523863, |
| "grad_norm": 0.45728200674057007, |
| "learning_rate": 4.823542680470738e-06, |
| "loss": 0.5223, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.5609208309938237, |
| "grad_norm": 0.4624292254447937, |
| "learning_rate": 4.808844796381205e-06, |
| "loss": 0.4471, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.5617630544637844, |
| "grad_norm": 0.3894542157649994, |
| "learning_rate": 4.794148566320412e-06, |
| "loss": 0.467, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.562605277933745, |
| "grad_norm": 0.39054685831069946, |
| "learning_rate": 4.779454117451978e-06, |
| "loss": 0.4515, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.5634475014037058, |
| "grad_norm": 0.4176219701766968, |
| "learning_rate": 4.7647615769241e-06, |
| "loss": 0.4652, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.5642897248736665, |
| "grad_norm": 0.4907318949699402, |
| "learning_rate": 4.750071071868478e-06, |
| "loss": 0.4899, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.5651319483436271, |
| "grad_norm": 0.44418320059776306, |
| "learning_rate": 4.7353827293991845e-06, |
| "loss": 0.4385, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.5659741718135879, |
| "grad_norm": 0.40623655915260315, |
| "learning_rate": 4.720696676611589e-06, |
| "loss": 0.4393, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.5668163952835485, |
| "grad_norm": 0.4422489404678345, |
| "learning_rate": 4.706013040581242e-06, |
| "loss": 0.4545, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.5676586187535093, |
| "grad_norm": 0.46714815497398376, |
| "learning_rate": 4.691331948362789e-06, |
| "loss": 0.4459, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.56850084222347, |
| "grad_norm": 0.4542917013168335, |
| "learning_rate": 4.676653526988858e-06, |
| "loss": 0.4744, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.5693430656934306, |
| "grad_norm": 0.4214479625225067, |
| "learning_rate": 4.661977903468974e-06, |
| "loss": 0.4547, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.5701852891633914, |
| "grad_norm": 0.3610055148601532, |
| "learning_rate": 4.647305204788445e-06, |
| "loss": 0.4427, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.571027512633352, |
| "grad_norm": 0.4122559130191803, |
| "learning_rate": 4.632635557907277e-06, |
| "loss": 0.4881, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.5718697361033127, |
| "grad_norm": 0.3858247399330139, |
| "learning_rate": 4.617969089759066e-06, |
| "loss": 0.4591, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.5727119595732735, |
| "grad_norm": 0.3892834186553955, |
| "learning_rate": 4.603305927249902e-06, |
| "loss": 0.4604, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.5735541830432341, |
| "grad_norm": 0.415051132440567, |
| "learning_rate": 4.588646197257278e-06, |
| "loss": 0.4817, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.5743964065131948, |
| "grad_norm": 0.3991219997406006, |
| "learning_rate": 4.573990026628976e-06, |
| "loss": 0.4761, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.5752386299831556, |
| "grad_norm": 0.4070507287979126, |
| "learning_rate": 4.559337542181993e-06, |
| "loss": 0.4514, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.5760808534531162, |
| "grad_norm": 0.40679407119750977, |
| "learning_rate": 4.544688870701416e-06, |
| "loss": 0.4671, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.5769230769230769, |
| "grad_norm": 0.38636258244514465, |
| "learning_rate": 4.53004413893935e-06, |
| "loss": 0.4757, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.5777653003930376, |
| "grad_norm": 0.40904495120048523, |
| "learning_rate": 4.5154034736138035e-06, |
| "loss": 0.4678, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.5786075238629983, |
| "grad_norm": 0.3879135549068451, |
| "learning_rate": 4.500767001407604e-06, |
| "loss": 0.467, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.5794497473329591, |
| "grad_norm": 0.3964616060256958, |
| "learning_rate": 4.486134848967292e-06, |
| "loss": 0.4487, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.5802919708029197, |
| "grad_norm": 0.39060208201408386, |
| "learning_rate": 4.471507142902036e-06, |
| "loss": 0.4671, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.5811341942728804, |
| "grad_norm": 0.4536742568016052, |
| "learning_rate": 4.4568840097825225e-06, |
| "loss": 0.474, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.5819764177428411, |
| "grad_norm": 0.3637699484825134, |
| "learning_rate": 4.4422655761398785e-06, |
| "loss": 0.4465, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.5828186412128018, |
| "grad_norm": 0.36765387654304504, |
| "learning_rate": 4.427651968464559e-06, |
| "loss": 0.4307, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.5836608646827625, |
| "grad_norm": 0.42411142587661743, |
| "learning_rate": 4.413043313205266e-06, |
| "loss": 0.4689, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.5845030881527232, |
| "grad_norm": 0.36600083112716675, |
| "learning_rate": 4.3984397367678475e-06, |
| "loss": 0.4659, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.5853453116226839, |
| "grad_norm": 0.42704781889915466, |
| "learning_rate": 4.383841365514208e-06, |
| "loss": 0.4869, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.5861875350926445, |
| "grad_norm": 0.3502572774887085, |
| "learning_rate": 4.369248325761205e-06, |
| "loss": 0.4559, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.5870297585626053, |
| "grad_norm": 0.412727415561676, |
| "learning_rate": 4.354660743779575e-06, |
| "loss": 0.4551, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.587871982032566, |
| "grad_norm": 0.43753746151924133, |
| "learning_rate": 4.340078745792818e-06, |
| "loss": 0.4875, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.5887142055025266, |
| "grad_norm": 0.3773738443851471, |
| "learning_rate": 4.325502457976126e-06, |
| "loss": 0.4571, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.5895564289724874, |
| "grad_norm": 0.3818628191947937, |
| "learning_rate": 4.310932006455276e-06, |
| "loss": 0.4368, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.590398652442448, |
| "grad_norm": 0.4297826588153839, |
| "learning_rate": 4.296367517305548e-06, |
| "loss": 0.4613, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.5912408759124088, |
| "grad_norm": 0.4199880361557007, |
| "learning_rate": 4.281809116550629e-06, |
| "loss": 0.4512, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.5920830993823695, |
| "grad_norm": 0.3798048496246338, |
| "learning_rate": 4.267256930161523e-06, |
| "loss": 0.4399, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.5929253228523301, |
| "grad_norm": 0.38816648721694946, |
| "learning_rate": 4.252711084055468e-06, |
| "loss": 0.4415, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.5937675463222909, |
| "grad_norm": 0.38051608204841614, |
| "learning_rate": 4.238171704094833e-06, |
| "loss": 0.4622, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.5946097697922516, |
| "grad_norm": 0.39357250928878784, |
| "learning_rate": 4.223638916086044e-06, |
| "loss": 0.4664, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.5954519932622122, |
| "grad_norm": 0.4027978777885437, |
| "learning_rate": 4.209112845778481e-06, |
| "loss": 0.4643, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.596294216732173, |
| "grad_norm": 0.38121509552001953, |
| "learning_rate": 4.194593618863404e-06, |
| "loss": 0.4472, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.5971364402021336, |
| "grad_norm": 0.3984185755252838, |
| "learning_rate": 4.180081360972852e-06, |
| "loss": 0.4479, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.5979786636720943, |
| "grad_norm": 0.4842238128185272, |
| "learning_rate": 4.165576197678571e-06, |
| "loss": 0.4685, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.5988208871420551, |
| "grad_norm": 0.38819262385368347, |
| "learning_rate": 4.151078254490908e-06, |
| "loss": 0.4776, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.5996631106120157, |
| "grad_norm": 0.4201744496822357, |
| "learning_rate": 4.136587656857744e-06, |
| "loss": 0.471, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.6005053340819764, |
| "grad_norm": 0.4200340211391449, |
| "learning_rate": 4.122104530163397e-06, |
| "loss": 0.4309, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.6013475575519371, |
| "grad_norm": 0.37184223532676697, |
| "learning_rate": 4.107628999727542e-06, |
| "loss": 0.462, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.6021897810218978, |
| "grad_norm": 0.43214350938796997, |
| "learning_rate": 4.09316119080412e-06, |
| "loss": 0.4824, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.6030320044918585, |
| "grad_norm": 0.3922446370124817, |
| "learning_rate": 4.0787012285802695e-06, |
| "loss": 0.4716, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.6038742279618192, |
| "grad_norm": 0.4424769878387451, |
| "learning_rate": 4.064249238175223e-06, |
| "loss": 0.4681, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.6047164514317799, |
| "grad_norm": 0.41791683435440063, |
| "learning_rate": 4.04980534463924e-06, |
| "loss": 0.4773, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.6055586749017406, |
| "grad_norm": 0.3661481738090515, |
| "learning_rate": 4.035369672952516e-06, |
| "loss": 0.4553, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.6064008983717013, |
| "grad_norm": 0.3772629201412201, |
| "learning_rate": 4.020942348024108e-06, |
| "loss": 0.432, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.607243121841662, |
| "grad_norm": 0.4189452826976776, |
| "learning_rate": 4.0065234946908456e-06, |
| "loss": 0.472, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.6080853453116227, |
| "grad_norm": 0.38295382261276245, |
| "learning_rate": 3.992113237716261e-06, |
| "loss": 0.487, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.6089275687815834, |
| "grad_norm": 0.39993205666542053, |
| "learning_rate": 3.977711701789499e-06, |
| "loss": 0.4594, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.609769792251544, |
| "grad_norm": 0.40375831723213196, |
| "learning_rate": 3.963319011524246e-06, |
| "loss": 0.4476, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.6106120157215048, |
| "grad_norm": 0.39132022857666016, |
| "learning_rate": 3.948935291457645e-06, |
| "loss": 0.4686, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.6114542391914655, |
| "grad_norm": 0.3846696615219116, |
| "learning_rate": 3.934560666049226e-06, |
| "loss": 0.4318, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.6122964626614261, |
| "grad_norm": 0.3892068564891815, |
| "learning_rate": 3.920195259679822e-06, |
| "loss": 0.455, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.6131386861313869, |
| "grad_norm": 0.4346502125263214, |
| "learning_rate": 3.905839196650494e-06, |
| "loss": 0.474, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.6139809096013475, |
| "grad_norm": 0.38509148359298706, |
| "learning_rate": 3.891492601181462e-06, |
| "loss": 0.4617, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.6148231330713082, |
| "grad_norm": 0.35971835255622864, |
| "learning_rate": 3.877155597411019e-06, |
| "loss": 0.4574, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.615665356541269, |
| "grad_norm": 0.4372704029083252, |
| "learning_rate": 3.862828309394469e-06, |
| "loss": 0.4864, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.6165075800112296, |
| "grad_norm": 0.41383740305900574, |
| "learning_rate": 3.8485108611030415e-06, |
| "loss": 0.4679, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.6173498034811904, |
| "grad_norm": 0.3286128044128418, |
| "learning_rate": 3.834203376422831e-06, |
| "loss": 0.4487, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.618192026951151, |
| "grad_norm": 0.4194988012313843, |
| "learning_rate": 3.8199059791537105e-06, |
| "loss": 0.4588, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.6190342504211117, |
| "grad_norm": 0.4303875267505646, |
| "learning_rate": 3.805618793008279e-06, |
| "loss": 0.4594, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.6198764738910725, |
| "grad_norm": 0.38063475489616394, |
| "learning_rate": 3.7913419416107692e-06, |
| "loss": 0.4497, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.6207186973610331, |
| "grad_norm": 0.3899458348751068, |
| "learning_rate": 3.777075548496001e-06, |
| "loss": 0.4649, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.6215609208309938, |
| "grad_norm": 0.39380112290382385, |
| "learning_rate": 3.7628197371082916e-06, |
| "loss": 0.4539, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.6224031443009546, |
| "grad_norm": 0.4633565843105316, |
| "learning_rate": 3.7485746308004013e-06, |
| "loss": 0.4817, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.6232453677709152, |
| "grad_norm": 0.37416234612464905, |
| "learning_rate": 3.7343403528324574e-06, |
| "loss": 0.4651, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.6240875912408759, |
| "grad_norm": 0.4205109775066376, |
| "learning_rate": 3.7201170263709004e-06, |
| "loss": 0.4576, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.6249298147108366, |
| "grad_norm": 0.3740626871585846, |
| "learning_rate": 3.705904774487396e-06, |
| "loss": 0.4523, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.6257720381807973, |
| "grad_norm": 0.39562278985977173, |
| "learning_rate": 3.6917037201577977e-06, |
| "loss": 0.4616, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.626614261650758, |
| "grad_norm": 0.4320240318775177, |
| "learning_rate": 3.6775139862610577e-06, |
| "loss": 0.478, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.6274564851207187, |
| "grad_norm": 0.356246680021286, |
| "learning_rate": 3.6633356955781827e-06, |
| "loss": 0.4365, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.6282987085906794, |
| "grad_norm": 0.4068841338157654, |
| "learning_rate": 3.649168970791157e-06, |
| "loss": 0.4614, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.62914093206064, |
| "grad_norm": 0.4344610571861267, |
| "learning_rate": 3.635013934481895e-06, |
| "loss": 0.437, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.6299831555306008, |
| "grad_norm": 0.3860117793083191, |
| "learning_rate": 3.620870709131163e-06, |
| "loss": 0.4724, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.6308253790005615, |
| "grad_norm": 0.3780432343482971, |
| "learning_rate": 3.6067394171175397e-06, |
| "loss": 0.4866, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.6316676024705222, |
| "grad_norm": 0.3598399758338928, |
| "learning_rate": 3.5926201807163384e-06, |
| "loss": 0.4655, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.6325098259404829, |
| "grad_norm": 0.3982943594455719, |
| "learning_rate": 3.578513122098566e-06, |
| "loss": 0.4721, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.6333520494104435, |
| "grad_norm": 0.4459211826324463, |
| "learning_rate": 3.564418363329848e-06, |
| "loss": 0.446, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.6341942728804043, |
| "grad_norm": 0.45306849479675293, |
| "learning_rate": 3.5503360263693887e-06, |
| "loss": 0.4618, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.635036496350365, |
| "grad_norm": 0.351254940032959, |
| "learning_rate": 3.5362662330689067e-06, |
| "loss": 0.4425, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.6358787198203256, |
| "grad_norm": 0.3903196156024933, |
| "learning_rate": 3.5222091051715803e-06, |
| "loss": 0.4659, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.6367209432902864, |
| "grad_norm": 0.3876386284828186, |
| "learning_rate": 3.5081647643110028e-06, |
| "loss": 0.4447, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.637563166760247, |
| "grad_norm": 0.4539998173713684, |
| "learning_rate": 3.4941333320101173e-06, |
| "loss": 0.4648, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.6384053902302077, |
| "grad_norm": 0.43911775946617126, |
| "learning_rate": 3.480114929680176e-06, |
| "loss": 0.4684, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.6392476137001685, |
| "grad_norm": 0.36161547899246216, |
| "learning_rate": 3.466109678619681e-06, |
| "loss": 0.445, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.6400898371701291, |
| "grad_norm": 0.4111180007457733, |
| "learning_rate": 3.4521177000133456e-06, |
| "loss": 0.4635, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.6409320606400898, |
| "grad_norm": 0.42481526732444763, |
| "learning_rate": 3.4381391149310294e-06, |
| "loss": 0.4194, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.6417742841100506, |
| "grad_norm": 0.44296056032180786, |
| "learning_rate": 3.4241740443267112e-06, |
| "loss": 0.4611, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.6426165075800112, |
| "grad_norm": 0.3898600935935974, |
| "learning_rate": 3.4102226090374246e-06, |
| "loss": 0.4717, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.643458731049972, |
| "grad_norm": 0.41485339403152466, |
| "learning_rate": 3.3962849297822225e-06, |
| "loss": 0.4246, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.6443009545199326, |
| "grad_norm": 0.42214149236679077, |
| "learning_rate": 3.3823611271611266e-06, |
| "loss": 0.456, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.6451431779898933, |
| "grad_norm": 0.3852960765361786, |
| "learning_rate": 3.368451321654091e-06, |
| "loss": 0.4626, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.6459854014598541, |
| "grad_norm": 0.3872814178466797, |
| "learning_rate": 3.35455563361995e-06, |
| "loss": 0.4632, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.6468276249298147, |
| "grad_norm": 0.41995540261268616, |
| "learning_rate": 3.3406741832953893e-06, |
| "loss": 0.4672, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.6476698483997754, |
| "grad_norm": 0.43301689624786377, |
| "learning_rate": 3.3268070907938915e-06, |
| "loss": 0.4625, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.6485120718697361, |
| "grad_norm": 0.3875824809074402, |
| "learning_rate": 3.3129544761047093e-06, |
| "loss": 0.4773, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.6493542953396968, |
| "grad_norm": 0.40870511531829834, |
| "learning_rate": 3.2991164590918162e-06, |
| "loss": 0.4682, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.6501965188096575, |
| "grad_norm": 0.43525877594947815, |
| "learning_rate": 3.2852931594928804e-06, |
| "loss": 0.4594, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.6510387422796182, |
| "grad_norm": 0.40894654393196106, |
| "learning_rate": 3.271484696918218e-06, |
| "loss": 0.4695, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.6518809657495789, |
| "grad_norm": 0.42762210965156555, |
| "learning_rate": 3.2576911908497695e-06, |
| "loss": 0.4597, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.6527231892195395, |
| "grad_norm": 0.33561986684799194, |
| "learning_rate": 3.2439127606400546e-06, |
| "loss": 0.4486, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.6535654126895003, |
| "grad_norm": 0.4465593695640564, |
| "learning_rate": 3.2301495255111426e-06, |
| "loss": 0.4937, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.654407636159461, |
| "grad_norm": 0.3989386558532715, |
| "learning_rate": 3.2164016045536306e-06, |
| "loss": 0.4541, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.6552498596294217, |
| "grad_norm": 0.38122543692588806, |
| "learning_rate": 3.202669116725598e-06, |
| "loss": 0.4426, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.6560920830993824, |
| "grad_norm": 0.3579535484313965, |
| "learning_rate": 3.1889521808515888e-06, |
| "loss": 0.464, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.656934306569343, |
| "grad_norm": 0.38302677869796753, |
| "learning_rate": 3.1752509156215738e-06, |
| "loss": 0.4418, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.6577765300393038, |
| "grad_norm": 0.3925486207008362, |
| "learning_rate": 3.1615654395899377e-06, |
| "loss": 0.4621, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.6586187535092645, |
| "grad_norm": 0.3953411877155304, |
| "learning_rate": 3.1478958711744324e-06, |
| "loss": 0.4503, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.6594609769792251, |
| "grad_norm": 0.3958507478237152, |
| "learning_rate": 3.1342423286551756e-06, |
| "loss": 0.4718, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.6603032004491859, |
| "grad_norm": 0.39234763383865356, |
| "learning_rate": 3.120604930173608e-06, |
| "loss": 0.4806, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.6611454239191465, |
| "grad_norm": 0.3791189193725586, |
| "learning_rate": 3.1069837937314846e-06, |
| "loss": 0.4574, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.6619876473891072, |
| "grad_norm": 0.36702293157577515, |
| "learning_rate": 3.093379037189842e-06, |
| "loss": 0.4556, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.662829870859068, |
| "grad_norm": 0.3860447406768799, |
| "learning_rate": 3.0797907782679944e-06, |
| "loss": 0.439, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.6636720943290286, |
| "grad_norm": 0.37849584221839905, |
| "learning_rate": 3.0662191345424925e-06, |
| "loss": 0.4869, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.6645143177989893, |
| "grad_norm": 0.3850601017475128, |
| "learning_rate": 3.0526642234461313e-06, |
| "loss": 0.4673, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.66535654126895, |
| "grad_norm": 0.3666015565395355, |
| "learning_rate": 3.039126162266912e-06, |
| "loss": 0.4341, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.6661987647389107, |
| "grad_norm": 0.36709144711494446, |
| "learning_rate": 3.0256050681470446e-06, |
| "loss": 0.4873, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.6670409882088714, |
| "grad_norm": 0.3814394474029541, |
| "learning_rate": 3.012101058081919e-06, |
| "loss": 0.4806, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.6678832116788321, |
| "grad_norm": 0.3725821375846863, |
| "learning_rate": 2.9986142489191074e-06, |
| "loss": 0.4441, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.6687254351487928, |
| "grad_norm": 0.3743121325969696, |
| "learning_rate": 2.9851447573573383e-06, |
| "loss": 0.4677, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.6695676586187536, |
| "grad_norm": 0.3642611801624298, |
| "learning_rate": 2.971692699945502e-06, |
| "loss": 0.4593, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.6704098820887142, |
| "grad_norm": 0.36425134539604187, |
| "learning_rate": 2.958258193081629e-06, |
| "loss": 0.4546, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.6712521055586749, |
| "grad_norm": 0.36076676845550537, |
| "learning_rate": 2.9448413530118912e-06, |
| "loss": 0.4589, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.6720943290286356, |
| "grad_norm": 0.3566359877586365, |
| "learning_rate": 2.9314422958295906e-06, |
| "loss": 0.4733, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.6729365524985963, |
| "grad_norm": 0.3717946410179138, |
| "learning_rate": 2.9180611374741623e-06, |
| "loss": 0.4578, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.673778775968557, |
| "grad_norm": 0.34884119033813477, |
| "learning_rate": 2.904697993730159e-06, |
| "loss": 0.439, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.6746209994385177, |
| "grad_norm": 0.374682754278183, |
| "learning_rate": 2.891352980226262e-06, |
| "loss": 0.4692, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.6754632229084784, |
| "grad_norm": 0.35721421241760254, |
| "learning_rate": 2.8780262124342755e-06, |
| "loss": 0.4848, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.676305446378439, |
| "grad_norm": 0.41552138328552246, |
| "learning_rate": 2.8647178056681197e-06, |
| "loss": 0.4716, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.6771476698483998, |
| "grad_norm": 0.37886112928390503, |
| "learning_rate": 2.8514278750828537e-06, |
| "loss": 0.4754, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.6779898933183605, |
| "grad_norm": 0.3755894601345062, |
| "learning_rate": 2.838156535673652e-06, |
| "loss": 0.4457, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.6788321167883211, |
| "grad_norm": 0.35193660855293274, |
| "learning_rate": 2.8249039022748315e-06, |
| "loss": 0.4909, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.6796743402582819, |
| "grad_norm": 0.3655393421649933, |
| "learning_rate": 2.8116700895588473e-06, |
| "loss": 0.4767, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.6805165637282425, |
| "grad_norm": 0.38788074254989624, |
| "learning_rate": 2.798455212035305e-06, |
| "loss": 0.4875, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.6813587871982033, |
| "grad_norm": 0.3898300528526306, |
| "learning_rate": 2.785259384049959e-06, |
| "loss": 0.462, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.682201010668164, |
| "grad_norm": 0.34134534001350403, |
| "learning_rate": 2.7720827197837475e-06, |
| "loss": 0.4641, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.6830432341381246, |
| "grad_norm": 0.352770060300827, |
| "learning_rate": 2.7589253332517736e-06, |
| "loss": 0.4659, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.6838854576080854, |
| "grad_norm": 0.37993523478507996, |
| "learning_rate": 2.745787338302341e-06, |
| "loss": 0.4567, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.684727681078046, |
| "grad_norm": 0.35517552495002747, |
| "learning_rate": 2.7326688486159613e-06, |
| "loss": 0.4586, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.6855699045480067, |
| "grad_norm": 0.3561233878135681, |
| "learning_rate": 2.7195699777043723e-06, |
| "loss": 0.4721, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.6864121280179675, |
| "grad_norm": 0.4417415261268616, |
| "learning_rate": 2.706490838909547e-06, |
| "loss": 0.4788, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.6872543514879281, |
| "grad_norm": 0.3459290564060211, |
| "learning_rate": 2.6934315454027323e-06, |
| "loss": 0.4818, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.6880965749578888, |
| "grad_norm": 0.36064231395721436, |
| "learning_rate": 2.680392210183446e-06, |
| "loss": 0.4524, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.6889387984278496, |
| "grad_norm": 0.3664780259132385, |
| "learning_rate": 2.6673729460785174e-06, |
| "loss": 0.4666, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.6897810218978102, |
| "grad_norm": 0.40207329392433167, |
| "learning_rate": 2.6543738657411033e-06, |
| "loss": 0.4433, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.6906232453677709, |
| "grad_norm": 0.3451634347438812, |
| "learning_rate": 2.6413950816497146e-06, |
| "loss": 0.4731, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.6914654688377316, |
| "grad_norm": 0.3632340729236603, |
| "learning_rate": 2.628436706107238e-06, |
| "loss": 0.4773, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.6923076923076923, |
| "grad_norm": 0.41608792543411255, |
| "learning_rate": 2.6154988512399784e-06, |
| "loss": 0.4705, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.6931499157776531, |
| "grad_norm": 0.38861706852912903, |
| "learning_rate": 2.6025816289966703e-06, |
| "loss": 0.4725, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.6939921392476137, |
| "grad_norm": 0.36379146575927734, |
| "learning_rate": 2.5896851511475184e-06, |
| "loss": 0.4898, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.6948343627175744, |
| "grad_norm": 0.35487642884254456, |
| "learning_rate": 2.5768095292832412e-06, |
| "loss": 0.4552, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.6956765861875351, |
| "grad_norm": 0.3770192861557007, |
| "learning_rate": 2.5639548748140803e-06, |
| "loss": 0.4327, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.6965188096574958, |
| "grad_norm": 0.33418017625808716, |
| "learning_rate": 2.5511212989688587e-06, |
| "loss": 0.4585, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.6973610331274565, |
| "grad_norm": 0.3845199942588806, |
| "learning_rate": 2.5383089127940087e-06, |
| "loss": 0.4692, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.6982032565974172, |
| "grad_norm": 0.38967469334602356, |
| "learning_rate": 2.525517827152614e-06, |
| "loss": 0.4701, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.6990454800673779, |
| "grad_norm": 0.3837958872318268, |
| "learning_rate": 2.5127481527234397e-06, |
| "loss": 0.4746, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.6998877035373385, |
| "grad_norm": 0.3344170153141022, |
| "learning_rate": 2.5000000000000015e-06, |
| "loss": 0.4391, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.7007299270072993, |
| "grad_norm": 0.3698020279407501, |
| "learning_rate": 2.487273479289574e-06, |
| "loss": 0.4525, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.70157215047726, |
| "grad_norm": 0.3206958472728729, |
| "learning_rate": 2.4745687007122636e-06, |
| "loss": 0.4596, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.7024143739472206, |
| "grad_norm": 0.37502309679985046, |
| "learning_rate": 2.4618857742000463e-06, |
| "loss": 0.458, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.7032565974171814, |
| "grad_norm": 0.3526128828525543, |
| "learning_rate": 2.449224809495815e-06, |
| "loss": 0.4851, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.704098820887142, |
| "grad_norm": 0.35734719038009644, |
| "learning_rate": 2.436585916152426e-06, |
| "loss": 0.4281, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.7049410443571027, |
| "grad_norm": 0.41668057441711426, |
| "learning_rate": 2.423969203531768e-06, |
| "loss": 0.4487, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.7057832678270635, |
| "grad_norm": 0.3506368100643158, |
| "learning_rate": 2.411374780803793e-06, |
| "loss": 0.4606, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.7066254912970241, |
| "grad_norm": 0.3638664484024048, |
| "learning_rate": 2.3988027569455895e-06, |
| "loss": 0.4605, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.7074677147669849, |
| "grad_norm": 0.4103443920612335, |
| "learning_rate": 2.3862532407404306e-06, |
| "loss": 0.449, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.7083099382369455, |
| "grad_norm": 0.3737192451953888, |
| "learning_rate": 2.373726340776837e-06, |
| "loss": 0.4451, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.7091521617069062, |
| "grad_norm": 0.39184120297431946, |
| "learning_rate": 2.361222165447628e-06, |
| "loss": 0.4641, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.709994385176867, |
| "grad_norm": 0.4118758738040924, |
| "learning_rate": 2.348740822949006e-06, |
| "loss": 0.4908, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.7108366086468276, |
| "grad_norm": 0.35428386926651, |
| "learning_rate": 2.33628242127959e-06, |
| "loss": 0.4454, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.7116788321167883, |
| "grad_norm": 0.4048777222633362, |
| "learning_rate": 2.323847068239504e-06, |
| "loss": 0.4765, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.712521055586749, |
| "grad_norm": 0.40927958488464355, |
| "learning_rate": 2.3114348714294355e-06, |
| "loss": 0.4608, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.7133632790567097, |
| "grad_norm": 0.39046961069107056, |
| "learning_rate": 2.2990459382497086e-06, |
| "loss": 0.4751, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.7142055025266704, |
| "grad_norm": 0.37439659237861633, |
| "learning_rate": 2.2866803758993446e-06, |
| "loss": 0.4899, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.7150477259966311, |
| "grad_norm": 0.3961264491081238, |
| "learning_rate": 2.274338291375147e-06, |
| "loss": 0.4674, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.7158899494665918, |
| "grad_norm": 0.37867552042007446, |
| "learning_rate": 2.262019791470772e-06, |
| "loss": 0.4495, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.7167321729365524, |
| "grad_norm": 0.3406490087509155, |
| "learning_rate": 2.2497249827757933e-06, |
| "loss": 0.4941, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.7175743964065132, |
| "grad_norm": 0.3773308992385864, |
| "learning_rate": 2.2374539716748034e-06, |
| "loss": 0.4789, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.7184166198764739, |
| "grad_norm": 0.3965294063091278, |
| "learning_rate": 2.225206864346465e-06, |
| "loss": 0.474, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.7192588433464346, |
| "grad_norm": 0.3620738983154297, |
| "learning_rate": 2.2129837667626147e-06, |
| "loss": 0.436, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.7201010668163953, |
| "grad_norm": 0.3788299560546875, |
| "learning_rate": 2.2007847846873342e-06, |
| "loss": 0.4686, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.720943290286356, |
| "grad_norm": 0.3688678741455078, |
| "learning_rate": 2.188610023676041e-06, |
| "loss": 0.4698, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.7217855137563167, |
| "grad_norm": 0.3527979552745819, |
| "learning_rate": 2.176459589074566e-06, |
| "loss": 0.4463, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.7226277372262774, |
| "grad_norm": 0.35224097967147827, |
| "learning_rate": 2.164333586018259e-06, |
| "loss": 0.4563, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.723469960696238, |
| "grad_norm": 0.36833032965660095, |
| "learning_rate": 2.1522321194310577e-06, |
| "loss": 0.4714, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.7243121841661988, |
| "grad_norm": 0.3530896306037903, |
| "learning_rate": 2.1401552940245962e-06, |
| "loss": 0.4585, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.7251544076361595, |
| "grad_norm": 0.37035128474235535, |
| "learning_rate": 2.1281032142972933e-06, |
| "loss": 0.4443, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.7259966311061201, |
| "grad_norm": 0.3335658609867096, |
| "learning_rate": 2.1160759845334483e-06, |
| "loss": 0.4704, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.7268388545760809, |
| "grad_norm": 0.3341200351715088, |
| "learning_rate": 2.1040737088023323e-06, |
| "loss": 0.4591, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.7276810780460415, |
| "grad_norm": 0.33908799290657043, |
| "learning_rate": 2.0920964909573065e-06, |
| "loss": 0.4628, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.7285233015160022, |
| "grad_norm": 0.35746559500694275, |
| "learning_rate": 2.080144434634898e-06, |
| "loss": 0.4501, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.729365524985963, |
| "grad_norm": 0.3367277979850769, |
| "learning_rate": 2.068217643253925e-06, |
| "loss": 0.4557, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.7302077484559236, |
| "grad_norm": 0.35350626707077026, |
| "learning_rate": 2.056316220014588e-06, |
| "loss": 0.4379, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.7310499719258844, |
| "grad_norm": 0.36783963441848755, |
| "learning_rate": 2.0444402678975876e-06, |
| "loss": 0.4491, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.731892195395845, |
| "grad_norm": 0.3690626621246338, |
| "learning_rate": 2.0325898896632178e-06, |
| "loss": 0.4369, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.7327344188658057, |
| "grad_norm": 0.36176997423171997, |
| "learning_rate": 2.0207651878505e-06, |
| "loss": 0.4682, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.7335766423357665, |
| "grad_norm": 0.3541899621486664, |
| "learning_rate": 2.0089662647762716e-06, |
| "loss": 0.4621, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.7344188658057271, |
| "grad_norm": 0.3505668342113495, |
| "learning_rate": 1.997193222534316e-06, |
| "loss": 0.4271, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.7352610892756878, |
| "grad_norm": 0.3986637592315674, |
| "learning_rate": 1.9854461629944764e-06, |
| "loss": 0.4557, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.7361033127456486, |
| "grad_norm": 0.3776889443397522, |
| "learning_rate": 1.9737251878017678e-06, |
| "loss": 0.4389, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.7369455362156092, |
| "grad_norm": 0.36136969923973083, |
| "learning_rate": 1.962030398375506e-06, |
| "loss": 0.4618, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.7377877596855699, |
| "grad_norm": 0.35746991634368896, |
| "learning_rate": 1.950361895908427e-06, |
| "loss": 0.4429, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.7386299831555306, |
| "grad_norm": 0.3493303060531616, |
| "learning_rate": 1.9387197813658092e-06, |
| "loss": 0.4768, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.7394722066254913, |
| "grad_norm": 0.36661627888679504, |
| "learning_rate": 1.927104155484602e-06, |
| "loss": 0.4613, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.740314430095452, |
| "grad_norm": 0.3595631718635559, |
| "learning_rate": 1.915515118772555e-06, |
| "loss": 0.4718, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.7411566535654127, |
| "grad_norm": 0.3635704219341278, |
| "learning_rate": 1.9039527715073424e-06, |
| "loss": 0.4571, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.7419988770353734, |
| "grad_norm": 0.32493817806243896, |
| "learning_rate": 1.8924172137357038e-06, |
| "loss": 0.4787, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.742841100505334, |
| "grad_norm": 0.3330385684967041, |
| "learning_rate": 1.8809085452725744e-06, |
| "loss": 0.4564, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.7436833239752948, |
| "grad_norm": 0.3349233567714691, |
| "learning_rate": 1.8694268657002197e-06, |
| "loss": 0.4492, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.7445255474452555, |
| "grad_norm": 0.3418170213699341, |
| "learning_rate": 1.8579722743673773e-06, |
| "loss": 0.4697, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.7453677709152162, |
| "grad_norm": 0.4015568792819977, |
| "learning_rate": 1.8465448703883959e-06, |
| "loss": 0.4776, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.7462099943851769, |
| "grad_norm": 0.34949299693107605, |
| "learning_rate": 1.8351447526423728e-06, |
| "loss": 0.4805, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.7470522178551375, |
| "grad_norm": 0.33957937359809875, |
| "learning_rate": 1.8237720197723075e-06, |
| "loss": 0.4721, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.7478944413250983, |
| "grad_norm": 0.38134124875068665, |
| "learning_rate": 1.812426770184243e-06, |
| "loss": 0.4715, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.748736664795059, |
| "grad_norm": 0.3836059272289276, |
| "learning_rate": 1.8011091020464138e-06, |
| "loss": 0.5041, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.7495788882650196, |
| "grad_norm": 0.34067878127098083, |
| "learning_rate": 1.789819113288397e-06, |
| "loss": 0.445, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.7504211117349804, |
| "grad_norm": 0.3494390845298767, |
| "learning_rate": 1.7785569016002686e-06, |
| "loss": 0.4438, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.751263335204941, |
| "grad_norm": 0.3596593141555786, |
| "learning_rate": 1.7673225644317487e-06, |
| "loss": 0.4784, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.7521055586749017, |
| "grad_norm": 0.35766345262527466, |
| "learning_rate": 1.75611619899137e-06, |
| "loss": 0.4532, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.7529477821448625, |
| "grad_norm": 0.35504239797592163, |
| "learning_rate": 1.7449379022456297e-06, |
| "loss": 0.4469, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.7537900056148231, |
| "grad_norm": 0.3405526280403137, |
| "learning_rate": 1.7337877709181527e-06, |
| "loss": 0.4492, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.7546322290847838, |
| "grad_norm": 0.3638141453266144, |
| "learning_rate": 1.7226659014888548e-06, |
| "loss": 0.4509, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.7554744525547445, |
| "grad_norm": 0.36837533116340637, |
| "learning_rate": 1.711572390193102e-06, |
| "loss": 0.4557, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.7563166760247052, |
| "grad_norm": 0.37345463037490845, |
| "learning_rate": 1.7005073330208881e-06, |
| "loss": 0.4515, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.757158899494666, |
| "grad_norm": 0.34140193462371826, |
| "learning_rate": 1.689470825715998e-06, |
| "loss": 0.4511, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.7580011229646266, |
| "grad_norm": 0.33038443326950073, |
| "learning_rate": 1.6784629637751814e-06, |
| "loss": 0.4395, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.7588433464345873, |
| "grad_norm": 0.35559070110321045, |
| "learning_rate": 1.6674838424473172e-06, |
| "loss": 0.4892, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.759685569904548, |
| "grad_norm": 0.34186041355133057, |
| "learning_rate": 1.6565335567326112e-06, |
| "loss": 0.4472, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.7605277933745087, |
| "grad_norm": 0.36697492003440857, |
| "learning_rate": 1.6456122013817477e-06, |
| "loss": 0.4597, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.7613700168444694, |
| "grad_norm": 0.3907342553138733, |
| "learning_rate": 1.6347198708950884e-06, |
| "loss": 0.4845, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.7622122403144301, |
| "grad_norm": 0.3752238154411316, |
| "learning_rate": 1.6238566595218475e-06, |
| "loss": 0.4525, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.7630544637843908, |
| "grad_norm": 0.41171255707740784, |
| "learning_rate": 1.6130226612592787e-06, |
| "loss": 0.4556, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.7638966872543514, |
| "grad_norm": 0.3502821922302246, |
| "learning_rate": 1.6022179698518525e-06, |
| "loss": 0.4688, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.7647389107243122, |
| "grad_norm": 0.3488776683807373, |
| "learning_rate": 1.591442678790467e-06, |
| "loss": 0.4798, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.7655811341942729, |
| "grad_norm": 0.36690619587898254, |
| "learning_rate": 1.580696881311611e-06, |
| "loss": 0.4869, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.7664233576642335, |
| "grad_norm": 0.3431231677532196, |
| "learning_rate": 1.5699806703965787e-06, |
| "loss": 0.4285, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.7672655811341943, |
| "grad_norm": 0.35018178820610046, |
| "learning_rate": 1.5592941387706562e-06, |
| "loss": 0.4608, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.768107804604155, |
| "grad_norm": 0.36093589663505554, |
| "learning_rate": 1.5486373789023206e-06, |
| "loss": 0.4785, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.7689500280741156, |
| "grad_norm": 0.3167992830276489, |
| "learning_rate": 1.538010483002435e-06, |
| "loss": 0.445, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.7697922515440764, |
| "grad_norm": 0.37118929624557495, |
| "learning_rate": 1.5274135430234654e-06, |
| "loss": 0.4591, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.770634475014037, |
| "grad_norm": 0.3630264699459076, |
| "learning_rate": 1.5168466506586654e-06, |
| "loss": 0.4588, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.7714766984839978, |
| "grad_norm": 0.33517614006996155, |
| "learning_rate": 1.506309897341297e-06, |
| "loss": 0.4644, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.7723189219539585, |
| "grad_norm": 0.35454773902893066, |
| "learning_rate": 1.4958033742438348e-06, |
| "loss": 0.4808, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.7731611454239191, |
| "grad_norm": 0.3116423487663269, |
| "learning_rate": 1.4853271722771772e-06, |
| "loss": 0.4403, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.7740033688938799, |
| "grad_norm": 0.3462409973144531, |
| "learning_rate": 1.4748813820898554e-06, |
| "loss": 0.434, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.7748455923638405, |
| "grad_norm": 0.3144718110561371, |
| "learning_rate": 1.4644660940672628e-06, |
| "loss": 0.4408, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.7756878158338012, |
| "grad_norm": 0.3506813049316406, |
| "learning_rate": 1.454081398330855e-06, |
| "loss": 0.4548, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.776530039303762, |
| "grad_norm": 0.3580954074859619, |
| "learning_rate": 1.4437273847373778e-06, |
| "loss": 0.4638, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.7773722627737226, |
| "grad_norm": 0.3401355445384979, |
| "learning_rate": 1.4334041428781003e-06, |
| "loss": 0.4827, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.7782144862436833, |
| "grad_norm": 0.34176456928253174, |
| "learning_rate": 1.4231117620780188e-06, |
| "loss": 0.5025, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.779056709713644, |
| "grad_norm": 0.3647969663143158, |
| "learning_rate": 1.4128503313951008e-06, |
| "loss": 0.4278, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.7798989331836047, |
| "grad_norm": 0.3605908751487732, |
| "learning_rate": 1.4026199396195078e-06, |
| "loss": 0.4816, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.7807411566535654, |
| "grad_norm": 0.33657947182655334, |
| "learning_rate": 1.3924206752728282e-06, |
| "loss": 0.4488, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.7815833801235261, |
| "grad_norm": 0.3494366407394409, |
| "learning_rate": 1.3822526266073044e-06, |
| "loss": 0.446, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.7824256035934868, |
| "grad_norm": 0.4006534516811371, |
| "learning_rate": 1.3721158816050872e-06, |
| "loss": 0.4588, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.7832678270634476, |
| "grad_norm": 0.36393067240715027, |
| "learning_rate": 1.3620105279774532e-06, |
| "loss": 0.4539, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.7841100505334082, |
| "grad_norm": 0.369555801153183, |
| "learning_rate": 1.3519366531640589e-06, |
| "loss": 0.4803, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.7849522740033689, |
| "grad_norm": 0.31321248412132263, |
| "learning_rate": 1.3418943443321807e-06, |
| "loss": 0.475, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.7857944974733296, |
| "grad_norm": 0.3947639763355255, |
| "learning_rate": 1.3318836883759634e-06, |
| "loss": 0.4564, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.7866367209432903, |
| "grad_norm": 0.3348684012889862, |
| "learning_rate": 1.3219047719156575e-06, |
| "loss": 0.4617, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.787478944413251, |
| "grad_norm": 0.35532504320144653, |
| "learning_rate": 1.3119576812968893e-06, |
| "loss": 0.477, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.7883211678832117, |
| "grad_norm": 0.35622647404670715, |
| "learning_rate": 1.3020425025898926e-06, |
| "loss": 0.4599, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.7891633913531724, |
| "grad_norm": 0.3481491208076477, |
| "learning_rate": 1.292159321588778e-06, |
| "loss": 0.4724, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.790005614823133, |
| "grad_norm": 0.377271443605423, |
| "learning_rate": 1.282308223810786e-06, |
| "loss": 0.4607, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.7908478382930938, |
| "grad_norm": 0.35245975852012634, |
| "learning_rate": 1.272489294495548e-06, |
| "loss": 0.4545, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.7916900617630545, |
| "grad_norm": 0.3466741144657135, |
| "learning_rate": 1.2627026186043423e-06, |
| "loss": 0.434, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.7925322852330151, |
| "grad_norm": 0.3157729506492615, |
| "learning_rate": 1.252948280819375e-06, |
| "loss": 0.4306, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.7933745087029759, |
| "grad_norm": 0.339692085981369, |
| "learning_rate": 1.243226365543026e-06, |
| "loss": 0.4624, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.7942167321729365, |
| "grad_norm": 0.3418128192424774, |
| "learning_rate": 1.2335369568971362e-06, |
| "loss": 0.4604, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.7950589556428973, |
| "grad_norm": 0.33322200179100037, |
| "learning_rate": 1.2238801387222716e-06, |
| "loss": 0.4623, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.795901179112858, |
| "grad_norm": 0.3198584020137787, |
| "learning_rate": 1.2142559945769995e-06, |
| "loss": 0.4588, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.7967434025828186, |
| "grad_norm": 0.3580982983112335, |
| "learning_rate": 1.2046646077371615e-06, |
| "loss": 0.4891, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.7975856260527794, |
| "grad_norm": 0.2945074141025543, |
| "learning_rate": 1.1951060611951615e-06, |
| "loss": 0.4566, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.79842784952274, |
| "grad_norm": 0.3214232921600342, |
| "learning_rate": 1.185580437659241e-06, |
| "loss": 0.4424, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.7992700729927007, |
| "grad_norm": 0.3181054890155792, |
| "learning_rate": 1.1760878195527642e-06, |
| "loss": 0.4495, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.8001122964626615, |
| "grad_norm": 0.3124549686908722, |
| "learning_rate": 1.1666282890135083e-06, |
| "loss": 0.4596, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.8009545199326221, |
| "grad_norm": 0.3557204008102417, |
| "learning_rate": 1.1572019278929457e-06, |
| "loss": 0.4542, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.8017967434025828, |
| "grad_norm": 0.3370462954044342, |
| "learning_rate": 1.147808817755544e-06, |
| "loss": 0.4626, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.8026389668725435, |
| "grad_norm": 0.3505118489265442, |
| "learning_rate": 1.1384490398780563e-06, |
| "loss": 0.463, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.8034811903425042, |
| "grad_norm": 0.36364004015922546, |
| "learning_rate": 1.129122675248816e-06, |
| "loss": 0.4577, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.8043234138124649, |
| "grad_norm": 0.3689129650592804, |
| "learning_rate": 1.1198298045670402e-06, |
| "loss": 0.4615, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.8051656372824256, |
| "grad_norm": 0.37028342485427856, |
| "learning_rate": 1.1105705082421303e-06, |
| "loss": 0.4859, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.8060078607523863, |
| "grad_norm": 0.3648272752761841, |
| "learning_rate": 1.1013448663929704e-06, |
| "loss": 0.4671, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.8068500842223469, |
| "grad_norm": 0.345866322517395, |
| "learning_rate": 1.0921529588472446e-06, |
| "loss": 0.4561, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.8076923076923077, |
| "grad_norm": 0.3294440507888794, |
| "learning_rate": 1.0829948651407374e-06, |
| "loss": 0.4804, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.8085345311622684, |
| "grad_norm": 0.3212808668613434, |
| "learning_rate": 1.0738706645166508e-06, |
| "loss": 0.4342, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.8093767546322291, |
| "grad_norm": 0.33868685364723206, |
| "learning_rate": 1.0647804359249143e-06, |
| "loss": 0.4843, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.8102189781021898, |
| "grad_norm": 0.36153316497802734, |
| "learning_rate": 1.0557242580215066e-06, |
| "loss": 0.4651, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.8110612015721504, |
| "grad_norm": 0.33889979124069214, |
| "learning_rate": 1.0467022091677692e-06, |
| "loss": 0.4553, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.8119034250421112, |
| "grad_norm": 0.34202301502227783, |
| "learning_rate": 1.037714367429734e-06, |
| "loss": 0.506, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.8127456485120719, |
| "grad_norm": 0.3445115089416504, |
| "learning_rate": 1.0287608105774456e-06, |
| "loss": 0.4386, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.8135878719820325, |
| "grad_norm": 0.34236183762550354, |
| "learning_rate": 1.019841616084286e-06, |
| "loss": 0.4441, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.8144300954519933, |
| "grad_norm": 0.3376852869987488, |
| "learning_rate": 1.0109568611263094e-06, |
| "loss": 0.4643, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.815272318921954, |
| "grad_norm": 0.33317050337791443, |
| "learning_rate": 1.002106622581569e-06, |
| "loss": 0.4628, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.8161145423919146, |
| "grad_norm": 0.3268939256668091, |
| "learning_rate": 9.932909770294542e-07, |
| "loss": 0.4513, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.8169567658618754, |
| "grad_norm": 0.32083117961883545, |
| "learning_rate": 9.845100007500292e-07, |
| "loss": 0.4724, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.817798989331836, |
| "grad_norm": 0.36022859811782837, |
| "learning_rate": 9.757637697233723e-07, |
| "loss": 0.4802, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.8186412128017967, |
| "grad_norm": 0.34767282009124756, |
| "learning_rate": 9.670523596289138e-07, |
| "loss": 0.4534, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.8194834362717575, |
| "grad_norm": 0.3325275778770447, |
| "learning_rate": 9.58375845844793e-07, |
| "loss": 0.4681, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.8203256597417181, |
| "grad_norm": 0.3268345296382904, |
| "learning_rate": 9.497343034471896e-07, |
| "loss": 0.4752, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.8211678832116789, |
| "grad_norm": 0.31763315200805664, |
| "learning_rate": 9.41127807209688e-07, |
| "loss": 0.4296, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.8220101066816395, |
| "grad_norm": 0.35879212617874146, |
| "learning_rate": 9.325564316026236e-07, |
| "loss": 0.4732, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.8228523301516002, |
| "grad_norm": 0.3344736695289612, |
| "learning_rate": 9.240202507924412e-07, |
| "loss": 0.456, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.823694553621561, |
| "grad_norm": 0.3530421555042267, |
| "learning_rate": 9.155193386410466e-07, |
| "loss": 0.4732, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.8245367770915216, |
| "grad_norm": 0.32757529616355896, |
| "learning_rate": 9.070537687051817e-07, |
| "loss": 0.4557, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.8253790005614823, |
| "grad_norm": 0.31608912348747253, |
| "learning_rate": 8.986236142357707e-07, |
| "loss": 0.4505, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.826221224031443, |
| "grad_norm": 0.32184287905693054, |
| "learning_rate": 8.902289481772996e-07, |
| "loss": 0.4261, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.8270634475014037, |
| "grad_norm": 0.36576586961746216, |
| "learning_rate": 8.818698431671774e-07, |
| "loss": 0.4738, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.8279056709713644, |
| "grad_norm": 0.34923386573791504, |
| "learning_rate": 8.735463715351139e-07, |
| "loss": 0.4699, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.8287478944413251, |
| "grad_norm": 0.3303389549255371, |
| "learning_rate": 8.652586053024836e-07, |
| "loss": 0.447, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.8295901179112858, |
| "grad_norm": 0.3435465097427368, |
| "learning_rate": 8.570066161817176e-07, |
| "loss": 0.429, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.8304323413812464, |
| "grad_norm": 0.30803781747817993, |
| "learning_rate": 8.487904755756676e-07, |
| "loss": 0.4628, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.8312745648512072, |
| "grad_norm": 0.3358916640281677, |
| "learning_rate": 8.406102545769989e-07, |
| "loss": 0.4662, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.8321167883211679, |
| "grad_norm": 0.3121514618396759, |
| "learning_rate": 8.324660239675697e-07, |
| "loss": 0.4482, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.8329590117911286, |
| "grad_norm": 0.33737143874168396, |
| "learning_rate": 8.243578542178227e-07, |
| "loss": 0.4405, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.8338012352610893, |
| "grad_norm": 0.34829044342041016, |
| "learning_rate": 8.16285815486168e-07, |
| "loss": 0.4804, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.83464345873105, |
| "grad_norm": 0.352693110704422, |
| "learning_rate": 8.082499776183883e-07, |
| "loss": 0.4785, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.8354856822010107, |
| "grad_norm": 0.31321579217910767, |
| "learning_rate": 8.002504101470204e-07, |
| "loss": 0.4538, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.8363279056709714, |
| "grad_norm": 0.2995845079421997, |
| "learning_rate": 7.922871822907641e-07, |
| "loss": 0.4467, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.837170129140932, |
| "grad_norm": 0.32853782176971436, |
| "learning_rate": 7.843603629538804e-07, |
| "loss": 0.488, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.8380123526108928, |
| "grad_norm": 0.31039342284202576, |
| "learning_rate": 7.764700207255904e-07, |
| "loss": 0.4746, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.8388545760808535, |
| "grad_norm": 0.3386421501636505, |
| "learning_rate": 7.686162238794898e-07, |
| "loss": 0.4655, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.8396967995508141, |
| "grad_norm": 0.34342360496520996, |
| "learning_rate": 7.607990403729526e-07, |
| "loss": 0.4375, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.8405390230207749, |
| "grad_norm": 0.3342914581298828, |
| "learning_rate": 7.530185378465459e-07, |
| "loss": 0.4417, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.8413812464907355, |
| "grad_norm": 0.3670804798603058, |
| "learning_rate": 7.452747836234392e-07, |
| "loss": 0.472, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.8422234699606962, |
| "grad_norm": 0.32072174549102783, |
| "learning_rate": 7.375678447088347e-07, |
| "loss": 0.4538, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.843065693430657, |
| "grad_norm": 0.3503890037536621, |
| "learning_rate": 7.298977877893688e-07, |
| "loss": 0.4648, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.8439079169006176, |
| "grad_norm": 0.3428994417190552, |
| "learning_rate": 7.222646792325516e-07, |
| "loss": 0.4639, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.8447501403705783, |
| "grad_norm": 0.3595855236053467, |
| "learning_rate": 7.146685850861851e-07, |
| "loss": 0.4595, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.845592363840539, |
| "grad_norm": 0.34215492010116577, |
| "learning_rate": 7.071095710777925e-07, |
| "loss": 0.4493, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.8464345873104997, |
| "grad_norm": 0.3216904401779175, |
| "learning_rate": 6.995877026140468e-07, |
| "loss": 0.4516, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.8472768107804605, |
| "grad_norm": 0.33294597268104553, |
| "learning_rate": 6.921030447802146e-07, |
| "loss": 0.4595, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.8481190342504211, |
| "grad_norm": 0.3170475959777832, |
| "learning_rate": 6.846556623395795e-07, |
| "loss": 0.4438, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.8489612577203818, |
| "grad_norm": 0.3171912133693695, |
| "learning_rate": 6.772456197328919e-07, |
| "loss": 0.4724, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.8498034811903425, |
| "grad_norm": 0.34563779830932617, |
| "learning_rate": 6.698729810778065e-07, |
| "loss": 0.4691, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.8506457046603032, |
| "grad_norm": 0.32657885551452637, |
| "learning_rate": 6.625378101683317e-07, |
| "loss": 0.4664, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.8514879281302639, |
| "grad_norm": 0.35269269347190857, |
| "learning_rate": 6.552401704742678e-07, |
| "loss": 0.4788, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.8523301516002246, |
| "grad_norm": 0.3336371183395386, |
| "learning_rate": 6.479801251406748e-07, |
| "loss": 0.4678, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.8531723750701853, |
| "grad_norm": 0.3277773857116699, |
| "learning_rate": 6.40757736987307e-07, |
| "loss": 0.4671, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.8540145985401459, |
| "grad_norm": 0.3333572447299957, |
| "learning_rate": 6.335730685080838e-07, |
| "loss": 0.4708, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.8548568220101067, |
| "grad_norm": 0.3374318778514862, |
| "learning_rate": 6.26426181870542e-07, |
| "loss": 0.4384, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.8556990454800674, |
| "grad_norm": 0.3308733105659485, |
| "learning_rate": 6.193171389152996e-07, |
| "loss": 0.4482, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.856541268950028, |
| "grad_norm": 0.347648561000824, |
| "learning_rate": 6.122460011555187e-07, |
| "loss": 0.4679, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.8573834924199888, |
| "grad_norm": 0.3401923179626465, |
| "learning_rate": 6.052128297763804e-07, |
| "loss": 0.5031, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.8582257158899494, |
| "grad_norm": 0.329550176858902, |
| "learning_rate": 5.982176856345445e-07, |
| "loss": 0.4705, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.8590679393599102, |
| "grad_norm": 0.3228277862071991, |
| "learning_rate": 5.912606292576284e-07, |
| "loss": 0.4551, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.8599101628298709, |
| "grad_norm": 0.3513161242008209, |
| "learning_rate": 5.843417208436908e-07, |
| "loss": 0.4995, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.8607523862998315, |
| "grad_norm": 0.33879998326301575, |
| "learning_rate": 5.774610202606939e-07, |
| "loss": 0.4468, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.8615946097697923, |
| "grad_norm": 0.3087005019187927, |
| "learning_rate": 5.706185870460018e-07, |
| "loss": 0.4404, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.862436833239753, |
| "grad_norm": 0.334729939699173, |
| "learning_rate": 5.63814480405856e-07, |
| "loss": 0.4747, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.8632790567097136, |
| "grad_norm": 0.3216525614261627, |
| "learning_rate": 5.570487592148666e-07, |
| "loss": 0.4475, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.8641212801796744, |
| "grad_norm": 0.3208960294723511, |
| "learning_rate": 5.503214820154979e-07, |
| "loss": 0.462, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.864963503649635, |
| "grad_norm": 0.35795074701309204, |
| "learning_rate": 5.436327070175729e-07, |
| "loss": 0.4573, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.8658057271195957, |
| "grad_norm": 0.33961227536201477, |
| "learning_rate": 5.369824920977567e-07, |
| "loss": 0.4597, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.8666479505895565, |
| "grad_norm": 0.32470422983169556, |
| "learning_rate": 5.303708947990638e-07, |
| "loss": 0.4706, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.8674901740595171, |
| "grad_norm": 0.31693166494369507, |
| "learning_rate": 5.237979723303582e-07, |
| "loss": 0.4491, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.8683323975294778, |
| "grad_norm": 0.33701324462890625, |
| "learning_rate": 5.172637815658583e-07, |
| "loss": 0.471, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.8691746209994385, |
| "grad_norm": 0.3477896451950073, |
| "learning_rate": 5.107683790446411e-07, |
| "loss": 0.465, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.8700168444693992, |
| "grad_norm": 0.3382064700126648, |
| "learning_rate": 5.04311820970163e-07, |
| "loss": 0.5033, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.87085906793936, |
| "grad_norm": 0.33173805475234985, |
| "learning_rate": 4.978941632097612e-07, |
| "loss": 0.4809, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.8717012914093206, |
| "grad_norm": 0.3257426917552948, |
| "learning_rate": 4.915154612941781e-07, |
| "loss": 0.4539, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.8725435148792813, |
| "grad_norm": 0.3181188404560089, |
| "learning_rate": 4.851757704170796e-07, |
| "loss": 0.4636, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.873385738349242, |
| "grad_norm": 0.3330731689929962, |
| "learning_rate": 4.788751454345763e-07, |
| "loss": 0.4539, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.8742279618192027, |
| "grad_norm": 0.3220534324645996, |
| "learning_rate": 4.726136408647464e-07, |
| "loss": 0.4509, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.8750701852891634, |
| "grad_norm": 0.30984482169151306, |
| "learning_rate": 4.663913108871726e-07, |
| "loss": 0.432, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.8759124087591241, |
| "grad_norm": 0.3303835988044739, |
| "learning_rate": 4.60208209342462e-07, |
| "loss": 0.45, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.8767546322290848, |
| "grad_norm": 0.3490801453590393, |
| "learning_rate": 4.540643897317887e-07, |
| "loss": 0.4558, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.8775968556990454, |
| "grad_norm": 0.31086137890815735, |
| "learning_rate": 4.4795990521642684e-07, |
| "loss": 0.4454, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.8784390791690062, |
| "grad_norm": 0.34392303228378296, |
| "learning_rate": 4.4189480861729137e-07, |
| "loss": 0.4594, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.8792813026389669, |
| "grad_norm": 0.3247262239456177, |
| "learning_rate": 4.35869152414482e-07, |
| "loss": 0.4592, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.8801235261089275, |
| "grad_norm": 0.34201639890670776, |
| "learning_rate": 4.2988298874682754e-07, |
| "loss": 0.441, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.8809657495788883, |
| "grad_norm": 0.35518065094947815, |
| "learning_rate": 4.239363694114368e-07, |
| "loss": 0.4735, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.881807973048849, |
| "grad_norm": 0.3133198916912079, |
| "learning_rate": 4.1802934586324897e-07, |
| "loss": 0.4543, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.8826501965188096, |
| "grad_norm": 0.3672572076320648, |
| "learning_rate": 4.1216196921458786e-07, |
| "loss": 0.4466, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.8834924199887704, |
| "grad_norm": 0.3202572464942932, |
| "learning_rate": 4.0633429023472004e-07, |
| "loss": 0.4643, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.884334643458731, |
| "grad_norm": 0.33787569403648376, |
| "learning_rate": 4.0054635934941633e-07, |
| "loss": 0.4664, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.8851768669286918, |
| "grad_norm": 0.3472073972225189, |
| "learning_rate": 3.947982266405159e-07, |
| "loss": 0.4589, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.8860190903986525, |
| "grad_norm": 0.3364206850528717, |
| "learning_rate": 3.890899418454913e-07, |
| "loss": 0.451, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.8868613138686131, |
| "grad_norm": 0.3336820602416992, |
| "learning_rate": 3.834215543570191e-07, |
| "loss": 0.4541, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.8877035373385739, |
| "grad_norm": 0.35087835788726807, |
| "learning_rate": 3.777931132225526e-07, |
| "loss": 0.4952, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.8885457608085345, |
| "grad_norm": 0.36997100710868835, |
| "learning_rate": 3.72204667143895e-07, |
| "loss": 0.4624, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.8893879842784952, |
| "grad_norm": 0.34807565808296204, |
| "learning_rate": 3.666562644767824e-07, |
| "loss": 0.48, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.890230207748456, |
| "grad_norm": 0.300436407327652, |
| "learning_rate": 3.611479532304618e-07, |
| "loss": 0.4361, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.8910724312184166, |
| "grad_norm": 0.3128056824207306, |
| "learning_rate": 3.556797810672785e-07, |
| "loss": 0.4511, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.8919146546883773, |
| "grad_norm": 0.3215906322002411, |
| "learning_rate": 3.5025179530225995e-07, |
| "loss": 0.4541, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.892756878158338, |
| "grad_norm": 0.3172469139099121, |
| "learning_rate": 3.4486404290271115e-07, |
| "loss": 0.4803, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.8935991016282987, |
| "grad_norm": 0.31866055727005005, |
| "learning_rate": 3.395165704878023e-07, |
| "loss": 0.4688, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.8944413250982594, |
| "grad_norm": 0.3102911710739136, |
| "learning_rate": 3.3420942432817127e-07, |
| "loss": 0.46, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.8952835485682201, |
| "grad_norm": 0.2940250039100647, |
| "learning_rate": 3.289426503455201e-07, |
| "loss": 0.4482, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.8961257720381808, |
| "grad_norm": 0.34147220849990845, |
| "learning_rate": 3.237162941122185e-07, |
| "loss": 0.4358, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.8969679955081415, |
| "grad_norm": 0.32565441727638245, |
| "learning_rate": 3.185304008509077e-07, |
| "loss": 0.4543, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.8978102189781022, |
| "grad_norm": 0.34025701880455017, |
| "learning_rate": 3.133850154341139e-07, |
| "loss": 0.4523, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.8986524424480629, |
| "grad_norm": 0.3196007311344147, |
| "learning_rate": 3.082801823838527e-07, |
| "loss": 0.43, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.8994946659180236, |
| "grad_norm": 0.3036392331123352, |
| "learning_rate": 3.0321594587125083e-07, |
| "loss": 0.4414, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.9003368893879843, |
| "grad_norm": 0.32302045822143555, |
| "learning_rate": 2.9819234971616154e-07, |
| "loss": 0.4655, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.9011791128579449, |
| "grad_norm": 0.3299964368343353, |
| "learning_rate": 2.932094373867811e-07, |
| "loss": 0.4653, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.9020213363279057, |
| "grad_norm": 0.32102474570274353, |
| "learning_rate": 2.882672519992824e-07, |
| "loss": 0.434, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.9028635597978664, |
| "grad_norm": 0.3391880393028259, |
| "learning_rate": 2.833658363174302e-07, |
| "loss": 0.4677, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.903705783267827, |
| "grad_norm": 0.318908154964447, |
| "learning_rate": 2.785052327522214e-07, |
| "loss": 0.4661, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.9045480067377878, |
| "grad_norm": 0.37129920721054077, |
| "learning_rate": 2.73685483361511e-07, |
| "loss": 0.4942, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.9053902302077484, |
| "grad_norm": 0.3275892734527588, |
| "learning_rate": 2.6890662984965234e-07, |
| "loss": 0.4769, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.9062324536777091, |
| "grad_norm": 0.32207468152046204, |
| "learning_rate": 2.6416871356713224e-07, |
| "loss": 0.459, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.9070746771476699, |
| "grad_norm": 0.32156863808631897, |
| "learning_rate": 2.594717755102205e-07, |
| "loss": 0.4553, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.9079169006176305, |
| "grad_norm": 0.3304842710494995, |
| "learning_rate": 2.548158563206038e-07, |
| "loss": 0.4779, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.9087591240875912, |
| "grad_norm": 0.3306860029697418, |
| "learning_rate": 2.5020099628504603e-07, |
| "loss": 0.4543, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.909601347557552, |
| "grad_norm": 0.3527275621891022, |
| "learning_rate": 2.4562723533503084e-07, |
| "loss": 0.4604, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.9104435710275126, |
| "grad_norm": 0.33252066373825073, |
| "learning_rate": 2.4109461304642254e-07, |
| "loss": 0.4467, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.9112857944974734, |
| "grad_norm": 0.3353213667869568, |
| "learning_rate": 2.3660316863911682e-07, |
| "loss": 0.4306, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.912128017967434, |
| "grad_norm": 0.33303454518318176, |
| "learning_rate": 2.3215294097670927e-07, |
| "loss": 0.456, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.9129702414373947, |
| "grad_norm": 0.3394293487071991, |
| "learning_rate": 2.277439685661509e-07, |
| "loss": 0.4491, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.9138124649073555, |
| "grad_norm": 0.30794280767440796, |
| "learning_rate": 2.2337628955742263e-07, |
| "loss": 0.4618, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.9146546883773161, |
| "grad_norm": 0.31820741295814514, |
| "learning_rate": 2.1904994174319903e-07, |
| "loss": 0.4513, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.9154969118472768, |
| "grad_norm": 0.3417550325393677, |
| "learning_rate": 2.1476496255852685e-07, |
| "loss": 0.4437, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.9163391353172375, |
| "grad_norm": 0.3287929892539978, |
| "learning_rate": 2.1052138908049303e-07, |
| "loss": 0.4683, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.9171813587871982, |
| "grad_norm": 0.3297256529331207, |
| "learning_rate": 2.0631925802791608e-07, |
| "loss": 0.4547, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.9180235822571589, |
| "grad_norm": 0.33473873138427734, |
| "learning_rate": 2.0215860576101532e-07, |
| "loss": 0.4455, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.9188658057271196, |
| "grad_norm": 0.334439754486084, |
| "learning_rate": 1.9803946828110376e-07, |
| "loss": 0.4639, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.9197080291970803, |
| "grad_norm": 0.3432200849056244, |
| "learning_rate": 1.9396188123027736e-07, |
| "loss": 0.4752, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.9205502526670409, |
| "grad_norm": 0.3343367278575897, |
| "learning_rate": 1.8992587989110133e-07, |
| "loss": 0.4781, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.9213924761370017, |
| "grad_norm": 0.33942049741744995, |
| "learning_rate": 1.8593149918630927e-07, |
| "loss": 0.4673, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.9222346996069624, |
| "grad_norm": 0.32365378737449646, |
| "learning_rate": 1.8197877367849948e-07, |
| "loss": 0.482, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.9230769230769231, |
| "grad_norm": 0.34894439578056335, |
| "learning_rate": 1.7806773756983641e-07, |
| "loss": 0.4658, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.9239191465468838, |
| "grad_norm": 0.35942527651786804, |
| "learning_rate": 1.7419842470175196e-07, |
| "loss": 0.4569, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.9247613700168444, |
| "grad_norm": 0.31009694933891296, |
| "learning_rate": 1.7037086855465902e-07, |
| "loss": 0.4564, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.9256035934868052, |
| "grad_norm": 0.30984339118003845, |
| "learning_rate": 1.6658510224765333e-07, |
| "loss": 0.4567, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.9264458169567659, |
| "grad_norm": 0.3273439109325409, |
| "learning_rate": 1.6284115853823445e-07, |
| "loss": 0.454, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.9272880404267265, |
| "grad_norm": 0.3297070562839508, |
| "learning_rate": 1.5913906982201744e-07, |
| "loss": 0.4438, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.9281302638966873, |
| "grad_norm": 0.31525179743766785, |
| "learning_rate": 1.554788681324554e-07, |
| "loss": 0.4877, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.928972487366648, |
| "grad_norm": 0.3105153739452362, |
| "learning_rate": 1.5186058514055912e-07, |
| "loss": 0.4317, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.9298147108366086, |
| "grad_norm": 0.3106979429721832, |
| "learning_rate": 1.482842521546285e-07, |
| "loss": 0.4627, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.9306569343065694, |
| "grad_norm": 0.31040892004966736, |
| "learning_rate": 1.447499001199748e-07, |
| "loss": 0.4383, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.93149915777653, |
| "grad_norm": 0.33874398469924927, |
| "learning_rate": 1.4125755961865827e-07, |
| "loss": 0.4794, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.9323413812464907, |
| "grad_norm": 0.3188782036304474, |
| "learning_rate": 1.3780726086922103e-07, |
| "loss": 0.4523, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.9331836047164515, |
| "grad_norm": 0.326948344707489, |
| "learning_rate": 1.3439903372642615e-07, |
| "loss": 0.4595, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.9340258281864121, |
| "grad_norm": 0.3299093544483185, |
| "learning_rate": 1.3103290768099796e-07, |
| "loss": 0.4418, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.9348680516563729, |
| "grad_norm": 0.3170269727706909, |
| "learning_rate": 1.2770891185937106e-07, |
| "loss": 0.4831, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.9357102751263335, |
| "grad_norm": 0.3100402057170868, |
| "learning_rate": 1.244270750234333e-07, |
| "loss": 0.4914, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.9365524985962942, |
| "grad_norm": 0.31475040316581726, |
| "learning_rate": 1.2118742557027885e-07, |
| "loss": 0.4592, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.937394722066255, |
| "grad_norm": 0.32388702034950256, |
| "learning_rate": 1.1798999153196433e-07, |
| "loss": 0.4579, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.9382369455362156, |
| "grad_norm": 0.31572088599205017, |
| "learning_rate": 1.1483480057526364e-07, |
| "loss": 0.4435, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.9390791690061763, |
| "grad_norm": 0.3283679187297821, |
| "learning_rate": 1.1172188000142803e-07, |
| "loss": 0.4483, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.939921392476137, |
| "grad_norm": 0.32103320956230164, |
| "learning_rate": 1.0865125674595467e-07, |
| "loss": 0.4839, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.9407636159460977, |
| "grad_norm": 0.3113073706626892, |
| "learning_rate": 1.0562295737834738e-07, |
| "loss": 0.4756, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.9416058394160584, |
| "grad_norm": 0.3419308066368103, |
| "learning_rate": 1.026370081018907e-07, |
| "loss": 0.4271, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.9424480628860191, |
| "grad_norm": 0.4538518488407135, |
| "learning_rate": 9.969343475342285e-08, |
| "loss": 0.4584, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.9432902863559798, |
| "grad_norm": 0.35381177067756653, |
| "learning_rate": 9.679226280310982e-08, |
| "loss": 0.477, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.9441325098259404, |
| "grad_norm": 0.3468037545681, |
| "learning_rate": 9.393351735422773e-08, |
| "loss": 0.4517, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.9449747332959012, |
| "grad_norm": 0.3164037764072418, |
| "learning_rate": 9.111722314294358e-08, |
| "loss": 0.446, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.9458169567658619, |
| "grad_norm": 0.33864447474479675, |
| "learning_rate": 8.834340453810375e-08, |
| "loss": 0.481, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.9466591802358225, |
| "grad_norm": 0.31847962737083435, |
| "learning_rate": 8.561208554101863e-08, |
| "loss": 0.452, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.9475014037057833, |
| "grad_norm": 0.3508765995502472, |
| "learning_rate": 8.29232897852611e-08, |
| "loss": 0.4696, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.9483436271757439, |
| "grad_norm": 0.3419753909111023, |
| "learning_rate": 8.027704053645613e-08, |
| "loss": 0.4534, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.9491858506457047, |
| "grad_norm": 0.3147391080856323, |
| "learning_rate": 7.76733606920832e-08, |
| "loss": 0.4466, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.9500280741156654, |
| "grad_norm": 0.3117488920688629, |
| "learning_rate": 7.511227278127697e-08, |
| "loss": 0.4598, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.950870297585626, |
| "grad_norm": 0.30068475008010864, |
| "learning_rate": 7.259379896463248e-08, |
| "loss": 0.462, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.9517125210555868, |
| "grad_norm": 0.32078614830970764, |
| "learning_rate": 7.011796103401192e-08, |
| "loss": 0.4382, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.9525547445255474, |
| "grad_norm": 0.31206777691841125, |
| "learning_rate": 6.768478041236037e-08, |
| "loss": 0.4833, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.9533969679955081, |
| "grad_norm": 0.31248289346694946, |
| "learning_rate": 6.529427815351374e-08, |
| "loss": 0.4607, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.9542391914654689, |
| "grad_norm": 0.3496483862400055, |
| "learning_rate": 6.294647494202444e-08, |
| "loss": 0.4231, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.9550814149354295, |
| "grad_norm": 0.32398563623428345, |
| "learning_rate": 6.064139109297485e-08, |
| "loss": 0.4466, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.9559236384053902, |
| "grad_norm": 0.3160340189933777, |
| "learning_rate": 5.8379046551807486e-08, |
| "loss": 0.443, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.956765861875351, |
| "grad_norm": 0.3245152235031128, |
| "learning_rate": 5.615946089414737e-08, |
| "loss": 0.4684, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.9576080853453116, |
| "grad_norm": 0.3228430151939392, |
| "learning_rate": 5.398265332563935e-08, |
| "loss": 0.4714, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.9584503088152723, |
| "grad_norm": 0.3297078609466553, |
| "learning_rate": 5.1848642681773254e-08, |
| "loss": 0.4613, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.959292532285233, |
| "grad_norm": 0.32669320702552795, |
| "learning_rate": 4.975744742772848e-08, |
| "loss": 0.4832, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.9601347557551937, |
| "grad_norm": 0.3655106723308563, |
| "learning_rate": 4.770908565820964e-08, |
| "loss": 0.4831, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.9609769792251545, |
| "grad_norm": 0.3126697242259979, |
| "learning_rate": 4.5703575097292286e-08, |
| "loss": 0.4384, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.9618192026951151, |
| "grad_norm": 0.33409810066223145, |
| "learning_rate": 4.37409330982691e-08, |
| "loss": 0.4351, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.9626614261650758, |
| "grad_norm": 0.3374941945075989, |
| "learning_rate": 4.182117664349783e-08, |
| "loss": 0.424, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.9635036496350365, |
| "grad_norm": 0.36208537220954895, |
| "learning_rate": 3.99443223442586e-08, |
| "loss": 0.4723, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.9643458731049972, |
| "grad_norm": 0.2885582447052002, |
| "learning_rate": 3.8110386440605164e-08, |
| "loss": 0.4464, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.9651880965749579, |
| "grad_norm": 0.30136173963546753, |
| "learning_rate": 3.631938480122777e-08, |
| "loss": 0.4362, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.9660303200449186, |
| "grad_norm": 0.3135235905647278, |
| "learning_rate": 3.457133292331494e-08, |
| "loss": 0.4462, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.9668725435148793, |
| "grad_norm": 0.3099575936794281, |
| "learning_rate": 3.2866245932418606e-08, |
| "loss": 0.4595, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.9677147669848399, |
| "grad_norm": 0.30907002091407776, |
| "learning_rate": 3.120413858232474e-08, |
| "loss": 0.4586, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.9685569904548007, |
| "grad_norm": 0.3242543935775757, |
| "learning_rate": 2.9585025254924572e-08, |
| "loss": 0.4581, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.9693992139247614, |
| "grad_norm": 0.33935239911079407, |
| "learning_rate": 2.8008919960090253e-08, |
| "loss": 0.4586, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.970241437394722, |
| "grad_norm": 0.30846089124679565, |
| "learning_rate": 2.6475836335553838e-08, |
| "loss": 0.4396, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.9710836608646828, |
| "grad_norm": 0.30442187190055847, |
| "learning_rate": 2.4985787646788497e-08, |
| "loss": 0.4384, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.9719258843346434, |
| "grad_norm": 0.31546372175216675, |
| "learning_rate": 2.3538786786896918e-08, |
| "loss": 0.4443, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.9727681078046042, |
| "grad_norm": 0.32305002212524414, |
| "learning_rate": 2.2134846276494205e-08, |
| "loss": 0.4267, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.9736103312745649, |
| "grad_norm": 0.3233368694782257, |
| "learning_rate": 2.0773978263605164e-08, |
| "loss": 0.4707, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.9744525547445255, |
| "grad_norm": 0.31967225670814514, |
| "learning_rate": 1.9456194523554404e-08, |
| "loss": 0.4829, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.9752947782144863, |
| "grad_norm": 0.31925785541534424, |
| "learning_rate": 1.8181506458869735e-08, |
| "loss": 0.4679, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.976137001684447, |
| "grad_norm": 0.3099507689476013, |
| "learning_rate": 1.69499250991767e-08, |
| "loss": 0.4498, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.9769792251544076, |
| "grad_norm": 0.32314586639404297, |
| "learning_rate": 1.576146110111032e-08, |
| "loss": 0.4782, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.9778214486243684, |
| "grad_norm": 0.3091767728328705, |
| "learning_rate": 1.4616124748217387e-08, |
| "loss": 0.4562, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.978663672094329, |
| "grad_norm": 0.3342806398868561, |
| "learning_rate": 1.351392595087042e-08, |
| "loss": 0.4515, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.9795058955642897, |
| "grad_norm": 0.3417477607727051, |
| "learning_rate": 1.2454874246181081e-08, |
| "loss": 0.4769, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.9803481190342505, |
| "grad_norm": 0.32453441619873047, |
| "learning_rate": 1.1438978797916888e-08, |
| "loss": 0.4575, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.9811903425042111, |
| "grad_norm": 0.3606019914150238, |
| "learning_rate": 1.0466248396424072e-08, |
| "loss": 0.4627, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.9820325659741718, |
| "grad_norm": 0.3059011697769165, |
| "learning_rate": 9.536691458548741e-09, |
| "loss": 0.4475, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.9828747894441325, |
| "grad_norm": 0.3114994466304779, |
| "learning_rate": 8.650316027566386e-09, |
| "loss": 0.454, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.9837170129140932, |
| "grad_norm": 0.3212180733680725, |
| "learning_rate": 7.807129773110822e-09, |
| "loss": 0.4512, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.9845592363840538, |
| "grad_norm": 0.3352208733558655, |
| "learning_rate": 7.007139991108136e-09, |
| "loss": 0.4453, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.9854014598540146, |
| "grad_norm": 0.33941376209259033, |
| "learning_rate": 6.25035360371451e-09, |
| "loss": 0.4793, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.9862436833239753, |
| "grad_norm": 0.3476077914237976, |
| "learning_rate": 5.536777159254603e-09, |
| "loss": 0.4851, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.987085906793936, |
| "grad_norm": 0.33196163177490234, |
| "learning_rate": 4.866416832167153e-09, |
| "loss": 0.4625, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.9879281302638967, |
| "grad_norm": 0.32519593834877014, |
| "learning_rate": 4.239278422948911e-09, |
| "loss": 0.4776, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.9887703537338574, |
| "grad_norm": 0.3022322356700897, |
| "learning_rate": 3.655367358106343e-09, |
| "loss": 0.4776, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.9896125772038181, |
| "grad_norm": 0.3089068830013275, |
| "learning_rate": 3.1146886901090024e-09, |
| "loss": 0.448, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.9904548006737788, |
| "grad_norm": 0.31966155767440796, |
| "learning_rate": 2.617247097342901e-09, |
| "loss": 0.4386, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.9912970241437394, |
| "grad_norm": 0.3162902593612671, |
| "learning_rate": 2.1630468840738716e-09, |
| "loss": 0.4569, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.9921392476137002, |
| "grad_norm": 0.3310737907886505, |
| "learning_rate": 1.7520919804075997e-09, |
| "loss": 0.4602, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.9929814710836609, |
| "grad_norm": 0.33026689291000366, |
| "learning_rate": 1.3843859422574269e-09, |
| "loss": 0.4783, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.9938236945536215, |
| "grad_norm": 0.3080196976661682, |
| "learning_rate": 1.0599319513115992e-09, |
| "loss": 0.4715, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.9946659180235823, |
| "grad_norm": 0.31929126381874084, |
| "learning_rate": 7.787328150071771e-10, |
| "loss": 0.4488, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.9955081414935429, |
| "grad_norm": 0.3194161057472229, |
| "learning_rate": 5.40790966505611e-10, |
| "loss": 0.43, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.9963503649635036, |
| "grad_norm": 0.3364320695400238, |
| "learning_rate": 3.4610846467109106e-10, |
| "loss": 0.4737, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.9971925884334644, |
| "grad_norm": 0.317007452249527, |
| "learning_rate": 1.9468699405444936e-10, |
| "loss": 0.4806, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.998034811903425, |
| "grad_norm": 0.3290872871875763, |
| "learning_rate": 8.652786487484133e-11, |
| "loss": 0.4443, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.9988770353733858, |
| "grad_norm": 0.3284390866756439, |
| "learning_rate": 2.1632013013084265e-11, |
| "loss": 0.4694, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.9997192588433464, |
| "grad_norm": 0.3138267397880554, |
| "learning_rate": 0.0, |
| "loss": 0.4357, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.9997192588433464, |
| "step": 1187, |
| "total_flos": 1539275766759424.0, |
| "train_loss": 0.48665387864462345, |
| "train_runtime": 40217.8604, |
| "train_samples_per_second": 2.833, |
| "train_steps_per_second": 0.03 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 1187, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1539275766759424.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
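
The dump above is a Hugging Face Trainer `trainer_state.json`. A minimal sketch for pulling the per-step loss curve out of it using only the standard library; the filename is an assumption (point it at wherever your run saved the state), and the key names follow the structure of the log entries shown above:

```python
import json

# Path is an assumption; adjust to the trainer_state.json saved by your run.
with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step entries carry a "loss" key; the final summary entry carries
# "train_loss"/"train_runtime" instead, so this filter drops it.
step_logs = [e for e in state["log_history"] if "loss" in e]

steps = [e["step"] for e in step_logs]
losses = [e["loss"] for e in step_logs]
lrs = [e["learning_rate"] for e in step_logs]

print(f"{len(steps)} logged steps, "
      f"first loss {losses[0]:.4f}, last loss {losses[-1]:.4f}, "
      f"reported train_loss {state['log_history'][-1]['train_loss']:.4f}")
```

With this run's numbers, the script would report a final step loss of 0.4357 against the averaged `train_loss` of 0.4867, the gap reflecting that `train_loss` averages over the whole epoch while the last step is logged after the learning rate has decayed to 0.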