| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 1190, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0016806722689075631, |
| "grad_norm": 8.21841464435543, |
| "learning_rate": 9.99998257609161e-06, |
| "loss": 0.3444, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0033613445378151263, |
| "grad_norm": 6.5307561999009405, |
| "learning_rate": 9.999930304487874e-06, |
| "loss": 0.2694, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.005042016806722689, |
| "grad_norm": 3.8967180599837037, |
| "learning_rate": 9.999843185553106e-06, |
| "loss": 0.1601, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0067226890756302525, |
| "grad_norm": 3.516207675689668, |
| "learning_rate": 9.999721219894482e-06, |
| "loss": 0.1494, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.008403361344537815, |
| "grad_norm": 6.088858147617413, |
| "learning_rate": 9.999564408362054e-06, |
| "loss": 0.2068, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.010084033613445379, |
| "grad_norm": 4.5663525378609835, |
| "learning_rate": 9.999372752048729e-06, |
| "loss": 0.1637, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.011764705882352941, |
| "grad_norm": 4.286095780850501, |
| "learning_rate": 9.999146252290264e-06, |
| "loss": 0.1554, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.013445378151260505, |
| "grad_norm": 4.03551304941898, |
| "learning_rate": 9.998884910665267e-06, |
| "loss": 0.157, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.015126050420168067, |
| "grad_norm": 5.805511665267424, |
| "learning_rate": 9.998588728995176e-06, |
| "loss": 0.2141, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.01680672268907563, |
| "grad_norm": 5.634463236999942, |
| "learning_rate": 9.998257709344246e-06, |
| "loss": 0.2417, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.018487394957983194, |
| "grad_norm": 4.639281556442096, |
| "learning_rate": 9.997891854019538e-06, |
| "loss": 0.2314, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.020168067226890758, |
| "grad_norm": 4.275446688406603, |
| "learning_rate": 9.997491165570907e-06, |
| "loss": 0.183, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.021848739495798318, |
| "grad_norm": 5.1126652407586555, |
| "learning_rate": 9.997055646790974e-06, |
| "loss": 0.251, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.023529411764705882, |
| "grad_norm": 5.537773614821847, |
| "learning_rate": 9.996585300715117e-06, |
| "loss": 0.2673, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.025210084033613446, |
| "grad_norm": 4.7256726060040455, |
| "learning_rate": 9.99608013062144e-06, |
| "loss": 0.1974, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.02689075630252101, |
| "grad_norm": 4.187375261904063, |
| "learning_rate": 9.995540140030759e-06, |
| "loss": 0.2111, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.02857142857142857, |
| "grad_norm": 4.0040557938220385, |
| "learning_rate": 9.994965332706574e-06, |
| "loss": 0.1859, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.030252100840336135, |
| "grad_norm": 3.3281846391864676, |
| "learning_rate": 9.99435571265504e-06, |
| "loss": 0.171, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.031932773109243695, |
| "grad_norm": 4.478925095913666, |
| "learning_rate": 9.993711284124943e-06, |
| "loss": 0.1966, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.03361344537815126, |
| "grad_norm": 3.6018656957376756, |
| "learning_rate": 9.99303205160767e-06, |
| "loss": 0.2085, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.03529411764705882, |
| "grad_norm": 4.319806671578674, |
| "learning_rate": 9.992318019837171e-06, |
| "loss": 0.2047, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.03697478991596639, |
| "grad_norm": 4.620436316163216, |
| "learning_rate": 9.991569193789938e-06, |
| "loss": 0.2713, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.03865546218487395, |
| "grad_norm": 4.4035777895289065, |
| "learning_rate": 9.990785578684963e-06, |
| "loss": 0.1968, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.040336134453781515, |
| "grad_norm": 4.529546514351305, |
| "learning_rate": 9.989967179983699e-06, |
| "loss": 0.2439, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.04201680672268908, |
| "grad_norm": 4.146310383033315, |
| "learning_rate": 9.989114003390028e-06, |
| "loss": 0.1803, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.043697478991596636, |
| "grad_norm": 4.256564415881964, |
| "learning_rate": 9.988226054850218e-06, |
| "loss": 0.1626, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.0453781512605042, |
| "grad_norm": 4.278060540315039, |
| "learning_rate": 9.987303340552885e-06, |
| "loss": 0.2531, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.047058823529411764, |
| "grad_norm": 5.409118538573271, |
| "learning_rate": 9.98634586692894e-06, |
| "loss": 0.2607, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.04873949579831933, |
| "grad_norm": 4.529140394915868, |
| "learning_rate": 9.985353640651563e-06, |
| "loss": 0.1756, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.05042016806722689, |
| "grad_norm": 5.0629410665648775, |
| "learning_rate": 9.984326668636131e-06, |
| "loss": 0.2619, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.052100840336134456, |
| "grad_norm": 5.030852124775789, |
| "learning_rate": 9.983264958040194e-06, |
| "loss": 0.2179, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.05378151260504202, |
| "grad_norm": 4.13446898262633, |
| "learning_rate": 9.98216851626341e-06, |
| "loss": 0.1483, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.05546218487394958, |
| "grad_norm": 4.718860562037743, |
| "learning_rate": 9.981037350947503e-06, |
| "loss": 0.1942, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.05714285714285714, |
| "grad_norm": 5.0987197312854216, |
| "learning_rate": 9.979871469976197e-06, |
| "loss": 0.2192, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.058823529411764705, |
| "grad_norm": 4.853234294545507, |
| "learning_rate": 9.978670881475173e-06, |
| "loss": 0.2256, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.06050420168067227, |
| "grad_norm": 4.566039439694336, |
| "learning_rate": 9.977435593812013e-06, |
| "loss": 0.212, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.06218487394957983, |
| "grad_norm": 4.303938728983624, |
| "learning_rate": 9.976165615596128e-06, |
| "loss": 0.1981, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.06386554621848739, |
| "grad_norm": 4.738804474245815, |
| "learning_rate": 9.974860955678715e-06, |
| "loss": 0.1928, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.06554621848739496, |
| "grad_norm": 4.74356391080987, |
| "learning_rate": 9.973521623152682e-06, |
| "loss": 0.195, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.06722689075630252, |
| "grad_norm": 6.034671221697289, |
| "learning_rate": 9.972147627352593e-06, |
| "loss": 0.2954, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.06890756302521009, |
| "grad_norm": 4.6685509141294155, |
| "learning_rate": 9.970738977854597e-06, |
| "loss": 0.2195, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.07058823529411765, |
| "grad_norm": 4.564959380430705, |
| "learning_rate": 9.96929568447637e-06, |
| "loss": 0.2394, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.07226890756302522, |
| "grad_norm": 3.707111096084297, |
| "learning_rate": 9.967817757277031e-06, |
| "loss": 0.1534, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.07394957983193277, |
| "grad_norm": 5.558481032268956, |
| "learning_rate": 9.966305206557092e-06, |
| "loss": 0.259, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.07563025210084033, |
| "grad_norm": 4.816786645150557, |
| "learning_rate": 9.964758042858368e-06, |
| "loss": 0.3009, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.0773109243697479, |
| "grad_norm": 3.46056254061901, |
| "learning_rate": 9.963176276963916e-06, |
| "loss": 0.1647, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.07899159663865546, |
| "grad_norm": 4.324140282243881, |
| "learning_rate": 9.961559919897954e-06, |
| "loss": 0.1879, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.08067226890756303, |
| "grad_norm": 4.097033705840372, |
| "learning_rate": 9.959908982925783e-06, |
| "loss": 0.2056, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.08235294117647059, |
| "grad_norm": 3.8703298631531573, |
| "learning_rate": 9.958223477553715e-06, |
| "loss": 0.2251, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.08403361344537816, |
| "grad_norm": 4.530326484306399, |
| "learning_rate": 9.956503415528984e-06, |
| "loss": 0.214, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.08571428571428572, |
| "grad_norm": 3.838645445285574, |
| "learning_rate": 9.954748808839675e-06, |
| "loss": 0.1631, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.08739495798319327, |
| "grad_norm": 5.189457107947033, |
| "learning_rate": 9.952959669714627e-06, |
| "loss": 0.2349, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.08907563025210084, |
| "grad_norm": 4.478468457730722, |
| "learning_rate": 9.951136010623359e-06, |
| "loss": 0.2271, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.0907563025210084, |
| "grad_norm": 3.7459087505453543, |
| "learning_rate": 9.94927784427598e-06, |
| "loss": 0.1991, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.09243697478991597, |
| "grad_norm": 4.334863357859166, |
| "learning_rate": 9.947385183623099e-06, |
| "loss": 0.1952, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.09411764705882353, |
| "grad_norm": 4.120411980539683, |
| "learning_rate": 9.945458041855732e-06, |
| "loss": 0.217, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.0957983193277311, |
| "grad_norm": 3.6830264614513237, |
| "learning_rate": 9.943496432405213e-06, |
| "loss": 0.1846, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.09747899159663866, |
| "grad_norm": 3.8118209298483823, |
| "learning_rate": 9.941500368943111e-06, |
| "loss": 0.174, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.09915966386554621, |
| "grad_norm": 3.6490768370277, |
| "learning_rate": 9.939469865381111e-06, |
| "loss": 0.1764, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.10084033613445378, |
| "grad_norm": 4.594768474393017, |
| "learning_rate": 9.937404935870938e-06, |
| "loss": 0.2139, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.10252100840336134, |
| "grad_norm": 3.977673868555674, |
| "learning_rate": 9.935305594804247e-06, |
| "loss": 0.1925, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.10420168067226891, |
| "grad_norm": 4.136078170621533, |
| "learning_rate": 9.933171856812533e-06, |
| "loss": 0.225, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.10588235294117647, |
| "grad_norm": 4.5341488755616695, |
| "learning_rate": 9.931003736767013e-06, |
| "loss": 0.1781, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.10756302521008404, |
| "grad_norm": 4.276594908796245, |
| "learning_rate": 9.92880124977854e-06, |
| "loss": 0.2098, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.1092436974789916, |
| "grad_norm": 4.444880797388785, |
| "learning_rate": 9.926564411197488e-06, |
| "loss": 0.2456, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.11092436974789915, |
| "grad_norm": 4.0506236653094305, |
| "learning_rate": 9.924293236613643e-06, |
| "loss": 0.2102, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.11260504201680673, |
| "grad_norm": 4.545097789043228, |
| "learning_rate": 9.921987741856099e-06, |
| "loss": 0.2588, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.11428571428571428, |
| "grad_norm": 3.6500673666849215, |
| "learning_rate": 9.91964794299315e-06, |
| "loss": 0.1782, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.11596638655462185, |
| "grad_norm": 4.238844893437317, |
| "learning_rate": 9.91727385633217e-06, |
| "loss": 0.1981, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.11764705882352941, |
| "grad_norm": 4.007425712958092, |
| "learning_rate": 9.91486549841951e-06, |
| "loss": 0.1857, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.11932773109243698, |
| "grad_norm": 5.073962058395761, |
| "learning_rate": 9.91242288604037e-06, |
| "loss": 0.2808, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.12100840336134454, |
| "grad_norm": 3.742305034008983, |
| "learning_rate": 9.909946036218694e-06, |
| "loss": 0.2158, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.1226890756302521, |
| "grad_norm": 4.840903526928681, |
| "learning_rate": 9.907434966217041e-06, |
| "loss": 0.2658, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.12436974789915967, |
| "grad_norm": 4.515748230482176, |
| "learning_rate": 9.904889693536475e-06, |
| "loss": 0.1715, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.12605042016806722, |
| "grad_norm": 4.576112978323743, |
| "learning_rate": 9.902310235916435e-06, |
| "loss": 0.2407, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.12773109243697478, |
| "grad_norm": 4.073187031843224, |
| "learning_rate": 9.899696611334612e-06, |
| "loss": 0.1885, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.12941176470588237, |
| "grad_norm": 4.837076708284357, |
| "learning_rate": 9.89704883800683e-06, |
| "loss": 0.2429, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.13109243697478992, |
| "grad_norm": 4.608602122041852, |
| "learning_rate": 9.894366934386913e-06, |
| "loss": 0.1704, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.13277310924369748, |
| "grad_norm": 5.570923387271547, |
| "learning_rate": 9.891650919166558e-06, |
| "loss": 0.2605, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.13445378151260504, |
| "grad_norm": 4.528798317968409, |
| "learning_rate": 9.888900811275205e-06, |
| "loss": 0.2563, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.1361344537815126, |
| "grad_norm": 4.842711689139884, |
| "learning_rate": 9.886116629879906e-06, |
| "loss": 0.2596, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.13781512605042018, |
| "grad_norm": 4.7561000330794645, |
| "learning_rate": 9.883298394385186e-06, |
| "loss": 0.1933, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.13949579831932774, |
| "grad_norm": 5.009176440097795, |
| "learning_rate": 9.880446124432921e-06, |
| "loss": 0.2083, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.1411764705882353, |
| "grad_norm": 5.331779721460354, |
| "learning_rate": 9.877559839902185e-06, |
| "loss": 0.2831, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 4.460326910194911, |
| "learning_rate": 9.874639560909118e-06, |
| "loss": 0.2279, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.14453781512605043, |
| "grad_norm": 4.971242931396187, |
| "learning_rate": 9.871685307806796e-06, |
| "loss": 0.2144, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.146218487394958, |
| "grad_norm": 4.941549501376583, |
| "learning_rate": 9.868697101185066e-06, |
| "loss": 0.238, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.14789915966386555, |
| "grad_norm": 4.837604687905309, |
| "learning_rate": 9.865674961870428e-06, |
| "loss": 0.1736, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.1495798319327731, |
| "grad_norm": 4.301169486478077, |
| "learning_rate": 9.862618910925873e-06, |
| "loss": 0.1791, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.15126050420168066, |
| "grad_norm": 4.419011396958963, |
| "learning_rate": 9.859528969650739e-06, |
| "loss": 0.2424, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.15294117647058825, |
| "grad_norm": 4.439104272537347, |
| "learning_rate": 9.85640515958057e-06, |
| "loss": 0.1947, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.1546218487394958, |
| "grad_norm": 4.1606384288910006, |
| "learning_rate": 9.853247502486957e-06, |
| "loss": 0.2373, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.15630252100840336, |
| "grad_norm": 4.426995034165966, |
| "learning_rate": 9.850056020377392e-06, |
| "loss": 0.1984, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.15798319327731092, |
| "grad_norm": 4.747800742019179, |
| "learning_rate": 9.846830735495112e-06, |
| "loss": 0.2304, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.15966386554621848, |
| "grad_norm": 4.325848335124482, |
| "learning_rate": 9.843571670318943e-06, |
| "loss": 0.2418, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.16134453781512606, |
| "grad_norm": 5.066068637598227, |
| "learning_rate": 9.840278847563147e-06, |
| "loss": 0.3608, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.16302521008403362, |
| "grad_norm": 3.861333513668538, |
| "learning_rate": 9.836952290177261e-06, |
| "loss": 0.1868, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.16470588235294117, |
| "grad_norm": 3.9561522011266343, |
| "learning_rate": 9.833592021345938e-06, |
| "loss": 0.2093, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.16638655462184873, |
| "grad_norm": 4.132131484116752, |
| "learning_rate": 9.830198064488783e-06, |
| "loss": 0.2068, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.16806722689075632, |
| "grad_norm": 3.9580220367981513, |
| "learning_rate": 9.826770443260193e-06, |
| "loss": 0.1995, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.16974789915966387, |
| "grad_norm": 4.264099752452373, |
| "learning_rate": 9.823309181549194e-06, |
| "loss": 0.2047, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.17142857142857143, |
| "grad_norm": 4.954717584301421, |
| "learning_rate": 9.819814303479268e-06, |
| "loss": 0.2233, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.173109243697479, |
| "grad_norm": 4.616243415887144, |
| "learning_rate": 9.816285833408185e-06, |
| "loss": 0.2475, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.17478991596638654, |
| "grad_norm": 4.489313598265477, |
| "learning_rate": 9.812723795927848e-06, |
| "loss": 0.2042, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.17647058823529413, |
| "grad_norm": 4.891274285476457, |
| "learning_rate": 9.809128215864096e-06, |
| "loss": 0.2188, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.1781512605042017, |
| "grad_norm": 4.479381566640569, |
| "learning_rate": 9.805499118276555e-06, |
| "loss": 0.2009, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.17983193277310924, |
| "grad_norm": 4.492071651356979, |
| "learning_rate": 9.801836528458453e-06, |
| "loss": 0.2032, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.1815126050420168, |
| "grad_norm": 3.9572668817717425, |
| "learning_rate": 9.798140471936437e-06, |
| "loss": 0.1341, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.18319327731092436, |
| "grad_norm": 5.227775544340066, |
| "learning_rate": 9.79441097447041e-06, |
| "loss": 0.2361, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.18487394957983194, |
| "grad_norm": 4.480776893524611, |
| "learning_rate": 9.790648062053341e-06, |
| "loss": 0.223, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.1865546218487395, |
| "grad_norm": 3.9849522745760977, |
| "learning_rate": 9.786851760911084e-06, |
| "loss": 0.1797, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.18823529411764706, |
| "grad_norm": 3.765063799769012, |
| "learning_rate": 9.783022097502204e-06, |
| "loss": 0.2076, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.1899159663865546, |
| "grad_norm": 5.652920919469918, |
| "learning_rate": 9.779159098517781e-06, |
| "loss": 0.298, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.1915966386554622, |
| "grad_norm": 4.4105882329875135, |
| "learning_rate": 9.77526279088123e-06, |
| "loss": 0.2307, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.19327731092436976, |
| "grad_norm": 5.721726388390177, |
| "learning_rate": 9.771333201748116e-06, |
| "loss": 0.2443, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.1949579831932773, |
| "grad_norm": 3.9054517446447123, |
| "learning_rate": 9.767370358505958e-06, |
| "loss": 0.1683, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.19663865546218487, |
| "grad_norm": 5.2393244986377, |
| "learning_rate": 9.763374288774043e-06, |
| "loss": 0.2234, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.19831932773109243, |
| "grad_norm": 4.116682183617701, |
| "learning_rate": 9.759345020403233e-06, |
| "loss": 0.2216, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 4.2674607287039406, |
| "learning_rate": 9.755282581475769e-06, |
| "loss": 0.1931, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.20168067226890757, |
| "grad_norm": 3.6201843953883284, |
| "learning_rate": 9.751187000305076e-06, |
| "loss": 0.186, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.20336134453781513, |
| "grad_norm": 4.970181582570933, |
| "learning_rate": 9.747058305435566e-06, |
| "loss": 0.2231, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.20504201680672268, |
| "grad_norm": 4.6456388163364295, |
| "learning_rate": 9.742896525642442e-06, |
| "loss": 0.2568, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.20672268907563024, |
| "grad_norm": 4.972251282923708, |
| "learning_rate": 9.738701689931488e-06, |
| "loss": 0.2637, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.20840336134453782, |
| "grad_norm": 4.423134442557392, |
| "learning_rate": 9.734473827538881e-06, |
| "loss": 0.2398, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.21008403361344538, |
| "grad_norm": 4.560715987612911, |
| "learning_rate": 9.730212967930974e-06, |
| "loss": 0.2274, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.21176470588235294, |
| "grad_norm": 3.5433880225975254, |
| "learning_rate": 9.7259191408041e-06, |
| "loss": 0.173, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.2134453781512605, |
| "grad_norm": 5.148310128666882, |
| "learning_rate": 9.721592376084355e-06, |
| "loss": 0.2769, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.21512605042016808, |
| "grad_norm": 4.795984062191726, |
| "learning_rate": 9.717232703927402e-06, |
| "loss": 0.2244, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.21680672268907564, |
| "grad_norm": 5.700619371572016, |
| "learning_rate": 9.712840154718253e-06, |
| "loss": 0.2338, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.2184873949579832, |
| "grad_norm": 4.669225751974187, |
| "learning_rate": 9.70841475907106e-06, |
| "loss": 0.1966, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.22016806722689075, |
| "grad_norm": 3.6626918920905878, |
| "learning_rate": 9.703956547828893e-06, |
| "loss": 0.1954, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.2218487394957983, |
| "grad_norm": 5.236075056300641, |
| "learning_rate": 9.69946555206354e-06, |
| "loss": 0.1988, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.2235294117647059, |
| "grad_norm": 4.69030459950783, |
| "learning_rate": 9.694941803075285e-06, |
| "loss": 0.2693, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.22521008403361345, |
| "grad_norm": 3.955969407559595, |
| "learning_rate": 9.690385332392676e-06, |
| "loss": 0.2082, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.226890756302521, |
| "grad_norm": 4.177067871929489, |
| "learning_rate": 9.685796171772327e-06, |
| "loss": 0.2962, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.22857142857142856, |
| "grad_norm": 4.167190069041118, |
| "learning_rate": 9.681174353198687e-06, |
| "loss": 0.2331, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.23025210084033612, |
| "grad_norm": 4.225274226603427, |
| "learning_rate": 9.67651990888381e-06, |
| "loss": 0.1906, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.2319327731092437, |
| "grad_norm": 4.197817430164727, |
| "learning_rate": 9.67183287126714e-06, |
| "loss": 0.2023, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.23361344537815126, |
| "grad_norm": 3.935578613508391, |
| "learning_rate": 9.667113273015283e-06, |
| "loss": 0.2062, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.23529411764705882, |
| "grad_norm": 4.605647036736872, |
| "learning_rate": 9.66236114702178e-06, |
| "loss": 0.2104, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.23697478991596638, |
| "grad_norm": 4.341408968768613, |
| "learning_rate": 9.657576526406872e-06, |
| "loss": 0.1792, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.23865546218487396, |
| "grad_norm": 4.754632418091511, |
| "learning_rate": 9.652759444517276e-06, |
| "loss": 0.212, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.24033613445378152, |
| "grad_norm": 4.013047554379441, |
| "learning_rate": 9.647909934925952e-06, |
| "loss": 0.2134, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.24201680672268908, |
| "grad_norm": 4.3593189912939945, |
| "learning_rate": 9.64302803143186e-06, |
| "loss": 0.184, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.24369747899159663, |
| "grad_norm": 4.378110003430036, |
| "learning_rate": 9.63811376805974e-06, |
| "loss": 0.2012, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.2453781512605042, |
| "grad_norm": 4.828592016527972, |
| "learning_rate": 9.633167179059859e-06, |
| "loss": 0.2797, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.24705882352941178, |
| "grad_norm": 3.891371257775755, |
| "learning_rate": 9.628188298907782e-06, |
| "loss": 0.1788, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.24873949579831933, |
| "grad_norm": 5.6001909540395065, |
| "learning_rate": 9.623177162304132e-06, |
| "loss": 0.2722, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.2504201680672269, |
| "grad_norm": 4.193146001915105, |
| "learning_rate": 9.618133804174341e-06, |
| "loss": 0.2035, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.25210084033613445, |
| "grad_norm": 4.325524865328703, |
| "learning_rate": 9.613058259668416e-06, |
| "loss": 0.2327, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.253781512605042, |
| "grad_norm": 5.602899385924722, |
| "learning_rate": 9.607950564160682e-06, |
| "loss": 0.2548, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.25546218487394956, |
| "grad_norm": 4.129341895449091, |
| "learning_rate": 9.602810753249549e-06, |
| "loss": 0.1687, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.2571428571428571, |
| "grad_norm": 4.056125421715808, |
| "learning_rate": 9.597638862757255e-06, |
| "loss": 0.1439, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.25882352941176473, |
| "grad_norm": 4.625817847491738, |
| "learning_rate": 9.592434928729617e-06, |
| "loss": 0.2313, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.2605042016806723, |
| "grad_norm": 5.159432944923756, |
| "learning_rate": 9.587198987435782e-06, |
| "loss": 0.2314, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.26218487394957984, |
| "grad_norm": 4.821801696820484, |
| "learning_rate": 9.581931075367979e-06, |
| "loss": 0.2598, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.2638655462184874, |
| "grad_norm": 4.864810002494065, |
| "learning_rate": 9.576631229241248e-06, |
| "loss": 0.2023, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.26554621848739496, |
| "grad_norm": 4.706045710445991, |
| "learning_rate": 9.57129948599321e-06, |
| "loss": 0.2339, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.2672268907563025, |
| "grad_norm": 4.712230200754653, |
| "learning_rate": 9.565935882783784e-06, |
| "loss": 0.2565, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.2689075630252101, |
| "grad_norm": 5.822625035219728, |
| "learning_rate": 9.56054045699494e-06, |
| "loss": 0.2678, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.27058823529411763, |
| "grad_norm": 4.050112712888665, |
| "learning_rate": 9.555113246230443e-06, |
| "loss": 0.2299, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.2722689075630252, |
| "grad_norm": 4.070314071680027, |
| "learning_rate": 9.54965428831558e-06, |
| "loss": 0.2794, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.2739495798319328, |
| "grad_norm": 4.036666909908974, |
| "learning_rate": 9.544163621296906e-06, |
| "loss": 0.2553, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.27563025210084036, |
| "grad_norm": 4.617935809136051, |
| "learning_rate": 9.538641283441974e-06, |
| "loss": 0.2972, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.2773109243697479, |
| "grad_norm": 3.5010006509743867, |
| "learning_rate": 9.533087313239065e-06, |
| "loss": 0.181, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.27899159663865547, |
| "grad_norm": 4.62451295112266, |
| "learning_rate": 9.527501749396924e-06, |
| "loss": 0.2186, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.280672268907563, |
| "grad_norm": 4.058513129530688, |
| "learning_rate": 9.521884630844498e-06, |
| "loss": 0.24, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.2823529411764706, |
| "grad_norm": 4.1500543346518155, |
| "learning_rate": 9.516235996730645e-06, |
| "loss": 0.2403, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.28403361344537814, |
| "grad_norm": 3.7555979901592376, |
| "learning_rate": 9.510555886423883e-06, |
| "loss": 0.154, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 4.78167958275682, |
| "learning_rate": 9.504844339512096e-06, |
| "loss": 0.2789, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.28739495798319326, |
| "grad_norm": 5.370146109187836, |
| "learning_rate": 9.499101395802277e-06, |
| "loss": 0.2698, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.28907563025210087, |
| "grad_norm": 5.169364964187269, |
| "learning_rate": 9.493327095320231e-06, |
| "loss": 0.2108, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.2907563025210084, |
| "grad_norm": 4.469107375219695, |
| "learning_rate": 9.487521478310316e-06, |
| "loss": 0.2703, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.292436974789916, |
| "grad_norm": 4.21829425061834, |
| "learning_rate": 9.481684585235145e-06, |
| "loss": 0.1538, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.29411764705882354, |
| "grad_norm": 4.194377947803058, |
| "learning_rate": 9.475816456775313e-06, |
| "loss": 0.2168, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.2957983193277311, |
| "grad_norm": 4.513615580315599, |
| "learning_rate": 9.469917133829114e-06, |
| "loss": 0.2384, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.29747899159663865, |
| "grad_norm": 3.475010035423131, |
| "learning_rate": 9.463986657512254e-06, |
| "loss": 0.1856, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.2991596638655462, |
| "grad_norm": 3.9726843146403574, |
| "learning_rate": 9.458025069157563e-06, |
| "loss": 0.2072, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.30084033613445377, |
| "grad_norm": 3.13300922477254, |
| "learning_rate": 9.452032410314709e-06, |
| "loss": 0.1727, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.3025210084033613, |
| "grad_norm": 3.723460543351599, |
| "learning_rate": 9.446008722749906e-06, |
| "loss": 0.1676, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.3042016806722689, |
| "grad_norm": 4.528548649597003, |
| "learning_rate": 9.439954048445628e-06, |
| "loss": 0.2251, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.3058823529411765, |
| "grad_norm": 3.557373366966036, |
| "learning_rate": 9.43386842960031e-06, |
| "loss": 0.206, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.30756302521008405, |
| "grad_norm": 3.42214545293451, |
| "learning_rate": 9.427751908628059e-06, |
| "loss": 0.1895, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.3092436974789916, |
| "grad_norm": 4.515404571418755, |
| "learning_rate": 9.421604528158355e-06, |
| "loss": 0.2641, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.31092436974789917, |
| "grad_norm": 4.926637503494079, |
| "learning_rate": 9.415426331035754e-06, |
| "loss": 0.2524, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.3126050420168067, |
| "grad_norm": 4.383497390044887, |
| "learning_rate": 9.409217360319594e-06, |
| "loss": 0.238, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.3142857142857143, |
| "grad_norm": 4.234792938224042, |
| "learning_rate": 9.40297765928369e-06, |
| "loss": 0.2292, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.31596638655462184, |
| "grad_norm": 4.481631640061522, |
| "learning_rate": 9.396707271416035e-06, |
| "loss": 0.3017, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.3176470588235294, |
| "grad_norm": 5.256316840677135, |
| "learning_rate": 9.39040624041849e-06, |
| "loss": 0.2403, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.31932773109243695, |
| "grad_norm": 3.781091993143701, |
| "learning_rate": 9.384074610206495e-06, |
| "loss": 0.1869, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.32100840336134456, |
| "grad_norm": 4.6057017882339615, |
| "learning_rate": 9.377712424908743e-06, |
| "loss": 0.2507, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.3226890756302521, |
| "grad_norm": 6.461861467319499, |
| "learning_rate": 9.371319728866892e-06, |
| "loss": 0.281, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.3243697478991597, |
| "grad_norm": 4.800015707877198, |
| "learning_rate": 9.36489656663524e-06, |
| "loss": 0.2458, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.32605042016806723, |
| "grad_norm": 4.227032338998176, |
| "learning_rate": 9.35844298298042e-06, |
| "loss": 0.2197, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.3277310924369748, |
| "grad_norm": 4.816215636974397, |
| "learning_rate": 9.351959022881098e-06, |
| "loss": 0.2489, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.32941176470588235, |
| "grad_norm": 4.886220251164849, |
| "learning_rate": 9.345444731527642e-06, |
| "loss": 0.1837, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.3310924369747899, |
| "grad_norm": 5.191082310263523, |
| "learning_rate": 9.338900154321818e-06, |
| "loss": 0.206, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.33277310924369746, |
| "grad_norm": 3.617055755969013, |
| "learning_rate": 9.332325336876472e-06, |
| "loss": 0.1452, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.334453781512605, |
| "grad_norm": 4.226640548699603, |
| "learning_rate": 9.325720325015211e-06, |
| "loss": 0.224, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.33613445378151263, |
| "grad_norm": 4.513807147293619, |
| "learning_rate": 9.319085164772082e-06, |
| "loss": 0.2354, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3378151260504202, |
| "grad_norm": 4.077629119418003, |
| "learning_rate": 9.312419902391256e-06, |
| "loss": 0.2192, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.33949579831932775, |
| "grad_norm": 4.470110460616241, |
| "learning_rate": 9.305724584326702e-06, |
| "loss": 0.182, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.3411764705882353, |
| "grad_norm": 4.680846295566748, |
| "learning_rate": 9.298999257241862e-06, |
| "loss": 0.2393, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.34285714285714286, |
| "grad_norm": 4.526839194923339, |
| "learning_rate": 9.292243968009332e-06, |
| "loss": 0.1923, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.3445378151260504, |
| "grad_norm": 4.538887920332639, |
| "learning_rate": 9.285458763710524e-06, |
| "loss": 0.2485, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.346218487394958, |
| "grad_norm": 5.593301905843468, |
| "learning_rate": 9.278643691635352e-06, |
| "loss": 0.2729, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.34789915966386553, |
| "grad_norm": 4.928678837563507, |
| "learning_rate": 9.271798799281893e-06, |
| "loss": 0.2701, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.3495798319327731, |
| "grad_norm": 4.211161086011563, |
| "learning_rate": 9.264924134356057e-06, |
| "loss": 0.2218, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.35126050420168065, |
| "grad_norm": 4.619486458421115, |
| "learning_rate": 9.258019744771256e-06, |
| "loss": 0.2004, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.35294117647058826, |
| "grad_norm": 3.602112878149427, |
| "learning_rate": 9.251085678648072e-06, |
| "loss": 0.2173, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.3546218487394958, |
| "grad_norm": 3.7653597093168556, |
| "learning_rate": 9.244121984313916e-06, |
| "loss": 0.1888, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.3563025210084034, |
| "grad_norm": 5.432992211487408, |
| "learning_rate": 9.2371287103027e-06, |
| "loss": 0.274, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.35798319327731093, |
| "grad_norm": 4.246141250922947, |
| "learning_rate": 9.23010590535449e-06, |
| "loss": 0.2673, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.3596638655462185, |
| "grad_norm": 5.33566750421804, |
| "learning_rate": 9.223053618415168e-06, |
| "loss": 0.2074, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.36134453781512604, |
| "grad_norm": 4.283572988780274, |
| "learning_rate": 9.215971898636094e-06, |
| "loss": 0.2254, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.3630252100840336, |
| "grad_norm": 4.561348305944251, |
| "learning_rate": 9.208860795373765e-06, |
| "loss": 0.2254, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.36470588235294116, |
| "grad_norm": 4.9036278330461665, |
| "learning_rate": 9.201720358189464e-06, |
| "loss": 0.2071, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.3663865546218487, |
| "grad_norm": 4.635203208682745, |
| "learning_rate": 9.194550636848923e-06, |
| "loss": 0.2038, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.3680672268907563, |
| "grad_norm": 4.19385346288178, |
| "learning_rate": 9.187351681321965e-06, |
| "loss": 0.2382, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.3697478991596639, |
| "grad_norm": 3.8373061734988623, |
| "learning_rate": 9.180123541782172e-06, |
| "loss": 0.1818, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.37142857142857144, |
| "grad_norm": 3.9983995742701466, |
| "learning_rate": 9.172866268606514e-06, |
| "loss": 0.2239, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.373109243697479, |
| "grad_norm": 4.096523467317054, |
| "learning_rate": 9.16557991237502e-06, |
| "loss": 0.1967, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.37478991596638656, |
| "grad_norm": 3.9949804148348953, |
| "learning_rate": 9.158264523870413e-06, |
| "loss": 0.2539, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.3764705882352941, |
| "grad_norm": 3.4121379024957426, |
| "learning_rate": 9.150920154077753e-06, |
| "loss": 0.1659, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.37815126050420167, |
| "grad_norm": 4.223999169456629, |
| "learning_rate": 9.143546854184095e-06, |
| "loss": 0.1866, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.3798319327731092, |
| "grad_norm": 4.4462570365988885, |
| "learning_rate": 9.136144675578114e-06, |
| "loss": 0.2234, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.3815126050420168, |
| "grad_norm": 5.937282515027506, |
| "learning_rate": 9.128713669849767e-06, |
| "loss": 0.2484, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.3831932773109244, |
| "grad_norm": 3.970889487846386, |
| "learning_rate": 9.121253888789916e-06, |
| "loss": 0.184, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.38487394957983195, |
| "grad_norm": 3.4532808484924638, |
| "learning_rate": 9.113765384389984e-06, |
| "loss": 0.1931, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.3865546218487395, |
| "grad_norm": 3.296349607830855, |
| "learning_rate": 9.106248208841568e-06, |
| "loss": 0.1928, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.38823529411764707, |
| "grad_norm": 3.5827334306096548, |
| "learning_rate": 9.098702414536107e-06, |
| "loss": 0.1947, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.3899159663865546, |
| "grad_norm": 4.419565381593234, |
| "learning_rate": 9.091128054064487e-06, |
| "loss": 0.1988, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.3915966386554622, |
| "grad_norm": 4.629227753836929, |
| "learning_rate": 9.083525180216697e-06, |
| "loss": 0.2783, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.39327731092436974, |
| "grad_norm": 4.780093359100441, |
| "learning_rate": 9.075893845981445e-06, |
| "loss": 0.1882, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.3949579831932773, |
| "grad_norm": 4.028034546146023, |
| "learning_rate": 9.0682341045458e-06, |
| "loss": 0.2319, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.39663865546218485, |
| "grad_norm": 3.8750364011106884, |
| "learning_rate": 9.060546009294818e-06, |
| "loss": 0.2066, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.3983193277310924, |
| "grad_norm": 4.158470486371845, |
| "learning_rate": 9.05282961381116e-06, |
| "loss": 0.2507, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 4.18280526847092, |
| "learning_rate": 9.045084971874738e-06, |
| "loss": 0.2264, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.4016806722689076, |
| "grad_norm": 3.899581582099338, |
| "learning_rate": 9.037312137462323e-06, |
| "loss": 0.1807, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.40336134453781514, |
| "grad_norm": 3.790705042738854, |
| "learning_rate": 9.029511164747175e-06, |
| "loss": 0.1991, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.4050420168067227, |
| "grad_norm": 3.9082019683768157, |
| "learning_rate": 9.021682108098671e-06, |
| "loss": 0.195, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.40672268907563025, |
| "grad_norm": 4.988301662674425, |
| "learning_rate": 9.013825022081915e-06, |
| "loss": 0.2016, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.4084033613445378, |
| "grad_norm": 4.5339550958204855, |
| "learning_rate": 9.005939961457366e-06, |
| "loss": 0.2199, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.41008403361344536, |
| "grad_norm": 3.8323113686251644, |
| "learning_rate": 8.998026981180454e-06, |
| "loss": 0.19, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.4117647058823529, |
| "grad_norm": 4.253369786543169, |
| "learning_rate": 8.990086136401199e-06, |
| "loss": 0.2044, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.4134453781512605, |
| "grad_norm": 3.9026309891957736, |
| "learning_rate": 8.982117482463817e-06, |
| "loss": 0.2221, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.4151260504201681, |
| "grad_norm": 3.9695322564079674, |
| "learning_rate": 8.97412107490635e-06, |
| "loss": 0.22, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.41680672268907565, |
| "grad_norm": 4.967876667155862, |
| "learning_rate": 8.966096969460263e-06, |
| "loss": 0.2525, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.4184873949579832, |
| "grad_norm": 4.060825678979327, |
| "learning_rate": 8.958045222050073e-06, |
| "loss": 0.1935, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.42016806722689076, |
| "grad_norm": 3.973788704821746, |
| "learning_rate": 8.94996588879294e-06, |
| "loss": 0.2171, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.4218487394957983, |
| "grad_norm": 4.090322297929094, |
| "learning_rate": 8.94185902599829e-06, |
| "loss": 0.2173, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.4235294117647059, |
| "grad_norm": 4.503759124581851, |
| "learning_rate": 8.933724690167417e-06, |
| "loss": 0.2442, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.42521008403361343, |
| "grad_norm": 4.46443714035923, |
| "learning_rate": 8.92556293799309e-06, |
| "loss": 0.2254, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.426890756302521, |
| "grad_norm": 4.801328310938415, |
| "learning_rate": 8.917373826359156e-06, |
| "loss": 0.2286, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 4.106180470850406, |
| "learning_rate": 8.90915741234015e-06, |
| "loss": 0.2321, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.43025210084033616, |
| "grad_norm": 4.105749099535154, |
| "learning_rate": 8.900913753200887e-06, |
| "loss": 0.2081, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.4319327731092437, |
| "grad_norm": 4.043544008475371, |
| "learning_rate": 8.892642906396076e-06, |
| "loss": 0.2189, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.4336134453781513, |
| "grad_norm": 4.095210376362221, |
| "learning_rate": 8.884344929569905e-06, |
| "loss": 0.2413, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.43529411764705883, |
| "grad_norm": 3.341752408472611, |
| "learning_rate": 8.87601988055565e-06, |
| "loss": 0.1966, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.4369747899159664, |
| "grad_norm": 4.364023975969356, |
| "learning_rate": 8.867667817375266e-06, |
| "loss": 0.2762, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.43865546218487395, |
| "grad_norm": 5.054813922266153, |
| "learning_rate": 8.859288798238988e-06, |
| "loss": 0.2721, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.4403361344537815, |
| "grad_norm": 3.2448771175239903, |
| "learning_rate": 8.850882881544923e-06, |
| "loss": 0.1857, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.44201680672268906, |
| "grad_norm": 3.5338228285569224, |
| "learning_rate": 8.842450125878634e-06, |
| "loss": 0.1913, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.4436974789915966, |
| "grad_norm": 4.138571886719138, |
| "learning_rate": 8.833990590012749e-06, |
| "loss": 0.2272, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.44537815126050423, |
| "grad_norm": 4.40959325327376, |
| "learning_rate": 8.825504332906542e-06, |
| "loss": 0.2235, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.4470588235294118, |
| "grad_norm": 5.463965171515439, |
| "learning_rate": 8.816991413705515e-06, |
| "loss": 0.2681, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.44873949579831934, |
| "grad_norm": 4.258513743820523, |
| "learning_rate": 8.808451891741001e-06, |
| "loss": 0.1973, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.4504201680672269, |
| "grad_norm": 5.106159542200387, |
| "learning_rate": 8.799885826529736e-06, |
| "loss": 0.2918, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.45210084033613446, |
| "grad_norm": 3.4701298119834476, |
| "learning_rate": 8.79129327777346e-06, |
| "loss": 0.2155, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.453781512605042, |
| "grad_norm": 3.7529119643338014, |
| "learning_rate": 8.782674305358481e-06, |
| "loss": 0.2887, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.45546218487394957, |
| "grad_norm": 5.682941483106064, |
| "learning_rate": 8.774028969355273e-06, |
| "loss": 0.2483, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.45714285714285713, |
| "grad_norm": 4.058019802098144, |
| "learning_rate": 8.765357330018056e-06, |
| "loss": 0.2492, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.4588235294117647, |
| "grad_norm": 4.641783192950925, |
| "learning_rate": 8.756659447784367e-06, |
| "loss": 0.235, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.46050420168067224, |
| "grad_norm": 4.648930577610503, |
| "learning_rate": 8.74793538327465e-06, |
| "loss": 0.2723, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.46218487394957986, |
| "grad_norm": 3.6265958946164676, |
| "learning_rate": 8.739185197291824e-06, |
| "loss": 0.1753, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.4638655462184874, |
| "grad_norm": 4.151010075520344, |
| "learning_rate": 8.730408950820864e-06, |
| "loss": 0.2342, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.46554621848739497, |
| "grad_norm": 3.9544139566060075, |
| "learning_rate": 8.721606705028376e-06, |
| "loss": 0.2369, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.4672268907563025, |
| "grad_norm": 4.865583546199144, |
| "learning_rate": 8.71277852126217e-06, |
| "loss": 0.2174, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.4689075630252101, |
| "grad_norm": 3.7227278170113167, |
| "learning_rate": 8.703924461050832e-06, |
| "loss": 0.2652, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "grad_norm": 3.9404090239139413, |
| "learning_rate": 8.695044586103297e-06, |
| "loss": 0.1988, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.4722689075630252, |
| "grad_norm": 3.8358691946565355, |
| "learning_rate": 8.686138958308415e-06, |
| "loss": 0.2018, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.47394957983193275, |
| "grad_norm": 3.311037810132601, |
| "learning_rate": 8.67720763973452e-06, |
| "loss": 0.195, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.4756302521008403, |
| "grad_norm": 3.5994550095049656, |
| "learning_rate": 8.668250692629008e-06, |
| "loss": 0.2216, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.4773109243697479, |
| "grad_norm": 3.9949982493621166, |
| "learning_rate": 8.659268179417886e-06, |
| "loss": 0.2447, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.4789915966386555, |
| "grad_norm": 5.049172465722346, |
| "learning_rate": 8.65026016270535e-06, |
| "loss": 0.2252, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.48067226890756304, |
| "grad_norm": 3.7246219501155675, |
| "learning_rate": 8.641226705273344e-06, |
| "loss": 0.1994, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.4823529411764706, |
| "grad_norm": 5.424735374280226, |
| "learning_rate": 8.632167870081122e-06, |
| "loss": 0.2268, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.48403361344537815, |
| "grad_norm": 3.9698670375044944, |
| "learning_rate": 8.623083720264806e-06, |
| "loss": 0.1919, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.4857142857142857, |
| "grad_norm": 6.189711620026336, |
| "learning_rate": 8.613974319136959e-06, |
| "loss": 0.2538, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.48739495798319327, |
| "grad_norm": 4.139394655175779, |
| "learning_rate": 8.604839730186125e-06, |
| "loss": 0.1841, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.4890756302521008, |
| "grad_norm": 4.255536063641137, |
| "learning_rate": 8.595680017076403e-06, |
| "loss": 0.2175, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.4907563025210084, |
| "grad_norm": 3.4984785232169826, |
| "learning_rate": 8.586495243646992e-06, |
| "loss": 0.1834, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.492436974789916, |
| "grad_norm": 3.9216701824988074, |
| "learning_rate": 8.577285473911753e-06, |
| "loss": 0.1737, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.49411764705882355, |
| "grad_norm": 3.8780941059875027, |
| "learning_rate": 8.568050772058763e-06, |
| "loss": 0.186, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.4957983193277311, |
| "grad_norm": 4.036911708332518, |
| "learning_rate": 8.558791202449857e-06, |
| "loss": 0.229, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.49747899159663866, |
| "grad_norm": 3.514731462007391, |
| "learning_rate": 8.549506829620193e-06, |
| "loss": 0.1942, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.4991596638655462, |
| "grad_norm": 3.2899615193670066, |
| "learning_rate": 8.540197718277797e-06, |
| "loss": 0.1781, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.5008403361344538, |
| "grad_norm": 4.001548111037081, |
| "learning_rate": 8.530863933303108e-06, |
| "loss": 0.1553, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.5025210084033613, |
| "grad_norm": 3.725806915044608, |
| "learning_rate": 8.521505539748535e-06, |
| "loss": 0.1946, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.5042016806722689, |
| "grad_norm": 4.659889554742877, |
| "learning_rate": 8.512122602837993e-06, |
| "loss": 0.2254, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.5058823529411764, |
| "grad_norm": 4.433725934195565, |
| "learning_rate": 8.502715187966455e-06, |
| "loss": 0.2236, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.507563025210084, |
| "grad_norm": 3.699038449232604, |
| "learning_rate": 8.493283360699496e-06, |
| "loss": 0.1884, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.5092436974789916, |
| "grad_norm": 4.3616517996529085, |
| "learning_rate": 8.483827186772832e-06, |
| "loss": 0.2594, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.5109243697478991, |
| "grad_norm": 3.272531751768155, |
| "learning_rate": 8.47434673209187e-06, |
| "loss": 0.1864, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.5126050420168067, |
| "grad_norm": 5.245759108142561, |
| "learning_rate": 8.464842062731235e-06, |
| "loss": 0.2004, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.5142857142857142, |
| "grad_norm": 3.924276284348769, |
| "learning_rate": 8.455313244934324e-06, |
| "loss": 0.1824, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.5159663865546219, |
| "grad_norm": 4.6395692867201195, |
| "learning_rate": 8.445760345112836e-06, |
| "loss": 0.2402, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.5176470588235295, |
| "grad_norm": 3.5570683386402204, |
| "learning_rate": 8.436183429846314e-06, |
| "loss": 0.1809, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.519327731092437, |
| "grad_norm": 4.102626184053465, |
| "learning_rate": 8.426582565881674e-06, |
| "loss": 0.2048, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.5210084033613446, |
| "grad_norm": 3.52485735121433, |
| "learning_rate": 8.416957820132743e-06, |
| "loss": 0.163, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.5226890756302521, |
| "grad_norm": 4.346874108094046, |
| "learning_rate": 8.407309259679801e-06, |
| "loss": 0.1943, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.5243697478991597, |
| "grad_norm": 3.355148847177652, |
| "learning_rate": 8.397636951769099e-06, |
| "loss": 0.206, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.5260504201680672, |
| "grad_norm": 3.0746648845052977, |
| "learning_rate": 8.387940963812398e-06, |
| "loss": 0.1799, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.5277310924369748, |
| "grad_norm": 3.994292228485202, |
| "learning_rate": 8.378221363386506e-06, |
| "loss": 0.1758, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.5294117647058824, |
| "grad_norm": 4.30723546706679, |
| "learning_rate": 8.368478218232787e-06, |
| "loss": 0.2426, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.5310924369747899, |
| "grad_norm": 4.563931221264526, |
| "learning_rate": 8.358711596256712e-06, |
| "loss": 0.2205, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.5327731092436975, |
| "grad_norm": 4.342112338014289, |
| "learning_rate": 8.348921565527373e-06, |
| "loss": 0.2655, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.534453781512605, |
| "grad_norm": 4.148394609285132, |
| "learning_rate": 8.339108194277006e-06, |
| "loss": 0.1984, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.5361344537815126, |
| "grad_norm": 5.008722667113153, |
| "learning_rate": 8.329271550900528e-06, |
| "loss": 0.2658, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.5378151260504201, |
| "grad_norm": 3.841587828000924, |
| "learning_rate": 8.319411703955042e-06, |
| "loss": 0.2112, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.5394957983193277, |
| "grad_norm": 4.50236826122661, |
| "learning_rate": 8.309528722159383e-06, |
| "loss": 0.2555, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.5411764705882353, |
| "grad_norm": 4.710869349180975, |
| "learning_rate": 8.299622674393615e-06, |
| "loss": 0.2483, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.5428571428571428, |
| "grad_norm": 3.557233194960392, |
| "learning_rate": 8.289693629698564e-06, |
| "loss": 0.2096, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.5445378151260504, |
| "grad_norm": 3.766181124916138, |
| "learning_rate": 8.27974165727534e-06, |
| "loss": 0.2387, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.5462184873949579, |
| "grad_norm": 5.327374692077757, |
| "learning_rate": 8.269766826484841e-06, |
| "loss": 0.2217, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.5478991596638656, |
| "grad_norm": 6.34817507695616, |
| "learning_rate": 8.259769206847286e-06, |
| "loss": 0.2707, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.5495798319327732, |
| "grad_norm": 4.381678895740831, |
| "learning_rate": 8.249748868041717e-06, |
| "loss": 0.2104, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.5512605042016807, |
| "grad_norm": 3.998236552947582, |
| "learning_rate": 8.239705879905519e-06, |
| "loss": 0.2198, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.5529411764705883, |
| "grad_norm": 4.213580958945381, |
| "learning_rate": 8.229640312433938e-06, |
| "loss": 0.1988, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.5546218487394958, |
| "grad_norm": 3.759112597696243, |
| "learning_rate": 8.219552235779578e-06, |
| "loss": 0.1866, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.5563025210084034, |
| "grad_norm": 3.3792729302736375, |
| "learning_rate": 8.209441720251934e-06, |
| "loss": 0.1907, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.5579831932773109, |
| "grad_norm": 4.944461357096941, |
| "learning_rate": 8.199308836316883e-06, |
| "loss": 0.2319, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.5596638655462185, |
| "grad_norm": 4.3235209989485925, |
| "learning_rate": 8.189153654596199e-06, |
| "loss": 0.1951, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.561344537815126, |
| "grad_norm": 4.706476875198324, |
| "learning_rate": 8.178976245867068e-06, |
| "loss": 0.1995, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.5630252100840336, |
| "grad_norm": 3.934741224212452, |
| "learning_rate": 8.168776681061583e-06, |
| "loss": 0.1704, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.5647058823529412, |
| "grad_norm": 4.4892628919370425, |
| "learning_rate": 8.158555031266255e-06, |
| "loss": 0.2723, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.5663865546218487, |
| "grad_norm": 4.718004820102291, |
| "learning_rate": 8.148311367721524e-06, |
| "loss": 0.221, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.5680672268907563, |
| "grad_norm": 4.176984062196839, |
| "learning_rate": 8.138045761821252e-06, |
| "loss": 0.2361, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.5697478991596638, |
| "grad_norm": 4.875232795035776, |
| "learning_rate": 8.127758285112226e-06, |
| "loss": 0.1787, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 5.5937101005600365, |
| "learning_rate": 8.117449009293668e-06, |
| "loss": 0.2216, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.573109243697479, |
| "grad_norm": 4.212760302913961, |
| "learning_rate": 8.107118006216732e-06, |
| "loss": 0.2994, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.5747899159663865, |
| "grad_norm": 3.3746602463279185, |
| "learning_rate": 8.096765347883995e-06, |
| "loss": 0.1657, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.5764705882352941, |
| "grad_norm": 3.9827276771268463, |
| "learning_rate": 8.086391106448965e-06, |
| "loss": 0.1944, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.5781512605042017, |
| "grad_norm": 4.092011786287589, |
| "learning_rate": 8.075995354215578e-06, |
| "loss": 0.1775, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.5798319327731093, |
| "grad_norm": 3.844261931463413, |
| "learning_rate": 8.065578163637686e-06, |
| "loss": 0.1849, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.5815126050420169, |
| "grad_norm": 4.048687763988565, |
| "learning_rate": 8.055139607318558e-06, |
| "loss": 0.2156, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.5831932773109244, |
| "grad_norm": 3.7618483841802335, |
| "learning_rate": 8.044679758010376e-06, |
| "loss": 0.2066, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.584873949579832, |
| "grad_norm": 3.737106642987954, |
| "learning_rate": 8.03419868861372e-06, |
| "loss": 0.1807, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.5865546218487395, |
| "grad_norm": 4.074003575121079, |
| "learning_rate": 8.023696472177068e-06, |
| "loss": 0.2227, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 4.406312200787636, |
| "learning_rate": 8.013173181896283e-06, |
| "loss": 0.2373, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.5899159663865546, |
| "grad_norm": 3.3827862112164917, |
| "learning_rate": 8.002628891114104e-06, |
| "loss": 0.1775, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.5915966386554622, |
| "grad_norm": 5.429752189803184, |
| "learning_rate": 7.992063673319632e-06, |
| "loss": 0.2702, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.5932773109243697, |
| "grad_norm": 4.120154578167831, |
| "learning_rate": 7.981477602147823e-06, |
| "loss": 0.2111, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.5949579831932773, |
| "grad_norm": 2.9979780735807946, |
| "learning_rate": 7.97087075137897e-06, |
| "loss": 0.1397, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.5966386554621849, |
| "grad_norm": 4.005290859935757, |
| "learning_rate": 7.960243194938192e-06, |
| "loss": 0.2193, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.5983193277310924, |
| "grad_norm": 4.690992613506787, |
| "learning_rate": 7.949595006894917e-06, |
| "loss": 0.2113, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 3.6002782273862195, |
| "learning_rate": 7.938926261462366e-06, |
| "loss": 0.2194, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.6016806722689075, |
| "grad_norm": 4.832797563091427, |
| "learning_rate": 7.928237032997037e-06, |
| "loss": 0.2212, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.6033613445378151, |
| "grad_norm": 3.954408670704424, |
| "learning_rate": 7.917527395998183e-06, |
| "loss": 0.2093, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.6050420168067226, |
| "grad_norm": 3.2290183482816897, |
| "learning_rate": 7.9067974251073e-06, |
| "loss": 0.1968, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.6067226890756302, |
| "grad_norm": 4.651476660595037, |
| "learning_rate": 7.896047195107599e-06, |
| "loss": 0.2358, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.6084033613445378, |
| "grad_norm": 4.357653576638546, |
| "learning_rate": 7.885276780923488e-06, |
| "loss": 0.2042, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.6100840336134454, |
| "grad_norm": 5.0662328604426365, |
| "learning_rate": 7.87448625762005e-06, |
| "loss": 0.2529, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.611764705882353, |
| "grad_norm": 4.096891845681164, |
| "learning_rate": 7.863675700402527e-06, |
| "loss": 0.2639, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.6134453781512605, |
| "grad_norm": 4.404970446064358, |
| "learning_rate": 7.852845184615776e-06, |
| "loss": 0.2659, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.6151260504201681, |
| "grad_norm": 4.205283221153625, |
| "learning_rate": 7.841994785743765e-06, |
| "loss": 0.2272, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.6168067226890757, |
| "grad_norm": 5.230739104122307, |
| "learning_rate": 7.831124579409036e-06, |
| "loss": 0.2448, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.6184873949579832, |
| "grad_norm": 3.6020763749502036, |
| "learning_rate": 7.820234641372182e-06, |
| "loss": 0.1714, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.6201680672268908, |
| "grad_norm": 5.00315419944731, |
| "learning_rate": 7.809325047531315e-06, |
| "loss": 0.2037, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.6218487394957983, |
| "grad_norm": 3.057913558877073, |
| "learning_rate": 7.798395873921542e-06, |
| "loss": 0.184, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.6235294117647059, |
| "grad_norm": 4.100355290117442, |
| "learning_rate": 7.787447196714428e-06, |
| "loss": 0.2343, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.6252100840336134, |
| "grad_norm": 4.158571878640022, |
| "learning_rate": 7.776479092217475e-06, |
| "loss": 0.249, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.626890756302521, |
| "grad_norm": 4.066662933301616, |
| "learning_rate": 7.76549163687358e-06, |
| "loss": 0.1849, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.6285714285714286, |
| "grad_norm": 4.089826297346638, |
| "learning_rate": 7.754484907260513e-06, |
| "loss": 0.1991, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.6302521008403361, |
| "grad_norm": 4.234826231319271, |
| "learning_rate": 7.743458980090371e-06, |
| "loss": 0.2463, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.6319327731092437, |
| "grad_norm": 3.265023140813625, |
| "learning_rate": 7.73241393220905e-06, |
| "loss": 0.1926, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.6336134453781512, |
| "grad_norm": 3.134882453177233, |
| "learning_rate": 7.721349840595713e-06, |
| "loss": 0.1627, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.6352941176470588, |
| "grad_norm": 4.423918239797939, |
| "learning_rate": 7.710266782362248e-06, |
| "loss": 0.2362, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.6369747899159663, |
| "grad_norm": 3.9529856544671573, |
| "learning_rate": 7.69916483475273e-06, |
| "loss": 0.1822, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.6386554621848739, |
| "grad_norm": 3.561131066780499, |
| "learning_rate": 7.688044075142888e-06, |
| "loss": 0.1633, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.6403361344537815, |
| "grad_norm": 4.054438364074705, |
| "learning_rate": 7.676904581039559e-06, |
| "loss": 0.201, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.6420168067226891, |
| "grad_norm": 3.19756418481164, |
| "learning_rate": 7.665746430080155e-06, |
| "loss": 0.1914, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.6436974789915967, |
| "grad_norm": 5.087773350167841, |
| "learning_rate": 7.654569700032112e-06, |
| "loss": 0.2399, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.6453781512605042, |
| "grad_norm": 4.8707614819292555, |
| "learning_rate": 7.643374468792364e-06, |
| "loss": 0.272, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.6470588235294118, |
| "grad_norm": 3.7201873865620305, |
| "learning_rate": 7.63216081438678e-06, |
| "loss": 0.1768, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.6487394957983194, |
| "grad_norm": 3.639086819183024, |
| "learning_rate": 7.620928814969636e-06, |
| "loss": 0.1856, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.6504201680672269, |
| "grad_norm": 4.5114182364446185, |
| "learning_rate": 7.609678548823065e-06, |
| "loss": 0.2406, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.6521008403361345, |
| "grad_norm": 4.37570960896787, |
| "learning_rate": 7.5984100943565055e-06, |
| "loss": 0.2287, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.653781512605042, |
| "grad_norm": 4.71815661233923, |
| "learning_rate": 7.587123530106171e-06, |
| "loss": 0.2254, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.6554621848739496, |
| "grad_norm": 4.948310397670234, |
| "learning_rate": 7.57581893473448e-06, |
| "loss": 0.2357, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.6571428571428571, |
| "grad_norm": 4.0251665782039465, |
| "learning_rate": 7.564496387029532e-06, |
| "loss": 0.1872, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.6588235294117647, |
| "grad_norm": 5.572454479666195, |
| "learning_rate": 7.553155965904535e-06, |
| "loss": 0.198, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.6605042016806723, |
| "grad_norm": 6.1555918443054285, |
| "learning_rate": 7.541797750397277e-06, |
| "loss": 0.2744, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.6621848739495798, |
| "grad_norm": 4.037230245840294, |
| "learning_rate": 7.530421819669558e-06, |
| "loss": 0.2114, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.6638655462184874, |
| "grad_norm": 3.2696215968590883, |
| "learning_rate": 7.519028253006649e-06, |
| "loss": 0.2114, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.6655462184873949, |
| "grad_norm": 4.073541716708416, |
| "learning_rate": 7.507617129816733e-06, |
| "loss": 0.2231, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.6672268907563025, |
| "grad_norm": 3.409976478951058, |
| "learning_rate": 7.496188529630359e-06, |
| "loss": 0.1906, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.66890756302521, |
| "grad_norm": 4.003033211601922, |
| "learning_rate": 7.484742532099878e-06, |
| "loss": 0.1951, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.6705882352941176, |
| "grad_norm": 5.3967018780533245, |
| "learning_rate": 7.473279216998896e-06, |
| "loss": 0.2938, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.6722689075630253, |
| "grad_norm": 4.650400594771881, |
| "learning_rate": 7.461798664221711e-06, |
| "loss": 0.2075, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.6739495798319328, |
| "grad_norm": 4.445862374489302, |
| "learning_rate": 7.450300953782768e-06, |
| "loss": 0.1727, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.6756302521008404, |
| "grad_norm": 3.6492298616881627, |
| "learning_rate": 7.438786165816084e-06, |
| "loss": 0.2474, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.6773109243697479, |
| "grad_norm": 4.174927493743256, |
| "learning_rate": 7.427254380574705e-06, |
| "loss": 0.2298, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.6789915966386555, |
| "grad_norm": 4.516892697613207, |
| "learning_rate": 7.415705678430138e-06, |
| "loss": 0.2445, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.680672268907563, |
| "grad_norm": 5.1555804344893845, |
| "learning_rate": 7.404140139871797e-06, |
| "loss": 0.2457, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.6823529411764706, |
| "grad_norm": 4.788105675096441, |
| "learning_rate": 7.392557845506433e-06, |
| "loss": 0.2454, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.6840336134453782, |
| "grad_norm": 3.8556159103630128, |
| "learning_rate": 7.380958876057581e-06, |
| "loss": 0.1904, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.6857142857142857, |
| "grad_norm": 3.923233518341112, |
| "learning_rate": 7.369343312364994e-06, |
| "loss": 0.191, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.6873949579831933, |
| "grad_norm": 4.439644054359771, |
| "learning_rate": 7.357711235384079e-06, |
| "loss": 0.1808, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.6890756302521008, |
| "grad_norm": 3.950994577812542, |
| "learning_rate": 7.346062726185332e-06, |
| "loss": 0.264, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.6907563025210084, |
| "grad_norm": 4.63010670550712, |
| "learning_rate": 7.3343978659537775e-06, |
| "loss": 0.1907, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.692436974789916, |
| "grad_norm": 3.2839801005028604, |
| "learning_rate": 7.3227167359883964e-06, |
| "loss": 0.1402, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.6941176470588235, |
| "grad_norm": 3.983435177231738, |
| "learning_rate": 7.311019417701567e-06, |
| "loss": 0.2036, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.6957983193277311, |
| "grad_norm": 4.171300817489276, |
| "learning_rate": 7.299305992618488e-06, |
| "loss": 0.2104, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.6974789915966386, |
| "grad_norm": 3.291598476495542, |
| "learning_rate": 7.287576542376616e-06, |
| "loss": 0.1918, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.6991596638655462, |
| "grad_norm": 3.0983336934959422, |
| "learning_rate": 7.275831148725101e-06, |
| "loss": 0.1717, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.7008403361344537, |
| "grad_norm": 3.472013647791573, |
| "learning_rate": 7.264069893524207e-06, |
| "loss": 0.1839, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.7025210084033613, |
| "grad_norm": 3.9532810808863905, |
| "learning_rate": 7.252292858744747e-06, |
| "loss": 0.1869, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.704201680672269, |
| "grad_norm": 3.886560617548254, |
| "learning_rate": 7.24050012646751e-06, |
| "loss": 0.2145, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.7058823529411765, |
| "grad_norm": 3.873829915587893, |
| "learning_rate": 7.2286917788826926e-06, |
| "loss": 0.1802, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.7075630252100841, |
| "grad_norm": 3.8215403691812555, |
| "learning_rate": 7.216867898289319e-06, |
| "loss": 0.1784, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.7092436974789916, |
| "grad_norm": 3.02923526458967, |
| "learning_rate": 7.2050285670946776e-06, |
| "loss": 0.1541, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.7109243697478992, |
| "grad_norm": 3.894230763890687, |
| "learning_rate": 7.193173867813735e-06, |
| "loss": 0.1612, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.7126050420168067, |
| "grad_norm": 4.346711078005211, |
| "learning_rate": 7.181303883068569e-06, |
| "loss": 0.2124, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 4.470881599786187, |
| "learning_rate": 7.169418695587791e-06, |
| "loss": 0.2095, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.7159663865546219, |
| "grad_norm": 4.802836466531396, |
| "learning_rate": 7.157518388205969e-06, |
| "loss": 0.1863, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.7176470588235294, |
| "grad_norm": 5.540903785045774, |
| "learning_rate": 7.145603043863045e-06, |
| "loss": 0.3212, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.719327731092437, |
| "grad_norm": 4.7139148814674305, |
| "learning_rate": 7.1336727456037716e-06, |
| "loss": 0.2206, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.7210084033613445, |
| "grad_norm": 4.370838665402854, |
| "learning_rate": 7.121727576577116e-06, |
| "loss": 0.1965, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.7226890756302521, |
| "grad_norm": 4.566376247147124, |
| "learning_rate": 7.109767620035689e-06, |
| "loss": 0.2085, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.7243697478991596, |
| "grad_norm": 4.642592708671401, |
| "learning_rate": 7.097792959335169e-06, |
| "loss": 0.2326, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.7260504201680672, |
| "grad_norm": 4.345117649290927, |
| "learning_rate": 7.0858036779337095e-06, |
| "loss": 0.2093, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.7277310924369748, |
| "grad_norm": 3.374330056349454, |
| "learning_rate": 7.0737998593913686e-06, |
| "loss": 0.1793, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.7294117647058823, |
| "grad_norm": 3.087076270827566, |
| "learning_rate": 7.061781587369518e-06, |
| "loss": 0.1545, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.7310924369747899, |
| "grad_norm": 4.534902982370824, |
| "learning_rate": 7.049748945630269e-06, |
| "loss": 0.2009, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.7327731092436974, |
| "grad_norm": 4.596100062760324, |
| "learning_rate": 7.037702018035878e-06, |
| "loss": 0.2578, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.7344537815126051, |
| "grad_norm": 4.584632394943085, |
| "learning_rate": 7.0256408885481694e-06, |
| "loss": 0.1796, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.7361344537815127, |
| "grad_norm": 5.400392053544899, |
| "learning_rate": 7.013565641227954e-06, |
| "loss": 0.2702, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.7378151260504202, |
| "grad_norm": 4.092852923049045, |
| "learning_rate": 7.001476360234428e-06, |
| "loss": 0.2004, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.7394957983193278, |
| "grad_norm": 3.5689773393131192, |
| "learning_rate": 6.989373129824605e-06, |
| "loss": 0.1711, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.7411764705882353, |
| "grad_norm": 4.434405027954638, |
| "learning_rate": 6.977256034352713e-06, |
| "loss": 0.1862, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.7428571428571429, |
| "grad_norm": 3.3402058102648784, |
| "learning_rate": 6.965125158269619e-06, |
| "loss": 0.1704, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.7445378151260504, |
| "grad_norm": 4.450842620271656, |
| "learning_rate": 6.952980586122231e-06, |
| "loss": 0.179, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.746218487394958, |
| "grad_norm": 4.5683781079735715, |
| "learning_rate": 6.940822402552914e-06, |
| "loss": 0.2515, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.7478991596638656, |
| "grad_norm": 3.936528425785662, |
| "learning_rate": 6.928650692298898e-06, |
| "loss": 0.1535, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.7495798319327731, |
| "grad_norm": 3.585367350636562, |
| "learning_rate": 6.916465540191692e-06, |
| "loss": 0.1368, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.7512605042016807, |
| "grad_norm": 4.195744282723126, |
| "learning_rate": 6.904267031156482e-06, |
| "loss": 0.2003, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.7529411764705882, |
| "grad_norm": 5.910759174258135, |
| "learning_rate": 6.892055250211552e-06, |
| "loss": 0.2944, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.7546218487394958, |
| "grad_norm": 2.926901791069088, |
| "learning_rate": 6.879830282467682e-06, |
| "loss": 0.1387, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.7563025210084033, |
| "grad_norm": 3.6549630804255946, |
| "learning_rate": 6.867592213127559e-06, |
| "loss": 0.1798, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.7579831932773109, |
| "grad_norm": 3.8334460927490093, |
| "learning_rate": 6.855341127485183e-06, |
| "loss": 0.1726, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.7596638655462185, |
| "grad_norm": 4.456207894333341, |
| "learning_rate": 6.84307711092527e-06, |
| "loss": 0.2311, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.761344537815126, |
| "grad_norm": 4.414789487098786, |
| "learning_rate": 6.8308002489226645e-06, |
| "loss": 0.2103, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.7630252100840336, |
| "grad_norm": 3.893046041679212, |
| "learning_rate": 6.81851062704173e-06, |
| "loss": 0.1943, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.7647058823529411, |
| "grad_norm": 3.43196206922528, |
| "learning_rate": 6.806208330935766e-06, |
| "loss": 0.1767, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.7663865546218488, |
| "grad_norm": 4.520503474294174, |
| "learning_rate": 6.793893446346405e-06, |
| "loss": 0.2123, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.7680672268907563, |
| "grad_norm": 3.964065429569578, |
| "learning_rate": 6.7815660591030155e-06, |
| "loss": 0.2025, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.7697478991596639, |
| "grad_norm": 4.19984280134145, |
| "learning_rate": 6.769226255122104e-06, |
| "loss": 0.1915, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.7714285714285715, |
| "grad_norm": 4.839865608547356, |
| "learning_rate": 6.7568741204067145e-06, |
| "loss": 0.2495, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.773109243697479, |
| "grad_norm": 3.7709931628629194, |
| "learning_rate": 6.744509741045835e-06, |
| "loss": 0.2231, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.7747899159663866, |
| "grad_norm": 4.272844439888244, |
| "learning_rate": 6.7321332032137875e-06, |
| "loss": 0.1612, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.7764705882352941, |
| "grad_norm": 3.9915676640206263, |
| "learning_rate": 6.719744593169642e-06, |
| "loss": 0.2039, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.7781512605042017, |
| "grad_norm": 3.8781159196470063, |
| "learning_rate": 6.7073439972565955e-06, |
| "loss": 0.2488, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.7798319327731092, |
| "grad_norm": 3.677549031841177, |
| "learning_rate": 6.6949315019013895e-06, |
| "loss": 0.1634, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.7815126050420168, |
| "grad_norm": 3.27402087914958, |
| "learning_rate": 6.682507193613697e-06, |
| "loss": 0.1907, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.7831932773109244, |
| "grad_norm": 4.551565457153353, |
| "learning_rate": 6.670071158985521e-06, |
| "loss": 0.2494, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.7848739495798319, |
| "grad_norm": 4.295403358109363, |
| "learning_rate": 6.657623484690593e-06, |
| "loss": 0.2359, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.7865546218487395, |
| "grad_norm": 4.889317330640271, |
| "learning_rate": 6.645164257483767e-06, |
| "loss": 0.2795, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.788235294117647, |
| "grad_norm": 3.5002311648455975, |
| "learning_rate": 6.6326935642004165e-06, |
| "loss": 0.2332, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.7899159663865546, |
| "grad_norm": 3.94464205537663, |
| "learning_rate": 6.62021149175583e-06, |
| "loss": 0.2435, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.7915966386554621, |
| "grad_norm": 3.8028700500611565, |
| "learning_rate": 6.607718127144601e-06, |
| "loss": 0.1995, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.7932773109243697, |
| "grad_norm": 4.525546500618434, |
| "learning_rate": 6.595213557440026e-06, |
| "loss": 0.2288, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.7949579831932773, |
| "grad_norm": 5.606880801822294, |
| "learning_rate": 6.582697869793495e-06, |
| "loss": 0.2867, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.7966386554621848, |
| "grad_norm": 3.6542402819017394, |
| "learning_rate": 6.570171151433887e-06, |
| "loss": 0.1618, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.7983193277310925, |
| "grad_norm": 4.210723963029238, |
| "learning_rate": 6.557633489666958e-06, |
| "loss": 0.2028, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 3.2867443961571956, |
| "learning_rate": 6.545084971874738e-06, |
| "loss": 0.1748, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.8016806722689076, |
| "grad_norm": 2.9348406753396694, |
| "learning_rate": 6.532525685514915e-06, |
| "loss": 0.1964, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.8033613445378152, |
| "grad_norm": 4.528700088502344, |
| "learning_rate": 6.519955718120231e-06, |
| "loss": 0.2665, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.8050420168067227, |
| "grad_norm": 3.942446224867525, |
| "learning_rate": 6.507375157297872e-06, |
| "loss": 0.1981, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.8067226890756303, |
| "grad_norm": 3.0398230461352687, |
| "learning_rate": 6.494784090728852e-06, |
| "loss": 0.1563, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.8084033613445378, |
| "grad_norm": 4.140405081211564, |
| "learning_rate": 6.482182606167409e-06, |
| "loss": 0.259, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.8100840336134454, |
| "grad_norm": 3.560603255149939, |
| "learning_rate": 6.469570791440385e-06, |
| "loss": 0.1507, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.8117647058823529, |
| "grad_norm": 3.864812587459039, |
| "learning_rate": 6.456948734446624e-06, |
| "loss": 0.2533, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.8134453781512605, |
| "grad_norm": 3.978398759949487, |
| "learning_rate": 6.444316523156352e-06, |
| "loss": 0.1719, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.8151260504201681, |
| "grad_norm": 3.906418335788084, |
| "learning_rate": 6.4316742456105645e-06, |
| "loss": 0.2105, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.8168067226890756, |
| "grad_norm": 4.288716787663696, |
| "learning_rate": 6.419021989920416e-06, |
| "loss": 0.257, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.8184873949579832, |
| "grad_norm": 3.613809860500844, |
| "learning_rate": 6.406359844266607e-06, |
| "loss": 0.2117, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.8201680672268907, |
| "grad_norm": 4.094846884836178, |
| "learning_rate": 6.393687896898759e-06, |
| "loss": 0.2074, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.8218487394957983, |
| "grad_norm": 3.7968850151738796, |
| "learning_rate": 6.381006236134817e-06, |
| "loss": 0.1863, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.8235294117647058, |
| "grad_norm": 4.003377153677425, |
| "learning_rate": 6.368314950360416e-06, |
| "loss": 0.2318, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.8252100840336134, |
| "grad_norm": 4.180943524371029, |
| "learning_rate": 6.355614128028277e-06, |
| "loss": 0.1799, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.826890756302521, |
| "grad_norm": 3.506573280410032, |
| "learning_rate": 6.342903857657585e-06, |
| "loss": 0.1787, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.8285714285714286, |
| "grad_norm": 4.146295337136301, |
| "learning_rate": 6.330184227833376e-06, |
| "loss": 0.2522, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.8302521008403362, |
| "grad_norm": 3.604462904458598, |
| "learning_rate": 6.317455327205916e-06, |
| "loss": 0.1934, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.8319327731092437, |
| "grad_norm": 3.368315681661413, |
| "learning_rate": 6.304717244490084e-06, |
| "loss": 0.1691, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.8336134453781513, |
| "grad_norm": 4.391274581535866, |
| "learning_rate": 6.291970068464755e-06, |
| "loss": 0.1983, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.8352941176470589, |
| "grad_norm": 3.9021942728409207, |
| "learning_rate": 6.279213887972179e-06, |
| "loss": 0.2088, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.8369747899159664, |
| "grad_norm": 4.660288821078394, |
| "learning_rate": 6.266448791917364e-06, |
| "loss": 0.2459, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.838655462184874, |
| "grad_norm": 3.6787269513356637, |
| "learning_rate": 6.253674869267457e-06, |
| "loss": 0.1722, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.8403361344537815, |
| "grad_norm": 2.8892867743590096, |
| "learning_rate": 6.24089220905112e-06, |
| "loss": 0.1319, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.8403361344537815, |
| "eval_loss": 0.19269125163555145, |
| "eval_runtime": 1.2338, |
| "eval_samples_per_second": 39.714, |
| "eval_steps_per_second": 10.536, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.8420168067226891, |
| "grad_norm": 4.305155123990724, |
| "learning_rate": 6.228100900357914e-06, |
| "loss": 0.2207, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.8436974789915966, |
| "grad_norm": 3.1815187896266863, |
| "learning_rate": 6.215301032337674e-06, |
| "loss": 0.1676, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.8453781512605042, |
| "grad_norm": 3.857922746443263, |
| "learning_rate": 6.202492694199893e-06, |
| "loss": 0.1769, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.8470588235294118, |
| "grad_norm": 4.386476438810581, |
| "learning_rate": 6.189675975213094e-06, |
| "loss": 0.2229, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.8487394957983193, |
| "grad_norm": 4.114744331162852, |
| "learning_rate": 6.176850964704213e-06, |
| "loss": 0.1997, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.8504201680672269, |
| "grad_norm": 3.6941593344593913, |
| "learning_rate": 6.164017752057972e-06, |
| "loss": 0.2237, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.8521008403361344, |
| "grad_norm": 3.0148646159345804, |
| "learning_rate": 6.151176426716261e-06, |
| "loss": 0.1536, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.853781512605042, |
| "grad_norm": 3.439147590128657, |
| "learning_rate": 6.13832707817751e-06, |
| "loss": 0.1735, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.8554621848739495, |
| "grad_norm": 3.6968600719242692, |
| "learning_rate": 6.125469795996065e-06, |
| "loss": 0.1789, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 3.355068222017964, |
| "learning_rate": 6.112604669781572e-06, |
| "loss": 0.2063, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.8588235294117647, |
| "grad_norm": 3.3680814260117735, |
| "learning_rate": 6.099731789198344e-06, |
| "loss": 0.1371, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.8605042016806723, |
| "grad_norm": 3.487954359774497, |
| "learning_rate": 6.0868512439647345e-06, |
| "loss": 0.1718, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.8621848739495799, |
| "grad_norm": 3.890256891189335, |
| "learning_rate": 6.073963123852522e-06, |
| "loss": 0.1765, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.8638655462184874, |
| "grad_norm": 3.7318020528328524, |
| "learning_rate": 6.061067518686277e-06, |
| "loss": 0.1817, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.865546218487395, |
| "grad_norm": 3.7656918774904544, |
| "learning_rate": 6.048164518342734e-06, |
| "loss": 0.2006, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.8672268907563025, |
| "grad_norm": 3.5973168312687487, |
| "learning_rate": 6.035254212750172e-06, |
| "loss": 0.2338, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.8689075630252101, |
| "grad_norm": 4.876839752819946, |
| "learning_rate": 6.022336691887785e-06, |
| "loss": 0.2091, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.8705882352941177, |
| "grad_norm": 3.868757342188446, |
| "learning_rate": 6.009412045785051e-06, |
| "loss": 0.2211, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.8722689075630252, |
| "grad_norm": 3.788258159682757, |
| "learning_rate": 5.996480364521114e-06, |
| "loss": 0.2236, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.8739495798319328, |
| "grad_norm": 3.9423457060806606, |
| "learning_rate": 5.983541738224141e-06, |
| "loss": 0.2087, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.8756302521008403, |
| "grad_norm": 4.0221101831521215, |
| "learning_rate": 5.970596257070711e-06, |
| "loss": 0.2095, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.8773109243697479, |
| "grad_norm": 3.5399279637868553, |
| "learning_rate": 5.957644011285173e-06, |
| "loss": 0.1803, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.8789915966386554, |
| "grad_norm": 3.960179989285108, |
| "learning_rate": 5.944685091139026e-06, |
| "loss": 0.2075, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.880672268907563, |
| "grad_norm": 3.791068687537994, |
| "learning_rate": 5.931719586950286e-06, |
| "loss": 0.2153, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.8823529411764706, |
| "grad_norm": 3.6833034726015317, |
| "learning_rate": 5.918747589082853e-06, |
| "loss": 0.1893, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.8840336134453781, |
| "grad_norm": 3.34569881611167, |
| "learning_rate": 5.905769187945889e-06, |
| "loss": 0.1874, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.8857142857142857, |
| "grad_norm": 3.5706859104839443, |
| "learning_rate": 5.892784473993184e-06, |
| "loss": 0.1515, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.8873949579831932, |
| "grad_norm": 3.2543249925897175, |
| "learning_rate": 5.879793537722525e-06, |
| "loss": 0.1589, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.8890756302521008, |
| "grad_norm": 3.835128233080215, |
| "learning_rate": 5.8667964696750625e-06, |
| "loss": 0.1623, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.8907563025210085, |
| "grad_norm": 4.145256581839058, |
| "learning_rate": 5.853793360434687e-06, |
| "loss": 0.2354, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.892436974789916, |
| "grad_norm": 3.5184174760119684, |
| "learning_rate": 5.840784300627396e-06, |
| "loss": 0.2513, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.8941176470588236, |
| "grad_norm": 3.85133303541918, |
| "learning_rate": 5.82776938092065e-06, |
| "loss": 0.179, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.8957983193277311, |
| "grad_norm": 4.151107547880621, |
| "learning_rate": 5.814748692022761e-06, |
| "loss": 0.1681, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.8974789915966387, |
| "grad_norm": 4.844244644198082, |
| "learning_rate": 5.801722324682243e-06, |
| "loss": 0.2482, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.8991596638655462, |
| "grad_norm": 3.128553167302584, |
| "learning_rate": 5.788690369687188e-06, |
| "loss": 0.1688, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.9008403361344538, |
| "grad_norm": 4.787289254729522, |
| "learning_rate": 5.775652917864633e-06, |
| "loss": 0.2269, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.9025210084033614, |
| "grad_norm": 4.200201714416603, |
| "learning_rate": 5.762610060079926e-06, |
| "loss": 0.2303, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.9042016806722689, |
| "grad_norm": 4.12447129777908, |
| "learning_rate": 5.749561887236088e-06, |
| "loss": 0.223, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.9058823529411765, |
| "grad_norm": 4.09214914019045, |
| "learning_rate": 5.736508490273189e-06, |
| "loss": 0.1983, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.907563025210084, |
| "grad_norm": 3.8622295106717903, |
| "learning_rate": 5.723449960167703e-06, |
| "loss": 0.1658, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.9092436974789916, |
| "grad_norm": 3.880296503246165, |
| "learning_rate": 5.710386387931886e-06, |
| "loss": 0.2093, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.9109243697478991, |
| "grad_norm": 3.8952684640098663, |
| "learning_rate": 5.697317864613127e-06, |
| "loss": 0.2113, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.9126050420168067, |
| "grad_norm": 5.702686238624931, |
| "learning_rate": 5.684244481293335e-06, |
| "loss": 0.3463, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.9142857142857143, |
| "grad_norm": 4.148502838419665, |
| "learning_rate": 5.671166329088278e-06, |
| "loss": 0.1912, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.9159663865546218, |
| "grad_norm": 4.268707661002628, |
| "learning_rate": 5.658083499146968e-06, |
| "loss": 0.2138, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.9176470588235294, |
| "grad_norm": 4.455105326523613, |
| "learning_rate": 5.644996082651018e-06, |
| "loss": 0.2346, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.9193277310924369, |
| "grad_norm": 3.4501943037207456, |
| "learning_rate": 5.6319041708140045e-06, |
| "loss": 0.1399, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.9210084033613445, |
| "grad_norm": 3.2939764503789197, |
| "learning_rate": 5.6188078548808366e-06, |
| "loss": 0.1744, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.9226890756302522, |
| "grad_norm": 3.980663885603751, |
| "learning_rate": 5.6057072261271194e-06, |
| "loss": 0.1973, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.9243697478991597, |
| "grad_norm": 4.204596605574354, |
| "learning_rate": 5.592602375858515e-06, |
| "loss": 0.2391, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.9260504201680673, |
| "grad_norm": 3.8468761629959447, |
| "learning_rate": 5.579493395410105e-06, |
| "loss": 0.2292, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.9277310924369748, |
| "grad_norm": 4.078220920961927, |
| "learning_rate": 5.566380376145762e-06, |
| "loss": 0.2246, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.9294117647058824, |
| "grad_norm": 3.4471781455989836, |
| "learning_rate": 5.553263409457504e-06, |
| "loss": 0.1621, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.9310924369747899, |
| "grad_norm": 3.8735614371403795, |
| "learning_rate": 5.540142586764862e-06, |
| "loss": 0.2191, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.9327731092436975, |
| "grad_norm": 4.252312135545805, |
| "learning_rate": 5.527017999514239e-06, |
| "loss": 0.2322, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.934453781512605, |
| "grad_norm": 3.82412550009896, |
| "learning_rate": 5.51388973917828e-06, |
| "loss": 0.1896, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.9361344537815126, |
| "grad_norm": 4.155221147014321, |
| "learning_rate": 5.5007578972552246e-06, |
| "loss": 0.2503, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.9378151260504202, |
| "grad_norm": 5.351585100726684, |
| "learning_rate": 5.4876225652682776e-06, |
| "loss": 0.2787, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.9394957983193277, |
| "grad_norm": 3.1779913442831313, |
| "learning_rate": 5.474483834764968e-06, |
| "loss": 0.1735, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.9411764705882353, |
| "grad_norm": 3.991015985678567, |
| "learning_rate": 5.46134179731651e-06, |
| "loss": 0.256, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.9428571428571428, |
| "grad_norm": 4.20790696331099, |
| "learning_rate": 5.448196544517168e-06, |
| "loss": 0.2059, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.9445378151260504, |
| "grad_norm": 3.4948615727030776, |
| "learning_rate": 5.435048167983613e-06, |
| "loss": 0.1985, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.946218487394958, |
| "grad_norm": 3.473819533320583, |
| "learning_rate": 5.421896759354288e-06, |
| "loss": 0.156, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.9478991596638655, |
| "grad_norm": 3.5100781536487915, |
| "learning_rate": 5.408742410288769e-06, |
| "loss": 0.1861, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.9495798319327731, |
| "grad_norm": 4.3829409660703265, |
| "learning_rate": 5.395585212467124e-06, |
| "loss": 0.1835, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.9512605042016806, |
| "grad_norm": 4.99516346331094, |
| "learning_rate": 5.382425257589277e-06, |
| "loss": 0.2931, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.9529411764705882, |
| "grad_norm": 3.8273375889082772, |
| "learning_rate": 5.36926263737437e-06, |
| "loss": 0.1865, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.9546218487394958, |
| "grad_norm": 3.4442977342234893, |
| "learning_rate": 5.356097443560116e-06, |
| "loss": 0.1864, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.9563025210084034, |
| "grad_norm": 3.466342287642546, |
| "learning_rate": 5.342929767902168e-06, |
| "loss": 0.1711, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.957983193277311, |
| "grad_norm": 4.347576000442588, |
| "learning_rate": 5.329759702173477e-06, |
| "loss": 0.2321, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.9596638655462185, |
| "grad_norm": 3.3229433415787573, |
| "learning_rate": 5.316587338163649e-06, |
| "loss": 0.166, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.9613445378151261, |
| "grad_norm": 4.009826790458202, |
| "learning_rate": 5.30341276767831e-06, |
| "loss": 0.1621, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.9630252100840336, |
| "grad_norm": 4.076780910110831, |
| "learning_rate": 5.290236082538464e-06, |
| "loss": 0.2166, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.9647058823529412, |
| "grad_norm": 3.7350445830670655, |
| "learning_rate": 5.27705737457985e-06, |
| "loss": 0.2014, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.9663865546218487, |
| "grad_norm": 4.830893698589951, |
| "learning_rate": 5.2638767356523125e-06, |
| "loss": 0.2067, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.9680672268907563, |
| "grad_norm": 3.400150909293186, |
| "learning_rate": 5.2506942576191466e-06, |
| "loss": 0.1654, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.9697478991596639, |
| "grad_norm": 3.7474326387520596, |
| "learning_rate": 5.23751003235647e-06, |
| "loss": 0.1942, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.9714285714285714, |
| "grad_norm": 3.7927268527910245, |
| "learning_rate": 5.224324151752575e-06, |
| "loss": 0.1804, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.973109243697479, |
| "grad_norm": 3.4344555746240424, |
| "learning_rate": 5.211136707707293e-06, |
| "loss": 0.1766, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.9747899159663865, |
| "grad_norm": 3.641318428686794, |
| "learning_rate": 5.197947792131348e-06, |
| "loss": 0.1824, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.9764705882352941, |
| "grad_norm": 3.928761105749297, |
| "learning_rate": 5.184757496945726e-06, |
| "loss": 0.1529, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.9781512605042016, |
| "grad_norm": 4.742597309312045, |
| "learning_rate": 5.1715659140810225e-06, |
| "loss": 0.2013, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.9798319327731092, |
| "grad_norm": 4.980888920820449, |
| "learning_rate": 5.158373135476811e-06, |
| "loss": 0.2352, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.9815126050420168, |
| "grad_norm": 3.485110939059201, |
| "learning_rate": 5.145179253080997e-06, |
| "loss": 0.1568, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.9831932773109243, |
| "grad_norm": 3.964452907941118, |
| "learning_rate": 5.131984358849182e-06, |
| "loss": 0.1985, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.984873949579832, |
| "grad_norm": 3.46258407863298, |
| "learning_rate": 5.118788544744016e-06, |
| "loss": 0.1444, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.9865546218487395, |
| "grad_norm": 3.7112975214320083, |
| "learning_rate": 5.105591902734561e-06, |
| "loss": 0.1687, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.9882352941176471, |
| "grad_norm": 4.641220841662963, |
| "learning_rate": 5.09239452479565e-06, |
| "loss": 0.189, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.9899159663865547, |
| "grad_norm": 3.775450292074785, |
| "learning_rate": 5.079196502907246e-06, |
| "loss": 0.1787, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.9915966386554622, |
| "grad_norm": 3.1329955863518633, |
| "learning_rate": 5.065997929053795e-06, |
| "loss": 0.1587, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.9932773109243698, |
| "grad_norm": 4.076665571532509, |
| "learning_rate": 5.052798895223597e-06, |
| "loss": 0.1631, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.9949579831932773, |
| "grad_norm": 4.242436811769809, |
| "learning_rate": 5.039599493408154e-06, |
| "loss": 0.2022, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.9966386554621849, |
| "grad_norm": 3.6846927374699603, |
| "learning_rate": 5.026399815601533e-06, |
| "loss": 0.1845, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.9983193277310924, |
| "grad_norm": 4.151082605904576, |
| "learning_rate": 5.0131999537997235e-06, |
| "loss": 0.193, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 4.076240791018785, |
| "learning_rate": 5e-06, |
| "loss": 0.1675, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.0016806722689076, |
| "grad_norm": 2.8260558868123344, |
| "learning_rate": 4.986800046200278e-06, |
| "loss": 0.1033, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.0033613445378151, |
| "grad_norm": 2.4638758993873733, |
| "learning_rate": 4.97360018439847e-06, |
| "loss": 0.0739, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.0050420168067227, |
| "grad_norm": 2.731312250527938, |
| "learning_rate": 4.960400506591848e-06, |
| "loss": 0.078, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.0067226890756302, |
| "grad_norm": 2.851110493741904, |
| "learning_rate": 4.947201104776404e-06, |
| "loss": 0.0819, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.0084033613445378, |
| "grad_norm": 3.6514539147116216, |
| "learning_rate": 4.934002070946206e-06, |
| "loss": 0.0869, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.0100840336134453, |
| "grad_norm": 3.007592142558298, |
| "learning_rate": 4.920803497092757e-06, |
| "loss": 0.0862, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.011764705882353, |
| "grad_norm": 2.4923023998651224, |
| "learning_rate": 4.907605475204352e-06, |
| "loss": 0.0818, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.0134453781512605, |
| "grad_norm": 3.9074853870198187, |
| "learning_rate": 4.894408097265441e-06, |
| "loss": 0.0982, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.015126050420168, |
| "grad_norm": 2.738820573589542, |
| "learning_rate": 4.881211455255986e-06, |
| "loss": 0.061, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.0168067226890756, |
| "grad_norm": 2.923472628759956, |
| "learning_rate": 4.86801564115082e-06, |
| "loss": 0.0947, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.0184873949579831, |
| "grad_norm": 2.8463378417161027, |
| "learning_rate": 4.854820746919005e-06, |
| "loss": 0.0992, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.0201680672268907, |
| "grad_norm": 3.3678742941776996, |
| "learning_rate": 4.8416268645231915e-06, |
| "loss": 0.0906, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.0218487394957982, |
| "grad_norm": 2.294318309383224, |
| "learning_rate": 4.82843408591898e-06, |
| "loss": 0.0765, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.0235294117647058, |
| "grad_norm": 2.5732816755240138, |
| "learning_rate": 4.815242503054277e-06, |
| "loss": 0.053, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.0252100840336134, |
| "grad_norm": 3.2633128617028273, |
| "learning_rate": 4.802052207868654e-06, |
| "loss": 0.0768, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.026890756302521, |
| "grad_norm": 3.494394392617399, |
| "learning_rate": 4.78886329229271e-06, |
| "loss": 0.0817, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.0285714285714285, |
| "grad_norm": 2.7203476878745727, |
| "learning_rate": 4.775675848247427e-06, |
| "loss": 0.0735, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.030252100840336, |
| "grad_norm": 4.00604536341265, |
| "learning_rate": 4.762489967643532e-06, |
| "loss": 0.093, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.0319327731092436, |
| "grad_norm": 5.6713321751245775, |
| "learning_rate": 4.749305742380853e-06, |
| "loss": 0.1251, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.0336134453781514, |
| "grad_norm": 3.69960957020687, |
| "learning_rate": 4.736123264347688e-06, |
| "loss": 0.0577, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.035294117647059, |
| "grad_norm": 4.033751938519058, |
| "learning_rate": 4.7229426254201504e-06, |
| "loss": 0.0932, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.0369747899159665, |
| "grad_norm": 4.34024174931469, |
| "learning_rate": 4.709763917461537e-06, |
| "loss": 0.1057, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.038655462184874, |
| "grad_norm": 4.306617374054065, |
| "learning_rate": 4.696587232321691e-06, |
| "loss": 0.092, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.0403361344537816, |
| "grad_norm": 3.948492964553929, |
| "learning_rate": 4.683412661836351e-06, |
| "loss": 0.1047, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.0420168067226891, |
| "grad_norm": 3.7774272002812275, |
| "learning_rate": 4.6702402978265235e-06, |
| "loss": 0.0482, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.0436974789915967, |
| "grad_norm": 3.774510074895978, |
| "learning_rate": 4.657070232097832e-06, |
| "loss": 0.0678, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.0453781512605043, |
| "grad_norm": 4.231562137608742, |
| "learning_rate": 4.643902556439885e-06, |
| "loss": 0.0613, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.0470588235294118, |
| "grad_norm": 3.470487492807781, |
| "learning_rate": 4.630737362625631e-06, |
| "loss": 0.0759, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.0487394957983194, |
| "grad_norm": 3.309954439385579, |
| "learning_rate": 4.6175747424107234e-06, |
| "loss": 0.0688, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.050420168067227, |
| "grad_norm": 6.516977912688737, |
| "learning_rate": 4.604414787532877e-06, |
| "loss": 0.128, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.0521008403361345, |
| "grad_norm": 2.82864423728578, |
| "learning_rate": 4.591257589711233e-06, |
| "loss": 0.0733, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.053781512605042, |
| "grad_norm": 4.24023591542261, |
| "learning_rate": 4.578103240645714e-06, |
| "loss": 0.0933, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.0554621848739496, |
| "grad_norm": 3.5934583792735495, |
| "learning_rate": 4.5649518320163885e-06, |
| "loss": 0.0521, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.0571428571428572, |
| "grad_norm": 2.4529016487205757, |
| "learning_rate": 4.551803455482833e-06, |
| "loss": 0.0679, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.0588235294117647, |
| "grad_norm": 3.6105584004853983, |
| "learning_rate": 4.53865820268349e-06, |
| "loss": 0.0784, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.0605042016806723, |
| "grad_norm": 4.102058266909981, |
| "learning_rate": 4.525516165235034e-06, |
| "loss": 0.1085, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.0621848739495798, |
| "grad_norm": 3.2927361829912827, |
| "learning_rate": 4.512377434731724e-06, |
| "loss": 0.0807, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.0638655462184874, |
| "grad_norm": 2.42452755770337, |
| "learning_rate": 4.499242102744777e-06, |
| "loss": 0.0536, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.065546218487395, |
| "grad_norm": 4.397700003303606, |
| "learning_rate": 4.486110260821722e-06, |
| "loss": 0.0999, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.0672268907563025, |
| "grad_norm": 4.5796843219518415, |
| "learning_rate": 4.4729820004857625e-06, |
| "loss": 0.1061, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.06890756302521, |
| "grad_norm": 3.249155400076322, |
| "learning_rate": 4.45985741323514e-06, |
| "loss": 0.0637, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.0705882352941176, |
| "grad_norm": 3.195791033403845, |
| "learning_rate": 4.446736590542497e-06, |
| "loss": 0.0495, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.0722689075630252, |
| "grad_norm": 4.633287166254214, |
| "learning_rate": 4.4336196238542394e-06, |
| "loss": 0.1159, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.0739495798319327, |
| "grad_norm": 3.7403134736334422, |
| "learning_rate": 4.420506604589897e-06, |
| "loss": 0.0908, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.0756302521008403, |
| "grad_norm": 3.455796997777909, |
| "learning_rate": 4.407397624141487e-06, |
| "loss": 0.0933, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.0773109243697478, |
| "grad_norm": 4.311267532340438, |
| "learning_rate": 4.394292773872882e-06, |
| "loss": 0.084, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.0789915966386554, |
| "grad_norm": 3.1805108046865107, |
| "learning_rate": 4.381192145119165e-06, |
| "loss": 0.0899, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.080672268907563, |
| "grad_norm": 4.48640343008081, |
| "learning_rate": 4.368095829185999e-06, |
| "loss": 0.0764, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.0823529411764705, |
| "grad_norm": 3.7052198008043495, |
| "learning_rate": 4.355003917348985e-06, |
| "loss": 0.0927, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.084033613445378, |
| "grad_norm": 2.728516187495892, |
| "learning_rate": 4.341916500853034e-06, |
| "loss": 0.0508, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.0857142857142856, |
| "grad_norm": 3.1223532912143828, |
| "learning_rate": 4.3288336709117246e-06, |
| "loss": 0.07, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.0873949579831932, |
| "grad_norm": 2.5244693951878205, |
| "learning_rate": 4.315755518706667e-06, |
| "loss": 0.0675, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.0890756302521007, |
| "grad_norm": 3.005200455986411, |
| "learning_rate": 4.302682135386873e-06, |
| "loss": 0.0769, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.0907563025210083, |
| "grad_norm": 3.1629945090674543, |
| "learning_rate": 4.289613612068118e-06, |
| "loss": 0.0588, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.092436974789916, |
| "grad_norm": 3.2297557131977874, |
| "learning_rate": 4.276550039832299e-06, |
| "loss": 0.0618, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.0941176470588236, |
| "grad_norm": 3.8464690490881597, |
| "learning_rate": 4.263491509726812e-06, |
| "loss": 0.0811, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.0957983193277312, |
| "grad_norm": 2.483505006443395, |
| "learning_rate": 4.250438112763911e-06, |
| "loss": 0.0409, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.0974789915966388, |
| "grad_norm": 2.6783407216299455, |
| "learning_rate": 4.237389939920075e-06, |
| "loss": 0.067, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.0991596638655463, |
| "grad_norm": 3.4364206440555, |
| "learning_rate": 4.224347082135367e-06, |
| "loss": 0.083, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.1008403361344539, |
| "grad_norm": 4.483374784861664, |
| "learning_rate": 4.211309630312813e-06, |
| "loss": 0.0707, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.1025210084033614, |
| "grad_norm": 2.8804417997633776, |
| "learning_rate": 4.198277675317758e-06, |
| "loss": 0.0783, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.104201680672269, |
| "grad_norm": 4.024917127829357, |
| "learning_rate": 4.185251307977241e-06, |
| "loss": 0.0952, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.1058823529411765, |
| "grad_norm": 3.029919492161537, |
| "learning_rate": 4.17223061907935e-06, |
| "loss": 0.0703, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.107563025210084, |
| "grad_norm": 4.159762100625219, |
| "learning_rate": 4.159215699372605e-06, |
| "loss": 0.0705, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.1092436974789917, |
| "grad_norm": 2.585359252326579, |
| "learning_rate": 4.146206639565313e-06, |
| "loss": 0.0588, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.1109243697478992, |
| "grad_norm": 3.1077598416909624, |
| "learning_rate": 4.133203530324938e-06, |
| "loss": 0.0714, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.1126050420168068, |
| "grad_norm": 3.8441626427860274, |
| "learning_rate": 4.120206462277478e-06, |
| "loss": 0.0874, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.1142857142857143, |
| "grad_norm": 2.9422268635935436, |
| "learning_rate": 4.107215526006818e-06, |
| "loss": 0.0598, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.1159663865546219, |
| "grad_norm": 3.1192685111992935, |
| "learning_rate": 4.094230812054113e-06, |
| "loss": 0.0721, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.1176470588235294, |
| "grad_norm": 3.53377011926501, |
| "learning_rate": 4.081252410917148e-06, |
| "loss": 0.0682, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.119327731092437, |
| "grad_norm": 3.674753388626168, |
| "learning_rate": 4.068280413049715e-06, |
| "loss": 0.0825, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.1210084033613446, |
| "grad_norm": 3.9666213447083067, |
| "learning_rate": 4.0553149088609745e-06, |
| "loss": 0.1018, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.122689075630252, |
| "grad_norm": 3.6420396886380724, |
| "learning_rate": 4.042355988714828e-06, |
| "loss": 0.0831, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.1243697478991597, |
| "grad_norm": 3.9967836672599324, |
| "learning_rate": 4.02940374292929e-06, |
| "loss": 0.1076, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.1260504201680672, |
| "grad_norm": 3.607820316984983, |
| "learning_rate": 4.01645826177586e-06, |
| "loss": 0.0665, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.1277310924369748, |
| "grad_norm": 4.383577038304497, |
| "learning_rate": 4.003519635478889e-06, |
| "loss": 0.0983, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.1294117647058823, |
| "grad_norm": 3.045483064524733, |
| "learning_rate": 3.99058795421495e-06, |
| "loss": 0.0738, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.13109243697479, |
| "grad_norm": 3.5868924253500696, |
| "learning_rate": 3.977663308112216e-06, |
| "loss": 0.078, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.1327731092436975, |
| "grad_norm": 3.9552854654283243, |
| "learning_rate": 3.9647457872498295e-06, |
| "loss": 0.1083, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.134453781512605, |
| "grad_norm": 3.123737566538539, |
| "learning_rate": 3.951835481657268e-06, |
| "loss": 0.0597, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.1361344537815126, |
| "grad_norm": 4.412159709913494, |
| "learning_rate": 3.938932481313725e-06, |
| "loss": 0.0911, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.1378151260504201, |
| "grad_norm": 5.484517514561643, |
| "learning_rate": 3.926036876147479e-06, |
| "loss": 0.0883, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.1394957983193277, |
| "grad_norm": 4.06808403561468, |
| "learning_rate": 3.913148756035266e-06, |
| "loss": 0.0825, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.1411764705882352, |
| "grad_norm": 2.9469250740798554, |
| "learning_rate": 3.9002682108016585e-06, |
| "loss": 0.0719, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.1428571428571428, |
| "grad_norm": 2.48192021077067, |
| "learning_rate": 3.887395330218429e-06, |
| "loss": 0.0454, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.1445378151260504, |
| "grad_norm": 3.4541071171494875, |
| "learning_rate": 3.874530204003937e-06, |
| "loss": 0.0899, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.146218487394958, |
| "grad_norm": 3.4381814076738797, |
| "learning_rate": 3.861672921822493e-06, |
| "loss": 0.0729, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.1478991596638655, |
| "grad_norm": 3.5999940971089748, |
| "learning_rate": 3.848823573283742e-06, |
| "loss": 0.0708, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.149579831932773, |
| "grad_norm": 3.649458495375344, |
| "learning_rate": 3.83598224794203e-06, |
| "loss": 0.083, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.1512605042016806, |
| "grad_norm": 3.375981309835745, |
| "learning_rate": 3.823149035295789e-06, |
| "loss": 0.0819, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.1529411764705881, |
| "grad_norm": 3.970338952510589, |
| "learning_rate": 3.8103240247869077e-06, |
| "loss": 0.0652, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.1546218487394957, |
| "grad_norm": 2.7421482149314342, |
| "learning_rate": 3.7975073058001092e-06, |
| "loss": 0.0586, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.1563025210084033, |
| "grad_norm": 3.268321864940929, |
| "learning_rate": 3.7846989676623265e-06, |
| "loss": 0.0926, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.1579831932773108, |
| "grad_norm": 3.586775391618902, |
| "learning_rate": 3.7718990996420875e-06, |
| "loss": 0.0733, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.1596638655462184, |
| "grad_norm": 3.3748255405441534, |
| "learning_rate": 3.7591077909488817e-06, |
| "loss": 0.0696, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.1613445378151261, |
| "grad_norm": 3.8838712829074242, |
| "learning_rate": 3.7463251307325432e-06, |
| "loss": 0.0734, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.1630252100840337, |
| "grad_norm": 3.0131056465098713, |
| "learning_rate": 3.7335512080826363e-06, |
| "loss": 0.0698, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.1647058823529413, |
| "grad_norm": 3.358638177868907, |
| "learning_rate": 3.720786112027822e-06, |
| "loss": 0.0905, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.1663865546218488, |
| "grad_norm": 2.9670467971339773, |
| "learning_rate": 3.708029931535246e-06, |
| "loss": 0.0645, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.1680672268907564, |
| "grad_norm": 4.426026291295541, |
| "learning_rate": 3.695282755509917e-06, |
| "loss": 0.0848, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.169747899159664, |
| "grad_norm": 2.6536083172365896, |
| "learning_rate": 3.682544672794085e-06, |
| "loss": 0.0671, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.1714285714285715, |
| "grad_norm": 2.784629510362822, |
| "learning_rate": 3.669815772166625e-06, |
| "loss": 0.0719, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.173109243697479, |
| "grad_norm": 3.0434417394574007, |
| "learning_rate": 3.6570961423424155e-06, |
| "loss": 0.1106, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.1747899159663866, |
| "grad_norm": 3.181246700838244, |
| "learning_rate": 3.644385871971725e-06, |
| "loss": 0.0604, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.1764705882352942, |
| "grad_norm": 2.792428283134699, |
| "learning_rate": 3.6316850496395863e-06, |
| "loss": 0.0639, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.1781512605042017, |
| "grad_norm": 3.3226768232667054, |
| "learning_rate": 3.618993763865185e-06, |
| "loss": 0.0792, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.1798319327731093, |
| "grad_norm": 2.857505547869181, |
| "learning_rate": 3.6063121031012417e-06, |
| "loss": 0.0578, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.1815126050420168, |
| "grad_norm": 3.2551731380467888, |
| "learning_rate": 3.5936401557333957e-06, |
| "loss": 0.0751, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.1831932773109244, |
| "grad_norm": 2.8917175801511386, |
| "learning_rate": 3.5809780100795853e-06, |
| "loss": 0.0756, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.184873949579832, |
| "grad_norm": 3.671906721272244, |
| "learning_rate": 3.568325754389438e-06, |
| "loss": 0.1162, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.1865546218487395, |
| "grad_norm": 3.3302257093373004, |
| "learning_rate": 3.5556834768436498e-06, |
| "loss": 0.0841, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.188235294117647, |
| "grad_norm": 4.600875308240711, |
| "learning_rate": 3.5430512655533774e-06, |
| "loss": 0.0997, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.1899159663865546, |
| "grad_norm": 2.681493548071181, |
| "learning_rate": 3.5304292085596166e-06, |
| "loss": 0.0587, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.1915966386554622, |
| "grad_norm": 4.633166803844994, |
| "learning_rate": 3.517817393832593e-06, |
| "loss": 0.082, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.1932773109243697, |
| "grad_norm": 3.112723495852739, |
| "learning_rate": 3.505215909271149e-06, |
| "loss": 0.071, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.1949579831932773, |
| "grad_norm": 4.400636552843737, |
| "learning_rate": 3.4926248427021293e-06, |
| "loss": 0.115, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.1966386554621848, |
| "grad_norm": 3.6647916654013026, |
| "learning_rate": 3.48004428187977e-06, |
| "loss": 0.1058, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.1983193277310924, |
| "grad_norm": 5.140848639409356, |
| "learning_rate": 3.4674743144850865e-06, |
| "loss": 0.135, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 3.150576093066749, |
| "learning_rate": 3.4549150281252635e-06, |
| "loss": 0.0823, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.2016806722689075, |
| "grad_norm": 2.812928703712718, |
| "learning_rate": 3.442366510333043e-06, |
| "loss": 0.0842, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.203361344537815, |
| "grad_norm": 3.212135115117658, |
| "learning_rate": 3.4298288485661147e-06, |
| "loss": 0.0751, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.2050420168067226, |
| "grad_norm": 3.799042933874438, |
| "learning_rate": 3.417302130206507e-06, |
| "loss": 0.083, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.2067226890756302, |
| "grad_norm": 4.475769779293008, |
| "learning_rate": 3.404786442559976e-06, |
| "loss": 0.1013, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.2084033613445377, |
| "grad_norm": 4.06485466865869, |
| "learning_rate": 3.3922818728554008e-06, |
| "loss": 0.0919, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.2100840336134453, |
| "grad_norm": 2.41822548073786, |
| "learning_rate": 3.3797885082441717e-06, |
| "loss": 0.0514, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.2117647058823529, |
| "grad_norm": 3.4968966029084623, |
| "learning_rate": 3.3673064357995844e-06, |
| "loss": 0.0861, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.2134453781512604, |
| "grad_norm": 3.474316911856891, |
| "learning_rate": 3.3548357425162347e-06, |
| "loss": 0.0941, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.2151260504201682, |
| "grad_norm": 2.865139040558747, |
| "learning_rate": 3.3423765153094097e-06, |
| "loss": 0.0683, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.2168067226890757, |
| "grad_norm": 3.1100792648330926, |
| "learning_rate": 3.3299288410144813e-06, |
| "loss": 0.0632, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.2184873949579833, |
| "grad_norm": 3.612471717248503, |
| "learning_rate": 3.3174928063863054e-06, |
| "loss": 0.0638, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.2201680672268909, |
| "grad_norm": 2.571773313771785, |
| "learning_rate": 3.3050684980986105e-06, |
| "loss": 0.0516, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.2218487394957984, |
| "grad_norm": 3.2675354986489147, |
| "learning_rate": 3.292656002743405e-06, |
| "loss": 0.0638, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.223529411764706, |
| "grad_norm": 3.5021127435373023, |
| "learning_rate": 3.2802554068303595e-06, |
| "loss": 0.0783, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.2252100840336135, |
| "grad_norm": 2.9029895571183446, |
| "learning_rate": 3.267866796786212e-06, |
| "loss": 0.0666, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.226890756302521, |
| "grad_norm": 2.381919361917146, |
| "learning_rate": 3.255490258954167e-06, |
| "loss": 0.0358, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.2285714285714286, |
| "grad_norm": 4.37685199400935, |
| "learning_rate": 3.2431258795932863e-06, |
| "loss": 0.1048, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.2302521008403362, |
| "grad_norm": 3.563536843629901, |
| "learning_rate": 3.2307737448778977e-06, |
| "loss": 0.0861, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.2319327731092438, |
| "grad_norm": 2.6074609627736387, |
| "learning_rate": 3.2184339408969857e-06, |
| "loss": 0.0591, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.2336134453781513, |
| "grad_norm": 3.222255678882669, |
| "learning_rate": 3.206106553653596e-06, |
| "loss": 0.068, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.2352941176470589, |
| "grad_norm": 3.0662427438038176, |
| "learning_rate": 3.1937916690642356e-06, |
| "loss": 0.059, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.2369747899159664, |
| "grad_norm": 3.9738908619535196, |
| "learning_rate": 3.181489372958272e-06, |
| "loss": 0.0813, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.238655462184874, |
| "grad_norm": 3.4998507094974367, |
| "learning_rate": 3.1691997510773376e-06, |
| "loss": 0.0738, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.2403361344537815, |
| "grad_norm": 3.360066353012223, |
| "learning_rate": 3.1569228890747305e-06, |
| "loss": 0.0915, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.242016806722689, |
| "grad_norm": 3.7980964862934936, |
| "learning_rate": 3.1446588725148186e-06, |
| "loss": 0.0792, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.2436974789915967, |
| "grad_norm": 4.364395487305871, |
| "learning_rate": 3.132407786872442e-06, |
| "loss": 0.0953, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.2453781512605042, |
| "grad_norm": 4.524790558239161, |
| "learning_rate": 3.120169717532319e-06, |
| "loss": 0.114, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.2470588235294118, |
| "grad_norm": 2.501082399010883, |
| "learning_rate": 3.107944749788449e-06, |
| "loss": 0.0666, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.2487394957983193, |
| "grad_norm": 3.165692341292902, |
| "learning_rate": 3.095732968843519e-06, |
| "loss": 0.0706, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.250420168067227, |
| "grad_norm": 3.1228575299987136, |
| "learning_rate": 3.0835344598083095e-06, |
| "loss": 0.0681, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.2521008403361344, |
| "grad_norm": 3.92884344358869, |
| "learning_rate": 3.0713493077011027e-06, |
| "loss": 0.0908, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.253781512605042, |
| "grad_norm": 2.365391092045409, |
| "learning_rate": 3.059177597447087e-06, |
| "loss": 0.0598, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.2554621848739496, |
| "grad_norm": 4.073880181854191, |
| "learning_rate": 3.0470194138777707e-06, |
| "loss": 0.0814, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.2571428571428571, |
| "grad_norm": 4.477490809903387, |
| "learning_rate": 3.0348748417303826e-06, |
| "loss": 0.0878, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.2588235294117647, |
| "grad_norm": 2.7792946761864497, |
| "learning_rate": 3.0227439656472878e-06, |
| "loss": 0.0501, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.2605042016806722, |
| "grad_norm": 2.4283549892797485, |
| "learning_rate": 3.0106268701753967e-06, |
| "loss": 0.0481, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.2621848739495798, |
| "grad_norm": 3.994164729590725, |
| "learning_rate": 2.9985236397655726e-06, |
| "loss": 0.0796, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.2638655462184873, |
| "grad_norm": 3.2881789906717733, |
| "learning_rate": 2.986434358772048e-06, |
| "loss": 0.054, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.265546218487395, |
| "grad_norm": 3.0735868424802653, |
| "learning_rate": 2.974359111451831e-06, |
| "loss": 0.0978, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.2672268907563025, |
| "grad_norm": 2.6779749000507453, |
| "learning_rate": 2.962297981964124e-06, |
| "loss": 0.0501, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.26890756302521, |
| "grad_norm": 3.9611547652129095, |
| "learning_rate": 2.950251054369733e-06, |
| "loss": 0.0769, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.2705882352941176, |
| "grad_norm": 3.6957752079688095, |
| "learning_rate": 2.9382184126304834e-06, |
| "loss": 0.0728, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.2722689075630251, |
| "grad_norm": 2.7774794174969757, |
| "learning_rate": 2.926200140608634e-06, |
| "loss": 0.0765, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.2739495798319327, |
| "grad_norm": 2.604238358761592, |
| "learning_rate": 2.9141963220662917e-06, |
| "loss": 0.0557, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.2756302521008402, |
| "grad_norm": 4.0260201986923585, |
| "learning_rate": 2.902207040664834e-06, |
| "loss": 0.0968, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.2773109243697478, |
| "grad_norm": 3.830288993136352, |
| "learning_rate": 2.8902323799643116e-06, |
| "loss": 0.0868, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.2789915966386554, |
| "grad_norm": 3.5769886193519693, |
| "learning_rate": 2.8782724234228876e-06, |
| "loss": 0.0647, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.280672268907563, |
| "grad_norm": 5.759630745437997, |
| "learning_rate": 2.8663272543962305e-06, |
| "loss": 0.084, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.2823529411764705, |
| "grad_norm": 3.1549545632891824, |
| "learning_rate": 2.8543969561369556e-06, |
| "loss": 0.0762, |
| "step": 763 |
| }, |
| { |
| "epoch": 1.284033613445378, |
| "grad_norm": 4.2731527378729846, |
| "learning_rate": 2.842481611794032e-06, |
| "loss": 0.0788, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.2857142857142856, |
| "grad_norm": 3.4697998803710868, |
| "learning_rate": 2.83058130441221e-06, |
| "loss": 0.0547, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.2873949579831931, |
| "grad_norm": 3.410299474955917, |
| "learning_rate": 2.818696116931431e-06, |
| "loss": 0.0837, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.289075630252101, |
| "grad_norm": 3.5616877511635754, |
| "learning_rate": 2.8068261321862667e-06, |
| "loss": 0.0703, |
| "step": 767 |
| }, |
| { |
| "epoch": 1.2907563025210085, |
| "grad_norm": 3.0509370835754503, |
| "learning_rate": 2.794971432905323e-06, |
| "loss": 0.0577, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.292436974789916, |
| "grad_norm": 2.8272150735022654, |
| "learning_rate": 2.7831321017106805e-06, |
| "loss": 0.0605, |
| "step": 769 |
| }, |
| { |
| "epoch": 1.2941176470588236, |
| "grad_norm": 4.147608557361858, |
| "learning_rate": 2.771308221117309e-06, |
| "loss": 0.1196, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.2957983193277312, |
| "grad_norm": 3.3124997919407946, |
| "learning_rate": 2.7594998735324905e-06, |
| "loss": 0.0846, |
| "step": 771 |
| }, |
| { |
| "epoch": 1.2974789915966387, |
| "grad_norm": 3.8012586970938993, |
| "learning_rate": 2.7477071412552554e-06, |
| "loss": 0.096, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.2991596638655463, |
| "grad_norm": 3.1553346227663543, |
| "learning_rate": 2.735930106475794e-06, |
| "loss": 0.0707, |
| "step": 773 |
| }, |
| { |
| "epoch": 1.3008403361344538, |
| "grad_norm": 2.7958280303718173, |
| "learning_rate": 2.724168851274901e-06, |
| "loss": 0.0715, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.3025210084033614, |
| "grad_norm": 3.1372008613992772, |
| "learning_rate": 2.712423457623385e-06, |
| "loss": 0.0983, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.304201680672269, |
| "grad_norm": 3.483988250360276, |
| "learning_rate": 2.7006940073815136e-06, |
| "loss": 0.0834, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.3058823529411765, |
| "grad_norm": 3.2442585566233304, |
| "learning_rate": 2.6889805822984348e-06, |
| "loss": 0.0664, |
| "step": 777 |
| }, |
| { |
| "epoch": 1.307563025210084, |
| "grad_norm": 2.679757415437461, |
| "learning_rate": 2.6772832640116035e-06, |
| "loss": 0.0625, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.3092436974789916, |
| "grad_norm": 3.715165066826765, |
| "learning_rate": 2.6656021340462246e-06, |
| "loss": 0.0976, |
| "step": 779 |
| }, |
| { |
| "epoch": 1.3109243697478992, |
| "grad_norm": 2.936192869439388, |
| "learning_rate": 2.6539372738146696e-06, |
| "loss": 0.0715, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.3126050420168067, |
| "grad_norm": 4.020034837920706, |
| "learning_rate": 2.6422887646159234e-06, |
| "loss": 0.1028, |
| "step": 781 |
| }, |
| { |
| "epoch": 1.3142857142857143, |
| "grad_norm": 3.20595776083432, |
| "learning_rate": 2.6306566876350072e-06, |
| "loss": 0.0599, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.3159663865546218, |
| "grad_norm": 3.1991584669083273, |
| "learning_rate": 2.619041123942419e-06, |
| "loss": 0.0848, |
| "step": 783 |
| }, |
| { |
| "epoch": 1.3176470588235294, |
| "grad_norm": 3.954684313309723, |
| "learning_rate": 2.607442154493568e-06, |
| "loss": 0.0737, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.319327731092437, |
| "grad_norm": 2.6473103137935023, |
| "learning_rate": 2.5958598601282036e-06, |
| "loss": 0.0613, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.3210084033613445, |
| "grad_norm": 2.826407561892185, |
| "learning_rate": 2.584294321569862e-06, |
| "loss": 0.0724, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.322689075630252, |
| "grad_norm": 3.5898393382793494, |
| "learning_rate": 2.572745619425296e-06, |
| "loss": 0.0655, |
| "step": 787 |
| }, |
| { |
| "epoch": 1.3243697478991596, |
| "grad_norm": 3.252531020220819, |
| "learning_rate": 2.561213834183919e-06, |
| "loss": 0.0838, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.3260504201680672, |
| "grad_norm": 2.530396975887401, |
| "learning_rate": 2.5496990462172344e-06, |
| "loss": 0.0399, |
| "step": 789 |
| }, |
| { |
| "epoch": 1.3277310924369747, |
| "grad_norm": 3.6056822958439327, |
| "learning_rate": 2.538201335778289e-06, |
| "loss": 0.0721, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.3294117647058823, |
| "grad_norm": 2.94093572457041, |
| "learning_rate": 2.526720783001107e-06, |
| "loss": 0.07, |
| "step": 791 |
| }, |
| { |
| "epoch": 1.3310924369747898, |
| "grad_norm": 3.1830814602946877, |
| "learning_rate": 2.5152574679001236e-06, |
| "loss": 0.0855, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.3327731092436974, |
| "grad_norm": 2.8576486748655636, |
| "learning_rate": 2.503811470369644e-06, |
| "loss": 0.081, |
| "step": 793 |
| }, |
| { |
| "epoch": 1.334453781512605, |
| "grad_norm": 3.754866972368774, |
| "learning_rate": 2.4923828701832682e-06, |
| "loss": 0.0722, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.3361344537815127, |
| "grad_norm": 3.4010955615044955, |
| "learning_rate": 2.4809717469933543e-06, |
| "loss": 0.071, |
| "step": 795 |
| }, |
| { |
| "epoch": 1.3378151260504203, |
| "grad_norm": 3.309802090876078, |
| "learning_rate": 2.469578180330444e-06, |
| "loss": 0.0843, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.3394957983193279, |
| "grad_norm": 3.719812421406123, |
| "learning_rate": 2.458202249602726e-06, |
| "loss": 0.0723, |
| "step": 797 |
| }, |
| { |
| "epoch": 1.3411764705882354, |
| "grad_norm": 2.964558896376801, |
| "learning_rate": 2.4468440340954664e-06, |
| "loss": 0.0696, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.342857142857143, |
| "grad_norm": 3.111713776639778, |
| "learning_rate": 2.43550361297047e-06, |
| "loss": 0.0952, |
| "step": 799 |
| }, |
| { |
| "epoch": 1.3445378151260505, |
| "grad_norm": 3.9430475538259633, |
| "learning_rate": 2.4241810652655197e-06, |
| "loss": 0.0603, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.346218487394958, |
| "grad_norm": 3.156839472343245, |
| "learning_rate": 2.4128764698938297e-06, |
| "loss": 0.0587, |
| "step": 801 |
| }, |
| { |
| "epoch": 1.3478991596638656, |
| "grad_norm": 2.9707320508794437, |
| "learning_rate": 2.4015899056434945e-06, |
| "loss": 0.0692, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.3495798319327732, |
| "grad_norm": 3.6116340592816565, |
| "learning_rate": 2.390321451176936e-06, |
| "loss": 0.0675, |
| "step": 803 |
| }, |
| { |
| "epoch": 1.3512605042016808, |
| "grad_norm": 3.217311154453433, |
| "learning_rate": 2.379071185030365e-06, |
| "loss": 0.0732, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.3529411764705883, |
| "grad_norm": 3.6926248506331576, |
| "learning_rate": 2.3678391856132203e-06, |
| "loss": 0.0571, |
| "step": 805 |
| }, |
| { |
| "epoch": 1.3546218487394959, |
| "grad_norm": 3.4897829856860816, |
| "learning_rate": 2.356625531207638e-06, |
| "loss": 0.0632, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.3563025210084034, |
| "grad_norm": 2.9222780601737823, |
| "learning_rate": 2.345430299967888e-06, |
| "loss": 0.0645, |
| "step": 807 |
| }, |
| { |
| "epoch": 1.357983193277311, |
| "grad_norm": 2.6894558711212877, |
| "learning_rate": 2.334253569919846e-06, |
| "loss": 0.0704, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.3596638655462185, |
| "grad_norm": 2.8905733444681325, |
| "learning_rate": 2.323095418960442e-06, |
| "loss": 0.0643, |
| "step": 809 |
| }, |
| { |
| "epoch": 1.361344537815126, |
| "grad_norm": 2.9154377681468997, |
| "learning_rate": 2.311955924857113e-06, |
| "loss": 0.08, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.3630252100840337, |
| "grad_norm": 3.8727199729180435, |
| "learning_rate": 2.3008351652472714e-06, |
| "loss": 0.0867, |
| "step": 811 |
| }, |
| { |
| "epoch": 1.3647058823529412, |
| "grad_norm": 3.3845490730017267, |
| "learning_rate": 2.289733217637753e-06, |
| "loss": 0.0683, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.3663865546218488, |
| "grad_norm": 3.373920847418989, |
| "learning_rate": 2.278650159404289e-06, |
| "loss": 0.0582, |
| "step": 813 |
| }, |
| { |
| "epoch": 1.3680672268907563, |
| "grad_norm": 3.2000406965333568, |
| "learning_rate": 2.267586067790952e-06, |
| "loss": 0.0761, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.3697478991596639, |
| "grad_norm": 3.161658970701597, |
| "learning_rate": 2.2565410199096322e-06, |
| "loss": 0.0852, |
| "step": 815 |
| }, |
| { |
| "epoch": 1.3714285714285714, |
| "grad_norm": 3.7295821902310005, |
| "learning_rate": 2.245515092739488e-06, |
| "loss": 0.0645, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.373109243697479, |
| "grad_norm": 3.691464649608567, |
| "learning_rate": 2.234508363126419e-06, |
| "loss": 0.1022, |
| "step": 817 |
| }, |
| { |
| "epoch": 1.3747899159663866, |
| "grad_norm": 3.474872908063579, |
| "learning_rate": 2.2235209077825264e-06, |
| "loss": 0.0904, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.3764705882352941, |
| "grad_norm": 2.425412221727446, |
| "learning_rate": 2.2125528032855727e-06, |
| "loss": 0.054, |
| "step": 819 |
| }, |
| { |
| "epoch": 1.3781512605042017, |
| "grad_norm": 3.9835616677585004, |
| "learning_rate": 2.2016041260784604e-06, |
| "loss": 0.0738, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.3798319327731092, |
| "grad_norm": 3.347359342136899, |
| "learning_rate": 2.1906749524686856e-06, |
| "loss": 0.0636, |
| "step": 821 |
| }, |
| { |
| "epoch": 1.3815126050420168, |
| "grad_norm": 2.658874860597347, |
| "learning_rate": 2.1797653586278193e-06, |
| "loss": 0.0655, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.3831932773109243, |
| "grad_norm": 4.133400652102387, |
| "learning_rate": 2.168875420590965e-06, |
| "loss": 0.0849, |
| "step": 823 |
| }, |
| { |
| "epoch": 1.384873949579832, |
| "grad_norm": 3.4119688167441242, |
| "learning_rate": 2.158005214256236e-06, |
| "loss": 0.0762, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.3865546218487395, |
| "grad_norm": 3.419640662395995, |
| "learning_rate": 2.147154815384226e-06, |
| "loss": 0.0725, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.388235294117647, |
| "grad_norm": 3.2329083905175446, |
| "learning_rate": 2.136324299597474e-06, |
| "loss": 0.0656, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.3899159663865546, |
| "grad_norm": 3.302465939051848, |
| "learning_rate": 2.12551374237995e-06, |
| "loss": 0.0841, |
| "step": 827 |
| }, |
| { |
| "epoch": 1.3915966386554621, |
| "grad_norm": 2.747466701190292, |
| "learning_rate": 2.1147232190765137e-06, |
| "loss": 0.0762, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.3932773109243697, |
| "grad_norm": 3.529029515356708, |
| "learning_rate": 2.1039528048924043e-06, |
| "loss": 0.0614, |
| "step": 829 |
| }, |
| { |
| "epoch": 1.3949579831932772, |
| "grad_norm": 3.744954621522683, |
| "learning_rate": 2.0932025748927015e-06, |
| "loss": 0.0925, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.3966386554621848, |
| "grad_norm": 2.8163888129776913, |
| "learning_rate": 2.0824726040018174e-06, |
| "loss": 0.0484, |
| "step": 831 |
| }, |
| { |
| "epoch": 1.3983193277310924, |
| "grad_norm": 3.021692852469359, |
| "learning_rate": 2.0717629670029653e-06, |
| "loss": 0.0567, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 3.8572687066728304, |
| "learning_rate": 2.061073738537635e-06, |
| "loss": 0.0679, |
| "step": 833 |
| }, |
| { |
| "epoch": 1.4016806722689075, |
| "grad_norm": 3.2949355182108127, |
| "learning_rate": 2.050404993105085e-06, |
| "loss": 0.0589, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.403361344537815, |
| "grad_norm": 2.7633377494695495, |
| "learning_rate": 2.0397568050618095e-06, |
| "loss": 0.0609, |
| "step": 835 |
| }, |
| { |
| "epoch": 1.4050420168067226, |
| "grad_norm": 4.202873524739907, |
| "learning_rate": 2.0291292486210327e-06, |
| "loss": 0.1041, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.4067226890756301, |
| "grad_norm": 3.0297174653829506, |
| "learning_rate": 2.018522397852178e-06, |
| "loss": 0.0577, |
| "step": 837 |
| }, |
| { |
| "epoch": 1.4084033613445377, |
| "grad_norm": 5.096519904720835, |
| "learning_rate": 2.0079363266803696e-06, |
| "loss": 0.1025, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.4100840336134453, |
| "grad_norm": 3.4530721843432786, |
| "learning_rate": 1.9973711088858973e-06, |
| "loss": 0.0668, |
| "step": 839 |
| }, |
| { |
| "epoch": 1.4117647058823528, |
| "grad_norm": 4.742716235589666, |
| "learning_rate": 1.9868268181037186e-06, |
| "loss": 0.0825, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.4134453781512604, |
| "grad_norm": 4.061809503594159, |
| "learning_rate": 1.976303527822933e-06, |
| "loss": 0.0916, |
| "step": 841 |
| }, |
| { |
| "epoch": 1.4151260504201681, |
| "grad_norm": 2.4976436771213466, |
| "learning_rate": 1.9658013113862806e-06, |
| "loss": 0.0509, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.4168067226890757, |
| "grad_norm": 3.6299683241023644, |
| "learning_rate": 1.9553202419896256e-06, |
| "loss": 0.0788, |
| "step": 843 |
| }, |
| { |
| "epoch": 1.4184873949579833, |
| "grad_norm": 3.511030363971081, |
| "learning_rate": 1.944860392681442e-06, |
| "loss": 0.0861, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.4201680672268908, |
| "grad_norm": 4.2560912674457825, |
| "learning_rate": 1.934421836362315e-06, |
| "loss": 0.0859, |
| "step": 845 |
| }, |
| { |
| "epoch": 1.4218487394957984, |
| "grad_norm": 2.7976331399633056, |
| "learning_rate": 1.9240046457844223e-06, |
| "loss": 0.0564, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.423529411764706, |
| "grad_norm": 3.328285171512968, |
| "learning_rate": 1.913608893551036e-06, |
| "loss": 0.0787, |
| "step": 847 |
| }, |
| { |
| "epoch": 1.4252100840336135, |
| "grad_norm": 3.7878960189817876, |
| "learning_rate": 1.9032346521160066e-06, |
| "loss": 0.0724, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.426890756302521, |
| "grad_norm": 2.8858346913152544, |
| "learning_rate": 1.8928819937832689e-06, |
| "loss": 0.0773, |
| "step": 849 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 2.7302815253012596, |
| "learning_rate": 1.8825509907063328e-06, |
| "loss": 0.0723, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.4302521008403362, |
| "grad_norm": 3.233101512998323, |
| "learning_rate": 1.8722417148877752e-06, |
| "loss": 0.0694, |
| "step": 851 |
| }, |
| { |
| "epoch": 1.4319327731092437, |
| "grad_norm": 3.3469400451646103, |
| "learning_rate": 1.8619542381787508e-06, |
| "loss": 0.0647, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.4336134453781513, |
| "grad_norm": 3.133653343298944, |
| "learning_rate": 1.851688632278476e-06, |
| "loss": 0.0588, |
| "step": 853 |
| }, |
| { |
| "epoch": 1.4352941176470588, |
| "grad_norm": 3.6247472557751186, |
| "learning_rate": 1.8414449687337467e-06, |
| "loss": 0.0798, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.4369747899159664, |
| "grad_norm": 3.8304422336907824, |
| "learning_rate": 1.8312233189384194e-06, |
| "loss": 0.0842, |
| "step": 855 |
| }, |
| { |
| "epoch": 1.438655462184874, |
| "grad_norm": 3.374646172759464, |
| "learning_rate": 1.821023754132933e-06, |
| "loss": 0.056, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.4403361344537815, |
| "grad_norm": 3.934404310282199, |
| "learning_rate": 1.8108463454038022e-06, |
| "loss": 0.0747, |
| "step": 857 |
| }, |
| { |
| "epoch": 1.442016806722689, |
| "grad_norm": 3.363490582454245, |
| "learning_rate": 1.800691163683118e-06, |
| "loss": 0.0607, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.4436974789915966, |
| "grad_norm": 3.3531978696324085, |
| "learning_rate": 1.790558279748067e-06, |
| "loss": 0.117, |
| "step": 859 |
| }, |
| { |
| "epoch": 1.4453781512605042, |
| "grad_norm": 2.9364782421206628, |
| "learning_rate": 1.780447764220422e-06, |
| "loss": 0.067, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.4470588235294117, |
| "grad_norm": 2.927454347382001, |
| "learning_rate": 1.7703596875660645e-06, |
| "loss": 0.0559, |
| "step": 861 |
| }, |
| { |
| "epoch": 1.4487394957983193, |
| "grad_norm": 2.691782762898773, |
| "learning_rate": 1.7602941200944812e-06, |
| "loss": 0.051, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.4504201680672268, |
| "grad_norm": 3.1537634566918156, |
| "learning_rate": 1.7502511319582855e-06, |
| "loss": 0.0588, |
| "step": 863 |
| }, |
| { |
| "epoch": 1.4521008403361344, |
| "grad_norm": 2.370144487772094, |
| "learning_rate": 1.7402307931527157e-06, |
| "loss": 0.0492, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.453781512605042, |
| "grad_norm": 3.0105957574806217, |
| "learning_rate": 1.7302331735151594e-06, |
| "loss": 0.088, |
| "step": 865 |
| }, |
| { |
| "epoch": 1.4554621848739495, |
| "grad_norm": 3.279739938039434, |
| "learning_rate": 1.7202583427246633e-06, |
| "loss": 0.0902, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.457142857142857, |
| "grad_norm": 2.7217645473901273, |
| "learning_rate": 1.7103063703014372e-06, |
| "loss": 0.0608, |
| "step": 867 |
| }, |
| { |
| "epoch": 1.4588235294117646, |
| "grad_norm": 2.5822955700611967, |
| "learning_rate": 1.7003773256063882e-06, |
| "loss": 0.0686, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.4605042016806722, |
| "grad_norm": 3.63221129177691, |
| "learning_rate": 1.690471277840619e-06, |
| "loss": 0.0557, |
| "step": 869 |
| }, |
| { |
| "epoch": 1.46218487394958, |
| "grad_norm": 3.570715915651536, |
| "learning_rate": 1.6805882960449594e-06, |
| "loss": 0.0839, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.4638655462184875, |
| "grad_norm": 4.328501618442257, |
| "learning_rate": 1.6707284490994746e-06, |
| "loss": 0.0749, |
| "step": 871 |
| }, |
| { |
| "epoch": 1.465546218487395, |
| "grad_norm": 3.433908960710609, |
| "learning_rate": 1.6608918057229944e-06, |
| "loss": 0.0646, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.4672268907563026, |
| "grad_norm": 3.9068591126104173, |
| "learning_rate": 1.6510784344726294e-06, |
| "loss": 0.073, |
| "step": 873 |
| }, |
| { |
| "epoch": 1.4689075630252102, |
| "grad_norm": 2.2341793133831893, |
| "learning_rate": 1.6412884037432875e-06, |
| "loss": 0.0659, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.4705882352941178, |
| "grad_norm": 3.0973040136017396, |
| "learning_rate": 1.6315217817672142e-06, |
| "loss": 0.0549, |
| "step": 875 |
| }, |
| { |
| "epoch": 1.4722689075630253, |
| "grad_norm": 2.90499093270013, |
| "learning_rate": 1.6217786366134953e-06, |
| "loss": 0.0617, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.4739495798319329, |
| "grad_norm": 3.4404460266407746, |
| "learning_rate": 1.612059036187602e-06, |
| "loss": 0.0899, |
| "step": 877 |
| }, |
| { |
| "epoch": 1.4756302521008404, |
| "grad_norm": 2.3746789596139117, |
| "learning_rate": 1.6023630482309017e-06, |
| "loss": 0.0388, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.477310924369748, |
| "grad_norm": 2.4723482281971836, |
| "learning_rate": 1.5926907403202001e-06, |
| "loss": 0.0697, |
| "step": 879 |
| }, |
| { |
| "epoch": 1.4789915966386555, |
| "grad_norm": 3.5714612174603197, |
| "learning_rate": 1.5830421798672568e-06, |
| "loss": 0.0533, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.480672268907563, |
| "grad_norm": 3.755706414606266, |
| "learning_rate": 1.5734174341183284e-06, |
| "loss": 0.0821, |
| "step": 881 |
| }, |
| { |
| "epoch": 1.4823529411764707, |
| "grad_norm": 3.152624513233021, |
| "learning_rate": 1.5638165701536866e-06, |
| "loss": 0.0664, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.4840336134453782, |
| "grad_norm": 3.911741470369773, |
| "learning_rate": 1.554239654887163e-06, |
| "loss": 0.0669, |
| "step": 883 |
| }, |
| { |
| "epoch": 1.4857142857142858, |
| "grad_norm": 2.9095761902447803, |
| "learning_rate": 1.544686755065677e-06, |
| "loss": 0.0747, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.4873949579831933, |
| "grad_norm": 3.0824905880712468, |
| "learning_rate": 1.5351579372687658e-06, |
| "loss": 0.0633, |
| "step": 885 |
| }, |
| { |
| "epoch": 1.4890756302521009, |
| "grad_norm": 4.286560398167493, |
| "learning_rate": 1.525653267908132e-06, |
| "loss": 0.0796, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.4907563025210084, |
| "grad_norm": 3.4776797987296173, |
| "learning_rate": 1.5161728132271674e-06, |
| "loss": 0.0847, |
| "step": 887 |
| }, |
| { |
| "epoch": 1.492436974789916, |
| "grad_norm": 2.453146716219204, |
| "learning_rate": 1.5067166393005055e-06, |
| "loss": 0.0501, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.4941176470588236, |
| "grad_norm": 3.2190789264340722, |
| "learning_rate": 1.4972848120335453e-06, |
| "loss": 0.0785, |
| "step": 889 |
| }, |
| { |
| "epoch": 1.495798319327731, |
| "grad_norm": 3.0703657065913488, |
| "learning_rate": 1.4878773971620076e-06, |
| "loss": 0.0591, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.4974789915966387, |
| "grad_norm": 3.139122876389468, |
| "learning_rate": 1.4784944602514662e-06, |
| "loss": 0.0512, |
| "step": 891 |
| }, |
| { |
| "epoch": 1.4991596638655462, |
| "grad_norm": 4.209187908547148, |
| "learning_rate": 1.4691360666968923e-06, |
| "loss": 0.1004, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.5008403361344538, |
| "grad_norm": 3.0345447924030777, |
| "learning_rate": 1.4598022817222058e-06, |
| "loss": 0.0794, |
| "step": 893 |
| }, |
| { |
| "epoch": 1.5025210084033613, |
| "grad_norm": 3.890049029138699, |
| "learning_rate": 1.4504931703798086e-06, |
| "loss": 0.0656, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.504201680672269, |
| "grad_norm": 3.314495638248082, |
| "learning_rate": 1.4412087975501459e-06, |
| "loss": 0.0622, |
| "step": 895 |
| }, |
| { |
| "epoch": 1.5058823529411764, |
| "grad_norm": 4.090407824067872, |
| "learning_rate": 1.4319492279412388e-06, |
| "loss": 0.0589, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.507563025210084, |
| "grad_norm": 3.023129387472712, |
| "learning_rate": 1.4227145260882463e-06, |
| "loss": 0.0722, |
| "step": 897 |
| }, |
| { |
| "epoch": 1.5092436974789916, |
| "grad_norm": 4.878127172916561, |
| "learning_rate": 1.413504756353009e-06, |
| "loss": 0.1063, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.5109243697478991, |
| "grad_norm": 3.6392372529586914, |
| "learning_rate": 1.4043199829235983e-06, |
| "loss": 0.077, |
| "step": 899 |
| }, |
| { |
| "epoch": 1.5126050420168067, |
| "grad_norm": 3.8202214737174747, |
| "learning_rate": 1.3951602698138773e-06, |
| "loss": 0.0704, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.5142857142857142, |
| "grad_norm": 3.310249153920368, |
| "learning_rate": 1.3860256808630429e-06, |
| "loss": 0.0624, |
| "step": 901 |
| }, |
| { |
| "epoch": 1.5159663865546218, |
| "grad_norm": 2.5229149653319727, |
| "learning_rate": 1.3769162797351953e-06, |
| "loss": 0.0432, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.5176470588235293, |
| "grad_norm": 4.604525464252146, |
| "learning_rate": 1.3678321299188802e-06, |
| "loss": 0.1086, |
| "step": 903 |
| }, |
| { |
| "epoch": 1.519327731092437, |
| "grad_norm": 2.9709223655943453, |
| "learning_rate": 1.3587732947266557e-06, |
| "loss": 0.0624, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.5210084033613445, |
| "grad_norm": 3.331172827523415, |
| "learning_rate": 1.34973983729465e-06, |
| "loss": 0.0573, |
| "step": 905 |
| }, |
| { |
| "epoch": 1.522689075630252, |
| "grad_norm": 3.5693054861844367, |
| "learning_rate": 1.340731820582114e-06, |
| "loss": 0.0692, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.5243697478991596, |
| "grad_norm": 3.4050102849137778, |
| "learning_rate": 1.3317493073709936e-06, |
| "loss": 0.0658, |
| "step": 907 |
| }, |
| { |
| "epoch": 1.5260504201680671, |
| "grad_norm": 2.7092516800684314, |
| "learning_rate": 1.3227923602654808e-06, |
| "loss": 0.0568, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.5277310924369747, |
| "grad_norm": 2.948568924947018, |
| "learning_rate": 1.3138610416915887e-06, |
| "loss": 0.0655, |
| "step": 909 |
| }, |
| { |
| "epoch": 1.5294117647058822, |
| "grad_norm": 3.296455192826209, |
| "learning_rate": 1.3049554138967052e-06, |
| "loss": 0.0528, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.5310924369747898, |
| "grad_norm": 2.567047257565957, |
| "learning_rate": 1.2960755389491703e-06, |
| "loss": 0.0467, |
| "step": 911 |
| }, |
| { |
| "epoch": 1.5327731092436974, |
| "grad_norm": 3.6779655883833304, |
| "learning_rate": 1.2872214787378306e-06, |
| "loss": 0.0677, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.534453781512605, |
| "grad_norm": 4.383239615603696, |
| "learning_rate": 1.278393294971626e-06, |
| "loss": 0.1001, |
| "step": 913 |
| }, |
| { |
| "epoch": 1.5361344537815125, |
| "grad_norm": 3.239950652852017, |
| "learning_rate": 1.269591049179138e-06, |
| "loss": 0.0575, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.53781512605042, |
| "grad_norm": 3.267287830498372, |
| "learning_rate": 1.2608148027081773e-06, |
| "loss": 0.0666, |
| "step": 915 |
| }, |
| { |
| "epoch": 1.5394957983193276, |
| "grad_norm": 3.8006140208799137, |
| "learning_rate": 1.2520646167253514e-06, |
| "loss": 0.0869, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.5411764705882351, |
| "grad_norm": 3.632458756246524, |
| "learning_rate": 1.2433405522156334e-06, |
| "loss": 0.0534, |
| "step": 917 |
| }, |
| { |
| "epoch": 1.5428571428571427, |
| "grad_norm": 2.9078940235412976, |
| "learning_rate": 1.234642669981946e-06, |
| "loss": 0.0579, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.5445378151260503, |
| "grad_norm": 3.7248910970111204, |
| "learning_rate": 1.2259710306447275e-06, |
| "loss": 0.0904, |
| "step": 919 |
| }, |
| { |
| "epoch": 1.5462184873949578, |
| "grad_norm": 3.306059100125909, |
| "learning_rate": 1.2173256946415214e-06, |
| "loss": 0.0705, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.5478991596638656, |
| "grad_norm": 4.470317231270553, |
| "learning_rate": 1.2087067222265409e-06, |
| "loss": 0.0695, |
| "step": 921 |
| }, |
| { |
| "epoch": 1.5495798319327732, |
| "grad_norm": 3.749413820788246, |
| "learning_rate": 1.2001141734702625e-06, |
| "loss": 0.0758, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.5512605042016807, |
| "grad_norm": 3.1346413732999325, |
| "learning_rate": 1.1915481082589998e-06, |
| "loss": 0.0591, |
| "step": 923 |
| }, |
| { |
| "epoch": 1.5529411764705883, |
| "grad_norm": 3.130619985905547, |
| "learning_rate": 1.1830085862944851e-06, |
| "loss": 0.0717, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.5546218487394958, |
| "grad_norm": 2.2960691852895985, |
| "learning_rate": 1.17449566709346e-06, |
| "loss": 0.0382, |
| "step": 925 |
| }, |
| { |
| "epoch": 1.5563025210084034, |
| "grad_norm": 2.62733850882563, |
| "learning_rate": 1.166009409987251e-06, |
| "loss": 0.0532, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.557983193277311, |
| "grad_norm": 2.987104215317316, |
| "learning_rate": 1.1575498741213682e-06, |
| "loss": 0.0616, |
| "step": 927 |
| }, |
| { |
| "epoch": 1.5596638655462185, |
| "grad_norm": 3.129195240880437, |
| "learning_rate": 1.1491171184550799e-06, |
| "loss": 0.0509, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.561344537815126, |
| "grad_norm": 3.75801919908673, |
| "learning_rate": 1.1407112017610134e-06, |
| "loss": 0.0756, |
| "step": 929 |
| }, |
| { |
| "epoch": 1.5630252100840336, |
| "grad_norm": 4.590561742167558, |
| "learning_rate": 1.1323321826247347e-06, |
| "loss": 0.0709, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.5647058823529412, |
| "grad_norm": 2.2659166137528097, |
| "learning_rate": 1.1239801194443507e-06, |
| "loss": 0.0469, |
| "step": 931 |
| }, |
| { |
| "epoch": 1.5663865546218487, |
| "grad_norm": 3.382390295282448, |
| "learning_rate": 1.115655070430096e-06, |
| "loss": 0.0571, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.5680672268907563, |
| "grad_norm": 2.596026919066409, |
| "learning_rate": 1.107357093603924e-06, |
| "loss": 0.0474, |
| "step": 933 |
| }, |
| { |
| "epoch": 1.5697478991596638, |
| "grad_norm": 3.7190109549497956, |
| "learning_rate": 1.0990862467991132e-06, |
| "loss": 0.0767, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.5714285714285714, |
| "grad_norm": 2.6518667247248713, |
| "learning_rate": 1.0908425876598512e-06, |
| "loss": 0.0614, |
| "step": 935 |
| }, |
| { |
| "epoch": 1.573109243697479, |
| "grad_norm": 3.3105529179765663, |
| "learning_rate": 1.082626173640846e-06, |
| "loss": 0.0713, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.5747899159663865, |
| "grad_norm": 2.9534768924558956, |
| "learning_rate": 1.0744370620069122e-06, |
| "loss": 0.0699, |
| "step": 937 |
| }, |
| { |
| "epoch": 1.576470588235294, |
| "grad_norm": 2.833311203890012, |
| "learning_rate": 1.066275309832584e-06, |
| "loss": 0.0548, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.5781512605042018, |
| "grad_norm": 4.798887220822771, |
| "learning_rate": 1.0581409740017113e-06, |
| "loss": 0.0954, |
| "step": 939 |
| }, |
| { |
| "epoch": 1.5798319327731094, |
| "grad_norm": 4.148894262060034, |
| "learning_rate": 1.0500341112070605e-06, |
| "loss": 0.0681, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.581512605042017, |
| "grad_norm": 3.2754454999274287, |
| "learning_rate": 1.0419547779499283e-06, |
| "loss": 0.0767, |
| "step": 941 |
| }, |
| { |
| "epoch": 1.5831932773109245, |
| "grad_norm": 3.3373777087154974, |
| "learning_rate": 1.0339030305397374e-06, |
| "loss": 0.0525, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.584873949579832, |
| "grad_norm": 3.0386973905398, |
| "learning_rate": 1.025878925093653e-06, |
| "loss": 0.0705, |
| "step": 943 |
| }, |
| { |
| "epoch": 1.5865546218487396, |
| "grad_norm": 2.5493203288809747, |
| "learning_rate": 1.0178825175361846e-06, |
| "loss": 0.05, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.5882352941176472, |
| "grad_norm": 3.7835221762872275, |
| "learning_rate": 1.0099138635988026e-06, |
| "loss": 0.0793, |
| "step": 945 |
| }, |
| { |
| "epoch": 1.5899159663865547, |
| "grad_norm": 2.9769518109780786, |
| "learning_rate": 1.0019730188195464e-06, |
| "loss": 0.0482, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.5915966386554623, |
| "grad_norm": 2.9047376160951126, |
| "learning_rate": 9.940600385426347e-07, |
| "loss": 0.0775, |
| "step": 947 |
| }, |
| { |
| "epoch": 1.5932773109243699, |
| "grad_norm": 3.7163601063599585, |
| "learning_rate": 9.861749779180873e-07, |
| "loss": 0.0792, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.5949579831932774, |
| "grad_norm": 4.409032506672899, |
| "learning_rate": 9.783178919013297e-07, |
| "loss": 0.074, |
| "step": 949 |
| }, |
| { |
| "epoch": 1.596638655462185, |
| "grad_norm": 3.1957793653873647, |
| "learning_rate": 9.704888352528257e-07, |
| "loss": 0.0815, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.5983193277310925, |
| "grad_norm": 2.611065141281167, |
| "learning_rate": 9.626878625376784e-07, |
| "loss": 0.0511, |
| "step": 951 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 3.107864414307667, |
| "learning_rate": 9.549150281252633e-07, |
| "loss": 0.0726, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.6016806722689076, |
| "grad_norm": 2.633882625896426, |
| "learning_rate": 9.471703861888398e-07, |
| "loss": 0.0695, |
| "step": 953 |
| }, |
| { |
| "epoch": 1.6033613445378152, |
| "grad_norm": 3.3594734416875345, |
| "learning_rate": 9.394539907051837e-07, |
| "loss": 0.0649, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.6050420168067228, |
| "grad_norm": 2.461508884535206, |
| "learning_rate": 9.317658954541992e-07, |
| "loss": 0.0589, |
| "step": 955 |
| }, |
| { |
| "epoch": 1.6067226890756303, |
| "grad_norm": 4.539836468707796, |
| "learning_rate": 9.241061540185547e-07, |
| "loss": 0.0751, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.6084033613445379, |
| "grad_norm": 3.3620051027546483, |
| "learning_rate": 9.164748197833039e-07, |
| "loss": 0.0533, |
| "step": 957 |
| }, |
| { |
| "epoch": 1.6100840336134454, |
| "grad_norm": 2.9983538488735184, |
| "learning_rate": 9.088719459355133e-07, |
| "loss": 0.0525, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.611764705882353, |
| "grad_norm": 3.1377214405299765, |
| "learning_rate": 9.01297585463895e-07, |
| "loss": 0.0683, |
| "step": 959 |
| }, |
| { |
| "epoch": 1.6134453781512605, |
| "grad_norm": 3.2506007192617092, |
| "learning_rate": 8.937517911584321e-07, |
| "loss": 0.069, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.615126050420168, |
| "grad_norm": 2.752232899637137, |
| "learning_rate": 8.862346156100188e-07, |
| "loss": 0.0452, |
| "step": 961 |
| }, |
| { |
| "epoch": 1.6168067226890757, |
| "grad_norm": 2.894169394808922, |
| "learning_rate": 8.787461112100837e-07, |
| "loss": 0.0609, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.6184873949579832, |
| "grad_norm": 3.4459617448123434, |
| "learning_rate": 8.712863301502339e-07, |
| "loss": 0.0693, |
| "step": 963 |
| }, |
| { |
| "epoch": 1.6201680672268908, |
| "grad_norm": 4.242431845815356, |
| "learning_rate": 8.638553244218872e-07, |
| "loss": 0.085, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.6218487394957983, |
| "grad_norm": 3.407760495616189, |
| "learning_rate": 8.56453145815907e-07, |
| "loss": 0.0675, |
| "step": 965 |
| }, |
| { |
| "epoch": 1.6235294117647059, |
| "grad_norm": 3.0306976690592413, |
| "learning_rate": 8.490798459222477e-07, |
| "loss": 0.0756, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.6252100840336134, |
| "grad_norm": 2.84786975619905, |
| "learning_rate": 8.417354761295876e-07, |
| "loss": 0.0509, |
| "step": 967 |
| }, |
| { |
| "epoch": 1.626890756302521, |
| "grad_norm": 3.683064163624422, |
| "learning_rate": 8.344200876249803e-07, |
| "loss": 0.0771, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.6285714285714286, |
| "grad_norm": 3.605480002311447, |
| "learning_rate": 8.271337313934869e-07, |
| "loss": 0.063, |
| "step": 969 |
| }, |
| { |
| "epoch": 1.6302521008403361, |
| "grad_norm": 3.906165742726037, |
| "learning_rate": 8.198764582178303e-07, |
| "loss": 0.0884, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.6319327731092437, |
| "grad_norm": 3.210290270345182, |
| "learning_rate": 8.12648318678036e-07, |
| "loss": 0.061, |
| "step": 971 |
| }, |
| { |
| "epoch": 1.6336134453781512, |
| "grad_norm": 3.7077638002651745, |
| "learning_rate": 8.054493631510785e-07, |
| "loss": 0.0783, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.6352941176470588, |
| "grad_norm": 3.00209725981241, |
| "learning_rate": 7.98279641810537e-07, |
| "loss": 0.0562, |
| "step": 973 |
| }, |
| { |
| "epoch": 1.6369747899159663, |
| "grad_norm": 2.755773701613939, |
| "learning_rate": 7.911392046262367e-07, |
| "loss": 0.0509, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.638655462184874, |
| "grad_norm": 3.1721020032583405, |
| "learning_rate": 7.840281013639078e-07, |
| "loss": 0.0641, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.6403361344537815, |
| "grad_norm": 3.2205453157951776, |
| "learning_rate": 7.769463815848344e-07, |
| "loss": 0.0721, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.642016806722689, |
| "grad_norm": 3.0892435081374177, |
| "learning_rate": 7.698940946455125e-07, |
| "loss": 0.0687, |
| "step": 977 |
| }, |
| { |
| "epoch": 1.6436974789915966, |
| "grad_norm": 2.944546301438669, |
| "learning_rate": 7.628712896973006e-07, |
| "loss": 0.0472, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.6453781512605041, |
| "grad_norm": 2.8245833208645563, |
| "learning_rate": 7.55878015686084e-07, |
| "loss": 0.054, |
| "step": 979 |
| }, |
| { |
| "epoch": 1.6470588235294117, |
| "grad_norm": 3.716112700120881, |
| "learning_rate": 7.489143213519301e-07, |
| "loss": 0.0695, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.6487394957983192, |
| "grad_norm": 2.9711948721794377, |
| "learning_rate": 7.419802552287453e-07, |
| "loss": 0.0478, |
| "step": 981 |
| }, |
| { |
| "epoch": 1.6504201680672268, |
| "grad_norm": 2.9026496025038626, |
| "learning_rate": 7.350758656439455e-07, |
| "loss": 0.0527, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.6521008403361344, |
| "grad_norm": 2.8302281641844287, |
| "learning_rate": 7.282012007181083e-07, |
| "loss": 0.0607, |
| "step": 983 |
| }, |
| { |
| "epoch": 1.653781512605042, |
| "grad_norm": 3.6058469193440392, |
| "learning_rate": 7.213563083646497e-07, |
| "loss": 0.0782, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.6554621848739495, |
| "grad_norm": 3.7038887110593186, |
| "learning_rate": 7.145412362894771e-07, |
| "loss": 0.0737, |
| "step": 985 |
| }, |
| { |
| "epoch": 1.657142857142857, |
| "grad_norm": 3.0478216404333502, |
| "learning_rate": 7.077560319906696e-07, |
| "loss": 0.0848, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.6588235294117646, |
| "grad_norm": 4.337845037581458, |
| "learning_rate": 7.010007427581378e-07, |
| "loss": 0.0821, |
| "step": 987 |
| }, |
| { |
| "epoch": 1.6605042016806721, |
| "grad_norm": 2.938832327531002, |
| "learning_rate": 6.942754156732978e-07, |
| "loss": 0.0957, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.6621848739495797, |
| "grad_norm": 2.7760331192989316, |
| "learning_rate": 6.875800976087444e-07, |
| "loss": 0.0819, |
| "step": 989 |
| }, |
| { |
| "epoch": 1.6638655462184873, |
| "grad_norm": 2.617807812869756, |
| "learning_rate": 6.809148352279182e-07, |
| "loss": 0.0447, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.6655462184873948, |
| "grad_norm": 2.9879632871750985, |
| "learning_rate": 6.742796749847908e-07, |
| "loss": 0.0593, |
| "step": 991 |
| }, |
| { |
| "epoch": 1.6672268907563024, |
| "grad_norm": 4.757558026364091, |
| "learning_rate": 6.676746631235282e-07, |
| "loss": 0.082, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.66890756302521, |
| "grad_norm": 3.225928749204381, |
| "learning_rate": 6.61099845678183e-07, |
| "loss": 0.0554, |
| "step": 993 |
| }, |
| { |
| "epoch": 1.6705882352941175, |
| "grad_norm": 3.9904444022658496, |
| "learning_rate": 6.545552684723583e-07, |
| "loss": 0.0802, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.6722689075630253, |
| "grad_norm": 3.3696742625546214, |
| "learning_rate": 6.480409771189027e-07, |
| "loss": 0.0722, |
| "step": 995 |
| }, |
| { |
| "epoch": 1.6739495798319328, |
| "grad_norm": 2.2477843738626855, |
| "learning_rate": 6.415570170195801e-07, |
| "loss": 0.0501, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.6756302521008404, |
| "grad_norm": 3.6877994992049348, |
| "learning_rate": 6.351034333647615e-07, |
| "loss": 0.0725, |
| "step": 997 |
| }, |
| { |
| "epoch": 1.677310924369748, |
| "grad_norm": 3.9372860574992097, |
| "learning_rate": 6.286802711331097e-07, |
| "loss": 0.065, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.6789915966386555, |
| "grad_norm": 4.448834128583502, |
| "learning_rate": 6.222875750912571e-07, |
| "loss": 0.0988, |
| "step": 999 |
| }, |
| { |
| "epoch": 1.680672268907563, |
| "grad_norm": 3.181414199217415, |
| "learning_rate": 6.159253897935069e-07, |
| "loss": 0.0622, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.680672268907563, |
| "eval_loss": 0.18958403170108795, |
| "eval_runtime": 1.1814, |
| "eval_samples_per_second": 41.477, |
| "eval_steps_per_second": 11.004, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.6823529411764706, |
| "grad_norm": 2.8667787652122203, |
| "learning_rate": 6.095937595815104e-07, |
| "loss": 0.0481, |
| "step": 1001 |
| }, |
| { |
| "epoch": 1.6840336134453782, |
| "grad_norm": 3.30814842995984, |
| "learning_rate": 6.032927285839674e-07, |
| "loss": 0.0781, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.6857142857142857, |
| "grad_norm": 3.273078173220105, |
| "learning_rate": 5.9702234071631e-07, |
| "loss": 0.0498, |
| "step": 1003 |
| }, |
| { |
| "epoch": 1.6873949579831933, |
| "grad_norm": 2.549502139708129, |
| "learning_rate": 5.907826396804062e-07, |
| "loss": 0.0573, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.6890756302521008, |
| "grad_norm": 2.7316380969088794, |
| "learning_rate": 5.845736689642472e-07, |
| "loss": 0.0696, |
| "step": 1005 |
| }, |
| { |
| "epoch": 1.6907563025210084, |
| "grad_norm": 2.9477520630378438, |
| "learning_rate": 5.783954718416468e-07, |
| "loss": 0.0517, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.692436974789916, |
| "grad_norm": 3.9905021450657863, |
| "learning_rate": 5.722480913719425e-07, |
| "loss": 0.0692, |
| "step": 1007 |
| }, |
| { |
| "epoch": 1.6941176470588235, |
| "grad_norm": 3.379977731241573, |
| "learning_rate": 5.661315703996905e-07, |
| "loss": 0.0574, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.695798319327731, |
| "grad_norm": 2.651344824049694, |
| "learning_rate": 5.600459515543733e-07, |
| "loss": 0.0536, |
| "step": 1009 |
| }, |
| { |
| "epoch": 1.6974789915966386, |
| "grad_norm": 3.839321294261383, |
| "learning_rate": 5.539912772500943e-07, |
| "loss": 0.0599, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.6991596638655462, |
| "grad_norm": 4.502537960250825, |
| "learning_rate": 5.47967589685292e-07, |
| "loss": 0.0764, |
| "step": 1011 |
| }, |
| { |
| "epoch": 1.7008403361344537, |
| "grad_norm": 3.066578903690047, |
| "learning_rate": 5.419749308424377e-07, |
| "loss": 0.0526, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.7025210084033613, |
| "grad_norm": 3.2961861585751944, |
| "learning_rate": 5.360133424877467e-07, |
| "loss": 0.0716, |
| "step": 1013 |
| }, |
| { |
| "epoch": 1.704201680672269, |
| "grad_norm": 4.278116825721235, |
| "learning_rate": 5.300828661708873e-07, |
| "loss": 0.1063, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.7058823529411766, |
| "grad_norm": 3.2236409490459597, |
| "learning_rate": 5.241835432246888e-07, |
| "loss": 0.0666, |
| "step": 1015 |
| }, |
| { |
| "epoch": 1.7075630252100842, |
| "grad_norm": 2.9602534255149524, |
| "learning_rate": 5.183154147648578e-07, |
| "loss": 0.0659, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.7092436974789917, |
| "grad_norm": 2.659719689119663, |
| "learning_rate": 5.124785216896854e-07, |
| "loss": 0.0487, |
| "step": 1017 |
| }, |
| { |
| "epoch": 1.7109243697478993, |
| "grad_norm": 2.807149009390532, |
| "learning_rate": 5.066729046797692e-07, |
| "loss": 0.0744, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.7126050420168069, |
| "grad_norm": 3.7321641841913458, |
| "learning_rate": 5.008986041977254e-07, |
| "loss": 0.0722, |
| "step": 1019 |
| }, |
| { |
| "epoch": 1.7142857142857144, |
| "grad_norm": 4.130707731441335, |
| "learning_rate": 4.951556604879049e-07, |
| "loss": 0.0788, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.715966386554622, |
| "grad_norm": 3.0918431678717972, |
| "learning_rate": 4.894441135761197e-07, |
| "loss": 0.0673, |
| "step": 1021 |
| }, |
| { |
| "epoch": 1.7176470588235295, |
| "grad_norm": 2.976439686671259, |
| "learning_rate": 4.837640032693558e-07, |
| "loss": 0.0636, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.719327731092437, |
| "grad_norm": 3.9989069666982906, |
| "learning_rate": 4.781153691555035e-07, |
| "loss": 0.1041, |
| "step": 1023 |
| }, |
| { |
| "epoch": 1.7210084033613446, |
| "grad_norm": 2.82983606873746, |
| "learning_rate": 4.724982506030762e-07, |
| "loss": 0.0647, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.7226890756302522, |
| "grad_norm": 3.484959680642574, |
| "learning_rate": 4.669126867609375e-07, |
| "loss": 0.0619, |
| "step": 1025 |
| }, |
| { |
| "epoch": 1.7243697478991598, |
| "grad_norm": 3.1730436291690203, |
| "learning_rate": 4.613587165580269e-07, |
| "loss": 0.0806, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.7260504201680673, |
| "grad_norm": 2.6705239615445007, |
| "learning_rate": 4.5583637870309397e-07, |
| "loss": 0.0463, |
| "step": 1027 |
| }, |
| { |
| "epoch": 1.7277310924369749, |
| "grad_norm": 2.595728117055968, |
| "learning_rate": 4.503457116844201e-07, |
| "loss": 0.0496, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.7294117647058824, |
| "grad_norm": 2.8656841743678068, |
| "learning_rate": 4.448867537695578e-07, |
| "loss": 0.0554, |
| "step": 1029 |
| }, |
| { |
| "epoch": 1.73109243697479, |
| "grad_norm": 3.253801626814162, |
| "learning_rate": 4.394595430050613e-07, |
| "loss": 0.0816, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.7327731092436975, |
| "grad_norm": 3.644665909418401, |
| "learning_rate": 4.34064117216218e-07, |
| "loss": 0.0697, |
| "step": 1031 |
| }, |
| { |
| "epoch": 1.734453781512605, |
| "grad_norm": 3.8362438500108507, |
| "learning_rate": 4.287005140067912e-07, |
| "loss": 0.0886, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.7361344537815127, |
| "grad_norm": 3.1234733214287402, |
| "learning_rate": 4.2336877075875136e-07, |
| "loss": 0.0612, |
| "step": 1033 |
| }, |
| { |
| "epoch": 1.7378151260504202, |
| "grad_norm": 2.386823444604084, |
| "learning_rate": 4.1806892463202353e-07, |
| "loss": 0.0407, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.7394957983193278, |
| "grad_norm": 2.802793611918228, |
| "learning_rate": 4.1280101256421903e-07, |
| "loss": 0.0522, |
| "step": 1035 |
| }, |
| { |
| "epoch": 1.7411764705882353, |
| "grad_norm": 2.4401900894075172, |
| "learning_rate": 4.0756507127038494e-07, |
| "loss": 0.0574, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.7428571428571429, |
| "grad_norm": 3.475878310133994, |
| "learning_rate": 4.0236113724274716e-07, |
| "loss": 0.0778, |
| "step": 1037 |
| }, |
| { |
| "epoch": 1.7445378151260504, |
| "grad_norm": 3.9184928910191963, |
| "learning_rate": 3.971892467504518e-07, |
| "loss": 0.0935, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.746218487394958, |
| "grad_norm": 4.216121004493787, |
| "learning_rate": 3.9204943583931953e-07, |
| "loss": 0.0713, |
| "step": 1039 |
| }, |
| { |
| "epoch": 1.7478991596638656, |
| "grad_norm": 3.0902619137106115, |
| "learning_rate": 3.869417403315856e-07, |
| "loss": 0.07, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.749579831932773, |
| "grad_norm": 3.5215787238811656, |
| "learning_rate": 3.8186619582565974e-07, |
| "loss": 0.0612, |
| "step": 1041 |
| }, |
| { |
| "epoch": 1.7512605042016807, |
| "grad_norm": 3.068365157929383, |
| "learning_rate": 3.7682283769586883e-07, |
| "loss": 0.0598, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.7529411764705882, |
| "grad_norm": 3.2060690731093295, |
| "learning_rate": 3.71811701092219e-07, |
| "loss": 0.0622, |
| "step": 1043 |
| }, |
| { |
| "epoch": 1.7546218487394958, |
| "grad_norm": 4.08757596782293, |
| "learning_rate": 3.6683282094014285e-07, |
| "loss": 0.0893, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.7563025210084033, |
| "grad_norm": 2.6145817155772684, |
| "learning_rate": 3.6188623194026105e-07, |
| "loss": 0.0519, |
| "step": 1045 |
| }, |
| { |
| "epoch": 1.757983193277311, |
| "grad_norm": 2.914336649954542, |
| "learning_rate": 3.569719685681405e-07, |
| "loss": 0.0527, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.7596638655462185, |
| "grad_norm": 3.7177435770144847, |
| "learning_rate": 3.5209006507404883e-07, |
| "loss": 0.0668, |
| "step": 1047 |
| }, |
| { |
| "epoch": 1.761344537815126, |
| "grad_norm": 2.607571098443921, |
| "learning_rate": 3.4724055548272406e-07, |
| "loss": 0.0498, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.7630252100840336, |
| "grad_norm": 4.459090476779105, |
| "learning_rate": 3.4242347359312864e-07, |
| "loss": 0.076, |
| "step": 1049 |
| }, |
| { |
| "epoch": 1.7647058823529411, |
| "grad_norm": 5.168276264230189, |
| "learning_rate": 3.3763885297822153e-07, |
| "loss": 0.0654, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.7663865546218487, |
| "grad_norm": 2.9922484939596576, |
| "learning_rate": 3.3288672698471804e-07, |
| "loss": 0.0583, |
| "step": 1051 |
| }, |
| { |
| "epoch": 1.7680672268907562, |
| "grad_norm": 3.083684278623569, |
| "learning_rate": 3.281671287328614e-07, |
| "loss": 0.0646, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.7697478991596638, |
| "grad_norm": 3.620493118528574, |
| "learning_rate": 3.2348009111619227e-07, |
| "loss": 0.0948, |
| "step": 1053 |
| }, |
| { |
| "epoch": 1.7714285714285714, |
| "grad_norm": 2.7564111302020544, |
| "learning_rate": 3.18825646801314e-07, |
| "loss": 0.0462, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.773109243697479, |
| "grad_norm": 2.8782377534767076, |
| "learning_rate": 3.1420382822767326e-07, |
| "loss": 0.0502, |
| "step": 1055 |
| }, |
| { |
| "epoch": 1.7747899159663865, |
| "grad_norm": 3.4400037527416836, |
| "learning_rate": 3.096146676073253e-07, |
| "loss": 0.0792, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.776470588235294, |
| "grad_norm": 2.6901880238607814, |
| "learning_rate": 3.0505819692471797e-07, |
| "loss": 0.0593, |
| "step": 1057 |
| }, |
| { |
| "epoch": 1.7781512605042016, |
| "grad_norm": 3.6125322665717046, |
| "learning_rate": 3.0053444793646024e-07, |
| "loss": 0.066, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.7798319327731091, |
| "grad_norm": 3.3356619840985164, |
| "learning_rate": 2.960434521711086e-07, |
| "loss": 0.0495, |
| "step": 1059 |
| }, |
| { |
| "epoch": 1.7815126050420167, |
| "grad_norm": 2.449842215488209, |
| "learning_rate": 2.915852409289421e-07, |
| "loss": 0.0584, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.7831932773109243, |
| "grad_norm": 3.8717672742770795, |
| "learning_rate": 2.8715984528174757e-07, |
| "loss": 0.0611, |
| "step": 1061 |
| }, |
| { |
| "epoch": 1.7848739495798318, |
| "grad_norm": 3.2030831698927975, |
| "learning_rate": 2.827672960725991e-07, |
| "loss": 0.0913, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.7865546218487394, |
| "grad_norm": 3.865848748318045, |
| "learning_rate": 2.7840762391564634e-07, |
| "loss": 0.0762, |
| "step": 1063 |
| }, |
| { |
| "epoch": 1.788235294117647, |
| "grad_norm": 3.4085943532702103, |
| "learning_rate": 2.7408085919590265e-07, |
| "loss": 0.0552, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.7899159663865545, |
| "grad_norm": 3.1995440473790335, |
| "learning_rate": 2.697870320690266e-07, |
| "loss": 0.0502, |
| "step": 1065 |
| }, |
| { |
| "epoch": 1.791596638655462, |
| "grad_norm": 3.7669348300021124, |
| "learning_rate": 2.6552617246111966e-07, |
| "loss": 0.0574, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.7932773109243696, |
| "grad_norm": 3.3293423324211098, |
| "learning_rate": 2.612983100685118e-07, |
| "loss": 0.0518, |
| "step": 1067 |
| }, |
| { |
| "epoch": 1.7949579831932772, |
| "grad_norm": 6.432997589961303, |
| "learning_rate": 2.5710347435755955e-07, |
| "loss": 0.0604, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.7966386554621847, |
| "grad_norm": 3.3108833882683464, |
| "learning_rate": 2.5294169456443416e-07, |
| "loss": 0.056, |
| "step": 1069 |
| }, |
| { |
| "epoch": 1.7983193277310925, |
| "grad_norm": 2.9734777133032835, |
| "learning_rate": 2.4881299969492514e-07, |
| "loss": 0.0646, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 2.860144532702313, |
| "learning_rate": 2.447174185242324e-07, |
| "loss": 0.0727, |
| "step": 1071 |
| }, |
| { |
| "epoch": 1.8016806722689076, |
| "grad_norm": 4.282160091271182, |
| "learning_rate": 2.406549795967678e-07, |
| "loss": 0.067, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.8033613445378152, |
| "grad_norm": 3.3754058094707937, |
| "learning_rate": 2.366257112259579e-07, |
| "loss": 0.0581, |
| "step": 1073 |
| }, |
| { |
| "epoch": 1.8050420168067227, |
| "grad_norm": 2.306814130707301, |
| "learning_rate": 2.3262964149404322e-07, |
| "loss": 0.0531, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.8067226890756303, |
| "grad_norm": 2.9409070748310935, |
| "learning_rate": 2.286667982518853e-07, |
| "loss": 0.0639, |
| "step": 1075 |
| }, |
| { |
| "epoch": 1.8084033613445378, |
| "grad_norm": 3.0396807188768644, |
| "learning_rate": 2.247372091187705e-07, |
| "loss": 0.0603, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.8100840336134454, |
| "grad_norm": 4.04313014248122, |
| "learning_rate": 2.2084090148221937e-07, |
| "loss": 0.0986, |
| "step": 1077 |
| }, |
| { |
| "epoch": 1.811764705882353, |
| "grad_norm": 3.0115016227322697, |
| "learning_rate": 2.1697790249779638e-07, |
| "loss": 0.0681, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.8134453781512605, |
| "grad_norm": 3.2168149498398533, |
| "learning_rate": 2.1314823908891558e-07, |
| "loss": 0.0611, |
| "step": 1079 |
| }, |
| { |
| "epoch": 1.815126050420168, |
| "grad_norm": 3.830162600946003, |
| "learning_rate": 2.093519379466602e-07, |
| "loss": 0.0881, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.8168067226890756, |
| "grad_norm": 2.4163130625409672, |
| "learning_rate": 2.0558902552959058e-07, |
| "loss": 0.0647, |
| "step": 1081 |
| }, |
| { |
| "epoch": 1.8184873949579832, |
| "grad_norm": 3.139900286939481, |
| "learning_rate": 2.018595280635638e-07, |
| "loss": 0.0537, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.8201680672268907, |
| "grad_norm": 3.1466684302589423, |
| "learning_rate": 1.981634715415487e-07, |
| "loss": 0.0523, |
| "step": 1083 |
| }, |
| { |
| "epoch": 1.8218487394957983, |
| "grad_norm": 3.9632174225214984, |
| "learning_rate": 1.945008817234445e-07, |
| "loss": 0.0916, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.8235294117647058, |
| "grad_norm": 3.36248537702875, |
| "learning_rate": 1.908717841359048e-07, |
| "loss": 0.0558, |
| "step": 1085 |
| }, |
| { |
| "epoch": 1.8252100840336134, |
| "grad_norm": 3.39348312293213, |
| "learning_rate": 1.8727620407215375e-07, |
| "loss": 0.0439, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.826890756302521, |
| "grad_norm": 4.032088455291679, |
| "learning_rate": 1.837141665918152e-07, |
| "loss": 0.0771, |
| "step": 1087 |
| }, |
| { |
| "epoch": 1.8285714285714287, |
| "grad_norm": 3.4190633994412836, |
| "learning_rate": 1.801856965207338e-07, |
| "loss": 0.0563, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.8302521008403363, |
| "grad_norm": 3.534886276176037, |
| "learning_rate": 1.7669081845080648e-07, |
| "loss": 0.0692, |
| "step": 1089 |
| }, |
| { |
| "epoch": 1.8319327731092439, |
| "grad_norm": 3.4435868355700174, |
| "learning_rate": 1.7322955673980678e-07, |
| "loss": 0.0625, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.8336134453781514, |
| "grad_norm": 3.241199652941816, |
| "learning_rate": 1.6980193551121848e-07, |
| "loss": 0.0741, |
| "step": 1091 |
| }, |
| { |
| "epoch": 1.835294117647059, |
| "grad_norm": 2.800413218064161, |
| "learning_rate": 1.664079786540629e-07, |
| "loss": 0.0684, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.8369747899159665, |
| "grad_norm": 3.8889142045528775, |
| "learning_rate": 1.6304770982273898e-07, |
| "loss": 0.0871, |
| "step": 1093 |
| }, |
| { |
| "epoch": 1.838655462184874, |
| "grad_norm": 2.996815076712509, |
| "learning_rate": 1.597211524368536e-07, |
| "loss": 0.0587, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.8403361344537816, |
| "grad_norm": 3.0926722750899023, |
| "learning_rate": 1.564283296810576e-07, |
| "loss": 0.0551, |
| "step": 1095 |
| }, |
| { |
| "epoch": 1.8420168067226892, |
| "grad_norm": 3.5055235248239627, |
| "learning_rate": 1.5316926450488878e-07, |
| "loss": 0.0633, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.8436974789915967, |
| "grad_norm": 2.663472736681946, |
| "learning_rate": 1.499439796226082e-07, |
| "loss": 0.0619, |
| "step": 1097 |
| }, |
| { |
| "epoch": 1.8453781512605043, |
| "grad_norm": 3.953570927026477, |
| "learning_rate": 1.4675249751304353e-07, |
| "loss": 0.078, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.8470588235294119, |
| "grad_norm": 2.493322146525807, |
| "learning_rate": 1.435948404194304e-07, |
| "loss": 0.0368, |
| "step": 1099 |
| }, |
| { |
| "epoch": 1.8487394957983194, |
| "grad_norm": 2.3023658930459403, |
| "learning_rate": 1.404710303492618e-07, |
| "loss": 0.0426, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.850420168067227, |
| "grad_norm": 2.97412848554832, |
| "learning_rate": 1.373810890741284e-07, |
| "loss": 0.0581, |
| "step": 1101 |
| }, |
| { |
| "epoch": 1.8521008403361345, |
| "grad_norm": 3.74074771238985, |
| "learning_rate": 1.3432503812957242e-07, |
| "loss": 0.07, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.853781512605042, |
| "grad_norm": 3.4159524020499354, |
| "learning_rate": 1.3130289881493452e-07, |
| "loss": 0.1028, |
| "step": 1103 |
| }, |
| { |
| "epoch": 1.8554621848739496, |
| "grad_norm": 3.4247555155007534, |
| "learning_rate": 1.2831469219320603e-07, |
| "loss": 0.075, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.8571428571428572, |
| "grad_norm": 3.3014047371850235, |
| "learning_rate": 1.253604390908819e-07, |
| "loss": 0.0744, |
| "step": 1105 |
| }, |
| { |
| "epoch": 1.8588235294117648, |
| "grad_norm": 2.920013877825399, |
| "learning_rate": 1.22440160097817e-07, |
| "loss": 0.0579, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.8605042016806723, |
| "grad_norm": 3.7369832957086153, |
| "learning_rate": 1.1955387556708e-07, |
| "loss": 0.0542, |
| "step": 1107 |
| }, |
| { |
| "epoch": 1.8621848739495799, |
| "grad_norm": 2.2666088356536345, |
| "learning_rate": 1.1670160561481458e-07, |
| "loss": 0.0371, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.8638655462184874, |
| "grad_norm": 2.674691850295334, |
| "learning_rate": 1.1388337012009643e-07, |
| "loss": 0.0507, |
| "step": 1109 |
| }, |
| { |
| "epoch": 1.865546218487395, |
| "grad_norm": 4.010100748714858, |
| "learning_rate": 1.1109918872479642e-07, |
| "loss": 0.1174, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.8672268907563025, |
| "grad_norm": 2.892673598502158, |
| "learning_rate": 1.0834908083344253e-07, |
| "loss": 0.0548, |
| "step": 1111 |
| }, |
| { |
| "epoch": 1.86890756302521, |
| "grad_norm": 3.1318654544587696, |
| "learning_rate": 1.0563306561308773e-07, |
| "loss": 0.0626, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.8705882352941177, |
| "grad_norm": 2.6308962683101083, |
| "learning_rate": 1.0295116199317057e-07, |
| "loss": 0.049, |
| "step": 1113 |
| }, |
| { |
| "epoch": 1.8722689075630252, |
| "grad_norm": 3.2429908938438805, |
| "learning_rate": 1.0030338866538925e-07, |
| "loss": 0.0965, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.8739495798319328, |
| "grad_norm": 2.8503852447249067, |
| "learning_rate": 9.768976408356667e-08, |
| "loss": 0.07, |
| "step": 1115 |
| }, |
| { |
| "epoch": 1.8756302521008403, |
| "grad_norm": 2.677806287085677, |
| "learning_rate": 9.511030646352615e-08, |
| "loss": 0.053, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.877310924369748, |
| "grad_norm": 3.1946750075620844, |
| "learning_rate": 9.256503378295978e-08, |
| "loss": 0.0651, |
| "step": 1117 |
| }, |
| { |
| "epoch": 1.8789915966386554, |
| "grad_norm": 3.584179246441202, |
| "learning_rate": 9.005396378130748e-08, |
| "loss": 0.0791, |
| "step": 1118 |
| }, |
| { |
| "epoch": 1.880672268907563, |
| "grad_norm": 3.61746694704659, |
| "learning_rate": 8.757711395963097e-08, |
| "loss": 0.0793, |
| "step": 1119 |
| }, |
| { |
| "epoch": 1.8823529411764706, |
| "grad_norm": 4.645088603678588, |
| "learning_rate": 8.513450158049109e-08, |
| "loss": 0.1481, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.8840336134453781, |
| "grad_norm": 4.909166462690628, |
| "learning_rate": 8.27261436678306e-08, |
| "loss": 0.0949, |
| "step": 1121 |
| }, |
| { |
| "epoch": 1.8857142857142857, |
| "grad_norm": 3.129071533007231, |
| "learning_rate": 8.035205700685167e-08, |
| "loss": 0.0566, |
| "step": 1122 |
| }, |
| { |
| "epoch": 1.8873949579831932, |
| "grad_norm": 3.657589121538267, |
| "learning_rate": 7.801225814390245e-08, |
| "loss": 0.067, |
| "step": 1123 |
| }, |
| { |
| "epoch": 1.8890756302521008, |
| "grad_norm": 3.694014323127052, |
| "learning_rate": 7.570676338635896e-08, |
| "loss": 0.0838, |
| "step": 1124 |
| }, |
| { |
| "epoch": 1.8907563025210083, |
| "grad_norm": 3.6696526234853604, |
| "learning_rate": 7.343558880251289e-08, |
| "loss": 0.0768, |
| "step": 1125 |
| }, |
| { |
| "epoch": 1.892436974789916, |
| "grad_norm": 3.3985635955632123, |
| "learning_rate": 7.11987502214595e-08, |
| "loss": 0.0538, |
| "step": 1126 |
| }, |
| { |
| "epoch": 1.8941176470588235, |
| "grad_norm": 2.9132010200327234, |
| "learning_rate": 6.899626323298714e-08, |
| "loss": 0.052, |
| "step": 1127 |
| }, |
| { |
| "epoch": 1.895798319327731, |
| "grad_norm": 3.029727226881479, |
| "learning_rate": 6.682814318746844e-08, |
| "loss": 0.0461, |
| "step": 1128 |
| }, |
| { |
| "epoch": 1.8974789915966386, |
| "grad_norm": 3.0897927655523967, |
| "learning_rate": 6.46944051957532e-08, |
| "loss": 0.059, |
| "step": 1129 |
| }, |
| { |
| "epoch": 1.8991596638655461, |
| "grad_norm": 2.497103912549752, |
| "learning_rate": 6.259506412906402e-08, |
| "loss": 0.0569, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.9008403361344537, |
| "grad_norm": 4.753093028355241, |
| "learning_rate": 6.053013461889023e-08, |
| "loss": 0.066, |
| "step": 1131 |
| }, |
| { |
| "epoch": 1.9025210084033612, |
| "grad_norm": 2.744729456009063, |
| "learning_rate": 5.849963105689027e-08, |
| "loss": 0.0673, |
| "step": 1132 |
| }, |
| { |
| "epoch": 1.9042016806722688, |
| "grad_norm": 3.6223706521954235, |
| "learning_rate": 5.65035675947867e-08, |
| "loss": 0.0719, |
| "step": 1133 |
| }, |
| { |
| "epoch": 1.9058823529411764, |
| "grad_norm": 2.6260713125590813, |
| "learning_rate": 5.454195814427021e-08, |
| "loss": 0.0482, |
| "step": 1134 |
| }, |
| { |
| "epoch": 1.907563025210084, |
| "grad_norm": 3.977427947617622, |
| "learning_rate": 5.261481637690247e-08, |
| "loss": 0.0488, |
| "step": 1135 |
| }, |
| { |
| "epoch": 1.9092436974789915, |
| "grad_norm": 2.0514624582452434, |
| "learning_rate": 5.072215572402006e-08, |
| "loss": 0.038, |
| "step": 1136 |
| }, |
| { |
| "epoch": 1.910924369747899, |
| "grad_norm": 3.2121969818313083, |
| "learning_rate": 4.886398937664127e-08, |
| "loss": 0.0658, |
| "step": 1137 |
| }, |
| { |
| "epoch": 1.9126050420168066, |
| "grad_norm": 3.2196806088420393, |
| "learning_rate": 4.704033028537391e-08, |
| "loss": 0.0496, |
| "step": 1138 |
| }, |
| { |
| "epoch": 1.9142857142857141, |
| "grad_norm": 3.2967972931881713, |
| "learning_rate": 4.52511911603265e-08, |
| "loss": 0.0734, |
| "step": 1139 |
| }, |
| { |
| "epoch": 1.9159663865546217, |
| "grad_norm": 2.542731148202116, |
| "learning_rate": 4.3496584471016125e-08, |
| "loss": 0.0403, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.9176470588235293, |
| "grad_norm": 3.152363972853614, |
| "learning_rate": 4.177652244628627e-08, |
| "loss": 0.0438, |
| "step": 1141 |
| }, |
| { |
| "epoch": 1.9193277310924368, |
| "grad_norm": 3.166435459793863, |
| "learning_rate": 4.009101707421803e-08, |
| "loss": 0.0765, |
| "step": 1142 |
| }, |
| { |
| "epoch": 1.9210084033613444, |
| "grad_norm": 3.04932577570153, |
| "learning_rate": 3.8440080102047364e-08, |
| "loss": 0.0641, |
| "step": 1143 |
| }, |
| { |
| "epoch": 1.9226890756302522, |
| "grad_norm": 3.031257181815333, |
| "learning_rate": 3.6823723036084616e-08, |
| "loss": 0.0508, |
| "step": 1144 |
| }, |
| { |
| "epoch": 1.9243697478991597, |
| "grad_norm": 3.1891638467360695, |
| "learning_rate": 3.5241957141632923e-08, |
| "loss": 0.0579, |
| "step": 1145 |
| }, |
| { |
| "epoch": 1.9260504201680673, |
| "grad_norm": 4.517312973719181, |
| "learning_rate": 3.369479344290938e-08, |
| "loss": 0.0876, |
| "step": 1146 |
| }, |
| { |
| "epoch": 1.9277310924369748, |
| "grad_norm": 3.700818940451555, |
| "learning_rate": 3.218224272296955e-08, |
| "loss": 0.0518, |
| "step": 1147 |
| }, |
| { |
| "epoch": 1.9294117647058824, |
| "grad_norm": 2.346728135463526, |
| "learning_rate": 3.0704315523631956e-08, |
| "loss": 0.0487, |
| "step": 1148 |
| }, |
| { |
| "epoch": 1.93109243697479, |
| "grad_norm": 3.167266611349493, |
| "learning_rate": 2.926102214540316e-08, |
| "loss": 0.0566, |
| "step": 1149 |
| }, |
| { |
| "epoch": 1.9327731092436975, |
| "grad_norm": 2.860221027996709, |
| "learning_rate": 2.7852372647407812e-08, |
| "loss": 0.0593, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.934453781512605, |
| "grad_norm": 4.135973604969323, |
| "learning_rate": 2.6478376847318687e-08, |
| "loss": 0.074, |
| "step": 1151 |
| }, |
| { |
| "epoch": 1.9361344537815126, |
| "grad_norm": 3.341556979644147, |
| "learning_rate": 2.5139044321286223e-08, |
| "loss": 0.0699, |
| "step": 1152 |
| }, |
| { |
| "epoch": 1.9378151260504202, |
| "grad_norm": 3.027802548930941, |
| "learning_rate": 2.383438440387298e-08, |
| "loss": 0.0709, |
| "step": 1153 |
| }, |
| { |
| "epoch": 1.9394957983193277, |
| "grad_norm": 3.2086989982561107, |
| "learning_rate": 2.256440618798872e-08, |
| "loss": 0.0612, |
| "step": 1154 |
| }, |
| { |
| "epoch": 1.9411764705882353, |
| "grad_norm": 3.5321511064341538, |
| "learning_rate": 2.1329118524827662e-08, |
| "loss": 0.0562, |
| "step": 1155 |
| }, |
| { |
| "epoch": 1.9428571428571428, |
| "grad_norm": 2.8118478612530735, |
| "learning_rate": 2.012853002380466e-08, |
| "loss": 0.0534, |
| "step": 1156 |
| }, |
| { |
| "epoch": 1.9445378151260504, |
| "grad_norm": 3.432208698670532, |
| "learning_rate": 1.896264905249856e-08, |
| "loss": 0.0837, |
| "step": 1157 |
| }, |
| { |
| "epoch": 1.946218487394958, |
| "grad_norm": 3.21605104376969, |
| "learning_rate": 1.783148373659005e-08, |
| "loss": 0.0506, |
| "step": 1158 |
| }, |
| { |
| "epoch": 1.9478991596638655, |
| "grad_norm": 3.896377413465593, |
| "learning_rate": 1.6735041959806686e-08, |
| "loss": 0.075, |
| "step": 1159 |
| }, |
| { |
| "epoch": 1.949579831932773, |
| "grad_norm": 3.519301914030783, |
| "learning_rate": 1.567333136387017e-08, |
| "loss": 0.0726, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.9512605042016806, |
| "grad_norm": 3.6292612141416334, |
| "learning_rate": 1.4646359348439165e-08, |
| "loss": 0.0706, |
| "step": 1161 |
| }, |
| { |
| "epoch": 1.9529411764705882, |
| "grad_norm": 3.132655139876115, |
| "learning_rate": 1.3654133071059894e-08, |
| "loss": 0.0564, |
| "step": 1162 |
| }, |
| { |
| "epoch": 1.954621848739496, |
| "grad_norm": 2.404923639354769, |
| "learning_rate": 1.2696659447116732e-08, |
| "loss": 0.0453, |
| "step": 1163 |
| }, |
| { |
| "epoch": 1.9563025210084035, |
| "grad_norm": 2.759015592903886, |
| "learning_rate": 1.1773945149782805e-08, |
| "loss": 0.0592, |
| "step": 1164 |
| }, |
| { |
| "epoch": 1.957983193277311, |
| "grad_norm": 2.932560579044183, |
| "learning_rate": 1.088599660997336e-08, |
| "loss": 0.0489, |
| "step": 1165 |
| }, |
| { |
| "epoch": 1.9596638655462186, |
| "grad_norm": 3.0306047589144036, |
| "learning_rate": 1.0032820016302458e-08, |
| "loss": 0.0458, |
| "step": 1166 |
| }, |
| { |
| "epoch": 1.9613445378151262, |
| "grad_norm": 4.176058096232488, |
| "learning_rate": 9.21442131503858e-09, |
| "loss": 0.0638, |
| "step": 1167 |
| }, |
| { |
| "epoch": 1.9630252100840337, |
| "grad_norm": 3.26158879531482, |
| "learning_rate": 8.430806210062426e-09, |
| "loss": 0.0583, |
| "step": 1168 |
| }, |
| { |
| "epoch": 1.9647058823529413, |
| "grad_norm": 2.606267093967038, |
| "learning_rate": 7.681980162830283e-09, |
| "loss": 0.0536, |
| "step": 1169 |
| }, |
| { |
| "epoch": 1.9663865546218489, |
| "grad_norm": 3.1278293687636625, |
| "learning_rate": 6.9679483923318356e-09, |
| "loss": 0.0747, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.9680672268907564, |
| "grad_norm": 3.290980715214881, |
| "learning_rate": 6.288715875057416e-09, |
| "loss": 0.0778, |
| "step": 1171 |
| }, |
| { |
| "epoch": 1.969747899159664, |
| "grad_norm": 3.044737695675404, |
| "learning_rate": 5.644287344960253e-09, |
| "loss": 0.0566, |
| "step": 1172 |
| }, |
| { |
| "epoch": 1.9714285714285715, |
| "grad_norm": 2.946504192696155, |
| "learning_rate": 5.034667293427053e-09, |
| "loss": 0.0629, |
| "step": 1173 |
| }, |
| { |
| "epoch": 1.973109243697479, |
| "grad_norm": 3.186912828675924, |
| "learning_rate": 4.45985996924192e-09, |
| "loss": 0.0803, |
| "step": 1174 |
| }, |
| { |
| "epoch": 1.9747899159663866, |
| "grad_norm": 3.892275647593651, |
| "learning_rate": 3.919869378561925e-09, |
| "loss": 0.0805, |
| "step": 1175 |
| }, |
| { |
| "epoch": 1.9764705882352942, |
| "grad_norm": 3.389822194680044, |
| "learning_rate": 3.41469928488547e-09, |
| "loss": 0.0566, |
| "step": 1176 |
| }, |
| { |
| "epoch": 1.9781512605042018, |
| "grad_norm": 2.5650320359231693, |
| "learning_rate": 2.9443532090273064e-09, |
| "loss": 0.0613, |
| "step": 1177 |
| }, |
| { |
| "epoch": 1.9798319327731093, |
| "grad_norm": 3.010639961534959, |
| "learning_rate": 2.508834429094664e-09, |
| "loss": 0.0699, |
| "step": 1178 |
| }, |
| { |
| "epoch": 1.9815126050420169, |
| "grad_norm": 2.5014084566068804, |
| "learning_rate": 2.108145980462828e-09, |
| "loss": 0.0504, |
| "step": 1179 |
| }, |
| { |
| "epoch": 1.9831932773109244, |
| "grad_norm": 3.9182990233667945, |
| "learning_rate": 1.7422906557557074e-09, |
| "loss": 0.0827, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.984873949579832, |
| "grad_norm": 3.507465961115335, |
| "learning_rate": 1.4112710048247436e-09, |
| "loss": 0.0854, |
| "step": 1181 |
| }, |
| { |
| "epoch": 1.9865546218487395, |
| "grad_norm": 3.0650868400573907, |
| "learning_rate": 1.1150893347328107e-09, |
| "loss": 0.0657, |
| "step": 1182 |
| }, |
| { |
| "epoch": 1.988235294117647, |
| "grad_norm": 2.9055910306096964, |
| "learning_rate": 8.537477097364522e-10, |
| "loss": 0.0624, |
| "step": 1183 |
| }, |
| { |
| "epoch": 1.9899159663865547, |
| "grad_norm": 4.616023305998801, |
| "learning_rate": 6.272479512731133e-10, |
| "loss": 0.056, |
| "step": 1184 |
| }, |
| { |
| "epoch": 1.9915966386554622, |
| "grad_norm": 2.6716933754582874, |
| "learning_rate": 4.3559163794670844e-10, |
| "loss": 0.0567, |
| "step": 1185 |
| }, |
| { |
| "epoch": 1.9932773109243698, |
| "grad_norm": 3.4964821514974602, |
| "learning_rate": 2.787801055181838e-10, |
| "loss": 0.0722, |
| "step": 1186 |
| }, |
| { |
| "epoch": 1.9949579831932773, |
| "grad_norm": 2.654518550392596, |
| "learning_rate": 1.568144468955257e-10, |
| "loss": 0.0604, |
| "step": 1187 |
| }, |
| { |
| "epoch": 1.9966386554621849, |
| "grad_norm": 3.8620551224683424, |
| "learning_rate": 6.969551212598901e-11, |
| "loss": 0.098, |
| "step": 1188 |
| }, |
| { |
| "epoch": 1.9983193277310924, |
| "grad_norm": 3.276316686896461, |
| "learning_rate": 1.7423908390545862e-11, |
| "loss": 0.0731, |
| "step": 1189 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 2.6008822542366743, |
| "learning_rate": 0.0, |
| "loss": 0.0496, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.0, |
| "step": 1190, |
| "total_flos": 1781687255040.0, |
| "train_loss": 0.14153178428464075, |
| "train_runtime": 543.0923, |
| "train_samples_per_second": 17.515, |
| "train_steps_per_second": 2.191 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1190, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 50000000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1781687255040.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |