{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 2956, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00033829499323410016, "grad_norm": 1.6407462358474731, "learning_rate": 5.0000000000000004e-08, "loss": 0.8903, "step": 1 }, { "epoch": 0.0006765899864682003, "grad_norm": 1.5672248601913452, "learning_rate": 1.0000000000000001e-07, "loss": 0.8953, "step": 2 }, { "epoch": 0.0010148849797023004, "grad_norm": 1.6485013961791992, "learning_rate": 1.5000000000000002e-07, "loss": 0.8939, "step": 3 }, { "epoch": 0.0013531799729364006, "grad_norm": 1.6102432012557983, "learning_rate": 2.0000000000000002e-07, "loss": 0.9113, "step": 4 }, { "epoch": 0.0016914749661705007, "grad_norm": 1.5657951831817627, "learning_rate": 2.5000000000000004e-07, "loss": 0.9247, "step": 5 }, { "epoch": 0.0020297699594046007, "grad_norm": 1.5672881603240967, "learning_rate": 3.0000000000000004e-07, "loss": 0.9252, "step": 6 }, { "epoch": 0.002368064952638701, "grad_norm": 1.6827484369277954, "learning_rate": 3.5000000000000004e-07, "loss": 0.909, "step": 7 }, { "epoch": 0.0027063599458728013, "grad_norm": 1.5518872737884521, "learning_rate": 4.0000000000000003e-07, "loss": 0.8979, "step": 8 }, { "epoch": 0.003044654939106901, "grad_norm": 1.5625290870666504, "learning_rate": 4.5000000000000003e-07, "loss": 0.9146, "step": 9 }, { "epoch": 0.0033829499323410014, "grad_norm": 1.491262674331665, "learning_rate": 5.000000000000001e-07, "loss": 0.9011, "step": 10 }, { "epoch": 0.0037212449255751017, "grad_norm": 1.6137615442276, "learning_rate": 5.5e-07, "loss": 0.9229, "step": 11 }, { "epoch": 0.0040595399188092015, "grad_norm": 1.67592191696167, "learning_rate": 6.000000000000001e-07, "loss": 0.9178, "step": 12 }, { "epoch": 0.004397834912043302, "grad_norm": 1.4431407451629639, "learning_rate": 6.5e-07, "loss": 0.899, "step": 13 }, { "epoch": 0.004736129905277402, "grad_norm": 1.4896955490112305, "learning_rate": 7.000000000000001e-07, "loss": 0.8985, "step": 14 }, { "epoch": 0.005074424898511502, "grad_norm": 1.3822637796401978, "learning_rate": 7.5e-07, "loss": 0.9025, "step": 15 }, { "epoch": 0.005412719891745603, "grad_norm": 1.2707531452178955, "learning_rate": 8.000000000000001e-07, "loss": 0.9148, "step": 16 }, { "epoch": 0.005751014884979702, "grad_norm": 1.298106074333191, "learning_rate": 8.500000000000001e-07, "loss": 0.8818, "step": 17 }, { "epoch": 0.006089309878213802, "grad_norm": 1.1657626628875732, "learning_rate": 9.000000000000001e-07, "loss": 0.8882, "step": 18 }, { "epoch": 0.0064276048714479025, "grad_norm": 1.1404670476913452, "learning_rate": 9.500000000000001e-07, "loss": 0.9025, "step": 19 }, { "epoch": 0.006765899864682003, "grad_norm": 1.0163322687149048, "learning_rate": 1.0000000000000002e-06, "loss": 0.8729, "step": 20 }, { "epoch": 0.007104194857916103, "grad_norm": 1.0249444246292114, "learning_rate": 1.0500000000000001e-06, "loss": 0.867, "step": 21 }, { "epoch": 0.007442489851150203, "grad_norm": 0.9234563708305359, "learning_rate": 1.1e-06, "loss": 0.857, "step": 22 }, { "epoch": 0.007780784844384303, "grad_norm": 0.8658290505409241, "learning_rate": 1.1500000000000002e-06, "loss": 0.8693, "step": 23 }, { "epoch": 0.008119079837618403, "grad_norm": 0.8107594847679138, "learning_rate": 1.2000000000000002e-06, "loss": 0.8694, "step": 24 }, { "epoch": 0.008457374830852503, "grad_norm": 0.8184226751327515, "learning_rate": 1.25e-06, "loss": 0.8522, "step": 25 }, { "epoch": 0.008795669824086604, "grad_norm": 0.7714635729789734, "learning_rate": 1.3e-06, "loss": 0.8593, "step": 26 }, { "epoch": 0.009133964817320704, "grad_norm": 0.7085200548171997, "learning_rate": 1.3500000000000002e-06, "loss": 0.846, "step": 27 }, { "epoch": 0.009472259810554804, "grad_norm": 0.7201448082923889, "learning_rate": 1.4000000000000001e-06, "loss": 0.8564, "step": 28 }, { "epoch": 0.009810554803788904, "grad_norm": 0.6707621216773987, "learning_rate": 1.45e-06, "loss": 0.8195, "step": 29 }, { "epoch": 0.010148849797023005, "grad_norm": 0.6455383896827698, "learning_rate": 1.5e-06, "loss": 0.8349, "step": 30 }, { "epoch": 0.010487144790257105, "grad_norm": 0.5779524445533752, "learning_rate": 1.5500000000000002e-06, "loss": 0.819, "step": 31 }, { "epoch": 0.010825439783491205, "grad_norm": 0.575465202331543, "learning_rate": 1.6000000000000001e-06, "loss": 0.8466, "step": 32 }, { "epoch": 0.011163734776725304, "grad_norm": 0.5340820550918579, "learning_rate": 1.6500000000000003e-06, "loss": 0.8167, "step": 33 }, { "epoch": 0.011502029769959404, "grad_norm": 0.4989947974681854, "learning_rate": 1.7000000000000002e-06, "loss": 0.8148, "step": 34 }, { "epoch": 0.011840324763193504, "grad_norm": 0.4422353208065033, "learning_rate": 1.75e-06, "loss": 0.806, "step": 35 }, { "epoch": 0.012178619756427604, "grad_norm": 0.4404710531234741, "learning_rate": 1.8000000000000001e-06, "loss": 0.7935, "step": 36 }, { "epoch": 0.012516914749661705, "grad_norm": 0.4069363474845886, "learning_rate": 1.85e-06, "loss": 0.793, "step": 37 }, { "epoch": 0.012855209742895805, "grad_norm": 0.40475258231163025, "learning_rate": 1.9000000000000002e-06, "loss": 0.7932, "step": 38 }, { "epoch": 0.013193504736129905, "grad_norm": 0.38950204849243164, "learning_rate": 1.9500000000000004e-06, "loss": 0.8042, "step": 39 }, { "epoch": 0.013531799729364006, "grad_norm": 0.3722783625125885, "learning_rate": 2.0000000000000003e-06, "loss": 0.7772, "step": 40 }, { "epoch": 0.013870094722598106, "grad_norm": 0.3677540123462677, "learning_rate": 2.05e-06, "loss": 0.7688, "step": 41 }, { "epoch": 0.014208389715832206, "grad_norm": 0.37267571687698364, "learning_rate": 2.1000000000000002e-06, "loss": 0.7631, "step": 42 }, { "epoch": 0.014546684709066306, "grad_norm": 0.3498350977897644, "learning_rate": 2.15e-06, "loss": 0.7653, "step": 43 }, { "epoch": 0.014884979702300407, "grad_norm": 0.334867388010025, "learning_rate": 2.2e-06, "loss": 0.7604, "step": 44 }, { "epoch": 0.015223274695534507, "grad_norm": 0.3571762442588806, "learning_rate": 2.25e-06, "loss": 0.7978, "step": 45 }, { "epoch": 0.015561569688768605, "grad_norm": 0.32837843894958496, "learning_rate": 2.3000000000000004e-06, "loss": 0.7444, "step": 46 }, { "epoch": 0.015899864682002707, "grad_norm": 0.34096264839172363, "learning_rate": 2.35e-06, "loss": 0.784, "step": 47 }, { "epoch": 0.016238159675236806, "grad_norm": 0.3243989944458008, "learning_rate": 2.4000000000000003e-06, "loss": 0.7727, "step": 48 }, { "epoch": 0.016576454668470908, "grad_norm": 0.3283677101135254, "learning_rate": 2.4500000000000003e-06, "loss": 0.7939, "step": 49 }, { "epoch": 0.016914749661705007, "grad_norm": 0.3175937533378601, "learning_rate": 2.5e-06, "loss": 0.7696, "step": 50 }, { "epoch": 0.01725304465493911, "grad_norm": 0.2957337498664856, "learning_rate": 2.55e-06, "loss": 0.7507, "step": 51 }, { "epoch": 0.017591339648173207, "grad_norm": 0.3031109571456909, "learning_rate": 2.6e-06, "loss": 0.7743, "step": 52 }, { "epoch": 0.017929634641407306, "grad_norm": 0.2781732678413391, "learning_rate": 2.6500000000000005e-06, "loss": 0.7428, "step": 53 }, { "epoch": 0.018267929634641408, "grad_norm": 0.28893619775772095, "learning_rate": 2.7000000000000004e-06, "loss": 0.7814, "step": 54 }, { "epoch": 0.018606224627875506, "grad_norm": 0.2668273448944092, "learning_rate": 2.7500000000000004e-06, "loss": 0.7836, "step": 55 }, { "epoch": 0.018944519621109608, "grad_norm": 0.2713071405887604, "learning_rate": 2.8000000000000003e-06, "loss": 0.7601, "step": 56 }, { "epoch": 0.019282814614343707, "grad_norm": 0.2615717649459839, "learning_rate": 2.85e-06, "loss": 0.772, "step": 57 }, { "epoch": 0.01962110960757781, "grad_norm": 0.24850614368915558, "learning_rate": 2.9e-06, "loss": 0.7478, "step": 58 }, { "epoch": 0.019959404600811907, "grad_norm": 0.25940048694610596, "learning_rate": 2.95e-06, "loss": 0.7871, "step": 59 }, { "epoch": 0.02029769959404601, "grad_norm": 0.23697689175605774, "learning_rate": 3e-06, "loss": 0.7599, "step": 60 }, { "epoch": 0.020635994587280108, "grad_norm": 0.23364393413066864, "learning_rate": 3.05e-06, "loss": 0.7619, "step": 61 }, { "epoch": 0.02097428958051421, "grad_norm": 0.23482820391654968, "learning_rate": 3.1000000000000004e-06, "loss": 0.7502, "step": 62 }, { "epoch": 0.02131258457374831, "grad_norm": 0.24202880263328552, "learning_rate": 3.1500000000000003e-06, "loss": 0.7422, "step": 63 }, { "epoch": 0.02165087956698241, "grad_norm": 0.2344835251569748, "learning_rate": 3.2000000000000003e-06, "loss": 0.734, "step": 64 }, { "epoch": 0.02198917456021651, "grad_norm": 0.23511846363544464, "learning_rate": 3.2500000000000002e-06, "loss": 0.7659, "step": 65 }, { "epoch": 0.022327469553450607, "grad_norm": 0.22822819650173187, "learning_rate": 3.3000000000000006e-06, "loss": 0.77, "step": 66 }, { "epoch": 0.02266576454668471, "grad_norm": 0.23218046128749847, "learning_rate": 3.3500000000000005e-06, "loss": 0.7751, "step": 67 }, { "epoch": 0.023004059539918808, "grad_norm": 0.22659695148468018, "learning_rate": 3.4000000000000005e-06, "loss": 0.7375, "step": 68 }, { "epoch": 0.02334235453315291, "grad_norm": 0.23136673867702484, "learning_rate": 3.45e-06, "loss": 0.7406, "step": 69 }, { "epoch": 0.02368064952638701, "grad_norm": 0.2264736145734787, "learning_rate": 3.5e-06, "loss": 0.7837, "step": 70 }, { "epoch": 0.02401894451962111, "grad_norm": 0.21895566582679749, "learning_rate": 3.5500000000000003e-06, "loss": 0.7698, "step": 71 }, { "epoch": 0.02435723951285521, "grad_norm": 0.2110176980495453, "learning_rate": 3.6000000000000003e-06, "loss": 0.7692, "step": 72 }, { "epoch": 0.02469553450608931, "grad_norm": 0.2077152580022812, "learning_rate": 3.65e-06, "loss": 0.7274, "step": 73 }, { "epoch": 0.02503382949932341, "grad_norm": 0.2114841192960739, "learning_rate": 3.7e-06, "loss": 0.7438, "step": 74 }, { "epoch": 0.02537212449255751, "grad_norm": 0.20656870305538177, "learning_rate": 3.7500000000000005e-06, "loss": 0.72, "step": 75 }, { "epoch": 0.02571041948579161, "grad_norm": 0.20921222865581512, "learning_rate": 3.8000000000000005e-06, "loss": 0.7624, "step": 76 }, { "epoch": 0.026048714479025712, "grad_norm": 0.2081456482410431, "learning_rate": 3.85e-06, "loss": 0.7413, "step": 77 }, { "epoch": 0.02638700947225981, "grad_norm": 0.19761289656162262, "learning_rate": 3.900000000000001e-06, "loss": 0.7468, "step": 78 }, { "epoch": 0.02672530446549391, "grad_norm": 0.20001396536827087, "learning_rate": 3.95e-06, "loss": 0.7538, "step": 79 }, { "epoch": 0.02706359945872801, "grad_norm": 0.2047550082206726, "learning_rate": 4.000000000000001e-06, "loss": 0.7576, "step": 80 }, { "epoch": 0.02740189445196211, "grad_norm": 0.1974104791879654, "learning_rate": 4.05e-06, "loss": 0.7558, "step": 81 }, { "epoch": 0.02774018944519621, "grad_norm": 0.18975216150283813, "learning_rate": 4.1e-06, "loss": 0.7245, "step": 82 }, { "epoch": 0.02807848443843031, "grad_norm": 0.19672256708145142, "learning_rate": 4.15e-06, "loss": 0.7299, "step": 83 }, { "epoch": 0.028416779431664412, "grad_norm": 0.1959012895822525, "learning_rate": 4.2000000000000004e-06, "loss": 0.7412, "step": 84 }, { "epoch": 0.02875507442489851, "grad_norm": 0.19008736312389374, "learning_rate": 4.25e-06, "loss": 0.7514, "step": 85 }, { "epoch": 0.029093369418132613, "grad_norm": 0.19592899084091187, "learning_rate": 4.3e-06, "loss": 0.7386, "step": 86 }, { "epoch": 0.02943166441136671, "grad_norm": 0.19850178062915802, "learning_rate": 4.350000000000001e-06, "loss": 0.745, "step": 87 }, { "epoch": 0.029769959404600813, "grad_norm": 0.199819415807724, "learning_rate": 4.4e-06, "loss": 0.7525, "step": 88 }, { "epoch": 0.030108254397834912, "grad_norm": 0.18576590716838837, "learning_rate": 4.450000000000001e-06, "loss": 0.739, "step": 89 }, { "epoch": 0.030446549391069014, "grad_norm": 0.18295684456825256, "learning_rate": 4.5e-06, "loss": 0.7568, "step": 90 }, { "epoch": 0.030784844384303112, "grad_norm": 0.19440272450447083, "learning_rate": 4.5500000000000005e-06, "loss": 0.7195, "step": 91 }, { "epoch": 0.03112313937753721, "grad_norm": 0.19021347165107727, "learning_rate": 4.600000000000001e-06, "loss": 0.7338, "step": 92 }, { "epoch": 0.03146143437077131, "grad_norm": 0.18591071665287018, "learning_rate": 4.65e-06, "loss": 0.7145, "step": 93 }, { "epoch": 0.031799729364005415, "grad_norm": 0.19127537310123444, "learning_rate": 4.7e-06, "loss": 0.7326, "step": 94 }, { "epoch": 0.03213802435723951, "grad_norm": 0.20237044990062714, "learning_rate": 4.75e-06, "loss": 0.7342, "step": 95 }, { "epoch": 0.03247631935047361, "grad_norm": 0.1817764937877655, "learning_rate": 4.800000000000001e-06, "loss": 0.6857, "step": 96 }, { "epoch": 0.032814614343707714, "grad_norm": 0.18967989087104797, "learning_rate": 4.85e-06, "loss": 0.7208, "step": 97 }, { "epoch": 0.033152909336941816, "grad_norm": 0.1865711659193039, "learning_rate": 4.9000000000000005e-06, "loss": 0.7205, "step": 98 }, { "epoch": 0.03349120433017591, "grad_norm": 0.18442098796367645, "learning_rate": 4.95e-06, "loss": 0.7496, "step": 99 }, { "epoch": 0.03382949932341001, "grad_norm": 0.18783792853355408, "learning_rate": 5e-06, "loss": 0.7195, "step": 100 }, { "epoch": 0.034167794316644115, "grad_norm": 0.18517974019050598, "learning_rate": 4.999999839524525e-06, "loss": 0.7014, "step": 101 }, { "epoch": 0.03450608930987822, "grad_norm": 0.1914462447166443, "learning_rate": 4.999999358098119e-06, "loss": 0.7325, "step": 102 }, { "epoch": 0.03484438430311231, "grad_norm": 0.18725411593914032, "learning_rate": 4.999998555720843e-06, "loss": 0.7186, "step": 103 }, { "epoch": 0.035182679296346414, "grad_norm": 0.1851007640361786, "learning_rate": 4.999997432392803e-06, "loss": 0.7244, "step": 104 }, { "epoch": 0.035520974289580516, "grad_norm": 0.18126922845840454, "learning_rate": 4.99999598811414e-06, "loss": 0.7183, "step": 105 }, { "epoch": 0.03585926928281461, "grad_norm": 0.18332946300506592, "learning_rate": 4.999994222885042e-06, "loss": 0.701, "step": 106 }, { "epoch": 0.03619756427604871, "grad_norm": 0.19509664177894592, "learning_rate": 4.999992136705734e-06, "loss": 0.7298, "step": 107 }, { "epoch": 0.036535859269282815, "grad_norm": 0.1882726401090622, "learning_rate": 4.999989729576485e-06, "loss": 0.7258, "step": 108 }, { "epoch": 0.03687415426251692, "grad_norm": 0.19097895920276642, "learning_rate": 4.999987001497602e-06, "loss": 0.7253, "step": 109 }, { "epoch": 0.03721244925575101, "grad_norm": 0.19654102623462677, "learning_rate": 4.999983952469437e-06, "loss": 0.7086, "step": 110 }, { "epoch": 0.037550744248985114, "grad_norm": 0.18612295389175415, "learning_rate": 4.999980582492382e-06, "loss": 0.7197, "step": 111 }, { "epoch": 0.037889039242219216, "grad_norm": 0.1974918097257614, "learning_rate": 4.999976891566866e-06, "loss": 0.7076, "step": 112 }, { "epoch": 0.03822733423545332, "grad_norm": 0.1830679476261139, "learning_rate": 4.9999728796933684e-06, "loss": 0.7242, "step": 113 }, { "epoch": 0.03856562922868741, "grad_norm": 0.19517149031162262, "learning_rate": 4.9999685468724e-06, "loss": 0.7088, "step": 114 }, { "epoch": 0.038903924221921515, "grad_norm": 0.18623511493206024, "learning_rate": 4.9999638931045186e-06, "loss": 0.7168, "step": 115 }, { "epoch": 0.03924221921515562, "grad_norm": 0.18677926063537598, "learning_rate": 4.999958918390321e-06, "loss": 0.7097, "step": 116 }, { "epoch": 0.03958051420838971, "grad_norm": 0.18877893686294556, "learning_rate": 4.999953622730447e-06, "loss": 0.6961, "step": 117 }, { "epoch": 0.039918809201623814, "grad_norm": 0.19688422977924347, "learning_rate": 4.999948006125575e-06, "loss": 0.7143, "step": 118 }, { "epoch": 0.040257104194857916, "grad_norm": 0.1940767616033554, "learning_rate": 4.999942068576428e-06, "loss": 0.7357, "step": 119 }, { "epoch": 0.04059539918809202, "grad_norm": 0.18815898895263672, "learning_rate": 4.999935810083766e-06, "loss": 0.7258, "step": 120 }, { "epoch": 0.040933694181326113, "grad_norm": 0.18311698734760284, "learning_rate": 4.999929230648394e-06, "loss": 0.6921, "step": 121 }, { "epoch": 0.041271989174560215, "grad_norm": 0.1848682463169098, "learning_rate": 4.999922330271155e-06, "loss": 0.7434, "step": 122 }, { "epoch": 0.04161028416779432, "grad_norm": 0.18727323412895203, "learning_rate": 4.999915108952937e-06, "loss": 0.7205, "step": 123 }, { "epoch": 0.04194857916102842, "grad_norm": 0.18879395723342896, "learning_rate": 4.999907566694667e-06, "loss": 0.7029, "step": 124 }, { "epoch": 0.042286874154262515, "grad_norm": 0.18784329295158386, "learning_rate": 4.999899703497312e-06, "loss": 0.7002, "step": 125 }, { "epoch": 0.04262516914749662, "grad_norm": 0.1885390430688858, "learning_rate": 4.999891519361882e-06, "loss": 0.7038, "step": 126 }, { "epoch": 0.04296346414073072, "grad_norm": 0.18721194565296173, "learning_rate": 4.999883014289426e-06, "loss": 0.7227, "step": 127 }, { "epoch": 0.04330175913396482, "grad_norm": 0.18127906322479248, "learning_rate": 4.999874188281039e-06, "loss": 0.7432, "step": 128 }, { "epoch": 0.043640054127198916, "grad_norm": 0.18488162755966187, "learning_rate": 4.999865041337852e-06, "loss": 0.7434, "step": 129 }, { "epoch": 0.04397834912043302, "grad_norm": 0.18584181368350983, "learning_rate": 4.99985557346104e-06, "loss": 0.7445, "step": 130 }, { "epoch": 0.04431664411366712, "grad_norm": 0.18478542566299438, "learning_rate": 4.999845784651818e-06, "loss": 0.7149, "step": 131 }, { "epoch": 0.044654939106901215, "grad_norm": 0.1904173046350479, "learning_rate": 4.9998356749114434e-06, "loss": 0.716, "step": 132 }, { "epoch": 0.04499323410013532, "grad_norm": 0.19709834456443787, "learning_rate": 4.9998252442412134e-06, "loss": 0.758, "step": 133 }, { "epoch": 0.04533152909336942, "grad_norm": 0.1878356784582138, "learning_rate": 4.999814492642468e-06, "loss": 0.7011, "step": 134 }, { "epoch": 0.04566982408660352, "grad_norm": 0.1950335055589676, "learning_rate": 4.9998034201165866e-06, "loss": 0.6946, "step": 135 }, { "epoch": 0.046008119079837616, "grad_norm": 0.19865374267101288, "learning_rate": 4.999792026664992e-06, "loss": 0.7163, "step": 136 }, { "epoch": 0.04634641407307172, "grad_norm": 0.1960017830133438, "learning_rate": 4.9997803122891446e-06, "loss": 0.7347, "step": 137 }, { "epoch": 0.04668470906630582, "grad_norm": 0.18267491459846497, "learning_rate": 4.99976827699055e-06, "loss": 0.7052, "step": 138 }, { "epoch": 0.04702300405953992, "grad_norm": 0.18890222907066345, "learning_rate": 4.999755920770752e-06, "loss": 0.7128, "step": 139 }, { "epoch": 0.04736129905277402, "grad_norm": 0.20229989290237427, "learning_rate": 4.9997432436313386e-06, "loss": 0.7105, "step": 140 }, { "epoch": 0.04769959404600812, "grad_norm": 0.20218680799007416, "learning_rate": 4.999730245573936e-06, "loss": 0.7088, "step": 141 }, { "epoch": 0.04803788903924222, "grad_norm": 0.19978873431682587, "learning_rate": 4.999716926600215e-06, "loss": 0.7154, "step": 142 }, { "epoch": 0.048376184032476316, "grad_norm": 0.1951395869255066, "learning_rate": 4.999703286711882e-06, "loss": 0.7204, "step": 143 }, { "epoch": 0.04871447902571042, "grad_norm": 0.18328897655010223, "learning_rate": 4.999689325910691e-06, "loss": 0.694, "step": 144 }, { "epoch": 0.04905277401894452, "grad_norm": 0.20357415080070496, "learning_rate": 4.999675044198432e-06, "loss": 0.7298, "step": 145 }, { "epoch": 0.04939106901217862, "grad_norm": 0.19996631145477295, "learning_rate": 4.999660441576941e-06, "loss": 0.6968, "step": 146 }, { "epoch": 0.04972936400541272, "grad_norm": 0.19135387241840363, "learning_rate": 4.999645518048092e-06, "loss": 0.7131, "step": 147 }, { "epoch": 0.05006765899864682, "grad_norm": 0.1947384476661682, "learning_rate": 4.9996302736137994e-06, "loss": 0.6915, "step": 148 }, { "epoch": 0.05040595399188092, "grad_norm": 0.18958574533462524, "learning_rate": 4.999614708276021e-06, "loss": 0.6993, "step": 149 }, { "epoch": 0.05074424898511502, "grad_norm": 0.1931043118238449, "learning_rate": 4.999598822036755e-06, "loss": 0.6793, "step": 150 }, { "epoch": 0.05108254397834912, "grad_norm": 0.20002217590808868, "learning_rate": 4.999582614898042e-06, "loss": 0.7188, "step": 151 }, { "epoch": 0.05142083897158322, "grad_norm": 0.18821609020233154, "learning_rate": 4.9995660868619614e-06, "loss": 0.6756, "step": 152 }, { "epoch": 0.05175913396481732, "grad_norm": 0.18836663663387299, "learning_rate": 4.999549237930636e-06, "loss": 0.6953, "step": 153 }, { "epoch": 0.052097428958051424, "grad_norm": 0.18696729838848114, "learning_rate": 4.999532068106228e-06, "loss": 0.7316, "step": 154 }, { "epoch": 0.05243572395128552, "grad_norm": 0.18782716989517212, "learning_rate": 4.999514577390943e-06, "loss": 0.7344, "step": 155 }, { "epoch": 0.05277401894451962, "grad_norm": 0.20105955004692078, "learning_rate": 4.999496765787024e-06, "loss": 0.7039, "step": 156 }, { "epoch": 0.05311231393775372, "grad_norm": 0.19204497337341309, "learning_rate": 4.999478633296761e-06, "loss": 0.6769, "step": 157 }, { "epoch": 0.05345060893098782, "grad_norm": 0.1988452970981598, "learning_rate": 4.999460179922479e-06, "loss": 0.7108, "step": 158 }, { "epoch": 0.05378890392422192, "grad_norm": 0.1891714632511139, "learning_rate": 4.999441405666547e-06, "loss": 0.7266, "step": 159 }, { "epoch": 0.05412719891745602, "grad_norm": 0.18919819593429565, "learning_rate": 4.999422310531377e-06, "loss": 0.7072, "step": 160 }, { "epoch": 0.054465493910690124, "grad_norm": 0.19568458199501038, "learning_rate": 4.999402894519421e-06, "loss": 0.7325, "step": 161 }, { "epoch": 0.05480378890392422, "grad_norm": 0.18339653313159943, "learning_rate": 4.9993831576331696e-06, "loss": 0.6853, "step": 162 }, { "epoch": 0.05514208389715832, "grad_norm": 0.19178633391857147, "learning_rate": 4.999363099875158e-06, "loss": 0.6865, "step": 163 }, { "epoch": 0.05548037889039242, "grad_norm": 0.18333321809768677, "learning_rate": 4.9993427212479604e-06, "loss": 0.7193, "step": 164 }, { "epoch": 0.055818673883626525, "grad_norm": 0.1928037852048874, "learning_rate": 4.999322021754193e-06, "loss": 0.6949, "step": 165 }, { "epoch": 0.05615696887686062, "grad_norm": 0.1899040788412094, "learning_rate": 4.999301001396513e-06, "loss": 0.6957, "step": 166 }, { "epoch": 0.05649526387009472, "grad_norm": 0.20395973324775696, "learning_rate": 4.99927966017762e-06, "loss": 0.7068, "step": 167 }, { "epoch": 0.056833558863328824, "grad_norm": 0.1995311826467514, "learning_rate": 4.999257998100254e-06, "loss": 0.7331, "step": 168 }, { "epoch": 0.05717185385656292, "grad_norm": 0.19529490172863007, "learning_rate": 4.999236015167195e-06, "loss": 0.6982, "step": 169 }, { "epoch": 0.05751014884979702, "grad_norm": 0.19061776995658875, "learning_rate": 4.999213711381265e-06, "loss": 0.6944, "step": 170 }, { "epoch": 0.05784844384303112, "grad_norm": 0.1899946630001068, "learning_rate": 4.999191086745328e-06, "loss": 0.6782, "step": 171 }, { "epoch": 0.058186738836265225, "grad_norm": 0.18246881663799286, "learning_rate": 4.9991681412622896e-06, "loss": 0.7231, "step": 172 }, { "epoch": 0.05852503382949932, "grad_norm": 0.18850313127040863, "learning_rate": 4.999144874935092e-06, "loss": 0.7017, "step": 173 }, { "epoch": 0.05886332882273342, "grad_norm": 0.19261717796325684, "learning_rate": 4.999121287766726e-06, "loss": 0.6849, "step": 174 }, { "epoch": 0.059201623815967525, "grad_norm": 0.1908152848482132, "learning_rate": 4.999097379760218e-06, "loss": 0.7284, "step": 175 }, { "epoch": 0.05953991880920163, "grad_norm": 0.19049489498138428, "learning_rate": 4.999073150918637e-06, "loss": 0.6793, "step": 176 }, { "epoch": 0.05987821380243572, "grad_norm": 0.18340197205543518, "learning_rate": 4.999048601245095e-06, "loss": 0.7046, "step": 177 }, { "epoch": 0.060216508795669824, "grad_norm": 0.1974797546863556, "learning_rate": 4.999023730742742e-06, "loss": 0.7157, "step": 178 }, { "epoch": 0.060554803788903926, "grad_norm": 0.19154323637485504, "learning_rate": 4.998998539414772e-06, "loss": 0.7011, "step": 179 }, { "epoch": 0.06089309878213803, "grad_norm": 0.2043318897485733, "learning_rate": 4.998973027264419e-06, "loss": 0.7123, "step": 180 }, { "epoch": 0.06123139377537212, "grad_norm": 0.19144178926944733, "learning_rate": 4.998947194294958e-06, "loss": 0.7065, "step": 181 }, { "epoch": 0.061569688768606225, "grad_norm": 0.18897554278373718, "learning_rate": 4.998921040509704e-06, "loss": 0.6802, "step": 182 }, { "epoch": 0.06190798376184033, "grad_norm": 0.19585008919239044, "learning_rate": 4.9988945659120175e-06, "loss": 0.6886, "step": 183 }, { "epoch": 0.06224627875507442, "grad_norm": 0.19337284564971924, "learning_rate": 4.998867770505295e-06, "loss": 0.7344, "step": 184 }, { "epoch": 0.06258457374830853, "grad_norm": 0.18725362420082092, "learning_rate": 4.998840654292978e-06, "loss": 0.7053, "step": 185 }, { "epoch": 0.06292286874154263, "grad_norm": 0.19687679409980774, "learning_rate": 4.998813217278546e-06, "loss": 0.6677, "step": 186 }, { "epoch": 0.06326116373477672, "grad_norm": 0.19855792820453644, "learning_rate": 4.9987854594655236e-06, "loss": 0.7214, "step": 187 }, { "epoch": 0.06359945872801083, "grad_norm": 0.20533519983291626, "learning_rate": 4.998757380857473e-06, "loss": 0.7445, "step": 188 }, { "epoch": 0.06393775372124492, "grad_norm": 0.20073583722114563, "learning_rate": 4.9987289814579984e-06, "loss": 0.686, "step": 189 }, { "epoch": 0.06427604871447902, "grad_norm": 0.19877952337265015, "learning_rate": 4.9987002612707466e-06, "loss": 0.6955, "step": 190 }, { "epoch": 0.06461434370771313, "grad_norm": 0.1983058601617813, "learning_rate": 4.998671220299405e-06, "loss": 0.7196, "step": 191 }, { "epoch": 0.06495263870094722, "grad_norm": 0.19413228332996368, "learning_rate": 4.9986418585477016e-06, "loss": 0.6662, "step": 192 }, { "epoch": 0.06529093369418133, "grad_norm": 0.19949573278427124, "learning_rate": 4.998612176019405e-06, "loss": 0.6847, "step": 193 }, { "epoch": 0.06562922868741543, "grad_norm": 0.19834288954734802, "learning_rate": 4.998582172718327e-06, "loss": 0.6724, "step": 194 }, { "epoch": 0.06596752368064952, "grad_norm": 0.19697165489196777, "learning_rate": 4.9985518486483196e-06, "loss": 0.6989, "step": 195 }, { "epoch": 0.06630581867388363, "grad_norm": 0.19365616142749786, "learning_rate": 4.998521203813275e-06, "loss": 0.7187, "step": 196 }, { "epoch": 0.06664411366711773, "grad_norm": 0.2025780975818634, "learning_rate": 4.998490238217126e-06, "loss": 0.7115, "step": 197 }, { "epoch": 0.06698240866035182, "grad_norm": 0.18400835990905762, "learning_rate": 4.998458951863851e-06, "loss": 0.6836, "step": 198 }, { "epoch": 0.06732070365358593, "grad_norm": 0.21085551381111145, "learning_rate": 4.998427344757465e-06, "loss": 0.6975, "step": 199 }, { "epoch": 0.06765899864682003, "grad_norm": 0.20779626071453094, "learning_rate": 4.998395416902026e-06, "loss": 0.7019, "step": 200 }, { "epoch": 0.06799729364005412, "grad_norm": 0.19607093930244446, "learning_rate": 4.998363168301633e-06, "loss": 0.6979, "step": 201 }, { "epoch": 0.06833558863328823, "grad_norm": 0.1943480521440506, "learning_rate": 4.9983305989604256e-06, "loss": 0.7063, "step": 202 }, { "epoch": 0.06867388362652233, "grad_norm": 0.1963743269443512, "learning_rate": 4.998297708882584e-06, "loss": 0.6998, "step": 203 }, { "epoch": 0.06901217861975643, "grad_norm": 0.2052895426750183, "learning_rate": 4.998264498072334e-06, "loss": 0.7093, "step": 204 }, { "epoch": 0.06935047361299053, "grad_norm": 0.19581186771392822, "learning_rate": 4.998230966533937e-06, "loss": 0.6761, "step": 205 }, { "epoch": 0.06968876860622462, "grad_norm": 0.20708315074443817, "learning_rate": 4.998197114271697e-06, "loss": 0.6819, "step": 206 }, { "epoch": 0.07002706359945873, "grad_norm": 0.20113201439380646, "learning_rate": 4.998162941289961e-06, "loss": 0.7028, "step": 207 }, { "epoch": 0.07036535859269283, "grad_norm": 0.19930900633335114, "learning_rate": 4.998128447593117e-06, "loss": 0.6805, "step": 208 }, { "epoch": 0.07070365358592692, "grad_norm": 0.19791027903556824, "learning_rate": 4.998093633185592e-06, "loss": 0.7051, "step": 209 }, { "epoch": 0.07104194857916103, "grad_norm": 0.20085057616233826, "learning_rate": 4.998058498071856e-06, "loss": 0.706, "step": 210 }, { "epoch": 0.07138024357239513, "grad_norm": 0.19891057908535004, "learning_rate": 4.9980230422564195e-06, "loss": 0.6875, "step": 211 }, { "epoch": 0.07171853856562922, "grad_norm": 0.19535799324512482, "learning_rate": 4.997987265743834e-06, "loss": 0.6855, "step": 212 }, { "epoch": 0.07205683355886333, "grad_norm": 0.19949902594089508, "learning_rate": 4.997951168538693e-06, "loss": 0.6651, "step": 213 }, { "epoch": 0.07239512855209743, "grad_norm": 0.20261453092098236, "learning_rate": 4.99791475064563e-06, "loss": 0.6809, "step": 214 }, { "epoch": 0.07273342354533154, "grad_norm": 0.19712717831134796, "learning_rate": 4.997878012069322e-06, "loss": 0.6899, "step": 215 }, { "epoch": 0.07307171853856563, "grad_norm": 0.2066112756729126, "learning_rate": 4.997840952814484e-06, "loss": 0.7037, "step": 216 }, { "epoch": 0.07341001353179973, "grad_norm": 0.19402629137039185, "learning_rate": 4.997803572885874e-06, "loss": 0.68, "step": 217 }, { "epoch": 0.07374830852503383, "grad_norm": 0.19789829850196838, "learning_rate": 4.997765872288292e-06, "loss": 0.7236, "step": 218 }, { "epoch": 0.07408660351826793, "grad_norm": 0.20883303880691528, "learning_rate": 4.997727851026576e-06, "loss": 0.6912, "step": 219 }, { "epoch": 0.07442489851150202, "grad_norm": 0.2045092135667801, "learning_rate": 4.997689509105608e-06, "loss": 0.6858, "step": 220 }, { "epoch": 0.07476319350473613, "grad_norm": 0.20657972991466522, "learning_rate": 4.997650846530311e-06, "loss": 0.7066, "step": 221 }, { "epoch": 0.07510148849797023, "grad_norm": 0.2036881297826767, "learning_rate": 4.997611863305647e-06, "loss": 0.6891, "step": 222 }, { "epoch": 0.07543978349120432, "grad_norm": 0.20515350997447968, "learning_rate": 4.997572559436623e-06, "loss": 0.7125, "step": 223 }, { "epoch": 0.07577807848443843, "grad_norm": 0.19229350984096527, "learning_rate": 4.997532934928283e-06, "loss": 0.6614, "step": 224 }, { "epoch": 0.07611637347767253, "grad_norm": 0.20285889506340027, "learning_rate": 4.997492989785714e-06, "loss": 0.7085, "step": 225 }, { "epoch": 0.07645466847090664, "grad_norm": 0.19925077259540558, "learning_rate": 4.9974527240140455e-06, "loss": 0.7113, "step": 226 }, { "epoch": 0.07679296346414073, "grad_norm": 0.1916913241147995, "learning_rate": 4.997412137618446e-06, "loss": 0.6637, "step": 227 }, { "epoch": 0.07713125845737483, "grad_norm": 0.20506256818771362, "learning_rate": 4.997371230604126e-06, "loss": 0.6923, "step": 228 }, { "epoch": 0.07746955345060894, "grad_norm": 0.20287774503231049, "learning_rate": 4.997330002976337e-06, "loss": 0.6908, "step": 229 }, { "epoch": 0.07780784844384303, "grad_norm": 0.20252133905887604, "learning_rate": 4.997288454740372e-06, "loss": 0.6796, "step": 230 }, { "epoch": 0.07814614343707713, "grad_norm": 0.1976034790277481, "learning_rate": 4.997246585901565e-06, "loss": 0.7089, "step": 231 }, { "epoch": 0.07848443843031123, "grad_norm": 0.2120821177959442, "learning_rate": 4.997204396465292e-06, "loss": 0.6763, "step": 232 }, { "epoch": 0.07882273342354533, "grad_norm": 0.20599138736724854, "learning_rate": 4.9971618864369676e-06, "loss": 0.6834, "step": 233 }, { "epoch": 0.07916102841677942, "grad_norm": 0.20934651792049408, "learning_rate": 4.99711905582205e-06, "loss": 0.6665, "step": 234 }, { "epoch": 0.07949932341001353, "grad_norm": 0.204156294465065, "learning_rate": 4.9970759046260385e-06, "loss": 0.6802, "step": 235 }, { "epoch": 0.07983761840324763, "grad_norm": 0.20378589630126953, "learning_rate": 4.997032432854472e-06, "loss": 0.7043, "step": 236 }, { "epoch": 0.08017591339648174, "grad_norm": 0.20697647333145142, "learning_rate": 4.996988640512931e-06, "loss": 0.6939, "step": 237 }, { "epoch": 0.08051420838971583, "grad_norm": 0.2135564535856247, "learning_rate": 4.9969445276070395e-06, "loss": 0.7218, "step": 238 }, { "epoch": 0.08085250338294993, "grad_norm": 0.2033076286315918, "learning_rate": 4.996900094142459e-06, "loss": 0.6678, "step": 239 }, { "epoch": 0.08119079837618404, "grad_norm": 0.20832550525665283, "learning_rate": 4.996855340124894e-06, "loss": 0.6908, "step": 240 }, { "epoch": 0.08152909336941813, "grad_norm": 0.20612551271915436, "learning_rate": 4.996810265560091e-06, "loss": 0.6838, "step": 241 }, { "epoch": 0.08186738836265223, "grad_norm": 0.20617735385894775, "learning_rate": 4.996764870453836e-06, "loss": 0.6855, "step": 242 }, { "epoch": 0.08220568335588634, "grad_norm": 0.21661782264709473, "learning_rate": 4.996719154811956e-06, "loss": 0.6565, "step": 243 }, { "epoch": 0.08254397834912043, "grad_norm": 0.20216654241085052, "learning_rate": 4.996673118640323e-06, "loss": 0.7049, "step": 244 }, { "epoch": 0.08288227334235454, "grad_norm": 0.21835078299045563, "learning_rate": 4.996626761944844e-06, "loss": 0.6743, "step": 245 }, { "epoch": 0.08322056833558863, "grad_norm": 0.21051070094108582, "learning_rate": 4.9965800847314705e-06, "loss": 0.678, "step": 246 }, { "epoch": 0.08355886332882273, "grad_norm": 0.2078743427991867, "learning_rate": 4.996533087006197e-06, "loss": 0.6791, "step": 247 }, { "epoch": 0.08389715832205684, "grad_norm": 0.20969849824905396, "learning_rate": 4.996485768775056e-06, "loss": 0.6473, "step": 248 }, { "epoch": 0.08423545331529093, "grad_norm": 0.20665079355239868, "learning_rate": 4.996438130044121e-06, "loss": 0.673, "step": 249 }, { "epoch": 0.08457374830852503, "grad_norm": 0.2039971500635147, "learning_rate": 4.99639017081951e-06, "loss": 0.6727, "step": 250 }, { "epoch": 0.08491204330175914, "grad_norm": 0.21164476871490479, "learning_rate": 4.996341891107379e-06, "loss": 0.6526, "step": 251 }, { "epoch": 0.08525033829499323, "grad_norm": 0.2099243700504303, "learning_rate": 4.9962932909139265e-06, "loss": 0.6831, "step": 252 }, { "epoch": 0.08558863328822733, "grad_norm": 0.21000251173973083, "learning_rate": 4.99624437024539e-06, "loss": 0.6917, "step": 253 }, { "epoch": 0.08592692828146144, "grad_norm": 0.2100362479686737, "learning_rate": 4.996195129108053e-06, "loss": 0.698, "step": 254 }, { "epoch": 0.08626522327469553, "grad_norm": 0.20553170144557953, "learning_rate": 4.996145567508235e-06, "loss": 0.6822, "step": 255 }, { "epoch": 0.08660351826792964, "grad_norm": 0.20825114846229553, "learning_rate": 4.996095685452299e-06, "loss": 0.6884, "step": 256 }, { "epoch": 0.08694181326116374, "grad_norm": 0.19606679677963257, "learning_rate": 4.99604548294665e-06, "loss": 0.6707, "step": 257 }, { "epoch": 0.08728010825439783, "grad_norm": 0.2105158567428589, "learning_rate": 4.995994959997731e-06, "loss": 0.7002, "step": 258 }, { "epoch": 0.08761840324763194, "grad_norm": 0.21269915997982025, "learning_rate": 4.995944116612031e-06, "loss": 0.6952, "step": 259 }, { "epoch": 0.08795669824086604, "grad_norm": 0.21205005049705505, "learning_rate": 4.995892952796074e-06, "loss": 0.6893, "step": 260 }, { "epoch": 0.08829499323410013, "grad_norm": 0.22121933102607727, "learning_rate": 4.995841468556432e-06, "loss": 0.6896, "step": 261 }, { "epoch": 0.08863328822733424, "grad_norm": 0.20240426063537598, "learning_rate": 4.995789663899711e-06, "loss": 0.6832, "step": 262 }, { "epoch": 0.08897158322056833, "grad_norm": 0.20699365437030792, "learning_rate": 4.995737538832564e-06, "loss": 0.6788, "step": 263 }, { "epoch": 0.08930987821380243, "grad_norm": 0.2231341451406479, "learning_rate": 4.995685093361682e-06, "loss": 0.6799, "step": 264 }, { "epoch": 0.08964817320703654, "grad_norm": 0.21361176669597626, "learning_rate": 4.9956323274937975e-06, "loss": 0.7036, "step": 265 }, { "epoch": 0.08998646820027063, "grad_norm": 0.21375077962875366, "learning_rate": 4.9955792412356865e-06, "loss": 0.673, "step": 266 }, { "epoch": 0.09032476319350474, "grad_norm": 0.2075895369052887, "learning_rate": 4.9955258345941625e-06, "loss": 0.691, "step": 267 }, { "epoch": 0.09066305818673884, "grad_norm": 0.20629572868347168, "learning_rate": 4.995472107576083e-06, "loss": 0.7092, "step": 268 }, { "epoch": 0.09100135317997293, "grad_norm": 0.1996820867061615, "learning_rate": 4.995418060188344e-06, "loss": 0.6688, "step": 269 }, { "epoch": 0.09133964817320704, "grad_norm": 0.20391984283924103, "learning_rate": 4.995363692437884e-06, "loss": 0.6982, "step": 270 }, { "epoch": 0.09167794316644114, "grad_norm": 0.21628272533416748, "learning_rate": 4.995309004331686e-06, "loss": 0.662, "step": 271 }, { "epoch": 0.09201623815967523, "grad_norm": 0.20856939256191254, "learning_rate": 4.9952539958767675e-06, "loss": 0.6523, "step": 272 }, { "epoch": 0.09235453315290934, "grad_norm": 0.21313869953155518, "learning_rate": 4.995198667080191e-06, "loss": 0.6743, "step": 273 }, { "epoch": 0.09269282814614344, "grad_norm": 0.2262369841337204, "learning_rate": 4.995143017949061e-06, "loss": 0.6894, "step": 274 }, { "epoch": 0.09303112313937753, "grad_norm": 0.21210289001464844, "learning_rate": 4.995087048490521e-06, "loss": 0.6546, "step": 275 }, { "epoch": 0.09336941813261164, "grad_norm": 0.2152973711490631, "learning_rate": 4.995030758711756e-06, "loss": 0.7006, "step": 276 }, { "epoch": 0.09370771312584573, "grad_norm": 0.21384137868881226, "learning_rate": 4.994974148619994e-06, "loss": 0.6973, "step": 277 }, { "epoch": 0.09404600811907984, "grad_norm": 0.22062373161315918, "learning_rate": 4.9949172182225e-06, "loss": 0.6732, "step": 278 }, { "epoch": 0.09438430311231394, "grad_norm": 0.21281228959560394, "learning_rate": 4.9948599675265855e-06, "loss": 0.6917, "step": 279 }, { "epoch": 0.09472259810554803, "grad_norm": 0.2118413895368576, "learning_rate": 4.994802396539599e-06, "loss": 0.6764, "step": 280 }, { "epoch": 0.09506089309878214, "grad_norm": 0.20453035831451416, "learning_rate": 4.99474450526893e-06, "loss": 0.6785, "step": 281 }, { "epoch": 0.09539918809201624, "grad_norm": 0.20882472395896912, "learning_rate": 4.994686293722014e-06, "loss": 0.6935, "step": 282 }, { "epoch": 0.09573748308525033, "grad_norm": 0.22508378326892853, "learning_rate": 4.9946277619063225e-06, "loss": 0.6787, "step": 283 }, { "epoch": 0.09607577807848444, "grad_norm": 0.2192172408103943, "learning_rate": 4.994568909829369e-06, "loss": 0.6843, "step": 284 }, { "epoch": 0.09641407307171854, "grad_norm": 0.2155693918466568, "learning_rate": 4.9945097374987096e-06, "loss": 0.6758, "step": 285 }, { "epoch": 0.09675236806495263, "grad_norm": 0.21563497185707092, "learning_rate": 4.994450244921941e-06, "loss": 0.6792, "step": 286 }, { "epoch": 0.09709066305818674, "grad_norm": 0.2330203652381897, "learning_rate": 4.994390432106702e-06, "loss": 0.6961, "step": 287 }, { "epoch": 0.09742895805142084, "grad_norm": 0.21809963881969452, "learning_rate": 4.994330299060668e-06, "loss": 0.6747, "step": 288 }, { "epoch": 0.09776725304465494, "grad_norm": 0.21034228801727295, "learning_rate": 4.994269845791563e-06, "loss": 0.7135, "step": 289 }, { "epoch": 0.09810554803788904, "grad_norm": 0.21551263332366943, "learning_rate": 4.9942090723071454e-06, "loss": 0.6612, "step": 290 }, { "epoch": 0.09844384303112313, "grad_norm": 0.22050173580646515, "learning_rate": 4.994147978615219e-06, "loss": 0.6871, "step": 291 }, { "epoch": 0.09878213802435724, "grad_norm": 0.21225669980049133, "learning_rate": 4.9940865647236255e-06, "loss": 0.6901, "step": 292 }, { "epoch": 0.09912043301759134, "grad_norm": 0.20882059633731842, "learning_rate": 4.99402483064025e-06, "loss": 0.6834, "step": 293 }, { "epoch": 0.09945872801082543, "grad_norm": 0.22327478229999542, "learning_rate": 4.993962776373018e-06, "loss": 0.6978, "step": 294 }, { "epoch": 0.09979702300405954, "grad_norm": 0.21540549397468567, "learning_rate": 4.9939004019298955e-06, "loss": 0.6869, "step": 295 }, { "epoch": 0.10013531799729364, "grad_norm": 0.2194754183292389, "learning_rate": 4.99383770731889e-06, "loss": 0.6952, "step": 296 }, { "epoch": 0.10047361299052775, "grad_norm": 0.21692237257957458, "learning_rate": 4.993774692548052e-06, "loss": 0.6973, "step": 297 }, { "epoch": 0.10081190798376184, "grad_norm": 0.21536946296691895, "learning_rate": 4.993711357625469e-06, "loss": 0.6721, "step": 298 }, { "epoch": 0.10115020297699594, "grad_norm": 0.213889017701149, "learning_rate": 4.993647702559274e-06, "loss": 0.7035, "step": 299 }, { "epoch": 0.10148849797023005, "grad_norm": 0.22723858058452606, "learning_rate": 4.993583727357638e-06, "loss": 0.677, "step": 300 }, { "epoch": 0.10182679296346414, "grad_norm": 0.21247157454490662, "learning_rate": 4.993519432028774e-06, "loss": 0.6962, "step": 301 }, { "epoch": 0.10216508795669824, "grad_norm": 0.21780304610729218, "learning_rate": 4.993454816580937e-06, "loss": 0.7032, "step": 302 }, { "epoch": 0.10250338294993235, "grad_norm": 0.2129039615392685, "learning_rate": 4.993389881022421e-06, "loss": 0.6654, "step": 303 }, { "epoch": 0.10284167794316644, "grad_norm": 0.21771487593650818, "learning_rate": 4.9933246253615645e-06, "loss": 0.6745, "step": 304 }, { "epoch": 0.10317997293640054, "grad_norm": 0.21839098632335663, "learning_rate": 4.993259049606743e-06, "loss": 0.6797, "step": 305 }, { "epoch": 0.10351826792963464, "grad_norm": 0.23275397717952728, "learning_rate": 4.9931931537663776e-06, "loss": 0.6763, "step": 306 }, { "epoch": 0.10385656292286874, "grad_norm": 0.20738539099693298, "learning_rate": 4.9931269378489255e-06, "loss": 0.6942, "step": 307 }, { "epoch": 0.10419485791610285, "grad_norm": 0.22642409801483154, "learning_rate": 4.993060401862888e-06, "loss": 0.7125, "step": 308 }, { "epoch": 0.10453315290933694, "grad_norm": 0.21417498588562012, "learning_rate": 4.992993545816809e-06, "loss": 0.6696, "step": 309 }, { "epoch": 0.10487144790257104, "grad_norm": 0.2231566309928894, "learning_rate": 4.9929263697192685e-06, "loss": 0.6865, "step": 310 }, { "epoch": 0.10520974289580515, "grad_norm": 0.23336437344551086, "learning_rate": 4.992858873578893e-06, "loss": 0.6897, "step": 311 }, { "epoch": 0.10554803788903924, "grad_norm": 0.2189081758260727, "learning_rate": 4.992791057404346e-06, "loss": 0.6653, "step": 312 }, { "epoch": 0.10588633288227334, "grad_norm": 0.21535345911979675, "learning_rate": 4.992722921204336e-06, "loss": 0.6718, "step": 313 }, { "epoch": 0.10622462787550745, "grad_norm": 0.2202640026807785, "learning_rate": 4.992654464987608e-06, "loss": 0.7185, "step": 314 }, { "epoch": 0.10656292286874154, "grad_norm": 0.23366160690784454, "learning_rate": 4.992585688762951e-06, "loss": 0.7022, "step": 315 }, { "epoch": 0.10690121786197564, "grad_norm": 0.21393480896949768, "learning_rate": 4.992516592539196e-06, "loss": 0.7088, "step": 316 }, { "epoch": 0.10723951285520975, "grad_norm": 0.23210909962654114, "learning_rate": 4.992447176325212e-06, "loss": 0.6905, "step": 317 }, { "epoch": 0.10757780784844384, "grad_norm": 0.20842531323432922, "learning_rate": 4.992377440129911e-06, "loss": 0.6709, "step": 318 }, { "epoch": 0.10791610284167795, "grad_norm": 0.21786710619926453, "learning_rate": 4.992307383962246e-06, "loss": 0.6912, "step": 319 }, { "epoch": 0.10825439783491204, "grad_norm": 0.21988864243030548, "learning_rate": 4.992237007831211e-06, "loss": 0.6735, "step": 320 }, { "epoch": 0.10859269282814614, "grad_norm": 0.22171525657176971, "learning_rate": 4.992166311745841e-06, "loss": 0.6923, "step": 321 }, { "epoch": 0.10893098782138025, "grad_norm": 0.22636902332305908, "learning_rate": 4.992095295715211e-06, "loss": 0.6709, "step": 322 }, { "epoch": 0.10926928281461434, "grad_norm": 0.2151307761669159, "learning_rate": 4.992023959748439e-06, "loss": 0.6878, "step": 323 }, { "epoch": 0.10960757780784844, "grad_norm": 0.22044600546360016, "learning_rate": 4.991952303854683e-06, "loss": 0.7115, "step": 324 }, { "epoch": 0.10994587280108255, "grad_norm": 0.21665897965431213, "learning_rate": 4.991880328043141e-06, "loss": 0.704, "step": 325 }, { "epoch": 0.11028416779431664, "grad_norm": 0.22031225264072418, "learning_rate": 4.991808032323056e-06, "loss": 0.6744, "step": 326 }, { "epoch": 0.11062246278755074, "grad_norm": 0.22540146112442017, "learning_rate": 4.991735416703707e-06, "loss": 0.6606, "step": 327 }, { "epoch": 0.11096075778078485, "grad_norm": 0.2185710072517395, "learning_rate": 4.9916624811944175e-06, "loss": 0.6654, "step": 328 }, { "epoch": 0.11129905277401894, "grad_norm": 0.2213398814201355, "learning_rate": 4.991589225804551e-06, "loss": 0.6684, "step": 329 }, { "epoch": 0.11163734776725305, "grad_norm": 0.22903922200202942, "learning_rate": 4.99151565054351e-06, "loss": 0.6736, "step": 330 }, { "epoch": 0.11197564276048715, "grad_norm": 0.21496374905109406, "learning_rate": 4.9914417554207434e-06, "loss": 0.6889, "step": 331 }, { "epoch": 0.11231393775372124, "grad_norm": 0.216947540640831, "learning_rate": 4.991367540445736e-06, "loss": 0.6847, "step": 332 }, { "epoch": 0.11265223274695535, "grad_norm": 0.2170547991991043, "learning_rate": 4.991293005628015e-06, "loss": 0.6734, "step": 333 }, { "epoch": 0.11299052774018944, "grad_norm": 0.22944290935993195, "learning_rate": 4.9912181509771505e-06, "loss": 0.6554, "step": 334 }, { "epoch": 0.11332882273342354, "grad_norm": 0.22795914113521576, "learning_rate": 4.991142976502753e-06, "loss": 0.6582, "step": 335 }, { "epoch": 0.11366711772665765, "grad_norm": 0.22316622734069824, "learning_rate": 4.991067482214471e-06, "loss": 0.6872, "step": 336 }, { "epoch": 0.11400541271989174, "grad_norm": 0.2117655873298645, "learning_rate": 4.990991668121998e-06, "loss": 0.6567, "step": 337 }, { "epoch": 0.11434370771312584, "grad_norm": 0.22236984968185425, "learning_rate": 4.9909155342350675e-06, "loss": 0.6716, "step": 338 }, { "epoch": 0.11468200270635995, "grad_norm": 0.21970327198505402, "learning_rate": 4.990839080563452e-06, "loss": 0.6748, "step": 339 }, { "epoch": 0.11502029769959404, "grad_norm": 0.23046572506427765, "learning_rate": 4.990762307116969e-06, "loss": 0.6816, "step": 340 }, { "epoch": 0.11535859269282815, "grad_norm": 0.23384122550487518, "learning_rate": 4.990685213905472e-06, "loss": 0.6724, "step": 341 }, { "epoch": 0.11569688768606225, "grad_norm": 0.20976535975933075, "learning_rate": 4.99060780093886e-06, "loss": 0.6582, "step": 342 }, { "epoch": 0.11603518267929634, "grad_norm": 0.21732792258262634, "learning_rate": 4.990530068227072e-06, "loss": 0.6678, "step": 343 }, { "epoch": 0.11637347767253045, "grad_norm": 0.21509791910648346, "learning_rate": 4.990452015780085e-06, "loss": 0.715, "step": 344 }, { "epoch": 0.11671177266576455, "grad_norm": 0.21847152709960938, "learning_rate": 4.9903736436079205e-06, "loss": 0.6756, "step": 345 }, { "epoch": 0.11705006765899864, "grad_norm": 0.21810345351696014, "learning_rate": 4.9902949517206415e-06, "loss": 0.6664, "step": 346 }, { "epoch": 0.11738836265223275, "grad_norm": 0.21546834707260132, "learning_rate": 4.990215940128348e-06, "loss": 0.6773, "step": 347 }, { "epoch": 0.11772665764546685, "grad_norm": 0.2323540896177292, "learning_rate": 4.990136608841185e-06, "loss": 0.685, "step": 348 }, { "epoch": 0.11806495263870095, "grad_norm": 0.22190017998218536, "learning_rate": 4.990056957869336e-06, "loss": 0.6909, "step": 349 }, { "epoch": 0.11840324763193505, "grad_norm": 0.2267863005399704, "learning_rate": 4.989976987223027e-06, "loss": 0.6703, "step": 350 }, { "epoch": 0.11874154262516914, "grad_norm": 0.22832167148590088, "learning_rate": 4.9898966969125265e-06, "loss": 0.6806, "step": 351 }, { "epoch": 0.11907983761840325, "grad_norm": 0.24483756721019745, "learning_rate": 4.98981608694814e-06, "loss": 0.6898, "step": 352 }, { "epoch": 0.11941813261163735, "grad_norm": 0.220206618309021, "learning_rate": 4.989735157340218e-06, "loss": 0.6824, "step": 353 }, { "epoch": 0.11975642760487144, "grad_norm": 0.2350161224603653, "learning_rate": 4.989653908099148e-06, "loss": 0.6722, "step": 354 }, { "epoch": 0.12009472259810555, "grad_norm": 0.23080413043498993, "learning_rate": 4.989572339235361e-06, "loss": 0.653, "step": 355 }, { "epoch": 0.12043301759133965, "grad_norm": 0.23627646267414093, "learning_rate": 4.989490450759331e-06, "loss": 0.6642, "step": 356 }, { "epoch": 0.12077131258457374, "grad_norm": 0.22271862626075745, "learning_rate": 4.98940824268157e-06, "loss": 0.6768, "step": 357 }, { "epoch": 0.12110960757780785, "grad_norm": 0.2121579349040985, "learning_rate": 4.989325715012632e-06, "loss": 0.6807, "step": 358 }, { "epoch": 0.12144790257104195, "grad_norm": 0.23613037168979645, "learning_rate": 4.989242867763111e-06, "loss": 0.7094, "step": 359 }, { "epoch": 0.12178619756427606, "grad_norm": 0.21646465361118317, "learning_rate": 4.989159700943643e-06, "loss": 0.6835, "step": 360 }, { "epoch": 0.12212449255751015, "grad_norm": 0.2305869609117508, "learning_rate": 4.989076214564906e-06, "loss": 0.6884, "step": 361 }, { "epoch": 0.12246278755074425, "grad_norm": 0.2341344654560089, "learning_rate": 4.988992408637618e-06, "loss": 0.6497, "step": 362 }, { "epoch": 0.12280108254397835, "grad_norm": 0.22924962639808655, "learning_rate": 4.9889082831725375e-06, "loss": 0.6816, "step": 363 }, { "epoch": 0.12313937753721245, "grad_norm": 0.2256554514169693, "learning_rate": 4.988823838180464e-06, "loss": 0.6962, "step": 364 }, { "epoch": 0.12347767253044654, "grad_norm": 0.2216321974992752, "learning_rate": 4.98873907367224e-06, "loss": 0.6787, "step": 365 }, { "epoch": 0.12381596752368065, "grad_norm": 0.22423723340034485, "learning_rate": 4.988653989658746e-06, "loss": 0.6891, "step": 366 }, { "epoch": 0.12415426251691475, "grad_norm": 0.23579758405685425, "learning_rate": 4.988568586150906e-06, "loss": 0.7101, "step": 367 }, { "epoch": 0.12449255751014884, "grad_norm": 0.24115969240665436, "learning_rate": 4.988482863159684e-06, "loss": 0.6762, "step": 368 }, { "epoch": 0.12483085250338295, "grad_norm": 0.2256184071302414, "learning_rate": 4.988396820696086e-06, "loss": 0.71, "step": 369 }, { "epoch": 0.12516914749661706, "grad_norm": 0.23354458808898926, "learning_rate": 4.988310458771156e-06, "loss": 0.6777, "step": 370 }, { "epoch": 0.12550744248985116, "grad_norm": 0.23490329086780548, "learning_rate": 4.988223777395984e-06, "loss": 0.6833, "step": 371 }, { "epoch": 0.12584573748308525, "grad_norm": 0.2223309725522995, "learning_rate": 4.988136776581697e-06, "loss": 0.6554, "step": 372 }, { "epoch": 0.12618403247631935, "grad_norm": 0.22902588546276093, "learning_rate": 4.988049456339462e-06, "loss": 0.6585, "step": 373 }, { "epoch": 0.12652232746955344, "grad_norm": 0.22965329885482788, "learning_rate": 4.987961816680493e-06, "loss": 0.6896, "step": 374 }, { "epoch": 0.12686062246278756, "grad_norm": 0.235035702586174, "learning_rate": 4.987873857616038e-06, "loss": 0.675, "step": 375 }, { "epoch": 0.12719891745602166, "grad_norm": 0.23586681485176086, "learning_rate": 4.987785579157392e-06, "loss": 0.6887, "step": 376 }, { "epoch": 0.12753721244925575, "grad_norm": 0.22219079732894897, "learning_rate": 4.987696981315885e-06, "loss": 0.6761, "step": 377 }, { "epoch": 0.12787550744248985, "grad_norm": 0.22536414861679077, "learning_rate": 4.987608064102895e-06, "loss": 0.6639, "step": 378 }, { "epoch": 0.12821380243572394, "grad_norm": 0.2327205091714859, "learning_rate": 4.987518827529833e-06, "loss": 0.6817, "step": 379 }, { "epoch": 0.12855209742895804, "grad_norm": 0.22478225827217102, "learning_rate": 4.987429271608159e-06, "loss": 0.6531, "step": 380 }, { "epoch": 0.12889039242219216, "grad_norm": 0.23206846415996552, "learning_rate": 4.987339396349369e-06, "loss": 0.6877, "step": 381 }, { "epoch": 0.12922868741542626, "grad_norm": 0.23701916635036469, "learning_rate": 4.987249201765001e-06, "loss": 0.6873, "step": 382 }, { "epoch": 0.12956698240866035, "grad_norm": 0.22712261974811554, "learning_rate": 4.9871586878666325e-06, "loss": 0.6491, "step": 383 }, { "epoch": 0.12990527740189445, "grad_norm": 0.23785048723220825, "learning_rate": 4.987067854665887e-06, "loss": 0.6892, "step": 384 }, { "epoch": 0.13024357239512854, "grad_norm": 0.22262883186340332, "learning_rate": 4.986976702174423e-06, "loss": 0.6904, "step": 385 }, { "epoch": 0.13058186738836267, "grad_norm": 0.22985069453716278, "learning_rate": 4.986885230403944e-06, "loss": 0.6414, "step": 386 }, { "epoch": 0.13092016238159676, "grad_norm": 0.21784214675426483, "learning_rate": 4.986793439366193e-06, "loss": 0.6617, "step": 387 }, { "epoch": 0.13125845737483086, "grad_norm": 0.2375217229127884, "learning_rate": 4.986701329072954e-06, "loss": 0.6718, "step": 388 }, { "epoch": 0.13159675236806495, "grad_norm": 0.2305949479341507, "learning_rate": 4.9866088995360524e-06, "loss": 0.6607, "step": 389 }, { "epoch": 0.13193504736129905, "grad_norm": 0.2283347249031067, "learning_rate": 4.986516150767354e-06, "loss": 0.6797, "step": 390 }, { "epoch": 0.13227334235453314, "grad_norm": 0.23237332701683044, "learning_rate": 4.986423082778766e-06, "loss": 0.6691, "step": 391 }, { "epoch": 0.13261163734776726, "grad_norm": 0.23644429445266724, "learning_rate": 4.986329695582237e-06, "loss": 0.6836, "step": 392 }, { "epoch": 0.13294993234100136, "grad_norm": 0.22994762659072876, "learning_rate": 4.986235989189756e-06, "loss": 0.6724, "step": 393 }, { "epoch": 0.13328822733423545, "grad_norm": 0.23970770835876465, "learning_rate": 4.986141963613352e-06, "loss": 0.6727, "step": 394 }, { "epoch": 0.13362652232746955, "grad_norm": 0.2304246574640274, "learning_rate": 4.986047618865097e-06, "loss": 0.6784, "step": 395 }, { "epoch": 0.13396481732070364, "grad_norm": 0.23033125698566437, "learning_rate": 4.985952954957103e-06, "loss": 0.6693, "step": 396 }, { "epoch": 0.13430311231393777, "grad_norm": 0.24728672206401825, "learning_rate": 4.985857971901522e-06, "loss": 0.7006, "step": 397 }, { "epoch": 0.13464140730717186, "grad_norm": 0.23562437295913696, "learning_rate": 4.98576266971055e-06, "loss": 0.6456, "step": 398 }, { "epoch": 0.13497970230040596, "grad_norm": 0.23348332941532135, "learning_rate": 4.98566704839642e-06, "loss": 0.7073, "step": 399 }, { "epoch": 0.13531799729364005, "grad_norm": 0.23460273444652557, "learning_rate": 4.985571107971408e-06, "loss": 0.6774, "step": 400 }, { "epoch": 0.13565629228687415, "grad_norm": 0.23990432918071747, "learning_rate": 4.9854748484478324e-06, "loss": 0.675, "step": 401 }, { "epoch": 0.13599458728010824, "grad_norm": 0.24199119210243225, "learning_rate": 4.98537826983805e-06, "loss": 0.6657, "step": 402 }, { "epoch": 0.13633288227334237, "grad_norm": 0.24207346141338348, "learning_rate": 4.985281372154459e-06, "loss": 0.6583, "step": 403 }, { "epoch": 0.13667117726657646, "grad_norm": 0.24412518739700317, "learning_rate": 4.9851841554095e-06, "loss": 0.7038, "step": 404 }, { "epoch": 0.13700947225981056, "grad_norm": 0.2288157343864441, "learning_rate": 4.985086619615655e-06, "loss": 0.6766, "step": 405 }, { "epoch": 0.13734776725304465, "grad_norm": 0.23269274830818176, "learning_rate": 4.984988764785443e-06, "loss": 0.6947, "step": 406 }, { "epoch": 0.13768606224627875, "grad_norm": 0.23585785925388336, "learning_rate": 4.984890590931427e-06, "loss": 0.6566, "step": 407 }, { "epoch": 0.13802435723951287, "grad_norm": 0.2309969663619995, "learning_rate": 4.9847920980662134e-06, "loss": 0.6573, "step": 408 }, { "epoch": 0.13836265223274696, "grad_norm": 0.23268240690231323, "learning_rate": 4.984693286202444e-06, "loss": 0.6722, "step": 409 }, { "epoch": 0.13870094722598106, "grad_norm": 0.23013322055339813, "learning_rate": 4.984594155352805e-06, "loss": 0.6639, "step": 410 }, { "epoch": 0.13903924221921515, "grad_norm": 0.23997361958026886, "learning_rate": 4.984494705530023e-06, "loss": 0.6633, "step": 411 }, { "epoch": 0.13937753721244925, "grad_norm": 0.23696044087409973, "learning_rate": 4.984394936746865e-06, "loss": 0.672, "step": 412 }, { "epoch": 0.13971583220568334, "grad_norm": 0.2297801524400711, "learning_rate": 4.98429484901614e-06, "loss": 0.6808, "step": 413 }, { "epoch": 0.14005412719891747, "grad_norm": 0.23803795874118805, "learning_rate": 4.984194442350698e-06, "loss": 0.6413, "step": 414 }, { "epoch": 0.14039242219215156, "grad_norm": 0.2315259426832199, "learning_rate": 4.984093716763427e-06, "loss": 0.6792, "step": 415 }, { "epoch": 0.14073071718538566, "grad_norm": 0.2283433973789215, "learning_rate": 4.983992672267261e-06, "loss": 0.6614, "step": 416 }, { "epoch": 0.14106901217861975, "grad_norm": 0.23342324793338776, "learning_rate": 4.98389130887517e-06, "loss": 0.6929, "step": 417 }, { "epoch": 0.14140730717185385, "grad_norm": 0.23194460570812225, "learning_rate": 4.983789626600168e-06, "loss": 0.6685, "step": 418 }, { "epoch": 0.14174560216508797, "grad_norm": 0.23261994123458862, "learning_rate": 4.983687625455307e-06, "loss": 0.6587, "step": 419 }, { "epoch": 0.14208389715832206, "grad_norm": 0.2293771505355835, "learning_rate": 4.983585305453685e-06, "loss": 0.6794, "step": 420 }, { "epoch": 0.14242219215155616, "grad_norm": 0.2353665828704834, "learning_rate": 4.983482666608436e-06, "loss": 0.6542, "step": 421 }, { "epoch": 0.14276048714479025, "grad_norm": 0.23223666846752167, "learning_rate": 4.9833797089327375e-06, "loss": 0.68, "step": 422 }, { "epoch": 0.14309878213802435, "grad_norm": 0.23364655673503876, "learning_rate": 4.983276432439808e-06, "loss": 0.6553, "step": 423 }, { "epoch": 0.14343707713125844, "grad_norm": 0.23806330561637878, "learning_rate": 4.983172837142904e-06, "loss": 0.6657, "step": 424 }, { "epoch": 0.14377537212449257, "grad_norm": 0.24118171632289886, "learning_rate": 4.983068923055327e-06, "loss": 0.6511, "step": 425 }, { "epoch": 0.14411366711772666, "grad_norm": 0.24000559747219086, "learning_rate": 4.982964690190417e-06, "loss": 0.6915, "step": 426 }, { "epoch": 0.14445196211096076, "grad_norm": 0.21783405542373657, "learning_rate": 4.982860138561555e-06, "loss": 0.6357, "step": 427 }, { "epoch": 0.14479025710419485, "grad_norm": 0.23208469152450562, "learning_rate": 4.9827552681821645e-06, "loss": 0.6702, "step": 428 }, { "epoch": 0.14512855209742895, "grad_norm": 0.2403120994567871, "learning_rate": 4.982650079065707e-06, "loss": 0.6619, "step": 429 }, { "epoch": 0.14546684709066307, "grad_norm": 0.24065937101840973, "learning_rate": 4.982544571225688e-06, "loss": 0.6672, "step": 430 }, { "epoch": 0.14580514208389717, "grad_norm": 0.23187507688999176, "learning_rate": 4.982438744675653e-06, "loss": 0.6505, "step": 431 }, { "epoch": 0.14614343707713126, "grad_norm": 0.22386693954467773, "learning_rate": 4.982332599429187e-06, "loss": 0.6761, "step": 432 }, { "epoch": 0.14648173207036536, "grad_norm": 0.23126639425754547, "learning_rate": 4.982226135499917e-06, "loss": 0.6824, "step": 433 }, { "epoch": 0.14682002706359945, "grad_norm": 0.24290819466114044, "learning_rate": 4.9821193529015124e-06, "loss": 0.6612, "step": 434 }, { "epoch": 0.14715832205683355, "grad_norm": 0.24675530195236206, "learning_rate": 4.98201225164768e-06, "loss": 0.6502, "step": 435 }, { "epoch": 0.14749661705006767, "grad_norm": 0.2415398508310318, "learning_rate": 4.98190483175217e-06, "loss": 0.6635, "step": 436 }, { "epoch": 0.14783491204330176, "grad_norm": 0.23464731872081757, "learning_rate": 4.981797093228775e-06, "loss": 0.6662, "step": 437 }, { "epoch": 0.14817320703653586, "grad_norm": 0.23631632328033447, "learning_rate": 4.981689036091323e-06, "loss": 0.6669, "step": 438 }, { "epoch": 0.14851150202976995, "grad_norm": 0.22991792857646942, "learning_rate": 4.98158066035369e-06, "loss": 0.7033, "step": 439 }, { "epoch": 0.14884979702300405, "grad_norm": 0.23498861491680145, "learning_rate": 4.981471966029787e-06, "loss": 0.6785, "step": 440 }, { "epoch": 0.14918809201623817, "grad_norm": 0.238022118806839, "learning_rate": 4.981362953133569e-06, "loss": 0.6858, "step": 441 }, { "epoch": 0.14952638700947227, "grad_norm": 0.22725965082645416, "learning_rate": 4.981253621679029e-06, "loss": 0.6651, "step": 442 }, { "epoch": 0.14986468200270636, "grad_norm": 0.2447727769613266, "learning_rate": 4.981143971680208e-06, "loss": 0.663, "step": 443 }, { "epoch": 0.15020297699594046, "grad_norm": 0.24607551097869873, "learning_rate": 4.9810340031511785e-06, "loss": 0.6717, "step": 444 }, { "epoch": 0.15054127198917455, "grad_norm": 0.22809375822544098, "learning_rate": 4.98092371610606e-06, "loss": 0.6569, "step": 445 }, { "epoch": 0.15087956698240865, "grad_norm": 0.22392520308494568, "learning_rate": 4.98081311055901e-06, "loss": 0.645, "step": 446 }, { "epoch": 0.15121786197564277, "grad_norm": 0.23991553485393524, "learning_rate": 4.98070218652423e-06, "loss": 0.6891, "step": 447 }, { "epoch": 0.15155615696887687, "grad_norm": 0.24203583598136902, "learning_rate": 4.9805909440159585e-06, "loss": 0.6836, "step": 448 }, { "epoch": 0.15189445196211096, "grad_norm": 0.23865444958209991, "learning_rate": 4.9804793830484785e-06, "loss": 0.6895, "step": 449 }, { "epoch": 0.15223274695534506, "grad_norm": 0.23681670427322388, "learning_rate": 4.980367503636111e-06, "loss": 0.6936, "step": 450 }, { "epoch": 0.15257104194857915, "grad_norm": 0.24201667308807373, "learning_rate": 4.9802553057932205e-06, "loss": 0.6647, "step": 451 }, { "epoch": 0.15290933694181327, "grad_norm": 0.23797008395195007, "learning_rate": 4.980142789534209e-06, "loss": 0.6518, "step": 452 }, { "epoch": 0.15324763193504737, "grad_norm": 0.25405654311180115, "learning_rate": 4.980029954873524e-06, "loss": 0.6798, "step": 453 }, { "epoch": 0.15358592692828146, "grad_norm": 0.24635176360607147, "learning_rate": 4.9799168018256486e-06, "loss": 0.6755, "step": 454 }, { "epoch": 0.15392422192151556, "grad_norm": 0.2485082596540451, "learning_rate": 4.9798033304051115e-06, "loss": 0.6922, "step": 455 }, { "epoch": 0.15426251691474965, "grad_norm": 0.26109230518341064, "learning_rate": 4.979689540626479e-06, "loss": 0.681, "step": 456 }, { "epoch": 0.15460081190798375, "grad_norm": 0.23434917628765106, "learning_rate": 4.979575432504359e-06, "loss": 0.6705, "step": 457 }, { "epoch": 0.15493910690121787, "grad_norm": 0.24609756469726562, "learning_rate": 4.979461006053403e-06, "loss": 0.6757, "step": 458 }, { "epoch": 0.15527740189445197, "grad_norm": 0.24599488079547882, "learning_rate": 4.979346261288298e-06, "loss": 0.6653, "step": 459 }, { "epoch": 0.15561569688768606, "grad_norm": 0.24822676181793213, "learning_rate": 4.979231198223778e-06, "loss": 0.6723, "step": 460 }, { "epoch": 0.15595399188092016, "grad_norm": 0.24261929094791412, "learning_rate": 4.979115816874613e-06, "loss": 0.6642, "step": 461 }, { "epoch": 0.15629228687415425, "grad_norm": 0.23589009046554565, "learning_rate": 4.979000117255616e-06, "loss": 0.6571, "step": 462 }, { "epoch": 0.15663058186738837, "grad_norm": 0.24605385959148407, "learning_rate": 4.978884099381642e-06, "loss": 0.6823, "step": 463 }, { "epoch": 0.15696887686062247, "grad_norm": 0.2397337108850479, "learning_rate": 4.9787677632675825e-06, "loss": 0.6515, "step": 464 }, { "epoch": 0.15730717185385656, "grad_norm": 0.22930362820625305, "learning_rate": 4.978651108928375e-06, "loss": 0.6656, "step": 465 }, { "epoch": 0.15764546684709066, "grad_norm": 0.24789506196975708, "learning_rate": 4.978534136378995e-06, "loss": 0.6671, "step": 466 }, { "epoch": 0.15798376184032475, "grad_norm": 0.22828440368175507, "learning_rate": 4.9784168456344595e-06, "loss": 0.6455, "step": 467 }, { "epoch": 0.15832205683355885, "grad_norm": 0.23598957061767578, "learning_rate": 4.978299236709826e-06, "loss": 0.645, "step": 468 }, { "epoch": 0.15866035182679297, "grad_norm": 0.24245992302894592, "learning_rate": 4.978181309620195e-06, "loss": 0.693, "step": 469 }, { "epoch": 0.15899864682002707, "grad_norm": 0.24275842308998108, "learning_rate": 4.978063064380703e-06, "loss": 0.6519, "step": 470 }, { "epoch": 0.15933694181326116, "grad_norm": 0.2353448122739792, "learning_rate": 4.977944501006532e-06, "loss": 0.6516, "step": 471 }, { "epoch": 0.15967523680649526, "grad_norm": 0.2516363859176636, "learning_rate": 4.977825619512905e-06, "loss": 0.6612, "step": 472 }, { "epoch": 0.16001353179972935, "grad_norm": 0.23496970534324646, "learning_rate": 4.9777064199150805e-06, "loss": 0.6503, "step": 473 }, { "epoch": 0.16035182679296348, "grad_norm": 0.23348847031593323, "learning_rate": 4.977586902228363e-06, "loss": 0.6835, "step": 474 }, { "epoch": 0.16069012178619757, "grad_norm": 0.23757898807525635, "learning_rate": 4.977467066468097e-06, "loss": 0.6397, "step": 475 }, { "epoch": 0.16102841677943167, "grad_norm": 0.23906610906124115, "learning_rate": 4.977346912649666e-06, "loss": 0.6589, "step": 476 }, { "epoch": 0.16136671177266576, "grad_norm": 0.23589485883712769, "learning_rate": 4.977226440788496e-06, "loss": 0.6431, "step": 477 }, { "epoch": 0.16170500676589986, "grad_norm": 0.24150773882865906, "learning_rate": 4.977105650900053e-06, "loss": 0.6895, "step": 478 }, { "epoch": 0.16204330175913398, "grad_norm": 0.25467047095298767, "learning_rate": 4.9769845429998445e-06, "loss": 0.6453, "step": 479 }, { "epoch": 0.16238159675236807, "grad_norm": 0.23450668156147003, "learning_rate": 4.976863117103417e-06, "loss": 0.6632, "step": 480 }, { "epoch": 0.16271989174560217, "grad_norm": 0.23753339052200317, "learning_rate": 4.976741373226361e-06, "loss": 0.6694, "step": 481 }, { "epoch": 0.16305818673883626, "grad_norm": 0.25083303451538086, "learning_rate": 4.976619311384305e-06, "loss": 0.665, "step": 482 }, { "epoch": 0.16339648173207036, "grad_norm": 0.23956729471683502, "learning_rate": 4.976496931592919e-06, "loss": 0.6765, "step": 483 }, { "epoch": 0.16373477672530445, "grad_norm": 0.24329128861427307, "learning_rate": 4.976374233867915e-06, "loss": 0.659, "step": 484 }, { "epoch": 0.16407307171853858, "grad_norm": 0.23810063302516937, "learning_rate": 4.976251218225045e-06, "loss": 0.656, "step": 485 }, { "epoch": 0.16441136671177267, "grad_norm": 0.24175657331943512, "learning_rate": 4.976127884680101e-06, "loss": 0.6672, "step": 486 }, { "epoch": 0.16474966170500677, "grad_norm": 0.23622599244117737, "learning_rate": 4.976004233248917e-06, "loss": 0.6676, "step": 487 }, { "epoch": 0.16508795669824086, "grad_norm": 0.24881932139396667, "learning_rate": 4.975880263947367e-06, "loss": 0.6813, "step": 488 }, { "epoch": 0.16542625169147496, "grad_norm": 0.23597869277000427, "learning_rate": 4.975755976791368e-06, "loss": 0.6424, "step": 489 }, { "epoch": 0.16576454668470908, "grad_norm": 0.23728232085704803, "learning_rate": 4.975631371796873e-06, "loss": 0.6821, "step": 490 }, { "epoch": 0.16610284167794317, "grad_norm": 0.2352864295244217, "learning_rate": 4.975506448979882e-06, "loss": 0.6864, "step": 491 }, { "epoch": 0.16644113667117727, "grad_norm": 0.245172381401062, "learning_rate": 4.975381208356431e-06, "loss": 0.7119, "step": 492 }, { "epoch": 0.16677943166441137, "grad_norm": 0.2432330846786499, "learning_rate": 4.975255649942598e-06, "loss": 0.6212, "step": 493 }, { "epoch": 0.16711772665764546, "grad_norm": 0.23403246700763702, "learning_rate": 4.975129773754503e-06, "loss": 0.6694, "step": 494 }, { "epoch": 0.16745602165087956, "grad_norm": 0.2418632060289383, "learning_rate": 4.975003579808306e-06, "loss": 0.6518, "step": 495 }, { "epoch": 0.16779431664411368, "grad_norm": 0.24580247700214386, "learning_rate": 4.974877068120208e-06, "loss": 0.6554, "step": 496 }, { "epoch": 0.16813261163734777, "grad_norm": 0.2346314638853073, "learning_rate": 4.97475023870645e-06, "loss": 0.6557, "step": 497 }, { "epoch": 0.16847090663058187, "grad_norm": 0.24950683116912842, "learning_rate": 4.974623091583316e-06, "loss": 0.6857, "step": 498 }, { "epoch": 0.16880920162381596, "grad_norm": 0.2463996857404709, "learning_rate": 4.974495626767126e-06, "loss": 0.6805, "step": 499 }, { "epoch": 0.16914749661705006, "grad_norm": 0.2514938712120056, "learning_rate": 4.974367844274248e-06, "loss": 0.6701, "step": 500 }, { "epoch": 0.16948579161028418, "grad_norm": 0.2525053322315216, "learning_rate": 4.974239744121083e-06, "loss": 0.676, "step": 501 }, { "epoch": 0.16982408660351828, "grad_norm": 0.2540481686592102, "learning_rate": 4.974111326324079e-06, "loss": 0.6734, "step": 502 }, { "epoch": 0.17016238159675237, "grad_norm": 0.2555442452430725, "learning_rate": 4.973982590899722e-06, "loss": 0.6532, "step": 503 }, { "epoch": 0.17050067658998647, "grad_norm": 0.24633650481700897, "learning_rate": 4.973853537864538e-06, "loss": 0.6913, "step": 504 }, { "epoch": 0.17083897158322056, "grad_norm": 0.24489474296569824, "learning_rate": 4.973724167235095e-06, "loss": 0.6379, "step": 505 }, { "epoch": 0.17117726657645466, "grad_norm": 0.2564601004123688, "learning_rate": 4.973594479028003e-06, "loss": 0.6836, "step": 506 }, { "epoch": 0.17151556156968878, "grad_norm": 0.24805857241153717, "learning_rate": 4.973464473259911e-06, "loss": 0.6795, "step": 507 }, { "epoch": 0.17185385656292287, "grad_norm": 0.24846164882183075, "learning_rate": 4.973334149947508e-06, "loss": 0.6684, "step": 508 }, { "epoch": 0.17219215155615697, "grad_norm": 0.23314347863197327, "learning_rate": 4.973203509107527e-06, "loss": 0.6417, "step": 509 }, { "epoch": 0.17253044654939106, "grad_norm": 0.24465447664260864, "learning_rate": 4.973072550756737e-06, "loss": 0.6643, "step": 510 }, { "epoch": 0.17286874154262516, "grad_norm": 0.2504658102989197, "learning_rate": 4.972941274911953e-06, "loss": 0.669, "step": 511 }, { "epoch": 0.17320703653585928, "grad_norm": 0.24236737191677094, "learning_rate": 4.972809681590027e-06, "loss": 0.6568, "step": 512 }, { "epoch": 0.17354533152909338, "grad_norm": 0.2436477541923523, "learning_rate": 4.972677770807853e-06, "loss": 0.6959, "step": 513 }, { "epoch": 0.17388362652232747, "grad_norm": 0.24689815938472748, "learning_rate": 4.972545542582366e-06, "loss": 0.6841, "step": 514 }, { "epoch": 0.17422192151556157, "grad_norm": 0.24580040574073792, "learning_rate": 4.972412996930541e-06, "loss": 0.6684, "step": 515 }, { "epoch": 0.17456021650879566, "grad_norm": 0.24564984440803528, "learning_rate": 4.972280133869396e-06, "loss": 0.6322, "step": 516 }, { "epoch": 0.17489851150202976, "grad_norm": 0.2577616572380066, "learning_rate": 4.972146953415986e-06, "loss": 0.6658, "step": 517 }, { "epoch": 0.17523680649526388, "grad_norm": 0.23606464266777039, "learning_rate": 4.972013455587409e-06, "loss": 0.6507, "step": 518 }, { "epoch": 0.17557510148849798, "grad_norm": 0.23771150410175323, "learning_rate": 4.9718796404008054e-06, "loss": 0.6829, "step": 519 }, { "epoch": 0.17591339648173207, "grad_norm": 0.25152748823165894, "learning_rate": 4.971745507873352e-06, "loss": 0.6766, "step": 520 }, { "epoch": 0.17625169147496617, "grad_norm": 0.2582070231437683, "learning_rate": 4.97161105802227e-06, "loss": 0.687, "step": 521 }, { "epoch": 0.17658998646820026, "grad_norm": 0.2388746291399002, "learning_rate": 4.9714762908648206e-06, "loss": 0.6462, "step": 522 }, { "epoch": 0.17692828146143438, "grad_norm": 0.2524266839027405, "learning_rate": 4.971341206418305e-06, "loss": 0.6546, "step": 523 }, { "epoch": 0.17726657645466848, "grad_norm": 0.2386075109243393, "learning_rate": 4.971205804700064e-06, "loss": 0.643, "step": 524 }, { "epoch": 0.17760487144790257, "grad_norm": 0.2496464103460312, "learning_rate": 4.9710700857274805e-06, "loss": 0.6488, "step": 525 }, { "epoch": 0.17794316644113667, "grad_norm": 0.25205540657043457, "learning_rate": 4.97093404951798e-06, "loss": 0.6863, "step": 526 }, { "epoch": 0.17828146143437076, "grad_norm": 0.25648388266563416, "learning_rate": 4.9707976960890266e-06, "loss": 0.6692, "step": 527 }, { "epoch": 0.17861975642760486, "grad_norm": 0.24036119878292084, "learning_rate": 4.9706610254581255e-06, "loss": 0.6605, "step": 528 }, { "epoch": 0.17895805142083898, "grad_norm": 0.2514593303203583, "learning_rate": 4.970524037642821e-06, "loss": 0.6625, "step": 529 }, { "epoch": 0.17929634641407308, "grad_norm": 0.2690679132938385, "learning_rate": 4.970386732660701e-06, "loss": 0.6736, "step": 530 }, { "epoch": 0.17963464140730717, "grad_norm": 0.25247398018836975, "learning_rate": 4.970249110529392e-06, "loss": 0.6712, "step": 531 }, { "epoch": 0.17997293640054127, "grad_norm": 0.24959304928779602, "learning_rate": 4.970111171266563e-06, "loss": 0.6608, "step": 532 }, { "epoch": 0.18031123139377536, "grad_norm": 0.2703661024570465, "learning_rate": 4.969972914889921e-06, "loss": 0.659, "step": 533 }, { "epoch": 0.18064952638700948, "grad_norm": 0.25666624307632446, "learning_rate": 4.9698343414172165e-06, "loss": 0.6777, "step": 534 }, { "epoch": 0.18098782138024358, "grad_norm": 0.23413406312465668, "learning_rate": 4.9696954508662405e-06, "loss": 0.6328, "step": 535 }, { "epoch": 0.18132611637347767, "grad_norm": 0.2529549300670624, "learning_rate": 4.969556243254822e-06, "loss": 0.6612, "step": 536 }, { "epoch": 0.18166441136671177, "grad_norm": 0.24946318566799164, "learning_rate": 4.969416718600834e-06, "loss": 0.6363, "step": 537 }, { "epoch": 0.18200270635994586, "grad_norm": 0.24231383204460144, "learning_rate": 4.969276876922188e-06, "loss": 0.6348, "step": 538 }, { "epoch": 0.18234100135317996, "grad_norm": 0.2565390169620514, "learning_rate": 4.969136718236836e-06, "loss": 0.6792, "step": 539 }, { "epoch": 0.18267929634641408, "grad_norm": 0.2580963969230652, "learning_rate": 4.968996242562774e-06, "loss": 0.6665, "step": 540 }, { "epoch": 0.18301759133964818, "grad_norm": 0.24817582964897156, "learning_rate": 4.968855449918034e-06, "loss": 0.6499, "step": 541 }, { "epoch": 0.18335588633288227, "grad_norm": 0.26380398869514465, "learning_rate": 4.968714340320692e-06, "loss": 0.6347, "step": 542 }, { "epoch": 0.18369418132611637, "grad_norm": 0.2705709934234619, "learning_rate": 4.968572913788864e-06, "loss": 0.6603, "step": 543 }, { "epoch": 0.18403247631935046, "grad_norm": 0.2509385943412781, "learning_rate": 4.968431170340706e-06, "loss": 0.6374, "step": 544 }, { "epoch": 0.18437077131258459, "grad_norm": 0.255299836397171, "learning_rate": 4.9682891099944155e-06, "loss": 0.6557, "step": 545 }, { "epoch": 0.18470906630581868, "grad_norm": 0.26094627380371094, "learning_rate": 4.9681467327682305e-06, "loss": 0.6637, "step": 546 }, { "epoch": 0.18504736129905278, "grad_norm": 0.24482354521751404, "learning_rate": 4.968004038680428e-06, "loss": 0.6459, "step": 547 }, { "epoch": 0.18538565629228687, "grad_norm": 0.258472204208374, "learning_rate": 4.967861027749327e-06, "loss": 0.6663, "step": 548 }, { "epoch": 0.18572395128552097, "grad_norm": 0.25342991948127747, "learning_rate": 4.967717699993291e-06, "loss": 0.6603, "step": 549 }, { "epoch": 0.18606224627875506, "grad_norm": 0.2580524981021881, "learning_rate": 4.9675740554307155e-06, "loss": 0.6748, "step": 550 }, { "epoch": 0.18640054127198918, "grad_norm": 0.24921679496765137, "learning_rate": 4.967430094080045e-06, "loss": 0.6815, "step": 551 }, { "epoch": 0.18673883626522328, "grad_norm": 0.2525157928466797, "learning_rate": 4.967285815959759e-06, "loss": 0.6687, "step": 552 }, { "epoch": 0.18707713125845737, "grad_norm": 0.2625119388103485, "learning_rate": 4.9671412210883826e-06, "loss": 0.6528, "step": 553 }, { "epoch": 0.18741542625169147, "grad_norm": 0.27415037155151367, "learning_rate": 4.966996309484477e-06, "loss": 0.6494, "step": 554 }, { "epoch": 0.18775372124492556, "grad_norm": 0.267037957906723, "learning_rate": 4.966851081166646e-06, "loss": 0.6888, "step": 555 }, { "epoch": 0.1880920162381597, "grad_norm": 0.2636261582374573, "learning_rate": 4.966705536153536e-06, "loss": 0.6489, "step": 556 }, { "epoch": 0.18843031123139378, "grad_norm": 0.30301186442375183, "learning_rate": 4.9665596744638285e-06, "loss": 0.6583, "step": 557 }, { "epoch": 0.18876860622462788, "grad_norm": 0.2526191771030426, "learning_rate": 4.966413496116254e-06, "loss": 0.6708, "step": 558 }, { "epoch": 0.18910690121786197, "grad_norm": 0.2610830068588257, "learning_rate": 4.966267001129575e-06, "loss": 0.6426, "step": 559 }, { "epoch": 0.18944519621109607, "grad_norm": 0.24611899256706238, "learning_rate": 4.9661201895226e-06, "loss": 0.6545, "step": 560 }, { "epoch": 0.18978349120433016, "grad_norm": 0.26428115367889404, "learning_rate": 4.965973061314178e-06, "loss": 0.7069, "step": 561 }, { "epoch": 0.19012178619756429, "grad_norm": 0.23961693048477173, "learning_rate": 4.965825616523194e-06, "loss": 0.6376, "step": 562 }, { "epoch": 0.19046008119079838, "grad_norm": 0.2400297075510025, "learning_rate": 4.965677855168581e-06, "loss": 0.6612, "step": 563 }, { "epoch": 0.19079837618403248, "grad_norm": 0.2490435689687729, "learning_rate": 4.9655297772693066e-06, "loss": 0.6539, "step": 564 }, { "epoch": 0.19113667117726657, "grad_norm": 0.2572137415409088, "learning_rate": 4.965381382844381e-06, "loss": 0.6447, "step": 565 }, { "epoch": 0.19147496617050067, "grad_norm": 0.2542514503002167, "learning_rate": 4.965232671912855e-06, "loss": 0.6692, "step": 566 }, { "epoch": 0.1918132611637348, "grad_norm": 0.2554891109466553, "learning_rate": 4.965083644493821e-06, "loss": 0.6909, "step": 567 }, { "epoch": 0.19215155615696888, "grad_norm": 0.2654957175254822, "learning_rate": 4.964934300606411e-06, "loss": 0.6793, "step": 568 }, { "epoch": 0.19248985115020298, "grad_norm": 0.25700655579566956, "learning_rate": 4.9647846402697974e-06, "loss": 0.6723, "step": 569 }, { "epoch": 0.19282814614343707, "grad_norm": 0.2551063299179077, "learning_rate": 4.964634663503194e-06, "loss": 0.6621, "step": 570 }, { "epoch": 0.19316644113667117, "grad_norm": 0.25916796922683716, "learning_rate": 4.964484370325855e-06, "loss": 0.6729, "step": 571 }, { "epoch": 0.19350473612990526, "grad_norm": 0.24621054530143738, "learning_rate": 4.964333760757074e-06, "loss": 0.6571, "step": 572 }, { "epoch": 0.1938430311231394, "grad_norm": 0.24834199249744415, "learning_rate": 4.9641828348161874e-06, "loss": 0.6636, "step": 573 }, { "epoch": 0.19418132611637348, "grad_norm": 0.25093671679496765, "learning_rate": 4.9640315925225715e-06, "loss": 0.6407, "step": 574 }, { "epoch": 0.19451962110960758, "grad_norm": 0.2574840486049652, "learning_rate": 4.9638800338956415e-06, "loss": 0.6799, "step": 575 }, { "epoch": 0.19485791610284167, "grad_norm": 0.2423202246427536, "learning_rate": 4.9637281589548555e-06, "loss": 0.654, "step": 576 }, { "epoch": 0.19519621109607577, "grad_norm": 0.2530333995819092, "learning_rate": 4.963575967719711e-06, "loss": 0.647, "step": 577 }, { "epoch": 0.1955345060893099, "grad_norm": 0.24557620286941528, "learning_rate": 4.963423460209747e-06, "loss": 0.6482, "step": 578 }, { "epoch": 0.19587280108254398, "grad_norm": 0.2524111866950989, "learning_rate": 4.963270636444542e-06, "loss": 0.6562, "step": 579 }, { "epoch": 0.19621109607577808, "grad_norm": 0.2587498426437378, "learning_rate": 4.963117496443715e-06, "loss": 0.6445, "step": 580 }, { "epoch": 0.19654939106901217, "grad_norm": 0.2540567219257355, "learning_rate": 4.962964040226927e-06, "loss": 0.6789, "step": 581 }, { "epoch": 0.19688768606224627, "grad_norm": 0.2543244957923889, "learning_rate": 4.962810267813878e-06, "loss": 0.6623, "step": 582 }, { "epoch": 0.1972259810554804, "grad_norm": 0.2576369643211365, "learning_rate": 4.96265617922431e-06, "loss": 0.6481, "step": 583 }, { "epoch": 0.1975642760487145, "grad_norm": 0.2497120052576065, "learning_rate": 4.962501774478005e-06, "loss": 0.6584, "step": 584 }, { "epoch": 0.19790257104194858, "grad_norm": 0.24584496021270752, "learning_rate": 4.962347053594784e-06, "loss": 0.6596, "step": 585 }, { "epoch": 0.19824086603518268, "grad_norm": 0.2562602758407593, "learning_rate": 4.9621920165945125e-06, "loss": 0.6449, "step": 586 }, { "epoch": 0.19857916102841677, "grad_norm": 0.2451668381690979, "learning_rate": 4.962036663497093e-06, "loss": 0.6362, "step": 587 }, { "epoch": 0.19891745602165087, "grad_norm": 0.24884480237960815, "learning_rate": 4.96188099432247e-06, "loss": 0.6191, "step": 588 }, { "epoch": 0.199255751014885, "grad_norm": 0.24974341690540314, "learning_rate": 4.961725009090627e-06, "loss": 0.6672, "step": 589 }, { "epoch": 0.19959404600811909, "grad_norm": 0.25356993079185486, "learning_rate": 4.9615687078215925e-06, "loss": 0.6598, "step": 590 }, { "epoch": 0.19993234100135318, "grad_norm": 0.2501576542854309, "learning_rate": 4.96141209053543e-06, "loss": 0.6595, "step": 591 }, { "epoch": 0.20027063599458728, "grad_norm": 0.2463545799255371, "learning_rate": 4.9612551572522465e-06, "loss": 0.6488, "step": 592 }, { "epoch": 0.20060893098782137, "grad_norm": 0.25693491101264954, "learning_rate": 4.96109790799219e-06, "loss": 0.6575, "step": 593 }, { "epoch": 0.2009472259810555, "grad_norm": 0.251827597618103, "learning_rate": 4.960940342775448e-06, "loss": 0.6565, "step": 594 }, { "epoch": 0.2012855209742896, "grad_norm": 0.2766624689102173, "learning_rate": 4.960782461622247e-06, "loss": 0.6515, "step": 595 }, { "epoch": 0.20162381596752368, "grad_norm": 0.24949146807193756, "learning_rate": 4.960624264552858e-06, "loss": 0.6789, "step": 596 }, { "epoch": 0.20196211096075778, "grad_norm": 0.2712768316268921, "learning_rate": 4.9604657515875894e-06, "loss": 0.6403, "step": 597 }, { "epoch": 0.20230040595399187, "grad_norm": 0.24750161170959473, "learning_rate": 4.960306922746793e-06, "loss": 0.6671, "step": 598 }, { "epoch": 0.20263870094722597, "grad_norm": 0.2518949806690216, "learning_rate": 4.960147778050856e-06, "loss": 0.6559, "step": 599 }, { "epoch": 0.2029769959404601, "grad_norm": 0.26023411750793457, "learning_rate": 4.959988317520213e-06, "loss": 0.6844, "step": 600 }, { "epoch": 0.2033152909336942, "grad_norm": 0.26342353224754333, "learning_rate": 4.959828541175332e-06, "loss": 0.6627, "step": 601 }, { "epoch": 0.20365358592692828, "grad_norm": 0.26747390627861023, "learning_rate": 4.959668449036728e-06, "loss": 0.6379, "step": 602 }, { "epoch": 0.20399188092016238, "grad_norm": 0.262066513299942, "learning_rate": 4.959508041124952e-06, "loss": 0.6667, "step": 603 }, { "epoch": 0.20433017591339647, "grad_norm": 0.26433098316192627, "learning_rate": 4.9593473174605976e-06, "loss": 0.6713, "step": 604 }, { "epoch": 0.2046684709066306, "grad_norm": 0.27428731322288513, "learning_rate": 4.959186278064299e-06, "loss": 0.627, "step": 605 }, { "epoch": 0.2050067658998647, "grad_norm": 0.25257450342178345, "learning_rate": 4.959024922956732e-06, "loss": 0.6573, "step": 606 }, { "epoch": 0.20534506089309879, "grad_norm": 0.25500935316085815, "learning_rate": 4.958863252158608e-06, "loss": 0.6821, "step": 607 }, { "epoch": 0.20568335588633288, "grad_norm": 0.2703934907913208, "learning_rate": 4.958701265690685e-06, "loss": 0.6471, "step": 608 }, { "epoch": 0.20602165087956698, "grad_norm": 0.26352187991142273, "learning_rate": 4.958538963573758e-06, "loss": 0.6738, "step": 609 }, { "epoch": 0.20635994587280107, "grad_norm": 0.26434412598609924, "learning_rate": 4.958376345828662e-06, "loss": 0.666, "step": 610 }, { "epoch": 0.2066982408660352, "grad_norm": 0.26020878553390503, "learning_rate": 4.9582134124762775e-06, "loss": 0.6866, "step": 611 }, { "epoch": 0.2070365358592693, "grad_norm": 0.2619779109954834, "learning_rate": 4.958050163537519e-06, "loss": 0.6566, "step": 612 }, { "epoch": 0.20737483085250338, "grad_norm": 0.2512053847312927, "learning_rate": 4.957886599033345e-06, "loss": 0.6648, "step": 613 }, { "epoch": 0.20771312584573748, "grad_norm": 0.27655842900276184, "learning_rate": 4.957722718984754e-06, "loss": 0.6549, "step": 614 }, { "epoch": 0.20805142083897157, "grad_norm": 0.2558527886867523, "learning_rate": 4.957558523412786e-06, "loss": 0.6734, "step": 615 }, { "epoch": 0.2083897158322057, "grad_norm": 0.26270580291748047, "learning_rate": 4.957394012338519e-06, "loss": 0.6546, "step": 616 }, { "epoch": 0.2087280108254398, "grad_norm": 0.2507633864879608, "learning_rate": 4.957229185783074e-06, "loss": 0.6549, "step": 617 }, { "epoch": 0.2090663058186739, "grad_norm": 0.2596471309661865, "learning_rate": 4.957064043767611e-06, "loss": 0.6698, "step": 618 }, { "epoch": 0.20940460081190798, "grad_norm": 0.24986913800239563, "learning_rate": 4.956898586313332e-06, "loss": 0.639, "step": 619 }, { "epoch": 0.20974289580514208, "grad_norm": 0.24691560864448547, "learning_rate": 4.956732813441477e-06, "loss": 0.6757, "step": 620 }, { "epoch": 0.21008119079837617, "grad_norm": 0.2614850699901581, "learning_rate": 4.956566725173329e-06, "loss": 0.6553, "step": 621 }, { "epoch": 0.2104194857916103, "grad_norm": 0.26026490330696106, "learning_rate": 4.956400321530211e-06, "loss": 0.6743, "step": 622 }, { "epoch": 0.2107577807848444, "grad_norm": 0.25970712304115295, "learning_rate": 4.956233602533483e-06, "loss": 0.6715, "step": 623 }, { "epoch": 0.21109607577807848, "grad_norm": 0.2530291974544525, "learning_rate": 4.956066568204551e-06, "loss": 0.639, "step": 624 }, { "epoch": 0.21143437077131258, "grad_norm": 0.2687148153781891, "learning_rate": 4.95589921856486e-06, "loss": 0.6565, "step": 625 }, { "epoch": 0.21177266576454667, "grad_norm": 0.2534351944923401, "learning_rate": 4.955731553635892e-06, "loss": 0.64, "step": 626 }, { "epoch": 0.2121109607577808, "grad_norm": 0.25353437662124634, "learning_rate": 4.9555635734391725e-06, "loss": 0.684, "step": 627 }, { "epoch": 0.2124492557510149, "grad_norm": 0.26010623574256897, "learning_rate": 4.955395277996268e-06, "loss": 0.6516, "step": 628 }, { "epoch": 0.212787550744249, "grad_norm": 0.2603456974029541, "learning_rate": 4.955226667328783e-06, "loss": 0.6619, "step": 629 }, { "epoch": 0.21312584573748308, "grad_norm": 0.25239628553390503, "learning_rate": 4.9550577414583635e-06, "loss": 0.673, "step": 630 }, { "epoch": 0.21346414073071718, "grad_norm": 0.25560659170150757, "learning_rate": 4.954888500406699e-06, "loss": 0.6576, "step": 631 }, { "epoch": 0.21380243572395127, "grad_norm": 0.26723402738571167, "learning_rate": 4.9547189441955124e-06, "loss": 0.6821, "step": 632 }, { "epoch": 0.2141407307171854, "grad_norm": 0.24569633603096008, "learning_rate": 4.9545490728465754e-06, "loss": 0.6669, "step": 633 }, { "epoch": 0.2144790257104195, "grad_norm": 0.26862242817878723, "learning_rate": 4.9543788863816944e-06, "loss": 0.6476, "step": 634 }, { "epoch": 0.21481732070365359, "grad_norm": 0.24638478457927704, "learning_rate": 4.954208384822717e-06, "loss": 0.6506, "step": 635 }, { "epoch": 0.21515561569688768, "grad_norm": 0.24835918843746185, "learning_rate": 4.954037568191534e-06, "loss": 0.6584, "step": 636 }, { "epoch": 0.21549391069012178, "grad_norm": 0.24750922620296478, "learning_rate": 4.9538664365100755e-06, "loss": 0.6486, "step": 637 }, { "epoch": 0.2158322056833559, "grad_norm": 0.27238205075263977, "learning_rate": 4.953694989800309e-06, "loss": 0.6549, "step": 638 }, { "epoch": 0.21617050067659, "grad_norm": 0.2620726227760315, "learning_rate": 4.953523228084246e-06, "loss": 0.6545, "step": 639 }, { "epoch": 0.2165087956698241, "grad_norm": 0.2717283070087433, "learning_rate": 4.953351151383938e-06, "loss": 0.6326, "step": 640 }, { "epoch": 0.21684709066305818, "grad_norm": 0.28660696744918823, "learning_rate": 4.9531787597214756e-06, "loss": 0.662, "step": 641 }, { "epoch": 0.21718538565629228, "grad_norm": 0.26013630628585815, "learning_rate": 4.953006053118991e-06, "loss": 0.6644, "step": 642 }, { "epoch": 0.21752368064952637, "grad_norm": 0.2518508732318878, "learning_rate": 4.952833031598655e-06, "loss": 0.6412, "step": 643 }, { "epoch": 0.2178619756427605, "grad_norm": 0.2623193860054016, "learning_rate": 4.952659695182682e-06, "loss": 0.6527, "step": 644 }, { "epoch": 0.2182002706359946, "grad_norm": 0.2599288821220398, "learning_rate": 4.952486043893324e-06, "loss": 0.6508, "step": 645 }, { "epoch": 0.2185385656292287, "grad_norm": 0.25234729051589966, "learning_rate": 4.952312077752875e-06, "loss": 0.6416, "step": 646 }, { "epoch": 0.21887686062246278, "grad_norm": 0.2652762234210968, "learning_rate": 4.952137796783667e-06, "loss": 0.6537, "step": 647 }, { "epoch": 0.21921515561569688, "grad_norm": 0.26549839973449707, "learning_rate": 4.9519632010080765e-06, "loss": 0.6722, "step": 648 }, { "epoch": 0.219553450608931, "grad_norm": 0.2607754170894623, "learning_rate": 4.951788290448517e-06, "loss": 0.6602, "step": 649 }, { "epoch": 0.2198917456021651, "grad_norm": 0.26408514380455017, "learning_rate": 4.951613065127445e-06, "loss": 0.6671, "step": 650 }, { "epoch": 0.2202300405953992, "grad_norm": 0.2623929977416992, "learning_rate": 4.951437525067354e-06, "loss": 0.6523, "step": 651 }, { "epoch": 0.22056833558863329, "grad_norm": 0.25325843691825867, "learning_rate": 4.951261670290781e-06, "loss": 0.6434, "step": 652 }, { "epoch": 0.22090663058186738, "grad_norm": 0.25801414251327515, "learning_rate": 4.951085500820302e-06, "loss": 0.665, "step": 653 }, { "epoch": 0.22124492557510148, "grad_norm": 0.25949883460998535, "learning_rate": 4.950909016678534e-06, "loss": 0.6339, "step": 654 }, { "epoch": 0.2215832205683356, "grad_norm": 0.26575353741645813, "learning_rate": 4.950732217888134e-06, "loss": 0.6508, "step": 655 }, { "epoch": 0.2219215155615697, "grad_norm": 0.2612333297729492, "learning_rate": 4.950555104471799e-06, "loss": 0.6499, "step": 656 }, { "epoch": 0.2222598105548038, "grad_norm": 0.26511678099632263, "learning_rate": 4.950377676452267e-06, "loss": 0.6475, "step": 657 }, { "epoch": 0.22259810554803788, "grad_norm": 0.264708936214447, "learning_rate": 4.950199933852318e-06, "loss": 0.632, "step": 658 }, { "epoch": 0.22293640054127198, "grad_norm": 0.2613477408885956, "learning_rate": 4.950021876694768e-06, "loss": 0.6399, "step": 659 }, { "epoch": 0.2232746955345061, "grad_norm": 0.2623678743839264, "learning_rate": 4.949843505002478e-06, "loss": 0.657, "step": 660 }, { "epoch": 0.2236129905277402, "grad_norm": 0.2551998198032379, "learning_rate": 4.949664818798347e-06, "loss": 0.6564, "step": 661 }, { "epoch": 0.2239512855209743, "grad_norm": 0.27580368518829346, "learning_rate": 4.949485818105313e-06, "loss": 0.68, "step": 662 }, { "epoch": 0.2242895805142084, "grad_norm": 0.2604037821292877, "learning_rate": 4.949306502946358e-06, "loss": 0.6649, "step": 663 }, { "epoch": 0.22462787550744248, "grad_norm": 0.27373746037483215, "learning_rate": 4.949126873344504e-06, "loss": 0.6637, "step": 664 }, { "epoch": 0.22496617050067658, "grad_norm": 0.256798654794693, "learning_rate": 4.948946929322809e-06, "loss": 0.6679, "step": 665 }, { "epoch": 0.2253044654939107, "grad_norm": 0.26641178131103516, "learning_rate": 4.948766670904375e-06, "loss": 0.6541, "step": 666 }, { "epoch": 0.2256427604871448, "grad_norm": 0.2707980275154114, "learning_rate": 4.948586098112344e-06, "loss": 0.6644, "step": 667 }, { "epoch": 0.2259810554803789, "grad_norm": 0.26444727182388306, "learning_rate": 4.9484052109698984e-06, "loss": 0.6547, "step": 668 }, { "epoch": 0.22631935047361298, "grad_norm": 0.26225337386131287, "learning_rate": 4.948224009500261e-06, "loss": 0.6686, "step": 669 }, { "epoch": 0.22665764546684708, "grad_norm": 0.26892396807670593, "learning_rate": 4.948042493726694e-06, "loss": 0.6394, "step": 670 }, { "epoch": 0.2269959404600812, "grad_norm": 0.25884899497032166, "learning_rate": 4.947860663672499e-06, "loss": 0.6215, "step": 671 }, { "epoch": 0.2273342354533153, "grad_norm": 0.26995548605918884, "learning_rate": 4.947678519361021e-06, "loss": 0.6476, "step": 672 }, { "epoch": 0.2276725304465494, "grad_norm": 0.26591798663139343, "learning_rate": 4.947496060815643e-06, "loss": 0.6653, "step": 673 }, { "epoch": 0.2280108254397835, "grad_norm": 0.2683780789375305, "learning_rate": 4.94731328805979e-06, "loss": 0.6593, "step": 674 }, { "epoch": 0.22834912043301758, "grad_norm": 0.2754748463630676, "learning_rate": 4.947130201116926e-06, "loss": 0.6589, "step": 675 }, { "epoch": 0.22868741542625168, "grad_norm": 0.25586527585983276, "learning_rate": 4.946946800010556e-06, "loss": 0.6178, "step": 676 }, { "epoch": 0.2290257104194858, "grad_norm": 0.252361536026001, "learning_rate": 4.946763084764225e-06, "loss": 0.6348, "step": 677 }, { "epoch": 0.2293640054127199, "grad_norm": 0.2510469853878021, "learning_rate": 4.946579055401517e-06, "loss": 0.6216, "step": 678 }, { "epoch": 0.229702300405954, "grad_norm": 0.2685176432132721, "learning_rate": 4.94639471194606e-06, "loss": 0.6469, "step": 679 }, { "epoch": 0.23004059539918809, "grad_norm": 0.2603522539138794, "learning_rate": 4.946210054421519e-06, "loss": 0.6434, "step": 680 }, { "epoch": 0.23037889039242218, "grad_norm": 0.2632562220096588, "learning_rate": 4.9460250828516e-06, "loss": 0.6483, "step": 681 }, { "epoch": 0.2307171853856563, "grad_norm": 0.2648269236087799, "learning_rate": 4.9458397972600505e-06, "loss": 0.645, "step": 682 }, { "epoch": 0.2310554803788904, "grad_norm": 0.254108190536499, "learning_rate": 4.945654197670657e-06, "loss": 0.6604, "step": 683 }, { "epoch": 0.2313937753721245, "grad_norm": 0.269853413105011, "learning_rate": 4.945468284107246e-06, "loss": 0.6617, "step": 684 }, { "epoch": 0.2317320703653586, "grad_norm": 0.2735177278518677, "learning_rate": 4.945282056593689e-06, "loss": 0.6613, "step": 685 }, { "epoch": 0.23207036535859268, "grad_norm": 0.2711608409881592, "learning_rate": 4.945095515153889e-06, "loss": 0.647, "step": 686 }, { "epoch": 0.2324086603518268, "grad_norm": 0.2601723074913025, "learning_rate": 4.944908659811796e-06, "loss": 0.6584, "step": 687 }, { "epoch": 0.2327469553450609, "grad_norm": 0.2672250270843506, "learning_rate": 4.944721490591401e-06, "loss": 0.6529, "step": 688 }, { "epoch": 0.233085250338295, "grad_norm": 0.2668798863887787, "learning_rate": 4.94453400751673e-06, "loss": 0.6695, "step": 689 }, { "epoch": 0.2334235453315291, "grad_norm": 0.26739099621772766, "learning_rate": 4.944346210611853e-06, "loss": 0.6493, "step": 690 }, { "epoch": 0.2337618403247632, "grad_norm": 0.2629740834236145, "learning_rate": 4.9441580999008794e-06, "loss": 0.6472, "step": 691 }, { "epoch": 0.23410013531799728, "grad_norm": 0.2657390534877777, "learning_rate": 4.94396967540796e-06, "loss": 0.6516, "step": 692 }, { "epoch": 0.2344384303112314, "grad_norm": 0.26596182584762573, "learning_rate": 4.943780937157283e-06, "loss": 0.6571, "step": 693 }, { "epoch": 0.2347767253044655, "grad_norm": 0.26130345463752747, "learning_rate": 4.943591885173079e-06, "loss": 0.6407, "step": 694 }, { "epoch": 0.2351150202976996, "grad_norm": 0.24915356934070587, "learning_rate": 4.943402519479619e-06, "loss": 0.6739, "step": 695 }, { "epoch": 0.2354533152909337, "grad_norm": 0.27618497610092163, "learning_rate": 4.943212840101215e-06, "loss": 0.658, "step": 696 }, { "epoch": 0.23579161028416779, "grad_norm": 0.2530587613582611, "learning_rate": 4.943022847062216e-06, "loss": 0.6126, "step": 697 }, { "epoch": 0.2361299052774019, "grad_norm": 0.2686367928981781, "learning_rate": 4.942832540387015e-06, "loss": 0.6383, "step": 698 }, { "epoch": 0.236468200270636, "grad_norm": 0.27149710059165955, "learning_rate": 4.942641920100042e-06, "loss": 0.6494, "step": 699 }, { "epoch": 0.2368064952638701, "grad_norm": 0.2659294903278351, "learning_rate": 4.942450986225771e-06, "loss": 0.6707, "step": 700 }, { "epoch": 0.2371447902571042, "grad_norm": 0.2757556438446045, "learning_rate": 4.9422597387887126e-06, "loss": 0.6336, "step": 701 }, { "epoch": 0.2374830852503383, "grad_norm": 0.2766989767551422, "learning_rate": 4.942068177813419e-06, "loss": 0.6496, "step": 702 }, { "epoch": 0.23782138024357238, "grad_norm": 0.2604788541793823, "learning_rate": 4.941876303324486e-06, "loss": 0.6459, "step": 703 }, { "epoch": 0.2381596752368065, "grad_norm": 0.2848023474216461, "learning_rate": 4.941684115346541e-06, "loss": 0.6673, "step": 704 }, { "epoch": 0.2384979702300406, "grad_norm": 0.27619898319244385, "learning_rate": 4.941491613904263e-06, "loss": 0.6474, "step": 705 }, { "epoch": 0.2388362652232747, "grad_norm": 0.25733035802841187, "learning_rate": 4.941298799022361e-06, "loss": 0.6355, "step": 706 }, { "epoch": 0.2391745602165088, "grad_norm": 0.25964587926864624, "learning_rate": 4.94110567072559e-06, "loss": 0.6675, "step": 707 }, { "epoch": 0.2395128552097429, "grad_norm": 0.2735098898410797, "learning_rate": 4.940912229038746e-06, "loss": 0.665, "step": 708 }, { "epoch": 0.239851150202977, "grad_norm": 0.28130871057510376, "learning_rate": 4.94071847398666e-06, "loss": 0.6356, "step": 709 }, { "epoch": 0.2401894451962111, "grad_norm": 0.2739136219024658, "learning_rate": 4.940524405594208e-06, "loss": 0.6485, "step": 710 }, { "epoch": 0.2405277401894452, "grad_norm": 0.27404212951660156, "learning_rate": 4.940330023886304e-06, "loss": 0.6505, "step": 711 }, { "epoch": 0.2408660351826793, "grad_norm": 0.2825517952442169, "learning_rate": 4.940135328887903e-06, "loss": 0.6816, "step": 712 }, { "epoch": 0.2412043301759134, "grad_norm": 0.26147276163101196, "learning_rate": 4.939940320624e-06, "loss": 0.6614, "step": 713 }, { "epoch": 0.24154262516914748, "grad_norm": 0.26788076758384705, "learning_rate": 4.9397449991196305e-06, "loss": 0.6508, "step": 714 }, { "epoch": 0.2418809201623816, "grad_norm": 0.26942339539527893, "learning_rate": 4.93954936439987e-06, "loss": 0.6447, "step": 715 }, { "epoch": 0.2422192151556157, "grad_norm": 0.26093873381614685, "learning_rate": 4.939353416489834e-06, "loss": 0.6644, "step": 716 }, { "epoch": 0.2425575101488498, "grad_norm": 0.2670133411884308, "learning_rate": 4.9391571554146775e-06, "loss": 0.6457, "step": 717 }, { "epoch": 0.2428958051420839, "grad_norm": 0.2692917287349701, "learning_rate": 4.938960581199598e-06, "loss": 0.6626, "step": 718 }, { "epoch": 0.243234100135318, "grad_norm": 0.2619960606098175, "learning_rate": 4.93876369386983e-06, "loss": 0.6708, "step": 719 }, { "epoch": 0.2435723951285521, "grad_norm": 0.25591549277305603, "learning_rate": 4.938566493450653e-06, "loss": 0.6589, "step": 720 }, { "epoch": 0.2439106901217862, "grad_norm": 0.2719472348690033, "learning_rate": 4.9383689799673805e-06, "loss": 0.6549, "step": 721 }, { "epoch": 0.2442489851150203, "grad_norm": 0.2731001675128937, "learning_rate": 4.938171153445371e-06, "loss": 0.669, "step": 722 }, { "epoch": 0.2445872801082544, "grad_norm": 0.2630438506603241, "learning_rate": 4.937973013910021e-06, "loss": 0.6771, "step": 723 }, { "epoch": 0.2449255751014885, "grad_norm": 0.2734837234020233, "learning_rate": 4.937774561386768e-06, "loss": 0.6661, "step": 724 }, { "epoch": 0.24526387009472259, "grad_norm": 0.2699190378189087, "learning_rate": 4.937575795901089e-06, "loss": 0.6566, "step": 725 }, { "epoch": 0.2456021650879567, "grad_norm": 0.27050119638442993, "learning_rate": 4.937376717478502e-06, "loss": 0.6205, "step": 726 }, { "epoch": 0.2459404600811908, "grad_norm": 0.26535895466804504, "learning_rate": 4.937177326144564e-06, "loss": 0.6516, "step": 727 }, { "epoch": 0.2462787550744249, "grad_norm": 0.2628813683986664, "learning_rate": 4.936977621924875e-06, "loss": 0.6572, "step": 728 }, { "epoch": 0.246617050067659, "grad_norm": 0.27562880516052246, "learning_rate": 4.936777604845071e-06, "loss": 0.6686, "step": 729 }, { "epoch": 0.2469553450608931, "grad_norm": 0.26140421628952026, "learning_rate": 4.936577274930831e-06, "loss": 0.6538, "step": 730 }, { "epoch": 0.2472936400541272, "grad_norm": 0.26812177896499634, "learning_rate": 4.9363766322078735e-06, "loss": 0.6526, "step": 731 }, { "epoch": 0.2476319350473613, "grad_norm": 0.2756509780883789, "learning_rate": 4.936175676701956e-06, "loss": 0.648, "step": 732 }, { "epoch": 0.2479702300405954, "grad_norm": 0.27314281463623047, "learning_rate": 4.935974408438879e-06, "loss": 0.7115, "step": 733 }, { "epoch": 0.2483085250338295, "grad_norm": 0.2648617625236511, "learning_rate": 4.9357728274444806e-06, "loss": 0.6563, "step": 734 }, { "epoch": 0.2486468200270636, "grad_norm": 0.27088579535484314, "learning_rate": 4.93557093374464e-06, "loss": 0.6569, "step": 735 }, { "epoch": 0.2489851150202977, "grad_norm": 0.2711031436920166, "learning_rate": 4.935368727365276e-06, "loss": 0.6514, "step": 736 }, { "epoch": 0.2493234100135318, "grad_norm": 0.2798830270767212, "learning_rate": 4.935166208332348e-06, "loss": 0.6791, "step": 737 }, { "epoch": 0.2496617050067659, "grad_norm": 0.26322317123413086, "learning_rate": 4.934963376671857e-06, "loss": 0.6176, "step": 738 }, { "epoch": 0.25, "grad_norm": 0.2789643108844757, "learning_rate": 4.934760232409839e-06, "loss": 0.6578, "step": 739 }, { "epoch": 0.2503382949932341, "grad_norm": 0.26852181553840637, "learning_rate": 4.934556775572377e-06, "loss": 0.6377, "step": 740 }, { "epoch": 0.2506765899864682, "grad_norm": 0.2695555090904236, "learning_rate": 4.9343530061855906e-06, "loss": 0.6618, "step": 741 }, { "epoch": 0.2510148849797023, "grad_norm": 0.272977739572525, "learning_rate": 4.934148924275638e-06, "loss": 0.6486, "step": 742 }, { "epoch": 0.2513531799729364, "grad_norm": 0.2690495550632477, "learning_rate": 4.933944529868721e-06, "loss": 0.6457, "step": 743 }, { "epoch": 0.2516914749661705, "grad_norm": 0.281653493642807, "learning_rate": 4.933739822991079e-06, "loss": 0.6658, "step": 744 }, { "epoch": 0.2520297699594046, "grad_norm": 0.27239227294921875, "learning_rate": 4.933534803668992e-06, "loss": 0.64, "step": 745 }, { "epoch": 0.2523680649526387, "grad_norm": 0.2809191346168518, "learning_rate": 4.9333294719287815e-06, "loss": 0.6798, "step": 746 }, { "epoch": 0.2527063599458728, "grad_norm": 0.2619982659816742, "learning_rate": 4.933123827796808e-06, "loss": 0.6456, "step": 747 }, { "epoch": 0.2530446549391069, "grad_norm": 0.26253849267959595, "learning_rate": 4.932917871299471e-06, "loss": 0.6387, "step": 748 }, { "epoch": 0.253382949932341, "grad_norm": 0.27713361382484436, "learning_rate": 4.932711602463212e-06, "loss": 0.6593, "step": 749 }, { "epoch": 0.25372124492557513, "grad_norm": 0.285663366317749, "learning_rate": 4.932505021314513e-06, "loss": 0.6451, "step": 750 }, { "epoch": 0.2540595399188092, "grad_norm": 0.27399569749832153, "learning_rate": 4.932298127879893e-06, "loss": 0.6649, "step": 751 }, { "epoch": 0.2543978349120433, "grad_norm": 0.2679443955421448, "learning_rate": 4.932090922185914e-06, "loss": 0.6687, "step": 752 }, { "epoch": 0.2547361299052774, "grad_norm": 0.26548048853874207, "learning_rate": 4.9318834042591765e-06, "loss": 0.6627, "step": 753 }, { "epoch": 0.2550744248985115, "grad_norm": 0.274859219789505, "learning_rate": 4.9316755741263225e-06, "loss": 0.6615, "step": 754 }, { "epoch": 0.2554127198917456, "grad_norm": 0.2755845785140991, "learning_rate": 4.931467431814034e-06, "loss": 0.6382, "step": 755 }, { "epoch": 0.2557510148849797, "grad_norm": 0.2709038555622101, "learning_rate": 4.93125897734903e-06, "loss": 0.6393, "step": 756 }, { "epoch": 0.2560893098782138, "grad_norm": 0.26591774821281433, "learning_rate": 4.931050210758075e-06, "loss": 0.6749, "step": 757 }, { "epoch": 0.2564276048714479, "grad_norm": 0.2669565677642822, "learning_rate": 4.9308411320679685e-06, "loss": 0.6301, "step": 758 }, { "epoch": 0.256765899864682, "grad_norm": 0.26058095693588257, "learning_rate": 4.930631741305552e-06, "loss": 0.661, "step": 759 }, { "epoch": 0.2571041948579161, "grad_norm": 0.27673038840293884, "learning_rate": 4.930422038497708e-06, "loss": 0.6619, "step": 760 }, { "epoch": 0.2574424898511502, "grad_norm": 0.27486756443977356, "learning_rate": 4.930212023671358e-06, "loss": 0.652, "step": 761 }, { "epoch": 0.2577807848443843, "grad_norm": 0.27896642684936523, "learning_rate": 4.930001696853463e-06, "loss": 0.6478, "step": 762 }, { "epoch": 0.2581190798376184, "grad_norm": 0.26254603266716003, "learning_rate": 4.929791058071027e-06, "loss": 0.663, "step": 763 }, { "epoch": 0.2584573748308525, "grad_norm": 0.2756212055683136, "learning_rate": 4.92958010735109e-06, "loss": 0.6576, "step": 764 }, { "epoch": 0.2587956698240866, "grad_norm": 0.2646501362323761, "learning_rate": 4.929368844720734e-06, "loss": 0.677, "step": 765 }, { "epoch": 0.2591339648173207, "grad_norm": 0.2635130286216736, "learning_rate": 4.929157270207082e-06, "loss": 0.6584, "step": 766 }, { "epoch": 0.25947225981055483, "grad_norm": 0.26653656363487244, "learning_rate": 4.928945383837295e-06, "loss": 0.6664, "step": 767 }, { "epoch": 0.2598105548037889, "grad_norm": 0.2647206485271454, "learning_rate": 4.928733185638576e-06, "loss": 0.6653, "step": 768 }, { "epoch": 0.260148849797023, "grad_norm": 0.26432615518569946, "learning_rate": 4.928520675638166e-06, "loss": 0.6444, "step": 769 }, { "epoch": 0.2604871447902571, "grad_norm": 0.26040834188461304, "learning_rate": 4.928307853863348e-06, "loss": 0.6244, "step": 770 }, { "epoch": 0.2608254397834912, "grad_norm": 0.2694340944290161, "learning_rate": 4.928094720341443e-06, "loss": 0.6523, "step": 771 }, { "epoch": 0.26116373477672533, "grad_norm": 0.27743464708328247, "learning_rate": 4.927881275099815e-06, "loss": 0.6625, "step": 772 }, { "epoch": 0.2615020297699594, "grad_norm": 0.2654753625392914, "learning_rate": 4.927667518165865e-06, "loss": 0.6122, "step": 773 }, { "epoch": 0.2618403247631935, "grad_norm": 0.25984954833984375, "learning_rate": 4.927453449567034e-06, "loss": 0.659, "step": 774 }, { "epoch": 0.2621786197564276, "grad_norm": 0.2867293357849121, "learning_rate": 4.927239069330807e-06, "loss": 0.6761, "step": 775 }, { "epoch": 0.2625169147496617, "grad_norm": 0.2768084704875946, "learning_rate": 4.927024377484705e-06, "loss": 0.6616, "step": 776 }, { "epoch": 0.2628552097428958, "grad_norm": 0.27421021461486816, "learning_rate": 4.926809374056289e-06, "loss": 0.6375, "step": 777 }, { "epoch": 0.2631935047361299, "grad_norm": 0.2622529864311218, "learning_rate": 4.926594059073163e-06, "loss": 0.6457, "step": 778 }, { "epoch": 0.263531799729364, "grad_norm": 0.27969247102737427, "learning_rate": 4.926378432562967e-06, "loss": 0.6683, "step": 779 }, { "epoch": 0.2638700947225981, "grad_norm": 0.2766921818256378, "learning_rate": 4.926162494553386e-06, "loss": 0.664, "step": 780 }, { "epoch": 0.2642083897158322, "grad_norm": 0.26445505023002625, "learning_rate": 4.925946245072141e-06, "loss": 0.6448, "step": 781 }, { "epoch": 0.2645466847090663, "grad_norm": 0.2706442177295685, "learning_rate": 4.925729684146993e-06, "loss": 0.6505, "step": 782 }, { "epoch": 0.2648849797023004, "grad_norm": 0.27379173040390015, "learning_rate": 4.925512811805746e-06, "loss": 0.6697, "step": 783 }, { "epoch": 0.2652232746955345, "grad_norm": 0.26962971687316895, "learning_rate": 4.925295628076241e-06, "loss": 0.6092, "step": 784 }, { "epoch": 0.2655615696887686, "grad_norm": 0.2711654007434845, "learning_rate": 4.925078132986361e-06, "loss": 0.6708, "step": 785 }, { "epoch": 0.2658998646820027, "grad_norm": 0.26972121000289917, "learning_rate": 4.924860326564027e-06, "loss": 0.6659, "step": 786 }, { "epoch": 0.2662381596752368, "grad_norm": 0.27795904874801636, "learning_rate": 4.9246422088372015e-06, "loss": 0.6511, "step": 787 }, { "epoch": 0.2665764546684709, "grad_norm": 0.26935580372810364, "learning_rate": 4.924423779833887e-06, "loss": 0.6264, "step": 788 }, { "epoch": 0.26691474966170503, "grad_norm": 0.26369181275367737, "learning_rate": 4.924205039582125e-06, "loss": 0.6679, "step": 789 }, { "epoch": 0.2672530446549391, "grad_norm": 0.2865050733089447, "learning_rate": 4.9239859881099984e-06, "loss": 0.6591, "step": 790 }, { "epoch": 0.2675913396481732, "grad_norm": 0.3010486960411072, "learning_rate": 4.923766625445627e-06, "loss": 0.6298, "step": 791 }, { "epoch": 0.2679296346414073, "grad_norm": 0.26568490266799927, "learning_rate": 4.9235469516171754e-06, "loss": 0.6755, "step": 792 }, { "epoch": 0.2682679296346414, "grad_norm": 0.27336952090263367, "learning_rate": 4.923326966652844e-06, "loss": 0.6797, "step": 793 }, { "epoch": 0.26860622462787553, "grad_norm": 0.29760247468948364, "learning_rate": 4.923106670580874e-06, "loss": 0.6558, "step": 794 }, { "epoch": 0.2689445196211096, "grad_norm": 0.2981927692890167, "learning_rate": 4.922886063429547e-06, "loss": 0.6575, "step": 795 }, { "epoch": 0.2692828146143437, "grad_norm": 0.2672402858734131, "learning_rate": 4.9226651452271865e-06, "loss": 0.6547, "step": 796 }, { "epoch": 0.2696211096075778, "grad_norm": 0.2725027799606323, "learning_rate": 4.922443916002153e-06, "loss": 0.6709, "step": 797 }, { "epoch": 0.2699594046008119, "grad_norm": 0.2933136224746704, "learning_rate": 4.922222375782847e-06, "loss": 0.6582, "step": 798 }, { "epoch": 0.270297699594046, "grad_norm": 0.26434826850891113, "learning_rate": 4.922000524597712e-06, "loss": 0.6615, "step": 799 }, { "epoch": 0.2706359945872801, "grad_norm": 0.24848580360412598, "learning_rate": 4.921778362475227e-06, "loss": 0.6071, "step": 800 }, { "epoch": 0.2709742895805142, "grad_norm": 0.26173582673072815, "learning_rate": 4.921555889443914e-06, "loss": 0.6705, "step": 801 }, { "epoch": 0.2713125845737483, "grad_norm": 0.29065439105033875, "learning_rate": 4.9213331055323356e-06, "loss": 0.6351, "step": 802 }, { "epoch": 0.2716508795669824, "grad_norm": 0.287558913230896, "learning_rate": 4.921110010769091e-06, "loss": 0.6432, "step": 803 }, { "epoch": 0.2719891745602165, "grad_norm": 0.2773423492908478, "learning_rate": 4.920886605182823e-06, "loss": 0.6495, "step": 804 }, { "epoch": 0.2723274695534506, "grad_norm": 0.2841305434703827, "learning_rate": 4.9206628888022116e-06, "loss": 0.6729, "step": 805 }, { "epoch": 0.27266576454668473, "grad_norm": 0.28614985942840576, "learning_rate": 4.920438861655977e-06, "loss": 0.6524, "step": 806 }, { "epoch": 0.2730040595399188, "grad_norm": 0.2771781086921692, "learning_rate": 4.9202145237728805e-06, "loss": 0.6564, "step": 807 }, { "epoch": 0.2733423545331529, "grad_norm": 0.2679559886455536, "learning_rate": 4.919989875181722e-06, "loss": 0.6593, "step": 808 }, { "epoch": 0.273680649526387, "grad_norm": 0.27360039949417114, "learning_rate": 4.919764915911344e-06, "loss": 0.6367, "step": 809 }, { "epoch": 0.2740189445196211, "grad_norm": 0.295267254114151, "learning_rate": 4.919539645990624e-06, "loss": 0.6485, "step": 810 }, { "epoch": 0.27435723951285523, "grad_norm": 0.2728036046028137, "learning_rate": 4.919314065448484e-06, "loss": 0.6688, "step": 811 }, { "epoch": 0.2746955345060893, "grad_norm": 0.2643529772758484, "learning_rate": 4.919088174313884e-06, "loss": 0.6599, "step": 812 }, { "epoch": 0.2750338294993234, "grad_norm": 0.2759469151496887, "learning_rate": 4.918861972615824e-06, "loss": 0.6354, "step": 813 }, { "epoch": 0.2753721244925575, "grad_norm": 0.29973581433296204, "learning_rate": 4.918635460383343e-06, "loss": 0.6638, "step": 814 }, { "epoch": 0.2757104194857916, "grad_norm": 0.2820885479450226, "learning_rate": 4.918408637645522e-06, "loss": 0.6475, "step": 815 }, { "epoch": 0.27604871447902574, "grad_norm": 0.2663920223712921, "learning_rate": 4.91818150443148e-06, "loss": 0.648, "step": 816 }, { "epoch": 0.2763870094722598, "grad_norm": 0.29732027649879456, "learning_rate": 4.917954060770376e-06, "loss": 0.6461, "step": 817 }, { "epoch": 0.2767253044654939, "grad_norm": 0.27227893471717834, "learning_rate": 4.917726306691409e-06, "loss": 0.6449, "step": 818 }, { "epoch": 0.277063599458728, "grad_norm": 0.27002593874931335, "learning_rate": 4.91749824222382e-06, "loss": 0.6624, "step": 819 }, { "epoch": 0.2774018944519621, "grad_norm": 0.2894153594970703, "learning_rate": 4.917269867396887e-06, "loss": 0.6217, "step": 820 }, { "epoch": 0.2777401894451962, "grad_norm": 0.263049453496933, "learning_rate": 4.917041182239927e-06, "loss": 0.6274, "step": 821 }, { "epoch": 0.2780784844384303, "grad_norm": 0.2681529223918915, "learning_rate": 4.916812186782303e-06, "loss": 0.6323, "step": 822 }, { "epoch": 0.27841677943166443, "grad_norm": 0.27024561166763306, "learning_rate": 4.9165828810534085e-06, "loss": 0.6493, "step": 823 }, { "epoch": 0.2787550744248985, "grad_norm": 0.28334054350852966, "learning_rate": 4.916353265082686e-06, "loss": 0.6519, "step": 824 }, { "epoch": 0.2790933694181326, "grad_norm": 0.2738794982433319, "learning_rate": 4.916123338899611e-06, "loss": 0.6228, "step": 825 }, { "epoch": 0.2794316644113667, "grad_norm": 0.2756909430027008, "learning_rate": 4.915893102533702e-06, "loss": 0.6367, "step": 826 }, { "epoch": 0.2797699594046008, "grad_norm": 0.272421270608902, "learning_rate": 4.9156625560145185e-06, "loss": 0.6688, "step": 827 }, { "epoch": 0.28010825439783493, "grad_norm": 0.2805042266845703, "learning_rate": 4.915431699371656e-06, "loss": 0.6588, "step": 828 }, { "epoch": 0.280446549391069, "grad_norm": 0.27977386116981506, "learning_rate": 4.915200532634754e-06, "loss": 0.6351, "step": 829 }, { "epoch": 0.2807848443843031, "grad_norm": 0.26902759075164795, "learning_rate": 4.914969055833488e-06, "loss": 0.6649, "step": 830 }, { "epoch": 0.2811231393775372, "grad_norm": 0.26842644810676575, "learning_rate": 4.914737268997576e-06, "loss": 0.6335, "step": 831 }, { "epoch": 0.2814614343707713, "grad_norm": 0.2780354619026184, "learning_rate": 4.914505172156774e-06, "loss": 0.6641, "step": 832 }, { "epoch": 0.28179972936400544, "grad_norm": 0.276547372341156, "learning_rate": 4.91427276534088e-06, "loss": 0.6778, "step": 833 }, { "epoch": 0.2821380243572395, "grad_norm": 0.26204949617385864, "learning_rate": 4.91404004857973e-06, "loss": 0.6253, "step": 834 }, { "epoch": 0.2824763193504736, "grad_norm": 0.2651567757129669, "learning_rate": 4.913807021903199e-06, "loss": 0.6357, "step": 835 }, { "epoch": 0.2828146143437077, "grad_norm": 0.26619213819503784, "learning_rate": 4.913573685341205e-06, "loss": 0.6679, "step": 836 }, { "epoch": 0.2831529093369418, "grad_norm": 0.26389190554618835, "learning_rate": 4.913340038923703e-06, "loss": 0.6428, "step": 837 }, { "epoch": 0.28349120433017594, "grad_norm": 0.264245867729187, "learning_rate": 4.913106082680688e-06, "loss": 0.6489, "step": 838 }, { "epoch": 0.28382949932341, "grad_norm": 0.27729716897010803, "learning_rate": 4.912871816642196e-06, "loss": 0.6338, "step": 839 }, { "epoch": 0.28416779431664413, "grad_norm": 0.2706574499607086, "learning_rate": 4.912637240838302e-06, "loss": 0.6252, "step": 840 }, { "epoch": 0.2845060893098782, "grad_norm": 0.2795005142688751, "learning_rate": 4.912402355299122e-06, "loss": 0.6473, "step": 841 }, { "epoch": 0.2848443843031123, "grad_norm": 0.2789385914802551, "learning_rate": 4.9121671600548094e-06, "loss": 0.6632, "step": 842 }, { "epoch": 0.2851826792963464, "grad_norm": 0.27970561385154724, "learning_rate": 4.911931655135559e-06, "loss": 0.6352, "step": 843 }, { "epoch": 0.2855209742895805, "grad_norm": 0.2838321030139923, "learning_rate": 4.911695840571605e-06, "loss": 0.6298, "step": 844 }, { "epoch": 0.28585926928281463, "grad_norm": 0.2680176794528961, "learning_rate": 4.911459716393221e-06, "loss": 0.6386, "step": 845 }, { "epoch": 0.2861975642760487, "grad_norm": 0.2741706669330597, "learning_rate": 4.911223282630722e-06, "loss": 0.642, "step": 846 }, { "epoch": 0.2865358592692828, "grad_norm": 0.27544164657592773, "learning_rate": 4.91098653931446e-06, "loss": 0.6226, "step": 847 }, { "epoch": 0.2868741542625169, "grad_norm": 0.2681983709335327, "learning_rate": 4.9107494864748285e-06, "loss": 0.6822, "step": 848 }, { "epoch": 0.287212449255751, "grad_norm": 0.27470800280570984, "learning_rate": 4.91051212414226e-06, "loss": 0.6666, "step": 849 }, { "epoch": 0.28755074424898514, "grad_norm": 0.27337849140167236, "learning_rate": 4.91027445234723e-06, "loss": 0.6535, "step": 850 }, { "epoch": 0.2878890392422192, "grad_norm": 0.26465293765068054, "learning_rate": 4.910036471120247e-06, "loss": 0.6492, "step": 851 }, { "epoch": 0.2882273342354533, "grad_norm": 0.2718878984451294, "learning_rate": 4.909798180491865e-06, "loss": 0.656, "step": 852 }, { "epoch": 0.2885656292286874, "grad_norm": 0.27970167994499207, "learning_rate": 4.909559580492677e-06, "loss": 0.6487, "step": 853 }, { "epoch": 0.2889039242219215, "grad_norm": 0.2778087854385376, "learning_rate": 4.909320671153311e-06, "loss": 0.6309, "step": 854 }, { "epoch": 0.28924221921515564, "grad_norm": 0.29702576994895935, "learning_rate": 4.909081452504443e-06, "loss": 0.6452, "step": 855 }, { "epoch": 0.2895805142083897, "grad_norm": 0.28228092193603516, "learning_rate": 4.908841924576781e-06, "loss": 0.6476, "step": 856 }, { "epoch": 0.28991880920162383, "grad_norm": 0.279044508934021, "learning_rate": 4.908602087401075e-06, "loss": 0.6609, "step": 857 }, { "epoch": 0.2902571041948579, "grad_norm": 0.2703443467617035, "learning_rate": 4.9083619410081165e-06, "loss": 0.6392, "step": 858 }, { "epoch": 0.290595399188092, "grad_norm": 0.273985356092453, "learning_rate": 4.908121485428737e-06, "loss": 0.6312, "step": 859 }, { "epoch": 0.29093369418132614, "grad_norm": 0.271116703748703, "learning_rate": 4.907880720693804e-06, "loss": 0.6329, "step": 860 }, { "epoch": 0.2912719891745602, "grad_norm": 0.2669009864330292, "learning_rate": 4.907639646834229e-06, "loss": 0.6406, "step": 861 }, { "epoch": 0.29161028416779433, "grad_norm": 0.262957900762558, "learning_rate": 4.907398263880959e-06, "loss": 0.6125, "step": 862 }, { "epoch": 0.2919485791610284, "grad_norm": 0.2755131423473358, "learning_rate": 4.907156571864984e-06, "loss": 0.654, "step": 863 }, { "epoch": 0.2922868741542625, "grad_norm": 0.28943315148353577, "learning_rate": 4.906914570817333e-06, "loss": 0.6432, "step": 864 }, { "epoch": 0.29262516914749664, "grad_norm": 0.2823984622955322, "learning_rate": 4.9066722607690734e-06, "loss": 0.6548, "step": 865 }, { "epoch": 0.2929634641407307, "grad_norm": 0.2662210464477539, "learning_rate": 4.906429641751313e-06, "loss": 0.6448, "step": 866 }, { "epoch": 0.29330175913396483, "grad_norm": 0.27130478620529175, "learning_rate": 4.9061867137952006e-06, "loss": 0.6261, "step": 867 }, { "epoch": 0.2936400541271989, "grad_norm": 0.2734956741333008, "learning_rate": 4.9059434769319205e-06, "loss": 0.6509, "step": 868 }, { "epoch": 0.293978349120433, "grad_norm": 0.2768975496292114, "learning_rate": 4.905699931192703e-06, "loss": 0.6399, "step": 869 }, { "epoch": 0.2943166441136671, "grad_norm": 0.2712252140045166, "learning_rate": 4.905456076608812e-06, "loss": 0.6462, "step": 870 }, { "epoch": 0.2946549391069012, "grad_norm": 0.2708142101764679, "learning_rate": 4.905211913211555e-06, "loss": 0.6442, "step": 871 }, { "epoch": 0.29499323410013534, "grad_norm": 0.2733133137226105, "learning_rate": 4.9049674410322775e-06, "loss": 0.6661, "step": 872 }, { "epoch": 0.2953315290933694, "grad_norm": 0.28249210119247437, "learning_rate": 4.904722660102366e-06, "loss": 0.6331, "step": 873 }, { "epoch": 0.2956698240866035, "grad_norm": 0.28206610679626465, "learning_rate": 4.904477570453243e-06, "loss": 0.6507, "step": 874 }, { "epoch": 0.2960081190798376, "grad_norm": 0.26992663741111755, "learning_rate": 4.904232172116375e-06, "loss": 0.6725, "step": 875 }, { "epoch": 0.2963464140730717, "grad_norm": 0.27740415930747986, "learning_rate": 4.903986465123266e-06, "loss": 0.6486, "step": 876 }, { "epoch": 0.29668470906630584, "grad_norm": 0.2828187346458435, "learning_rate": 4.903740449505461e-06, "loss": 0.6728, "step": 877 }, { "epoch": 0.2970230040595399, "grad_norm": 0.27313926815986633, "learning_rate": 4.903494125294541e-06, "loss": 0.6422, "step": 878 }, { "epoch": 0.29736129905277403, "grad_norm": 0.2670018970966339, "learning_rate": 4.903247492522131e-06, "loss": 0.6512, "step": 879 }, { "epoch": 0.2976995940460081, "grad_norm": 0.2771807312965393, "learning_rate": 4.903000551219894e-06, "loss": 0.6448, "step": 880 }, { "epoch": 0.2980378890392422, "grad_norm": 0.2740825414657593, "learning_rate": 4.902753301419532e-06, "loss": 0.6555, "step": 881 }, { "epoch": 0.29837618403247634, "grad_norm": 0.26953041553497314, "learning_rate": 4.902505743152786e-06, "loss": 0.616, "step": 882 }, { "epoch": 0.2987144790257104, "grad_norm": 0.2877902686595917, "learning_rate": 4.90225787645144e-06, "loss": 0.6762, "step": 883 }, { "epoch": 0.29905277401894453, "grad_norm": 0.275796115398407, "learning_rate": 4.902009701347313e-06, "loss": 0.6637, "step": 884 }, { "epoch": 0.2993910690121786, "grad_norm": 0.28130945563316345, "learning_rate": 4.9017612178722665e-06, "loss": 0.652, "step": 885 }, { "epoch": 0.2997293640054127, "grad_norm": 0.29672494530677795, "learning_rate": 4.901512426058202e-06, "loss": 0.659, "step": 886 }, { "epoch": 0.30006765899864685, "grad_norm": 0.27471110224723816, "learning_rate": 4.901263325937057e-06, "loss": 0.6305, "step": 887 }, { "epoch": 0.3004059539918809, "grad_norm": 0.26989707350730896, "learning_rate": 4.901013917540814e-06, "loss": 0.6655, "step": 888 }, { "epoch": 0.30074424898511504, "grad_norm": 0.2895686626434326, "learning_rate": 4.90076420090149e-06, "loss": 0.6305, "step": 889 }, { "epoch": 0.3010825439783491, "grad_norm": 0.2914600074291229, "learning_rate": 4.900514176051144e-06, "loss": 0.6623, "step": 890 }, { "epoch": 0.3014208389715832, "grad_norm": 0.27903082966804504, "learning_rate": 4.900263843021876e-06, "loss": 0.6194, "step": 891 }, { "epoch": 0.3017591339648173, "grad_norm": 0.2799786925315857, "learning_rate": 4.900013201845821e-06, "loss": 0.6553, "step": 892 }, { "epoch": 0.3020974289580514, "grad_norm": 0.27043768763542175, "learning_rate": 4.89976225255516e-06, "loss": 0.6641, "step": 893 }, { "epoch": 0.30243572395128554, "grad_norm": 0.27861344814300537, "learning_rate": 4.899510995182107e-06, "loss": 0.6405, "step": 894 }, { "epoch": 0.3027740189445196, "grad_norm": 0.28648340702056885, "learning_rate": 4.8992594297589194e-06, "loss": 0.6421, "step": 895 }, { "epoch": 0.30311231393775373, "grad_norm": 0.27372077107429504, "learning_rate": 4.899007556317893e-06, "loss": 0.6481, "step": 896 }, { "epoch": 0.3034506089309878, "grad_norm": 0.2841525673866272, "learning_rate": 4.898755374891364e-06, "loss": 0.6849, "step": 897 }, { "epoch": 0.3037889039242219, "grad_norm": 0.2740134298801422, "learning_rate": 4.898502885511707e-06, "loss": 0.6327, "step": 898 }, { "epoch": 0.30412719891745604, "grad_norm": 0.264041930437088, "learning_rate": 4.898250088211337e-06, "loss": 0.6478, "step": 899 }, { "epoch": 0.3044654939106901, "grad_norm": 0.27168142795562744, "learning_rate": 4.897996983022709e-06, "loss": 0.6394, "step": 900 }, { "epoch": 0.30480378890392423, "grad_norm": 0.2795570194721222, "learning_rate": 4.897743569978315e-06, "loss": 0.659, "step": 901 }, { "epoch": 0.3051420838971583, "grad_norm": 0.2912237346172333, "learning_rate": 4.8974898491106895e-06, "loss": 0.681, "step": 902 }, { "epoch": 0.3054803788903924, "grad_norm": 0.27849677205085754, "learning_rate": 4.897235820452405e-06, "loss": 0.6362, "step": 903 }, { "epoch": 0.30581867388362655, "grad_norm": 0.274339497089386, "learning_rate": 4.896981484036074e-06, "loss": 0.6646, "step": 904 }, { "epoch": 0.3061569688768606, "grad_norm": 0.29278630018234253, "learning_rate": 4.896726839894348e-06, "loss": 0.6681, "step": 905 }, { "epoch": 0.30649526387009474, "grad_norm": 0.27960678935050964, "learning_rate": 4.896471888059918e-06, "loss": 0.6597, "step": 906 }, { "epoch": 0.3068335588633288, "grad_norm": 0.2835066318511963, "learning_rate": 4.896216628565515e-06, "loss": 0.639, "step": 907 }, { "epoch": 0.3071718538565629, "grad_norm": 0.27574077248573303, "learning_rate": 4.895961061443912e-06, "loss": 0.6654, "step": 908 }, { "epoch": 0.30751014884979705, "grad_norm": 0.29681262373924255, "learning_rate": 4.895705186727913e-06, "loss": 0.6216, "step": 909 }, { "epoch": 0.3078484438430311, "grad_norm": 0.2979026436805725, "learning_rate": 4.8954490044503726e-06, "loss": 0.6562, "step": 910 }, { "epoch": 0.30818673883626524, "grad_norm": 0.28780779242515564, "learning_rate": 4.895192514644176e-06, "loss": 0.636, "step": 911 }, { "epoch": 0.3085250338294993, "grad_norm": 0.2931484580039978, "learning_rate": 4.894935717342255e-06, "loss": 0.6479, "step": 912 }, { "epoch": 0.30886332882273343, "grad_norm": 0.29892581701278687, "learning_rate": 4.894678612577574e-06, "loss": 0.654, "step": 913 }, { "epoch": 0.3092016238159675, "grad_norm": 0.29458022117614746, "learning_rate": 4.894421200383142e-06, "loss": 0.6537, "step": 914 }, { "epoch": 0.3095399188092016, "grad_norm": 0.2971714735031128, "learning_rate": 4.894163480792006e-06, "loss": 0.6514, "step": 915 }, { "epoch": 0.30987821380243574, "grad_norm": 0.28501057624816895, "learning_rate": 4.89390545383725e-06, "loss": 0.6881, "step": 916 }, { "epoch": 0.3102165087956698, "grad_norm": 0.2875775098800659, "learning_rate": 4.893647119552002e-06, "loss": 0.6531, "step": 917 }, { "epoch": 0.31055480378890393, "grad_norm": 0.28826165199279785, "learning_rate": 4.893388477969425e-06, "loss": 0.6387, "step": 918 }, { "epoch": 0.310893098782138, "grad_norm": 0.2894171476364136, "learning_rate": 4.893129529122725e-06, "loss": 0.643, "step": 919 }, { "epoch": 0.3112313937753721, "grad_norm": 0.2911660969257355, "learning_rate": 4.892870273045146e-06, "loss": 0.6703, "step": 920 }, { "epoch": 0.31156968876860625, "grad_norm": 0.2881146967411041, "learning_rate": 4.892610709769971e-06, "loss": 0.6624, "step": 921 }, { "epoch": 0.3119079837618403, "grad_norm": 0.288736492395401, "learning_rate": 4.8923508393305224e-06, "loss": 0.6588, "step": 922 }, { "epoch": 0.31224627875507444, "grad_norm": 0.2821895480155945, "learning_rate": 4.892090661760163e-06, "loss": 0.6462, "step": 923 }, { "epoch": 0.3125845737483085, "grad_norm": 0.2836967706680298, "learning_rate": 4.891830177092294e-06, "loss": 0.6612, "step": 924 }, { "epoch": 0.3129228687415426, "grad_norm": 0.2861119210720062, "learning_rate": 4.8915693853603576e-06, "loss": 0.65, "step": 925 }, { "epoch": 0.31326116373477675, "grad_norm": 0.2934957444667816, "learning_rate": 4.891308286597832e-06, "loss": 0.662, "step": 926 }, { "epoch": 0.3135994587280108, "grad_norm": 0.280984103679657, "learning_rate": 4.891046880838241e-06, "loss": 0.6539, "step": 927 }, { "epoch": 0.31393775372124494, "grad_norm": 0.27881526947021484, "learning_rate": 4.89078516811514e-06, "loss": 0.6465, "step": 928 }, { "epoch": 0.314276048714479, "grad_norm": 0.2846492826938629, "learning_rate": 4.89052314846213e-06, "loss": 0.6333, "step": 929 }, { "epoch": 0.31461434370771313, "grad_norm": 0.2940545380115509, "learning_rate": 4.890260821912848e-06, "loss": 0.6067, "step": 930 }, { "epoch": 0.31495263870094725, "grad_norm": 0.28840309381484985, "learning_rate": 4.889998188500973e-06, "loss": 0.6233, "step": 931 }, { "epoch": 0.3152909336941813, "grad_norm": 0.26783108711242676, "learning_rate": 4.889735248260221e-06, "loss": 0.6431, "step": 932 }, { "epoch": 0.31562922868741544, "grad_norm": 0.28233930468559265, "learning_rate": 4.889472001224348e-06, "loss": 0.6512, "step": 933 }, { "epoch": 0.3159675236806495, "grad_norm": 0.2793835699558258, "learning_rate": 4.8892084474271515e-06, "loss": 0.6427, "step": 934 }, { "epoch": 0.31630581867388363, "grad_norm": 0.29794105887413025, "learning_rate": 4.888944586902464e-06, "loss": 0.6256, "step": 935 }, { "epoch": 0.3166441136671177, "grad_norm": 0.2742573618888855, "learning_rate": 4.8886804196841634e-06, "loss": 0.6423, "step": 936 }, { "epoch": 0.3169824086603518, "grad_norm": 0.29450520873069763, "learning_rate": 4.8884159458061604e-06, "loss": 0.6399, "step": 937 }, { "epoch": 0.31732070365358594, "grad_norm": 0.28303098678588867, "learning_rate": 4.888151165302409e-06, "loss": 0.6366, "step": 938 }, { "epoch": 0.31765899864682, "grad_norm": 0.28655314445495605, "learning_rate": 4.887886078206904e-06, "loss": 0.655, "step": 939 }, { "epoch": 0.31799729364005414, "grad_norm": 0.28076592087745667, "learning_rate": 4.887620684553675e-06, "loss": 0.6391, "step": 940 }, { "epoch": 0.3183355886332882, "grad_norm": 0.2662147283554077, "learning_rate": 4.8873549843767935e-06, "loss": 0.6394, "step": 941 }, { "epoch": 0.3186738836265223, "grad_norm": 0.28641197085380554, "learning_rate": 4.887088977710371e-06, "loss": 0.6288, "step": 942 }, { "epoch": 0.31901217861975645, "grad_norm": 0.2831316292285919, "learning_rate": 4.886822664588558e-06, "loss": 0.6378, "step": 943 }, { "epoch": 0.3193504736129905, "grad_norm": 0.27543431520462036, "learning_rate": 4.886556045045542e-06, "loss": 0.6125, "step": 944 }, { "epoch": 0.31968876860622464, "grad_norm": 0.2942037880420685, "learning_rate": 4.886289119115554e-06, "loss": 0.6516, "step": 945 }, { "epoch": 0.3200270635994587, "grad_norm": 0.28229376673698425, "learning_rate": 4.886021886832861e-06, "loss": 0.6216, "step": 946 }, { "epoch": 0.32036535859269283, "grad_norm": 0.27998530864715576, "learning_rate": 4.885754348231769e-06, "loss": 0.6612, "step": 947 }, { "epoch": 0.32070365358592695, "grad_norm": 0.29919928312301636, "learning_rate": 4.885486503346628e-06, "loss": 0.6532, "step": 948 }, { "epoch": 0.321041948579161, "grad_norm": 0.3005315363407135, "learning_rate": 4.88521835221182e-06, "loss": 0.6491, "step": 949 }, { "epoch": 0.32138024357239514, "grad_norm": 0.30181828141212463, "learning_rate": 4.884949894861774e-06, "loss": 0.6376, "step": 950 }, { "epoch": 0.3217185385656292, "grad_norm": 0.2998781204223633, "learning_rate": 4.884681131330953e-06, "loss": 0.676, "step": 951 }, { "epoch": 0.32205683355886333, "grad_norm": 0.27801403403282166, "learning_rate": 4.88441206165386e-06, "loss": 0.6399, "step": 952 }, { "epoch": 0.32239512855209745, "grad_norm": 0.2941897511482239, "learning_rate": 4.88414268586504e-06, "loss": 0.6376, "step": 953 }, { "epoch": 0.3227334235453315, "grad_norm": 0.2913419008255005, "learning_rate": 4.883873003999075e-06, "loss": 0.6379, "step": 954 }, { "epoch": 0.32307171853856564, "grad_norm": 0.2757265865802765, "learning_rate": 4.883603016090586e-06, "loss": 0.6473, "step": 955 }, { "epoch": 0.3234100135317997, "grad_norm": 0.28613418340682983, "learning_rate": 4.883332722174236e-06, "loss": 0.6618, "step": 956 }, { "epoch": 0.32374830852503383, "grad_norm": 0.2763930857181549, "learning_rate": 4.883062122284723e-06, "loss": 0.6264, "step": 957 }, { "epoch": 0.32408660351826796, "grad_norm": 0.27556413412094116, "learning_rate": 4.882791216456789e-06, "loss": 0.6497, "step": 958 }, { "epoch": 0.324424898511502, "grad_norm": 0.28852880001068115, "learning_rate": 4.882520004725212e-06, "loss": 0.6384, "step": 959 }, { "epoch": 0.32476319350473615, "grad_norm": 0.29310303926467896, "learning_rate": 4.88224848712481e-06, "loss": 0.6453, "step": 960 }, { "epoch": 0.3251014884979702, "grad_norm": 0.2852711081504822, "learning_rate": 4.881976663690441e-06, "loss": 0.662, "step": 961 }, { "epoch": 0.32543978349120434, "grad_norm": 0.30317068099975586, "learning_rate": 4.881704534457001e-06, "loss": 0.6489, "step": 962 }, { "epoch": 0.3257780784844384, "grad_norm": 0.29215002059936523, "learning_rate": 4.881432099459427e-06, "loss": 0.6365, "step": 963 }, { "epoch": 0.3261163734776725, "grad_norm": 0.2727847099304199, "learning_rate": 4.881159358732695e-06, "loss": 0.6483, "step": 964 }, { "epoch": 0.32645466847090665, "grad_norm": 0.28112176060676575, "learning_rate": 4.880886312311817e-06, "loss": 0.6496, "step": 965 }, { "epoch": 0.3267929634641407, "grad_norm": 0.2979572117328644, "learning_rate": 4.880612960231849e-06, "loss": 0.6684, "step": 966 }, { "epoch": 0.32713125845737484, "grad_norm": 0.2899046838283539, "learning_rate": 4.880339302527883e-06, "loss": 0.6463, "step": 967 }, { "epoch": 0.3274695534506089, "grad_norm": 0.28542327880859375, "learning_rate": 4.880065339235053e-06, "loss": 0.6415, "step": 968 }, { "epoch": 0.32780784844384303, "grad_norm": 0.27839645743370056, "learning_rate": 4.8797910703885275e-06, "loss": 0.6607, "step": 969 }, { "epoch": 0.32814614343707715, "grad_norm": 0.3090164065361023, "learning_rate": 4.8795164960235206e-06, "loss": 0.644, "step": 970 }, { "epoch": 0.3284844384303112, "grad_norm": 0.2935158610343933, "learning_rate": 4.879241616175279e-06, "loss": 0.6398, "step": 971 }, { "epoch": 0.32882273342354534, "grad_norm": 0.2757309377193451, "learning_rate": 4.878966430879094e-06, "loss": 0.6296, "step": 972 }, { "epoch": 0.3291610284167794, "grad_norm": 0.3061336874961853, "learning_rate": 4.8786909401702944e-06, "loss": 0.6394, "step": 973 }, { "epoch": 0.32949932341001353, "grad_norm": 0.297267347574234, "learning_rate": 4.8784151440842455e-06, "loss": 0.6492, "step": 974 }, { "epoch": 0.32983761840324766, "grad_norm": 0.29051071405410767, "learning_rate": 4.878139042656356e-06, "loss": 0.6316, "step": 975 }, { "epoch": 0.3301759133964817, "grad_norm": 0.2814705967903137, "learning_rate": 4.877862635922071e-06, "loss": 0.6727, "step": 976 }, { "epoch": 0.33051420838971585, "grad_norm": 0.282053142786026, "learning_rate": 4.877585923916877e-06, "loss": 0.6201, "step": 977 }, { "epoch": 0.3308525033829499, "grad_norm": 0.27852943539619446, "learning_rate": 4.877308906676297e-06, "loss": 0.6466, "step": 978 }, { "epoch": 0.33119079837618404, "grad_norm": 0.2765594720840454, "learning_rate": 4.877031584235895e-06, "loss": 0.6282, "step": 979 }, { "epoch": 0.33152909336941816, "grad_norm": 0.28560516238212585, "learning_rate": 4.876753956631274e-06, "loss": 0.6581, "step": 980 }, { "epoch": 0.3318673883626522, "grad_norm": 0.27953213453292847, "learning_rate": 4.876476023898076e-06, "loss": 0.6319, "step": 981 }, { "epoch": 0.33220568335588635, "grad_norm": 0.29077577590942383, "learning_rate": 4.876197786071981e-06, "loss": 0.6608, "step": 982 }, { "epoch": 0.3325439783491204, "grad_norm": 0.2820235788822174, "learning_rate": 4.87591924318871e-06, "loss": 0.6464, "step": 983 }, { "epoch": 0.33288227334235454, "grad_norm": 0.297158420085907, "learning_rate": 4.875640395284023e-06, "loss": 0.6358, "step": 984 }, { "epoch": 0.3332205683355886, "grad_norm": 0.2829797863960266, "learning_rate": 4.875361242393719e-06, "loss": 0.6352, "step": 985 }, { "epoch": 0.33355886332882273, "grad_norm": 0.284898579120636, "learning_rate": 4.875081784553634e-06, "loss": 0.6305, "step": 986 }, { "epoch": 0.33389715832205685, "grad_norm": 0.2893214523792267, "learning_rate": 4.8748020217996465e-06, "loss": 0.6286, "step": 987 }, { "epoch": 0.3342354533152909, "grad_norm": 0.29545867443084717, "learning_rate": 4.874521954167671e-06, "loss": 0.6374, "step": 988 }, { "epoch": 0.33457374830852504, "grad_norm": 0.2779877185821533, "learning_rate": 4.874241581693664e-06, "loss": 0.6334, "step": 989 }, { "epoch": 0.3349120433017591, "grad_norm": 0.2803078591823578, "learning_rate": 4.87396090441362e-06, "loss": 0.6608, "step": 990 }, { "epoch": 0.33525033829499323, "grad_norm": 0.27986928820610046, "learning_rate": 4.873679922363571e-06, "loss": 0.6628, "step": 991 }, { "epoch": 0.33558863328822736, "grad_norm": 0.30071017146110535, "learning_rate": 4.87339863557959e-06, "loss": 0.6446, "step": 992 }, { "epoch": 0.3359269282814614, "grad_norm": 0.2871248424053192, "learning_rate": 4.87311704409779e-06, "loss": 0.6347, "step": 993 }, { "epoch": 0.33626522327469555, "grad_norm": 0.2827127277851105, "learning_rate": 4.872835147954321e-06, "loss": 0.6415, "step": 994 }, { "epoch": 0.3366035182679296, "grad_norm": 0.28385990858078003, "learning_rate": 4.8725529471853726e-06, "loss": 0.5986, "step": 995 }, { "epoch": 0.33694181326116374, "grad_norm": 0.26931530237197876, "learning_rate": 4.872270441827174e-06, "loss": 0.6573, "step": 996 }, { "epoch": 0.33728010825439786, "grad_norm": 0.29211145639419556, "learning_rate": 4.871987631915994e-06, "loss": 0.6367, "step": 997 }, { "epoch": 0.3376184032476319, "grad_norm": 0.2922752797603607, "learning_rate": 4.871704517488139e-06, "loss": 0.6503, "step": 998 }, { "epoch": 0.33795669824086605, "grad_norm": 0.2984164357185364, "learning_rate": 4.8714210985799556e-06, "loss": 0.6605, "step": 999 }, { "epoch": 0.3382949932341001, "grad_norm": 0.28730762004852295, "learning_rate": 4.871137375227829e-06, "loss": 0.6365, "step": 1000 }, { "epoch": 0.33863328822733424, "grad_norm": 0.3080568015575409, "learning_rate": 4.8708533474681844e-06, "loss": 0.6491, "step": 1001 }, { "epoch": 0.33897158322056836, "grad_norm": 0.3268609642982483, "learning_rate": 4.870569015337485e-06, "loss": 0.6398, "step": 1002 }, { "epoch": 0.33930987821380243, "grad_norm": 0.28499341011047363, "learning_rate": 4.8702843788722335e-06, "loss": 0.6574, "step": 1003 }, { "epoch": 0.33964817320703655, "grad_norm": 0.3010106384754181, "learning_rate": 4.8699994381089715e-06, "loss": 0.6277, "step": 1004 }, { "epoch": 0.3399864682002706, "grad_norm": 0.29605910181999207, "learning_rate": 4.86971419308428e-06, "loss": 0.6473, "step": 1005 }, { "epoch": 0.34032476319350474, "grad_norm": 0.2929319441318512, "learning_rate": 4.869428643834779e-06, "loss": 0.6541, "step": 1006 }, { "epoch": 0.3406630581867388, "grad_norm": 0.2899619936943054, "learning_rate": 4.869142790397126e-06, "loss": 0.6425, "step": 1007 }, { "epoch": 0.34100135317997293, "grad_norm": 0.29885226488113403, "learning_rate": 4.8688566328080215e-06, "loss": 0.6462, "step": 1008 }, { "epoch": 0.34133964817320706, "grad_norm": 0.28795602917671204, "learning_rate": 4.868570171104201e-06, "loss": 0.6526, "step": 1009 }, { "epoch": 0.3416779431664411, "grad_norm": 0.27857333421707153, "learning_rate": 4.86828340532244e-06, "loss": 0.6579, "step": 1010 }, { "epoch": 0.34201623815967525, "grad_norm": 0.30258554220199585, "learning_rate": 4.867996335499555e-06, "loss": 0.6285, "step": 1011 }, { "epoch": 0.3423545331529093, "grad_norm": 0.284223735332489, "learning_rate": 4.867708961672399e-06, "loss": 0.6259, "step": 1012 }, { "epoch": 0.34269282814614344, "grad_norm": 0.29295721650123596, "learning_rate": 4.867421283877866e-06, "loss": 0.6311, "step": 1013 }, { "epoch": 0.34303112313937756, "grad_norm": 0.2908198833465576, "learning_rate": 4.867133302152888e-06, "loss": 0.6339, "step": 1014 }, { "epoch": 0.3433694181326116, "grad_norm": 0.2774893641471863, "learning_rate": 4.866845016534436e-06, "loss": 0.6468, "step": 1015 }, { "epoch": 0.34370771312584575, "grad_norm": 0.2836349904537201, "learning_rate": 4.86655642705952e-06, "loss": 0.6219, "step": 1016 }, { "epoch": 0.3440460081190798, "grad_norm": 0.2842315435409546, "learning_rate": 4.8662675337651886e-06, "loss": 0.6248, "step": 1017 }, { "epoch": 0.34438430311231394, "grad_norm": 0.2972695827484131, "learning_rate": 4.865978336688532e-06, "loss": 0.676, "step": 1018 }, { "epoch": 0.34472259810554806, "grad_norm": 0.2845618724822998, "learning_rate": 4.865688835866677e-06, "loss": 0.6475, "step": 1019 }, { "epoch": 0.34506089309878213, "grad_norm": 0.2947195768356323, "learning_rate": 4.8653990313367875e-06, "loss": 0.6472, "step": 1020 }, { "epoch": 0.34539918809201625, "grad_norm": 0.28122177720069885, "learning_rate": 4.865108923136071e-06, "loss": 0.6306, "step": 1021 }, { "epoch": 0.3457374830852503, "grad_norm": 0.29748567938804626, "learning_rate": 4.864818511301771e-06, "loss": 0.6618, "step": 1022 }, { "epoch": 0.34607577807848444, "grad_norm": 0.2848156690597534, "learning_rate": 4.864527795871172e-06, "loss": 0.6294, "step": 1023 }, { "epoch": 0.34641407307171856, "grad_norm": 0.2786930799484253, "learning_rate": 4.864236776881593e-06, "loss": 0.67, "step": 1024 }, { "epoch": 0.34675236806495263, "grad_norm": 0.28665855526924133, "learning_rate": 4.863945454370398e-06, "loss": 0.665, "step": 1025 }, { "epoch": 0.34709066305818675, "grad_norm": 0.30270564556121826, "learning_rate": 4.863653828374986e-06, "loss": 0.6347, "step": 1026 }, { "epoch": 0.3474289580514208, "grad_norm": 0.28528571128845215, "learning_rate": 4.863361898932796e-06, "loss": 0.6698, "step": 1027 }, { "epoch": 0.34776725304465494, "grad_norm": 0.28242045640945435, "learning_rate": 4.863069666081307e-06, "loss": 0.6253, "step": 1028 }, { "epoch": 0.348105548037889, "grad_norm": 0.2775231599807739, "learning_rate": 4.862777129858034e-06, "loss": 0.606, "step": 1029 }, { "epoch": 0.34844384303112313, "grad_norm": 0.2802172601222992, "learning_rate": 4.862484290300536e-06, "loss": 0.6675, "step": 1030 }, { "epoch": 0.34878213802435726, "grad_norm": 0.2965852916240692, "learning_rate": 4.862191147446403e-06, "loss": 0.6503, "step": 1031 }, { "epoch": 0.3491204330175913, "grad_norm": 0.2785798907279968, "learning_rate": 4.8618977013332744e-06, "loss": 0.6122, "step": 1032 }, { "epoch": 0.34945872801082545, "grad_norm": 0.2883950471878052, "learning_rate": 4.861603951998819e-06, "loss": 0.6526, "step": 1033 }, { "epoch": 0.3497970230040595, "grad_norm": 0.2888367474079132, "learning_rate": 4.86130989948075e-06, "loss": 0.6418, "step": 1034 }, { "epoch": 0.35013531799729364, "grad_norm": 0.30539917945861816, "learning_rate": 4.8610155438168175e-06, "loss": 0.6232, "step": 1035 }, { "epoch": 0.35047361299052776, "grad_norm": 0.28636616468429565, "learning_rate": 4.860720885044811e-06, "loss": 0.6353, "step": 1036 }, { "epoch": 0.35081190798376183, "grad_norm": 0.2941320240497589, "learning_rate": 4.86042592320256e-06, "loss": 0.6699, "step": 1037 }, { "epoch": 0.35115020297699595, "grad_norm": 0.28965914249420166, "learning_rate": 4.860130658327929e-06, "loss": 0.6387, "step": 1038 }, { "epoch": 0.35148849797023, "grad_norm": 0.28881844878196716, "learning_rate": 4.859835090458827e-06, "loss": 0.6532, "step": 1039 }, { "epoch": 0.35182679296346414, "grad_norm": 0.2799035906791687, "learning_rate": 4.859539219633199e-06, "loss": 0.635, "step": 1040 }, { "epoch": 0.35216508795669826, "grad_norm": 0.2909943759441376, "learning_rate": 4.859243045889028e-06, "loss": 0.6236, "step": 1041 }, { "epoch": 0.35250338294993233, "grad_norm": 0.3185156583786011, "learning_rate": 4.858946569264337e-06, "loss": 0.6398, "step": 1042 }, { "epoch": 0.35284167794316645, "grad_norm": 0.3125178813934326, "learning_rate": 4.858649789797188e-06, "loss": 0.6224, "step": 1043 }, { "epoch": 0.3531799729364005, "grad_norm": 0.365818053483963, "learning_rate": 4.858352707525681e-06, "loss": 0.665, "step": 1044 }, { "epoch": 0.35351826792963464, "grad_norm": 0.2922377288341522, "learning_rate": 4.858055322487957e-06, "loss": 0.6528, "step": 1045 }, { "epoch": 0.35385656292286877, "grad_norm": 0.2925521731376648, "learning_rate": 4.857757634722193e-06, "loss": 0.6496, "step": 1046 }, { "epoch": 0.35419485791610283, "grad_norm": 0.32205817103385925, "learning_rate": 4.857459644266606e-06, "loss": 0.6299, "step": 1047 }, { "epoch": 0.35453315290933696, "grad_norm": 0.32586464285850525, "learning_rate": 4.857161351159454e-06, "loss": 0.6439, "step": 1048 }, { "epoch": 0.354871447902571, "grad_norm": 0.29772520065307617, "learning_rate": 4.8568627554390304e-06, "loss": 0.6637, "step": 1049 }, { "epoch": 0.35520974289580515, "grad_norm": 0.38943392038345337, "learning_rate": 4.85656385714367e-06, "loss": 0.6257, "step": 1050 }, { "epoch": 0.3555480378890392, "grad_norm": 0.2854504883289337, "learning_rate": 4.856264656311745e-06, "loss": 0.618, "step": 1051 }, { "epoch": 0.35588633288227334, "grad_norm": 0.3191981315612793, "learning_rate": 4.855965152981667e-06, "loss": 0.6259, "step": 1052 }, { "epoch": 0.35622462787550746, "grad_norm": 0.326311320066452, "learning_rate": 4.855665347191885e-06, "loss": 0.6669, "step": 1053 }, { "epoch": 0.3565629228687415, "grad_norm": 0.2891475260257721, "learning_rate": 4.85536523898089e-06, "loss": 0.6516, "step": 1054 }, { "epoch": 0.35690121786197565, "grad_norm": 0.29754310846328735, "learning_rate": 4.85506482838721e-06, "loss": 0.6429, "step": 1055 }, { "epoch": 0.3572395128552097, "grad_norm": 0.3007654547691345, "learning_rate": 4.85476411544941e-06, "loss": 0.6733, "step": 1056 }, { "epoch": 0.35757780784844384, "grad_norm": 0.30116209387779236, "learning_rate": 4.8544631002060995e-06, "loss": 0.6343, "step": 1057 }, { "epoch": 0.35791610284167796, "grad_norm": 0.27309471368789673, "learning_rate": 4.8541617826959186e-06, "loss": 0.6562, "step": 1058 }, { "epoch": 0.35825439783491203, "grad_norm": 0.2854005694389343, "learning_rate": 4.8538601629575525e-06, "loss": 0.6266, "step": 1059 }, { "epoch": 0.35859269282814615, "grad_norm": 0.29010218381881714, "learning_rate": 4.853558241029723e-06, "loss": 0.6342, "step": 1060 }, { "epoch": 0.3589309878213802, "grad_norm": 0.28771087527275085, "learning_rate": 4.853256016951191e-06, "loss": 0.6483, "step": 1061 }, { "epoch": 0.35926928281461434, "grad_norm": 0.2850498855113983, "learning_rate": 4.852953490760757e-06, "loss": 0.6505, "step": 1062 }, { "epoch": 0.35960757780784847, "grad_norm": 0.2764224708080292, "learning_rate": 4.8526506624972576e-06, "loss": 0.6644, "step": 1063 }, { "epoch": 0.35994587280108253, "grad_norm": 0.27553361654281616, "learning_rate": 4.852347532199571e-06, "loss": 0.6562, "step": 1064 }, { "epoch": 0.36028416779431666, "grad_norm": 0.28768324851989746, "learning_rate": 4.8520440999066135e-06, "loss": 0.6411, "step": 1065 }, { "epoch": 0.3606224627875507, "grad_norm": 0.29022908210754395, "learning_rate": 4.85174036565734e-06, "loss": 0.6808, "step": 1066 }, { "epoch": 0.36096075778078485, "grad_norm": 0.28811946511268616, "learning_rate": 4.851436329490743e-06, "loss": 0.6348, "step": 1067 }, { "epoch": 0.36129905277401897, "grad_norm": 0.28230148553848267, "learning_rate": 4.851131991445856e-06, "loss": 0.6294, "step": 1068 }, { "epoch": 0.36163734776725304, "grad_norm": 0.2874899208545685, "learning_rate": 4.850827351561749e-06, "loss": 0.6418, "step": 1069 }, { "epoch": 0.36197564276048716, "grad_norm": 0.28801053762435913, "learning_rate": 4.850522409877532e-06, "loss": 0.6389, "step": 1070 }, { "epoch": 0.3623139377537212, "grad_norm": 0.28445324301719666, "learning_rate": 4.850217166432354e-06, "loss": 0.615, "step": 1071 }, { "epoch": 0.36265223274695535, "grad_norm": 0.29263418912887573, "learning_rate": 4.849911621265401e-06, "loss": 0.6432, "step": 1072 }, { "epoch": 0.3629905277401895, "grad_norm": 0.2812570035457611, "learning_rate": 4.849605774415901e-06, "loss": 0.6478, "step": 1073 }, { "epoch": 0.36332882273342354, "grad_norm": 0.27595818042755127, "learning_rate": 4.849299625923117e-06, "loss": 0.6271, "step": 1074 }, { "epoch": 0.36366711772665766, "grad_norm": 0.2792835533618927, "learning_rate": 4.848993175826354e-06, "loss": 0.6411, "step": 1075 }, { "epoch": 0.36400541271989173, "grad_norm": 0.2848093509674072, "learning_rate": 4.848686424164953e-06, "loss": 0.6452, "step": 1076 }, { "epoch": 0.36434370771312585, "grad_norm": 0.2892758846282959, "learning_rate": 4.8483793709782955e-06, "loss": 0.6333, "step": 1077 }, { "epoch": 0.3646820027063599, "grad_norm": 0.2910735607147217, "learning_rate": 4.8480720163058e-06, "loss": 0.6263, "step": 1078 }, { "epoch": 0.36502029769959404, "grad_norm": 0.2763359546661377, "learning_rate": 4.8477643601869265e-06, "loss": 0.6252, "step": 1079 }, { "epoch": 0.36535859269282817, "grad_norm": 0.28197360038757324, "learning_rate": 4.84745640266117e-06, "loss": 0.619, "step": 1080 }, { "epoch": 0.36569688768606223, "grad_norm": 0.29176396131515503, "learning_rate": 4.847148143768069e-06, "loss": 0.6784, "step": 1081 }, { "epoch": 0.36603518267929636, "grad_norm": 0.28936561942100525, "learning_rate": 4.846839583547195e-06, "loss": 0.627, "step": 1082 }, { "epoch": 0.3663734776725304, "grad_norm": 0.2793295979499817, "learning_rate": 4.846530722038163e-06, "loss": 0.6333, "step": 1083 }, { "epoch": 0.36671177266576455, "grad_norm": 0.295923113822937, "learning_rate": 4.846221559280624e-06, "loss": 0.6628, "step": 1084 }, { "epoch": 0.36705006765899867, "grad_norm": 0.28163689374923706, "learning_rate": 4.845912095314269e-06, "loss": 0.6323, "step": 1085 }, { "epoch": 0.36738836265223274, "grad_norm": 0.26855286955833435, "learning_rate": 4.845602330178826e-06, "loss": 0.6075, "step": 1086 }, { "epoch": 0.36772665764546686, "grad_norm": 0.3333419859409332, "learning_rate": 4.845292263914063e-06, "loss": 0.6323, "step": 1087 }, { "epoch": 0.3680649526387009, "grad_norm": 0.2788902223110199, "learning_rate": 4.844981896559787e-06, "loss": 0.6374, "step": 1088 }, { "epoch": 0.36840324763193505, "grad_norm": 0.3036459982395172, "learning_rate": 4.844671228155844e-06, "loss": 0.6248, "step": 1089 }, { "epoch": 0.36874154262516917, "grad_norm": 0.27933943271636963, "learning_rate": 4.844360258742115e-06, "loss": 0.6524, "step": 1090 }, { "epoch": 0.36907983761840324, "grad_norm": 0.2957841157913208, "learning_rate": 4.844048988358525e-06, "loss": 0.6441, "step": 1091 }, { "epoch": 0.36941813261163736, "grad_norm": 0.2745031714439392, "learning_rate": 4.843737417045035e-06, "loss": 0.6421, "step": 1092 }, { "epoch": 0.36975642760487143, "grad_norm": 0.2944257855415344, "learning_rate": 4.843425544841642e-06, "loss": 0.6331, "step": 1093 }, { "epoch": 0.37009472259810555, "grad_norm": 0.2939079999923706, "learning_rate": 4.843113371788386e-06, "loss": 0.6425, "step": 1094 }, { "epoch": 0.3704330175913397, "grad_norm": 0.2723322808742523, "learning_rate": 4.842800897925344e-06, "loss": 0.6283, "step": 1095 }, { "epoch": 0.37077131258457374, "grad_norm": 0.2891277074813843, "learning_rate": 4.842488123292632e-06, "loss": 0.634, "step": 1096 }, { "epoch": 0.37110960757780787, "grad_norm": 0.27961093187332153, "learning_rate": 4.842175047930403e-06, "loss": 0.6664, "step": 1097 }, { "epoch": 0.37144790257104193, "grad_norm": 0.2886982858181, "learning_rate": 4.841861671878851e-06, "loss": 0.6424, "step": 1098 }, { "epoch": 0.37178619756427606, "grad_norm": 0.28926536440849304, "learning_rate": 4.8415479951782054e-06, "loss": 0.642, "step": 1099 }, { "epoch": 0.3721244925575101, "grad_norm": 0.28860124945640564, "learning_rate": 4.841234017868738e-06, "loss": 0.6579, "step": 1100 }, { "epoch": 0.37246278755074425, "grad_norm": 0.2937227189540863, "learning_rate": 4.840919739990756e-06, "loss": 0.6384, "step": 1101 }, { "epoch": 0.37280108254397837, "grad_norm": 0.31778162717819214, "learning_rate": 4.840605161584607e-06, "loss": 0.6537, "step": 1102 }, { "epoch": 0.37313937753721244, "grad_norm": 0.2775851786136627, "learning_rate": 4.840290282690678e-06, "loss": 0.6401, "step": 1103 }, { "epoch": 0.37347767253044656, "grad_norm": 0.28459596633911133, "learning_rate": 4.839975103349391e-06, "loss": 0.6417, "step": 1104 }, { "epoch": 0.3738159675236806, "grad_norm": 0.30100637674331665, "learning_rate": 4.83965962360121e-06, "loss": 0.6175, "step": 1105 }, { "epoch": 0.37415426251691475, "grad_norm": 0.27300921082496643, "learning_rate": 4.839343843486636e-06, "loss": 0.6477, "step": 1106 }, { "epoch": 0.37449255751014887, "grad_norm": 0.2922280728816986, "learning_rate": 4.8390277630462105e-06, "loss": 0.6143, "step": 1107 }, { "epoch": 0.37483085250338294, "grad_norm": 0.2937355041503906, "learning_rate": 4.8387113823205104e-06, "loss": 0.6433, "step": 1108 }, { "epoch": 0.37516914749661706, "grad_norm": 0.27802449464797974, "learning_rate": 4.838394701350153e-06, "loss": 0.6202, "step": 1109 }, { "epoch": 0.37550744248985113, "grad_norm": 0.2949639856815338, "learning_rate": 4.838077720175793e-06, "loss": 0.6614, "step": 1110 }, { "epoch": 0.37584573748308525, "grad_norm": 0.3015292286872864, "learning_rate": 4.837760438838128e-06, "loss": 0.6589, "step": 1111 }, { "epoch": 0.3761840324763194, "grad_norm": 0.30569127202033997, "learning_rate": 4.837442857377886e-06, "loss": 0.652, "step": 1112 }, { "epoch": 0.37652232746955344, "grad_norm": 0.27839574217796326, "learning_rate": 4.837124975835842e-06, "loss": 0.6272, "step": 1113 }, { "epoch": 0.37686062246278756, "grad_norm": 0.29350098967552185, "learning_rate": 4.8368067942528045e-06, "loss": 0.6202, "step": 1114 }, { "epoch": 0.37719891745602163, "grad_norm": 0.2903738021850586, "learning_rate": 4.836488312669621e-06, "loss": 0.6172, "step": 1115 }, { "epoch": 0.37753721244925575, "grad_norm": 0.28676044940948486, "learning_rate": 4.8361695311271795e-06, "loss": 0.6215, "step": 1116 }, { "epoch": 0.3778755074424899, "grad_norm": 0.2821767032146454, "learning_rate": 4.835850449666405e-06, "loss": 0.6195, "step": 1117 }, { "epoch": 0.37821380243572394, "grad_norm": 0.29364868998527527, "learning_rate": 4.83553106832826e-06, "loss": 0.6345, "step": 1118 }, { "epoch": 0.37855209742895807, "grad_norm": 0.2790587246417999, "learning_rate": 4.835211387153749e-06, "loss": 0.659, "step": 1119 }, { "epoch": 0.37889039242219213, "grad_norm": 0.2861728072166443, "learning_rate": 4.83489140618391e-06, "loss": 0.6375, "step": 1120 }, { "epoch": 0.37922868741542626, "grad_norm": 0.2797929048538208, "learning_rate": 4.834571125459826e-06, "loss": 0.6465, "step": 1121 }, { "epoch": 0.3795669824086603, "grad_norm": 0.30132776498794556, "learning_rate": 4.834250545022611e-06, "loss": 0.6353, "step": 1122 }, { "epoch": 0.37990527740189445, "grad_norm": 0.2855285704135895, "learning_rate": 4.833929664913423e-06, "loss": 0.6558, "step": 1123 }, { "epoch": 0.38024357239512857, "grad_norm": 0.2976146638393402, "learning_rate": 4.833608485173458e-06, "loss": 0.6267, "step": 1124 }, { "epoch": 0.38058186738836264, "grad_norm": 0.2943631410598755, "learning_rate": 4.8332870058439465e-06, "loss": 0.6269, "step": 1125 }, { "epoch": 0.38092016238159676, "grad_norm": 0.2879925072193146, "learning_rate": 4.832965226966161e-06, "loss": 0.6487, "step": 1126 }, { "epoch": 0.38125845737483083, "grad_norm": 0.2811349034309387, "learning_rate": 4.832643148581413e-06, "loss": 0.6268, "step": 1127 }, { "epoch": 0.38159675236806495, "grad_norm": 0.30948612093925476, "learning_rate": 4.83232077073105e-06, "loss": 0.638, "step": 1128 }, { "epoch": 0.3819350473612991, "grad_norm": 0.3030785620212555, "learning_rate": 4.8319980934564595e-06, "loss": 0.6621, "step": 1129 }, { "epoch": 0.38227334235453314, "grad_norm": 0.29253870248794556, "learning_rate": 4.831675116799065e-06, "loss": 0.623, "step": 1130 }, { "epoch": 0.38261163734776726, "grad_norm": 0.28105953335762024, "learning_rate": 4.831351840800333e-06, "loss": 0.6375, "step": 1131 }, { "epoch": 0.38294993234100133, "grad_norm": 0.307233989238739, "learning_rate": 4.831028265501764e-06, "loss": 0.6732, "step": 1132 }, { "epoch": 0.38328822733423545, "grad_norm": 0.2878662943840027, "learning_rate": 4.8307043909449e-06, "loss": 0.6332, "step": 1133 }, { "epoch": 0.3836265223274696, "grad_norm": 0.30148088932037354, "learning_rate": 4.830380217171319e-06, "loss": 0.6477, "step": 1134 }, { "epoch": 0.38396481732070364, "grad_norm": 0.3052041232585907, "learning_rate": 4.830055744222639e-06, "loss": 0.6243, "step": 1135 }, { "epoch": 0.38430311231393777, "grad_norm": 0.2861061990261078, "learning_rate": 4.8297309721405175e-06, "loss": 0.6257, "step": 1136 }, { "epoch": 0.38464140730717183, "grad_norm": 0.2866950035095215, "learning_rate": 4.8294059009666455e-06, "loss": 0.623, "step": 1137 }, { "epoch": 0.38497970230040596, "grad_norm": 0.28932666778564453, "learning_rate": 4.8290805307427585e-06, "loss": 0.6312, "step": 1138 }, { "epoch": 0.3853179972936401, "grad_norm": 0.2892247438430786, "learning_rate": 4.828754861510627e-06, "loss": 0.6162, "step": 1139 }, { "epoch": 0.38565629228687415, "grad_norm": 0.30072498321533203, "learning_rate": 4.8284288933120595e-06, "loss": 0.6235, "step": 1140 }, { "epoch": 0.38599458728010827, "grad_norm": 0.2970368266105652, "learning_rate": 4.828102626188905e-06, "loss": 0.6561, "step": 1141 }, { "epoch": 0.38633288227334234, "grad_norm": 0.2955375611782074, "learning_rate": 4.82777606018305e-06, "loss": 0.6355, "step": 1142 }, { "epoch": 0.38667117726657646, "grad_norm": 0.27442505955696106, "learning_rate": 4.827449195336419e-06, "loss": 0.6217, "step": 1143 }, { "epoch": 0.3870094722598105, "grad_norm": 0.28408247232437134, "learning_rate": 4.827122031690974e-06, "loss": 0.6251, "step": 1144 }, { "epoch": 0.38734776725304465, "grad_norm": 0.30623340606689453, "learning_rate": 4.826794569288717e-06, "loss": 0.6523, "step": 1145 }, { "epoch": 0.3876860622462788, "grad_norm": 0.2974097430706024, "learning_rate": 4.826466808171689e-06, "loss": 0.6364, "step": 1146 }, { "epoch": 0.38802435723951284, "grad_norm": 0.2743532061576843, "learning_rate": 4.826138748381966e-06, "loss": 0.6601, "step": 1147 }, { "epoch": 0.38836265223274696, "grad_norm": 0.2808363437652588, "learning_rate": 4.825810389961666e-06, "loss": 0.6576, "step": 1148 }, { "epoch": 0.38870094722598103, "grad_norm": 0.29762133955955505, "learning_rate": 4.825481732952943e-06, "loss": 0.6562, "step": 1149 }, { "epoch": 0.38903924221921515, "grad_norm": 0.2852347791194916, "learning_rate": 4.82515277739799e-06, "loss": 0.6076, "step": 1150 }, { "epoch": 0.3893775372124493, "grad_norm": 0.2761669158935547, "learning_rate": 4.824823523339039e-06, "loss": 0.6341, "step": 1151 }, { "epoch": 0.38971583220568334, "grad_norm": 0.3076622784137726, "learning_rate": 4.824493970818359e-06, "loss": 0.6332, "step": 1152 }, { "epoch": 0.39005412719891747, "grad_norm": 0.28125861287117004, "learning_rate": 4.82416411987826e-06, "loss": 0.6297, "step": 1153 }, { "epoch": 0.39039242219215153, "grad_norm": 0.2835453748703003, "learning_rate": 4.823833970561086e-06, "loss": 0.6394, "step": 1154 }, { "epoch": 0.39073071718538566, "grad_norm": 0.28459158539772034, "learning_rate": 4.823503522909223e-06, "loss": 0.618, "step": 1155 }, { "epoch": 0.3910690121786198, "grad_norm": 0.27935102581977844, "learning_rate": 4.8231727769650934e-06, "loss": 0.6447, "step": 1156 }, { "epoch": 0.39140730717185385, "grad_norm": 0.2861892580986023, "learning_rate": 4.822841732771159e-06, "loss": 0.6379, "step": 1157 }, { "epoch": 0.39174560216508797, "grad_norm": 0.290678471326828, "learning_rate": 4.8225103903699185e-06, "loss": 0.6247, "step": 1158 }, { "epoch": 0.39208389715832204, "grad_norm": 0.2962476313114166, "learning_rate": 4.822178749803912e-06, "loss": 0.6452, "step": 1159 }, { "epoch": 0.39242219215155616, "grad_norm": 0.283066987991333, "learning_rate": 4.821846811115713e-06, "loss": 0.6398, "step": 1160 }, { "epoch": 0.3927604871447903, "grad_norm": 0.28135359287261963, "learning_rate": 4.821514574347937e-06, "loss": 0.618, "step": 1161 }, { "epoch": 0.39309878213802435, "grad_norm": 0.29703113436698914, "learning_rate": 4.821182039543236e-06, "loss": 0.6371, "step": 1162 }, { "epoch": 0.3934370771312585, "grad_norm": 0.33292141556739807, "learning_rate": 4.820849206744303e-06, "loss": 0.6329, "step": 1163 }, { "epoch": 0.39377537212449254, "grad_norm": 0.29253455996513367, "learning_rate": 4.820516075993865e-06, "loss": 0.65, "step": 1164 }, { "epoch": 0.39411366711772666, "grad_norm": 0.3138137757778168, "learning_rate": 4.82018264733469e-06, "loss": 0.6401, "step": 1165 }, { "epoch": 0.3944519621109608, "grad_norm": 0.3234380781650543, "learning_rate": 4.819848920809584e-06, "loss": 0.6454, "step": 1166 }, { "epoch": 0.39479025710419485, "grad_norm": 0.3004589080810547, "learning_rate": 4.8195148964613916e-06, "loss": 0.6386, "step": 1167 }, { "epoch": 0.395128552097429, "grad_norm": 0.2853522300720215, "learning_rate": 4.8191805743329945e-06, "loss": 0.6326, "step": 1168 }, { "epoch": 0.39546684709066304, "grad_norm": 0.3297440707683563, "learning_rate": 4.8188459544673114e-06, "loss": 0.6675, "step": 1169 }, { "epoch": 0.39580514208389717, "grad_norm": 0.3187149167060852, "learning_rate": 4.818511036907304e-06, "loss": 0.6379, "step": 1170 }, { "epoch": 0.39614343707713123, "grad_norm": 0.28473100066185, "learning_rate": 4.8181758216959665e-06, "loss": 0.6199, "step": 1171 }, { "epoch": 0.39648173207036536, "grad_norm": 0.29906320571899414, "learning_rate": 4.817840308876335e-06, "loss": 0.6314, "step": 1172 }, { "epoch": 0.3968200270635995, "grad_norm": 0.3337453603744507, "learning_rate": 4.817504498491483e-06, "loss": 0.6325, "step": 1173 }, { "epoch": 0.39715832205683355, "grad_norm": 0.29629188776016235, "learning_rate": 4.817168390584522e-06, "loss": 0.6378, "step": 1174 }, { "epoch": 0.39749661705006767, "grad_norm": 0.28483545780181885, "learning_rate": 4.816831985198601e-06, "loss": 0.6383, "step": 1175 }, { "epoch": 0.39783491204330174, "grad_norm": 0.28717607259750366, "learning_rate": 4.8164952823769085e-06, "loss": 0.6127, "step": 1176 }, { "epoch": 0.39817320703653586, "grad_norm": 0.30153074860572815, "learning_rate": 4.816158282162671e-06, "loss": 0.6759, "step": 1177 }, { "epoch": 0.39851150202977, "grad_norm": 0.3128243088722229, "learning_rate": 4.815820984599151e-06, "loss": 0.6336, "step": 1178 }, { "epoch": 0.39884979702300405, "grad_norm": 0.29970309138298035, "learning_rate": 4.815483389729652e-06, "loss": 0.6306, "step": 1179 }, { "epoch": 0.39918809201623817, "grad_norm": 0.2995675802230835, "learning_rate": 4.815145497597514e-06, "loss": 0.657, "step": 1180 }, { "epoch": 0.39952638700947224, "grad_norm": 0.33796557784080505, "learning_rate": 4.814807308246116e-06, "loss": 0.6151, "step": 1181 }, { "epoch": 0.39986468200270636, "grad_norm": 0.2935757637023926, "learning_rate": 4.8144688217188765e-06, "loss": 0.6291, "step": 1182 }, { "epoch": 0.4002029769959405, "grad_norm": 0.2939322590827942, "learning_rate": 4.814130038059248e-06, "loss": 0.6379, "step": 1183 }, { "epoch": 0.40054127198917455, "grad_norm": 0.32517513632774353, "learning_rate": 4.813790957310725e-06, "loss": 0.6222, "step": 1184 }, { "epoch": 0.4008795669824087, "grad_norm": 0.28321826457977295, "learning_rate": 4.813451579516839e-06, "loss": 0.6199, "step": 1185 }, { "epoch": 0.40121786197564274, "grad_norm": 0.287484735250473, "learning_rate": 4.8131119047211585e-06, "loss": 0.6533, "step": 1186 }, { "epoch": 0.40155615696887687, "grad_norm": 0.30498993396759033, "learning_rate": 4.8127719329672914e-06, "loss": 0.6154, "step": 1187 }, { "epoch": 0.401894451962111, "grad_norm": 0.28674793243408203, "learning_rate": 4.812431664298884e-06, "loss": 0.6355, "step": 1188 }, { "epoch": 0.40223274695534506, "grad_norm": 0.294114887714386, "learning_rate": 4.812091098759619e-06, "loss": 0.6637, "step": 1189 }, { "epoch": 0.4025710419485792, "grad_norm": 0.2917622923851013, "learning_rate": 4.8117502363932195e-06, "loss": 0.6409, "step": 1190 }, { "epoch": 0.40290933694181325, "grad_norm": 0.2833552658557892, "learning_rate": 4.811409077243445e-06, "loss": 0.6262, "step": 1191 }, { "epoch": 0.40324763193504737, "grad_norm": 0.28660398721694946, "learning_rate": 4.811067621354094e-06, "loss": 0.6387, "step": 1192 }, { "epoch": 0.40358592692828144, "grad_norm": 0.28230899572372437, "learning_rate": 4.8107258687690015e-06, "loss": 0.6346, "step": 1193 }, { "epoch": 0.40392422192151556, "grad_norm": 0.2882273495197296, "learning_rate": 4.810383819532044e-06, "loss": 0.6639, "step": 1194 }, { "epoch": 0.4042625169147497, "grad_norm": 0.3016176223754883, "learning_rate": 4.810041473687132e-06, "loss": 0.6336, "step": 1195 }, { "epoch": 0.40460081190798375, "grad_norm": 0.2878057360649109, "learning_rate": 4.809698831278217e-06, "loss": 0.6015, "step": 1196 }, { "epoch": 0.40493910690121787, "grad_norm": 0.29583847522735596, "learning_rate": 4.809355892349288e-06, "loss": 0.6413, "step": 1197 }, { "epoch": 0.40527740189445194, "grad_norm": 0.29840466380119324, "learning_rate": 4.80901265694437e-06, "loss": 0.6442, "step": 1198 }, { "epoch": 0.40561569688768606, "grad_norm": 0.2863212525844574, "learning_rate": 4.808669125107528e-06, "loss": 0.6521, "step": 1199 }, { "epoch": 0.4059539918809202, "grad_norm": 0.29659488797187805, "learning_rate": 4.808325296882866e-06, "loss": 0.6321, "step": 1200 }, { "epoch": 0.40629228687415425, "grad_norm": 0.29904645681381226, "learning_rate": 4.807981172314524e-06, "loss": 0.6282, "step": 1201 }, { "epoch": 0.4066305818673884, "grad_norm": 0.29009896516799927, "learning_rate": 4.8076367514466815e-06, "loss": 0.6455, "step": 1202 }, { "epoch": 0.40696887686062244, "grad_norm": 0.2847822308540344, "learning_rate": 4.807292034323554e-06, "loss": 0.6386, "step": 1203 }, { "epoch": 0.40730717185385656, "grad_norm": 0.316760390996933, "learning_rate": 4.8069470209893976e-06, "loss": 0.6163, "step": 1204 }, { "epoch": 0.4076454668470907, "grad_norm": 0.28848010301589966, "learning_rate": 4.806601711488505e-06, "loss": 0.6422, "step": 1205 }, { "epoch": 0.40798376184032475, "grad_norm": 0.28792351484298706, "learning_rate": 4.806256105865207e-06, "loss": 0.645, "step": 1206 }, { "epoch": 0.4083220568335589, "grad_norm": 0.2956791818141937, "learning_rate": 4.805910204163873e-06, "loss": 0.6307, "step": 1207 }, { "epoch": 0.40866035182679294, "grad_norm": 0.27958643436431885, "learning_rate": 4.805564006428909e-06, "loss": 0.6482, "step": 1208 }, { "epoch": 0.40899864682002707, "grad_norm": 0.29864054918289185, "learning_rate": 4.80521751270476e-06, "loss": 0.6393, "step": 1209 }, { "epoch": 0.4093369418132612, "grad_norm": 0.28602924942970276, "learning_rate": 4.804870723035912e-06, "loss": 0.6451, "step": 1210 }, { "epoch": 0.40967523680649526, "grad_norm": 0.2817135155200958, "learning_rate": 4.804523637466882e-06, "loss": 0.6507, "step": 1211 }, { "epoch": 0.4100135317997294, "grad_norm": 0.2887398302555084, "learning_rate": 4.80417625604223e-06, "loss": 0.6357, "step": 1212 }, { "epoch": 0.41035182679296345, "grad_norm": 0.29490935802459717, "learning_rate": 4.803828578806555e-06, "loss": 0.6125, "step": 1213 }, { "epoch": 0.41069012178619757, "grad_norm": 0.28504541516304016, "learning_rate": 4.8034806058044895e-06, "loss": 0.6331, "step": 1214 }, { "epoch": 0.41102841677943164, "grad_norm": 0.3002273738384247, "learning_rate": 4.803132337080708e-06, "loss": 0.6351, "step": 1215 }, { "epoch": 0.41136671177266576, "grad_norm": 0.3103099465370178, "learning_rate": 4.80278377267992e-06, "loss": 0.6436, "step": 1216 }, { "epoch": 0.4117050067658999, "grad_norm": 0.291058212518692, "learning_rate": 4.802434912646876e-06, "loss": 0.6405, "step": 1217 }, { "epoch": 0.41204330175913395, "grad_norm": 0.28568235039711, "learning_rate": 4.802085757026361e-06, "loss": 0.6553, "step": 1218 }, { "epoch": 0.4123815967523681, "grad_norm": 0.303610622882843, "learning_rate": 4.801736305863202e-06, "loss": 0.6947, "step": 1219 }, { "epoch": 0.41271989174560214, "grad_norm": 0.2856765687465668, "learning_rate": 4.801386559202259e-06, "loss": 0.6226, "step": 1220 }, { "epoch": 0.41305818673883626, "grad_norm": 0.30076685547828674, "learning_rate": 4.801036517088434e-06, "loss": 0.6668, "step": 1221 }, { "epoch": 0.4133964817320704, "grad_norm": 0.2825355529785156, "learning_rate": 4.800686179566666e-06, "loss": 0.6337, "step": 1222 }, { "epoch": 0.41373477672530445, "grad_norm": 0.29440179467201233, "learning_rate": 4.8003355466819306e-06, "loss": 0.6355, "step": 1223 }, { "epoch": 0.4140730717185386, "grad_norm": 0.28212273120880127, "learning_rate": 4.7999846184792424e-06, "loss": 0.6378, "step": 1224 }, { "epoch": 0.41441136671177264, "grad_norm": 0.3060968518257141, "learning_rate": 4.799633395003655e-06, "loss": 0.637, "step": 1225 }, { "epoch": 0.41474966170500677, "grad_norm": 0.2914919853210449, "learning_rate": 4.799281876300256e-06, "loss": 0.6605, "step": 1226 }, { "epoch": 0.4150879566982409, "grad_norm": 0.29111871123313904, "learning_rate": 4.7989300624141745e-06, "loss": 0.6086, "step": 1227 }, { "epoch": 0.41542625169147496, "grad_norm": 0.2990707457065582, "learning_rate": 4.798577953390577e-06, "loss": 0.6452, "step": 1228 }, { "epoch": 0.4157645466847091, "grad_norm": 0.29072779417037964, "learning_rate": 4.7982255492746685e-06, "loss": 0.6408, "step": 1229 }, { "epoch": 0.41610284167794315, "grad_norm": 0.28976115584373474, "learning_rate": 4.797872850111689e-06, "loss": 0.6294, "step": 1230 }, { "epoch": 0.41644113667117727, "grad_norm": 0.291080504655838, "learning_rate": 4.797519855946918e-06, "loss": 0.6231, "step": 1231 }, { "epoch": 0.4167794316644114, "grad_norm": 0.2862843871116638, "learning_rate": 4.797166566825675e-06, "loss": 0.626, "step": 1232 }, { "epoch": 0.41711772665764546, "grad_norm": 0.28314611315727234, "learning_rate": 4.7968129827933135e-06, "loss": 0.6495, "step": 1233 }, { "epoch": 0.4174560216508796, "grad_norm": 0.29271024465560913, "learning_rate": 4.7964591038952276e-06, "loss": 0.6341, "step": 1234 }, { "epoch": 0.41779431664411365, "grad_norm": 0.2987518310546875, "learning_rate": 4.796104930176848e-06, "loss": 0.6417, "step": 1235 }, { "epoch": 0.4181326116373478, "grad_norm": 0.29295241832733154, "learning_rate": 4.795750461683643e-06, "loss": 0.6201, "step": 1236 }, { "epoch": 0.41847090663058184, "grad_norm": 0.29133695363998413, "learning_rate": 4.7953956984611215e-06, "loss": 0.6094, "step": 1237 }, { "epoch": 0.41880920162381596, "grad_norm": 0.2857004702091217, "learning_rate": 4.795040640554827e-06, "loss": 0.6453, "step": 1238 }, { "epoch": 0.4191474966170501, "grad_norm": 0.2874625623226166, "learning_rate": 4.7946852880103414e-06, "loss": 0.6183, "step": 1239 }, { "epoch": 0.41948579161028415, "grad_norm": 0.29205045104026794, "learning_rate": 4.794329640873285e-06, "loss": 0.6329, "step": 1240 }, { "epoch": 0.4198240866035183, "grad_norm": 0.2948002219200134, "learning_rate": 4.793973699189317e-06, "loss": 0.6295, "step": 1241 }, { "epoch": 0.42016238159675234, "grad_norm": 0.2800194025039673, "learning_rate": 4.793617463004132e-06, "loss": 0.6322, "step": 1242 }, { "epoch": 0.42050067658998647, "grad_norm": 0.27651315927505493, "learning_rate": 4.793260932363465e-06, "loss": 0.6343, "step": 1243 }, { "epoch": 0.4208389715832206, "grad_norm": 0.2835398316383362, "learning_rate": 4.792904107313086e-06, "loss": 0.6401, "step": 1244 }, { "epoch": 0.42117726657645466, "grad_norm": 0.28559476137161255, "learning_rate": 4.792546987898807e-06, "loss": 0.625, "step": 1245 }, { "epoch": 0.4215155615696888, "grad_norm": 0.27733302116394043, "learning_rate": 4.792189574166473e-06, "loss": 0.6432, "step": 1246 }, { "epoch": 0.42185385656292285, "grad_norm": 0.29606395959854126, "learning_rate": 4.791831866161968e-06, "loss": 0.643, "step": 1247 }, { "epoch": 0.42219215155615697, "grad_norm": 0.30032095313072205, "learning_rate": 4.7914738639312165e-06, "loss": 0.612, "step": 1248 }, { "epoch": 0.4225304465493911, "grad_norm": 0.3007729649543762, "learning_rate": 4.791115567520179e-06, "loss": 0.6394, "step": 1249 }, { "epoch": 0.42286874154262516, "grad_norm": 0.290590763092041, "learning_rate": 4.790756976974853e-06, "loss": 0.6427, "step": 1250 }, { "epoch": 0.4232070365358593, "grad_norm": 0.2848972976207733, "learning_rate": 4.790398092341275e-06, "loss": 0.6471, "step": 1251 }, { "epoch": 0.42354533152909335, "grad_norm": 0.29602229595184326, "learning_rate": 4.790038913665519e-06, "loss": 0.613, "step": 1252 }, { "epoch": 0.4238836265223275, "grad_norm": 0.2961762547492981, "learning_rate": 4.789679440993695e-06, "loss": 0.6486, "step": 1253 }, { "epoch": 0.4242219215155616, "grad_norm": 0.2972279489040375, "learning_rate": 4.789319674371953e-06, "loss": 0.6368, "step": 1254 }, { "epoch": 0.42456021650879566, "grad_norm": 0.28891313076019287, "learning_rate": 4.788959613846481e-06, "loss": 0.6802, "step": 1255 }, { "epoch": 0.4248985115020298, "grad_norm": 0.28022804856300354, "learning_rate": 4.7885992594635025e-06, "loss": 0.627, "step": 1256 }, { "epoch": 0.42523680649526385, "grad_norm": 0.2823637127876282, "learning_rate": 4.78823861126928e-06, "loss": 0.6288, "step": 1257 }, { "epoch": 0.425575101488498, "grad_norm": 0.29723280668258667, "learning_rate": 4.7878776693101146e-06, "loss": 0.6202, "step": 1258 }, { "epoch": 0.42591339648173204, "grad_norm": 0.2898513376712799, "learning_rate": 4.787516433632342e-06, "loss": 0.6265, "step": 1259 }, { "epoch": 0.42625169147496617, "grad_norm": 0.31341275572776794, "learning_rate": 4.787154904282341e-06, "loss": 0.6241, "step": 1260 }, { "epoch": 0.4265899864682003, "grad_norm": 0.28693923354148865, "learning_rate": 4.786793081306523e-06, "loss": 0.6273, "step": 1261 }, { "epoch": 0.42692828146143436, "grad_norm": 0.305917888879776, "learning_rate": 4.786430964751338e-06, "loss": 0.6224, "step": 1262 }, { "epoch": 0.4272665764546685, "grad_norm": 0.2971956133842468, "learning_rate": 4.7860685546632775e-06, "loss": 0.6333, "step": 1263 }, { "epoch": 0.42760487144790255, "grad_norm": 0.3205307424068451, "learning_rate": 4.785705851088865e-06, "loss": 0.6435, "step": 1264 }, { "epoch": 0.42794316644113667, "grad_norm": 0.3163212835788727, "learning_rate": 4.785342854074666e-06, "loss": 0.6457, "step": 1265 }, { "epoch": 0.4282814614343708, "grad_norm": 0.28766465187072754, "learning_rate": 4.784979563667281e-06, "loss": 0.6522, "step": 1266 }, { "epoch": 0.42861975642760486, "grad_norm": 0.298446387052536, "learning_rate": 4.784615979913352e-06, "loss": 0.6412, "step": 1267 }, { "epoch": 0.428958051420839, "grad_norm": 0.30019763112068176, "learning_rate": 4.784252102859553e-06, "loss": 0.6241, "step": 1268 }, { "epoch": 0.42929634641407305, "grad_norm": 0.2901851534843445, "learning_rate": 4.7838879325526e-06, "loss": 0.6138, "step": 1269 }, { "epoch": 0.42963464140730717, "grad_norm": 0.2932225167751312, "learning_rate": 4.783523469039245e-06, "loss": 0.5911, "step": 1270 }, { "epoch": 0.4299729364005413, "grad_norm": 0.29000577330589294, "learning_rate": 4.7831587123662794e-06, "loss": 0.6138, "step": 1271 }, { "epoch": 0.43031123139377536, "grad_norm": 0.2969661355018616, "learning_rate": 4.782793662580528e-06, "loss": 0.6491, "step": 1272 }, { "epoch": 0.4306495263870095, "grad_norm": 0.2832574248313904, "learning_rate": 4.782428319728859e-06, "loss": 0.663, "step": 1273 }, { "epoch": 0.43098782138024355, "grad_norm": 0.28881847858428955, "learning_rate": 4.782062683858174e-06, "loss": 0.6298, "step": 1274 }, { "epoch": 0.4313261163734777, "grad_norm": 0.301044225692749, "learning_rate": 4.781696755015412e-06, "loss": 0.6531, "step": 1275 }, { "epoch": 0.4316644113667118, "grad_norm": 0.29331329464912415, "learning_rate": 4.781330533247554e-06, "loss": 0.652, "step": 1276 }, { "epoch": 0.43200270635994586, "grad_norm": 0.2893969714641571, "learning_rate": 4.780964018601613e-06, "loss": 0.6428, "step": 1277 }, { "epoch": 0.43234100135318, "grad_norm": 0.2999071180820465, "learning_rate": 4.780597211124643e-06, "loss": 0.6286, "step": 1278 }, { "epoch": 0.43267929634641406, "grad_norm": 0.29314321279525757, "learning_rate": 4.780230110863737e-06, "loss": 0.6701, "step": 1279 }, { "epoch": 0.4330175913396482, "grad_norm": 0.28190457820892334, "learning_rate": 4.779862717866021e-06, "loss": 0.6192, "step": 1280 }, { "epoch": 0.4333558863328823, "grad_norm": 0.2893087863922119, "learning_rate": 4.779495032178662e-06, "loss": 0.6405, "step": 1281 }, { "epoch": 0.43369418132611637, "grad_norm": 0.2829298973083496, "learning_rate": 4.779127053848863e-06, "loss": 0.6296, "step": 1282 }, { "epoch": 0.4340324763193505, "grad_norm": 0.2973841726779938, "learning_rate": 4.778758782923866e-06, "loss": 0.6828, "step": 1283 }, { "epoch": 0.43437077131258456, "grad_norm": 0.298194020986557, "learning_rate": 4.778390219450949e-06, "loss": 0.648, "step": 1284 }, { "epoch": 0.4347090663058187, "grad_norm": 0.2952548861503601, "learning_rate": 4.778021363477429e-06, "loss": 0.6675, "step": 1285 }, { "epoch": 0.43504736129905275, "grad_norm": 0.28702524304389954, "learning_rate": 4.777652215050659e-06, "loss": 0.6434, "step": 1286 }, { "epoch": 0.43538565629228687, "grad_norm": 0.28044795989990234, "learning_rate": 4.777282774218033e-06, "loss": 0.6356, "step": 1287 }, { "epoch": 0.435723951285521, "grad_norm": 0.2795807123184204, "learning_rate": 4.776913041026976e-06, "loss": 0.6434, "step": 1288 }, { "epoch": 0.43606224627875506, "grad_norm": 0.2964823544025421, "learning_rate": 4.776543015524957e-06, "loss": 0.6511, "step": 1289 }, { "epoch": 0.4364005412719892, "grad_norm": 0.29234057664871216, "learning_rate": 4.77617269775948e-06, "loss": 0.6316, "step": 1290 }, { "epoch": 0.43673883626522325, "grad_norm": 0.2864561080932617, "learning_rate": 4.775802087778085e-06, "loss": 0.6307, "step": 1291 }, { "epoch": 0.4370771312584574, "grad_norm": 0.2859666049480438, "learning_rate": 4.775431185628353e-06, "loss": 0.607, "step": 1292 }, { "epoch": 0.4374154262516915, "grad_norm": 0.2826318144798279, "learning_rate": 4.775059991357899e-06, "loss": 0.6114, "step": 1293 }, { "epoch": 0.43775372124492556, "grad_norm": 0.2815684676170349, "learning_rate": 4.774688505014379e-06, "loss": 0.6453, "step": 1294 }, { "epoch": 0.4380920162381597, "grad_norm": 0.3040962219238281, "learning_rate": 4.7743167266454815e-06, "loss": 0.6587, "step": 1295 }, { "epoch": 0.43843031123139375, "grad_norm": 0.2755117416381836, "learning_rate": 4.773944656298939e-06, "loss": 0.6284, "step": 1296 }, { "epoch": 0.4387686062246279, "grad_norm": 0.31091102957725525, "learning_rate": 4.773572294022515e-06, "loss": 0.6014, "step": 1297 }, { "epoch": 0.439106901217862, "grad_norm": 0.29069027304649353, "learning_rate": 4.7731996398640155e-06, "loss": 0.6281, "step": 1298 }, { "epoch": 0.43944519621109607, "grad_norm": 0.27709999680519104, "learning_rate": 4.7728266938712806e-06, "loss": 0.6052, "step": 1299 }, { "epoch": 0.4397834912043302, "grad_norm": 0.2892073690891266, "learning_rate": 4.772453456092191e-06, "loss": 0.6333, "step": 1300 }, { "epoch": 0.44012178619756426, "grad_norm": 0.30395960807800293, "learning_rate": 4.772079926574662e-06, "loss": 0.6376, "step": 1301 }, { "epoch": 0.4404600811907984, "grad_norm": 0.3262636363506317, "learning_rate": 4.771706105366647e-06, "loss": 0.6489, "step": 1302 }, { "epoch": 0.4407983761840325, "grad_norm": 0.2975095510482788, "learning_rate": 4.771331992516137e-06, "loss": 0.6259, "step": 1303 }, { "epoch": 0.44113667117726657, "grad_norm": 0.3001902103424072, "learning_rate": 4.770957588071163e-06, "loss": 0.6566, "step": 1304 }, { "epoch": 0.4414749661705007, "grad_norm": 0.315866619348526, "learning_rate": 4.7705828920797895e-06, "loss": 0.6424, "step": 1305 }, { "epoch": 0.44181326116373476, "grad_norm": 0.2861289978027344, "learning_rate": 4.770207904590121e-06, "loss": 0.6457, "step": 1306 }, { "epoch": 0.4421515561569689, "grad_norm": 0.30767688155174255, "learning_rate": 4.7698326256502966e-06, "loss": 0.6309, "step": 1307 }, { "epoch": 0.44248985115020295, "grad_norm": 0.3229195773601532, "learning_rate": 4.7694570553084974e-06, "loss": 0.6264, "step": 1308 }, { "epoch": 0.4428281461434371, "grad_norm": 0.29174500703811646, "learning_rate": 4.769081193612937e-06, "loss": 0.6583, "step": 1309 }, { "epoch": 0.4431664411366712, "grad_norm": 0.28457409143447876, "learning_rate": 4.76870504061187e-06, "loss": 0.6225, "step": 1310 }, { "epoch": 0.44350473612990526, "grad_norm": 0.285459965467453, "learning_rate": 4.768328596353586e-06, "loss": 0.6216, "step": 1311 }, { "epoch": 0.4438430311231394, "grad_norm": 0.29809266328811646, "learning_rate": 4.767951860886415e-06, "loss": 0.6375, "step": 1312 }, { "epoch": 0.44418132611637345, "grad_norm": 0.29426756501197815, "learning_rate": 4.767574834258721e-06, "loss": 0.6495, "step": 1313 }, { "epoch": 0.4445196211096076, "grad_norm": 0.277817040681839, "learning_rate": 4.7671975165189065e-06, "loss": 0.6251, "step": 1314 }, { "epoch": 0.4448579161028417, "grad_norm": 0.28166523575782776, "learning_rate": 4.766819907715412e-06, "loss": 0.6252, "step": 1315 }, { "epoch": 0.44519621109607577, "grad_norm": 0.2760406732559204, "learning_rate": 4.766442007896716e-06, "loss": 0.639, "step": 1316 }, { "epoch": 0.4455345060893099, "grad_norm": 0.28585439920425415, "learning_rate": 4.766063817111332e-06, "loss": 0.633, "step": 1317 }, { "epoch": 0.44587280108254396, "grad_norm": 0.2784285247325897, "learning_rate": 4.765685335407814e-06, "loss": 0.6203, "step": 1318 }, { "epoch": 0.4462110960757781, "grad_norm": 0.28521108627319336, "learning_rate": 4.765306562834749e-06, "loss": 0.6029, "step": 1319 }, { "epoch": 0.4465493910690122, "grad_norm": 0.2915601432323456, "learning_rate": 4.764927499440767e-06, "loss": 0.6454, "step": 1320 }, { "epoch": 0.44688768606224627, "grad_norm": 0.3053300976753235, "learning_rate": 4.7645481452745305e-06, "loss": 0.6349, "step": 1321 }, { "epoch": 0.4472259810554804, "grad_norm": 0.29973912239074707, "learning_rate": 4.764168500384741e-06, "loss": 0.6618, "step": 1322 }, { "epoch": 0.44756427604871446, "grad_norm": 0.2932901084423065, "learning_rate": 4.763788564820138e-06, "loss": 0.6189, "step": 1323 }, { "epoch": 0.4479025710419486, "grad_norm": 0.29192137718200684, "learning_rate": 4.7634083386294975e-06, "loss": 0.6457, "step": 1324 }, { "epoch": 0.4482408660351827, "grad_norm": 0.29242607951164246, "learning_rate": 4.763027821861634e-06, "loss": 0.6253, "step": 1325 }, { "epoch": 0.4485791610284168, "grad_norm": 0.28581079840660095, "learning_rate": 4.7626470145653975e-06, "loss": 0.6435, "step": 1326 }, { "epoch": 0.4489174560216509, "grad_norm": 0.3019113540649414, "learning_rate": 4.762265916789676e-06, "loss": 0.6543, "step": 1327 }, { "epoch": 0.44925575101488496, "grad_norm": 0.2888141870498657, "learning_rate": 4.7618845285833955e-06, "loss": 0.6301, "step": 1328 }, { "epoch": 0.4495940460081191, "grad_norm": 0.2920967936515808, "learning_rate": 4.761502849995519e-06, "loss": 0.619, "step": 1329 }, { "epoch": 0.44993234100135315, "grad_norm": 0.29467645287513733, "learning_rate": 4.761120881075046e-06, "loss": 0.6471, "step": 1330 }, { "epoch": 0.4502706359945873, "grad_norm": 0.302282452583313, "learning_rate": 4.760738621871014e-06, "loss": 0.6422, "step": 1331 }, { "epoch": 0.4506089309878214, "grad_norm": 0.32091787457466125, "learning_rate": 4.7603560724324985e-06, "loss": 0.6119, "step": 1332 }, { "epoch": 0.45094722598105547, "grad_norm": 0.2797046899795532, "learning_rate": 4.759973232808609e-06, "loss": 0.6276, "step": 1333 }, { "epoch": 0.4512855209742896, "grad_norm": 0.2929953634738922, "learning_rate": 4.7595901030484955e-06, "loss": 0.6542, "step": 1334 }, { "epoch": 0.45162381596752366, "grad_norm": 0.3046846091747284, "learning_rate": 4.759206683201346e-06, "loss": 0.6434, "step": 1335 }, { "epoch": 0.4519621109607578, "grad_norm": 0.29917415976524353, "learning_rate": 4.758822973316383e-06, "loss": 0.6491, "step": 1336 }, { "epoch": 0.4523004059539919, "grad_norm": 0.29974162578582764, "learning_rate": 4.758438973442867e-06, "loss": 0.641, "step": 1337 }, { "epoch": 0.45263870094722597, "grad_norm": 0.28453171253204346, "learning_rate": 4.758054683630097e-06, "loss": 0.6292, "step": 1338 }, { "epoch": 0.4529769959404601, "grad_norm": 0.2985386550426483, "learning_rate": 4.757670103927406e-06, "loss": 0.6235, "step": 1339 }, { "epoch": 0.45331529093369416, "grad_norm": 0.2876027226448059, "learning_rate": 4.757285234384169e-06, "loss": 0.6396, "step": 1340 }, { "epoch": 0.4536535859269283, "grad_norm": 0.29215019941329956, "learning_rate": 4.756900075049794e-06, "loss": 0.6409, "step": 1341 }, { "epoch": 0.4539918809201624, "grad_norm": 0.29155978560447693, "learning_rate": 4.75651462597373e-06, "loss": 0.6111, "step": 1342 }, { "epoch": 0.4543301759133965, "grad_norm": 0.28371426463127136, "learning_rate": 4.756128887205459e-06, "loss": 0.6385, "step": 1343 }, { "epoch": 0.4546684709066306, "grad_norm": 0.29578351974487305, "learning_rate": 4.755742858794503e-06, "loss": 0.6335, "step": 1344 }, { "epoch": 0.45500676589986466, "grad_norm": 0.2908320426940918, "learning_rate": 4.755356540790421e-06, "loss": 0.649, "step": 1345 }, { "epoch": 0.4553450608930988, "grad_norm": 0.28711384534835815, "learning_rate": 4.754969933242808e-06, "loss": 0.6248, "step": 1346 }, { "epoch": 0.4556833558863329, "grad_norm": 0.2830841839313507, "learning_rate": 4.754583036201297e-06, "loss": 0.6144, "step": 1347 }, { "epoch": 0.456021650879567, "grad_norm": 0.2901039123535156, "learning_rate": 4.754195849715557e-06, "loss": 0.6547, "step": 1348 }, { "epoch": 0.4563599458728011, "grad_norm": 0.29270651936531067, "learning_rate": 4.753808373835297e-06, "loss": 0.6314, "step": 1349 }, { "epoch": 0.45669824086603517, "grad_norm": 0.3053850829601288, "learning_rate": 4.7534206086102604e-06, "loss": 0.6672, "step": 1350 }, { "epoch": 0.4570365358592693, "grad_norm": 0.3080502450466156, "learning_rate": 4.753032554090229e-06, "loss": 0.658, "step": 1351 }, { "epoch": 0.45737483085250336, "grad_norm": 0.2952156662940979, "learning_rate": 4.75264421032502e-06, "loss": 0.6457, "step": 1352 }, { "epoch": 0.4577131258457375, "grad_norm": 0.29784637689590454, "learning_rate": 4.752255577364492e-06, "loss": 0.6259, "step": 1353 }, { "epoch": 0.4580514208389716, "grad_norm": 0.2907617688179016, "learning_rate": 4.751866655258535e-06, "loss": 0.6375, "step": 1354 }, { "epoch": 0.45838971583220567, "grad_norm": 0.29962706565856934, "learning_rate": 4.75147744405708e-06, "loss": 0.6405, "step": 1355 }, { "epoch": 0.4587280108254398, "grad_norm": 0.28614145517349243, "learning_rate": 4.751087943810094e-06, "loss": 0.6156, "step": 1356 }, { "epoch": 0.45906630581867386, "grad_norm": 0.2923153042793274, "learning_rate": 4.7506981545675815e-06, "loss": 0.6227, "step": 1357 }, { "epoch": 0.459404600811908, "grad_norm": 0.29524093866348267, "learning_rate": 4.7503080763795836e-06, "loss": 0.6504, "step": 1358 }, { "epoch": 0.4597428958051421, "grad_norm": 0.2971424460411072, "learning_rate": 4.749917709296178e-06, "loss": 0.6445, "step": 1359 }, { "epoch": 0.46008119079837617, "grad_norm": 0.29231759905815125, "learning_rate": 4.749527053367482e-06, "loss": 0.6015, "step": 1360 }, { "epoch": 0.4604194857916103, "grad_norm": 0.2905111014842987, "learning_rate": 4.749136108643646e-06, "loss": 0.6244, "step": 1361 }, { "epoch": 0.46075778078484436, "grad_norm": 0.3043908178806305, "learning_rate": 4.748744875174861e-06, "loss": 0.6537, "step": 1362 }, { "epoch": 0.4610960757780785, "grad_norm": 0.297868013381958, "learning_rate": 4.748353353011353e-06, "loss": 0.6261, "step": 1363 }, { "epoch": 0.4614343707713126, "grad_norm": 0.2935093641281128, "learning_rate": 4.747961542203387e-06, "loss": 0.6272, "step": 1364 }, { "epoch": 0.4617726657645467, "grad_norm": 0.2828471064567566, "learning_rate": 4.747569442801262e-06, "loss": 0.6421, "step": 1365 }, { "epoch": 0.4621109607577808, "grad_norm": 0.29044583439826965, "learning_rate": 4.747177054855316e-06, "loss": 0.6221, "step": 1366 }, { "epoch": 0.46244925575101486, "grad_norm": 0.2825765907764435, "learning_rate": 4.746784378415926e-06, "loss": 0.6054, "step": 1367 }, { "epoch": 0.462787550744249, "grad_norm": 0.30992552638053894, "learning_rate": 4.746391413533503e-06, "loss": 0.621, "step": 1368 }, { "epoch": 0.4631258457374831, "grad_norm": 0.30108433961868286, "learning_rate": 4.745998160258495e-06, "loss": 0.6174, "step": 1369 }, { "epoch": 0.4634641407307172, "grad_norm": 0.27994582056999207, "learning_rate": 4.745604618641388e-06, "loss": 0.6526, "step": 1370 }, { "epoch": 0.4638024357239513, "grad_norm": 0.3178618848323822, "learning_rate": 4.745210788732707e-06, "loss": 0.6011, "step": 1371 }, { "epoch": 0.46414073071718537, "grad_norm": 0.29102611541748047, "learning_rate": 4.74481667058301e-06, "loss": 0.6517, "step": 1372 }, { "epoch": 0.4644790257104195, "grad_norm": 0.2938326597213745, "learning_rate": 4.744422264242895e-06, "loss": 0.6288, "step": 1373 }, { "epoch": 0.4648173207036536, "grad_norm": 0.2730098366737366, "learning_rate": 4.744027569762996e-06, "loss": 0.6241, "step": 1374 }, { "epoch": 0.4651556156968877, "grad_norm": 0.30259615182876587, "learning_rate": 4.743632587193983e-06, "loss": 0.6524, "step": 1375 }, { "epoch": 0.4654939106901218, "grad_norm": 0.2964252233505249, "learning_rate": 4.743237316586564e-06, "loss": 0.6453, "step": 1376 }, { "epoch": 0.46583220568335587, "grad_norm": 0.2919650673866272, "learning_rate": 4.742841757991486e-06, "loss": 0.6295, "step": 1377 }, { "epoch": 0.46617050067659, "grad_norm": 0.3055798411369324, "learning_rate": 4.74244591145953e-06, "loss": 0.6531, "step": 1378 }, { "epoch": 0.46650879566982406, "grad_norm": 0.2998957335948944, "learning_rate": 4.742049777041514e-06, "loss": 0.6265, "step": 1379 }, { "epoch": 0.4668470906630582, "grad_norm": 0.29530641436576843, "learning_rate": 4.741653354788295e-06, "loss": 0.6321, "step": 1380 }, { "epoch": 0.4671853856562923, "grad_norm": 0.2901705503463745, "learning_rate": 4.741256644750766e-06, "loss": 0.6425, "step": 1381 }, { "epoch": 0.4675236806495264, "grad_norm": 0.2957158386707306, "learning_rate": 4.7408596469798555e-06, "loss": 0.6347, "step": 1382 }, { "epoch": 0.4678619756427605, "grad_norm": 0.30493056774139404, "learning_rate": 4.740462361526531e-06, "loss": 0.6378, "step": 1383 }, { "epoch": 0.46820027063599456, "grad_norm": 0.30551135540008545, "learning_rate": 4.740064788441796e-06, "loss": 0.644, "step": 1384 }, { "epoch": 0.4685385656292287, "grad_norm": 0.3051181435585022, "learning_rate": 4.739666927776691e-06, "loss": 0.6409, "step": 1385 }, { "epoch": 0.4688768606224628, "grad_norm": 0.2968808114528656, "learning_rate": 4.739268779582294e-06, "loss": 0.6492, "step": 1386 }, { "epoch": 0.4692151556156969, "grad_norm": 0.3039150834083557, "learning_rate": 4.738870343909719e-06, "loss": 0.6412, "step": 1387 }, { "epoch": 0.469553450608931, "grad_norm": 0.2963270843029022, "learning_rate": 4.738471620810116e-06, "loss": 0.6187, "step": 1388 }, { "epoch": 0.46989174560216507, "grad_norm": 0.2898751199245453, "learning_rate": 4.738072610334677e-06, "loss": 0.6273, "step": 1389 }, { "epoch": 0.4702300405953992, "grad_norm": 0.30511993169784546, "learning_rate": 4.737673312534624e-06, "loss": 0.6142, "step": 1390 }, { "epoch": 0.4705683355886333, "grad_norm": 0.2916101813316345, "learning_rate": 4.737273727461219e-06, "loss": 0.6232, "step": 1391 }, { "epoch": 0.4709066305818674, "grad_norm": 0.29085099697113037, "learning_rate": 4.736873855165762e-06, "loss": 0.6198, "step": 1392 }, { "epoch": 0.4712449255751015, "grad_norm": 0.28201308846473694, "learning_rate": 4.7364736956995885e-06, "loss": 0.6311, "step": 1393 }, { "epoch": 0.47158322056833557, "grad_norm": 0.30364304780960083, "learning_rate": 4.736073249114071e-06, "loss": 0.6485, "step": 1394 }, { "epoch": 0.4719215155615697, "grad_norm": 0.3099037706851959, "learning_rate": 4.735672515460619e-06, "loss": 0.6662, "step": 1395 }, { "epoch": 0.4722598105548038, "grad_norm": 0.3040882647037506, "learning_rate": 4.7352714947906785e-06, "loss": 0.65, "step": 1396 }, { "epoch": 0.4725981055480379, "grad_norm": 0.29287227988243103, "learning_rate": 4.7348701871557335e-06, "loss": 0.6391, "step": 1397 }, { "epoch": 0.472936400541272, "grad_norm": 0.309793084859848, "learning_rate": 4.734468592607304e-06, "loss": 0.6349, "step": 1398 }, { "epoch": 0.4732746955345061, "grad_norm": 0.32687175273895264, "learning_rate": 4.7340667111969455e-06, "loss": 0.6093, "step": 1399 }, { "epoch": 0.4736129905277402, "grad_norm": 0.2984752953052521, "learning_rate": 4.7336645429762525e-06, "loss": 0.6214, "step": 1400 }, { "epoch": 0.47395128552097426, "grad_norm": 0.3069734573364258, "learning_rate": 4.733262087996856e-06, "loss": 0.6316, "step": 1401 }, { "epoch": 0.4742895805142084, "grad_norm": 0.3015008270740509, "learning_rate": 4.732859346310423e-06, "loss": 0.6207, "step": 1402 }, { "epoch": 0.4746278755074425, "grad_norm": 0.2884121835231781, "learning_rate": 4.732456317968657e-06, "loss": 0.6189, "step": 1403 }, { "epoch": 0.4749661705006766, "grad_norm": 0.3015613555908203, "learning_rate": 4.732053003023301e-06, "loss": 0.6479, "step": 1404 }, { "epoch": 0.4753044654939107, "grad_norm": 0.309891939163208, "learning_rate": 4.73164940152613e-06, "loss": 0.6508, "step": 1405 }, { "epoch": 0.47564276048714477, "grad_norm": 0.2924291491508484, "learning_rate": 4.73124551352896e-06, "loss": 0.6394, "step": 1406 }, { "epoch": 0.4759810554803789, "grad_norm": 0.3016890585422516, "learning_rate": 4.730841339083642e-06, "loss": 0.6225, "step": 1407 }, { "epoch": 0.476319350473613, "grad_norm": 0.32249557971954346, "learning_rate": 4.730436878242064e-06, "loss": 0.6343, "step": 1408 }, { "epoch": 0.4766576454668471, "grad_norm": 0.29110148549079895, "learning_rate": 4.730032131056152e-06, "loss": 0.6443, "step": 1409 }, { "epoch": 0.4769959404600812, "grad_norm": 0.2962009906768799, "learning_rate": 4.7296270975778644e-06, "loss": 0.6331, "step": 1410 }, { "epoch": 0.47733423545331527, "grad_norm": 0.31483912467956543, "learning_rate": 4.729221777859203e-06, "loss": 0.6251, "step": 1411 }, { "epoch": 0.4776725304465494, "grad_norm": 0.2956962585449219, "learning_rate": 4.728816171952202e-06, "loss": 0.6193, "step": 1412 }, { "epoch": 0.4780108254397835, "grad_norm": 0.29781419038772583, "learning_rate": 4.7284102799089325e-06, "loss": 0.6516, "step": 1413 }, { "epoch": 0.4783491204330176, "grad_norm": 0.31204479932785034, "learning_rate": 4.728004101781503e-06, "loss": 0.6504, "step": 1414 }, { "epoch": 0.4786874154262517, "grad_norm": 0.29530659317970276, "learning_rate": 4.7275976376220585e-06, "loss": 0.6497, "step": 1415 }, { "epoch": 0.4790257104194858, "grad_norm": 0.2870703339576721, "learning_rate": 4.727190887482783e-06, "loss": 0.6241, "step": 1416 }, { "epoch": 0.4793640054127199, "grad_norm": 0.2797526717185974, "learning_rate": 4.726783851415893e-06, "loss": 0.6024, "step": 1417 }, { "epoch": 0.479702300405954, "grad_norm": 0.31374120712280273, "learning_rate": 4.7263765294736465e-06, "loss": 0.6248, "step": 1418 }, { "epoch": 0.4800405953991881, "grad_norm": 0.3121717870235443, "learning_rate": 4.7259689217083325e-06, "loss": 0.6621, "step": 1419 }, { "epoch": 0.4803788903924222, "grad_norm": 0.29636168479919434, "learning_rate": 4.725561028172282e-06, "loss": 0.6427, "step": 1420 }, { "epoch": 0.4807171853856563, "grad_norm": 0.2872474789619446, "learning_rate": 4.72515284891786e-06, "loss": 0.6355, "step": 1421 }, { "epoch": 0.4810554803788904, "grad_norm": 0.3191288411617279, "learning_rate": 4.724744383997469e-06, "loss": 0.6253, "step": 1422 }, { "epoch": 0.48139377537212447, "grad_norm": 0.2883187234401703, "learning_rate": 4.724335633463547e-06, "loss": 0.6315, "step": 1423 }, { "epoch": 0.4817320703653586, "grad_norm": 0.2939365804195404, "learning_rate": 4.72392659736857e-06, "loss": 0.6391, "step": 1424 }, { "epoch": 0.4820703653585927, "grad_norm": 0.2954607903957367, "learning_rate": 4.72351727576505e-06, "loss": 0.6202, "step": 1425 }, { "epoch": 0.4824086603518268, "grad_norm": 0.2945690453052521, "learning_rate": 4.723107668705537e-06, "loss": 0.6501, "step": 1426 }, { "epoch": 0.4827469553450609, "grad_norm": 0.29526031017303467, "learning_rate": 4.722697776242615e-06, "loss": 0.6231, "step": 1427 }, { "epoch": 0.48308525033829497, "grad_norm": 0.29499900341033936, "learning_rate": 4.722287598428907e-06, "loss": 0.6362, "step": 1428 }, { "epoch": 0.4834235453315291, "grad_norm": 0.2970479726791382, "learning_rate": 4.721877135317071e-06, "loss": 0.642, "step": 1429 }, { "epoch": 0.4837618403247632, "grad_norm": 0.30396947264671326, "learning_rate": 4.721466386959805e-06, "loss": 0.6774, "step": 1430 }, { "epoch": 0.4841001353179973, "grad_norm": 0.29407331347465515, "learning_rate": 4.721055353409837e-06, "loss": 0.6475, "step": 1431 }, { "epoch": 0.4844384303112314, "grad_norm": 0.2962198555469513, "learning_rate": 4.720644034719938e-06, "loss": 0.6495, "step": 1432 }, { "epoch": 0.4847767253044655, "grad_norm": 0.30761298537254333, "learning_rate": 4.720232430942912e-06, "loss": 0.6254, "step": 1433 }, { "epoch": 0.4851150202976996, "grad_norm": 0.29770034551620483, "learning_rate": 4.719820542131604e-06, "loss": 0.6483, "step": 1434 }, { "epoch": 0.4854533152909337, "grad_norm": 0.2793305814266205, "learning_rate": 4.7194083683388885e-06, "loss": 0.6207, "step": 1435 }, { "epoch": 0.4857916102841678, "grad_norm": 0.29254627227783203, "learning_rate": 4.718995909617683e-06, "loss": 0.6026, "step": 1436 }, { "epoch": 0.4861299052774019, "grad_norm": 0.30059441924095154, "learning_rate": 4.718583166020938e-06, "loss": 0.6157, "step": 1437 }, { "epoch": 0.486468200270636, "grad_norm": 0.3180963397026062, "learning_rate": 4.718170137601642e-06, "loss": 0.6643, "step": 1438 }, { "epoch": 0.4868064952638701, "grad_norm": 0.3006470203399658, "learning_rate": 4.71775682441282e-06, "loss": 0.6524, "step": 1439 }, { "epoch": 0.4871447902571042, "grad_norm": 0.2820841670036316, "learning_rate": 4.717343226507533e-06, "loss": 0.6185, "step": 1440 }, { "epoch": 0.4874830852503383, "grad_norm": 0.28982532024383545, "learning_rate": 4.71692934393888e-06, "loss": 0.6269, "step": 1441 }, { "epoch": 0.4878213802435724, "grad_norm": 0.29427915811538696, "learning_rate": 4.716515176759992e-06, "loss": 0.6493, "step": 1442 }, { "epoch": 0.4881596752368065, "grad_norm": 0.29777371883392334, "learning_rate": 4.716100725024044e-06, "loss": 0.6418, "step": 1443 }, { "epoch": 0.4884979702300406, "grad_norm": 0.29938769340515137, "learning_rate": 4.7156859887842416e-06, "loss": 0.6325, "step": 1444 }, { "epoch": 0.48883626522327467, "grad_norm": 0.305256724357605, "learning_rate": 4.715270968093828e-06, "loss": 0.6552, "step": 1445 }, { "epoch": 0.4891745602165088, "grad_norm": 0.29933780431747437, "learning_rate": 4.714855663006086e-06, "loss": 0.6296, "step": 1446 }, { "epoch": 0.4895128552097429, "grad_norm": 0.2861994802951813, "learning_rate": 4.71444007357433e-06, "loss": 0.6133, "step": 1447 }, { "epoch": 0.489851150202977, "grad_norm": 0.2853560745716095, "learning_rate": 4.7140241998519156e-06, "loss": 0.654, "step": 1448 }, { "epoch": 0.4901894451962111, "grad_norm": 0.2986946403980255, "learning_rate": 4.713608041892232e-06, "loss": 0.6182, "step": 1449 }, { "epoch": 0.49052774018944517, "grad_norm": 0.2984166145324707, "learning_rate": 4.713191599748706e-06, "loss": 0.6207, "step": 1450 }, { "epoch": 0.4908660351826793, "grad_norm": 0.32026952505111694, "learning_rate": 4.712774873474799e-06, "loss": 0.6257, "step": 1451 }, { "epoch": 0.4912043301759134, "grad_norm": 0.31071943044662476, "learning_rate": 4.712357863124013e-06, "loss": 0.6277, "step": 1452 }, { "epoch": 0.4915426251691475, "grad_norm": 0.2902413308620453, "learning_rate": 4.711940568749883e-06, "loss": 0.6342, "step": 1453 }, { "epoch": 0.4918809201623816, "grad_norm": 0.29815566539764404, "learning_rate": 4.71152299040598e-06, "loss": 0.633, "step": 1454 }, { "epoch": 0.4922192151556157, "grad_norm": 0.3356594741344452, "learning_rate": 4.7111051281459155e-06, "loss": 0.633, "step": 1455 }, { "epoch": 0.4925575101488498, "grad_norm": 0.2962017059326172, "learning_rate": 4.7106869820233324e-06, "loss": 0.6481, "step": 1456 }, { "epoch": 0.4928958051420839, "grad_norm": 0.30914294719696045, "learning_rate": 4.710268552091913e-06, "loss": 0.633, "step": 1457 }, { "epoch": 0.493234100135318, "grad_norm": 0.29593202471733093, "learning_rate": 4.709849838405377e-06, "loss": 0.6326, "step": 1458 }, { "epoch": 0.4935723951285521, "grad_norm": 0.29423394799232483, "learning_rate": 4.709430841017477e-06, "loss": 0.6414, "step": 1459 }, { "epoch": 0.4939106901217862, "grad_norm": 0.2971169650554657, "learning_rate": 4.709011559982006e-06, "loss": 0.6301, "step": 1460 }, { "epoch": 0.4942489851150203, "grad_norm": 0.30087241530418396, "learning_rate": 4.70859199535279e-06, "loss": 0.6349, "step": 1461 }, { "epoch": 0.4945872801082544, "grad_norm": 0.30483344197273254, "learning_rate": 4.7081721471836934e-06, "loss": 0.6544, "step": 1462 }, { "epoch": 0.4949255751014885, "grad_norm": 0.2955804765224457, "learning_rate": 4.707752015528616e-06, "loss": 0.6165, "step": 1463 }, { "epoch": 0.4952638700947226, "grad_norm": 0.2943514287471771, "learning_rate": 4.707331600441496e-06, "loss": 0.6206, "step": 1464 }, { "epoch": 0.4956021650879567, "grad_norm": 0.29659345746040344, "learning_rate": 4.706910901976304e-06, "loss": 0.6441, "step": 1465 }, { "epoch": 0.4959404600811908, "grad_norm": 0.29106733202934265, "learning_rate": 4.706489920187051e-06, "loss": 0.654, "step": 1466 }, { "epoch": 0.49627875507442487, "grad_norm": 0.30567026138305664, "learning_rate": 4.706068655127783e-06, "loss": 0.6589, "step": 1467 }, { "epoch": 0.496617050067659, "grad_norm": 0.3061468303203583, "learning_rate": 4.705647106852581e-06, "loss": 0.6408, "step": 1468 }, { "epoch": 0.4969553450608931, "grad_norm": 0.30140402913093567, "learning_rate": 4.705225275415565e-06, "loss": 0.6068, "step": 1469 }, { "epoch": 0.4972936400541272, "grad_norm": 0.29500529170036316, "learning_rate": 4.704803160870888e-06, "loss": 0.6431, "step": 1470 }, { "epoch": 0.4976319350473613, "grad_norm": 0.295264333486557, "learning_rate": 4.704380763272743e-06, "loss": 0.6281, "step": 1471 }, { "epoch": 0.4979702300405954, "grad_norm": 0.2895084023475647, "learning_rate": 4.703958082675357e-06, "loss": 0.6677, "step": 1472 }, { "epoch": 0.4983085250338295, "grad_norm": 0.2979891896247864, "learning_rate": 4.7035351191329935e-06, "loss": 0.6417, "step": 1473 }, { "epoch": 0.4986468200270636, "grad_norm": 0.29371345043182373, "learning_rate": 4.703111872699953e-06, "loss": 0.5812, "step": 1474 }, { "epoch": 0.4989851150202977, "grad_norm": 0.3126574754714966, "learning_rate": 4.702688343430573e-06, "loss": 0.6373, "step": 1475 }, { "epoch": 0.4993234100135318, "grad_norm": 0.30721336603164673, "learning_rate": 4.7022645313792235e-06, "loss": 0.6307, "step": 1476 }, { "epoch": 0.4996617050067659, "grad_norm": 0.296394020318985, "learning_rate": 4.701840436600317e-06, "loss": 0.647, "step": 1477 }, { "epoch": 0.5, "grad_norm": 0.2912466526031494, "learning_rate": 4.701416059148297e-06, "loss": 0.6266, "step": 1478 }, { "epoch": 0.5003382949932341, "grad_norm": 0.30898961424827576, "learning_rate": 4.700991399077646e-06, "loss": 0.637, "step": 1479 }, { "epoch": 0.5006765899864682, "grad_norm": 0.3106672763824463, "learning_rate": 4.700566456442882e-06, "loss": 0.6161, "step": 1480 }, { "epoch": 0.5010148849797023, "grad_norm": 0.2974197268486023, "learning_rate": 4.700141231298558e-06, "loss": 0.6181, "step": 1481 }, { "epoch": 0.5013531799729364, "grad_norm": 0.3075565695762634, "learning_rate": 4.699715723699266e-06, "loss": 0.6344, "step": 1482 }, { "epoch": 0.5016914749661705, "grad_norm": 0.2872021198272705, "learning_rate": 4.699289933699633e-06, "loss": 0.6409, "step": 1483 }, { "epoch": 0.5020297699594046, "grad_norm": 0.28962892293930054, "learning_rate": 4.698863861354322e-06, "loss": 0.5999, "step": 1484 }, { "epoch": 0.5023680649526387, "grad_norm": 0.2840323746204376, "learning_rate": 4.698437506718031e-06, "loss": 0.608, "step": 1485 }, { "epoch": 0.5027063599458728, "grad_norm": 0.2985817790031433, "learning_rate": 4.6980108698454965e-06, "loss": 0.6344, "step": 1486 }, { "epoch": 0.5030446549391069, "grad_norm": 0.295597642660141, "learning_rate": 4.69758395079149e-06, "loss": 0.6484, "step": 1487 }, { "epoch": 0.503382949932341, "grad_norm": 0.28852009773254395, "learning_rate": 4.69715674961082e-06, "loss": 0.6124, "step": 1488 }, { "epoch": 0.5037212449255751, "grad_norm": 0.2893640995025635, "learning_rate": 4.696729266358331e-06, "loss": 0.6167, "step": 1489 }, { "epoch": 0.5040595399188093, "grad_norm": 0.30135470628738403, "learning_rate": 4.6963015010889025e-06, "loss": 0.6498, "step": 1490 }, { "epoch": 0.5043978349120433, "grad_norm": 0.2909967303276062, "learning_rate": 4.695873453857451e-06, "loss": 0.6422, "step": 1491 }, { "epoch": 0.5047361299052774, "grad_norm": 0.299000084400177, "learning_rate": 4.695445124718932e-06, "loss": 0.6358, "step": 1492 }, { "epoch": 0.5050744248985115, "grad_norm": 0.29471755027770996, "learning_rate": 4.695016513728331e-06, "loss": 0.6081, "step": 1493 }, { "epoch": 0.5054127198917456, "grad_norm": 0.30111798644065857, "learning_rate": 4.694587620940675e-06, "loss": 0.6396, "step": 1494 }, { "epoch": 0.5057510148849798, "grad_norm": 0.29957783222198486, "learning_rate": 4.694158446411025e-06, "loss": 0.6298, "step": 1495 }, { "epoch": 0.5060893098782138, "grad_norm": 0.2970424294471741, "learning_rate": 4.69372899019448e-06, "loss": 0.6281, "step": 1496 }, { "epoch": 0.5064276048714479, "grad_norm": 0.3034062385559082, "learning_rate": 4.69329925234617e-06, "loss": 0.6496, "step": 1497 }, { "epoch": 0.506765899864682, "grad_norm": 0.31338825821876526, "learning_rate": 4.69286923292127e-06, "loss": 0.6452, "step": 1498 }, { "epoch": 0.5071041948579161, "grad_norm": 0.2992928624153137, "learning_rate": 4.692438931974983e-06, "loss": 0.6376, "step": 1499 }, { "epoch": 0.5074424898511503, "grad_norm": 0.29723188281059265, "learning_rate": 4.692008349562551e-06, "loss": 0.636, "step": 1500 }, { "epoch": 0.5077807848443843, "grad_norm": 0.29501450061798096, "learning_rate": 4.691577485739254e-06, "loss": 0.6134, "step": 1501 }, { "epoch": 0.5081190798376184, "grad_norm": 0.3050749897956848, "learning_rate": 4.691146340560405e-06, "loss": 0.639, "step": 1502 }, { "epoch": 0.5084573748308525, "grad_norm": 0.29698941111564636, "learning_rate": 4.690714914081355e-06, "loss": 0.6379, "step": 1503 }, { "epoch": 0.5087956698240866, "grad_norm": 0.29626375436782837, "learning_rate": 4.690283206357491e-06, "loss": 0.6353, "step": 1504 }, { "epoch": 0.5091339648173207, "grad_norm": 0.3038589060306549, "learning_rate": 4.689851217444236e-06, "loss": 0.6212, "step": 1505 }, { "epoch": 0.5094722598105548, "grad_norm": 0.3010556697845459, "learning_rate": 4.689418947397047e-06, "loss": 0.6423, "step": 1506 }, { "epoch": 0.5098105548037889, "grad_norm": 0.28557172417640686, "learning_rate": 4.688986396271422e-06, "loss": 0.6096, "step": 1507 }, { "epoch": 0.510148849797023, "grad_norm": 0.3025129735469818, "learning_rate": 4.688553564122891e-06, "loss": 0.6154, "step": 1508 }, { "epoch": 0.5104871447902571, "grad_norm": 0.31298425793647766, "learning_rate": 4.68812045100702e-06, "loss": 0.6312, "step": 1509 }, { "epoch": 0.5108254397834912, "grad_norm": 0.28845450282096863, "learning_rate": 4.687687056979413e-06, "loss": 0.616, "step": 1510 }, { "epoch": 0.5111637347767253, "grad_norm": 0.3298991024494171, "learning_rate": 4.687253382095709e-06, "loss": 0.6243, "step": 1511 }, { "epoch": 0.5115020297699594, "grad_norm": 0.3234930634498596, "learning_rate": 4.686819426411583e-06, "loss": 0.632, "step": 1512 }, { "epoch": 0.5118403247631935, "grad_norm": 0.3078064024448395, "learning_rate": 4.686385189982748e-06, "loss": 0.6429, "step": 1513 }, { "epoch": 0.5121786197564276, "grad_norm": 0.3139505088329315, "learning_rate": 4.6859506728649506e-06, "loss": 0.6035, "step": 1514 }, { "epoch": 0.5125169147496617, "grad_norm": 0.3039025366306305, "learning_rate": 4.685515875113974e-06, "loss": 0.6127, "step": 1515 }, { "epoch": 0.5128552097428958, "grad_norm": 0.2968452572822571, "learning_rate": 4.685080796785637e-06, "loss": 0.6297, "step": 1516 }, { "epoch": 0.5131935047361299, "grad_norm": 0.34408167004585266, "learning_rate": 4.6846454379357965e-06, "loss": 0.6367, "step": 1517 }, { "epoch": 0.513531799729364, "grad_norm": 0.29630929231643677, "learning_rate": 4.684209798620343e-06, "loss": 0.6253, "step": 1518 }, { "epoch": 0.5138700947225981, "grad_norm": 0.32279467582702637, "learning_rate": 4.683773878895206e-06, "loss": 0.6103, "step": 1519 }, { "epoch": 0.5142083897158322, "grad_norm": 0.2961653470993042, "learning_rate": 4.683337678816345e-06, "loss": 0.6256, "step": 1520 }, { "epoch": 0.5145466847090663, "grad_norm": 0.30008238554000854, "learning_rate": 4.682901198439764e-06, "loss": 0.636, "step": 1521 }, { "epoch": 0.5148849797023004, "grad_norm": 0.29182377457618713, "learning_rate": 4.682464437821497e-06, "loss": 0.6359, "step": 1522 }, { "epoch": 0.5152232746955345, "grad_norm": 0.3136955499649048, "learning_rate": 4.682027397017614e-06, "loss": 0.6213, "step": 1523 }, { "epoch": 0.5155615696887687, "grad_norm": 0.3067409098148346, "learning_rate": 4.681590076084224e-06, "loss": 0.6088, "step": 1524 }, { "epoch": 0.5158998646820027, "grad_norm": 0.31430402398109436, "learning_rate": 4.6811524750774695e-06, "loss": 0.6581, "step": 1525 }, { "epoch": 0.5162381596752368, "grad_norm": 0.29381296038627625, "learning_rate": 4.68071459405353e-06, "loss": 0.5942, "step": 1526 }, { "epoch": 0.5165764546684709, "grad_norm": 0.29115763306617737, "learning_rate": 4.680276433068623e-06, "loss": 0.6393, "step": 1527 }, { "epoch": 0.516914749661705, "grad_norm": 0.27736565470695496, "learning_rate": 4.679837992178996e-06, "loss": 0.6567, "step": 1528 }, { "epoch": 0.5172530446549392, "grad_norm": 0.2848239243030548, "learning_rate": 4.679399271440939e-06, "loss": 0.6144, "step": 1529 }, { "epoch": 0.5175913396481732, "grad_norm": 0.30477553606033325, "learning_rate": 4.678960270910774e-06, "loss": 0.6314, "step": 1530 }, { "epoch": 0.5179296346414073, "grad_norm": 0.2981856167316437, "learning_rate": 4.678520990644862e-06, "loss": 0.6189, "step": 1531 }, { "epoch": 0.5182679296346414, "grad_norm": 0.29681396484375, "learning_rate": 4.678081430699594e-06, "loss": 0.6103, "step": 1532 }, { "epoch": 0.5186062246278755, "grad_norm": 0.2962600290775299, "learning_rate": 4.677641591131404e-06, "loss": 0.6481, "step": 1533 }, { "epoch": 0.5189445196211097, "grad_norm": 0.28115904331207275, "learning_rate": 4.6772014719967585e-06, "loss": 0.6124, "step": 1534 }, { "epoch": 0.5192828146143437, "grad_norm": 0.306104451417923, "learning_rate": 4.6767610733521594e-06, "loss": 0.652, "step": 1535 }, { "epoch": 0.5196211096075778, "grad_norm": 0.30700162053108215, "learning_rate": 4.6763203952541455e-06, "loss": 0.6487, "step": 1536 }, { "epoch": 0.5199594046008119, "grad_norm": 0.2890525162220001, "learning_rate": 4.675879437759292e-06, "loss": 0.6282, "step": 1537 }, { "epoch": 0.520297699594046, "grad_norm": 0.3027341365814209, "learning_rate": 4.675438200924207e-06, "loss": 0.6332, "step": 1538 }, { "epoch": 0.5206359945872802, "grad_norm": 0.29767075181007385, "learning_rate": 4.674996684805538e-06, "loss": 0.6543, "step": 1539 }, { "epoch": 0.5209742895805142, "grad_norm": 0.2975456118583679, "learning_rate": 4.674554889459969e-06, "loss": 0.6186, "step": 1540 }, { "epoch": 0.5213125845737483, "grad_norm": 0.3017563819885254, "learning_rate": 4.674112814944214e-06, "loss": 0.6455, "step": 1541 }, { "epoch": 0.5216508795669824, "grad_norm": 0.29811638593673706, "learning_rate": 4.6736704613150295e-06, "loss": 0.6343, "step": 1542 }, { "epoch": 0.5219891745602165, "grad_norm": 0.30314648151397705, "learning_rate": 4.673227828629203e-06, "loss": 0.6222, "step": 1543 }, { "epoch": 0.5223274695534507, "grad_norm": 0.3316211402416229, "learning_rate": 4.672784916943562e-06, "loss": 0.6362, "step": 1544 }, { "epoch": 0.5226657645466847, "grad_norm": 0.31122979521751404, "learning_rate": 4.672341726314968e-06, "loss": 0.6484, "step": 1545 }, { "epoch": 0.5230040595399188, "grad_norm": 0.304100900888443, "learning_rate": 4.671898256800314e-06, "loss": 0.6546, "step": 1546 }, { "epoch": 0.5233423545331529, "grad_norm": 0.2922753691673279, "learning_rate": 4.671454508456536e-06, "loss": 0.6038, "step": 1547 }, { "epoch": 0.523680649526387, "grad_norm": 0.2911527454853058, "learning_rate": 4.671010481340603e-06, "loss": 0.6247, "step": 1548 }, { "epoch": 0.524018944519621, "grad_norm": 0.2970218360424042, "learning_rate": 4.670566175509518e-06, "loss": 0.6106, "step": 1549 }, { "epoch": 0.5243572395128552, "grad_norm": 0.3152008056640625, "learning_rate": 4.6701215910203225e-06, "loss": 0.6001, "step": 1550 }, { "epoch": 0.5246955345060893, "grad_norm": 0.3008357286453247, "learning_rate": 4.66967672793009e-06, "loss": 0.6501, "step": 1551 }, { "epoch": 0.5250338294993234, "grad_norm": 0.300070196390152, "learning_rate": 4.669231586295934e-06, "loss": 0.6292, "step": 1552 }, { "epoch": 0.5253721244925575, "grad_norm": 0.3507230579853058, "learning_rate": 4.668786166175002e-06, "loss": 0.616, "step": 1553 }, { "epoch": 0.5257104194857916, "grad_norm": 0.2806701362133026, "learning_rate": 4.668340467624476e-06, "loss": 0.6118, "step": 1554 }, { "epoch": 0.5260487144790257, "grad_norm": 0.3331495225429535, "learning_rate": 4.667894490701577e-06, "loss": 0.6, "step": 1555 }, { "epoch": 0.5263870094722598, "grad_norm": 0.32306334376335144, "learning_rate": 4.6674482354635576e-06, "loss": 0.6329, "step": 1556 }, { "epoch": 0.5267253044654939, "grad_norm": 0.292290061712265, "learning_rate": 4.667001701967709e-06, "loss": 0.6428, "step": 1557 }, { "epoch": 0.527063599458728, "grad_norm": 0.28629037737846375, "learning_rate": 4.666554890271357e-06, "loss": 0.6499, "step": 1558 }, { "epoch": 0.5274018944519621, "grad_norm": 0.31190797686576843, "learning_rate": 4.666107800431865e-06, "loss": 0.6019, "step": 1559 }, { "epoch": 0.5277401894451962, "grad_norm": 0.35739222168922424, "learning_rate": 4.665660432506629e-06, "loss": 0.6221, "step": 1560 }, { "epoch": 0.5280784844384303, "grad_norm": 0.29792535305023193, "learning_rate": 4.665212786553083e-06, "loss": 0.6332, "step": 1561 }, { "epoch": 0.5284167794316644, "grad_norm": 0.3818718492984772, "learning_rate": 4.664764862628695e-06, "loss": 0.6064, "step": 1562 }, { "epoch": 0.5287550744248986, "grad_norm": 0.3109496533870697, "learning_rate": 4.664316660790971e-06, "loss": 0.649, "step": 1563 }, { "epoch": 0.5290933694181326, "grad_norm": 0.2961856424808502, "learning_rate": 4.6638681810974495e-06, "loss": 0.6346, "step": 1564 }, { "epoch": 0.5294316644113667, "grad_norm": 0.31769058108329773, "learning_rate": 4.663419423605708e-06, "loss": 0.6269, "step": 1565 }, { "epoch": 0.5297699594046008, "grad_norm": 0.30717143416404724, "learning_rate": 4.662970388373359e-06, "loss": 0.6222, "step": 1566 }, { "epoch": 0.5301082543978349, "grad_norm": 0.3136506676673889, "learning_rate": 4.662521075458048e-06, "loss": 0.6372, "step": 1567 }, { "epoch": 0.530446549391069, "grad_norm": 0.2984481751918793, "learning_rate": 4.662071484917458e-06, "loss": 0.6397, "step": 1568 }, { "epoch": 0.5307848443843031, "grad_norm": 0.30340978503227234, "learning_rate": 4.661621616809308e-06, "loss": 0.6271, "step": 1569 }, { "epoch": 0.5311231393775372, "grad_norm": 0.3092040717601776, "learning_rate": 4.661171471191352e-06, "loss": 0.6111, "step": 1570 }, { "epoch": 0.5314614343707713, "grad_norm": 0.2942693829536438, "learning_rate": 4.660721048121382e-06, "loss": 0.6371, "step": 1571 }, { "epoch": 0.5317997293640054, "grad_norm": 0.2909970283508301, "learning_rate": 4.66027034765722e-06, "loss": 0.6365, "step": 1572 }, { "epoch": 0.5321380243572396, "grad_norm": 0.29246649146080017, "learning_rate": 4.65981936985673e-06, "loss": 0.6155, "step": 1573 }, { "epoch": 0.5324763193504736, "grad_norm": 0.3133332133293152, "learning_rate": 4.659368114777807e-06, "loss": 0.6308, "step": 1574 }, { "epoch": 0.5328146143437077, "grad_norm": 0.3009655177593231, "learning_rate": 4.658916582478384e-06, "loss": 0.6051, "step": 1575 }, { "epoch": 0.5331529093369418, "grad_norm": 0.29506585001945496, "learning_rate": 4.658464773016428e-06, "loss": 0.6222, "step": 1576 }, { "epoch": 0.5334912043301759, "grad_norm": 0.2916436791419983, "learning_rate": 4.658012686449944e-06, "loss": 0.6388, "step": 1577 }, { "epoch": 0.5338294993234101, "grad_norm": 0.3032309412956238, "learning_rate": 4.657560322836969e-06, "loss": 0.6428, "step": 1578 }, { "epoch": 0.5341677943166441, "grad_norm": 0.3248575031757355, "learning_rate": 4.657107682235581e-06, "loss": 0.6429, "step": 1579 }, { "epoch": 0.5345060893098782, "grad_norm": 0.30649298429489136, "learning_rate": 4.6566547647038865e-06, "loss": 0.6281, "step": 1580 }, { "epoch": 0.5348443843031123, "grad_norm": 0.28190237283706665, "learning_rate": 4.656201570300033e-06, "loss": 0.6185, "step": 1581 }, { "epoch": 0.5351826792963464, "grad_norm": 0.310199111700058, "learning_rate": 4.655748099082201e-06, "loss": 0.6097, "step": 1582 }, { "epoch": 0.5355209742895806, "grad_norm": 0.30557340383529663, "learning_rate": 4.655294351108609e-06, "loss": 0.6575, "step": 1583 }, { "epoch": 0.5358592692828146, "grad_norm": 0.30597639083862305, "learning_rate": 4.654840326437508e-06, "loss": 0.6497, "step": 1584 }, { "epoch": 0.5361975642760487, "grad_norm": 0.2925478219985962, "learning_rate": 4.654386025127186e-06, "loss": 0.6135, "step": 1585 }, { "epoch": 0.5365358592692828, "grad_norm": 0.3115440905094147, "learning_rate": 4.6539314472359655e-06, "loss": 0.612, "step": 1586 }, { "epoch": 0.536874154262517, "grad_norm": 0.3000645935535431, "learning_rate": 4.653476592822207e-06, "loss": 0.6333, "step": 1587 }, { "epoch": 0.5372124492557511, "grad_norm": 0.34186699986457825, "learning_rate": 4.653021461944304e-06, "loss": 0.6348, "step": 1588 }, { "epoch": 0.5375507442489851, "grad_norm": 0.3043736517429352, "learning_rate": 4.652566054660686e-06, "loss": 0.6465, "step": 1589 }, { "epoch": 0.5378890392422192, "grad_norm": 0.30343347787857056, "learning_rate": 4.652110371029819e-06, "loss": 0.6419, "step": 1590 }, { "epoch": 0.5382273342354533, "grad_norm": 0.3004716634750366, "learning_rate": 4.651654411110205e-06, "loss": 0.6448, "step": 1591 }, { "epoch": 0.5385656292286874, "grad_norm": 0.2967135012149811, "learning_rate": 4.651198174960378e-06, "loss": 0.6284, "step": 1592 }, { "epoch": 0.5389039242219216, "grad_norm": 0.2954842150211334, "learning_rate": 4.65074166263891e-06, "loss": 0.6322, "step": 1593 }, { "epoch": 0.5392422192151556, "grad_norm": 0.2938835322856903, "learning_rate": 4.65028487420441e-06, "loss": 0.6332, "step": 1594 }, { "epoch": 0.5395805142083897, "grad_norm": 0.3059346079826355, "learning_rate": 4.64982780971552e-06, "loss": 0.6341, "step": 1595 }, { "epoch": 0.5399188092016238, "grad_norm": 0.31491121649742126, "learning_rate": 4.6493704692309175e-06, "loss": 0.6307, "step": 1596 }, { "epoch": 0.540257104194858, "grad_norm": 0.31054824590682983, "learning_rate": 4.648912852809317e-06, "loss": 0.6353, "step": 1597 }, { "epoch": 0.540595399188092, "grad_norm": 0.2955755591392517, "learning_rate": 4.648454960509466e-06, "loss": 0.6051, "step": 1598 }, { "epoch": 0.5409336941813261, "grad_norm": 0.3091103136539459, "learning_rate": 4.647996792390151e-06, "loss": 0.6422, "step": 1599 }, { "epoch": 0.5412719891745602, "grad_norm": 0.29176416993141174, "learning_rate": 4.647538348510189e-06, "loss": 0.658, "step": 1600 }, { "epoch": 0.5416102841677943, "grad_norm": 0.3021467626094818, "learning_rate": 4.647079628928437e-06, "loss": 0.6146, "step": 1601 }, { "epoch": 0.5419485791610285, "grad_norm": 0.29566964507102966, "learning_rate": 4.646620633703786e-06, "loss": 0.6163, "step": 1602 }, { "epoch": 0.5422868741542625, "grad_norm": 0.3090681731700897, "learning_rate": 4.64616136289516e-06, "loss": 0.638, "step": 1603 }, { "epoch": 0.5426251691474966, "grad_norm": 0.3095795214176178, "learning_rate": 4.645701816561523e-06, "loss": 0.6195, "step": 1604 }, { "epoch": 0.5429634641407307, "grad_norm": 0.3343175947666168, "learning_rate": 4.64524199476187e-06, "loss": 0.6299, "step": 1605 }, { "epoch": 0.5433017591339648, "grad_norm": 0.2911258041858673, "learning_rate": 4.644781897555233e-06, "loss": 0.6221, "step": 1606 }, { "epoch": 0.543640054127199, "grad_norm": 0.29694199562072754, "learning_rate": 4.644321525000681e-06, "loss": 0.6053, "step": 1607 }, { "epoch": 0.543978349120433, "grad_norm": 0.3226844370365143, "learning_rate": 4.643860877157314e-06, "loss": 0.6264, "step": 1608 }, { "epoch": 0.5443166441136671, "grad_norm": 0.308302104473114, "learning_rate": 4.643399954084274e-06, "loss": 0.6098, "step": 1609 }, { "epoch": 0.5446549391069012, "grad_norm": 0.33216387033462524, "learning_rate": 4.642938755840731e-06, "loss": 0.6029, "step": 1610 }, { "epoch": 0.5449932341001353, "grad_norm": 0.2923735976219177, "learning_rate": 4.642477282485896e-06, "loss": 0.6131, "step": 1611 }, { "epoch": 0.5453315290933695, "grad_norm": 0.3002014458179474, "learning_rate": 4.642015534079012e-06, "loss": 0.6412, "step": 1612 }, { "epoch": 0.5456698240866035, "grad_norm": 0.30318304896354675, "learning_rate": 4.641553510679358e-06, "loss": 0.62, "step": 1613 }, { "epoch": 0.5460081190798376, "grad_norm": 0.3148590326309204, "learning_rate": 4.64109121234625e-06, "loss": 0.6435, "step": 1614 }, { "epoch": 0.5463464140730717, "grad_norm": 0.29820773005485535, "learning_rate": 4.640628639139037e-06, "loss": 0.6331, "step": 1615 }, { "epoch": 0.5466847090663058, "grad_norm": 0.308205246925354, "learning_rate": 4.640165791117106e-06, "loss": 0.6775, "step": 1616 }, { "epoch": 0.54702300405954, "grad_norm": 0.3078860342502594, "learning_rate": 4.639702668339876e-06, "loss": 0.6472, "step": 1617 }, { "epoch": 0.547361299052774, "grad_norm": 0.3088330924510956, "learning_rate": 4.6392392708668035e-06, "loss": 0.623, "step": 1618 }, { "epoch": 0.5476995940460081, "grad_norm": 0.2907048463821411, "learning_rate": 4.638775598757379e-06, "loss": 0.6284, "step": 1619 }, { "epoch": 0.5480378890392422, "grad_norm": 0.31769198179244995, "learning_rate": 4.63831165207113e-06, "loss": 0.6483, "step": 1620 }, { "epoch": 0.5483761840324763, "grad_norm": 0.30527791380882263, "learning_rate": 4.637847430867617e-06, "loss": 0.644, "step": 1621 }, { "epoch": 0.5487144790257105, "grad_norm": 0.3103595972061157, "learning_rate": 4.6373829352064375e-06, "loss": 0.6223, "step": 1622 }, { "epoch": 0.5490527740189445, "grad_norm": 0.2978458106517792, "learning_rate": 4.636918165147224e-06, "loss": 0.6252, "step": 1623 }, { "epoch": 0.5493910690121786, "grad_norm": 0.3217633366584778, "learning_rate": 4.6364531207496426e-06, "loss": 0.6436, "step": 1624 }, { "epoch": 0.5497293640054127, "grad_norm": 0.2983613610267639, "learning_rate": 4.635987802073397e-06, "loss": 0.6162, "step": 1625 }, { "epoch": 0.5500676589986468, "grad_norm": 0.297369122505188, "learning_rate": 4.6355222091782245e-06, "loss": 0.6494, "step": 1626 }, { "epoch": 0.550405953991881, "grad_norm": 0.2905776798725128, "learning_rate": 4.635056342123899e-06, "loss": 0.6155, "step": 1627 }, { "epoch": 0.550744248985115, "grad_norm": 0.3083232045173645, "learning_rate": 4.634590200970228e-06, "loss": 0.6471, "step": 1628 }, { "epoch": 0.5510825439783491, "grad_norm": 0.30626025795936584, "learning_rate": 4.634123785777054e-06, "loss": 0.6376, "step": 1629 }, { "epoch": 0.5514208389715832, "grad_norm": 0.29137128591537476, "learning_rate": 4.633657096604256e-06, "loss": 0.6381, "step": 1630 }, { "epoch": 0.5517591339648173, "grad_norm": 0.29958054423332214, "learning_rate": 4.633190133511749e-06, "loss": 0.642, "step": 1631 }, { "epoch": 0.5520974289580515, "grad_norm": 0.317205548286438, "learning_rate": 4.632722896559481e-06, "loss": 0.6412, "step": 1632 }, { "epoch": 0.5524357239512855, "grad_norm": 0.328844279050827, "learning_rate": 4.632255385807436e-06, "loss": 0.625, "step": 1633 }, { "epoch": 0.5527740189445196, "grad_norm": 0.3044576644897461, "learning_rate": 4.631787601315633e-06, "loss": 0.6228, "step": 1634 }, { "epoch": 0.5531123139377537, "grad_norm": 0.3005305230617523, "learning_rate": 4.631319543144127e-06, "loss": 0.6022, "step": 1635 }, { "epoch": 0.5534506089309879, "grad_norm": 0.3218366801738739, "learning_rate": 4.630851211353007e-06, "loss": 0.6252, "step": 1636 }, { "epoch": 0.553788903924222, "grad_norm": 0.3036673069000244, "learning_rate": 4.6303826060023975e-06, "loss": 0.6553, "step": 1637 }, { "epoch": 0.554127198917456, "grad_norm": 0.2918086349964142, "learning_rate": 4.629913727152459e-06, "loss": 0.6138, "step": 1638 }, { "epoch": 0.5544654939106901, "grad_norm": 0.29163315892219543, "learning_rate": 4.629444574863385e-06, "loss": 0.6228, "step": 1639 }, { "epoch": 0.5548037889039242, "grad_norm": 0.31965193152427673, "learning_rate": 4.6289751491954076e-06, "loss": 0.6039, "step": 1640 }, { "epoch": 0.5551420838971584, "grad_norm": 0.313843309879303, "learning_rate": 4.628505450208789e-06, "loss": 0.6416, "step": 1641 }, { "epoch": 0.5554803788903924, "grad_norm": 0.29786884784698486, "learning_rate": 4.628035477963832e-06, "loss": 0.6026, "step": 1642 }, { "epoch": 0.5558186738836265, "grad_norm": 0.30769261717796326, "learning_rate": 4.627565232520871e-06, "loss": 0.6294, "step": 1643 }, { "epoch": 0.5561569688768606, "grad_norm": 0.3097287714481354, "learning_rate": 4.627094713940274e-06, "loss": 0.6159, "step": 1644 }, { "epoch": 0.5564952638700947, "grad_norm": 0.30283322930336, "learning_rate": 4.626623922282451e-06, "loss": 0.628, "step": 1645 }, { "epoch": 0.5568335588633289, "grad_norm": 0.3172987699508667, "learning_rate": 4.626152857607837e-06, "loss": 0.6285, "step": 1646 }, { "epoch": 0.5571718538565629, "grad_norm": 0.3280118703842163, "learning_rate": 4.625681519976912e-06, "loss": 0.6326, "step": 1647 }, { "epoch": 0.557510148849797, "grad_norm": 0.30398502945899963, "learning_rate": 4.625209909450183e-06, "loss": 0.6218, "step": 1648 }, { "epoch": 0.5578484438430311, "grad_norm": 0.34335753321647644, "learning_rate": 4.6247380260881995e-06, "loss": 0.5902, "step": 1649 }, { "epoch": 0.5581867388362652, "grad_norm": 0.2943098843097687, "learning_rate": 4.624265869951539e-06, "loss": 0.6408, "step": 1650 }, { "epoch": 0.5585250338294994, "grad_norm": 0.31536927819252014, "learning_rate": 4.623793441100816e-06, "loss": 0.6236, "step": 1651 }, { "epoch": 0.5588633288227334, "grad_norm": 0.3096260130405426, "learning_rate": 4.623320739596685e-06, "loss": 0.6412, "step": 1652 }, { "epoch": 0.5592016238159675, "grad_norm": 0.3125949501991272, "learning_rate": 4.6228477654998294e-06, "loss": 0.6222, "step": 1653 }, { "epoch": 0.5595399188092016, "grad_norm": 0.34240230917930603, "learning_rate": 4.622374518870969e-06, "loss": 0.6293, "step": 1654 }, { "epoch": 0.5598782138024357, "grad_norm": 0.30551519989967346, "learning_rate": 4.621900999770862e-06, "loss": 0.6609, "step": 1655 }, { "epoch": 0.5602165087956699, "grad_norm": 0.31002277135849, "learning_rate": 4.621427208260296e-06, "loss": 0.5994, "step": 1656 }, { "epoch": 0.5605548037889039, "grad_norm": 0.2998320460319519, "learning_rate": 4.6209531444000975e-06, "loss": 0.6245, "step": 1657 }, { "epoch": 0.560893098782138, "grad_norm": 0.32024434208869934, "learning_rate": 4.620478808251129e-06, "loss": 0.6083, "step": 1658 }, { "epoch": 0.5612313937753721, "grad_norm": 0.2951360046863556, "learning_rate": 4.620004199874283e-06, "loss": 0.6236, "step": 1659 }, { "epoch": 0.5615696887686062, "grad_norm": 0.29551962018013, "learning_rate": 4.619529319330492e-06, "loss": 0.6338, "step": 1660 }, { "epoch": 0.5619079837618404, "grad_norm": 0.29304054379463196, "learning_rate": 4.619054166680718e-06, "loss": 0.6175, "step": 1661 }, { "epoch": 0.5622462787550744, "grad_norm": 0.31250274181365967, "learning_rate": 4.6185787419859675e-06, "loss": 0.651, "step": 1662 }, { "epoch": 0.5625845737483085, "grad_norm": 0.3109738528728485, "learning_rate": 4.6181030453072695e-06, "loss": 0.6302, "step": 1663 }, { "epoch": 0.5629228687415426, "grad_norm": 0.2980029284954071, "learning_rate": 4.617627076705697e-06, "loss": 0.6325, "step": 1664 }, { "epoch": 0.5632611637347767, "grad_norm": 0.28963977098464966, "learning_rate": 4.617150836242356e-06, "loss": 0.6178, "step": 1665 }, { "epoch": 0.5635994587280109, "grad_norm": 0.2933375835418701, "learning_rate": 4.616674323978384e-06, "loss": 0.6361, "step": 1666 }, { "epoch": 0.5639377537212449, "grad_norm": 0.2948474884033203, "learning_rate": 4.616197539974957e-06, "loss": 0.6377, "step": 1667 }, { "epoch": 0.564276048714479, "grad_norm": 0.29074469208717346, "learning_rate": 4.615720484293286e-06, "loss": 0.6263, "step": 1668 }, { "epoch": 0.5646143437077131, "grad_norm": 0.28231802582740784, "learning_rate": 4.615243156994613e-06, "loss": 0.6354, "step": 1669 }, { "epoch": 0.5649526387009473, "grad_norm": 0.3019883632659912, "learning_rate": 4.614765558140219e-06, "loss": 0.6089, "step": 1670 }, { "epoch": 0.5652909336941814, "grad_norm": 0.29071640968322754, "learning_rate": 4.6142876877914175e-06, "loss": 0.6215, "step": 1671 }, { "epoch": 0.5656292286874154, "grad_norm": 0.2982822060585022, "learning_rate": 4.613809546009558e-06, "loss": 0.6117, "step": 1672 }, { "epoch": 0.5659675236806495, "grad_norm": 0.29466015100479126, "learning_rate": 4.613331132856026e-06, "loss": 0.6109, "step": 1673 }, { "epoch": 0.5663058186738836, "grad_norm": 0.2973395884037018, "learning_rate": 4.612852448392238e-06, "loss": 0.615, "step": 1674 }, { "epoch": 0.5666441136671178, "grad_norm": 0.3011510968208313, "learning_rate": 4.612373492679649e-06, "loss": 0.644, "step": 1675 }, { "epoch": 0.5669824086603519, "grad_norm": 0.2894744575023651, "learning_rate": 4.611894265779748e-06, "loss": 0.6086, "step": 1676 }, { "epoch": 0.5673207036535859, "grad_norm": 0.2948070466518402, "learning_rate": 4.611414767754057e-06, "loss": 0.6314, "step": 1677 }, { "epoch": 0.56765899864682, "grad_norm": 0.2949257493019104, "learning_rate": 4.610934998664134e-06, "loss": 0.6324, "step": 1678 }, { "epoch": 0.5679972936400541, "grad_norm": 0.30842041969299316, "learning_rate": 4.610454958571574e-06, "loss": 0.6016, "step": 1679 }, { "epoch": 0.5683355886332883, "grad_norm": 0.28693491220474243, "learning_rate": 4.609974647538003e-06, "loss": 0.6331, "step": 1680 }, { "epoch": 0.5686738836265224, "grad_norm": 0.2898673415184021, "learning_rate": 4.6094940656250845e-06, "loss": 0.633, "step": 1681 }, { "epoch": 0.5690121786197564, "grad_norm": 0.28437864780426025, "learning_rate": 4.6090132128945146e-06, "loss": 0.5995, "step": 1682 }, { "epoch": 0.5693504736129905, "grad_norm": 0.36713582277297974, "learning_rate": 4.608532089408026e-06, "loss": 0.5904, "step": 1683 }, { "epoch": 0.5696887686062246, "grad_norm": 0.31164711713790894, "learning_rate": 4.6080506952273855e-06, "loss": 0.6251, "step": 1684 }, { "epoch": 0.5700270635994588, "grad_norm": 0.31584176421165466, "learning_rate": 4.607569030414394e-06, "loss": 0.6547, "step": 1685 }, { "epoch": 0.5703653585926928, "grad_norm": 0.34662458300590515, "learning_rate": 4.60708709503089e-06, "loss": 0.6161, "step": 1686 }, { "epoch": 0.5707036535859269, "grad_norm": 0.30349597334861755, "learning_rate": 4.606604889138742e-06, "loss": 0.6173, "step": 1687 }, { "epoch": 0.571041948579161, "grad_norm": 0.32037651538848877, "learning_rate": 4.606122412799857e-06, "loss": 0.6024, "step": 1688 }, { "epoch": 0.5713802435723951, "grad_norm": 0.3367273211479187, "learning_rate": 4.6056396660761755e-06, "loss": 0.6313, "step": 1689 }, { "epoch": 0.5717185385656293, "grad_norm": 0.3002927601337433, "learning_rate": 4.6051566490296726e-06, "loss": 0.6454, "step": 1690 }, { "epoch": 0.5720568335588633, "grad_norm": 0.32096385955810547, "learning_rate": 4.604673361722358e-06, "loss": 0.6168, "step": 1691 }, { "epoch": 0.5723951285520974, "grad_norm": 0.3034731149673462, "learning_rate": 4.604189804216276e-06, "loss": 0.6364, "step": 1692 }, { "epoch": 0.5727334235453315, "grad_norm": 0.29405683279037476, "learning_rate": 4.603705976573507e-06, "loss": 0.6073, "step": 1693 }, { "epoch": 0.5730717185385656, "grad_norm": 0.2853470742702484, "learning_rate": 4.603221878856164e-06, "loss": 0.6218, "step": 1694 }, { "epoch": 0.5734100135317998, "grad_norm": 0.3522389829158783, "learning_rate": 4.602737511126396e-06, "loss": 0.6441, "step": 1695 }, { "epoch": 0.5737483085250338, "grad_norm": 0.32325372099876404, "learning_rate": 4.602252873446386e-06, "loss": 0.664, "step": 1696 }, { "epoch": 0.5740866035182679, "grad_norm": 0.2904968857765198, "learning_rate": 4.601767965878352e-06, "loss": 0.6285, "step": 1697 }, { "epoch": 0.574424898511502, "grad_norm": 0.297441691160202, "learning_rate": 4.601282788484548e-06, "loss": 0.6422, "step": 1698 }, { "epoch": 0.5747631935047361, "grad_norm": 0.3208168148994446, "learning_rate": 4.60079734132726e-06, "loss": 0.6446, "step": 1699 }, { "epoch": 0.5751014884979703, "grad_norm": 0.30540528893470764, "learning_rate": 4.6003116244688095e-06, "loss": 0.6093, "step": 1700 }, { "epoch": 0.5754397834912043, "grad_norm": 0.2912519872188568, "learning_rate": 4.599825637971554e-06, "loss": 0.6016, "step": 1701 }, { "epoch": 0.5757780784844384, "grad_norm": 0.30942586064338684, "learning_rate": 4.599339381897883e-06, "loss": 0.6199, "step": 1702 }, { "epoch": 0.5761163734776725, "grad_norm": 0.2984882891178131, "learning_rate": 4.598852856310224e-06, "loss": 0.6337, "step": 1703 }, { "epoch": 0.5764546684709067, "grad_norm": 0.28245341777801514, "learning_rate": 4.5983660612710365e-06, "loss": 0.6185, "step": 1704 }, { "epoch": 0.5767929634641408, "grad_norm": 0.29131874442100525, "learning_rate": 4.5978789968428165e-06, "loss": 0.6113, "step": 1705 }, { "epoch": 0.5771312584573748, "grad_norm": 0.3047155439853668, "learning_rate": 4.5973916630880924e-06, "loss": 0.587, "step": 1706 }, { "epoch": 0.5774695534506089, "grad_norm": 0.2988587021827698, "learning_rate": 4.596904060069428e-06, "loss": 0.6216, "step": 1707 }, { "epoch": 0.577807848443843, "grad_norm": 0.28836652636528015, "learning_rate": 4.5964161878494225e-06, "loss": 0.635, "step": 1708 }, { "epoch": 0.5781461434370772, "grad_norm": 0.2934615910053253, "learning_rate": 4.59592804649071e-06, "loss": 0.5957, "step": 1709 }, { "epoch": 0.5784844384303113, "grad_norm": 0.2974570691585541, "learning_rate": 4.595439636055957e-06, "loss": 0.6348, "step": 1710 }, { "epoch": 0.5788227334235453, "grad_norm": 0.28336742520332336, "learning_rate": 4.594950956607867e-06, "loss": 0.6351, "step": 1711 }, { "epoch": 0.5791610284167794, "grad_norm": 0.29395925998687744, "learning_rate": 4.594462008209175e-06, "loss": 0.623, "step": 1712 }, { "epoch": 0.5794993234100135, "grad_norm": 0.28559985756874084, "learning_rate": 4.593972790922654e-06, "loss": 0.6327, "step": 1713 }, { "epoch": 0.5798376184032477, "grad_norm": 0.28623858094215393, "learning_rate": 4.593483304811109e-06, "loss": 0.6377, "step": 1714 }, { "epoch": 0.5801759133964818, "grad_norm": 0.3167601525783539, "learning_rate": 4.592993549937381e-06, "loss": 0.6398, "step": 1715 }, { "epoch": 0.5805142083897158, "grad_norm": 0.30663159489631653, "learning_rate": 4.592503526364344e-06, "loss": 0.641, "step": 1716 }, { "epoch": 0.5808525033829499, "grad_norm": 0.2943854331970215, "learning_rate": 4.592013234154909e-06, "loss": 0.6207, "step": 1717 }, { "epoch": 0.581190798376184, "grad_norm": 0.29026129841804504, "learning_rate": 4.591522673372019e-06, "loss": 0.618, "step": 1718 }, { "epoch": 0.5815290933694182, "grad_norm": 0.28923630714416504, "learning_rate": 4.591031844078653e-06, "loss": 0.6366, "step": 1719 }, { "epoch": 0.5818673883626523, "grad_norm": 0.29991474747657776, "learning_rate": 4.590540746337823e-06, "loss": 0.6473, "step": 1720 }, { "epoch": 0.5822056833558863, "grad_norm": 0.2991500496864319, "learning_rate": 4.590049380212576e-06, "loss": 0.5944, "step": 1721 }, { "epoch": 0.5825439783491204, "grad_norm": 0.2879493832588196, "learning_rate": 4.589557745765994e-06, "loss": 0.6345, "step": 1722 }, { "epoch": 0.5828822733423545, "grad_norm": 0.31147581338882446, "learning_rate": 4.5890658430611944e-06, "loss": 0.6371, "step": 1723 }, { "epoch": 0.5832205683355887, "grad_norm": 0.3145962059497833, "learning_rate": 4.588573672161326e-06, "loss": 0.6414, "step": 1724 }, { "epoch": 0.5835588633288228, "grad_norm": 0.3050592243671417, "learning_rate": 4.588081233129576e-06, "loss": 0.6349, "step": 1725 }, { "epoch": 0.5838971583220568, "grad_norm": 0.3005061745643616, "learning_rate": 4.5875885260291625e-06, "loss": 0.637, "step": 1726 }, { "epoch": 0.5842354533152909, "grad_norm": 0.3005605638027191, "learning_rate": 4.5870955509233394e-06, "loss": 0.6203, "step": 1727 }, { "epoch": 0.584573748308525, "grad_norm": 0.28691211342811584, "learning_rate": 4.586602307875396e-06, "loss": 0.6112, "step": 1728 }, { "epoch": 0.5849120433017592, "grad_norm": 0.292736291885376, "learning_rate": 4.586108796948654e-06, "loss": 0.6356, "step": 1729 }, { "epoch": 0.5852503382949933, "grad_norm": 0.31532350182533264, "learning_rate": 4.585615018206471e-06, "loss": 0.6378, "step": 1730 }, { "epoch": 0.5855886332882273, "grad_norm": 0.31929609179496765, "learning_rate": 4.585120971712239e-06, "loss": 0.6412, "step": 1731 }, { "epoch": 0.5859269282814614, "grad_norm": 0.30665725469589233, "learning_rate": 4.584626657529382e-06, "loss": 0.6164, "step": 1732 }, { "epoch": 0.5862652232746955, "grad_norm": 0.29724955558776855, "learning_rate": 4.584132075721362e-06, "loss": 0.614, "step": 1733 }, { "epoch": 0.5866035182679297, "grad_norm": 0.32470571994781494, "learning_rate": 4.5836372263516734e-06, "loss": 0.6302, "step": 1734 }, { "epoch": 0.5869418132611637, "grad_norm": 0.31625714898109436, "learning_rate": 4.583142109483846e-06, "loss": 0.6166, "step": 1735 }, { "epoch": 0.5872801082543978, "grad_norm": 0.28975987434387207, "learning_rate": 4.582646725181441e-06, "loss": 0.6248, "step": 1736 }, { "epoch": 0.5876184032476319, "grad_norm": 0.31003639101982117, "learning_rate": 4.582151073508057e-06, "loss": 0.61, "step": 1737 }, { "epoch": 0.587956698240866, "grad_norm": 0.2950305640697479, "learning_rate": 4.581655154527326e-06, "loss": 0.6081, "step": 1738 }, { "epoch": 0.5882949932341002, "grad_norm": 0.309980183839798, "learning_rate": 4.581158968302913e-06, "loss": 0.6307, "step": 1739 }, { "epoch": 0.5886332882273342, "grad_norm": 0.2961738109588623, "learning_rate": 4.580662514898522e-06, "loss": 0.635, "step": 1740 }, { "epoch": 0.5889715832205683, "grad_norm": 0.29777249693870544, "learning_rate": 4.580165794377884e-06, "loss": 0.6219, "step": 1741 }, { "epoch": 0.5893098782138024, "grad_norm": 0.2928023338317871, "learning_rate": 4.57966880680477e-06, "loss": 0.6453, "step": 1742 }, { "epoch": 0.5896481732070366, "grad_norm": 0.3013555109500885, "learning_rate": 4.579171552242984e-06, "loss": 0.6127, "step": 1743 }, { "epoch": 0.5899864682002707, "grad_norm": 0.30283522605895996, "learning_rate": 4.578674030756364e-06, "loss": 0.6303, "step": 1744 }, { "epoch": 0.5903247631935047, "grad_norm": 0.3043537735939026, "learning_rate": 4.5781762424087794e-06, "loss": 0.6316, "step": 1745 }, { "epoch": 0.5906630581867388, "grad_norm": 0.2953866422176361, "learning_rate": 4.577678187264139e-06, "loss": 0.6209, "step": 1746 }, { "epoch": 0.5910013531799729, "grad_norm": 0.2934488356113434, "learning_rate": 4.577179865386382e-06, "loss": 0.614, "step": 1747 }, { "epoch": 0.591339648173207, "grad_norm": 0.30841347575187683, "learning_rate": 4.576681276839483e-06, "loss": 0.5964, "step": 1748 }, { "epoch": 0.5916779431664412, "grad_norm": 0.29147687554359436, "learning_rate": 4.576182421687452e-06, "loss": 0.6147, "step": 1749 }, { "epoch": 0.5920162381596752, "grad_norm": 0.3026900589466095, "learning_rate": 4.5756832999943326e-06, "loss": 0.641, "step": 1750 }, { "epoch": 0.5923545331529093, "grad_norm": 0.3127942681312561, "learning_rate": 4.5751839118242e-06, "loss": 0.6295, "step": 1751 }, { "epoch": 0.5926928281461434, "grad_norm": 0.2994759976863861, "learning_rate": 4.5746842572411685e-06, "loss": 0.6025, "step": 1752 }, { "epoch": 0.5930311231393776, "grad_norm": 0.3004252016544342, "learning_rate": 4.5741843363093814e-06, "loss": 0.6157, "step": 1753 }, { "epoch": 0.5933694181326117, "grad_norm": 0.2946246862411499, "learning_rate": 4.573684149093021e-06, "loss": 0.6033, "step": 1754 }, { "epoch": 0.5937077131258457, "grad_norm": 0.30164048075675964, "learning_rate": 4.5731836956563e-06, "loss": 0.6602, "step": 1755 }, { "epoch": 0.5940460081190798, "grad_norm": 0.3036515414714813, "learning_rate": 4.5726829760634685e-06, "loss": 0.6136, "step": 1756 }, { "epoch": 0.5943843031123139, "grad_norm": 0.29374492168426514, "learning_rate": 4.572181990378807e-06, "loss": 0.6153, "step": 1757 }, { "epoch": 0.5947225981055481, "grad_norm": 0.2989799380302429, "learning_rate": 4.571680738666633e-06, "loss": 0.6277, "step": 1758 }, { "epoch": 0.5950608930987822, "grad_norm": 0.31341370940208435, "learning_rate": 4.571179220991298e-06, "loss": 0.6217, "step": 1759 }, { "epoch": 0.5953991880920162, "grad_norm": 0.298629492521286, "learning_rate": 4.570677437417186e-06, "loss": 0.6235, "step": 1760 }, { "epoch": 0.5957374830852503, "grad_norm": 0.29496222734451294, "learning_rate": 4.570175388008717e-06, "loss": 0.6526, "step": 1761 }, { "epoch": 0.5960757780784844, "grad_norm": 0.2959974408149719, "learning_rate": 4.569673072830344e-06, "loss": 0.629, "step": 1762 }, { "epoch": 0.5964140730717186, "grad_norm": 0.29397693276405334, "learning_rate": 4.569170491946554e-06, "loss": 0.6256, "step": 1763 }, { "epoch": 0.5967523680649527, "grad_norm": 0.29911816120147705, "learning_rate": 4.56866764542187e-06, "loss": 0.6418, "step": 1764 }, { "epoch": 0.5970906630581867, "grad_norm": 0.30201956629753113, "learning_rate": 4.568164533320846e-06, "loss": 0.6336, "step": 1765 }, { "epoch": 0.5974289580514208, "grad_norm": 0.3169977366924286, "learning_rate": 4.567661155708072e-06, "loss": 0.6216, "step": 1766 }, { "epoch": 0.597767253044655, "grad_norm": 0.2975189983844757, "learning_rate": 4.567157512648173e-06, "loss": 0.621, "step": 1767 }, { "epoch": 0.5981055480378891, "grad_norm": 0.3022186756134033, "learning_rate": 4.566653604205805e-06, "loss": 0.6125, "step": 1768 }, { "epoch": 0.5984438430311232, "grad_norm": 0.3099009692668915, "learning_rate": 4.566149430445662e-06, "loss": 0.6065, "step": 1769 }, { "epoch": 0.5987821380243572, "grad_norm": 0.3135291635990143, "learning_rate": 4.565644991432468e-06, "loss": 0.6071, "step": 1770 }, { "epoch": 0.5991204330175913, "grad_norm": 0.3010227084159851, "learning_rate": 4.565140287230985e-06, "loss": 0.6388, "step": 1771 }, { "epoch": 0.5994587280108254, "grad_norm": 0.2999197840690613, "learning_rate": 4.564635317906006e-06, "loss": 0.6355, "step": 1772 }, { "epoch": 0.5997970230040596, "grad_norm": 0.29599177837371826, "learning_rate": 4.564130083522359e-06, "loss": 0.633, "step": 1773 }, { "epoch": 0.6001353179972937, "grad_norm": 0.3240567743778229, "learning_rate": 4.563624584144907e-06, "loss": 0.6322, "step": 1774 }, { "epoch": 0.6004736129905277, "grad_norm": 0.3124981224536896, "learning_rate": 4.563118819838545e-06, "loss": 0.6321, "step": 1775 }, { "epoch": 0.6008119079837618, "grad_norm": 0.29879269003868103, "learning_rate": 4.562612790668204e-06, "loss": 0.6083, "step": 1776 }, { "epoch": 0.601150202976996, "grad_norm": 0.2953958213329315, "learning_rate": 4.5621064966988485e-06, "loss": 0.6133, "step": 1777 }, { "epoch": 0.6014884979702301, "grad_norm": 0.2944336533546448, "learning_rate": 4.561599937995477e-06, "loss": 0.6442, "step": 1778 }, { "epoch": 0.6018267929634641, "grad_norm": 0.30257466435432434, "learning_rate": 4.561093114623119e-06, "loss": 0.6165, "step": 1779 }, { "epoch": 0.6021650879566982, "grad_norm": 0.28953441977500916, "learning_rate": 4.560586026646845e-06, "loss": 0.6142, "step": 1780 }, { "epoch": 0.6025033829499323, "grad_norm": 0.30395811796188354, "learning_rate": 4.560078674131752e-06, "loss": 0.6217, "step": 1781 }, { "epoch": 0.6028416779431665, "grad_norm": 0.28763335943222046, "learning_rate": 4.559571057142975e-06, "loss": 0.5914, "step": 1782 }, { "epoch": 0.6031799729364006, "grad_norm": 0.3176610767841339, "learning_rate": 4.559063175745682e-06, "loss": 0.6282, "step": 1783 }, { "epoch": 0.6035182679296346, "grad_norm": 0.2906036674976349, "learning_rate": 4.558555030005075e-06, "loss": 0.616, "step": 1784 }, { "epoch": 0.6038565629228687, "grad_norm": 0.2957024872303009, "learning_rate": 4.55804661998639e-06, "loss": 0.651, "step": 1785 }, { "epoch": 0.6041948579161028, "grad_norm": 0.30133163928985596, "learning_rate": 4.557537945754898e-06, "loss": 0.6547, "step": 1786 }, { "epoch": 0.604533152909337, "grad_norm": 0.3015631139278412, "learning_rate": 4.5570290073759e-06, "loss": 0.6307, "step": 1787 }, { "epoch": 0.6048714479025711, "grad_norm": 0.28448137640953064, "learning_rate": 4.556519804914736e-06, "loss": 0.6074, "step": 1788 }, { "epoch": 0.6052097428958051, "grad_norm": 0.286736398935318, "learning_rate": 4.556010338436777e-06, "loss": 0.6448, "step": 1789 }, { "epoch": 0.6055480378890392, "grad_norm": 0.30498284101486206, "learning_rate": 4.555500608007428e-06, "loss": 0.6177, "step": 1790 }, { "epoch": 0.6058863328822733, "grad_norm": 0.32178056240081787, "learning_rate": 4.55499061369213e-06, "loss": 0.6371, "step": 1791 }, { "epoch": 0.6062246278755075, "grad_norm": 0.2903531491756439, "learning_rate": 4.554480355556354e-06, "loss": 0.6149, "step": 1792 }, { "epoch": 0.6065629228687416, "grad_norm": 0.30403220653533936, "learning_rate": 4.553969833665609e-06, "loss": 0.6309, "step": 1793 }, { "epoch": 0.6069012178619756, "grad_norm": 0.3077298104763031, "learning_rate": 4.553459048085435e-06, "loss": 0.5958, "step": 1794 }, { "epoch": 0.6072395128552097, "grad_norm": 0.292605459690094, "learning_rate": 4.552947998881407e-06, "loss": 0.6282, "step": 1795 }, { "epoch": 0.6075778078484438, "grad_norm": 0.29466307163238525, "learning_rate": 4.5524366861191345e-06, "loss": 0.6057, "step": 1796 }, { "epoch": 0.607916102841678, "grad_norm": 0.29204457998275757, "learning_rate": 4.551925109864259e-06, "loss": 0.6301, "step": 1797 }, { "epoch": 0.6082543978349121, "grad_norm": 0.31700393557548523, "learning_rate": 4.551413270182457e-06, "loss": 0.6191, "step": 1798 }, { "epoch": 0.6085926928281461, "grad_norm": 0.3024339973926544, "learning_rate": 4.550901167139439e-06, "loss": 0.6192, "step": 1799 }, { "epoch": 0.6089309878213802, "grad_norm": 0.3239328861236572, "learning_rate": 4.550388800800948e-06, "loss": 0.6272, "step": 1800 }, { "epoch": 0.6092692828146143, "grad_norm": 0.3086513876914978, "learning_rate": 4.549876171232763e-06, "loss": 0.6001, "step": 1801 }, { "epoch": 0.6096075778078485, "grad_norm": 0.3088929057121277, "learning_rate": 4.549363278500696e-06, "loss": 0.6319, "step": 1802 }, { "epoch": 0.6099458728010826, "grad_norm": 0.3044373095035553, "learning_rate": 4.548850122670591e-06, "loss": 0.6127, "step": 1803 }, { "epoch": 0.6102841677943166, "grad_norm": 0.31143665313720703, "learning_rate": 4.548336703808328e-06, "loss": 0.6246, "step": 1804 }, { "epoch": 0.6106224627875507, "grad_norm": 0.3061283826828003, "learning_rate": 4.547823021979819e-06, "loss": 0.6111, "step": 1805 }, { "epoch": 0.6109607577807848, "grad_norm": 0.31211286783218384, "learning_rate": 4.547309077251012e-06, "loss": 0.6391, "step": 1806 }, { "epoch": 0.611299052774019, "grad_norm": 0.29427969455718994, "learning_rate": 4.546794869687887e-06, "loss": 0.6101, "step": 1807 }, { "epoch": 0.6116373477672531, "grad_norm": 0.314834326505661, "learning_rate": 4.546280399356457e-06, "loss": 0.6223, "step": 1808 }, { "epoch": 0.6119756427604871, "grad_norm": 0.28986093401908875, "learning_rate": 4.5457656663227716e-06, "loss": 0.6101, "step": 1809 }, { "epoch": 0.6123139377537212, "grad_norm": 0.3071696162223816, "learning_rate": 4.54525067065291e-06, "loss": 0.6115, "step": 1810 }, { "epoch": 0.6126522327469553, "grad_norm": 0.30934277176856995, "learning_rate": 4.54473541241299e-06, "loss": 0.636, "step": 1811 }, { "epoch": 0.6129905277401895, "grad_norm": 0.30213168263435364, "learning_rate": 4.54421989166916e-06, "loss": 0.6123, "step": 1812 }, { "epoch": 0.6133288227334236, "grad_norm": 0.3003959059715271, "learning_rate": 4.5437041084876024e-06, "loss": 0.6321, "step": 1813 }, { "epoch": 0.6136671177266576, "grad_norm": 0.2956574261188507, "learning_rate": 4.543188062934534e-06, "loss": 0.6395, "step": 1814 }, { "epoch": 0.6140054127198917, "grad_norm": 0.3125058710575104, "learning_rate": 4.542671755076205e-06, "loss": 0.5955, "step": 1815 }, { "epoch": 0.6143437077131259, "grad_norm": 0.30371323227882385, "learning_rate": 4.542155184978898e-06, "loss": 0.6501, "step": 1816 }, { "epoch": 0.61468200270636, "grad_norm": 0.3121377229690552, "learning_rate": 4.541638352708931e-06, "loss": 0.6133, "step": 1817 }, { "epoch": 0.6150202976995941, "grad_norm": 0.30062562227249146, "learning_rate": 4.541121258332657e-06, "loss": 0.6259, "step": 1818 }, { "epoch": 0.6153585926928281, "grad_norm": 0.306066632270813, "learning_rate": 4.540603901916458e-06, "loss": 0.611, "step": 1819 }, { "epoch": 0.6156968876860622, "grad_norm": 0.3028794229030609, "learning_rate": 4.540086283526755e-06, "loss": 0.6574, "step": 1820 }, { "epoch": 0.6160351826792964, "grad_norm": 0.3133271038532257, "learning_rate": 4.539568403229998e-06, "loss": 0.6078, "step": 1821 }, { "epoch": 0.6163734776725305, "grad_norm": 0.28491806983947754, "learning_rate": 4.539050261092672e-06, "loss": 0.6401, "step": 1822 }, { "epoch": 0.6167117726657646, "grad_norm": 0.30179521441459656, "learning_rate": 4.5385318571812994e-06, "loss": 0.6206, "step": 1823 }, { "epoch": 0.6170500676589986, "grad_norm": 0.2998371124267578, "learning_rate": 4.538013191562431e-06, "loss": 0.5907, "step": 1824 }, { "epoch": 0.6173883626522327, "grad_norm": 0.30136269330978394, "learning_rate": 4.537494264302653e-06, "loss": 0.6058, "step": 1825 }, { "epoch": 0.6177266576454669, "grad_norm": 0.295851469039917, "learning_rate": 4.536975075468587e-06, "loss": 0.6031, "step": 1826 }, { "epoch": 0.618064952638701, "grad_norm": 0.3003900349140167, "learning_rate": 4.5364556251268855e-06, "loss": 0.625, "step": 1827 }, { "epoch": 0.618403247631935, "grad_norm": 0.29929232597351074, "learning_rate": 4.535935913344236e-06, "loss": 0.6188, "step": 1828 }, { "epoch": 0.6187415426251691, "grad_norm": 0.30480554699897766, "learning_rate": 4.535415940187359e-06, "loss": 0.6096, "step": 1829 }, { "epoch": 0.6190798376184032, "grad_norm": 0.29189208149909973, "learning_rate": 4.534895705723009e-06, "loss": 0.6328, "step": 1830 }, { "epoch": 0.6194181326116374, "grad_norm": 0.290440171957016, "learning_rate": 4.534375210017975e-06, "loss": 0.6324, "step": 1831 }, { "epoch": 0.6197564276048715, "grad_norm": 0.3073995113372803, "learning_rate": 4.5338544531390775e-06, "loss": 0.6349, "step": 1832 }, { "epoch": 0.6200947225981055, "grad_norm": 0.29721468687057495, "learning_rate": 4.5333334351531695e-06, "loss": 0.6122, "step": 1833 }, { "epoch": 0.6204330175913396, "grad_norm": 0.30599215626716614, "learning_rate": 4.5328121561271435e-06, "loss": 0.6392, "step": 1834 }, { "epoch": 0.6207713125845737, "grad_norm": 0.2927095592021942, "learning_rate": 4.532290616127919e-06, "loss": 0.6602, "step": 1835 }, { "epoch": 0.6211096075778079, "grad_norm": 0.307754784822464, "learning_rate": 4.531768815222452e-06, "loss": 0.6291, "step": 1836 }, { "epoch": 0.621447902571042, "grad_norm": 0.3044011890888214, "learning_rate": 4.531246753477731e-06, "loss": 0.6317, "step": 1837 }, { "epoch": 0.621786197564276, "grad_norm": 0.2979058623313904, "learning_rate": 4.530724430960779e-06, "loss": 0.637, "step": 1838 }, { "epoch": 0.6221244925575101, "grad_norm": 0.31330716609954834, "learning_rate": 4.530201847738652e-06, "loss": 0.6338, "step": 1839 }, { "epoch": 0.6224627875507442, "grad_norm": 0.3006748557090759, "learning_rate": 4.52967900387844e-06, "loss": 0.6259, "step": 1840 }, { "epoch": 0.6228010825439784, "grad_norm": 0.2865881323814392, "learning_rate": 4.529155899447265e-06, "loss": 0.6105, "step": 1841 }, { "epoch": 0.6231393775372125, "grad_norm": 0.30335038900375366, "learning_rate": 4.528632534512283e-06, "loss": 0.6053, "step": 1842 }, { "epoch": 0.6234776725304465, "grad_norm": 0.30165183544158936, "learning_rate": 4.5281089091406845e-06, "loss": 0.6585, "step": 1843 }, { "epoch": 0.6238159675236806, "grad_norm": 0.3047686517238617, "learning_rate": 4.527585023399693e-06, "loss": 0.6155, "step": 1844 }, { "epoch": 0.6241542625169147, "grad_norm": 0.2917962372303009, "learning_rate": 4.527060877356564e-06, "loss": 0.6174, "step": 1845 }, { "epoch": 0.6244925575101489, "grad_norm": 0.3098011314868927, "learning_rate": 4.526536471078589e-06, "loss": 0.6362, "step": 1846 }, { "epoch": 0.624830852503383, "grad_norm": 0.3431854248046875, "learning_rate": 4.526011804633091e-06, "loss": 0.6487, "step": 1847 }, { "epoch": 0.625169147496617, "grad_norm": 0.3013041615486145, "learning_rate": 4.525486878087426e-06, "loss": 0.6037, "step": 1848 }, { "epoch": 0.6255074424898511, "grad_norm": 0.3066675066947937, "learning_rate": 4.5249616915089846e-06, "loss": 0.6335, "step": 1849 }, { "epoch": 0.6258457374830853, "grad_norm": 0.34530869126319885, "learning_rate": 4.5244362449651915e-06, "loss": 0.6089, "step": 1850 }, { "epoch": 0.6261840324763194, "grad_norm": 0.3135780394077301, "learning_rate": 4.5239105385235035e-06, "loss": 0.627, "step": 1851 }, { "epoch": 0.6265223274695535, "grad_norm": 0.3080191910266876, "learning_rate": 4.523384572251409e-06, "loss": 0.6062, "step": 1852 }, { "epoch": 0.6268606224627875, "grad_norm": 0.3695083558559418, "learning_rate": 4.5228583462164345e-06, "loss": 0.6111, "step": 1853 }, { "epoch": 0.6271989174560216, "grad_norm": 0.3244759738445282, "learning_rate": 4.522331860486136e-06, "loss": 0.6436, "step": 1854 }, { "epoch": 0.6275372124492558, "grad_norm": 0.3044929802417755, "learning_rate": 4.521805115128103e-06, "loss": 0.6357, "step": 1855 }, { "epoch": 0.6278755074424899, "grad_norm": 0.31515035033226013, "learning_rate": 4.52127811020996e-06, "loss": 0.5963, "step": 1856 }, { "epoch": 0.628213802435724, "grad_norm": 0.3156471252441406, "learning_rate": 4.520750845799364e-06, "loss": 0.6174, "step": 1857 }, { "epoch": 0.628552097428958, "grad_norm": 0.3069937527179718, "learning_rate": 4.520223321964007e-06, "loss": 0.6293, "step": 1858 }, { "epoch": 0.6288903924221921, "grad_norm": 0.2919570207595825, "learning_rate": 4.51969553877161e-06, "loss": 0.6259, "step": 1859 }, { "epoch": 0.6292286874154263, "grad_norm": 0.3123161494731903, "learning_rate": 4.5191674962899314e-06, "loss": 0.6201, "step": 1860 }, { "epoch": 0.6295669824086604, "grad_norm": 0.30140236020088196, "learning_rate": 4.518639194586762e-06, "loss": 0.6178, "step": 1861 }, { "epoch": 0.6299052774018945, "grad_norm": 0.2950606644153595, "learning_rate": 4.518110633729924e-06, "loss": 0.6103, "step": 1862 }, { "epoch": 0.6302435723951285, "grad_norm": 0.30651673674583435, "learning_rate": 4.517581813787275e-06, "loss": 0.6492, "step": 1863 }, { "epoch": 0.6305818673883626, "grad_norm": 0.3081085681915283, "learning_rate": 4.517052734826706e-06, "loss": 0.6305, "step": 1864 }, { "epoch": 0.6309201623815968, "grad_norm": 0.30926793813705444, "learning_rate": 4.5165233969161394e-06, "loss": 0.6182, "step": 1865 }, { "epoch": 0.6312584573748309, "grad_norm": 0.295867383480072, "learning_rate": 4.515993800123531e-06, "loss": 0.6139, "step": 1866 }, { "epoch": 0.631596752368065, "grad_norm": 0.3241640627384186, "learning_rate": 4.515463944516872e-06, "loss": 0.6298, "step": 1867 }, { "epoch": 0.631935047361299, "grad_norm": 0.32582196593284607, "learning_rate": 4.5149338301641845e-06, "loss": 0.6066, "step": 1868 }, { "epoch": 0.6322733423545331, "grad_norm": 0.3094950020313263, "learning_rate": 4.514403457133526e-06, "loss": 0.6115, "step": 1869 }, { "epoch": 0.6326116373477673, "grad_norm": 0.3076767921447754, "learning_rate": 4.513872825492984e-06, "loss": 0.6482, "step": 1870 }, { "epoch": 0.6329499323410014, "grad_norm": 0.3136141002178192, "learning_rate": 4.513341935310684e-06, "loss": 0.6598, "step": 1871 }, { "epoch": 0.6332882273342354, "grad_norm": 0.34437096118927, "learning_rate": 4.5128107866547795e-06, "loss": 0.6144, "step": 1872 }, { "epoch": 0.6336265223274695, "grad_norm": 0.29778510332107544, "learning_rate": 4.512279379593461e-06, "loss": 0.6197, "step": 1873 }, { "epoch": 0.6339648173207036, "grad_norm": 0.32931333780288696, "learning_rate": 4.5117477141949485e-06, "loss": 0.6288, "step": 1874 }, { "epoch": 0.6343031123139378, "grad_norm": 0.335248202085495, "learning_rate": 4.511215790527501e-06, "loss": 0.6151, "step": 1875 }, { "epoch": 0.6346414073071719, "grad_norm": 0.2965134382247925, "learning_rate": 4.510683608659404e-06, "loss": 0.62, "step": 1876 }, { "epoch": 0.6349797023004059, "grad_norm": 0.294978529214859, "learning_rate": 4.51015116865898e-06, "loss": 0.6222, "step": 1877 }, { "epoch": 0.63531799729364, "grad_norm": 0.33229756355285645, "learning_rate": 4.509618470594584e-06, "loss": 0.6474, "step": 1878 }, { "epoch": 0.6356562922868741, "grad_norm": 0.3117801547050476, "learning_rate": 4.509085514534606e-06, "loss": 0.6074, "step": 1879 }, { "epoch": 0.6359945872801083, "grad_norm": 0.30003616213798523, "learning_rate": 4.508552300547463e-06, "loss": 0.6186, "step": 1880 }, { "epoch": 0.6363328822733424, "grad_norm": 0.29257848858833313, "learning_rate": 4.508018828701613e-06, "loss": 0.6297, "step": 1881 }, { "epoch": 0.6366711772665764, "grad_norm": 0.4021831750869751, "learning_rate": 4.507485099065541e-06, "loss": 0.613, "step": 1882 }, { "epoch": 0.6370094722598105, "grad_norm": 0.3081733286380768, "learning_rate": 4.506951111707768e-06, "loss": 0.6497, "step": 1883 }, { "epoch": 0.6373477672530447, "grad_norm": 0.29754048585891724, "learning_rate": 4.506416866696849e-06, "loss": 0.6135, "step": 1884 }, { "epoch": 0.6376860622462788, "grad_norm": 0.3275603950023651, "learning_rate": 4.505882364101367e-06, "loss": 0.6273, "step": 1885 }, { "epoch": 0.6380243572395129, "grad_norm": 0.32382166385650635, "learning_rate": 4.505347603989946e-06, "loss": 0.6264, "step": 1886 }, { "epoch": 0.6383626522327469, "grad_norm": 0.31701892614364624, "learning_rate": 4.504812586431236e-06, "loss": 0.6106, "step": 1887 }, { "epoch": 0.638700947225981, "grad_norm": 0.2922547161579132, "learning_rate": 4.5042773114939225e-06, "loss": 0.6341, "step": 1888 }, { "epoch": 0.6390392422192152, "grad_norm": 0.3007252812385559, "learning_rate": 4.503741779246726e-06, "loss": 0.6408, "step": 1889 }, { "epoch": 0.6393775372124493, "grad_norm": 0.3614402413368225, "learning_rate": 4.503205989758397e-06, "loss": 0.6023, "step": 1890 }, { "epoch": 0.6397158322056834, "grad_norm": 0.3172839283943176, "learning_rate": 4.502669943097721e-06, "loss": 0.6552, "step": 1891 }, { "epoch": 0.6400541271989174, "grad_norm": 0.3163464367389679, "learning_rate": 4.502133639333516e-06, "loss": 0.6254, "step": 1892 }, { "epoch": 0.6403924221921515, "grad_norm": 0.3306618928909302, "learning_rate": 4.501597078534633e-06, "loss": 0.5947, "step": 1893 }, { "epoch": 0.6407307171853857, "grad_norm": 0.31476470828056335, "learning_rate": 4.501060260769955e-06, "loss": 0.6453, "step": 1894 }, { "epoch": 0.6410690121786198, "grad_norm": 0.3184593915939331, "learning_rate": 4.5005231861083994e-06, "loss": 0.6365, "step": 1895 }, { "epoch": 0.6414073071718539, "grad_norm": 0.3020038306713104, "learning_rate": 4.499985854618916e-06, "loss": 0.6286, "step": 1896 }, { "epoch": 0.6417456021650879, "grad_norm": 0.40171536803245544, "learning_rate": 4.499448266370487e-06, "loss": 0.628, "step": 1897 }, { "epoch": 0.642083897158322, "grad_norm": 0.30737072229385376, "learning_rate": 4.49891042143213e-06, "loss": 0.6304, "step": 1898 }, { "epoch": 0.6424221921515562, "grad_norm": 0.31211888790130615, "learning_rate": 4.498372319872892e-06, "loss": 0.6183, "step": 1899 }, { "epoch": 0.6427604871447903, "grad_norm": 0.31268051266670227, "learning_rate": 4.497833961761855e-06, "loss": 0.6368, "step": 1900 }, { "epoch": 0.6430987821380244, "grad_norm": 0.3052826225757599, "learning_rate": 4.497295347168134e-06, "loss": 0.6023, "step": 1901 }, { "epoch": 0.6434370771312584, "grad_norm": 0.30680912733078003, "learning_rate": 4.496756476160876e-06, "loss": 0.6088, "step": 1902 }, { "epoch": 0.6437753721244925, "grad_norm": 0.2901969850063324, "learning_rate": 4.496217348809263e-06, "loss": 0.6357, "step": 1903 }, { "epoch": 0.6441136671177267, "grad_norm": 0.2912886440753937, "learning_rate": 4.495677965182506e-06, "loss": 0.6027, "step": 1904 }, { "epoch": 0.6444519621109608, "grad_norm": 0.3001358211040497, "learning_rate": 4.495138325349854e-06, "loss": 0.6284, "step": 1905 }, { "epoch": 0.6447902571041949, "grad_norm": 0.32089850306510925, "learning_rate": 4.494598429380583e-06, "loss": 0.6001, "step": 1906 }, { "epoch": 0.6451285520974289, "grad_norm": 0.3067314326763153, "learning_rate": 4.494058277344008e-06, "loss": 0.6002, "step": 1907 }, { "epoch": 0.645466847090663, "grad_norm": 0.29842349886894226, "learning_rate": 4.493517869309472e-06, "loss": 0.5946, "step": 1908 }, { "epoch": 0.6458051420838972, "grad_norm": 0.3103325068950653, "learning_rate": 4.492977205346354e-06, "loss": 0.6184, "step": 1909 }, { "epoch": 0.6461434370771313, "grad_norm": 0.29811927676200867, "learning_rate": 4.492436285524063e-06, "loss": 0.6304, "step": 1910 }, { "epoch": 0.6464817320703654, "grad_norm": 0.2993214726448059, "learning_rate": 4.4918951099120445e-06, "loss": 0.6072, "step": 1911 }, { "epoch": 0.6468200270635994, "grad_norm": 0.2939266562461853, "learning_rate": 4.491353678579774e-06, "loss": 0.6187, "step": 1912 }, { "epoch": 0.6471583220568335, "grad_norm": 0.3164563775062561, "learning_rate": 4.49081199159676e-06, "loss": 0.6248, "step": 1913 }, { "epoch": 0.6474966170500677, "grad_norm": 0.30701878666877747, "learning_rate": 4.490270049032546e-06, "loss": 0.6345, "step": 1914 }, { "epoch": 0.6478349120433018, "grad_norm": 0.28954970836639404, "learning_rate": 4.489727850956705e-06, "loss": 0.6065, "step": 1915 }, { "epoch": 0.6481732070365359, "grad_norm": 0.2986772656440735, "learning_rate": 4.489185397438846e-06, "loss": 0.614, "step": 1916 }, { "epoch": 0.6485115020297699, "grad_norm": 0.2876133620738983, "learning_rate": 4.4886426885486075e-06, "loss": 0.6303, "step": 1917 }, { "epoch": 0.648849797023004, "grad_norm": 0.30175134539604187, "learning_rate": 4.4880997243556654e-06, "loss": 0.5836, "step": 1918 }, { "epoch": 0.6491880920162382, "grad_norm": 0.2978866398334503, "learning_rate": 4.4875565049297235e-06, "loss": 0.6164, "step": 1919 }, { "epoch": 0.6495263870094723, "grad_norm": 0.2987023890018463, "learning_rate": 4.487013030340522e-06, "loss": 0.6097, "step": 1920 }, { "epoch": 0.6498646820027063, "grad_norm": 0.3010125160217285, "learning_rate": 4.4864693006578305e-06, "loss": 0.6113, "step": 1921 }, { "epoch": 0.6502029769959404, "grad_norm": 0.30306074023246765, "learning_rate": 4.485925315951456e-06, "loss": 0.6327, "step": 1922 }, { "epoch": 0.6505412719891746, "grad_norm": 0.3026984930038452, "learning_rate": 4.4853810762912335e-06, "loss": 0.614, "step": 1923 }, { "epoch": 0.6508795669824087, "grad_norm": 0.2981746196746826, "learning_rate": 4.484836581747032e-06, "loss": 0.6354, "step": 1924 }, { "epoch": 0.6512178619756428, "grad_norm": 0.2966022491455078, "learning_rate": 4.4842918323887555e-06, "loss": 0.6432, "step": 1925 }, { "epoch": 0.6515561569688768, "grad_norm": 0.286921888589859, "learning_rate": 4.483746828286339e-06, "loss": 0.6112, "step": 1926 }, { "epoch": 0.6518944519621109, "grad_norm": 0.3007039427757263, "learning_rate": 4.4832015695097484e-06, "loss": 0.6263, "step": 1927 }, { "epoch": 0.652232746955345, "grad_norm": 0.30770787596702576, "learning_rate": 4.482656056128986e-06, "loss": 0.6096, "step": 1928 }, { "epoch": 0.6525710419485792, "grad_norm": 0.2954249978065491, "learning_rate": 4.482110288214087e-06, "loss": 0.6103, "step": 1929 }, { "epoch": 0.6529093369418133, "grad_norm": 0.30537140369415283, "learning_rate": 4.481564265835113e-06, "loss": 0.6444, "step": 1930 }, { "epoch": 0.6532476319350473, "grad_norm": 0.29392582178115845, "learning_rate": 4.481017989062165e-06, "loss": 0.619, "step": 1931 }, { "epoch": 0.6535859269282814, "grad_norm": 0.3098503053188324, "learning_rate": 4.480471457965374e-06, "loss": 0.6141, "step": 1932 }, { "epoch": 0.6539242219215156, "grad_norm": 0.3279946446418762, "learning_rate": 4.479924672614903e-06, "loss": 0.6255, "step": 1933 }, { "epoch": 0.6542625169147497, "grad_norm": 0.30103981494903564, "learning_rate": 4.4793776330809504e-06, "loss": 0.6332, "step": 1934 }, { "epoch": 0.6546008119079838, "grad_norm": 0.32124587893486023, "learning_rate": 4.478830339433744e-06, "loss": 0.6409, "step": 1935 }, { "epoch": 0.6549391069012178, "grad_norm": 0.31162700057029724, "learning_rate": 4.478282791743546e-06, "loss": 0.6432, "step": 1936 }, { "epoch": 0.6552774018944519, "grad_norm": 0.3058624565601349, "learning_rate": 4.477734990080649e-06, "loss": 0.6413, "step": 1937 }, { "epoch": 0.6556156968876861, "grad_norm": 0.29996979236602783, "learning_rate": 4.477186934515383e-06, "loss": 0.6187, "step": 1938 }, { "epoch": 0.6559539918809202, "grad_norm": 0.2978936433792114, "learning_rate": 4.4766386251181055e-06, "loss": 0.6186, "step": 1939 }, { "epoch": 0.6562922868741543, "grad_norm": 0.2992279529571533, "learning_rate": 4.476090061959209e-06, "loss": 0.6053, "step": 1940 }, { "epoch": 0.6566305818673883, "grad_norm": 0.320601224899292, "learning_rate": 4.475541245109118e-06, "loss": 0.5902, "step": 1941 }, { "epoch": 0.6569688768606224, "grad_norm": 0.32168227434158325, "learning_rate": 4.474992174638292e-06, "loss": 0.642, "step": 1942 }, { "epoch": 0.6573071718538566, "grad_norm": 0.29337483644485474, "learning_rate": 4.474442850617218e-06, "loss": 0.6184, "step": 1943 }, { "epoch": 0.6576454668470907, "grad_norm": 0.2891935408115387, "learning_rate": 4.47389327311642e-06, "loss": 0.6125, "step": 1944 }, { "epoch": 0.6579837618403248, "grad_norm": 0.3021150529384613, "learning_rate": 4.473343442206452e-06, "loss": 0.6144, "step": 1945 }, { "epoch": 0.6583220568335588, "grad_norm": 0.30139896273612976, "learning_rate": 4.472793357957902e-06, "loss": 0.65, "step": 1946 }, { "epoch": 0.658660351826793, "grad_norm": 0.3164536654949188, "learning_rate": 4.4722430204413905e-06, "loss": 0.6289, "step": 1947 }, { "epoch": 0.6589986468200271, "grad_norm": 0.29989132285118103, "learning_rate": 4.47169242972757e-06, "loss": 0.6043, "step": 1948 }, { "epoch": 0.6593369418132612, "grad_norm": 0.30992090702056885, "learning_rate": 4.4711415858871245e-06, "loss": 0.6203, "step": 1949 }, { "epoch": 0.6596752368064953, "grad_norm": 0.31831830739974976, "learning_rate": 4.470590488990773e-06, "loss": 0.6217, "step": 1950 }, { "epoch": 0.6600135317997293, "grad_norm": 0.30647245049476624, "learning_rate": 4.470039139109265e-06, "loss": 0.5992, "step": 1951 }, { "epoch": 0.6603518267929634, "grad_norm": 0.30912190675735474, "learning_rate": 4.469487536313381e-06, "loss": 0.6199, "step": 1952 }, { "epoch": 0.6606901217861976, "grad_norm": 0.31395310163497925, "learning_rate": 4.46893568067394e-06, "loss": 0.6285, "step": 1953 }, { "epoch": 0.6610284167794317, "grad_norm": 0.3426959812641144, "learning_rate": 4.468383572261785e-06, "loss": 0.6317, "step": 1954 }, { "epoch": 0.6613667117726658, "grad_norm": 0.3148653209209442, "learning_rate": 4.467831211147799e-06, "loss": 0.6212, "step": 1955 }, { "epoch": 0.6617050067658998, "grad_norm": 0.3014954626560211, "learning_rate": 4.4672785974028935e-06, "loss": 0.6046, "step": 1956 }, { "epoch": 0.662043301759134, "grad_norm": 0.33388400077819824, "learning_rate": 4.466725731098013e-06, "loss": 0.6218, "step": 1957 }, { "epoch": 0.6623815967523681, "grad_norm": 0.3054538071155548, "learning_rate": 4.466172612304135e-06, "loss": 0.6546, "step": 1958 }, { "epoch": 0.6627198917456022, "grad_norm": 0.31339314579963684, "learning_rate": 4.46561924109227e-06, "loss": 0.627, "step": 1959 }, { "epoch": 0.6630581867388363, "grad_norm": 0.2950597405433655, "learning_rate": 4.465065617533457e-06, "loss": 0.6252, "step": 1960 }, { "epoch": 0.6633964817320703, "grad_norm": 0.3085661828517914, "learning_rate": 4.464511741698773e-06, "loss": 0.6179, "step": 1961 }, { "epoch": 0.6637347767253045, "grad_norm": 0.3314128518104553, "learning_rate": 4.463957613659325e-06, "loss": 0.6212, "step": 1962 }, { "epoch": 0.6640730717185386, "grad_norm": 0.3055969774723053, "learning_rate": 4.46340323348625e-06, "loss": 0.6241, "step": 1963 }, { "epoch": 0.6644113667117727, "grad_norm": 0.3067176938056946, "learning_rate": 4.462848601250721e-06, "loss": 0.6261, "step": 1964 }, { "epoch": 0.6647496617050067, "grad_norm": 0.3050393760204315, "learning_rate": 4.462293717023943e-06, "loss": 0.6316, "step": 1965 }, { "epoch": 0.6650879566982408, "grad_norm": 0.32629260420799255, "learning_rate": 4.461738580877151e-06, "loss": 0.6313, "step": 1966 }, { "epoch": 0.665426251691475, "grad_norm": 0.29928138852119446, "learning_rate": 4.461183192881612e-06, "loss": 0.6434, "step": 1967 }, { "epoch": 0.6657645466847091, "grad_norm": 0.308493435382843, "learning_rate": 4.46062755310863e-06, "loss": 0.6266, "step": 1968 }, { "epoch": 0.6661028416779432, "grad_norm": 0.31029900908470154, "learning_rate": 4.4600716616295355e-06, "loss": 0.6187, "step": 1969 }, { "epoch": 0.6664411366711772, "grad_norm": 0.30931907892227173, "learning_rate": 4.459515518515696e-06, "loss": 0.6202, "step": 1970 }, { "epoch": 0.6667794316644113, "grad_norm": 0.2888835370540619, "learning_rate": 4.4589591238385085e-06, "loss": 0.6092, "step": 1971 }, { "epoch": 0.6671177266576455, "grad_norm": 0.2925364077091217, "learning_rate": 4.458402477669403e-06, "loss": 0.5867, "step": 1972 }, { "epoch": 0.6674560216508796, "grad_norm": 0.31146425008773804, "learning_rate": 4.4578455800798435e-06, "loss": 0.644, "step": 1973 }, { "epoch": 0.6677943166441137, "grad_norm": 0.28718727827072144, "learning_rate": 4.457288431141322e-06, "loss": 0.5907, "step": 1974 }, { "epoch": 0.6681326116373477, "grad_norm": 0.3077245354652405, "learning_rate": 4.456731030925368e-06, "loss": 0.624, "step": 1975 }, { "epoch": 0.6684709066305818, "grad_norm": 0.3103635311126709, "learning_rate": 4.45617337950354e-06, "loss": 0.6181, "step": 1976 }, { "epoch": 0.668809201623816, "grad_norm": 0.31054601073265076, "learning_rate": 4.455615476947428e-06, "loss": 0.6207, "step": 1977 }, { "epoch": 0.6691474966170501, "grad_norm": 0.3209282457828522, "learning_rate": 4.455057323328658e-06, "loss": 0.6072, "step": 1978 }, { "epoch": 0.6694857916102842, "grad_norm": 0.3089926540851593, "learning_rate": 4.4544989187188845e-06, "loss": 0.629, "step": 1979 }, { "epoch": 0.6698240866035182, "grad_norm": 0.3040165901184082, "learning_rate": 4.453940263189797e-06, "loss": 0.6393, "step": 1980 }, { "epoch": 0.6701623815967523, "grad_norm": 0.30424657464027405, "learning_rate": 4.4533813568131145e-06, "loss": 0.6386, "step": 1981 }, { "epoch": 0.6705006765899865, "grad_norm": 0.31967079639434814, "learning_rate": 4.4528221996605905e-06, "loss": 0.6047, "step": 1982 }, { "epoch": 0.6708389715832206, "grad_norm": 0.3124231994152069, "learning_rate": 4.452262791804009e-06, "loss": 0.6324, "step": 1983 }, { "epoch": 0.6711772665764547, "grad_norm": 0.320998877286911, "learning_rate": 4.451703133315187e-06, "loss": 0.6223, "step": 1984 }, { "epoch": 0.6715155615696887, "grad_norm": 0.30833613872528076, "learning_rate": 4.451143224265975e-06, "loss": 0.628, "step": 1985 }, { "epoch": 0.6718538565629228, "grad_norm": 0.2876376211643219, "learning_rate": 4.450583064728253e-06, "loss": 0.6139, "step": 1986 }, { "epoch": 0.672192151556157, "grad_norm": 0.29824233055114746, "learning_rate": 4.450022654773936e-06, "loss": 0.6364, "step": 1987 }, { "epoch": 0.6725304465493911, "grad_norm": 0.29504624009132385, "learning_rate": 4.449461994474968e-06, "loss": 0.6165, "step": 1988 }, { "epoch": 0.6728687415426252, "grad_norm": 0.3090459406375885, "learning_rate": 4.448901083903327e-06, "loss": 0.6539, "step": 1989 }, { "epoch": 0.6732070365358592, "grad_norm": 0.30286070704460144, "learning_rate": 4.4483399231310245e-06, "loss": 0.6271, "step": 1990 }, { "epoch": 0.6735453315290933, "grad_norm": 0.2875913977622986, "learning_rate": 4.4477785122301e-06, "loss": 0.6078, "step": 1991 }, { "epoch": 0.6738836265223275, "grad_norm": 0.3007805347442627, "learning_rate": 4.44721685127263e-06, "loss": 0.6229, "step": 1992 }, { "epoch": 0.6742219215155616, "grad_norm": 0.30888527631759644, "learning_rate": 4.4466549403307195e-06, "loss": 0.6038, "step": 1993 }, { "epoch": 0.6745602165087957, "grad_norm": 0.3012210428714752, "learning_rate": 4.446092779476507e-06, "loss": 0.604, "step": 1994 }, { "epoch": 0.6748985115020297, "grad_norm": 0.3052041232585907, "learning_rate": 4.445530368782163e-06, "loss": 0.6271, "step": 1995 }, { "epoch": 0.6752368064952639, "grad_norm": 0.30048370361328125, "learning_rate": 4.44496770831989e-06, "loss": 0.6322, "step": 1996 }, { "epoch": 0.675575101488498, "grad_norm": 0.315618097782135, "learning_rate": 4.444404798161922e-06, "loss": 0.6246, "step": 1997 }, { "epoch": 0.6759133964817321, "grad_norm": 0.3120715916156769, "learning_rate": 4.443841638380527e-06, "loss": 0.6105, "step": 1998 }, { "epoch": 0.6762516914749662, "grad_norm": 0.2947427034378052, "learning_rate": 4.443278229048002e-06, "loss": 0.6094, "step": 1999 }, { "epoch": 0.6765899864682002, "grad_norm": 0.31929072737693787, "learning_rate": 4.4427145702366804e-06, "loss": 0.6565, "step": 2000 }, { "epoch": 0.6769282814614344, "grad_norm": 0.30863380432128906, "learning_rate": 4.442150662018921e-06, "loss": 0.6154, "step": 2001 }, { "epoch": 0.6772665764546685, "grad_norm": 0.323507159948349, "learning_rate": 4.441586504467122e-06, "loss": 0.6491, "step": 2002 }, { "epoch": 0.6776048714479026, "grad_norm": 0.32120445370674133, "learning_rate": 4.441022097653709e-06, "loss": 0.633, "step": 2003 }, { "epoch": 0.6779431664411367, "grad_norm": 0.3001091182231903, "learning_rate": 4.440457441651139e-06, "loss": 0.6174, "step": 2004 }, { "epoch": 0.6782814614343707, "grad_norm": 0.3134676516056061, "learning_rate": 4.439892536531906e-06, "loss": 0.6355, "step": 2005 }, { "epoch": 0.6786197564276049, "grad_norm": 0.32472702860832214, "learning_rate": 4.43932738236853e-06, "loss": 0.6161, "step": 2006 }, { "epoch": 0.678958051420839, "grad_norm": 0.2919924259185791, "learning_rate": 4.438761979233568e-06, "loss": 0.6225, "step": 2007 }, { "epoch": 0.6792963464140731, "grad_norm": 0.28722047805786133, "learning_rate": 4.438196327199604e-06, "loss": 0.6087, "step": 2008 }, { "epoch": 0.6796346414073072, "grad_norm": 0.3046940863132477, "learning_rate": 4.437630426339259e-06, "loss": 0.6443, "step": 2009 }, { "epoch": 0.6799729364005412, "grad_norm": 0.32804107666015625, "learning_rate": 4.437064276725183e-06, "loss": 0.6077, "step": 2010 }, { "epoch": 0.6803112313937754, "grad_norm": 0.31065163016319275, "learning_rate": 4.436497878430057e-06, "loss": 0.6456, "step": 2011 }, { "epoch": 0.6806495263870095, "grad_norm": 0.3122207224369049, "learning_rate": 4.435931231526597e-06, "loss": 0.647, "step": 2012 }, { "epoch": 0.6809878213802436, "grad_norm": 0.2977723479270935, "learning_rate": 4.435364336087549e-06, "loss": 0.6344, "step": 2013 }, { "epoch": 0.6813261163734776, "grad_norm": 0.30897650122642517, "learning_rate": 4.434797192185691e-06, "loss": 0.6284, "step": 2014 }, { "epoch": 0.6816644113667117, "grad_norm": 0.31061431765556335, "learning_rate": 4.434229799893833e-06, "loss": 0.5913, "step": 2015 }, { "epoch": 0.6820027063599459, "grad_norm": 0.30263346433639526, "learning_rate": 4.433662159284818e-06, "loss": 0.6329, "step": 2016 }, { "epoch": 0.68234100135318, "grad_norm": 0.29153409600257874, "learning_rate": 4.43309427043152e-06, "loss": 0.6289, "step": 2017 }, { "epoch": 0.6826792963464141, "grad_norm": 0.29121798276901245, "learning_rate": 4.432526133406843e-06, "loss": 0.6246, "step": 2018 }, { "epoch": 0.6830175913396481, "grad_norm": 0.29549071192741394, "learning_rate": 4.431957748283725e-06, "loss": 0.6361, "step": 2019 }, { "epoch": 0.6833558863328822, "grad_norm": 0.2968063950538635, "learning_rate": 4.431389115135138e-06, "loss": 0.6018, "step": 2020 }, { "epoch": 0.6836941813261164, "grad_norm": 0.30572426319122314, "learning_rate": 4.430820234034081e-06, "loss": 0.6287, "step": 2021 }, { "epoch": 0.6840324763193505, "grad_norm": 0.29529130458831787, "learning_rate": 4.430251105053586e-06, "loss": 0.6009, "step": 2022 }, { "epoch": 0.6843707713125846, "grad_norm": 0.29441121220588684, "learning_rate": 4.429681728266721e-06, "loss": 0.6015, "step": 2023 }, { "epoch": 0.6847090663058186, "grad_norm": 0.30199480056762695, "learning_rate": 4.4291121037465825e-06, "loss": 0.6377, "step": 2024 }, { "epoch": 0.6850473612990527, "grad_norm": 0.30264589190483093, "learning_rate": 4.428542231566297e-06, "loss": 0.6215, "step": 2025 }, { "epoch": 0.6853856562922869, "grad_norm": 0.2855820953845978, "learning_rate": 4.427972111799026e-06, "loss": 0.6178, "step": 2026 }, { "epoch": 0.685723951285521, "grad_norm": 0.31441786885261536, "learning_rate": 4.427401744517963e-06, "loss": 0.6324, "step": 2027 }, { "epoch": 0.6860622462787551, "grad_norm": 0.29725101590156555, "learning_rate": 4.42683112979633e-06, "loss": 0.6196, "step": 2028 }, { "epoch": 0.6864005412719891, "grad_norm": 0.3108838200569153, "learning_rate": 4.426260267707382e-06, "loss": 0.6123, "step": 2029 }, { "epoch": 0.6867388362652233, "grad_norm": 0.3054821491241455, "learning_rate": 4.42568915832441e-06, "loss": 0.6051, "step": 2030 }, { "epoch": 0.6870771312584574, "grad_norm": 0.2996203899383545, "learning_rate": 4.42511780172073e-06, "loss": 0.6078, "step": 2031 }, { "epoch": 0.6874154262516915, "grad_norm": 0.3160284459590912, "learning_rate": 4.424546197969694e-06, "loss": 0.6135, "step": 2032 }, { "epoch": 0.6877537212449256, "grad_norm": 0.3138665556907654, "learning_rate": 4.423974347144685e-06, "loss": 0.6368, "step": 2033 }, { "epoch": 0.6880920162381596, "grad_norm": 0.2967749536037445, "learning_rate": 4.423402249319118e-06, "loss": 0.6203, "step": 2034 }, { "epoch": 0.6884303112313938, "grad_norm": 0.3004557490348816, "learning_rate": 4.422829904566438e-06, "loss": 0.6269, "step": 2035 }, { "epoch": 0.6887686062246279, "grad_norm": 0.3086588680744171, "learning_rate": 4.422257312960123e-06, "loss": 0.6152, "step": 2036 }, { "epoch": 0.689106901217862, "grad_norm": 0.3136051297187805, "learning_rate": 4.421684474573683e-06, "loss": 0.6186, "step": 2037 }, { "epoch": 0.6894451962110961, "grad_norm": 0.29930639266967773, "learning_rate": 4.421111389480659e-06, "loss": 0.6097, "step": 2038 }, { "epoch": 0.6897834912043301, "grad_norm": 0.2962172031402588, "learning_rate": 4.420538057754624e-06, "loss": 0.6144, "step": 2039 }, { "epoch": 0.6901217861975643, "grad_norm": 0.3190195560455322, "learning_rate": 4.419964479469182e-06, "loss": 0.6146, "step": 2040 }, { "epoch": 0.6904600811907984, "grad_norm": 0.29853519797325134, "learning_rate": 4.41939065469797e-06, "loss": 0.6209, "step": 2041 }, { "epoch": 0.6907983761840325, "grad_norm": 0.30535730719566345, "learning_rate": 4.418816583514656e-06, "loss": 0.6155, "step": 2042 }, { "epoch": 0.6911366711772666, "grad_norm": 0.3163444697856903, "learning_rate": 4.418242265992938e-06, "loss": 0.6461, "step": 2043 }, { "epoch": 0.6914749661705006, "grad_norm": 0.3203446567058563, "learning_rate": 4.417667702206548e-06, "loss": 0.6407, "step": 2044 }, { "epoch": 0.6918132611637348, "grad_norm": 0.30420753359794617, "learning_rate": 4.417092892229249e-06, "loss": 0.6118, "step": 2045 }, { "epoch": 0.6921515561569689, "grad_norm": 0.29890942573547363, "learning_rate": 4.416517836134836e-06, "loss": 0.5982, "step": 2046 }, { "epoch": 0.692489851150203, "grad_norm": 0.3120978772640228, "learning_rate": 4.415942533997133e-06, "loss": 0.6162, "step": 2047 }, { "epoch": 0.6928281461434371, "grad_norm": 0.30351585149765015, "learning_rate": 4.415366985889998e-06, "loss": 0.6016, "step": 2048 }, { "epoch": 0.6931664411366711, "grad_norm": 0.3094577491283417, "learning_rate": 4.4147911918873225e-06, "loss": 0.6405, "step": 2049 }, { "epoch": 0.6935047361299053, "grad_norm": 0.31087616086006165, "learning_rate": 4.414215152063023e-06, "loss": 0.5978, "step": 2050 }, { "epoch": 0.6938430311231394, "grad_norm": 0.30971506237983704, "learning_rate": 4.413638866491056e-06, "loss": 0.6391, "step": 2051 }, { "epoch": 0.6941813261163735, "grad_norm": 0.2959864139556885, "learning_rate": 4.413062335245402e-06, "loss": 0.6182, "step": 2052 }, { "epoch": 0.6945196211096076, "grad_norm": 0.30373695492744446, "learning_rate": 4.4124855584000766e-06, "loss": 0.6128, "step": 2053 }, { "epoch": 0.6948579161028416, "grad_norm": 0.3113371431827545, "learning_rate": 4.41190853602913e-06, "loss": 0.6014, "step": 2054 }, { "epoch": 0.6951962110960758, "grad_norm": 0.30726322531700134, "learning_rate": 4.411331268206637e-06, "loss": 0.6317, "step": 2055 }, { "epoch": 0.6955345060893099, "grad_norm": 0.31048333644866943, "learning_rate": 4.410753755006708e-06, "loss": 0.6007, "step": 2056 }, { "epoch": 0.695872801082544, "grad_norm": 0.3138802647590637, "learning_rate": 4.410175996503485e-06, "loss": 0.6174, "step": 2057 }, { "epoch": 0.696211096075778, "grad_norm": 0.2968793511390686, "learning_rate": 4.409597992771141e-06, "loss": 0.6037, "step": 2058 }, { "epoch": 0.6965493910690121, "grad_norm": 0.3008846640586853, "learning_rate": 4.40901974388388e-06, "loss": 0.6303, "step": 2059 }, { "epoch": 0.6968876860622463, "grad_norm": 0.32319778203964233, "learning_rate": 4.408441249915938e-06, "loss": 0.6435, "step": 2060 }, { "epoch": 0.6972259810554804, "grad_norm": 0.3118629455566406, "learning_rate": 4.4078625109415825e-06, "loss": 0.6247, "step": 2061 }, { "epoch": 0.6975642760487145, "grad_norm": 0.296186238527298, "learning_rate": 4.407283527035112e-06, "loss": 0.6303, "step": 2062 }, { "epoch": 0.6979025710419485, "grad_norm": 0.31817618012428284, "learning_rate": 4.406704298270856e-06, "loss": 0.6305, "step": 2063 }, { "epoch": 0.6982408660351827, "grad_norm": 0.3056838810443878, "learning_rate": 4.406124824723178e-06, "loss": 0.6104, "step": 2064 }, { "epoch": 0.6985791610284168, "grad_norm": 0.3022473156452179, "learning_rate": 4.405545106466469e-06, "loss": 0.6289, "step": 2065 }, { "epoch": 0.6989174560216509, "grad_norm": 0.2971550524234772, "learning_rate": 4.404965143575154e-06, "loss": 0.6225, "step": 2066 }, { "epoch": 0.699255751014885, "grad_norm": 0.3262900412082672, "learning_rate": 4.404384936123689e-06, "loss": 0.6323, "step": 2067 }, { "epoch": 0.699594046008119, "grad_norm": 0.29921954870224, "learning_rate": 4.403804484186561e-06, "loss": 0.6421, "step": 2068 }, { "epoch": 0.6999323410013532, "grad_norm": 0.296154260635376, "learning_rate": 4.40322378783829e-06, "loss": 0.6158, "step": 2069 }, { "epoch": 0.7002706359945873, "grad_norm": 0.2893257737159729, "learning_rate": 4.402642847153424e-06, "loss": 0.6101, "step": 2070 }, { "epoch": 0.7006089309878214, "grad_norm": 0.2974931001663208, "learning_rate": 4.402061662206546e-06, "loss": 0.6193, "step": 2071 }, { "epoch": 0.7009472259810555, "grad_norm": 0.30189311504364014, "learning_rate": 4.401480233072269e-06, "loss": 0.6276, "step": 2072 }, { "epoch": 0.7012855209742895, "grad_norm": 0.31920090317726135, "learning_rate": 4.400898559825234e-06, "loss": 0.6193, "step": 2073 }, { "epoch": 0.7016238159675237, "grad_norm": 0.31290367245674133, "learning_rate": 4.40031664254012e-06, "loss": 0.6413, "step": 2074 }, { "epoch": 0.7019621109607578, "grad_norm": 0.3053489029407501, "learning_rate": 4.399734481291632e-06, "loss": 0.5726, "step": 2075 }, { "epoch": 0.7023004059539919, "grad_norm": 0.29699379205703735, "learning_rate": 4.399152076154509e-06, "loss": 0.5966, "step": 2076 }, { "epoch": 0.702638700947226, "grad_norm": 0.29986077547073364, "learning_rate": 4.39856942720352e-06, "loss": 0.6061, "step": 2077 }, { "epoch": 0.70297699594046, "grad_norm": 0.30691906809806824, "learning_rate": 4.397986534513465e-06, "loss": 0.6097, "step": 2078 }, { "epoch": 0.7033152909336942, "grad_norm": 0.2931588888168335, "learning_rate": 4.397403398159176e-06, "loss": 0.5868, "step": 2079 }, { "epoch": 0.7036535859269283, "grad_norm": 0.29350966215133667, "learning_rate": 4.396820018215518e-06, "loss": 0.6257, "step": 2080 }, { "epoch": 0.7039918809201624, "grad_norm": 0.2993786036968231, "learning_rate": 4.396236394757384e-06, "loss": 0.6153, "step": 2081 }, { "epoch": 0.7043301759133965, "grad_norm": 0.31493711471557617, "learning_rate": 4.3956525278597e-06, "loss": 0.5981, "step": 2082 }, { "epoch": 0.7046684709066305, "grad_norm": 0.3099541664123535, "learning_rate": 4.395068417597423e-06, "loss": 0.6051, "step": 2083 }, { "epoch": 0.7050067658998647, "grad_norm": 0.30024874210357666, "learning_rate": 4.394484064045542e-06, "loss": 0.6399, "step": 2084 }, { "epoch": 0.7053450608930988, "grad_norm": 0.2888277471065521, "learning_rate": 4.393899467279076e-06, "loss": 0.6009, "step": 2085 }, { "epoch": 0.7056833558863329, "grad_norm": 0.31682926416397095, "learning_rate": 4.393314627373075e-06, "loss": 0.5759, "step": 2086 }, { "epoch": 0.706021650879567, "grad_norm": 0.2965947687625885, "learning_rate": 4.392729544402623e-06, "loss": 0.6215, "step": 2087 }, { "epoch": 0.706359945872801, "grad_norm": 0.2999950349330902, "learning_rate": 4.392144218442831e-06, "loss": 0.5964, "step": 2088 }, { "epoch": 0.7066982408660352, "grad_norm": 0.30933696031570435, "learning_rate": 4.391558649568844e-06, "loss": 0.5839, "step": 2089 }, { "epoch": 0.7070365358592693, "grad_norm": 0.2939186990261078, "learning_rate": 4.390972837855839e-06, "loss": 0.606, "step": 2090 }, { "epoch": 0.7073748308525034, "grad_norm": 0.3039042353630066, "learning_rate": 4.390386783379021e-06, "loss": 0.6371, "step": 2091 }, { "epoch": 0.7077131258457375, "grad_norm": 0.32648152112960815, "learning_rate": 4.389800486213629e-06, "loss": 0.641, "step": 2092 }, { "epoch": 0.7080514208389715, "grad_norm": 0.3150835931301117, "learning_rate": 4.389213946434932e-06, "loss": 0.6315, "step": 2093 }, { "epoch": 0.7083897158322057, "grad_norm": 0.3053288757801056, "learning_rate": 4.388627164118228e-06, "loss": 0.6338, "step": 2094 }, { "epoch": 0.7087280108254398, "grad_norm": 0.3217431902885437, "learning_rate": 4.3880401393388515e-06, "loss": 0.6092, "step": 2095 }, { "epoch": 0.7090663058186739, "grad_norm": 0.3065360486507416, "learning_rate": 4.387452872172163e-06, "loss": 0.6318, "step": 2096 }, { "epoch": 0.709404600811908, "grad_norm": 0.31731465458869934, "learning_rate": 4.386865362693556e-06, "loss": 0.6102, "step": 2097 }, { "epoch": 0.709742895805142, "grad_norm": 0.31071433424949646, "learning_rate": 4.386277610978457e-06, "loss": 0.6446, "step": 2098 }, { "epoch": 0.7100811907983762, "grad_norm": 0.32428860664367676, "learning_rate": 4.38568961710232e-06, "loss": 0.6522, "step": 2099 }, { "epoch": 0.7104194857916103, "grad_norm": 0.3046785295009613, "learning_rate": 4.385101381140633e-06, "loss": 0.6293, "step": 2100 }, { "epoch": 0.7107577807848444, "grad_norm": 0.34561029076576233, "learning_rate": 4.384512903168913e-06, "loss": 0.6367, "step": 2101 }, { "epoch": 0.7110960757780784, "grad_norm": 0.3116186559200287, "learning_rate": 4.383924183262709e-06, "loss": 0.6268, "step": 2102 }, { "epoch": 0.7114343707713126, "grad_norm": 0.30118685960769653, "learning_rate": 4.383335221497602e-06, "loss": 0.6293, "step": 2103 }, { "epoch": 0.7117726657645467, "grad_norm": 0.3198152482509613, "learning_rate": 4.382746017949203e-06, "loss": 0.6086, "step": 2104 }, { "epoch": 0.7121109607577808, "grad_norm": 0.33087316155433655, "learning_rate": 4.382156572693154e-06, "loss": 0.6012, "step": 2105 }, { "epoch": 0.7124492557510149, "grad_norm": 0.29373490810394287, "learning_rate": 4.3815668858051274e-06, "loss": 0.6399, "step": 2106 }, { "epoch": 0.7127875507442489, "grad_norm": 0.35433363914489746, "learning_rate": 4.380976957360829e-06, "loss": 0.6371, "step": 2107 }, { "epoch": 0.713125845737483, "grad_norm": 0.3058205246925354, "learning_rate": 4.3803867874359925e-06, "loss": 0.6296, "step": 2108 }, { "epoch": 0.7134641407307172, "grad_norm": 0.297324001789093, "learning_rate": 4.379796376106384e-06, "loss": 0.6044, "step": 2109 }, { "epoch": 0.7138024357239513, "grad_norm": 0.31194958090782166, "learning_rate": 4.379205723447803e-06, "loss": 0.6151, "step": 2110 }, { "epoch": 0.7141407307171854, "grad_norm": 0.32881027460098267, "learning_rate": 4.3786148295360755e-06, "loss": 0.6278, "step": 2111 }, { "epoch": 0.7144790257104194, "grad_norm": 0.29545143246650696, "learning_rate": 4.3780236944470615e-06, "loss": 0.6063, "step": 2112 }, { "epoch": 0.7148173207036536, "grad_norm": 0.31030046939849854, "learning_rate": 4.377432318256651e-06, "loss": 0.6279, "step": 2113 }, { "epoch": 0.7151556156968877, "grad_norm": 0.31192392110824585, "learning_rate": 4.3768407010407655e-06, "loss": 0.6201, "step": 2114 }, { "epoch": 0.7154939106901218, "grad_norm": 0.3663148880004883, "learning_rate": 4.3762488428753565e-06, "loss": 0.6217, "step": 2115 }, { "epoch": 0.7158322056833559, "grad_norm": 0.29583805799484253, "learning_rate": 4.3756567438364075e-06, "loss": 0.6118, "step": 2116 }, { "epoch": 0.7161705006765899, "grad_norm": 0.32086995244026184, "learning_rate": 4.375064403999931e-06, "loss": 0.6265, "step": 2117 }, { "epoch": 0.7165087956698241, "grad_norm": 0.32434090971946716, "learning_rate": 4.3744718234419735e-06, "loss": 0.6001, "step": 2118 }, { "epoch": 0.7168470906630582, "grad_norm": 0.316076397895813, "learning_rate": 4.3738790022386105e-06, "loss": 0.6157, "step": 2119 }, { "epoch": 0.7171853856562923, "grad_norm": 0.3119267523288727, "learning_rate": 4.373285940465948e-06, "loss": 0.5994, "step": 2120 }, { "epoch": 0.7175236806495264, "grad_norm": 0.29969432950019836, "learning_rate": 4.372692638200124e-06, "loss": 0.6253, "step": 2121 }, { "epoch": 0.7178619756427604, "grad_norm": 0.3249845802783966, "learning_rate": 4.372099095517306e-06, "loss": 0.6368, "step": 2122 }, { "epoch": 0.7182002706359946, "grad_norm": 0.31541869044303894, "learning_rate": 4.371505312493694e-06, "loss": 0.6028, "step": 2123 }, { "epoch": 0.7185385656292287, "grad_norm": 0.3059319257736206, "learning_rate": 4.370911289205518e-06, "loss": 0.6261, "step": 2124 }, { "epoch": 0.7188768606224628, "grad_norm": 0.3207683265209198, "learning_rate": 4.370317025729039e-06, "loss": 0.617, "step": 2125 }, { "epoch": 0.7192151556156969, "grad_norm": 0.31733056902885437, "learning_rate": 4.369722522140548e-06, "loss": 0.5974, "step": 2126 }, { "epoch": 0.719553450608931, "grad_norm": 0.31061556935310364, "learning_rate": 4.369127778516369e-06, "loss": 0.6519, "step": 2127 }, { "epoch": 0.7198917456021651, "grad_norm": 0.3010721802711487, "learning_rate": 4.368532794932854e-06, "loss": 0.6385, "step": 2128 }, { "epoch": 0.7202300405953992, "grad_norm": 0.30090805888175964, "learning_rate": 4.367937571466388e-06, "loss": 0.6291, "step": 2129 }, { "epoch": 0.7205683355886333, "grad_norm": 0.3015844225883484, "learning_rate": 4.3673421081933865e-06, "loss": 0.6117, "step": 2130 }, { "epoch": 0.7209066305818674, "grad_norm": 0.2912375330924988, "learning_rate": 4.366746405190294e-06, "loss": 0.6218, "step": 2131 }, { "epoch": 0.7212449255751014, "grad_norm": 0.30901646614074707, "learning_rate": 4.366150462533588e-06, "loss": 0.6179, "step": 2132 }, { "epoch": 0.7215832205683356, "grad_norm": 0.31542983651161194, "learning_rate": 4.3655542802997755e-06, "loss": 0.6134, "step": 2133 }, { "epoch": 0.7219215155615697, "grad_norm": 0.3000514805316925, "learning_rate": 4.3649578585653944e-06, "loss": 0.6181, "step": 2134 }, { "epoch": 0.7222598105548038, "grad_norm": 0.30705004930496216, "learning_rate": 4.364361197407014e-06, "loss": 0.6206, "step": 2135 }, { "epoch": 0.7225981055480379, "grad_norm": 0.29016777873039246, "learning_rate": 4.363764296901234e-06, "loss": 0.5988, "step": 2136 }, { "epoch": 0.722936400541272, "grad_norm": 0.3026140034198761, "learning_rate": 4.363167157124685e-06, "loss": 0.6365, "step": 2137 }, { "epoch": 0.7232746955345061, "grad_norm": 0.3143080174922943, "learning_rate": 4.362569778154026e-06, "loss": 0.638, "step": 2138 }, { "epoch": 0.7236129905277402, "grad_norm": 0.30859896540641785, "learning_rate": 4.361972160065951e-06, "loss": 0.6199, "step": 2139 }, { "epoch": 0.7239512855209743, "grad_norm": 0.3060165047645569, "learning_rate": 4.361374302937182e-06, "loss": 0.6386, "step": 2140 }, { "epoch": 0.7242895805142084, "grad_norm": 0.30037814378738403, "learning_rate": 4.360776206844472e-06, "loss": 0.5968, "step": 2141 }, { "epoch": 0.7246278755074425, "grad_norm": 0.3116171061992645, "learning_rate": 4.360177871864604e-06, "loss": 0.6025, "step": 2142 }, { "epoch": 0.7249661705006766, "grad_norm": 0.30952051281929016, "learning_rate": 4.359579298074392e-06, "loss": 0.5999, "step": 2143 }, { "epoch": 0.7253044654939107, "grad_norm": 0.2957140803337097, "learning_rate": 4.358980485550684e-06, "loss": 0.6096, "step": 2144 }, { "epoch": 0.7256427604871448, "grad_norm": 0.2928706109523773, "learning_rate": 4.358381434370353e-06, "loss": 0.6406, "step": 2145 }, { "epoch": 0.725981055480379, "grad_norm": 0.2954714298248291, "learning_rate": 4.357782144610306e-06, "loss": 0.6224, "step": 2146 }, { "epoch": 0.726319350473613, "grad_norm": 0.2990542948246002, "learning_rate": 4.357182616347482e-06, "loss": 0.619, "step": 2147 }, { "epoch": 0.7266576454668471, "grad_norm": 0.29735761880874634, "learning_rate": 4.356582849658845e-06, "loss": 0.6148, "step": 2148 }, { "epoch": 0.7269959404600812, "grad_norm": 0.30296590924263, "learning_rate": 4.355982844621397e-06, "loss": 0.6442, "step": 2149 }, { "epoch": 0.7273342354533153, "grad_norm": 0.289024293422699, "learning_rate": 4.355382601312164e-06, "loss": 0.6015, "step": 2150 }, { "epoch": 0.7276725304465493, "grad_norm": 0.29794010519981384, "learning_rate": 4.354782119808208e-06, "loss": 0.6026, "step": 2151 }, { "epoch": 0.7280108254397835, "grad_norm": 0.3014563024044037, "learning_rate": 4.3541814001866175e-06, "loss": 0.6069, "step": 2152 }, { "epoch": 0.7283491204330176, "grad_norm": 0.31807276606559753, "learning_rate": 4.353580442524513e-06, "loss": 0.581, "step": 2153 }, { "epoch": 0.7286874154262517, "grad_norm": 0.2963697612285614, "learning_rate": 4.352979246899046e-06, "loss": 0.616, "step": 2154 }, { "epoch": 0.7290257104194858, "grad_norm": 0.29154714941978455, "learning_rate": 4.352377813387398e-06, "loss": 0.6223, "step": 2155 }, { "epoch": 0.7293640054127198, "grad_norm": 0.30207982659339905, "learning_rate": 4.351776142066782e-06, "loss": 0.5977, "step": 2156 }, { "epoch": 0.729702300405954, "grad_norm": 0.33041995763778687, "learning_rate": 4.35117423301444e-06, "loss": 0.6263, "step": 2157 }, { "epoch": 0.7300405953991881, "grad_norm": 0.3013479709625244, "learning_rate": 4.350572086307646e-06, "loss": 0.613, "step": 2158 }, { "epoch": 0.7303788903924222, "grad_norm": 0.29995208978652954, "learning_rate": 4.349969702023703e-06, "loss": 0.6212, "step": 2159 }, { "epoch": 0.7307171853856563, "grad_norm": 0.30284181237220764, "learning_rate": 4.349367080239946e-06, "loss": 0.5916, "step": 2160 }, { "epoch": 0.7310554803788903, "grad_norm": 0.30870866775512695, "learning_rate": 4.34876422103374e-06, "loss": 0.645, "step": 2161 }, { "epoch": 0.7313937753721245, "grad_norm": 0.3040004074573517, "learning_rate": 4.3481611244824794e-06, "loss": 0.5821, "step": 2162 }, { "epoch": 0.7317320703653586, "grad_norm": 0.30425241589546204, "learning_rate": 4.347557790663591e-06, "loss": 0.6022, "step": 2163 }, { "epoch": 0.7320703653585927, "grad_norm": 0.3005933165550232, "learning_rate": 4.34695421965453e-06, "loss": 0.613, "step": 2164 }, { "epoch": 0.7324086603518268, "grad_norm": 0.30306586623191833, "learning_rate": 4.346350411532784e-06, "loss": 0.6193, "step": 2165 }, { "epoch": 0.7327469553450608, "grad_norm": 0.32029709219932556, "learning_rate": 4.34574636637587e-06, "loss": 0.6395, "step": 2166 }, { "epoch": 0.733085250338295, "grad_norm": 0.2901228070259094, "learning_rate": 4.345142084261333e-06, "loss": 0.6225, "step": 2167 }, { "epoch": 0.7334235453315291, "grad_norm": 0.30677056312561035, "learning_rate": 4.344537565266755e-06, "loss": 0.6142, "step": 2168 }, { "epoch": 0.7337618403247632, "grad_norm": 0.3195612132549286, "learning_rate": 4.343932809469742e-06, "loss": 0.616, "step": 2169 }, { "epoch": 0.7341001353179973, "grad_norm": 0.3008933365345001, "learning_rate": 4.3433278169479346e-06, "loss": 0.6205, "step": 2170 }, { "epoch": 0.7344384303112313, "grad_norm": 0.29755228757858276, "learning_rate": 4.342722587778999e-06, "loss": 0.6119, "step": 2171 }, { "epoch": 0.7347767253044655, "grad_norm": 0.3149365186691284, "learning_rate": 4.342117122040637e-06, "loss": 0.6121, "step": 2172 }, { "epoch": 0.7351150202976996, "grad_norm": 0.29928284883499146, "learning_rate": 4.341511419810578e-06, "loss": 0.6049, "step": 2173 }, { "epoch": 0.7354533152909337, "grad_norm": 0.2991817891597748, "learning_rate": 4.340905481166583e-06, "loss": 0.6285, "step": 2174 }, { "epoch": 0.7357916102841678, "grad_norm": 0.30570951104164124, "learning_rate": 4.340299306186441e-06, "loss": 0.6381, "step": 2175 }, { "epoch": 0.7361299052774019, "grad_norm": 0.31654542684555054, "learning_rate": 4.339692894947975e-06, "loss": 0.6396, "step": 2176 }, { "epoch": 0.736468200270636, "grad_norm": 0.29424408078193665, "learning_rate": 4.339086247529034e-06, "loss": 0.6155, "step": 2177 }, { "epoch": 0.7368064952638701, "grad_norm": 0.30464109778404236, "learning_rate": 4.338479364007501e-06, "loss": 0.6512, "step": 2178 }, { "epoch": 0.7371447902571042, "grad_norm": 0.29115793108940125, "learning_rate": 4.3378722444612875e-06, "loss": 0.6139, "step": 2179 }, { "epoch": 0.7374830852503383, "grad_norm": 0.32231682538986206, "learning_rate": 4.337264888968337e-06, "loss": 0.6391, "step": 2180 }, { "epoch": 0.7378213802435724, "grad_norm": 0.29485422372817993, "learning_rate": 4.33665729760662e-06, "loss": 0.5805, "step": 2181 }, { "epoch": 0.7381596752368065, "grad_norm": 0.2966797351837158, "learning_rate": 4.3360494704541415e-06, "loss": 0.6164, "step": 2182 }, { "epoch": 0.7384979702300406, "grad_norm": 0.3220391869544983, "learning_rate": 4.335441407588933e-06, "loss": 0.6149, "step": 2183 }, { "epoch": 0.7388362652232747, "grad_norm": 0.31245046854019165, "learning_rate": 4.334833109089057e-06, "loss": 0.6176, "step": 2184 }, { "epoch": 0.7391745602165088, "grad_norm": 0.3080689013004303, "learning_rate": 4.33422457503261e-06, "loss": 0.626, "step": 2185 }, { "epoch": 0.7395128552097429, "grad_norm": 0.3042171895503998, "learning_rate": 4.333615805497713e-06, "loss": 0.6094, "step": 2186 }, { "epoch": 0.739851150202977, "grad_norm": 0.30676788091659546, "learning_rate": 4.333006800562521e-06, "loss": 0.6181, "step": 2187 }, { "epoch": 0.7401894451962111, "grad_norm": 0.2994125783443451, "learning_rate": 4.33239756030522e-06, "loss": 0.6035, "step": 2188 }, { "epoch": 0.7405277401894452, "grad_norm": 0.2977219223976135, "learning_rate": 4.331788084804022e-06, "loss": 0.6145, "step": 2189 }, { "epoch": 0.7408660351826793, "grad_norm": 0.3188292980194092, "learning_rate": 4.331178374137173e-06, "loss": 0.6049, "step": 2190 }, { "epoch": 0.7412043301759134, "grad_norm": 0.28873610496520996, "learning_rate": 4.330568428382948e-06, "loss": 0.5933, "step": 2191 }, { "epoch": 0.7415426251691475, "grad_norm": 0.30604344606399536, "learning_rate": 4.329958247619651e-06, "loss": 0.6132, "step": 2192 }, { "epoch": 0.7418809201623816, "grad_norm": 0.3060908913612366, "learning_rate": 4.329347831925618e-06, "loss": 0.6088, "step": 2193 }, { "epoch": 0.7422192151556157, "grad_norm": 0.29733046889305115, "learning_rate": 4.328737181379215e-06, "loss": 0.6181, "step": 2194 }, { "epoch": 0.7425575101488497, "grad_norm": 0.2979923188686371, "learning_rate": 4.3281262960588355e-06, "loss": 0.6164, "step": 2195 }, { "epoch": 0.7428958051420839, "grad_norm": 0.29377368092536926, "learning_rate": 4.327515176042908e-06, "loss": 0.6361, "step": 2196 }, { "epoch": 0.743234100135318, "grad_norm": 0.30877503752708435, "learning_rate": 4.326903821409885e-06, "loss": 0.6199, "step": 2197 }, { "epoch": 0.7435723951285521, "grad_norm": 0.3117124140262604, "learning_rate": 4.326292232238256e-06, "loss": 0.6265, "step": 2198 }, { "epoch": 0.7439106901217862, "grad_norm": 0.3066393733024597, "learning_rate": 4.325680408606535e-06, "loss": 0.6211, "step": 2199 }, { "epoch": 0.7442489851150202, "grad_norm": 0.299986332654953, "learning_rate": 4.325068350593269e-06, "loss": 0.5956, "step": 2200 }, { "epoch": 0.7445872801082544, "grad_norm": 0.2944273352622986, "learning_rate": 4.324456058277033e-06, "loss": 0.6349, "step": 2201 }, { "epoch": 0.7449255751014885, "grad_norm": 0.3012370467185974, "learning_rate": 4.323843531736434e-06, "loss": 0.629, "step": 2202 }, { "epoch": 0.7452638700947226, "grad_norm": 0.3050519526004791, "learning_rate": 4.323230771050109e-06, "loss": 0.6096, "step": 2203 }, { "epoch": 0.7456021650879567, "grad_norm": 0.30067360401153564, "learning_rate": 4.322617776296723e-06, "loss": 0.6296, "step": 2204 }, { "epoch": 0.7459404600811907, "grad_norm": 0.30389589071273804, "learning_rate": 4.322004547554974e-06, "loss": 0.6307, "step": 2205 }, { "epoch": 0.7462787550744249, "grad_norm": 0.3155769109725952, "learning_rate": 4.321391084903588e-06, "loss": 0.6216, "step": 2206 }, { "epoch": 0.746617050067659, "grad_norm": 0.3266087770462036, "learning_rate": 4.320777388421321e-06, "loss": 0.6411, "step": 2207 }, { "epoch": 0.7469553450608931, "grad_norm": 0.2882659435272217, "learning_rate": 4.320163458186961e-06, "loss": 0.6252, "step": 2208 }, { "epoch": 0.7472936400541272, "grad_norm": 0.3083643317222595, "learning_rate": 4.319549294279323e-06, "loss": 0.6268, "step": 2209 }, { "epoch": 0.7476319350473613, "grad_norm": 0.2978135645389557, "learning_rate": 4.318934896777255e-06, "loss": 0.6321, "step": 2210 }, { "epoch": 0.7479702300405954, "grad_norm": 0.2990574538707733, "learning_rate": 4.318320265759633e-06, "loss": 0.6463, "step": 2211 }, { "epoch": 0.7483085250338295, "grad_norm": 0.31218650937080383, "learning_rate": 4.317705401305362e-06, "loss": 0.616, "step": 2212 }, { "epoch": 0.7486468200270636, "grad_norm": 0.30175313353538513, "learning_rate": 4.317090303493381e-06, "loss": 0.6045, "step": 2213 }, { "epoch": 0.7489851150202977, "grad_norm": 0.2993379831314087, "learning_rate": 4.316474972402655e-06, "loss": 0.5937, "step": 2214 }, { "epoch": 0.7493234100135318, "grad_norm": 0.3067174255847931, "learning_rate": 4.315859408112182e-06, "loss": 0.6261, "step": 2215 }, { "epoch": 0.7496617050067659, "grad_norm": 0.3013573884963989, "learning_rate": 4.315243610700987e-06, "loss": 0.6173, "step": 2216 }, { "epoch": 0.75, "grad_norm": 0.31558793783187866, "learning_rate": 4.3146275802481254e-06, "loss": 0.6196, "step": 2217 }, { "epoch": 0.7503382949932341, "grad_norm": 0.32746657729148865, "learning_rate": 4.3140113168326856e-06, "loss": 0.6211, "step": 2218 }, { "epoch": 0.7506765899864682, "grad_norm": 0.2910456657409668, "learning_rate": 4.313394820533783e-06, "loss": 0.5915, "step": 2219 }, { "epoch": 0.7510148849797023, "grad_norm": 0.29211342334747314, "learning_rate": 4.312778091430563e-06, "loss": 0.6037, "step": 2220 }, { "epoch": 0.7513531799729364, "grad_norm": 0.2989952862262726, "learning_rate": 4.312161129602201e-06, "loss": 0.634, "step": 2221 }, { "epoch": 0.7516914749661705, "grad_norm": 0.3006637394428253, "learning_rate": 4.3115439351279045e-06, "loss": 0.6103, "step": 2222 }, { "epoch": 0.7520297699594046, "grad_norm": 0.3160168528556824, "learning_rate": 4.310926508086909e-06, "loss": 0.6193, "step": 2223 }, { "epoch": 0.7523680649526387, "grad_norm": 0.2798728048801422, "learning_rate": 4.3103088485584784e-06, "loss": 0.593, "step": 2224 }, { "epoch": 0.7527063599458728, "grad_norm": 0.31656160950660706, "learning_rate": 4.309690956621909e-06, "loss": 0.6264, "step": 2225 }, { "epoch": 0.7530446549391069, "grad_norm": 0.30439293384552, "learning_rate": 4.309072832356527e-06, "loss": 0.6268, "step": 2226 }, { "epoch": 0.753382949932341, "grad_norm": 0.294822096824646, "learning_rate": 4.308454475841686e-06, "loss": 0.6165, "step": 2227 }, { "epoch": 0.7537212449255751, "grad_norm": 0.3071642220020294, "learning_rate": 4.307835887156771e-06, "loss": 0.6089, "step": 2228 }, { "epoch": 0.7540595399188093, "grad_norm": 0.29992255568504333, "learning_rate": 4.307217066381198e-06, "loss": 0.6274, "step": 2229 }, { "epoch": 0.7543978349120433, "grad_norm": 0.3074096441268921, "learning_rate": 4.3065980135944095e-06, "loss": 0.6513, "step": 2230 }, { "epoch": 0.7547361299052774, "grad_norm": 0.3223959505558014, "learning_rate": 4.30597872887588e-06, "loss": 0.6385, "step": 2231 }, { "epoch": 0.7550744248985115, "grad_norm": 0.29632002115249634, "learning_rate": 4.305359212305115e-06, "loss": 0.5938, "step": 2232 }, { "epoch": 0.7554127198917456, "grad_norm": 0.29177916049957275, "learning_rate": 4.304739463961647e-06, "loss": 0.6213, "step": 2233 }, { "epoch": 0.7557510148849798, "grad_norm": 0.29772329330444336, "learning_rate": 4.304119483925041e-06, "loss": 0.6098, "step": 2234 }, { "epoch": 0.7560893098782138, "grad_norm": 0.3082590401172638, "learning_rate": 4.303499272274888e-06, "loss": 0.5982, "step": 2235 }, { "epoch": 0.7564276048714479, "grad_norm": 0.2956467866897583, "learning_rate": 4.302878829090813e-06, "loss": 0.6481, "step": 2236 }, { "epoch": 0.756765899864682, "grad_norm": 0.2921627461910248, "learning_rate": 4.302258154452468e-06, "loss": 0.6251, "step": 2237 }, { "epoch": 0.7571041948579161, "grad_norm": 0.32271668314933777, "learning_rate": 4.301637248439534e-06, "loss": 0.5771, "step": 2238 }, { "epoch": 0.7574424898511503, "grad_norm": 0.3013802170753479, "learning_rate": 4.301016111131726e-06, "loss": 0.584, "step": 2239 }, { "epoch": 0.7577807848443843, "grad_norm": 0.3379766345024109, "learning_rate": 4.300394742608785e-06, "loss": 0.5867, "step": 2240 }, { "epoch": 0.7581190798376184, "grad_norm": 0.3216727375984192, "learning_rate": 4.29977314295048e-06, "loss": 0.6012, "step": 2241 }, { "epoch": 0.7584573748308525, "grad_norm": 0.29771074652671814, "learning_rate": 4.299151312236615e-06, "loss": 0.6351, "step": 2242 }, { "epoch": 0.7587956698240866, "grad_norm": 0.2906126379966736, "learning_rate": 4.29852925054702e-06, "loss": 0.5994, "step": 2243 }, { "epoch": 0.7591339648173207, "grad_norm": 0.3101195991039276, "learning_rate": 4.2979069579615566e-06, "loss": 0.6384, "step": 2244 }, { "epoch": 0.7594722598105548, "grad_norm": 0.316148966550827, "learning_rate": 4.297284434560112e-06, "loss": 0.6193, "step": 2245 }, { "epoch": 0.7598105548037889, "grad_norm": 0.295263409614563, "learning_rate": 4.296661680422609e-06, "loss": 0.6227, "step": 2246 }, { "epoch": 0.760148849797023, "grad_norm": 0.2999385595321655, "learning_rate": 4.296038695628996e-06, "loss": 0.6089, "step": 2247 }, { "epoch": 0.7604871447902571, "grad_norm": 0.30205249786376953, "learning_rate": 4.295415480259251e-06, "loss": 0.6349, "step": 2248 }, { "epoch": 0.7608254397834912, "grad_norm": 0.29788750410079956, "learning_rate": 4.294792034393384e-06, "loss": 0.6112, "step": 2249 }, { "epoch": 0.7611637347767253, "grad_norm": 0.2968345284461975, "learning_rate": 4.294168358111433e-06, "loss": 0.614, "step": 2250 }, { "epoch": 0.7615020297699594, "grad_norm": 0.30285030603408813, "learning_rate": 4.293544451493465e-06, "loss": 0.5997, "step": 2251 }, { "epoch": 0.7618403247631935, "grad_norm": 0.2902609407901764, "learning_rate": 4.292920314619579e-06, "loss": 0.594, "step": 2252 }, { "epoch": 0.7621786197564276, "grad_norm": 0.31383830308914185, "learning_rate": 4.2922959475699e-06, "loss": 0.6165, "step": 2253 }, { "epoch": 0.7625169147496617, "grad_norm": 0.2986510694026947, "learning_rate": 4.291671350424586e-06, "loss": 0.6325, "step": 2254 }, { "epoch": 0.7628552097428958, "grad_norm": 0.30191484093666077, "learning_rate": 4.291046523263821e-06, "loss": 0.6332, "step": 2255 }, { "epoch": 0.7631935047361299, "grad_norm": 0.2976613938808441, "learning_rate": 4.290421466167822e-06, "loss": 0.6081, "step": 2256 }, { "epoch": 0.763531799729364, "grad_norm": 0.3022184371948242, "learning_rate": 4.289796179216835e-06, "loss": 0.6107, "step": 2257 }, { "epoch": 0.7638700947225981, "grad_norm": 0.29924991726875305, "learning_rate": 4.2891706624911335e-06, "loss": 0.6248, "step": 2258 }, { "epoch": 0.7642083897158322, "grad_norm": 0.29953059554100037, "learning_rate": 4.288544916071022e-06, "loss": 0.5876, "step": 2259 }, { "epoch": 0.7645466847090663, "grad_norm": 0.2986959218978882, "learning_rate": 4.287918940036831e-06, "loss": 0.61, "step": 2260 }, { "epoch": 0.7648849797023004, "grad_norm": 0.30967196822166443, "learning_rate": 4.287292734468928e-06, "loss": 0.6271, "step": 2261 }, { "epoch": 0.7652232746955345, "grad_norm": 0.3109535872936249, "learning_rate": 4.286666299447705e-06, "loss": 0.595, "step": 2262 }, { "epoch": 0.7655615696887687, "grad_norm": 0.3030044734477997, "learning_rate": 4.286039635053582e-06, "loss": 0.6136, "step": 2263 }, { "epoch": 0.7658998646820027, "grad_norm": 0.3069053292274475, "learning_rate": 4.28541274136701e-06, "loss": 0.6211, "step": 2264 }, { "epoch": 0.7662381596752368, "grad_norm": 0.30615657567977905, "learning_rate": 4.284785618468471e-06, "loss": 0.6138, "step": 2265 }, { "epoch": 0.7665764546684709, "grad_norm": 0.3103742003440857, "learning_rate": 4.284158266438476e-06, "loss": 0.6073, "step": 2266 }, { "epoch": 0.766914749661705, "grad_norm": 0.30274397134780884, "learning_rate": 4.283530685357564e-06, "loss": 0.6354, "step": 2267 }, { "epoch": 0.7672530446549392, "grad_norm": 0.31919652223587036, "learning_rate": 4.282902875306304e-06, "loss": 0.614, "step": 2268 }, { "epoch": 0.7675913396481732, "grad_norm": 0.28906887769699097, "learning_rate": 4.282274836365294e-06, "loss": 0.6114, "step": 2269 }, { "epoch": 0.7679296346414073, "grad_norm": 0.2996445894241333, "learning_rate": 4.281646568615163e-06, "loss": 0.6156, "step": 2270 }, { "epoch": 0.7682679296346414, "grad_norm": 0.3094944357872009, "learning_rate": 4.281018072136567e-06, "loss": 0.6188, "step": 2271 }, { "epoch": 0.7686062246278755, "grad_norm": 0.3552551567554474, "learning_rate": 4.280389347010193e-06, "loss": 0.6019, "step": 2272 }, { "epoch": 0.7689445196211097, "grad_norm": 0.2939181625843048, "learning_rate": 4.279760393316759e-06, "loss": 0.6293, "step": 2273 }, { "epoch": 0.7692828146143437, "grad_norm": 0.30944308638572693, "learning_rate": 4.279131211137007e-06, "loss": 0.6104, "step": 2274 }, { "epoch": 0.7696211096075778, "grad_norm": 0.3241821527481079, "learning_rate": 4.278501800551713e-06, "loss": 0.6269, "step": 2275 }, { "epoch": 0.7699594046008119, "grad_norm": 0.2961249053478241, "learning_rate": 4.277872161641682e-06, "loss": 0.6052, "step": 2276 }, { "epoch": 0.770297699594046, "grad_norm": 0.30989935994148254, "learning_rate": 4.277242294487746e-06, "loss": 0.6172, "step": 2277 }, { "epoch": 0.7706359945872802, "grad_norm": 0.2998683452606201, "learning_rate": 4.276612199170768e-06, "loss": 0.6073, "step": 2278 }, { "epoch": 0.7709742895805142, "grad_norm": 0.30103355646133423, "learning_rate": 4.27598187577164e-06, "loss": 0.6458, "step": 2279 }, { "epoch": 0.7713125845737483, "grad_norm": 0.31310510635375977, "learning_rate": 4.275351324371283e-06, "loss": 0.6023, "step": 2280 }, { "epoch": 0.7716508795669824, "grad_norm": 0.30720946192741394, "learning_rate": 4.274720545050648e-06, "loss": 0.6116, "step": 2281 }, { "epoch": 0.7719891745602165, "grad_norm": 0.30738264322280884, "learning_rate": 4.2740895378907135e-06, "loss": 0.6165, "step": 2282 }, { "epoch": 0.7723274695534507, "grad_norm": 0.32559826970100403, "learning_rate": 4.27345830297249e-06, "loss": 0.6144, "step": 2283 }, { "epoch": 0.7726657645466847, "grad_norm": 0.3149685859680176, "learning_rate": 4.272826840377014e-06, "loss": 0.6285, "step": 2284 }, { "epoch": 0.7730040595399188, "grad_norm": 0.30466142296791077, "learning_rate": 4.2721951501853544e-06, "loss": 0.6275, "step": 2285 }, { "epoch": 0.7733423545331529, "grad_norm": 0.30216631293296814, "learning_rate": 4.271563232478606e-06, "loss": 0.6071, "step": 2286 }, { "epoch": 0.773680649526387, "grad_norm": 0.33934286236763, "learning_rate": 4.270931087337897e-06, "loss": 0.6108, "step": 2287 }, { "epoch": 0.774018944519621, "grad_norm": 0.31253665685653687, "learning_rate": 4.270298714844381e-06, "loss": 0.6435, "step": 2288 }, { "epoch": 0.7743572395128552, "grad_norm": 0.292512983083725, "learning_rate": 4.269666115079242e-06, "loss": 0.6193, "step": 2289 }, { "epoch": 0.7746955345060893, "grad_norm": 0.32233932614326477, "learning_rate": 4.269033288123694e-06, "loss": 0.6033, "step": 2290 }, { "epoch": 0.7750338294993234, "grad_norm": 0.3083772361278534, "learning_rate": 4.268400234058979e-06, "loss": 0.6366, "step": 2291 }, { "epoch": 0.7753721244925575, "grad_norm": 0.30060258507728577, "learning_rate": 4.267766952966369e-06, "loss": 0.5928, "step": 2292 }, { "epoch": 0.7757104194857916, "grad_norm": 0.32433411478996277, "learning_rate": 4.267133444927166e-06, "loss": 0.6308, "step": 2293 }, { "epoch": 0.7760487144790257, "grad_norm": 0.29127776622772217, "learning_rate": 4.266499710022698e-06, "loss": 0.5992, "step": 2294 }, { "epoch": 0.7763870094722598, "grad_norm": 0.30229055881500244, "learning_rate": 4.2658657483343255e-06, "loss": 0.6312, "step": 2295 }, { "epoch": 0.7767253044654939, "grad_norm": 0.31788191199302673, "learning_rate": 4.2652315599434355e-06, "loss": 0.628, "step": 2296 }, { "epoch": 0.777063599458728, "grad_norm": 0.3147873282432556, "learning_rate": 4.264597144931448e-06, "loss": 0.6439, "step": 2297 }, { "epoch": 0.7774018944519621, "grad_norm": 0.3166278302669525, "learning_rate": 4.263962503379807e-06, "loss": 0.6432, "step": 2298 }, { "epoch": 0.7777401894451962, "grad_norm": 0.3072454035282135, "learning_rate": 4.263327635369988e-06, "loss": 0.6057, "step": 2299 }, { "epoch": 0.7780784844384303, "grad_norm": 0.3119280934333801, "learning_rate": 4.262692540983496e-06, "loss": 0.6412, "step": 2300 }, { "epoch": 0.7784167794316644, "grad_norm": 0.30572327971458435, "learning_rate": 4.262057220301866e-06, "loss": 0.6125, "step": 2301 }, { "epoch": 0.7787550744248986, "grad_norm": 0.30425435304641724, "learning_rate": 4.261421673406659e-06, "loss": 0.6278, "step": 2302 }, { "epoch": 0.7790933694181326, "grad_norm": 0.2952420711517334, "learning_rate": 4.260785900379468e-06, "loss": 0.6134, "step": 2303 }, { "epoch": 0.7794316644113667, "grad_norm": 0.3194384276866913, "learning_rate": 4.260149901301913e-06, "loss": 0.6092, "step": 2304 }, { "epoch": 0.7797699594046008, "grad_norm": 0.3062346279621124, "learning_rate": 4.2595136762556435e-06, "loss": 0.6025, "step": 2305 }, { "epoch": 0.7801082543978349, "grad_norm": 0.3019559979438782, "learning_rate": 4.258877225322339e-06, "loss": 0.5973, "step": 2306 }, { "epoch": 0.780446549391069, "grad_norm": 0.2972407341003418, "learning_rate": 4.258240548583707e-06, "loss": 0.6136, "step": 2307 }, { "epoch": 0.7807848443843031, "grad_norm": 0.2973707914352417, "learning_rate": 4.2576036461214844e-06, "loss": 0.6184, "step": 2308 }, { "epoch": 0.7811231393775372, "grad_norm": 0.30419808626174927, "learning_rate": 4.256966518017437e-06, "loss": 0.6301, "step": 2309 }, { "epoch": 0.7814614343707713, "grad_norm": 0.2952224612236023, "learning_rate": 4.25632916435336e-06, "loss": 0.6127, "step": 2310 }, { "epoch": 0.7817997293640054, "grad_norm": 0.3098551630973816, "learning_rate": 4.2556915852110756e-06, "loss": 0.6042, "step": 2311 }, { "epoch": 0.7821380243572396, "grad_norm": 0.31795603036880493, "learning_rate": 4.255053780672438e-06, "loss": 0.6115, "step": 2312 }, { "epoch": 0.7824763193504736, "grad_norm": 0.29997360706329346, "learning_rate": 4.254415750819328e-06, "loss": 0.6205, "step": 2313 }, { "epoch": 0.7828146143437077, "grad_norm": 0.31599006056785583, "learning_rate": 4.253777495733658e-06, "loss": 0.628, "step": 2314 }, { "epoch": 0.7831529093369418, "grad_norm": 0.30331894755363464, "learning_rate": 4.253139015497364e-06, "loss": 0.635, "step": 2315 }, { "epoch": 0.7834912043301759, "grad_norm": 0.2977330684661865, "learning_rate": 4.252500310192417e-06, "loss": 0.6199, "step": 2316 }, { "epoch": 0.7838294993234101, "grad_norm": 0.2929128110408783, "learning_rate": 4.251861379900813e-06, "loss": 0.6375, "step": 2317 }, { "epoch": 0.7841677943166441, "grad_norm": 0.2989046573638916, "learning_rate": 4.251222224704578e-06, "loss": 0.6071, "step": 2318 }, { "epoch": 0.7845060893098782, "grad_norm": 0.28972285985946655, "learning_rate": 4.2505828446857685e-06, "loss": 0.6161, "step": 2319 }, { "epoch": 0.7848443843031123, "grad_norm": 0.3031672239303589, "learning_rate": 4.249943239926467e-06, "loss": 0.6261, "step": 2320 }, { "epoch": 0.7851826792963464, "grad_norm": 0.30067288875579834, "learning_rate": 4.249303410508787e-06, "loss": 0.6453, "step": 2321 }, { "epoch": 0.7855209742895806, "grad_norm": 0.2904297113418579, "learning_rate": 4.248663356514869e-06, "loss": 0.6202, "step": 2322 }, { "epoch": 0.7858592692828146, "grad_norm": 0.3080545663833618, "learning_rate": 4.248023078026884e-06, "loss": 0.6249, "step": 2323 }, { "epoch": 0.7861975642760487, "grad_norm": 0.29530856013298035, "learning_rate": 4.247382575127031e-06, "loss": 0.6149, "step": 2324 }, { "epoch": 0.7865358592692828, "grad_norm": 0.30119362473487854, "learning_rate": 4.246741847897538e-06, "loss": 0.609, "step": 2325 }, { "epoch": 0.786874154262517, "grad_norm": 0.32412683963775635, "learning_rate": 4.2461008964206626e-06, "loss": 0.6127, "step": 2326 }, { "epoch": 0.7872124492557511, "grad_norm": 0.298534095287323, "learning_rate": 4.24545972077869e-06, "loss": 0.6073, "step": 2327 }, { "epoch": 0.7875507442489851, "grad_norm": 0.30755627155303955, "learning_rate": 4.244818321053933e-06, "loss": 0.6207, "step": 2328 }, { "epoch": 0.7878890392422192, "grad_norm": 0.30254215002059937, "learning_rate": 4.244176697328738e-06, "loss": 0.6297, "step": 2329 }, { "epoch": 0.7882273342354533, "grad_norm": 0.3068731427192688, "learning_rate": 4.243534849685473e-06, "loss": 0.6512, "step": 2330 }, { "epoch": 0.7885656292286874, "grad_norm": 0.3131106197834015, "learning_rate": 4.242892778206542e-06, "loss": 0.628, "step": 2331 }, { "epoch": 0.7889039242219216, "grad_norm": 0.2996361255645752, "learning_rate": 4.242250482974373e-06, "loss": 0.5914, "step": 2332 }, { "epoch": 0.7892422192151556, "grad_norm": 0.3127598762512207, "learning_rate": 4.241607964071423e-06, "loss": 0.6097, "step": 2333 }, { "epoch": 0.7895805142083897, "grad_norm": 0.3156915605068207, "learning_rate": 4.240965221580181e-06, "loss": 0.625, "step": 2334 }, { "epoch": 0.7899188092016238, "grad_norm": 0.2918887138366699, "learning_rate": 4.24032225558316e-06, "loss": 0.6121, "step": 2335 }, { "epoch": 0.790257104194858, "grad_norm": 0.3149941563606262, "learning_rate": 4.239679066162908e-06, "loss": 0.583, "step": 2336 }, { "epoch": 0.790595399188092, "grad_norm": 0.3043084740638733, "learning_rate": 4.239035653401994e-06, "loss": 0.5879, "step": 2337 }, { "epoch": 0.7909336941813261, "grad_norm": 0.29296600818634033, "learning_rate": 4.238392017383021e-06, "loss": 0.5865, "step": 2338 }, { "epoch": 0.7912719891745602, "grad_norm": 0.3000551164150238, "learning_rate": 4.2377481581886195e-06, "loss": 0.6241, "step": 2339 }, { "epoch": 0.7916102841677943, "grad_norm": 0.3253296911716461, "learning_rate": 4.237104075901449e-06, "loss": 0.6208, "step": 2340 }, { "epoch": 0.7919485791610285, "grad_norm": 0.3103972375392914, "learning_rate": 4.236459770604196e-06, "loss": 0.6126, "step": 2341 }, { "epoch": 0.7922868741542625, "grad_norm": 0.29499924182891846, "learning_rate": 4.235815242379577e-06, "loss": 0.5787, "step": 2342 }, { "epoch": 0.7926251691474966, "grad_norm": 0.3240320086479187, "learning_rate": 4.235170491310336e-06, "loss": 0.6374, "step": 2343 }, { "epoch": 0.7929634641407307, "grad_norm": 0.3204994797706604, "learning_rate": 4.234525517479248e-06, "loss": 0.5895, "step": 2344 }, { "epoch": 0.7933017591339648, "grad_norm": 0.30940183997154236, "learning_rate": 4.233880320969114e-06, "loss": 0.6014, "step": 2345 }, { "epoch": 0.793640054127199, "grad_norm": 0.30504274368286133, "learning_rate": 4.233234901862765e-06, "loss": 0.6476, "step": 2346 }, { "epoch": 0.793978349120433, "grad_norm": 0.31054219603538513, "learning_rate": 4.232589260243058e-06, "loss": 0.645, "step": 2347 }, { "epoch": 0.7943166441136671, "grad_norm": 0.34609827399253845, "learning_rate": 4.231943396192885e-06, "loss": 0.6053, "step": 2348 }, { "epoch": 0.7946549391069012, "grad_norm": 0.32586246728897095, "learning_rate": 4.231297309795158e-06, "loss": 0.6133, "step": 2349 }, { "epoch": 0.7949932341001353, "grad_norm": 0.3187881112098694, "learning_rate": 4.2306510011328245e-06, "loss": 0.6034, "step": 2350 }, { "epoch": 0.7953315290933695, "grad_norm": 0.3788541257381439, "learning_rate": 4.230004470288857e-06, "loss": 0.5957, "step": 2351 }, { "epoch": 0.7956698240866035, "grad_norm": 0.3172380328178406, "learning_rate": 4.229357717346257e-06, "loss": 0.626, "step": 2352 }, { "epoch": 0.7960081190798376, "grad_norm": 0.3106147348880768, "learning_rate": 4.2287107423880555e-06, "loss": 0.6233, "step": 2353 }, { "epoch": 0.7963464140730717, "grad_norm": 0.32800528407096863, "learning_rate": 4.228063545497312e-06, "loss": 0.6105, "step": 2354 }, { "epoch": 0.7966847090663058, "grad_norm": 0.3131791353225708, "learning_rate": 4.227416126757112e-06, "loss": 0.5973, "step": 2355 }, { "epoch": 0.79702300405954, "grad_norm": 0.31193608045578003, "learning_rate": 4.226768486250573e-06, "loss": 0.6071, "step": 2356 }, { "epoch": 0.797361299052774, "grad_norm": 0.3071676194667816, "learning_rate": 4.226120624060838e-06, "loss": 0.6198, "step": 2357 }, { "epoch": 0.7976995940460081, "grad_norm": 0.3355953097343445, "learning_rate": 4.2254725402710806e-06, "loss": 0.6179, "step": 2358 }, { "epoch": 0.7980378890392422, "grad_norm": 0.297477662563324, "learning_rate": 4.224824234964502e-06, "loss": 0.6068, "step": 2359 }, { "epoch": 0.7983761840324763, "grad_norm": 0.3031257092952728, "learning_rate": 4.224175708224332e-06, "loss": 0.6184, "step": 2360 }, { "epoch": 0.7987144790257105, "grad_norm": 0.3067512810230255, "learning_rate": 4.223526960133829e-06, "loss": 0.6228, "step": 2361 }, { "epoch": 0.7990527740189445, "grad_norm": 0.32556161284446716, "learning_rate": 4.222877990776278e-06, "loss": 0.6174, "step": 2362 }, { "epoch": 0.7993910690121786, "grad_norm": 0.3196711838245392, "learning_rate": 4.222228800234997e-06, "loss": 0.6003, "step": 2363 }, { "epoch": 0.7997293640054127, "grad_norm": 0.30310988426208496, "learning_rate": 4.221579388593326e-06, "loss": 0.6244, "step": 2364 }, { "epoch": 0.8000676589986468, "grad_norm": 0.30188992619514465, "learning_rate": 4.220929755934638e-06, "loss": 0.6086, "step": 2365 }, { "epoch": 0.800405953991881, "grad_norm": 0.2987558841705322, "learning_rate": 4.220279902342334e-06, "loss": 0.6066, "step": 2366 }, { "epoch": 0.800744248985115, "grad_norm": 0.31522136926651, "learning_rate": 4.2196298278998405e-06, "loss": 0.6217, "step": 2367 }, { "epoch": 0.8010825439783491, "grad_norm": 0.3034696877002716, "learning_rate": 4.218979532690616e-06, "loss": 0.6142, "step": 2368 }, { "epoch": 0.8014208389715832, "grad_norm": 0.3127799332141876, "learning_rate": 4.218329016798145e-06, "loss": 0.6414, "step": 2369 }, { "epoch": 0.8017591339648173, "grad_norm": 0.3001839816570282, "learning_rate": 4.217678280305941e-06, "loss": 0.6126, "step": 2370 }, { "epoch": 0.8020974289580515, "grad_norm": 0.3111492395401001, "learning_rate": 4.2170273232975455e-06, "loss": 0.6222, "step": 2371 }, { "epoch": 0.8024357239512855, "grad_norm": 0.3090807795524597, "learning_rate": 4.216376145856529e-06, "loss": 0.63, "step": 2372 }, { "epoch": 0.8027740189445196, "grad_norm": 0.30586564540863037, "learning_rate": 4.215724748066491e-06, "loss": 0.5959, "step": 2373 }, { "epoch": 0.8031123139377537, "grad_norm": 0.30838629603385925, "learning_rate": 4.215073130011056e-06, "loss": 0.6192, "step": 2374 }, { "epoch": 0.8034506089309879, "grad_norm": 0.29361045360565186, "learning_rate": 4.21442129177388e-06, "loss": 0.5953, "step": 2375 }, { "epoch": 0.803788903924222, "grad_norm": 0.29793581366539, "learning_rate": 4.213769233438647e-06, "loss": 0.6129, "step": 2376 }, { "epoch": 0.804127198917456, "grad_norm": 0.3093686103820801, "learning_rate": 4.213116955089066e-06, "loss": 0.6105, "step": 2377 }, { "epoch": 0.8044654939106901, "grad_norm": 0.3063512444496155, "learning_rate": 4.21246445680888e-06, "loss": 0.5978, "step": 2378 }, { "epoch": 0.8048037889039242, "grad_norm": 0.31297993659973145, "learning_rate": 4.211811738681855e-06, "loss": 0.6156, "step": 2379 }, { "epoch": 0.8051420838971584, "grad_norm": 0.31399765610694885, "learning_rate": 4.211158800791788e-06, "loss": 0.6097, "step": 2380 }, { "epoch": 0.8054803788903924, "grad_norm": 0.2975424528121948, "learning_rate": 4.210505643222504e-06, "loss": 0.6481, "step": 2381 }, { "epoch": 0.8058186738836265, "grad_norm": 0.3167499303817749, "learning_rate": 4.209852266057853e-06, "loss": 0.6228, "step": 2382 }, { "epoch": 0.8061569688768606, "grad_norm": 0.31156760454177856, "learning_rate": 4.209198669381718e-06, "loss": 0.6102, "step": 2383 }, { "epoch": 0.8064952638700947, "grad_norm": 0.29924094676971436, "learning_rate": 4.208544853278008e-06, "loss": 0.5892, "step": 2384 }, { "epoch": 0.8068335588633289, "grad_norm": 0.30753546953201294, "learning_rate": 4.207890817830659e-06, "loss": 0.6128, "step": 2385 }, { "epoch": 0.8071718538565629, "grad_norm": 0.3182028532028198, "learning_rate": 4.207236563123638e-06, "loss": 0.6134, "step": 2386 }, { "epoch": 0.807510148849797, "grad_norm": 0.3230728507041931, "learning_rate": 4.206582089240936e-06, "loss": 0.6068, "step": 2387 }, { "epoch": 0.8078484438430311, "grad_norm": 0.3081299662590027, "learning_rate": 4.205927396266577e-06, "loss": 0.6069, "step": 2388 }, { "epoch": 0.8081867388362652, "grad_norm": 0.3078051507472992, "learning_rate": 4.20527248428461e-06, "loss": 0.6207, "step": 2389 }, { "epoch": 0.8085250338294994, "grad_norm": 0.30221062898635864, "learning_rate": 4.2046173533791126e-06, "loss": 0.6069, "step": 2390 }, { "epoch": 0.8088633288227334, "grad_norm": 0.3207438886165619, "learning_rate": 4.203962003634191e-06, "loss": 0.627, "step": 2391 }, { "epoch": 0.8092016238159675, "grad_norm": 0.30132848024368286, "learning_rate": 4.203306435133978e-06, "loss": 0.6253, "step": 2392 }, { "epoch": 0.8095399188092016, "grad_norm": 0.3093264102935791, "learning_rate": 4.2026506479626375e-06, "loss": 0.6189, "step": 2393 }, { "epoch": 0.8098782138024357, "grad_norm": 0.30848339200019836, "learning_rate": 4.201994642204358e-06, "loss": 0.6296, "step": 2394 }, { "epoch": 0.8102165087956699, "grad_norm": 0.3058469295501709, "learning_rate": 4.20133841794336e-06, "loss": 0.5895, "step": 2395 }, { "epoch": 0.8105548037889039, "grad_norm": 0.313781201839447, "learning_rate": 4.200681975263889e-06, "loss": 0.632, "step": 2396 }, { "epoch": 0.810893098782138, "grad_norm": 0.3138314187526703, "learning_rate": 4.2000253142502175e-06, "loss": 0.6164, "step": 2397 }, { "epoch": 0.8112313937753721, "grad_norm": 0.31005388498306274, "learning_rate": 4.199368434986651e-06, "loss": 0.6143, "step": 2398 }, { "epoch": 0.8115696887686062, "grad_norm": 0.29906076192855835, "learning_rate": 4.198711337557516e-06, "loss": 0.6004, "step": 2399 }, { "epoch": 0.8119079837618404, "grad_norm": 0.301923543214798, "learning_rate": 4.198054022047175e-06, "loss": 0.6091, "step": 2400 }, { "epoch": 0.8122462787550744, "grad_norm": 0.3076381981372833, "learning_rate": 4.197396488540011e-06, "loss": 0.5867, "step": 2401 }, { "epoch": 0.8125845737483085, "grad_norm": 0.3071296811103821, "learning_rate": 4.1967387371204415e-06, "loss": 0.5892, "step": 2402 }, { "epoch": 0.8129228687415426, "grad_norm": 0.30223989486694336, "learning_rate": 4.196080767872907e-06, "loss": 0.609, "step": 2403 }, { "epoch": 0.8132611637347767, "grad_norm": 0.2886689007282257, "learning_rate": 4.195422580881878e-06, "loss": 0.5915, "step": 2404 }, { "epoch": 0.8135994587280109, "grad_norm": 0.2973400354385376, "learning_rate": 4.194764176231852e-06, "loss": 0.6031, "step": 2405 }, { "epoch": 0.8139377537212449, "grad_norm": 0.3109757900238037, "learning_rate": 4.194105554007358e-06, "loss": 0.6461, "step": 2406 }, { "epoch": 0.814276048714479, "grad_norm": 0.32744237780570984, "learning_rate": 4.193446714292948e-06, "loss": 0.6348, "step": 2407 }, { "epoch": 0.8146143437077131, "grad_norm": 0.3049798309803009, "learning_rate": 4.192787657173204e-06, "loss": 0.6183, "step": 2408 }, { "epoch": 0.8149526387009473, "grad_norm": 0.30525657534599304, "learning_rate": 4.192128382732737e-06, "loss": 0.5968, "step": 2409 }, { "epoch": 0.8152909336941814, "grad_norm": 0.31621846556663513, "learning_rate": 4.191468891056184e-06, "loss": 0.6138, "step": 2410 }, { "epoch": 0.8156292286874154, "grad_norm": 0.2994174659252167, "learning_rate": 4.19080918222821e-06, "loss": 0.6164, "step": 2411 }, { "epoch": 0.8159675236806495, "grad_norm": 0.3103436529636383, "learning_rate": 4.1901492563335115e-06, "loss": 0.6322, "step": 2412 }, { "epoch": 0.8163058186738836, "grad_norm": 0.3064984381198883, "learning_rate": 4.189489113456808e-06, "loss": 0.5964, "step": 2413 }, { "epoch": 0.8166441136671178, "grad_norm": 0.30544745922088623, "learning_rate": 4.18882875368285e-06, "loss": 0.616, "step": 2414 }, { "epoch": 0.8169824086603519, "grad_norm": 0.31149283051490784, "learning_rate": 4.188168177096414e-06, "loss": 0.6021, "step": 2415 }, { "epoch": 0.8173207036535859, "grad_norm": 0.290450781583786, "learning_rate": 4.187507383782303e-06, "loss": 0.5883, "step": 2416 }, { "epoch": 0.81765899864682, "grad_norm": 0.31284961104393005, "learning_rate": 4.186846373825354e-06, "loss": 0.6312, "step": 2417 }, { "epoch": 0.8179972936400541, "grad_norm": 0.31574201583862305, "learning_rate": 4.186185147310425e-06, "loss": 0.6195, "step": 2418 }, { "epoch": 0.8183355886332883, "grad_norm": 0.30727875232696533, "learning_rate": 4.185523704322406e-06, "loss": 0.6322, "step": 2419 }, { "epoch": 0.8186738836265224, "grad_norm": 0.30774906277656555, "learning_rate": 4.1848620449462115e-06, "loss": 0.585, "step": 2420 }, { "epoch": 0.8190121786197564, "grad_norm": 0.3038913905620575, "learning_rate": 4.184200169266786e-06, "loss": 0.6191, "step": 2421 }, { "epoch": 0.8193504736129905, "grad_norm": 0.3066258430480957, "learning_rate": 4.183538077369104e-06, "loss": 0.6158, "step": 2422 }, { "epoch": 0.8196887686062246, "grad_norm": 0.3115321695804596, "learning_rate": 4.1828757693381625e-06, "loss": 0.6229, "step": 2423 }, { "epoch": 0.8200270635994588, "grad_norm": 0.29962000250816345, "learning_rate": 4.182213245258989e-06, "loss": 0.6226, "step": 2424 }, { "epoch": 0.8203653585926928, "grad_norm": 0.2973437011241913, "learning_rate": 4.181550505216639e-06, "loss": 0.6197, "step": 2425 }, { "epoch": 0.8207036535859269, "grad_norm": 0.31722259521484375, "learning_rate": 4.180887549296195e-06, "loss": 0.6128, "step": 2426 }, { "epoch": 0.821041948579161, "grad_norm": 0.2999642789363861, "learning_rate": 4.180224377582769e-06, "loss": 0.6114, "step": 2427 }, { "epoch": 0.8213802435723951, "grad_norm": 0.30792975425720215, "learning_rate": 4.179560990161496e-06, "loss": 0.59, "step": 2428 }, { "epoch": 0.8217185385656293, "grad_norm": 0.30365827679634094, "learning_rate": 4.178897387117547e-06, "loss": 0.6202, "step": 2429 }, { "epoch": 0.8220568335588633, "grad_norm": 0.29755693674087524, "learning_rate": 4.178233568536111e-06, "loss": 0.5968, "step": 2430 }, { "epoch": 0.8223951285520974, "grad_norm": 0.31009364128112793, "learning_rate": 4.17756953450241e-06, "loss": 0.616, "step": 2431 }, { "epoch": 0.8227334235453315, "grad_norm": 0.3053271770477295, "learning_rate": 4.176905285101696e-06, "loss": 0.5933, "step": 2432 }, { "epoch": 0.8230717185385656, "grad_norm": 0.3323591947555542, "learning_rate": 4.176240820419242e-06, "loss": 0.6047, "step": 2433 }, { "epoch": 0.8234100135317998, "grad_norm": 0.31013503670692444, "learning_rate": 4.175576140540354e-06, "loss": 0.6263, "step": 2434 }, { "epoch": 0.8237483085250338, "grad_norm": 0.30118370056152344, "learning_rate": 4.1749112455503635e-06, "loss": 0.6161, "step": 2435 }, { "epoch": 0.8240866035182679, "grad_norm": 0.29885560274124146, "learning_rate": 4.17424613553463e-06, "loss": 0.6328, "step": 2436 }, { "epoch": 0.824424898511502, "grad_norm": 0.30362245440483093, "learning_rate": 4.173580810578541e-06, "loss": 0.6067, "step": 2437 }, { "epoch": 0.8247631935047361, "grad_norm": 0.3012903928756714, "learning_rate": 4.17291527076751e-06, "loss": 0.609, "step": 2438 }, { "epoch": 0.8251014884979703, "grad_norm": 0.29213324189186096, "learning_rate": 4.172249516186982e-06, "loss": 0.6196, "step": 2439 }, { "epoch": 0.8254397834912043, "grad_norm": 0.3087073862552643, "learning_rate": 4.171583546922423e-06, "loss": 0.6174, "step": 2440 }, { "epoch": 0.8257780784844384, "grad_norm": 0.30654779076576233, "learning_rate": 4.170917363059333e-06, "loss": 0.5877, "step": 2441 }, { "epoch": 0.8261163734776725, "grad_norm": 0.2954830229282379, "learning_rate": 4.1702509646832355e-06, "loss": 0.6132, "step": 2442 }, { "epoch": 0.8264546684709067, "grad_norm": 0.2958783507347107, "learning_rate": 4.169584351879684e-06, "loss": 0.6359, "step": 2443 }, { "epoch": 0.8267929634641408, "grad_norm": 0.31136995553970337, "learning_rate": 4.168917524734259e-06, "loss": 0.633, "step": 2444 }, { "epoch": 0.8271312584573748, "grad_norm": 0.2963434159755707, "learning_rate": 4.168250483332565e-06, "loss": 0.6307, "step": 2445 }, { "epoch": 0.8274695534506089, "grad_norm": 0.3102148473262787, "learning_rate": 4.167583227760242e-06, "loss": 0.627, "step": 2446 }, { "epoch": 0.827807848443843, "grad_norm": 0.3160640001296997, "learning_rate": 4.166915758102947e-06, "loss": 0.6167, "step": 2447 }, { "epoch": 0.8281461434370772, "grad_norm": 0.3037266433238983, "learning_rate": 4.166248074446374e-06, "loss": 0.623, "step": 2448 }, { "epoch": 0.8284844384303113, "grad_norm": 0.30871710181236267, "learning_rate": 4.16558017687624e-06, "loss": 0.6234, "step": 2449 }, { "epoch": 0.8288227334235453, "grad_norm": 0.3070647120475769, "learning_rate": 4.164912065478288e-06, "loss": 0.5829, "step": 2450 }, { "epoch": 0.8291610284167794, "grad_norm": 0.3087005615234375, "learning_rate": 4.164243740338293e-06, "loss": 0.6116, "step": 2451 }, { "epoch": 0.8294993234100135, "grad_norm": 0.3228028416633606, "learning_rate": 4.163575201542052e-06, "loss": 0.6041, "step": 2452 }, { "epoch": 0.8298376184032477, "grad_norm": 0.31015029549598694, "learning_rate": 4.162906449175395e-06, "loss": 0.6173, "step": 2453 }, { "epoch": 0.8301759133964818, "grad_norm": 0.31407010555267334, "learning_rate": 4.162237483324174e-06, "loss": 0.6049, "step": 2454 }, { "epoch": 0.8305142083897158, "grad_norm": 0.30077072978019714, "learning_rate": 4.161568304074272e-06, "loss": 0.628, "step": 2455 }, { "epoch": 0.8308525033829499, "grad_norm": 0.31255874037742615, "learning_rate": 4.1608989115116005e-06, "loss": 0.5983, "step": 2456 }, { "epoch": 0.831190798376184, "grad_norm": 0.33020806312561035, "learning_rate": 4.160229305722094e-06, "loss": 0.5942, "step": 2457 }, { "epoch": 0.8315290933694182, "grad_norm": 0.31188005208969116, "learning_rate": 4.159559486791718e-06, "loss": 0.6375, "step": 2458 }, { "epoch": 0.8318673883626523, "grad_norm": 0.3300791084766388, "learning_rate": 4.158889454806464e-06, "loss": 0.5908, "step": 2459 }, { "epoch": 0.8322056833558863, "grad_norm": 0.3007451295852661, "learning_rate": 4.15821920985235e-06, "loss": 0.6051, "step": 2460 }, { "epoch": 0.8325439783491204, "grad_norm": 0.308019757270813, "learning_rate": 4.157548752015422e-06, "loss": 0.5893, "step": 2461 }, { "epoch": 0.8328822733423545, "grad_norm": 0.3098035454750061, "learning_rate": 4.156878081381756e-06, "loss": 0.6111, "step": 2462 }, { "epoch": 0.8332205683355887, "grad_norm": 0.2945764660835266, "learning_rate": 4.156207198037451e-06, "loss": 0.6372, "step": 2463 }, { "epoch": 0.8335588633288228, "grad_norm": 0.3222520053386688, "learning_rate": 4.155536102068636e-06, "loss": 0.6387, "step": 2464 }, { "epoch": 0.8338971583220568, "grad_norm": 0.3068825900554657, "learning_rate": 4.154864793561465e-06, "loss": 0.6057, "step": 2465 }, { "epoch": 0.8342354533152909, "grad_norm": 0.32351332902908325, "learning_rate": 4.154193272602124e-06, "loss": 0.6065, "step": 2466 }, { "epoch": 0.834573748308525, "grad_norm": 0.30833709239959717, "learning_rate": 4.15352153927682e-06, "loss": 0.5877, "step": 2467 }, { "epoch": 0.8349120433017592, "grad_norm": 0.3007957935333252, "learning_rate": 4.152849593671793e-06, "loss": 0.5955, "step": 2468 }, { "epoch": 0.8352503382949933, "grad_norm": 0.31723707914352417, "learning_rate": 4.152177435873305e-06, "loss": 0.64, "step": 2469 }, { "epoch": 0.8355886332882273, "grad_norm": 0.29329389333724976, "learning_rate": 4.15150506596765e-06, "loss": 0.6218, "step": 2470 }, { "epoch": 0.8359269282814614, "grad_norm": 0.31001290678977966, "learning_rate": 4.150832484041145e-06, "loss": 0.6336, "step": 2471 }, { "epoch": 0.8362652232746955, "grad_norm": 0.30639132857322693, "learning_rate": 4.150159690180139e-06, "loss": 0.6338, "step": 2472 }, { "epoch": 0.8366035182679297, "grad_norm": 0.3088569641113281, "learning_rate": 4.149486684471004e-06, "loss": 0.6046, "step": 2473 }, { "epoch": 0.8369418132611637, "grad_norm": 0.3085624873638153, "learning_rate": 4.1488134670001404e-06, "loss": 0.6125, "step": 2474 }, { "epoch": 0.8372801082543978, "grad_norm": 0.294872909784317, "learning_rate": 4.148140037853976e-06, "loss": 0.606, "step": 2475 }, { "epoch": 0.8376184032476319, "grad_norm": 0.29635724425315857, "learning_rate": 4.147466397118968e-06, "loss": 0.6239, "step": 2476 }, { "epoch": 0.837956698240866, "grad_norm": 0.31449973583221436, "learning_rate": 4.146792544881595e-06, "loss": 0.6107, "step": 2477 }, { "epoch": 0.8382949932341002, "grad_norm": 0.31982365250587463, "learning_rate": 4.146118481228371e-06, "loss": 0.6202, "step": 2478 }, { "epoch": 0.8386332882273342, "grad_norm": 0.30596596002578735, "learning_rate": 4.145444206245828e-06, "loss": 0.6054, "step": 2479 }, { "epoch": 0.8389715832205683, "grad_norm": 0.2982724905014038, "learning_rate": 4.144769720020533e-06, "loss": 0.5951, "step": 2480 }, { "epoch": 0.8393098782138024, "grad_norm": 0.29678595066070557, "learning_rate": 4.144095022639075e-06, "loss": 0.6166, "step": 2481 }, { "epoch": 0.8396481732070366, "grad_norm": 0.3216749131679535, "learning_rate": 4.1434201141880726e-06, "loss": 0.6016, "step": 2482 }, { "epoch": 0.8399864682002707, "grad_norm": 0.31580859422683716, "learning_rate": 4.142744994754171e-06, "loss": 0.5961, "step": 2483 }, { "epoch": 0.8403247631935047, "grad_norm": 0.3653385043144226, "learning_rate": 4.142069664424041e-06, "loss": 0.6222, "step": 2484 }, { "epoch": 0.8406630581867388, "grad_norm": 0.29260021448135376, "learning_rate": 4.141394123284383e-06, "loss": 0.6045, "step": 2485 }, { "epoch": 0.8410013531799729, "grad_norm": 0.3153046667575836, "learning_rate": 4.140718371421923e-06, "loss": 0.609, "step": 2486 }, { "epoch": 0.841339648173207, "grad_norm": 0.3311583697795868, "learning_rate": 4.140042408923415e-06, "loss": 0.5924, "step": 2487 }, { "epoch": 0.8416779431664412, "grad_norm": 0.31460800766944885, "learning_rate": 4.139366235875637e-06, "loss": 0.6028, "step": 2488 }, { "epoch": 0.8420162381596752, "grad_norm": 0.28972992300987244, "learning_rate": 4.1386898523654e-06, "loss": 0.608, "step": 2489 }, { "epoch": 0.8423545331529093, "grad_norm": 0.31510454416275024, "learning_rate": 4.138013258479535e-06, "loss": 0.6159, "step": 2490 }, { "epoch": 0.8426928281461434, "grad_norm": 0.2981686592102051, "learning_rate": 4.137336454304904e-06, "loss": 0.6174, "step": 2491 }, { "epoch": 0.8430311231393776, "grad_norm": 0.3189791738986969, "learning_rate": 4.136659439928397e-06, "loss": 0.6039, "step": 2492 }, { "epoch": 0.8433694181326117, "grad_norm": 0.31273263692855835, "learning_rate": 4.135982215436928e-06, "loss": 0.6068, "step": 2493 }, { "epoch": 0.8437077131258457, "grad_norm": 0.3063289523124695, "learning_rate": 4.135304780917439e-06, "loss": 0.609, "step": 2494 }, { "epoch": 0.8440460081190798, "grad_norm": 0.3077285885810852, "learning_rate": 4.134627136456902e-06, "loss": 0.619, "step": 2495 }, { "epoch": 0.8443843031123139, "grad_norm": 0.303930401802063, "learning_rate": 4.13394928214231e-06, "loss": 0.6134, "step": 2496 }, { "epoch": 0.8447225981055481, "grad_norm": 0.31286007165908813, "learning_rate": 4.1332712180606885e-06, "loss": 0.6041, "step": 2497 }, { "epoch": 0.8450608930987822, "grad_norm": 0.3026021420955658, "learning_rate": 4.1325929442990855e-06, "loss": 0.6058, "step": 2498 }, { "epoch": 0.8453991880920162, "grad_norm": 0.30549052357673645, "learning_rate": 4.131914460944579e-06, "loss": 0.6017, "step": 2499 }, { "epoch": 0.8457374830852503, "grad_norm": 0.3061017394065857, "learning_rate": 4.1312357680842735e-06, "loss": 0.581, "step": 2500 }, { "epoch": 0.8460757780784844, "grad_norm": 0.3075471818447113, "learning_rate": 4.1305568658053e-06, "loss": 0.6344, "step": 2501 }, { "epoch": 0.8464140730717186, "grad_norm": 0.29023295640945435, "learning_rate": 4.129877754194815e-06, "loss": 0.6298, "step": 2502 }, { "epoch": 0.8467523680649527, "grad_norm": 0.29860207438468933, "learning_rate": 4.129198433340004e-06, "loss": 0.6224, "step": 2503 }, { "epoch": 0.8470906630581867, "grad_norm": 0.29904088377952576, "learning_rate": 4.128518903328078e-06, "loss": 0.6118, "step": 2504 }, { "epoch": 0.8474289580514208, "grad_norm": 0.3165917694568634, "learning_rate": 4.1278391642462755e-06, "loss": 0.5659, "step": 2505 }, { "epoch": 0.847767253044655, "grad_norm": 0.30058714747428894, "learning_rate": 4.127159216181862e-06, "loss": 0.642, "step": 2506 }, { "epoch": 0.8481055480378891, "grad_norm": 0.31144291162490845, "learning_rate": 4.126479059222129e-06, "loss": 0.6078, "step": 2507 }, { "epoch": 0.8484438430311232, "grad_norm": 0.2997061610221863, "learning_rate": 4.125798693454396e-06, "loss": 0.6131, "step": 2508 }, { "epoch": 0.8487821380243572, "grad_norm": 0.29972556233406067, "learning_rate": 4.125118118966008e-06, "loss": 0.6195, "step": 2509 }, { "epoch": 0.8491204330175913, "grad_norm": 0.30682137608528137, "learning_rate": 4.124437335844337e-06, "loss": 0.6238, "step": 2510 }, { "epoch": 0.8494587280108254, "grad_norm": 0.3116617202758789, "learning_rate": 4.123756344176783e-06, "loss": 0.6114, "step": 2511 }, { "epoch": 0.8497970230040596, "grad_norm": 0.3055778443813324, "learning_rate": 4.123075144050772e-06, "loss": 0.5937, "step": 2512 }, { "epoch": 0.8501353179972937, "grad_norm": 0.31854158639907837, "learning_rate": 4.122393735553757e-06, "loss": 0.6331, "step": 2513 }, { "epoch": 0.8504736129905277, "grad_norm": 0.3081203103065491, "learning_rate": 4.121712118773216e-06, "loss": 0.6282, "step": 2514 }, { "epoch": 0.8508119079837618, "grad_norm": 0.31456461548805237, "learning_rate": 4.121030293796656e-06, "loss": 0.6218, "step": 2515 }, { "epoch": 0.851150202976996, "grad_norm": 0.3050321936607361, "learning_rate": 4.1203482607116105e-06, "loss": 0.6169, "step": 2516 }, { "epoch": 0.8514884979702301, "grad_norm": 0.30027446150779724, "learning_rate": 4.119666019605639e-06, "loss": 0.6361, "step": 2517 }, { "epoch": 0.8518267929634641, "grad_norm": 0.30657586455345154, "learning_rate": 4.118983570566328e-06, "loss": 0.6254, "step": 2518 }, { "epoch": 0.8521650879566982, "grad_norm": 0.3064988851547241, "learning_rate": 4.118300913681289e-06, "loss": 0.6033, "step": 2519 }, { "epoch": 0.8525033829499323, "grad_norm": 0.30324336886405945, "learning_rate": 4.117618049038165e-06, "loss": 0.6433, "step": 2520 }, { "epoch": 0.8528416779431665, "grad_norm": 0.3076494038105011, "learning_rate": 4.116934976724618e-06, "loss": 0.6265, "step": 2521 }, { "epoch": 0.8531799729364006, "grad_norm": 0.31042566895484924, "learning_rate": 4.116251696828345e-06, "loss": 0.6, "step": 2522 }, { "epoch": 0.8535182679296346, "grad_norm": 0.303231805562973, "learning_rate": 4.115568209437064e-06, "loss": 0.6164, "step": 2523 }, { "epoch": 0.8538565629228687, "grad_norm": 0.3145078420639038, "learning_rate": 4.114884514638522e-06, "loss": 0.6188, "step": 2524 }, { "epoch": 0.8541948579161028, "grad_norm": 0.308861643075943, "learning_rate": 4.11420061252049e-06, "loss": 0.6097, "step": 2525 }, { "epoch": 0.854533152909337, "grad_norm": 0.29943719506263733, "learning_rate": 4.11351650317077e-06, "loss": 0.6117, "step": 2526 }, { "epoch": 0.8548714479025711, "grad_norm": 0.3042926490306854, "learning_rate": 4.112832186677188e-06, "loss": 0.6499, "step": 2527 }, { "epoch": 0.8552097428958051, "grad_norm": 0.3146323561668396, "learning_rate": 4.112147663127597e-06, "loss": 0.5948, "step": 2528 }, { "epoch": 0.8555480378890392, "grad_norm": 0.3046211004257202, "learning_rate": 4.111462932609874e-06, "loss": 0.6268, "step": 2529 }, { "epoch": 0.8558863328822733, "grad_norm": 0.31243595480918884, "learning_rate": 4.110777995211926e-06, "loss": 0.6056, "step": 2530 }, { "epoch": 0.8562246278755075, "grad_norm": 0.31479743123054504, "learning_rate": 4.110092851021688e-06, "loss": 0.6181, "step": 2531 }, { "epoch": 0.8565629228687416, "grad_norm": 0.31839272379875183, "learning_rate": 4.1094075001271164e-06, "loss": 0.6067, "step": 2532 }, { "epoch": 0.8569012178619756, "grad_norm": 0.30680611729621887, "learning_rate": 4.108721942616197e-06, "loss": 0.5856, "step": 2533 }, { "epoch": 0.8572395128552097, "grad_norm": 0.3028895854949951, "learning_rate": 4.108036178576944e-06, "loss": 0.5957, "step": 2534 }, { "epoch": 0.8575778078484438, "grad_norm": 0.2930106818675995, "learning_rate": 4.107350208097393e-06, "loss": 0.6151, "step": 2535 }, { "epoch": 0.857916102841678, "grad_norm": 0.3081621825695038, "learning_rate": 4.106664031265611e-06, "loss": 0.6248, "step": 2536 }, { "epoch": 0.8582543978349121, "grad_norm": 0.3085257112979889, "learning_rate": 4.10597764816969e-06, "loss": 0.5873, "step": 2537 }, { "epoch": 0.8585926928281461, "grad_norm": 0.29894325137138367, "learning_rate": 4.105291058897747e-06, "loss": 0.5895, "step": 2538 }, { "epoch": 0.8589309878213802, "grad_norm": 0.30664926767349243, "learning_rate": 4.104604263537927e-06, "loss": 0.6536, "step": 2539 }, { "epoch": 0.8592692828146143, "grad_norm": 0.2995365560054779, "learning_rate": 4.103917262178402e-06, "loss": 0.6293, "step": 2540 }, { "epoch": 0.8596075778078485, "grad_norm": 0.30400002002716064, "learning_rate": 4.103230054907368e-06, "loss": 0.5964, "step": 2541 }, { "epoch": 0.8599458728010826, "grad_norm": 0.31295180320739746, "learning_rate": 4.10254264181305e-06, "loss": 0.6275, "step": 2542 }, { "epoch": 0.8602841677943166, "grad_norm": 0.29022055864334106, "learning_rate": 4.101855022983697e-06, "loss": 0.6004, "step": 2543 }, { "epoch": 0.8606224627875507, "grad_norm": 0.31110915541648865, "learning_rate": 4.101167198507587e-06, "loss": 0.6207, "step": 2544 }, { "epoch": 0.8609607577807848, "grad_norm": 0.31582343578338623, "learning_rate": 4.100479168473024e-06, "loss": 0.625, "step": 2545 }, { "epoch": 0.861299052774019, "grad_norm": 0.30717524886131287, "learning_rate": 4.099790932968335e-06, "loss": 0.6169, "step": 2546 }, { "epoch": 0.8616373477672531, "grad_norm": 0.30204007029533386, "learning_rate": 4.099102492081877e-06, "loss": 0.5991, "step": 2547 }, { "epoch": 0.8619756427604871, "grad_norm": 0.30182498693466187, "learning_rate": 4.098413845902033e-06, "loss": 0.6316, "step": 2548 }, { "epoch": 0.8623139377537212, "grad_norm": 0.2960222661495209, "learning_rate": 4.097724994517212e-06, "loss": 0.5938, "step": 2549 }, { "epoch": 0.8626522327469553, "grad_norm": 0.31207287311553955, "learning_rate": 4.0970359380158485e-06, "loss": 0.6287, "step": 2550 }, { "epoch": 0.8629905277401895, "grad_norm": 0.3209856152534485, "learning_rate": 4.096346676486403e-06, "loss": 0.6123, "step": 2551 }, { "epoch": 0.8633288227334236, "grad_norm": 0.3065491318702698, "learning_rate": 4.095657210017364e-06, "loss": 0.6063, "step": 2552 }, { "epoch": 0.8636671177266576, "grad_norm": 0.3043796718120575, "learning_rate": 4.094967538697245e-06, "loss": 0.616, "step": 2553 }, { "epoch": 0.8640054127198917, "grad_norm": 0.31421199440956116, "learning_rate": 4.094277662614586e-06, "loss": 0.6148, "step": 2554 }, { "epoch": 0.8643437077131259, "grad_norm": 0.30866771936416626, "learning_rate": 4.093587581857955e-06, "loss": 0.6283, "step": 2555 }, { "epoch": 0.86468200270636, "grad_norm": 0.3206896185874939, "learning_rate": 4.0928972965159445e-06, "loss": 0.6191, "step": 2556 }, { "epoch": 0.8650202976995941, "grad_norm": 0.318266898393631, "learning_rate": 4.092206806677172e-06, "loss": 0.5723, "step": 2557 }, { "epoch": 0.8653585926928281, "grad_norm": 0.3173162043094635, "learning_rate": 4.091516112430284e-06, "loss": 0.6346, "step": 2558 }, { "epoch": 0.8656968876860622, "grad_norm": 0.3083566427230835, "learning_rate": 4.090825213863953e-06, "loss": 0.6299, "step": 2559 }, { "epoch": 0.8660351826792964, "grad_norm": 0.2947406768798828, "learning_rate": 4.090134111066874e-06, "loss": 0.5935, "step": 2560 }, { "epoch": 0.8663734776725305, "grad_norm": 0.31538644433021545, "learning_rate": 4.089442804127773e-06, "loss": 0.6028, "step": 2561 }, { "epoch": 0.8667117726657646, "grad_norm": 0.32597365975379944, "learning_rate": 4.0887512931354016e-06, "loss": 0.5859, "step": 2562 }, { "epoch": 0.8670500676589986, "grad_norm": 0.30730724334716797, "learning_rate": 4.088059578178534e-06, "loss": 0.5936, "step": 2563 }, { "epoch": 0.8673883626522327, "grad_norm": 0.323289692401886, "learning_rate": 4.087367659345973e-06, "loss": 0.5876, "step": 2564 }, { "epoch": 0.8677266576454669, "grad_norm": 0.3257518708705902, "learning_rate": 4.086675536726548e-06, "loss": 0.6105, "step": 2565 }, { "epoch": 0.868064952638701, "grad_norm": 0.30332931876182556, "learning_rate": 4.085983210409114e-06, "loss": 0.6527, "step": 2566 }, { "epoch": 0.868403247631935, "grad_norm": 0.32552096247673035, "learning_rate": 4.085290680482552e-06, "loss": 0.6237, "step": 2567 }, { "epoch": 0.8687415426251691, "grad_norm": 0.31306159496307373, "learning_rate": 4.08459794703577e-06, "loss": 0.6318, "step": 2568 }, { "epoch": 0.8690798376184032, "grad_norm": 0.3016589283943176, "learning_rate": 4.0839050101577e-06, "loss": 0.5864, "step": 2569 }, { "epoch": 0.8694181326116374, "grad_norm": 0.3021901845932007, "learning_rate": 4.083211869937302e-06, "loss": 0.6214, "step": 2570 }, { "epoch": 0.8697564276048715, "grad_norm": 0.3040446639060974, "learning_rate": 4.082518526463562e-06, "loss": 0.6179, "step": 2571 }, { "epoch": 0.8700947225981055, "grad_norm": 0.31397125124931335, "learning_rate": 4.081824979825492e-06, "loss": 0.6121, "step": 2572 }, { "epoch": 0.8704330175913396, "grad_norm": 0.30847495794296265, "learning_rate": 4.081131230112128e-06, "loss": 0.6103, "step": 2573 }, { "epoch": 0.8707713125845737, "grad_norm": 0.2960346043109894, "learning_rate": 4.080437277412536e-06, "loss": 0.5899, "step": 2574 }, { "epoch": 0.8711096075778079, "grad_norm": 0.30658602714538574, "learning_rate": 4.079743121815805e-06, "loss": 0.5767, "step": 2575 }, { "epoch": 0.871447902571042, "grad_norm": 0.32288169860839844, "learning_rate": 4.07904876341105e-06, "loss": 0.6234, "step": 2576 }, { "epoch": 0.871786197564276, "grad_norm": 0.3071858584880829, "learning_rate": 4.0783542022874155e-06, "loss": 0.6106, "step": 2577 }, { "epoch": 0.8721244925575101, "grad_norm": 0.3182103931903839, "learning_rate": 4.0776594385340665e-06, "loss": 0.6347, "step": 2578 }, { "epoch": 0.8724627875507442, "grad_norm": 0.29850947856903076, "learning_rate": 4.0769644722402e-06, "loss": 0.5841, "step": 2579 }, { "epoch": 0.8728010825439784, "grad_norm": 0.33759528398513794, "learning_rate": 4.076269303495033e-06, "loss": 0.5971, "step": 2580 }, { "epoch": 0.8731393775372125, "grad_norm": 0.3135136067867279, "learning_rate": 4.075573932387814e-06, "loss": 0.6045, "step": 2581 }, { "epoch": 0.8734776725304465, "grad_norm": 0.30932480096817017, "learning_rate": 4.0748783590078136e-06, "loss": 0.6337, "step": 2582 }, { "epoch": 0.8738159675236806, "grad_norm": 0.3216150104999542, "learning_rate": 4.074182583444331e-06, "loss": 0.6145, "step": 2583 }, { "epoch": 0.8741542625169147, "grad_norm": 0.30273857712745667, "learning_rate": 4.073486605786689e-06, "loss": 0.5818, "step": 2584 }, { "epoch": 0.8744925575101489, "grad_norm": 0.31567350029945374, "learning_rate": 4.072790426124238e-06, "loss": 0.6204, "step": 2585 }, { "epoch": 0.874830852503383, "grad_norm": 0.31259241700172424, "learning_rate": 4.0720940445463536e-06, "loss": 0.6309, "step": 2586 }, { "epoch": 0.875169147496617, "grad_norm": 0.3071554899215698, "learning_rate": 4.071397461142437e-06, "loss": 0.6272, "step": 2587 }, { "epoch": 0.8755074424898511, "grad_norm": 0.31412243843078613, "learning_rate": 4.070700676001917e-06, "loss": 0.597, "step": 2588 }, { "epoch": 0.8758457374830853, "grad_norm": 0.3067740499973297, "learning_rate": 4.070003689214247e-06, "loss": 0.6275, "step": 2589 }, { "epoch": 0.8761840324763194, "grad_norm": 0.3077264130115509, "learning_rate": 4.0693065008689055e-06, "loss": 0.6126, "step": 2590 }, { "epoch": 0.8765223274695535, "grad_norm": 0.3074173033237457, "learning_rate": 4.0686091110553985e-06, "loss": 0.6069, "step": 2591 }, { "epoch": 0.8768606224627875, "grad_norm": 0.31100180745124817, "learning_rate": 4.067911519863257e-06, "loss": 0.6216, "step": 2592 }, { "epoch": 0.8771989174560216, "grad_norm": 0.3137838542461395, "learning_rate": 4.067213727382037e-06, "loss": 0.6112, "step": 2593 }, { "epoch": 0.8775372124492558, "grad_norm": 0.30964088439941406, "learning_rate": 4.066515733701325e-06, "loss": 0.6117, "step": 2594 }, { "epoch": 0.8778755074424899, "grad_norm": 0.31450891494750977, "learning_rate": 4.065817538910725e-06, "loss": 0.604, "step": 2595 }, { "epoch": 0.878213802435724, "grad_norm": 0.32473263144493103, "learning_rate": 4.065119143099874e-06, "loss": 0.6211, "step": 2596 }, { "epoch": 0.878552097428958, "grad_norm": 0.3195638358592987, "learning_rate": 4.0644205463584315e-06, "loss": 0.6167, "step": 2597 }, { "epoch": 0.8788903924221921, "grad_norm": 0.2934994399547577, "learning_rate": 4.063721748776085e-06, "loss": 0.5956, "step": 2598 }, { "epoch": 0.8792286874154263, "grad_norm": 0.32306793332099915, "learning_rate": 4.063022750442545e-06, "loss": 0.5955, "step": 2599 }, { "epoch": 0.8795669824086604, "grad_norm": 0.30271050333976746, "learning_rate": 4.062323551447549e-06, "loss": 0.6352, "step": 2600 }, { "epoch": 0.8799052774018945, "grad_norm": 0.30306926369667053, "learning_rate": 4.061624151880862e-06, "loss": 0.6152, "step": 2601 }, { "epoch": 0.8802435723951285, "grad_norm": 0.307574987411499, "learning_rate": 4.060924551832272e-06, "loss": 0.6023, "step": 2602 }, { "epoch": 0.8805818673883626, "grad_norm": 0.3127698600292206, "learning_rate": 4.060224751391593e-06, "loss": 0.6265, "step": 2603 }, { "epoch": 0.8809201623815968, "grad_norm": 0.29466134309768677, "learning_rate": 4.059524750648668e-06, "loss": 0.6319, "step": 2604 }, { "epoch": 0.8812584573748309, "grad_norm": 0.31846871972084045, "learning_rate": 4.058824549693362e-06, "loss": 0.6084, "step": 2605 }, { "epoch": 0.881596752368065, "grad_norm": 0.30549389123916626, "learning_rate": 4.058124148615567e-06, "loss": 0.6154, "step": 2606 }, { "epoch": 0.881935047361299, "grad_norm": 0.3262697756290436, "learning_rate": 4.0574235475052e-06, "loss": 0.6217, "step": 2607 }, { "epoch": 0.8822733423545331, "grad_norm": 0.33191269636154175, "learning_rate": 4.056722746452208e-06, "loss": 0.6125, "step": 2608 }, { "epoch": 0.8826116373477673, "grad_norm": 0.29111433029174805, "learning_rate": 4.056021745546555e-06, "loss": 0.6128, "step": 2609 }, { "epoch": 0.8829499323410014, "grad_norm": 0.3281172513961792, "learning_rate": 4.055320544878239e-06, "loss": 0.6013, "step": 2610 }, { "epoch": 0.8832882273342354, "grad_norm": 0.3282298147678375, "learning_rate": 4.054619144537281e-06, "loss": 0.592, "step": 2611 }, { "epoch": 0.8836265223274695, "grad_norm": 0.30221420526504517, "learning_rate": 4.053917544613723e-06, "loss": 0.6152, "step": 2612 }, { "epoch": 0.8839648173207036, "grad_norm": 0.3031254708766937, "learning_rate": 4.053215745197642e-06, "loss": 0.5999, "step": 2613 }, { "epoch": 0.8843031123139378, "grad_norm": 0.3269789516925812, "learning_rate": 4.0525137463791305e-06, "loss": 0.5801, "step": 2614 }, { "epoch": 0.8846414073071719, "grad_norm": 0.3483508229255676, "learning_rate": 4.051811548248315e-06, "loss": 0.6048, "step": 2615 }, { "epoch": 0.8849797023004059, "grad_norm": 0.32144540548324585, "learning_rate": 4.051109150895343e-06, "loss": 0.6069, "step": 2616 }, { "epoch": 0.88531799729364, "grad_norm": 0.32789748907089233, "learning_rate": 4.050406554410387e-06, "loss": 0.6015, "step": 2617 }, { "epoch": 0.8856562922868741, "grad_norm": 0.3455045819282532, "learning_rate": 4.049703758883648e-06, "loss": 0.5856, "step": 2618 }, { "epoch": 0.8859945872801083, "grad_norm": 0.3017064034938812, "learning_rate": 4.0490007644053505e-06, "loss": 0.6102, "step": 2619 }, { "epoch": 0.8863328822733424, "grad_norm": 0.3187443017959595, "learning_rate": 4.0482975710657455e-06, "loss": 0.619, "step": 2620 }, { "epoch": 0.8866711772665764, "grad_norm": 0.34462404251098633, "learning_rate": 4.04759417895511e-06, "loss": 0.5936, "step": 2621 }, { "epoch": 0.8870094722598105, "grad_norm": 0.31052395701408386, "learning_rate": 4.0468905881637445e-06, "loss": 0.6181, "step": 2622 }, { "epoch": 0.8873477672530447, "grad_norm": 0.31216633319854736, "learning_rate": 4.046186798781977e-06, "loss": 0.5945, "step": 2623 }, { "epoch": 0.8876860622462788, "grad_norm": 0.3139075040817261, "learning_rate": 4.045482810900159e-06, "loss": 0.6165, "step": 2624 }, { "epoch": 0.8880243572395129, "grad_norm": 0.32648202776908875, "learning_rate": 4.0447786246086704e-06, "loss": 0.6108, "step": 2625 }, { "epoch": 0.8883626522327469, "grad_norm": 0.3133030831813812, "learning_rate": 4.0440742399979134e-06, "loss": 0.6022, "step": 2626 }, { "epoch": 0.888700947225981, "grad_norm": 0.323303759098053, "learning_rate": 4.043369657158319e-06, "loss": 0.6191, "step": 2627 }, { "epoch": 0.8890392422192152, "grad_norm": 0.30271631479263306, "learning_rate": 4.042664876180341e-06, "loss": 0.6155, "step": 2628 }, { "epoch": 0.8893775372124493, "grad_norm": 0.3351958990097046, "learning_rate": 4.041959897154459e-06, "loss": 0.6146, "step": 2629 }, { "epoch": 0.8897158322056834, "grad_norm": 0.3067910373210907, "learning_rate": 4.041254720171178e-06, "loss": 0.6344, "step": 2630 }, { "epoch": 0.8900541271989174, "grad_norm": 0.3026675283908844, "learning_rate": 4.04054934532103e-06, "loss": 0.6075, "step": 2631 }, { "epoch": 0.8903924221921515, "grad_norm": 0.3328397572040558, "learning_rate": 4.039843772694572e-06, "loss": 0.5929, "step": 2632 }, { "epoch": 0.8907307171853857, "grad_norm": 0.30871671438217163, "learning_rate": 4.039138002382383e-06, "loss": 0.612, "step": 2633 }, { "epoch": 0.8910690121786198, "grad_norm": 0.3169931173324585, "learning_rate": 4.038432034475072e-06, "loss": 0.6241, "step": 2634 }, { "epoch": 0.8914073071718539, "grad_norm": 0.3067953586578369, "learning_rate": 4.037725869063272e-06, "loss": 0.5942, "step": 2635 }, { "epoch": 0.8917456021650879, "grad_norm": 0.30025818943977356, "learning_rate": 4.037019506237638e-06, "loss": 0.5773, "step": 2636 }, { "epoch": 0.892083897158322, "grad_norm": 0.312900573015213, "learning_rate": 4.036312946088857e-06, "loss": 0.6087, "step": 2637 }, { "epoch": 0.8924221921515562, "grad_norm": 0.2944320738315582, "learning_rate": 4.035606188707635e-06, "loss": 0.6095, "step": 2638 }, { "epoch": 0.8927604871447903, "grad_norm": 0.3260572552680969, "learning_rate": 4.0348992341847045e-06, "loss": 0.6476, "step": 2639 }, { "epoch": 0.8930987821380244, "grad_norm": 0.3164714276790619, "learning_rate": 4.034192082610829e-06, "loss": 0.6117, "step": 2640 }, { "epoch": 0.8934370771312584, "grad_norm": 0.30086609721183777, "learning_rate": 4.033484734076788e-06, "loss": 0.6281, "step": 2641 }, { "epoch": 0.8937753721244925, "grad_norm": 0.30885812640190125, "learning_rate": 4.032777188673395e-06, "loss": 0.6157, "step": 2642 }, { "epoch": 0.8941136671177267, "grad_norm": 0.31484881043434143, "learning_rate": 4.032069446491483e-06, "loss": 0.6009, "step": 2643 }, { "epoch": 0.8944519621109608, "grad_norm": 0.3178473711013794, "learning_rate": 4.0313615076219105e-06, "loss": 0.6391, "step": 2644 }, { "epoch": 0.8947902571041949, "grad_norm": 0.3143467605113983, "learning_rate": 4.030653372155567e-06, "loss": 0.6442, "step": 2645 }, { "epoch": 0.8951285520974289, "grad_norm": 0.31792086362838745, "learning_rate": 4.029945040183361e-06, "loss": 0.6117, "step": 2646 }, { "epoch": 0.895466847090663, "grad_norm": 0.30739086866378784, "learning_rate": 4.029236511796228e-06, "loss": 0.6323, "step": 2647 }, { "epoch": 0.8958051420838972, "grad_norm": 0.3313511312007904, "learning_rate": 4.02852778708513e-06, "loss": 0.6114, "step": 2648 }, { "epoch": 0.8961434370771313, "grad_norm": 0.3093686103820801, "learning_rate": 4.027818866141054e-06, "loss": 0.5859, "step": 2649 }, { "epoch": 0.8964817320703654, "grad_norm": 0.3045222759246826, "learning_rate": 4.02710974905501e-06, "loss": 0.6055, "step": 2650 }, { "epoch": 0.8968200270635994, "grad_norm": 0.3013499081134796, "learning_rate": 4.026400435918036e-06, "loss": 0.6149, "step": 2651 }, { "epoch": 0.8971583220568335, "grad_norm": 0.32199162244796753, "learning_rate": 4.025690926821192e-06, "loss": 0.6149, "step": 2652 }, { "epoch": 0.8974966170500677, "grad_norm": 0.3080962300300598, "learning_rate": 4.024981221855567e-06, "loss": 0.5905, "step": 2653 }, { "epoch": 0.8978349120433018, "grad_norm": 0.3041344881057739, "learning_rate": 4.024271321112273e-06, "loss": 0.5939, "step": 2654 }, { "epoch": 0.8981732070365359, "grad_norm": 0.3215409815311432, "learning_rate": 4.023561224682447e-06, "loss": 0.5873, "step": 2655 }, { "epoch": 0.8985115020297699, "grad_norm": 0.30556267499923706, "learning_rate": 4.02285093265725e-06, "loss": 0.5818, "step": 2656 }, { "epoch": 0.898849797023004, "grad_norm": 0.30866739153862, "learning_rate": 4.0221404451278716e-06, "loss": 0.6209, "step": 2657 }, { "epoch": 0.8991880920162382, "grad_norm": 0.3049587905406952, "learning_rate": 4.0214297621855226e-06, "loss": 0.5831, "step": 2658 }, { "epoch": 0.8995263870094723, "grad_norm": 0.3200123906135559, "learning_rate": 4.0207188839214426e-06, "loss": 0.5902, "step": 2659 }, { "epoch": 0.8998646820027063, "grad_norm": 0.3214031457901001, "learning_rate": 4.020007810426895e-06, "loss": 0.6054, "step": 2660 }, { "epoch": 0.9002029769959404, "grad_norm": 0.3044063150882721, "learning_rate": 4.019296541793165e-06, "loss": 0.599, "step": 2661 }, { "epoch": 0.9005412719891746, "grad_norm": 0.3177679777145386, "learning_rate": 4.018585078111567e-06, "loss": 0.6274, "step": 2662 }, { "epoch": 0.9008795669824087, "grad_norm": 0.31022119522094727, "learning_rate": 4.017873419473439e-06, "loss": 0.6046, "step": 2663 }, { "epoch": 0.9012178619756428, "grad_norm": 0.30733996629714966, "learning_rate": 4.017161565970144e-06, "loss": 0.6033, "step": 2664 }, { "epoch": 0.9015561569688768, "grad_norm": 0.2972017526626587, "learning_rate": 4.0164495176930705e-06, "loss": 0.5987, "step": 2665 }, { "epoch": 0.9018944519621109, "grad_norm": 0.30705738067626953, "learning_rate": 4.0157372747336295e-06, "loss": 0.6355, "step": 2666 }, { "epoch": 0.902232746955345, "grad_norm": 0.30549269914627075, "learning_rate": 4.015024837183261e-06, "loss": 0.5899, "step": 2667 }, { "epoch": 0.9025710419485792, "grad_norm": 0.31101226806640625, "learning_rate": 4.014312205133428e-06, "loss": 0.5998, "step": 2668 }, { "epoch": 0.9029093369418133, "grad_norm": 0.30988049507141113, "learning_rate": 4.013599378675618e-06, "loss": 0.6124, "step": 2669 }, { "epoch": 0.9032476319350473, "grad_norm": 0.30311518907546997, "learning_rate": 4.0128863579013435e-06, "loss": 0.5839, "step": 2670 }, { "epoch": 0.9035859269282814, "grad_norm": 0.32499635219573975, "learning_rate": 4.012173142902142e-06, "loss": 0.6102, "step": 2671 }, { "epoch": 0.9039242219215156, "grad_norm": 0.3209855854511261, "learning_rate": 4.0114597337695785e-06, "loss": 0.6189, "step": 2672 }, { "epoch": 0.9042625169147497, "grad_norm": 0.3145082890987396, "learning_rate": 4.010746130595239e-06, "loss": 0.5888, "step": 2673 }, { "epoch": 0.9046008119079838, "grad_norm": 0.30791082978248596, "learning_rate": 4.010032333470736e-06, "loss": 0.5968, "step": 2674 }, { "epoch": 0.9049391069012178, "grad_norm": 0.2905557453632355, "learning_rate": 4.009318342487708e-06, "loss": 0.5784, "step": 2675 }, { "epoch": 0.9052774018944519, "grad_norm": 0.298450231552124, "learning_rate": 4.008604157737816e-06, "loss": 0.6372, "step": 2676 }, { "epoch": 0.9056156968876861, "grad_norm": 0.3029123842716217, "learning_rate": 4.007889779312749e-06, "loss": 0.609, "step": 2677 }, { "epoch": 0.9059539918809202, "grad_norm": 0.317172110080719, "learning_rate": 4.007175207304218e-06, "loss": 0.627, "step": 2678 }, { "epoch": 0.9062922868741543, "grad_norm": 0.29541000723838806, "learning_rate": 4.006460441803961e-06, "loss": 0.5996, "step": 2679 }, { "epoch": 0.9066305818673883, "grad_norm": 0.3268113136291504, "learning_rate": 4.005745482903739e-06, "loss": 0.5988, "step": 2680 }, { "epoch": 0.9069688768606224, "grad_norm": 0.29055312275886536, "learning_rate": 4.005030330695339e-06, "loss": 0.6216, "step": 2681 }, { "epoch": 0.9073071718538566, "grad_norm": 0.3054754137992859, "learning_rate": 4.004314985270572e-06, "loss": 0.5825, "step": 2682 }, { "epoch": 0.9076454668470907, "grad_norm": 0.29997196793556213, "learning_rate": 4.003599446721276e-06, "loss": 0.6122, "step": 2683 }, { "epoch": 0.9079837618403248, "grad_norm": 0.31519967317581177, "learning_rate": 4.0028837151393095e-06, "loss": 0.609, "step": 2684 }, { "epoch": 0.9083220568335588, "grad_norm": 0.2943481504917145, "learning_rate": 4.002167790616561e-06, "loss": 0.5932, "step": 2685 }, { "epoch": 0.908660351826793, "grad_norm": 0.30532655119895935, "learning_rate": 4.001451673244939e-06, "loss": 0.5858, "step": 2686 }, { "epoch": 0.9089986468200271, "grad_norm": 0.3183039426803589, "learning_rate": 4.000735363116382e-06, "loss": 0.6036, "step": 2687 }, { "epoch": 0.9093369418132612, "grad_norm": 0.31465283036231995, "learning_rate": 4.000018860322845e-06, "loss": 0.6308, "step": 2688 }, { "epoch": 0.9096752368064953, "grad_norm": 0.3152543008327484, "learning_rate": 3.999302164956318e-06, "loss": 0.6124, "step": 2689 }, { "epoch": 0.9100135317997293, "grad_norm": 0.3163517415523529, "learning_rate": 3.998585277108808e-06, "loss": 0.5924, "step": 2690 }, { "epoch": 0.9103518267929634, "grad_norm": 0.3063450753688812, "learning_rate": 3.9978681968723505e-06, "loss": 0.6141, "step": 2691 }, { "epoch": 0.9106901217861976, "grad_norm": 0.3280673027038574, "learning_rate": 3.997150924339003e-06, "loss": 0.6099, "step": 2692 }, { "epoch": 0.9110284167794317, "grad_norm": 0.32003989815711975, "learning_rate": 3.996433459600851e-06, "loss": 0.6075, "step": 2693 }, { "epoch": 0.9113667117726658, "grad_norm": 0.3295360505580902, "learning_rate": 3.995715802750002e-06, "loss": 0.5993, "step": 2694 }, { "epoch": 0.9117050067658998, "grad_norm": 0.29726871848106384, "learning_rate": 3.994997953878589e-06, "loss": 0.6215, "step": 2695 }, { "epoch": 0.912043301759134, "grad_norm": 0.3211500346660614, "learning_rate": 3.99427991307877e-06, "loss": 0.5925, "step": 2696 }, { "epoch": 0.9123815967523681, "grad_norm": 0.30280858278274536, "learning_rate": 3.993561680442727e-06, "loss": 0.6047, "step": 2697 }, { "epoch": 0.9127198917456022, "grad_norm": 0.3184455931186676, "learning_rate": 3.992843256062667e-06, "loss": 0.6285, "step": 2698 }, { "epoch": 0.9130581867388363, "grad_norm": 0.3031151294708252, "learning_rate": 3.9921246400308226e-06, "loss": 0.5943, "step": 2699 }, { "epoch": 0.9133964817320703, "grad_norm": 0.3140672445297241, "learning_rate": 3.991405832439449e-06, "loss": 0.6112, "step": 2700 }, { "epoch": 0.9137347767253045, "grad_norm": 0.33230122923851013, "learning_rate": 3.990686833380827e-06, "loss": 0.5796, "step": 2701 }, { "epoch": 0.9140730717185386, "grad_norm": 0.30419784784317017, "learning_rate": 3.989967642947261e-06, "loss": 0.6077, "step": 2702 }, { "epoch": 0.9144113667117727, "grad_norm": 0.32081732153892517, "learning_rate": 3.989248261231084e-06, "loss": 0.5807, "step": 2703 }, { "epoch": 0.9147496617050067, "grad_norm": 0.35001420974731445, "learning_rate": 3.988528688324647e-06, "loss": 0.5917, "step": 2704 }, { "epoch": 0.9150879566982408, "grad_norm": 0.3250044584274292, "learning_rate": 3.9878089243203325e-06, "loss": 0.6045, "step": 2705 }, { "epoch": 0.915426251691475, "grad_norm": 0.32416415214538574, "learning_rate": 3.9870889693105405e-06, "loss": 0.6124, "step": 2706 }, { "epoch": 0.9157645466847091, "grad_norm": 0.3099220395088196, "learning_rate": 3.986368823387701e-06, "loss": 0.6043, "step": 2707 }, { "epoch": 0.9161028416779432, "grad_norm": 0.30382490158081055, "learning_rate": 3.985648486644267e-06, "loss": 0.621, "step": 2708 }, { "epoch": 0.9164411366711772, "grad_norm": 0.30252471566200256, "learning_rate": 3.984927959172715e-06, "loss": 0.635, "step": 2709 }, { "epoch": 0.9167794316644113, "grad_norm": 0.30487069487571716, "learning_rate": 3.984207241065546e-06, "loss": 0.604, "step": 2710 }, { "epoch": 0.9171177266576455, "grad_norm": 0.3196687400341034, "learning_rate": 3.983486332415288e-06, "loss": 0.6021, "step": 2711 }, { "epoch": 0.9174560216508796, "grad_norm": 0.30728936195373535, "learning_rate": 3.982765233314489e-06, "loss": 0.6035, "step": 2712 }, { "epoch": 0.9177943166441137, "grad_norm": 0.30180492997169495, "learning_rate": 3.982043943855726e-06, "loss": 0.613, "step": 2713 }, { "epoch": 0.9181326116373477, "grad_norm": 0.32103562355041504, "learning_rate": 3.981322464131597e-06, "loss": 0.6288, "step": 2714 }, { "epoch": 0.9184709066305818, "grad_norm": 0.3367891013622284, "learning_rate": 3.980600794234726e-06, "loss": 0.6103, "step": 2715 }, { "epoch": 0.918809201623816, "grad_norm": 0.3209404945373535, "learning_rate": 3.979878934257763e-06, "loss": 0.6361, "step": 2716 }, { "epoch": 0.9191474966170501, "grad_norm": 0.3047719895839691, "learning_rate": 3.979156884293378e-06, "loss": 0.6108, "step": 2717 }, { "epoch": 0.9194857916102842, "grad_norm": 0.3115750551223755, "learning_rate": 3.97843464443427e-06, "loss": 0.617, "step": 2718 }, { "epoch": 0.9198240866035182, "grad_norm": 0.3256355822086334, "learning_rate": 3.97771221477316e-06, "loss": 0.6153, "step": 2719 }, { "epoch": 0.9201623815967523, "grad_norm": 0.3168798089027405, "learning_rate": 3.976989595402794e-06, "loss": 0.6352, "step": 2720 }, { "epoch": 0.9205006765899865, "grad_norm": 0.31463319063186646, "learning_rate": 3.976266786415941e-06, "loss": 0.6181, "step": 2721 }, { "epoch": 0.9208389715832206, "grad_norm": 0.3075326383113861, "learning_rate": 3.975543787905397e-06, "loss": 0.6109, "step": 2722 }, { "epoch": 0.9211772665764547, "grad_norm": 0.3231752812862396, "learning_rate": 3.97482059996398e-06, "loss": 0.6216, "step": 2723 }, { "epoch": 0.9215155615696887, "grad_norm": 0.3100549578666687, "learning_rate": 3.974097222684532e-06, "loss": 0.6194, "step": 2724 }, { "epoch": 0.9218538565629228, "grad_norm": 0.3243764042854309, "learning_rate": 3.973373656159923e-06, "loss": 0.6092, "step": 2725 }, { "epoch": 0.922192151556157, "grad_norm": 0.40476149320602417, "learning_rate": 3.9726499004830435e-06, "loss": 0.6016, "step": 2726 }, { "epoch": 0.9225304465493911, "grad_norm": 0.3071513772010803, "learning_rate": 3.971925955746808e-06, "loss": 0.6241, "step": 2727 }, { "epoch": 0.9228687415426252, "grad_norm": 0.3396247327327728, "learning_rate": 3.9712018220441595e-06, "loss": 0.6003, "step": 2728 }, { "epoch": 0.9232070365358592, "grad_norm": 0.32305842638015747, "learning_rate": 3.97047749946806e-06, "loss": 0.5911, "step": 2729 }, { "epoch": 0.9235453315290933, "grad_norm": 0.3153322637081146, "learning_rate": 3.9697529881115015e-06, "loss": 0.6289, "step": 2730 }, { "epoch": 0.9238836265223275, "grad_norm": 0.29865762591362, "learning_rate": 3.969028288067495e-06, "loss": 0.6164, "step": 2731 }, { "epoch": 0.9242219215155616, "grad_norm": 0.31667447090148926, "learning_rate": 3.968303399429077e-06, "loss": 0.6183, "step": 2732 }, { "epoch": 0.9245602165087957, "grad_norm": 0.37699103355407715, "learning_rate": 3.967578322289311e-06, "loss": 0.6141, "step": 2733 }, { "epoch": 0.9248985115020297, "grad_norm": 0.30763012170791626, "learning_rate": 3.96685305674128e-06, "loss": 0.6054, "step": 2734 }, { "epoch": 0.9252368064952639, "grad_norm": 0.31958621740341187, "learning_rate": 3.966127602878096e-06, "loss": 0.6359, "step": 2735 }, { "epoch": 0.925575101488498, "grad_norm": 0.33669278025627136, "learning_rate": 3.965401960792894e-06, "loss": 0.6073, "step": 2736 }, { "epoch": 0.9259133964817321, "grad_norm": 0.31136423349380493, "learning_rate": 3.964676130578829e-06, "loss": 0.6406, "step": 2737 }, { "epoch": 0.9262516914749662, "grad_norm": 0.31435713171958923, "learning_rate": 3.963950112329086e-06, "loss": 0.6045, "step": 2738 }, { "epoch": 0.9265899864682002, "grad_norm": 0.3053027093410492, "learning_rate": 3.963223906136869e-06, "loss": 0.6029, "step": 2739 }, { "epoch": 0.9269282814614344, "grad_norm": 0.36422377824783325, "learning_rate": 3.962497512095412e-06, "loss": 0.601, "step": 2740 }, { "epoch": 0.9272665764546685, "grad_norm": 0.3055732250213623, "learning_rate": 3.961770930297968e-06, "loss": 0.6066, "step": 2741 }, { "epoch": 0.9276048714479026, "grad_norm": 0.3042244017124176, "learning_rate": 3.961044160837815e-06, "loss": 0.5942, "step": 2742 }, { "epoch": 0.9279431664411367, "grad_norm": 0.3053058087825775, "learning_rate": 3.960317203808257e-06, "loss": 0.5995, "step": 2743 }, { "epoch": 0.9282814614343707, "grad_norm": 0.2914317846298218, "learning_rate": 3.95959005930262e-06, "loss": 0.6176, "step": 2744 }, { "epoch": 0.9286197564276049, "grad_norm": 0.31416183710098267, "learning_rate": 3.958862727414256e-06, "loss": 0.6035, "step": 2745 }, { "epoch": 0.928958051420839, "grad_norm": 0.2997271418571472, "learning_rate": 3.958135208236541e-06, "loss": 0.6159, "step": 2746 }, { "epoch": 0.9292963464140731, "grad_norm": 0.3054497241973877, "learning_rate": 3.957407501862873e-06, "loss": 0.632, "step": 2747 }, { "epoch": 0.9296346414073072, "grad_norm": 0.2939889132976532, "learning_rate": 3.956679608386675e-06, "loss": 0.6327, "step": 2748 }, { "epoch": 0.9299729364005412, "grad_norm": 0.3000098764896393, "learning_rate": 3.955951527901395e-06, "loss": 0.6365, "step": 2749 }, { "epoch": 0.9303112313937754, "grad_norm": 0.3275192379951477, "learning_rate": 3.955223260500503e-06, "loss": 0.5955, "step": 2750 }, { "epoch": 0.9306495263870095, "grad_norm": 0.32291221618652344, "learning_rate": 3.954494806277496e-06, "loss": 0.6173, "step": 2751 }, { "epoch": 0.9309878213802436, "grad_norm": 0.30584001541137695, "learning_rate": 3.953766165325892e-06, "loss": 0.6249, "step": 2752 }, { "epoch": 0.9313261163734776, "grad_norm": 0.3476294279098511, "learning_rate": 3.953037337739235e-06, "loss": 0.5937, "step": 2753 }, { "epoch": 0.9316644113667117, "grad_norm": 0.30108344554901123, "learning_rate": 3.952308323611091e-06, "loss": 0.6359, "step": 2754 }, { "epoch": 0.9320027063599459, "grad_norm": 0.3059139549732208, "learning_rate": 3.951579123035053e-06, "loss": 0.6388, "step": 2755 }, { "epoch": 0.93234100135318, "grad_norm": 0.309368371963501, "learning_rate": 3.9508497361047334e-06, "loss": 0.6241, "step": 2756 }, { "epoch": 0.9326792963464141, "grad_norm": 0.3595433533191681, "learning_rate": 3.950120162913773e-06, "loss": 0.5929, "step": 2757 }, { "epoch": 0.9330175913396481, "grad_norm": 0.32127654552459717, "learning_rate": 3.9493904035558356e-06, "loss": 0.6169, "step": 2758 }, { "epoch": 0.9333558863328822, "grad_norm": 0.2973577678203583, "learning_rate": 3.948660458124606e-06, "loss": 0.6124, "step": 2759 }, { "epoch": 0.9336941813261164, "grad_norm": 0.30579710006713867, "learning_rate": 3.947930326713795e-06, "loss": 0.5929, "step": 2760 }, { "epoch": 0.9340324763193505, "grad_norm": 0.32044917345046997, "learning_rate": 3.947200009417138e-06, "loss": 0.59, "step": 2761 }, { "epoch": 0.9343707713125846, "grad_norm": 0.30661648511886597, "learning_rate": 3.946469506328393e-06, "loss": 0.6504, "step": 2762 }, { "epoch": 0.9347090663058186, "grad_norm": 0.3152296543121338, "learning_rate": 3.945738817541343e-06, "loss": 0.6298, "step": 2763 }, { "epoch": 0.9350473612990527, "grad_norm": 0.30985817313194275, "learning_rate": 3.945007943149794e-06, "loss": 0.6477, "step": 2764 }, { "epoch": 0.9353856562922869, "grad_norm": 0.3215924799442291, "learning_rate": 3.944276883247575e-06, "loss": 0.6092, "step": 2765 }, { "epoch": 0.935723951285521, "grad_norm": 0.3165227174758911, "learning_rate": 3.943545637928539e-06, "loss": 0.6021, "step": 2766 }, { "epoch": 0.9360622462787551, "grad_norm": 0.323017954826355, "learning_rate": 3.942814207286566e-06, "loss": 0.6183, "step": 2767 }, { "epoch": 0.9364005412719891, "grad_norm": 0.2983512282371521, "learning_rate": 3.942082591415556e-06, "loss": 0.5848, "step": 2768 }, { "epoch": 0.9367388362652233, "grad_norm": 0.32698339223861694, "learning_rate": 3.941350790409434e-06, "loss": 0.5975, "step": 2769 }, { "epoch": 0.9370771312584574, "grad_norm": 0.3031372129917145, "learning_rate": 3.94061880436215e-06, "loss": 0.583, "step": 2770 }, { "epoch": 0.9374154262516915, "grad_norm": 0.2965066432952881, "learning_rate": 3.939886633367674e-06, "loss": 0.6025, "step": 2771 }, { "epoch": 0.9377537212449256, "grad_norm": 0.3047961890697479, "learning_rate": 3.939154277520006e-06, "loss": 0.6077, "step": 2772 }, { "epoch": 0.9380920162381596, "grad_norm": 0.3104235529899597, "learning_rate": 3.938421736913163e-06, "loss": 0.5801, "step": 2773 }, { "epoch": 0.9384303112313938, "grad_norm": 0.30181562900543213, "learning_rate": 3.93768901164119e-06, "loss": 0.6032, "step": 2774 }, { "epoch": 0.9387686062246279, "grad_norm": 0.30882957577705383, "learning_rate": 3.936956101798156e-06, "loss": 0.6071, "step": 2775 }, { "epoch": 0.939106901217862, "grad_norm": 0.3213382661342621, "learning_rate": 3.936223007478151e-06, "loss": 0.6529, "step": 2776 }, { "epoch": 0.9394451962110961, "grad_norm": 0.30974048376083374, "learning_rate": 3.935489728775288e-06, "loss": 0.6231, "step": 2777 }, { "epoch": 0.9397834912043301, "grad_norm": 0.31085005402565, "learning_rate": 3.934756265783709e-06, "loss": 0.5694, "step": 2778 }, { "epoch": 0.9401217861975643, "grad_norm": 0.29983943700790405, "learning_rate": 3.934022618597574e-06, "loss": 0.6022, "step": 2779 }, { "epoch": 0.9404600811907984, "grad_norm": 0.3146032691001892, "learning_rate": 3.93328878731107e-06, "loss": 0.6145, "step": 2780 }, { "epoch": 0.9407983761840325, "grad_norm": 0.2978469729423523, "learning_rate": 3.932554772018405e-06, "loss": 0.6127, "step": 2781 }, { "epoch": 0.9411366711772666, "grad_norm": 0.2859659194946289, "learning_rate": 3.931820572813815e-06, "loss": 0.6107, "step": 2782 }, { "epoch": 0.9414749661705006, "grad_norm": 0.3153398931026459, "learning_rate": 3.931086189791554e-06, "loss": 0.6268, "step": 2783 }, { "epoch": 0.9418132611637348, "grad_norm": 0.2963147759437561, "learning_rate": 3.930351623045904e-06, "loss": 0.6248, "step": 2784 }, { "epoch": 0.9421515561569689, "grad_norm": 0.3015451431274414, "learning_rate": 3.929616872671168e-06, "loss": 0.6161, "step": 2785 }, { "epoch": 0.942489851150203, "grad_norm": 0.2986791133880615, "learning_rate": 3.928881938761674e-06, "loss": 0.6027, "step": 2786 }, { "epoch": 0.9428281461434371, "grad_norm": 0.3088212013244629, "learning_rate": 3.928146821411773e-06, "loss": 0.6087, "step": 2787 }, { "epoch": 0.9431664411366711, "grad_norm": 0.3106054365634918, "learning_rate": 3.92741152071584e-06, "loss": 0.6186, "step": 2788 }, { "epoch": 0.9435047361299053, "grad_norm": 0.314456969499588, "learning_rate": 3.926676036768273e-06, "loss": 0.6141, "step": 2789 }, { "epoch": 0.9438430311231394, "grad_norm": 0.3026145398616791, "learning_rate": 3.925940369663494e-06, "loss": 0.6058, "step": 2790 }, { "epoch": 0.9441813261163735, "grad_norm": 0.3158716559410095, "learning_rate": 3.925204519495946e-06, "loss": 0.6106, "step": 2791 }, { "epoch": 0.9445196211096076, "grad_norm": 0.31110692024230957, "learning_rate": 3.924468486360101e-06, "loss": 0.6032, "step": 2792 }, { "epoch": 0.9448579161028416, "grad_norm": 0.3241216242313385, "learning_rate": 3.9237322703504495e-06, "loss": 0.593, "step": 2793 }, { "epoch": 0.9451962110960758, "grad_norm": 0.31992748379707336, "learning_rate": 3.922995871561508e-06, "loss": 0.6314, "step": 2794 }, { "epoch": 0.9455345060893099, "grad_norm": 0.30860215425491333, "learning_rate": 3.922259290087814e-06, "loss": 0.6047, "step": 2795 }, { "epoch": 0.945872801082544, "grad_norm": 0.3147035241127014, "learning_rate": 3.9215225260239315e-06, "loss": 0.6355, "step": 2796 }, { "epoch": 0.946211096075778, "grad_norm": 0.3141655921936035, "learning_rate": 3.920785579464447e-06, "loss": 0.6038, "step": 2797 }, { "epoch": 0.9465493910690121, "grad_norm": 0.3091292381286621, "learning_rate": 3.9200484505039685e-06, "loss": 0.645, "step": 2798 }, { "epoch": 0.9468876860622463, "grad_norm": 0.29969021677970886, "learning_rate": 3.919311139237129e-06, "loss": 0.6036, "step": 2799 }, { "epoch": 0.9472259810554804, "grad_norm": 0.3013696074485779, "learning_rate": 3.918573645758586e-06, "loss": 0.6194, "step": 2800 }, { "epoch": 0.9475642760487145, "grad_norm": 0.30952438712120056, "learning_rate": 3.917835970163019e-06, "loss": 0.6001, "step": 2801 }, { "epoch": 0.9479025710419485, "grad_norm": 0.313272088766098, "learning_rate": 3.917098112545131e-06, "loss": 0.5993, "step": 2802 }, { "epoch": 0.9482408660351827, "grad_norm": 0.31668946146965027, "learning_rate": 3.9163600729996474e-06, "loss": 0.6078, "step": 2803 }, { "epoch": 0.9485791610284168, "grad_norm": 0.30897411704063416, "learning_rate": 3.915621851621318e-06, "loss": 0.5887, "step": 2804 }, { "epoch": 0.9489174560216509, "grad_norm": 0.3138033449649811, "learning_rate": 3.914883448504918e-06, "loss": 0.615, "step": 2805 }, { "epoch": 0.949255751014885, "grad_norm": 0.3061548173427582, "learning_rate": 3.914144863745243e-06, "loss": 0.6038, "step": 2806 }, { "epoch": 0.949594046008119, "grad_norm": 0.310561865568161, "learning_rate": 3.913406097437111e-06, "loss": 0.6094, "step": 2807 }, { "epoch": 0.9499323410013532, "grad_norm": 0.3132334351539612, "learning_rate": 3.9126671496753665e-06, "loss": 0.6124, "step": 2808 }, { "epoch": 0.9502706359945873, "grad_norm": 0.32545071840286255, "learning_rate": 3.911928020554877e-06, "loss": 0.6089, "step": 2809 }, { "epoch": 0.9506089309878214, "grad_norm": 0.30556997656822205, "learning_rate": 3.91118871017053e-06, "loss": 0.5793, "step": 2810 }, { "epoch": 0.9509472259810555, "grad_norm": 0.32106563448905945, "learning_rate": 3.910449218617239e-06, "loss": 0.598, "step": 2811 }, { "epoch": 0.9512855209742895, "grad_norm": 0.311365008354187, "learning_rate": 3.909709545989942e-06, "loss": 0.6421, "step": 2812 }, { "epoch": 0.9516238159675237, "grad_norm": 0.3102574348449707, "learning_rate": 3.908969692383597e-06, "loss": 0.6311, "step": 2813 }, { "epoch": 0.9519621109607578, "grad_norm": 0.3076390326023102, "learning_rate": 3.908229657893186e-06, "loss": 0.6083, "step": 2814 }, { "epoch": 0.9523004059539919, "grad_norm": 0.3074639141559601, "learning_rate": 3.907489442613716e-06, "loss": 0.6183, "step": 2815 }, { "epoch": 0.952638700947226, "grad_norm": 0.31547728180885315, "learning_rate": 3.906749046640216e-06, "loss": 0.6267, "step": 2816 }, { "epoch": 0.95297699594046, "grad_norm": 0.3008100688457489, "learning_rate": 3.906008470067738e-06, "loss": 0.6207, "step": 2817 }, { "epoch": 0.9533152909336942, "grad_norm": 0.307170569896698, "learning_rate": 3.9052677129913575e-06, "loss": 0.601, "step": 2818 }, { "epoch": 0.9536535859269283, "grad_norm": 0.2974233627319336, "learning_rate": 3.904526775506174e-06, "loss": 0.6406, "step": 2819 }, { "epoch": 0.9539918809201624, "grad_norm": 0.3185262978076935, "learning_rate": 3.903785657707308e-06, "loss": 0.6407, "step": 2820 }, { "epoch": 0.9543301759133965, "grad_norm": 0.3027080297470093, "learning_rate": 3.903044359689905e-06, "loss": 0.6286, "step": 2821 }, { "epoch": 0.9546684709066305, "grad_norm": 0.3234482705593109, "learning_rate": 3.902302881549134e-06, "loss": 0.6347, "step": 2822 }, { "epoch": 0.9550067658998647, "grad_norm": 0.3211921453475952, "learning_rate": 3.9015612233801845e-06, "loss": 0.6094, "step": 2823 }, { "epoch": 0.9553450608930988, "grad_norm": 0.2871362864971161, "learning_rate": 3.9008193852782735e-06, "loss": 0.6051, "step": 2824 }, { "epoch": 0.9556833558863329, "grad_norm": 0.3141300082206726, "learning_rate": 3.900077367338635e-06, "loss": 0.6121, "step": 2825 }, { "epoch": 0.956021650879567, "grad_norm": 0.31430670619010925, "learning_rate": 3.899335169656533e-06, "loss": 0.6163, "step": 2826 }, { "epoch": 0.956359945872801, "grad_norm": 0.29906728863716125, "learning_rate": 3.898592792327248e-06, "loss": 0.6102, "step": 2827 }, { "epoch": 0.9566982408660352, "grad_norm": 0.30080512166023254, "learning_rate": 3.897850235446089e-06, "loss": 0.5982, "step": 2828 }, { "epoch": 0.9570365358592693, "grad_norm": 0.29242387413978577, "learning_rate": 3.897107499108385e-06, "loss": 0.6152, "step": 2829 }, { "epoch": 0.9573748308525034, "grad_norm": 0.2824954390525818, "learning_rate": 3.896364583409488e-06, "loss": 0.6023, "step": 2830 }, { "epoch": 0.9577131258457375, "grad_norm": 0.3062789738178253, "learning_rate": 3.895621488444777e-06, "loss": 0.6267, "step": 2831 }, { "epoch": 0.9580514208389715, "grad_norm": 0.3123863637447357, "learning_rate": 3.894878214309645e-06, "loss": 0.6391, "step": 2832 }, { "epoch": 0.9583897158322057, "grad_norm": 0.29575687646865845, "learning_rate": 3.894134761099519e-06, "loss": 0.6311, "step": 2833 }, { "epoch": 0.9587280108254398, "grad_norm": 0.2981586158275604, "learning_rate": 3.893391128909841e-06, "loss": 0.6131, "step": 2834 }, { "epoch": 0.9590663058186739, "grad_norm": 0.3023635149002075, "learning_rate": 3.89264731783608e-06, "loss": 0.5943, "step": 2835 }, { "epoch": 0.959404600811908, "grad_norm": 0.3178964853286743, "learning_rate": 3.891903327973727e-06, "loss": 0.6031, "step": 2836 }, { "epoch": 0.959742895805142, "grad_norm": 0.30327779054641724, "learning_rate": 3.891159159418294e-06, "loss": 0.613, "step": 2837 }, { "epoch": 0.9600811907983762, "grad_norm": 0.30893656611442566, "learning_rate": 3.89041481226532e-06, "loss": 0.6023, "step": 2838 }, { "epoch": 0.9604194857916103, "grad_norm": 0.2962033450603485, "learning_rate": 3.889670286610363e-06, "loss": 0.649, "step": 2839 }, { "epoch": 0.9607577807848444, "grad_norm": 0.330939382314682, "learning_rate": 3.888925582549006e-06, "loss": 0.595, "step": 2840 }, { "epoch": 0.9610960757780784, "grad_norm": 0.29611915349960327, "learning_rate": 3.888180700176854e-06, "loss": 0.5919, "step": 2841 }, { "epoch": 0.9614343707713126, "grad_norm": 0.29588085412979126, "learning_rate": 3.887435639589535e-06, "loss": 0.6002, "step": 2842 }, { "epoch": 0.9617726657645467, "grad_norm": 0.3030603528022766, "learning_rate": 3.886690400882702e-06, "loss": 0.5704, "step": 2843 }, { "epoch": 0.9621109607577808, "grad_norm": 0.3341445028781891, "learning_rate": 3.885944984152027e-06, "loss": 0.6159, "step": 2844 }, { "epoch": 0.9624492557510149, "grad_norm": 0.307426393032074, "learning_rate": 3.885199389493207e-06, "loss": 0.6229, "step": 2845 }, { "epoch": 0.9627875507442489, "grad_norm": 0.3231599032878876, "learning_rate": 3.884453617001964e-06, "loss": 0.612, "step": 2846 }, { "epoch": 0.963125845737483, "grad_norm": 0.3122326731681824, "learning_rate": 3.883707666774037e-06, "loss": 0.6397, "step": 2847 }, { "epoch": 0.9634641407307172, "grad_norm": 0.29532092809677124, "learning_rate": 3.882961538905194e-06, "loss": 0.5833, "step": 2848 }, { "epoch": 0.9638024357239513, "grad_norm": 0.31446200609207153, "learning_rate": 3.882215233491224e-06, "loss": 0.6094, "step": 2849 }, { "epoch": 0.9641407307171854, "grad_norm": 0.3175473213195801, "learning_rate": 3.881468750627934e-06, "loss": 0.6163, "step": 2850 }, { "epoch": 0.9644790257104194, "grad_norm": 0.3167509138584137, "learning_rate": 3.880722090411161e-06, "loss": 0.6268, "step": 2851 }, { "epoch": 0.9648173207036536, "grad_norm": 0.3074794411659241, "learning_rate": 3.879975252936761e-06, "loss": 0.5633, "step": 2852 }, { "epoch": 0.9651556156968877, "grad_norm": 0.3008774220943451, "learning_rate": 3.879228238300613e-06, "loss": 0.6086, "step": 2853 }, { "epoch": 0.9654939106901218, "grad_norm": 0.30035844445228577, "learning_rate": 3.878481046598619e-06, "loss": 0.5901, "step": 2854 }, { "epoch": 0.9658322056833559, "grad_norm": 0.31396716833114624, "learning_rate": 3.877733677926704e-06, "loss": 0.6236, "step": 2855 }, { "epoch": 0.9661705006765899, "grad_norm": 0.31159961223602295, "learning_rate": 3.8769861323808146e-06, "loss": 0.5898, "step": 2856 }, { "epoch": 0.9665087956698241, "grad_norm": 0.30015599727630615, "learning_rate": 3.876238410056922e-06, "loss": 0.6235, "step": 2857 }, { "epoch": 0.9668470906630582, "grad_norm": 0.30023249983787537, "learning_rate": 3.875490511051018e-06, "loss": 0.5967, "step": 2858 }, { "epoch": 0.9671853856562923, "grad_norm": 0.3081527352333069, "learning_rate": 3.874742435459119e-06, "loss": 0.6186, "step": 2859 }, { "epoch": 0.9675236806495264, "grad_norm": 0.29001158475875854, "learning_rate": 3.8739941833772645e-06, "loss": 0.6069, "step": 2860 }, { "epoch": 0.9678619756427604, "grad_norm": 0.31207314133644104, "learning_rate": 3.873245754901513e-06, "loss": 0.6173, "step": 2861 }, { "epoch": 0.9682002706359946, "grad_norm": 0.3128235936164856, "learning_rate": 3.872497150127948e-06, "loss": 0.5948, "step": 2862 }, { "epoch": 0.9685385656292287, "grad_norm": 0.3001565933227539, "learning_rate": 3.871748369152678e-06, "loss": 0.6256, "step": 2863 }, { "epoch": 0.9688768606224628, "grad_norm": 0.3021819591522217, "learning_rate": 3.87099941207183e-06, "loss": 0.6186, "step": 2864 }, { "epoch": 0.9692151556156969, "grad_norm": 0.31208890676498413, "learning_rate": 3.870250278981556e-06, "loss": 0.5921, "step": 2865 }, { "epoch": 0.969553450608931, "grad_norm": 0.3111448585987091, "learning_rate": 3.8695009699780295e-06, "loss": 0.588, "step": 2866 }, { "epoch": 0.9698917456021651, "grad_norm": 0.29972732067108154, "learning_rate": 3.868751485157448e-06, "loss": 0.6179, "step": 2867 }, { "epoch": 0.9702300405953992, "grad_norm": 0.3013609051704407, "learning_rate": 3.868001824616029e-06, "loss": 0.5683, "step": 2868 }, { "epoch": 0.9705683355886333, "grad_norm": 0.32852277159690857, "learning_rate": 3.867251988450017e-06, "loss": 0.6204, "step": 2869 }, { "epoch": 0.9709066305818674, "grad_norm": 0.3015483021736145, "learning_rate": 3.866501976755673e-06, "loss": 0.608, "step": 2870 }, { "epoch": 0.9712449255751014, "grad_norm": 0.3175235390663147, "learning_rate": 3.865751789629285e-06, "loss": 0.6, "step": 2871 }, { "epoch": 0.9715832205683356, "grad_norm": 0.30726557970046997, "learning_rate": 3.865001427167164e-06, "loss": 0.5984, "step": 2872 }, { "epoch": 0.9719215155615697, "grad_norm": 0.2998640537261963, "learning_rate": 3.86425088946564e-06, "loss": 0.5782, "step": 2873 }, { "epoch": 0.9722598105548038, "grad_norm": 0.31798794865608215, "learning_rate": 3.863500176621068e-06, "loss": 0.5884, "step": 2874 }, { "epoch": 0.9725981055480379, "grad_norm": 0.3241869807243347, "learning_rate": 3.8627492887298234e-06, "loss": 0.5987, "step": 2875 }, { "epoch": 0.972936400541272, "grad_norm": 0.309407502412796, "learning_rate": 3.8619982258883074e-06, "loss": 0.6201, "step": 2876 }, { "epoch": 0.9732746955345061, "grad_norm": 0.29837316274642944, "learning_rate": 3.861246988192941e-06, "loss": 0.6208, "step": 2877 }, { "epoch": 0.9736129905277402, "grad_norm": 0.3008418083190918, "learning_rate": 3.860495575740167e-06, "loss": 0.6403, "step": 2878 }, { "epoch": 0.9739512855209743, "grad_norm": 0.3007146418094635, "learning_rate": 3.859743988626456e-06, "loss": 0.6002, "step": 2879 }, { "epoch": 0.9742895805142084, "grad_norm": 0.30299657583236694, "learning_rate": 3.8589922269482925e-06, "loss": 0.5899, "step": 2880 }, { "epoch": 0.9746278755074425, "grad_norm": 0.3261502683162689, "learning_rate": 3.85824029080219e-06, "loss": 0.617, "step": 2881 }, { "epoch": 0.9749661705006766, "grad_norm": 0.3027297854423523, "learning_rate": 3.857488180284682e-06, "loss": 0.6109, "step": 2882 }, { "epoch": 0.9753044654939107, "grad_norm": 0.3169786036014557, "learning_rate": 3.856735895492325e-06, "loss": 0.5901, "step": 2883 }, { "epoch": 0.9756427604871448, "grad_norm": 0.3069666624069214, "learning_rate": 3.855983436521698e-06, "loss": 0.6233, "step": 2884 }, { "epoch": 0.975981055480379, "grad_norm": 0.2997592091560364, "learning_rate": 3.855230803469402e-06, "loss": 0.6072, "step": 2885 }, { "epoch": 0.976319350473613, "grad_norm": 0.3085188567638397, "learning_rate": 3.854477996432059e-06, "loss": 0.6224, "step": 2886 }, { "epoch": 0.9766576454668471, "grad_norm": 0.3022494912147522, "learning_rate": 3.8537250155063155e-06, "loss": 0.5949, "step": 2887 }, { "epoch": 0.9769959404600812, "grad_norm": 0.3045576512813568, "learning_rate": 3.8529718607888394e-06, "loss": 0.5966, "step": 2888 }, { "epoch": 0.9773342354533153, "grad_norm": 0.31438544392585754, "learning_rate": 3.8522185323763215e-06, "loss": 0.6054, "step": 2889 }, { "epoch": 0.9776725304465493, "grad_norm": 0.30769291520118713, "learning_rate": 3.851465030365473e-06, "loss": 0.6155, "step": 2890 }, { "epoch": 0.9780108254397835, "grad_norm": 0.31114768981933594, "learning_rate": 3.850711354853031e-06, "loss": 0.6059, "step": 2891 }, { "epoch": 0.9783491204330176, "grad_norm": 0.3039344251155853, "learning_rate": 3.849957505935751e-06, "loss": 0.5995, "step": 2892 }, { "epoch": 0.9786874154262517, "grad_norm": 0.309730589389801, "learning_rate": 3.849203483710412e-06, "loss": 0.6121, "step": 2893 }, { "epoch": 0.9790257104194858, "grad_norm": 0.3096121847629547, "learning_rate": 3.8484492882738165e-06, "loss": 0.6118, "step": 2894 }, { "epoch": 0.9793640054127198, "grad_norm": 0.30355656147003174, "learning_rate": 3.847694919722789e-06, "loss": 0.6314, "step": 2895 }, { "epoch": 0.979702300405954, "grad_norm": 0.3071487545967102, "learning_rate": 3.8469403781541745e-06, "loss": 0.6022, "step": 2896 }, { "epoch": 0.9800405953991881, "grad_norm": 0.3176629841327667, "learning_rate": 3.846185663664842e-06, "loss": 0.606, "step": 2897 }, { "epoch": 0.9803788903924222, "grad_norm": 0.30271345376968384, "learning_rate": 3.845430776351682e-06, "loss": 0.6023, "step": 2898 }, { "epoch": 0.9807171853856563, "grad_norm": 0.29765617847442627, "learning_rate": 3.844675716311607e-06, "loss": 0.5989, "step": 2899 }, { "epoch": 0.9810554803788903, "grad_norm": 0.2919057309627533, "learning_rate": 3.843920483641551e-06, "loss": 0.6009, "step": 2900 }, { "epoch": 0.9813937753721245, "grad_norm": 0.31493720412254333, "learning_rate": 3.843165078438473e-06, "loss": 0.601, "step": 2901 }, { "epoch": 0.9817320703653586, "grad_norm": 0.3035096526145935, "learning_rate": 3.842409500799351e-06, "loss": 0.6052, "step": 2902 }, { "epoch": 0.9820703653585927, "grad_norm": 0.30932676792144775, "learning_rate": 3.841653750821185e-06, "loss": 0.5919, "step": 2903 }, { "epoch": 0.9824086603518268, "grad_norm": 0.3056243062019348, "learning_rate": 3.840897828601002e-06, "loss": 0.6193, "step": 2904 }, { "epoch": 0.9827469553450608, "grad_norm": 0.3046973645687103, "learning_rate": 3.840141734235845e-06, "loss": 0.6029, "step": 2905 }, { "epoch": 0.983085250338295, "grad_norm": 0.30299368500709534, "learning_rate": 3.839385467822782e-06, "loss": 0.6203, "step": 2906 }, { "epoch": 0.9834235453315291, "grad_norm": 0.29637983441352844, "learning_rate": 3.838629029458903e-06, "loss": 0.605, "step": 2907 }, { "epoch": 0.9837618403247632, "grad_norm": 0.2971667945384979, "learning_rate": 3.837872419241321e-06, "loss": 0.6026, "step": 2908 }, { "epoch": 0.9841001353179973, "grad_norm": 0.3333316147327423, "learning_rate": 3.837115637267167e-06, "loss": 0.6092, "step": 2909 }, { "epoch": 0.9844384303112313, "grad_norm": 0.31869786977767944, "learning_rate": 3.8363586836336e-06, "loss": 0.6202, "step": 2910 }, { "epoch": 0.9847767253044655, "grad_norm": 0.33361250162124634, "learning_rate": 3.835601558437796e-06, "loss": 0.5869, "step": 2911 }, { "epoch": 0.9851150202976996, "grad_norm": 0.30500105023384094, "learning_rate": 3.834844261776957e-06, "loss": 0.628, "step": 2912 }, { "epoch": 0.9854533152909337, "grad_norm": 0.2937752902507782, "learning_rate": 3.834086793748302e-06, "loss": 0.6018, "step": 2913 }, { "epoch": 0.9857916102841678, "grad_norm": 0.3345789909362793, "learning_rate": 3.833329154449078e-06, "loss": 0.6189, "step": 2914 }, { "epoch": 0.9861299052774019, "grad_norm": 0.31569352746009827, "learning_rate": 3.83257134397655e-06, "loss": 0.6182, "step": 2915 }, { "epoch": 0.986468200270636, "grad_norm": 0.3059478998184204, "learning_rate": 3.831813362428005e-06, "loss": 0.615, "step": 2916 }, { "epoch": 0.9868064952638701, "grad_norm": 0.31113189458847046, "learning_rate": 3.831055209900754e-06, "loss": 0.608, "step": 2917 }, { "epoch": 0.9871447902571042, "grad_norm": 0.3061697781085968, "learning_rate": 3.830296886492129e-06, "loss": 0.6223, "step": 2918 }, { "epoch": 0.9874830852503383, "grad_norm": 0.3173771798610687, "learning_rate": 3.829538392299484e-06, "loss": 0.6239, "step": 2919 }, { "epoch": 0.9878213802435724, "grad_norm": 0.3158840537071228, "learning_rate": 3.828779727420193e-06, "loss": 0.614, "step": 2920 }, { "epoch": 0.9881596752368065, "grad_norm": 0.2991497218608856, "learning_rate": 3.828020891951656e-06, "loss": 0.5877, "step": 2921 }, { "epoch": 0.9884979702300406, "grad_norm": 0.3036811947822571, "learning_rate": 3.8272618859912914e-06, "loss": 0.6183, "step": 2922 }, { "epoch": 0.9888362652232747, "grad_norm": 0.3067082464694977, "learning_rate": 3.826502709636541e-06, "loss": 0.606, "step": 2923 }, { "epoch": 0.9891745602165088, "grad_norm": 0.3082413971424103, "learning_rate": 3.825743362984868e-06, "loss": 0.5925, "step": 2924 }, { "epoch": 0.9895128552097429, "grad_norm": 0.3056364059448242, "learning_rate": 3.824983846133758e-06, "loss": 0.5819, "step": 2925 }, { "epoch": 0.989851150202977, "grad_norm": 0.3220091164112091, "learning_rate": 3.824224159180717e-06, "loss": 0.6179, "step": 2926 }, { "epoch": 0.9901894451962111, "grad_norm": 0.31654736399650574, "learning_rate": 3.8234643022232756e-06, "loss": 0.5863, "step": 2927 }, { "epoch": 0.9905277401894452, "grad_norm": 0.31448447704315186, "learning_rate": 3.822704275358983e-06, "loss": 0.6154, "step": 2928 }, { "epoch": 0.9908660351826793, "grad_norm": 0.29887446761131287, "learning_rate": 3.821944078685411e-06, "loss": 0.6123, "step": 2929 }, { "epoch": 0.9912043301759134, "grad_norm": 0.3173508942127228, "learning_rate": 3.8211837123001564e-06, "loss": 0.6175, "step": 2930 }, { "epoch": 0.9915426251691475, "grad_norm": 0.3123912513256073, "learning_rate": 3.820423176300834e-06, "loss": 0.5858, "step": 2931 }, { "epoch": 0.9918809201623816, "grad_norm": 0.2958592176437378, "learning_rate": 3.819662470785082e-06, "loss": 0.6147, "step": 2932 }, { "epoch": 0.9922192151556157, "grad_norm": 0.30872708559036255, "learning_rate": 3.818901595850561e-06, "loss": 0.6271, "step": 2933 }, { "epoch": 0.9925575101488497, "grad_norm": 0.30018481612205505, "learning_rate": 3.81814055159495e-06, "loss": 0.6332, "step": 2934 }, { "epoch": 0.9928958051420839, "grad_norm": 0.3036275804042816, "learning_rate": 3.817379338115953e-06, "loss": 0.6368, "step": 2935 }, { "epoch": 0.993234100135318, "grad_norm": 0.3049243092536926, "learning_rate": 3.816617955511296e-06, "loss": 0.5834, "step": 2936 }, { "epoch": 0.9935723951285521, "grad_norm": 0.3022926449775696, "learning_rate": 3.815856403878724e-06, "loss": 0.5902, "step": 2937 }, { "epoch": 0.9939106901217862, "grad_norm": 0.31031396985054016, "learning_rate": 3.815094683316007e-06, "loss": 0.5986, "step": 2938 }, { "epoch": 0.9942489851150202, "grad_norm": 0.30683907866477966, "learning_rate": 3.814332793920935e-06, "loss": 0.5875, "step": 2939 }, { "epoch": 0.9945872801082544, "grad_norm": 0.3027743995189667, "learning_rate": 3.8135707357913177e-06, "loss": 0.5862, "step": 2940 }, { "epoch": 0.9949255751014885, "grad_norm": 0.2959025502204895, "learning_rate": 3.81280850902499e-06, "loss": 0.5967, "step": 2941 }, { "epoch": 0.9952638700947226, "grad_norm": 0.3143369257450104, "learning_rate": 3.812046113719806e-06, "loss": 0.6155, "step": 2942 }, { "epoch": 0.9956021650879567, "grad_norm": 0.3105371594429016, "learning_rate": 3.8112835499736434e-06, "loss": 0.6392, "step": 2943 }, { "epoch": 0.9959404600811907, "grad_norm": 0.3036314845085144, "learning_rate": 3.8105208178843988e-06, "loss": 0.5768, "step": 2944 }, { "epoch": 0.9962787550744249, "grad_norm": 0.30836254358291626, "learning_rate": 3.8097579175499942e-06, "loss": 0.625, "step": 2945 }, { "epoch": 0.996617050067659, "grad_norm": 0.29898902773857117, "learning_rate": 3.8089948490683692e-06, "loss": 0.6177, "step": 2946 }, { "epoch": 0.9969553450608931, "grad_norm": 0.3097745180130005, "learning_rate": 3.8082316125374874e-06, "loss": 0.5863, "step": 2947 }, { "epoch": 0.9972936400541272, "grad_norm": 0.3105376362800598, "learning_rate": 3.807468208055334e-06, "loss": 0.6305, "step": 2948 }, { "epoch": 0.9976319350473613, "grad_norm": 0.30030593276023865, "learning_rate": 3.8067046357199135e-06, "loss": 0.6263, "step": 2949 }, { "epoch": 0.9979702300405954, "grad_norm": 0.3033324182033539, "learning_rate": 3.8059408956292553e-06, "loss": 0.5988, "step": 2950 }, { "epoch": 0.9983085250338295, "grad_norm": 0.30815133452415466, "learning_rate": 3.805176987881408e-06, "loss": 0.5969, "step": 2951 }, { "epoch": 0.9986468200270636, "grad_norm": 0.3052274286746979, "learning_rate": 3.8044129125744423e-06, "loss": 0.6082, "step": 2952 }, { "epoch": 0.9989851150202977, "grad_norm": 0.3146717846393585, "learning_rate": 3.8036486698064513e-06, "loss": 0.5964, "step": 2953 }, { "epoch": 0.9993234100135318, "grad_norm": 0.33001822233200073, "learning_rate": 3.8028842596755467e-06, "loss": 0.6081, "step": 2954 }, { "epoch": 0.9996617050067659, "grad_norm": 0.30166327953338623, "learning_rate": 3.8021196822798656e-06, "loss": 0.5951, "step": 2955 }, { "epoch": 1.0, "grad_norm": 0.32355165481567383, "learning_rate": 3.8013549377175653e-06, "loss": 0.6117, "step": 2956 } ], "logging_steps": 1, "max_steps": 8868, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 2956, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.648689084357778e+19, "train_batch_size": 12, "trial_name": null, "trial_params": null }