| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.5001442723862652, |
| "eval_steps": 1300, |
| "global_step": 1300, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0003847263633740502, |
| "grad_norm": 108.82730102539062, |
| "learning_rate": 1.0000000000000001e-07, |
| "loss": 3.3651, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0003847263633740502, |
| "eval_loss": 3.230529308319092, |
| "eval_runtime": 238.8764, |
| "eval_samples_per_second": 0.804, |
| "eval_steps_per_second": 0.402, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0007694527267481004, |
| "grad_norm": 93.2563705444336, |
| "learning_rate": 2.0000000000000002e-07, |
| "loss": 3.0404, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0011541790901221506, |
| "grad_norm": 92.04035186767578, |
| "learning_rate": 3.0000000000000004e-07, |
| "loss": 3.1693, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0015389054534962008, |
| "grad_norm": 92.20787048339844, |
| "learning_rate": 4.0000000000000003e-07, |
| "loss": 3.111, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.001923631816870251, |
| "grad_norm": 95.61177825927734, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 3.1856, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.002308358180244301, |
| "grad_norm": 99.54902648925781, |
| "learning_rate": 6.000000000000001e-07, |
| "loss": 3.1906, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0026930845436183514, |
| "grad_norm": 91.08406829833984, |
| "learning_rate": 7.000000000000001e-07, |
| "loss": 2.8621, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0030778109069924016, |
| "grad_norm": 76.93069458007812, |
| "learning_rate": 8.000000000000001e-07, |
| "loss": 2.868, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.003462537270366452, |
| "grad_norm": 73.68675994873047, |
| "learning_rate": 9.000000000000001e-07, |
| "loss": 2.7618, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.003847263633740502, |
| "grad_norm": 65.52332305908203, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 2.467, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.004231989997114553, |
| "grad_norm": 51.694793701171875, |
| "learning_rate": 1.1e-06, |
| "loss": 2.1402, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.004616716360488602, |
| "grad_norm": 56.9593391418457, |
| "learning_rate": 1.2000000000000002e-06, |
| "loss": 2.2299, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.005001442723862653, |
| "grad_norm": 52.62051773071289, |
| "learning_rate": 1.3e-06, |
| "loss": 1.9982, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.005386169087236703, |
| "grad_norm": 41.68424606323242, |
| "learning_rate": 1.4000000000000001e-06, |
| "loss": 1.5395, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.005770895450610753, |
| "grad_norm": 36.1357307434082, |
| "learning_rate": 1.5e-06, |
| "loss": 1.2412, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.006155621813984803, |
| "grad_norm": 33.65456008911133, |
| "learning_rate": 1.6000000000000001e-06, |
| "loss": 1.14, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.006540348177358854, |
| "grad_norm": 33.47177505493164, |
| "learning_rate": 1.7000000000000002e-06, |
| "loss": 0.9993, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.006925074540732904, |
| "grad_norm": 31.6041316986084, |
| "learning_rate": 1.8000000000000001e-06, |
| "loss": 0.9344, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.007309800904106954, |
| "grad_norm": 26.2973690032959, |
| "learning_rate": 1.9000000000000002e-06, |
| "loss": 0.6089, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.007694527267481004, |
| "grad_norm": 29.93291473388672, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.4644, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.008079253630855054, |
| "grad_norm": 25.220720291137695, |
| "learning_rate": 2.1000000000000002e-06, |
| "loss": 0.3234, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.008463979994229105, |
| "grad_norm": 23.863779067993164, |
| "learning_rate": 2.2e-06, |
| "loss": 0.258, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.008848706357603155, |
| "grad_norm": 13.900153160095215, |
| "learning_rate": 2.3000000000000004e-06, |
| "loss": 0.1722, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.009233432720977205, |
| "grad_norm": 8.517366409301758, |
| "learning_rate": 2.4000000000000003e-06, |
| "loss": 0.1005, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.009618159084351255, |
| "grad_norm": 11.185029983520508, |
| "learning_rate": 2.5e-06, |
| "loss": 0.1114, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.010002885447725306, |
| "grad_norm": 10.078015327453613, |
| "learning_rate": 2.6e-06, |
| "loss": 0.0737, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.010387611811099356, |
| "grad_norm": 4.785120964050293, |
| "learning_rate": 2.7000000000000004e-06, |
| "loss": 0.0508, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.010772338174473406, |
| "grad_norm": 10.350395202636719, |
| "learning_rate": 2.8000000000000003e-06, |
| "loss": 0.0708, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.011157064537847455, |
| "grad_norm": 7.701849937438965, |
| "learning_rate": 2.9e-06, |
| "loss": 0.0475, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.011541790901221507, |
| "grad_norm": 3.197638750076294, |
| "learning_rate": 3e-06, |
| "loss": 0.0392, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.011926517264595557, |
| "grad_norm": 3.299790382385254, |
| "learning_rate": 3.1000000000000004e-06, |
| "loss": 0.0397, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.012311243627969606, |
| "grad_norm": 4.075237274169922, |
| "learning_rate": 3.2000000000000003e-06, |
| "loss": 0.0337, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.012695969991343656, |
| "grad_norm": 3.679044008255005, |
| "learning_rate": 3.3000000000000006e-06, |
| "loss": 0.0353, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.013080696354717708, |
| "grad_norm": 4.004962921142578, |
| "learning_rate": 3.4000000000000005e-06, |
| "loss": 0.0251, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.013465422718091757, |
| "grad_norm": 4.201568126678467, |
| "learning_rate": 3.5e-06, |
| "loss": 0.0381, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.013850149081465807, |
| "grad_norm": 8.480195045471191, |
| "learning_rate": 3.6000000000000003e-06, |
| "loss": 0.0568, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.014234875444839857, |
| "grad_norm": 3.5017919540405273, |
| "learning_rate": 3.7e-06, |
| "loss": 0.0731, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.014619601808213908, |
| "grad_norm": 12.31709098815918, |
| "learning_rate": 3.8000000000000005e-06, |
| "loss": 0.0727, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.015004328171587958, |
| "grad_norm": 3.9239490032196045, |
| "learning_rate": 3.900000000000001e-06, |
| "loss": 0.0406, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.015389054534962008, |
| "grad_norm": 2.3493189811706543, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.043, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01577378089833606, |
| "grad_norm": 4.353029727935791, |
| "learning_rate": 4.1e-06, |
| "loss": 0.0439, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.016158507261710107, |
| "grad_norm": 2.912537097930908, |
| "learning_rate": 4.2000000000000004e-06, |
| "loss": 0.0445, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.01654323362508416, |
| "grad_norm": 3.65114688873291, |
| "learning_rate": 4.3e-06, |
| "loss": 0.0426, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.01692795998845821, |
| "grad_norm": 11.928956031799316, |
| "learning_rate": 4.4e-06, |
| "loss": 0.0634, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.01731268635183226, |
| "grad_norm": 9.41391372680664, |
| "learning_rate": 4.5e-06, |
| "loss": 0.0824, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.01769741271520631, |
| "grad_norm": 3.015249013900757, |
| "learning_rate": 4.600000000000001e-06, |
| "loss": 0.0382, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.018082139078580358, |
| "grad_norm": 6.8663554191589355, |
| "learning_rate": 4.7e-06, |
| "loss": 0.0411, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.01846686544195441, |
| "grad_norm": 6.370840549468994, |
| "learning_rate": 4.800000000000001e-06, |
| "loss": 0.0527, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.01885159180532846, |
| "grad_norm": 2.8191823959350586, |
| "learning_rate": 4.9000000000000005e-06, |
| "loss": 0.0384, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.01923631816870251, |
| "grad_norm": 9.229619026184082, |
| "learning_rate": 5e-06, |
| "loss": 0.0463, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.01962104453207656, |
| "grad_norm": 7.707767486572266, |
| "learning_rate": 5.1e-06, |
| "loss": 0.0566, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.020005770895450612, |
| "grad_norm": 3.9060797691345215, |
| "learning_rate": 5.2e-06, |
| "loss": 0.0289, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.02039049725882466, |
| "grad_norm": 1.166146993637085, |
| "learning_rate": 5.300000000000001e-06, |
| "loss": 0.0196, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.02077522362219871, |
| "grad_norm": 5.692835330963135, |
| "learning_rate": 5.400000000000001e-06, |
| "loss": 0.0443, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.021159949985572763, |
| "grad_norm": 7.362571716308594, |
| "learning_rate": 5.500000000000001e-06, |
| "loss": 0.0387, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.02154467634894681, |
| "grad_norm": 4.404002666473389, |
| "learning_rate": 5.600000000000001e-06, |
| "loss": 0.0325, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.021929402712320863, |
| "grad_norm": 7.550673007965088, |
| "learning_rate": 5.7e-06, |
| "loss": 0.0473, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.02231412907569491, |
| "grad_norm": 5.290981769561768, |
| "learning_rate": 5.8e-06, |
| "loss": 0.0322, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.022698855439068962, |
| "grad_norm": 3.3678693771362305, |
| "learning_rate": 5.9e-06, |
| "loss": 0.0259, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.023083581802443014, |
| "grad_norm": 5.019497871398926, |
| "learning_rate": 6e-06, |
| "loss": 0.0349, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.02346830816581706, |
| "grad_norm": 5.121387958526611, |
| "learning_rate": 6.1e-06, |
| "loss": 0.0323, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.023853034529191113, |
| "grad_norm": 3.188506841659546, |
| "learning_rate": 6.200000000000001e-06, |
| "loss": 0.0501, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.024237760892565165, |
| "grad_norm": 1.362047791481018, |
| "learning_rate": 6.300000000000001e-06, |
| "loss": 0.0266, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.024622487255939213, |
| "grad_norm": 4.539747714996338, |
| "learning_rate": 6.4000000000000006e-06, |
| "loss": 0.0303, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.025007213619313264, |
| "grad_norm": 8.616043090820312, |
| "learning_rate": 6.5000000000000004e-06, |
| "loss": 0.0486, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.025391939982687312, |
| "grad_norm": 5.378427028656006, |
| "learning_rate": 6.600000000000001e-06, |
| "loss": 0.033, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.025776666346061364, |
| "grad_norm": 2.1194162368774414, |
| "learning_rate": 6.700000000000001e-06, |
| "loss": 0.0288, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.026161392709435415, |
| "grad_norm": 2.0167043209075928, |
| "learning_rate": 6.800000000000001e-06, |
| "loss": 0.0326, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.026546119072809463, |
| "grad_norm": 1.795593023300171, |
| "learning_rate": 6.9e-06, |
| "loss": 0.0295, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.026930845436183515, |
| "grad_norm": 1.237252116203308, |
| "learning_rate": 7e-06, |
| "loss": 0.0177, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.027315571799557566, |
| "grad_norm": 1.0062570571899414, |
| "learning_rate": 7.100000000000001e-06, |
| "loss": 0.0233, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.027700298162931614, |
| "grad_norm": 1.7850754261016846, |
| "learning_rate": 7.2000000000000005e-06, |
| "loss": 0.0211, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.028085024526305666, |
| "grad_norm": 1.5070022344589233, |
| "learning_rate": 7.3e-06, |
| "loss": 0.0156, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.028469750889679714, |
| "grad_norm": 2.268380641937256, |
| "learning_rate": 7.4e-06, |
| "loss": 0.0353, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.028854477253053765, |
| "grad_norm": 3.3155412673950195, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 0.0163, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.029239203616427817, |
| "grad_norm": 3.727926731109619, |
| "learning_rate": 7.600000000000001e-06, |
| "loss": 0.0281, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.029623929979801865, |
| "grad_norm": 8.840143203735352, |
| "learning_rate": 7.7e-06, |
| "loss": 0.0443, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.030008656343175916, |
| "grad_norm": 5.514863014221191, |
| "learning_rate": 7.800000000000002e-06, |
| "loss": 0.075, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.030393382706549968, |
| "grad_norm": 5.712233543395996, |
| "learning_rate": 7.9e-06, |
| "loss": 0.0474, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.030778109069924016, |
| "grad_norm": 12.506179809570312, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.0539, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.031162835433298067, |
| "grad_norm": 2.7478084564208984, |
| "learning_rate": 8.1e-06, |
| "loss": 0.0225, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.03154756179667212, |
| "grad_norm": 7.17296838760376, |
| "learning_rate": 8.2e-06, |
| "loss": 0.0577, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.03193228816004617, |
| "grad_norm": 9.32388687133789, |
| "learning_rate": 8.3e-06, |
| "loss": 0.0613, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.032317014523420215, |
| "grad_norm": 9.240764617919922, |
| "learning_rate": 8.400000000000001e-06, |
| "loss": 0.0513, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.03270174088679427, |
| "grad_norm": 3.8488717079162598, |
| "learning_rate": 8.5e-06, |
| "loss": 0.0313, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.03308646725016832, |
| "grad_norm": 4.666772365570068, |
| "learning_rate": 8.6e-06, |
| "loss": 0.0379, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.033471193613542366, |
| "grad_norm": 16.0006160736084, |
| "learning_rate": 8.700000000000001e-06, |
| "loss": 0.0857, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.03385591997691642, |
| "grad_norm": 7.749240875244141, |
| "learning_rate": 8.8e-06, |
| "loss": 0.0615, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.03424064634029047, |
| "grad_norm": 3.0161995887756348, |
| "learning_rate": 8.900000000000001e-06, |
| "loss": 0.0233, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.03462537270366452, |
| "grad_norm": 1.6129286289215088, |
| "learning_rate": 9e-06, |
| "loss": 0.022, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.03501009906703857, |
| "grad_norm": 3.569190740585327, |
| "learning_rate": 9.100000000000001e-06, |
| "loss": 0.0249, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.03539482543041262, |
| "grad_norm": 5.3800740242004395, |
| "learning_rate": 9.200000000000002e-06, |
| "loss": 0.0523, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.03577955179378667, |
| "grad_norm": 4.84494686126709, |
| "learning_rate": 9.3e-06, |
| "loss": 0.0434, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.036164278157160716, |
| "grad_norm": 2.5774073600769043, |
| "learning_rate": 9.4e-06, |
| "loss": 0.0598, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.03654900452053477, |
| "grad_norm": 3.4920051097869873, |
| "learning_rate": 9.5e-06, |
| "loss": 0.0211, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.03693373088390882, |
| "grad_norm": 2.574754238128662, |
| "learning_rate": 9.600000000000001e-06, |
| "loss": 0.0226, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.03731845724728287, |
| "grad_norm": 6.462972164154053, |
| "learning_rate": 9.7e-06, |
| "loss": 0.0375, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.03770318361065692, |
| "grad_norm": 6.1067986488342285, |
| "learning_rate": 9.800000000000001e-06, |
| "loss": 0.0412, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.03808790997403097, |
| "grad_norm": 2.107085704803467, |
| "learning_rate": 9.9e-06, |
| "loss": 0.0249, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.03847263633740502, |
| "grad_norm": 1.3796989917755127, |
| "learning_rate": 1e-05, |
| "loss": 0.022, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.03885736270077907, |
| "grad_norm": 1.5101048946380615, |
| "learning_rate": 9.99999985161259e-06, |
| "loss": 0.043, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.03924208906415312, |
| "grad_norm": 4.264603614807129, |
| "learning_rate": 9.999999406450364e-06, |
| "loss": 0.0591, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.03962681542752717, |
| "grad_norm": 3.4575819969177246, |
| "learning_rate": 9.999998664513351e-06, |
| "loss": 0.0313, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.040011541790901224, |
| "grad_norm": 1.9395074844360352, |
| "learning_rate": 9.999997625801593e-06, |
| "loss": 0.0141, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.04039626815427527, |
| "grad_norm": 2.3632755279541016, |
| "learning_rate": 9.999996290315154e-06, |
| "loss": 0.0163, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.04078099451764932, |
| "grad_norm": 3.7122342586517334, |
| "learning_rate": 9.999994658054113e-06, |
| "loss": 0.0144, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.041165720881023375, |
| "grad_norm": 8.778124809265137, |
| "learning_rate": 9.999992729018565e-06, |
| "loss": 0.0989, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.04155044724439742, |
| "grad_norm": 3.6246390342712402, |
| "learning_rate": 9.999990503208625e-06, |
| "loss": 0.053, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.04193517360777147, |
| "grad_norm": 2.6841275691986084, |
| "learning_rate": 9.999987980624426e-06, |
| "loss": 0.0221, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.042319899971145526, |
| "grad_norm": 1.4657002687454224, |
| "learning_rate": 9.999985161266116e-06, |
| "loss": 0.0178, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.042704626334519574, |
| "grad_norm": 4.295589447021484, |
| "learning_rate": 9.999982045133868e-06, |
| "loss": 0.0394, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.04308935269789362, |
| "grad_norm": 5.708546161651611, |
| "learning_rate": 9.999978632227859e-06, |
| "loss": 0.0308, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.04347407906126767, |
| "grad_norm": 1.5994555950164795, |
| "learning_rate": 9.999974922548297e-06, |
| "loss": 0.0147, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.043858805424641725, |
| "grad_norm": 5.406169414520264, |
| "learning_rate": 9.9999709160954e-06, |
| "loss": 0.0643, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.04424353178801577, |
| "grad_norm": 1.2986438274383545, |
| "learning_rate": 9.999966612869404e-06, |
| "loss": 0.0129, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.04462825815138982, |
| "grad_norm": 3.2887203693389893, |
| "learning_rate": 9.999962012870571e-06, |
| "loss": 0.0334, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.045012984514763876, |
| "grad_norm": 4.520976543426514, |
| "learning_rate": 9.999957116099169e-06, |
| "loss": 0.0259, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.045397710878137924, |
| "grad_norm": 2.6903226375579834, |
| "learning_rate": 9.999951922555486e-06, |
| "loss": 0.0118, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.04578243724151197, |
| "grad_norm": 9.5642671585083, |
| "learning_rate": 9.999946432239835e-06, |
| "loss": 0.0835, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.04616716360488603, |
| "grad_norm": 3.200307607650757, |
| "learning_rate": 9.999940645152541e-06, |
| "loss": 0.0305, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.046551889968260075, |
| "grad_norm": 2.1902754306793213, |
| "learning_rate": 9.999934561293948e-06, |
| "loss": 0.0264, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.04693661633163412, |
| "grad_norm": 2.826099395751953, |
| "learning_rate": 9.999928180664415e-06, |
| "loss": 0.0257, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.04732134269500818, |
| "grad_norm": 1.8656409978866577, |
| "learning_rate": 9.999921503264322e-06, |
| "loss": 0.0223, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.047706069058382226, |
| "grad_norm": 0.8881422281265259, |
| "learning_rate": 9.999914529094066e-06, |
| "loss": 0.0207, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.048090795421756274, |
| "grad_norm": 4.812993049621582, |
| "learning_rate": 9.99990725815406e-06, |
| "loss": 0.0141, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.04847552178513033, |
| "grad_norm": 1.3018031120300293, |
| "learning_rate": 9.999899690444736e-06, |
| "loss": 0.0295, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.04886024814850438, |
| "grad_norm": 4.202988624572754, |
| "learning_rate": 9.999891825966541e-06, |
| "loss": 0.0377, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.049244974511878425, |
| "grad_norm": 12.903936386108398, |
| "learning_rate": 9.999883664719945e-06, |
| "loss": 0.0459, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.049629700875252473, |
| "grad_norm": 1.8323734998703003, |
| "learning_rate": 9.999875206705432e-06, |
| "loss": 0.0308, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.05001442723862653, |
| "grad_norm": 2.0135998725891113, |
| "learning_rate": 9.999866451923502e-06, |
| "loss": 0.0382, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.050399153602000576, |
| "grad_norm": 3.817152261734009, |
| "learning_rate": 9.999857400374676e-06, |
| "loss": 0.0285, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.050783879965374625, |
| "grad_norm": 4.566162586212158, |
| "learning_rate": 9.999848052059489e-06, |
| "loss": 0.0352, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.05116860632874868, |
| "grad_norm": 8.352324485778809, |
| "learning_rate": 9.999838406978499e-06, |
| "loss": 0.0515, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.05155333269212273, |
| "grad_norm": 3.5633044242858887, |
| "learning_rate": 9.999828465132278e-06, |
| "loss": 0.0281, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.051938059055496776, |
| "grad_norm": 2.960449457168579, |
| "learning_rate": 9.999818226521416e-06, |
| "loss": 0.0448, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.05232278541887083, |
| "grad_norm": 2.096433639526367, |
| "learning_rate": 9.99980769114652e-06, |
| "loss": 0.0312, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.05270751178224488, |
| "grad_norm": 1.4195860624313354, |
| "learning_rate": 9.999796859008215e-06, |
| "loss": 0.0193, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.05309223814561893, |
| "grad_norm": 1.6590723991394043, |
| "learning_rate": 9.999785730107145e-06, |
| "loss": 0.0331, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.05347696450899298, |
| "grad_norm": 1.200443148612976, |
| "learning_rate": 9.99977430444397e-06, |
| "loss": 0.0276, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.05386169087236703, |
| "grad_norm": 1.6274645328521729, |
| "learning_rate": 9.999762582019365e-06, |
| "loss": 0.0205, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.05424641723574108, |
| "grad_norm": 1.1515076160430908, |
| "learning_rate": 9.999750562834032e-06, |
| "loss": 0.0199, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.05463114359911513, |
| "grad_norm": 2.4453465938568115, |
| "learning_rate": 9.999738246888682e-06, |
| "loss": 0.0303, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.05501586996248918, |
| "grad_norm": 2.5557849407196045, |
| "learning_rate": 9.999725634184044e-06, |
| "loss": 0.026, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.05540059632586323, |
| "grad_norm": 1.422838568687439, |
| "learning_rate": 9.999712724720868e-06, |
| "loss": 0.0153, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.05578532268923728, |
| "grad_norm": 1.7006760835647583, |
| "learning_rate": 9.999699518499922e-06, |
| "loss": 0.0423, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.05617004905261133, |
| "grad_norm": 2.2471675872802734, |
| "learning_rate": 9.999686015521986e-06, |
| "loss": 0.0134, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.05655477541598538, |
| "grad_norm": 4.883077621459961, |
| "learning_rate": 9.999672215787864e-06, |
| "loss": 0.0356, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.05693950177935943, |
| "grad_norm": 3.122020721435547, |
| "learning_rate": 9.999658119298374e-06, |
| "loss": 0.0452, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.05732422814273348, |
| "grad_norm": 2.787843942642212, |
| "learning_rate": 9.999643726054354e-06, |
| "loss": 0.066, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.05770895450610753, |
| "grad_norm": 2.476574659347534, |
| "learning_rate": 9.999629036056657e-06, |
| "loss": 0.0231, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.05809368086948158, |
| "grad_norm": 1.8053635358810425, |
| "learning_rate": 9.999614049306157e-06, |
| "loss": 0.021, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.058478407232855634, |
| "grad_norm": 4.4272894859313965, |
| "learning_rate": 9.999598765803742e-06, |
| "loss": 0.0529, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.05886313359622968, |
| "grad_norm": 5.861669063568115, |
| "learning_rate": 9.999583185550318e-06, |
| "loss": 0.0415, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.05924785995960373, |
| "grad_norm": 1.1911544799804688, |
| "learning_rate": 9.999567308546811e-06, |
| "loss": 0.0213, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.059632586322977785, |
| "grad_norm": 1.1022003889083862, |
| "learning_rate": 9.999551134794164e-06, |
| "loss": 0.0175, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.06001731268635183, |
| "grad_norm": 3.5450422763824463, |
| "learning_rate": 9.999534664293337e-06, |
| "loss": 0.0349, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.06040203904972588, |
| "grad_norm": 2.6744449138641357, |
| "learning_rate": 9.999517897045306e-06, |
| "loss": 0.0128, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.060786765413099936, |
| "grad_norm": 4.565217018127441, |
| "learning_rate": 9.999500833051067e-06, |
| "loss": 0.049, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.061171491776473984, |
| "grad_norm": 1.150743007659912, |
| "learning_rate": 9.999483472311636e-06, |
| "loss": 0.0187, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.06155621813984803, |
| "grad_norm": 2.818730115890503, |
| "learning_rate": 9.999465814828037e-06, |
| "loss": 0.0223, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.06194094450322208, |
| "grad_norm": 2.2844345569610596, |
| "learning_rate": 9.999447860601322e-06, |
| "loss": 0.0211, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.062325670866596135, |
| "grad_norm": 1.219356656074524, |
| "learning_rate": 9.999429609632557e-06, |
| "loss": 0.0223, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.06271039722997018, |
| "grad_norm": 2.913196086883545, |
| "learning_rate": 9.999411061922824e-06, |
| "loss": 0.0179, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.06309512359334424, |
| "grad_norm": 1.3902113437652588, |
| "learning_rate": 9.999392217473225e-06, |
| "loss": 0.027, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.06347984995671828, |
| "grad_norm": 2.247837781906128, |
| "learning_rate": 9.999373076284877e-06, |
| "loss": 0.0154, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.06386457632009233, |
| "grad_norm": 2.8625357151031494, |
| "learning_rate": 9.99935363835892e-06, |
| "loss": 0.021, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.06424930268346639, |
| "grad_norm": 4.006026744842529, |
| "learning_rate": 9.999333903696502e-06, |
| "loss": 0.029, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.06463402904684043, |
| "grad_norm": 2.9252421855926514, |
| "learning_rate": 9.999313872298796e-06, |
| "loss": 0.022, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.06501875541021448, |
| "grad_norm": 6.485391616821289, |
| "learning_rate": 9.999293544166995e-06, |
| "loss": 0.08, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.06540348177358854, |
| "grad_norm": 4.041164398193359, |
| "learning_rate": 9.9992729193023e-06, |
| "loss": 0.0433, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.06578820813696258, |
| "grad_norm": 3.4366846084594727, |
| "learning_rate": 9.999251997705941e-06, |
| "loss": 0.0458, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.06617293450033664, |
| "grad_norm": 0.8214027285575867, |
| "learning_rate": 9.999230779379155e-06, |
| "loss": 0.0186, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.06655766086371069, |
| "grad_norm": 6.1616597175598145, |
| "learning_rate": 9.999209264323201e-06, |
| "loss": 0.0627, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.06694238722708473, |
| "grad_norm": 10.068628311157227, |
| "learning_rate": 9.999187452539361e-06, |
| "loss": 0.0602, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.06732711359045879, |
| "grad_norm": 5.898168087005615, |
| "learning_rate": 9.999165344028927e-06, |
| "loss": 0.0418, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.06771183995383284, |
| "grad_norm": 2.946211099624634, |
| "learning_rate": 9.99914293879321e-06, |
| "loss": 0.0157, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.06809656631720688, |
| "grad_norm": 3.2756588459014893, |
| "learning_rate": 9.99912023683354e-06, |
| "loss": 0.0306, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.06848129268058094, |
| "grad_norm": 23.782278060913086, |
| "learning_rate": 9.999097238151266e-06, |
| "loss": 0.0323, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.06886601904395499, |
| "grad_norm": 3.2240705490112305, |
| "learning_rate": 9.999073942747752e-06, |
| "loss": 0.0457, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.06925074540732903, |
| "grad_norm": 1.9313907623291016, |
| "learning_rate": 9.999050350624381e-06, |
| "loss": 0.0086, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.06963547177070309, |
| "grad_norm": 1.5393195152282715, |
| "learning_rate": 9.999026461782556e-06, |
| "loss": 0.01, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.07002019813407714, |
| "grad_norm": 0.6853839159011841, |
| "learning_rate": 9.999002276223688e-06, |
| "loss": 0.0111, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.07040492449745119, |
| "grad_norm": 2.393374443054199, |
| "learning_rate": 9.99897779394922e-06, |
| "loss": 0.0218, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.07078965086082524, |
| "grad_norm": 2.253481388092041, |
| "learning_rate": 9.998953014960603e-06, |
| "loss": 0.0197, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.0711743772241993, |
| "grad_norm": 1.3223674297332764, |
| "learning_rate": 9.998927939259303e-06, |
| "loss": 0.009, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.07155910358757334, |
| "grad_norm": 1.9413704872131348, |
| "learning_rate": 9.998902566846814e-06, |
| "loss": 0.0098, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.07194382995094739, |
| "grad_norm": 1.4487104415893555, |
| "learning_rate": 9.998876897724641e-06, |
| "loss": 0.0191, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.07232855631432143, |
| "grad_norm": 1.8149521350860596, |
| "learning_rate": 9.998850931894305e-06, |
| "loss": 0.0348, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.07271328267769549, |
| "grad_norm": 2.054905891418457, |
| "learning_rate": 9.99882466935735e-06, |
| "loss": 0.0153, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.07309800904106954, |
| "grad_norm": 2.0988175868988037, |
| "learning_rate": 9.998798110115333e-06, |
| "loss": 0.0188, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.07348273540444358, |
| "grad_norm": 1.3500646352767944, |
| "learning_rate": 9.998771254169833e-06, |
| "loss": 0.0195, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.07386746176781764, |
| "grad_norm": 1.4596278667449951, |
| "learning_rate": 9.99874410152244e-06, |
| "loss": 0.0345, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.0742521881311917, |
| "grad_norm": 1.385651707649231, |
| "learning_rate": 9.99871665217477e-06, |
| "loss": 0.0181, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.07463691449456573, |
| "grad_norm": 1.3517183065414429, |
| "learning_rate": 9.998688906128446e-06, |
| "loss": 0.0255, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.07502164085793979, |
| "grad_norm": 1.5442254543304443, |
| "learning_rate": 9.998660863385124e-06, |
| "loss": 0.0249, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.07540636722131384, |
| "grad_norm": 0.9525883793830872, |
| "learning_rate": 9.99863252394646e-06, |
| "loss": 0.0181, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.07579109358468789, |
| "grad_norm": 2.393604040145874, |
| "learning_rate": 9.99860388781414e-06, |
| "loss": 0.0287, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.07617581994806194, |
| "grad_norm": 3.1305532455444336, |
| "learning_rate": 9.998574954989863e-06, |
| "loss": 0.0363, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.076560546311436, |
| "grad_norm": 1.5628687143325806, |
| "learning_rate": 9.998545725475348e-06, |
| "loss": 0.0471, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.07694527267481004, |
| "grad_norm": 1.5641419887542725, |
| "learning_rate": 9.998516199272327e-06, |
| "loss": 0.0232, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.07732999903818409, |
| "grad_norm": 1.4028891324996948, |
| "learning_rate": 9.998486376382555e-06, |
| "loss": 0.0352, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.07771472540155815, |
| "grad_norm": 1.6165571212768555, |
| "learning_rate": 9.9984562568078e-06, |
| "loss": 0.0195, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.07809945176493219, |
| "grad_norm": 2.3587136268615723, |
| "learning_rate": 9.998425840549853e-06, |
| "loss": 0.0203, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.07848417812830624, |
| "grad_norm": 3.3537991046905518, |
| "learning_rate": 9.998395127610515e-06, |
| "loss": 0.0552, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.0788689044916803, |
| "grad_norm": 1.2197805643081665, |
| "learning_rate": 9.998364117991612e-06, |
| "loss": 0.0345, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.07925363085505434, |
| "grad_norm": 1.2044868469238281, |
| "learning_rate": 9.998332811694985e-06, |
| "loss": 0.0186, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.0796383572184284, |
| "grad_norm": 0.8047626614570618, |
| "learning_rate": 9.998301208722488e-06, |
| "loss": 0.0245, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.08002308358180245, |
| "grad_norm": 1.0816692113876343, |
| "learning_rate": 9.998269309076001e-06, |
| "loss": 0.0279, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.08040780994517649, |
| "grad_norm": 1.4782438278198242, |
| "learning_rate": 9.998237112757417e-06, |
| "loss": 0.0126, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.08079253630855054, |
| "grad_norm": 2.366748332977295, |
| "learning_rate": 9.998204619768645e-06, |
| "loss": 0.0281, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.0811772626719246, |
| "grad_norm": 1.8751951456069946, |
| "learning_rate": 9.998171830111615e-06, |
| "loss": 0.0237, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.08156198903529864, |
| "grad_norm": 1.4843281507492065, |
| "learning_rate": 9.998138743788273e-06, |
| "loss": 0.0185, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.0819467153986727, |
| "grad_norm": 10.534754753112793, |
| "learning_rate": 9.998105360800583e-06, |
| "loss": 0.0545, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.08233144176204675, |
| "grad_norm": 3.7792885303497314, |
| "learning_rate": 9.998071681150525e-06, |
| "loss": 0.0325, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.08271616812542079, |
| "grad_norm": 3.8291914463043213, |
| "learning_rate": 9.998037704840103e-06, |
| "loss": 0.0472, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.08310089448879485, |
| "grad_norm": 1.368371605873108, |
| "learning_rate": 9.998003431871325e-06, |
| "loss": 0.0288, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.0834856208521689, |
| "grad_norm": 3.24894118309021, |
| "learning_rate": 9.997968862246234e-06, |
| "loss": 0.0326, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.08387034721554294, |
| "grad_norm": 1.941206455230713, |
| "learning_rate": 9.997933995966877e-06, |
| "loss": 0.0348, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.084255073578917, |
| "grad_norm": 2.0731537342071533, |
| "learning_rate": 9.997898833035324e-06, |
| "loss": 0.0249, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.08463979994229105, |
| "grad_norm": 0.5582459568977356, |
| "learning_rate": 9.997863373453664e-06, |
| "loss": 0.0206, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.0850245263056651, |
| "grad_norm": 1.5135104656219482, |
| "learning_rate": 9.997827617223998e-06, |
| "loss": 0.0246, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.08540925266903915, |
| "grad_norm": 0.9743750691413879, |
| "learning_rate": 9.997791564348454e-06, |
| "loss": 0.0235, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.08579397903241319, |
| "grad_norm": 1.0159460306167603, |
| "learning_rate": 9.997755214829166e-06, |
| "loss": 0.0146, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.08617870539578724, |
| "grad_norm": 1.4491690397262573, |
| "learning_rate": 9.997718568668295e-06, |
| "loss": 0.0237, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.0865634317591613, |
| "grad_norm": 0.8503080010414124, |
| "learning_rate": 9.997681625868014e-06, |
| "loss": 0.0269, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.08694815812253534, |
| "grad_norm": 1.2467992305755615, |
| "learning_rate": 9.99764438643052e-06, |
| "loss": 0.0102, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.0873328844859094, |
| "grad_norm": 2.121068239212036, |
| "learning_rate": 9.997606850358018e-06, |
| "loss": 0.0267, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.08771761084928345, |
| "grad_norm": 4.5842390060424805, |
| "learning_rate": 9.99756901765274e-06, |
| "loss": 0.0385, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.08810233721265749, |
| "grad_norm": 0.8507031798362732, |
| "learning_rate": 9.997530888316927e-06, |
| "loss": 0.0204, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.08848706357603155, |
| "grad_norm": 3.37808895111084, |
| "learning_rate": 9.997492462352846e-06, |
| "loss": 0.0232, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.0888717899394056, |
| "grad_norm": 1.019683837890625, |
| "learning_rate": 9.997453739762779e-06, |
| "loss": 0.0244, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.08925651630277964, |
| "grad_norm": 2.1636667251586914, |
| "learning_rate": 9.99741472054902e-06, |
| "loss": 0.029, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.0896412426661537, |
| "grad_norm": 1.8102247714996338, |
| "learning_rate": 9.997375404713889e-06, |
| "loss": 0.0151, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.09002596902952775, |
| "grad_norm": 1.4596302509307861, |
| "learning_rate": 9.997335792259717e-06, |
| "loss": 0.0386, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.0904106953929018, |
| "grad_norm": 4.365789413452148, |
| "learning_rate": 9.997295883188855e-06, |
| "loss": 0.0429, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.09079542175627585, |
| "grad_norm": 1.7287161350250244, |
| "learning_rate": 9.997255677503674e-06, |
| "loss": 0.0242, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.0911801481196499, |
| "grad_norm": 2.087010383605957, |
| "learning_rate": 9.997215175206559e-06, |
| "loss": 0.0241, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.09156487448302394, |
| "grad_norm": 5.224289417266846, |
| "learning_rate": 9.997174376299915e-06, |
| "loss": 0.0354, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.091949600846398, |
| "grad_norm": 3.8172953128814697, |
| "learning_rate": 9.997133280786162e-06, |
| "loss": 0.0394, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.09233432720977205, |
| "grad_norm": 4.284353256225586, |
| "learning_rate": 9.997091888667739e-06, |
| "loss": 0.061, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.0927190535731461, |
| "grad_norm": 1.4266258478164673, |
| "learning_rate": 9.997050199947105e-06, |
| "loss": 0.0234, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.09310377993652015, |
| "grad_norm": 0.6778956651687622, |
| "learning_rate": 9.997008214626732e-06, |
| "loss": 0.0194, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.0934885062998942, |
| "grad_norm": 2.573453426361084, |
| "learning_rate": 9.996965932709115e-06, |
| "loss": 0.0279, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.09387323266326825, |
| "grad_norm": 3.498032808303833, |
| "learning_rate": 9.996923354196761e-06, |
| "loss": 0.0397, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.0942579590266423, |
| "grad_norm": 2.4384148120880127, |
| "learning_rate": 9.996880479092199e-06, |
| "loss": 0.0168, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.09464268539001636, |
| "grad_norm": 0.9454012513160706, |
| "learning_rate": 9.996837307397972e-06, |
| "loss": 0.037, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.0950274117533904, |
| "grad_norm": 2.0980632305145264, |
| "learning_rate": 9.996793839116643e-06, |
| "loss": 0.0332, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.09541213811676445, |
| "grad_norm": 0.8643459677696228, |
| "learning_rate": 9.996750074250793e-06, |
| "loss": 0.0176, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.09579686448013851, |
| "grad_norm": 2.557846784591675, |
| "learning_rate": 9.996706012803022e-06, |
| "loss": 0.0226, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.09618159084351255, |
| "grad_norm": 1.4250762462615967, |
| "learning_rate": 9.996661654775938e-06, |
| "loss": 0.0141, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.0965663172068866, |
| "grad_norm": 3.252248764038086, |
| "learning_rate": 9.996617000172181e-06, |
| "loss": 0.0514, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.09695104357026066, |
| "grad_norm": 1.6876319646835327, |
| "learning_rate": 9.9965720489944e-06, |
| "loss": 0.0391, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.0973357699336347, |
| "grad_norm": 0.6499518156051636, |
| "learning_rate": 9.99652680124526e-06, |
| "loss": 0.0118, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.09772049629700875, |
| "grad_norm": 2.352055311203003, |
| "learning_rate": 9.996481256927449e-06, |
| "loss": 0.0369, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.0981052226603828, |
| "grad_norm": 3.2287955284118652, |
| "learning_rate": 9.99643541604367e-06, |
| "loss": 0.0368, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.09848994902375685, |
| "grad_norm": 4.091669082641602, |
| "learning_rate": 9.996389278596642e-06, |
| "loss": 0.0501, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.0988746753871309, |
| "grad_norm": 2.6284351348876953, |
| "learning_rate": 9.99634284458911e-06, |
| "loss": 0.0431, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.09925940175050495, |
| "grad_norm": 3.2887837886810303, |
| "learning_rate": 9.99629611402382e-06, |
| "loss": 0.037, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.099644128113879, |
| "grad_norm": 0.5075850486755371, |
| "learning_rate": 9.996249086903553e-06, |
| "loss": 0.0136, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.10002885447725306, |
| "grad_norm": 1.5473183393478394, |
| "learning_rate": 9.9962017632311e-06, |
| "loss": 0.0178, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.1004135808406271, |
| "grad_norm": 1.7333952188491821, |
| "learning_rate": 9.996154143009267e-06, |
| "loss": 0.0237, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.10079830720400115, |
| "grad_norm": 1.8454315662384033, |
| "learning_rate": 9.996106226240881e-06, |
| "loss": 0.0105, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.10118303356737521, |
| "grad_norm": 1.2308003902435303, |
| "learning_rate": 9.996058012928786e-06, |
| "loss": 0.0149, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.10156775993074925, |
| "grad_norm": 0.9899033308029175, |
| "learning_rate": 9.996009503075848e-06, |
| "loss": 0.0094, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.1019524862941233, |
| "grad_norm": 1.0548290014266968, |
| "learning_rate": 9.995960696684939e-06, |
| "loss": 0.0094, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.10233721265749736, |
| "grad_norm": 0.6202391982078552, |
| "learning_rate": 9.995911593758963e-06, |
| "loss": 0.0063, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.1027219390208714, |
| "grad_norm": 1.7102643251419067, |
| "learning_rate": 9.99586219430083e-06, |
| "loss": 0.021, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.10310666538424546, |
| "grad_norm": 0.6108173727989197, |
| "learning_rate": 9.995812498313472e-06, |
| "loss": 0.0033, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.10349139174761951, |
| "grad_norm": 3.20573353767395, |
| "learning_rate": 9.99576250579984e-06, |
| "loss": 0.0275, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.10387611811099355, |
| "grad_norm": 2.09236741065979, |
| "learning_rate": 9.995712216762903e-06, |
| "loss": 0.0119, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.1042608444743676, |
| "grad_norm": 2.949190855026245, |
| "learning_rate": 9.995661631205644e-06, |
| "loss": 0.0274, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.10464557083774166, |
| "grad_norm": 10.193642616271973, |
| "learning_rate": 9.995610749131064e-06, |
| "loss": 0.0464, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.1050302972011157, |
| "grad_norm": 3.7807576656341553, |
| "learning_rate": 9.995559570542187e-06, |
| "loss": 0.0677, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.10541502356448976, |
| "grad_norm": 4.352870464324951, |
| "learning_rate": 9.995508095442048e-06, |
| "loss": 0.0508, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.10579974992786381, |
| "grad_norm": 5.404750347137451, |
| "learning_rate": 9.995456323833702e-06, |
| "loss": 0.0462, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.10618447629123785, |
| "grad_norm": 2.6962974071502686, |
| "learning_rate": 9.995404255720223e-06, |
| "loss": 0.0266, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.10656920265461191, |
| "grad_norm": 2.8063340187072754, |
| "learning_rate": 9.9953518911047e-06, |
| "loss": 0.0649, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.10695392901798596, |
| "grad_norm": 1.472765326499939, |
| "learning_rate": 9.995299229990245e-06, |
| "loss": 0.0266, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.10733865538136, |
| "grad_norm": 1.273878574371338, |
| "learning_rate": 9.99524627237998e-06, |
| "loss": 0.0116, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.10772338174473406, |
| "grad_norm": 0.9871545433998108, |
| "learning_rate": 9.99519301827705e-06, |
| "loss": 0.0165, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.10810810810810811, |
| "grad_norm": 1.1046977043151855, |
| "learning_rate": 9.995139467684614e-06, |
| "loss": 0.0115, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.10849283447148216, |
| "grad_norm": 2.761289358139038, |
| "learning_rate": 9.99508562060585e-06, |
| "loss": 0.0231, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.10887756083485621, |
| "grad_norm": 1.1875776052474976, |
| "learning_rate": 9.99503147704396e-06, |
| "loss": 0.0133, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.10926228719823027, |
| "grad_norm": 0.36607322096824646, |
| "learning_rate": 9.994977037002152e-06, |
| "loss": 0.0082, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.1096470135616043, |
| "grad_norm": 0.9858017563819885, |
| "learning_rate": 9.994922300483657e-06, |
| "loss": 0.0457, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.11003173992497836, |
| "grad_norm": 1.2574812173843384, |
| "learning_rate": 9.994867267491729e-06, |
| "loss": 0.0175, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.11041646628835242, |
| "grad_norm": 1.4595177173614502, |
| "learning_rate": 9.994811938029627e-06, |
| "loss": 0.0153, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.11080119265172646, |
| "grad_norm": 1.0112245082855225, |
| "learning_rate": 9.994756312100642e-06, |
| "loss": 0.0132, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.11118591901510051, |
| "grad_norm": 1.3767755031585693, |
| "learning_rate": 9.994700389708071e-06, |
| "loss": 0.0141, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.11157064537847455, |
| "grad_norm": 0.3537527024745941, |
| "learning_rate": 9.994644170855237e-06, |
| "loss": 0.0079, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.11195537174184861, |
| "grad_norm": 1.6381282806396484, |
| "learning_rate": 9.994587655545476e-06, |
| "loss": 0.0206, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.11234009810522266, |
| "grad_norm": 3.2190628051757812, |
| "learning_rate": 9.99453084378214e-06, |
| "loss": 0.0301, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.1127248244685967, |
| "grad_norm": 4.9315032958984375, |
| "learning_rate": 9.994473735568602e-06, |
| "loss": 0.0098, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.11310955083197076, |
| "grad_norm": 0.7560256719589233, |
| "learning_rate": 9.994416330908252e-06, |
| "loss": 0.0138, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.11349427719534481, |
| "grad_norm": 1.0898628234863281, |
| "learning_rate": 9.9943586298045e-06, |
| "loss": 0.0184, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.11387900355871886, |
| "grad_norm": 2.7064716815948486, |
| "learning_rate": 9.994300632260766e-06, |
| "loss": 0.0218, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.11426372992209291, |
| "grad_norm": 1.0446937084197998, |
| "learning_rate": 9.994242338280495e-06, |
| "loss": 0.0108, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.11464845628546697, |
| "grad_norm": 3.4280526638031006, |
| "learning_rate": 9.994183747867148e-06, |
| "loss": 0.0355, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.115033182648841, |
| "grad_norm": 1.0543782711029053, |
| "learning_rate": 9.994124861024199e-06, |
| "loss": 0.024, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.11541790901221506, |
| "grad_norm": 1.5086958408355713, |
| "learning_rate": 9.994065677755148e-06, |
| "loss": 0.0214, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.11580263537558912, |
| "grad_norm": 3.212959051132202, |
| "learning_rate": 9.994006198063506e-06, |
| "loss": 0.0285, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.11618736173896316, |
| "grad_norm": 0.6459766030311584, |
| "learning_rate": 9.9939464219528e-06, |
| "loss": 0.004, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.11657208810233721, |
| "grad_norm": 2.0472171306610107, |
| "learning_rate": 9.993886349426584e-06, |
| "loss": 0.0217, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.11695681446571127, |
| "grad_norm": 0.8928171992301941, |
| "learning_rate": 9.993825980488418e-06, |
| "loss": 0.006, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.11734154082908531, |
| "grad_norm": 1.5749402046203613, |
| "learning_rate": 9.99376531514189e-06, |
| "loss": 0.0186, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.11772626719245936, |
| "grad_norm": 1.007002592086792, |
| "learning_rate": 9.993704353390597e-06, |
| "loss": 0.0519, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.11811099355583342, |
| "grad_norm": 2.073533535003662, |
| "learning_rate": 9.99364309523816e-06, |
| "loss": 0.0258, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.11849571991920746, |
| "grad_norm": 1.1913518905639648, |
| "learning_rate": 9.993581540688213e-06, |
| "loss": 0.0203, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.11888044628258151, |
| "grad_norm": 0.6313387751579285, |
| "learning_rate": 9.993519689744411e-06, |
| "loss": 0.0061, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.11926517264595557, |
| "grad_norm": 0.5906733870506287, |
| "learning_rate": 9.993457542410424e-06, |
| "loss": 0.0129, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.11964989900932961, |
| "grad_norm": 0.5875241756439209, |
| "learning_rate": 9.993395098689943e-06, |
| "loss": 0.0147, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.12003462537270367, |
| "grad_norm": 0.4100046753883362, |
| "learning_rate": 9.993332358586669e-06, |
| "loss": 0.0095, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.12041935173607772, |
| "grad_norm": 2.3552939891815186, |
| "learning_rate": 9.993269322104332e-06, |
| "loss": 0.0563, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.12080407809945176, |
| "grad_norm": 2.6373159885406494, |
| "learning_rate": 9.993205989246672e-06, |
| "loss": 0.0474, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.12118880446282582, |
| "grad_norm": 2.1648263931274414, |
| "learning_rate": 9.993142360017447e-06, |
| "loss": 0.0182, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.12157353082619987, |
| "grad_norm": 2.0293798446655273, |
| "learning_rate": 9.993078434420433e-06, |
| "loss": 0.0161, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.12195825718957391, |
| "grad_norm": 2.0390355587005615, |
| "learning_rate": 9.993014212459425e-06, |
| "loss": 0.0276, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.12234298355294797, |
| "grad_norm": 1.5906345844268799, |
| "learning_rate": 9.992949694138236e-06, |
| "loss": 0.0221, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.12272770991632202, |
| "grad_norm": 1.5202964544296265, |
| "learning_rate": 9.992884879460694e-06, |
| "loss": 0.0252, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.12311243627969606, |
| "grad_norm": 1.605220079421997, |
| "learning_rate": 9.992819768430648e-06, |
| "loss": 0.02, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.12349716264307012, |
| "grad_norm": 1.02753746509552, |
| "learning_rate": 9.992754361051959e-06, |
| "loss": 0.0176, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.12388188900644416, |
| "grad_norm": 1.7800108194351196, |
| "learning_rate": 9.992688657328515e-06, |
| "loss": 0.019, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.12426661536981821, |
| "grad_norm": 5.6064839363098145, |
| "learning_rate": 9.992622657264211e-06, |
| "loss": 0.1288, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.12465134173319227, |
| "grad_norm": 4.702009201049805, |
| "learning_rate": 9.992556360862966e-06, |
| "loss": 0.0451, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.1250360680965663, |
| "grad_norm": 7.251805782318115, |
| "learning_rate": 9.992489768128714e-06, |
| "loss": 0.0272, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.12542079445994037, |
| "grad_norm": 2.643155336380005, |
| "learning_rate": 9.992422879065409e-06, |
| "loss": 0.025, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.12580552082331442, |
| "grad_norm": 0.8993635773658752, |
| "learning_rate": 9.99235569367702e-06, |
| "loss": 0.0092, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.12619024718668848, |
| "grad_norm": 4.789489269256592, |
| "learning_rate": 9.992288211967537e-06, |
| "loss": 0.0462, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.12657497355006253, |
| "grad_norm": 0.475421667098999, |
| "learning_rate": 9.992220433940963e-06, |
| "loss": 0.0037, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.12695969991343656, |
| "grad_norm": 2.3975934982299805, |
| "learning_rate": 9.992152359601323e-06, |
| "loss": 0.0332, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.1273444262768106, |
| "grad_norm": 1.1064420938491821, |
| "learning_rate": 9.992083988952654e-06, |
| "loss": 0.0217, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.12772915264018467, |
| "grad_norm": 1.686231255531311, |
| "learning_rate": 9.99201532199902e-06, |
| "loss": 0.0159, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.12811387900355872, |
| "grad_norm": 0.7885820269584656, |
| "learning_rate": 9.99194635874449e-06, |
| "loss": 0.0065, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.12849860536693278, |
| "grad_norm": 2.0389482975006104, |
| "learning_rate": 9.991877099193164e-06, |
| "loss": 0.0182, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.12888333173030683, |
| "grad_norm": 1.462136149406433, |
| "learning_rate": 9.991807543349148e-06, |
| "loss": 0.0271, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.12926805809368086, |
| "grad_norm": 1.666369080543518, |
| "learning_rate": 9.99173769121657e-06, |
| "loss": 0.0145, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.12965278445705491, |
| "grad_norm": 1.0154768228530884, |
| "learning_rate": 9.99166754279958e-06, |
| "loss": 0.0084, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.13003751082042897, |
| "grad_norm": 6.3970112800598145, |
| "learning_rate": 9.991597098102339e-06, |
| "loss": 0.0658, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.13042223718380302, |
| "grad_norm": 2.765911817550659, |
| "learning_rate": 9.991526357129028e-06, |
| "loss": 0.0249, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.13080696354717708, |
| "grad_norm": 3.0030531883239746, |
| "learning_rate": 9.991455319883849e-06, |
| "loss": 0.0266, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.1311916899105511, |
| "grad_norm": 4.774355888366699, |
| "learning_rate": 9.991383986371016e-06, |
| "loss": 0.0494, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.13157641627392516, |
| "grad_norm": 1.2228891849517822, |
| "learning_rate": 9.991312356594762e-06, |
| "loss": 0.0182, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.13196114263729922, |
| "grad_norm": 0.6857689023017883, |
| "learning_rate": 9.991240430559342e-06, |
| "loss": 0.0098, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.13234586900067327, |
| "grad_norm": 1.3903642892837524, |
| "learning_rate": 9.99116820826902e-06, |
| "loss": 0.0201, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.13273059536404733, |
| "grad_norm": 1.7887595891952515, |
| "learning_rate": 9.991095689728088e-06, |
| "loss": 0.0098, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.13311532172742138, |
| "grad_norm": 3.239908456802368, |
| "learning_rate": 9.991022874940845e-06, |
| "loss": 0.0596, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.1335000480907954, |
| "grad_norm": 2.807297706604004, |
| "learning_rate": 9.990949763911619e-06, |
| "loss": 0.0384, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.13388477445416946, |
| "grad_norm": 1.4440059661865234, |
| "learning_rate": 9.990876356644746e-06, |
| "loss": 0.0163, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.13426950081754352, |
| "grad_norm": 1.5381439924240112, |
| "learning_rate": 9.990802653144583e-06, |
| "loss": 0.0309, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.13465422718091757, |
| "grad_norm": 12.466649055480957, |
| "learning_rate": 9.990728653415504e-06, |
| "loss": 0.0578, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.13503895354429163, |
| "grad_norm": 1.6615933179855347, |
| "learning_rate": 9.990654357461903e-06, |
| "loss": 0.0384, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.13542367990766568, |
| "grad_norm": 0.5724029541015625, |
| "learning_rate": 9.990579765288191e-06, |
| "loss": 0.0046, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.1358084062710397, |
| "grad_norm": 0.7477350234985352, |
| "learning_rate": 9.990504876898792e-06, |
| "loss": 0.0122, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.13619313263441377, |
| "grad_norm": 1.5245766639709473, |
| "learning_rate": 9.990429692298154e-06, |
| "loss": 0.008, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.13657785899778782, |
| "grad_norm": 2.3614602088928223, |
| "learning_rate": 9.990354211490736e-06, |
| "loss": 0.0338, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.13696258536116188, |
| "grad_norm": 2.2919530868530273, |
| "learning_rate": 9.990278434481022e-06, |
| "loss": 0.027, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.13734731172453593, |
| "grad_norm": 0.5782844424247742, |
| "learning_rate": 9.99020236127351e-06, |
| "loss": 0.0089, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.13773203808790999, |
| "grad_norm": 2.5682413578033447, |
| "learning_rate": 9.99012599187271e-06, |
| "loss": 0.0232, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.138116764451284, |
| "grad_norm": 6.079646587371826, |
| "learning_rate": 9.99004932628316e-06, |
| "loss": 0.0329, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.13850149081465807, |
| "grad_norm": 4.070837497711182, |
| "learning_rate": 9.989972364509408e-06, |
| "loss": 0.0342, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.13888621717803212, |
| "grad_norm": 0.6112903952598572, |
| "learning_rate": 9.989895106556025e-06, |
| "loss": 0.0164, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.13927094354140618, |
| "grad_norm": 1.106340765953064, |
| "learning_rate": 9.989817552427594e-06, |
| "loss": 0.0145, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.13965566990478023, |
| "grad_norm": 0.9722648859024048, |
| "learning_rate": 9.989739702128717e-06, |
| "loss": 0.0139, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.1400403962681543, |
| "grad_norm": 0.8131417036056519, |
| "learning_rate": 9.989661555664019e-06, |
| "loss": 0.0083, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.14042512263152832, |
| "grad_norm": 1.2104814052581787, |
| "learning_rate": 9.989583113038134e-06, |
| "loss": 0.0229, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.14080984899490237, |
| "grad_norm": 0.8418599367141724, |
| "learning_rate": 9.98950437425572e-06, |
| "loss": 0.0164, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.14119457535827643, |
| "grad_norm": 2.4839096069335938, |
| "learning_rate": 9.989425339321453e-06, |
| "loss": 0.0298, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.14157930172165048, |
| "grad_norm": 2.681351661682129, |
| "learning_rate": 9.98934600824002e-06, |
| "loss": 0.0482, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.14196402808502454, |
| "grad_norm": 1.8847802877426147, |
| "learning_rate": 9.989266381016131e-06, |
| "loss": 0.0208, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.1423487544483986, |
| "grad_norm": 11.408592224121094, |
| "learning_rate": 9.989186457654515e-06, |
| "loss": 0.014, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.14273348081177262, |
| "grad_norm": 2.6127874851226807, |
| "learning_rate": 9.989106238159909e-06, |
| "loss": 0.0315, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.14311820717514667, |
| "grad_norm": 6.132943153381348, |
| "learning_rate": 9.989025722537082e-06, |
| "loss": 0.0233, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.14350293353852073, |
| "grad_norm": 0.842147946357727, |
| "learning_rate": 9.988944910790808e-06, |
| "loss": 0.0173, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.14388765990189478, |
| "grad_norm": 20.986244201660156, |
| "learning_rate": 9.988863802925887e-06, |
| "loss": 0.0222, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.14427238626526884, |
| "grad_norm": 1.4051076173782349, |
| "learning_rate": 9.988782398947132e-06, |
| "loss": 0.0286, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.14465711262864286, |
| "grad_norm": 1.1612248420715332, |
| "learning_rate": 9.988700698859373e-06, |
| "loss": 0.0246, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.14504183899201692, |
| "grad_norm": 0.6646268367767334, |
| "learning_rate": 9.988618702667461e-06, |
| "loss": 0.0132, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.14542656535539097, |
| "grad_norm": 1.8865305185317993, |
| "learning_rate": 9.988536410376261e-06, |
| "loss": 0.0254, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.14581129171876503, |
| "grad_norm": 0.6670300960540771, |
| "learning_rate": 9.988453821990663e-06, |
| "loss": 0.0094, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.14619601808213908, |
| "grad_norm": 0.9475501775741577, |
| "learning_rate": 9.988370937515562e-06, |
| "loss": 0.0103, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.14658074444551314, |
| "grad_norm": 21.623615264892578, |
| "learning_rate": 9.98828775695588e-06, |
| "loss": 0.0772, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.14696547080888717, |
| "grad_norm": 1.0714162588119507, |
| "learning_rate": 9.988204280316556e-06, |
| "loss": 0.0141, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.14735019717226122, |
| "grad_norm": 0.8990387320518494, |
| "learning_rate": 9.988120507602544e-06, |
| "loss": 0.0282, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.14773492353563528, |
| "grad_norm": 0.7467316389083862, |
| "learning_rate": 9.988036438818815e-06, |
| "loss": 0.0068, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.14811964989900933, |
| "grad_norm": 0.75644451379776, |
| "learning_rate": 9.98795207397036e-06, |
| "loss": 0.0104, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.1485043762623834, |
| "grad_norm": 1.473006248474121, |
| "learning_rate": 9.987867413062187e-06, |
| "loss": 0.0257, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.14888910262575744, |
| "grad_norm": 1.1377049684524536, |
| "learning_rate": 9.987782456099319e-06, |
| "loss": 0.0231, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.14927382898913147, |
| "grad_norm": 0.35376957058906555, |
| "learning_rate": 9.9876972030868e-06, |
| "loss": 0.011, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.14965855535250552, |
| "grad_norm": 1.170577883720398, |
| "learning_rate": 9.987611654029691e-06, |
| "loss": 0.0232, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.15004328171587958, |
| "grad_norm": 2.140615701675415, |
| "learning_rate": 9.987525808933069e-06, |
| "loss": 0.0421, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.15042800807925363, |
| "grad_norm": 1.2613564729690552, |
| "learning_rate": 9.987439667802028e-06, |
| "loss": 0.0138, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.1508127344426277, |
| "grad_norm": 1.3194247484207153, |
| "learning_rate": 9.987353230641683e-06, |
| "loss": 0.0127, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.15119746080600174, |
| "grad_norm": 1.2532031536102295, |
| "learning_rate": 9.987266497457161e-06, |
| "loss": 0.02, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.15158218716937577, |
| "grad_norm": 1.568895697593689, |
| "learning_rate": 9.987179468253616e-06, |
| "loss": 0.016, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.15196691353274983, |
| "grad_norm": 2.125100612640381, |
| "learning_rate": 9.98709214303621e-06, |
| "loss": 0.0197, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.15235163989612388, |
| "grad_norm": 2.59309983253479, |
| "learning_rate": 9.987004521810124e-06, |
| "loss": 0.0368, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.15273636625949794, |
| "grad_norm": 0.5584582090377808, |
| "learning_rate": 9.986916604580564e-06, |
| "loss": 0.0097, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.153121092622872, |
| "grad_norm": 1.2047398090362549, |
| "learning_rate": 9.986828391352743e-06, |
| "loss": 0.0182, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.15350581898624605, |
| "grad_norm": 1.0749913454055786, |
| "learning_rate": 9.986739882131901e-06, |
| "loss": 0.0175, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.15389054534962007, |
| "grad_norm": 0.9072372913360596, |
| "learning_rate": 9.986651076923288e-06, |
| "loss": 0.0158, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.15427527171299413, |
| "grad_norm": 0.31551575660705566, |
| "learning_rate": 9.986561975732179e-06, |
| "loss": 0.0048, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.15465999807636818, |
| "grad_norm": 0.3892665505409241, |
| "learning_rate": 9.986472578563859e-06, |
| "loss": 0.0035, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.15504472443974224, |
| "grad_norm": 1.0639790296554565, |
| "learning_rate": 9.986382885423637e-06, |
| "loss": 0.004, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.1554294508031163, |
| "grad_norm": 0.8102086186408997, |
| "learning_rate": 9.986292896316834e-06, |
| "loss": 0.0108, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.15581417716649035, |
| "grad_norm": 1.0145649909973145, |
| "learning_rate": 9.986202611248794e-06, |
| "loss": 0.0183, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.15619890352986437, |
| "grad_norm": 1.0815285444259644, |
| "learning_rate": 9.986112030224872e-06, |
| "loss": 0.0074, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.15658362989323843, |
| "grad_norm": 0.902445375919342, |
| "learning_rate": 9.986021153250449e-06, |
| "loss": 0.007, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.15696835625661248, |
| "grad_norm": 2.0999677181243896, |
| "learning_rate": 9.985929980330917e-06, |
| "loss": 0.0144, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.15735308261998654, |
| "grad_norm": 0.6674321293830872, |
| "learning_rate": 9.985838511471688e-06, |
| "loss": 0.011, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.1577378089833606, |
| "grad_norm": 2.6808223724365234, |
| "learning_rate": 9.98574674667819e-06, |
| "loss": 0.0108, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.15812253534673462, |
| "grad_norm": 0.9309936165809631, |
| "learning_rate": 9.98565468595587e-06, |
| "loss": 0.0034, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.15850726171010868, |
| "grad_norm": 3.7667415142059326, |
| "learning_rate": 9.985562329310192e-06, |
| "loss": 0.0218, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.15889198807348273, |
| "grad_norm": 1.3515875339508057, |
| "learning_rate": 9.98546967674664e-06, |
| "loss": 0.0261, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.1592767144368568, |
| "grad_norm": 1.2153210639953613, |
| "learning_rate": 9.98537672827071e-06, |
| "loss": 0.0276, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.15966144080023084, |
| "grad_norm": 2.072326183319092, |
| "learning_rate": 9.985283483887923e-06, |
| "loss": 0.0354, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.1600461671636049, |
| "grad_norm": 1.6364926099777222, |
| "learning_rate": 9.985189943603811e-06, |
| "loss": 0.0586, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.16043089352697892, |
| "grad_norm": 1.4485971927642822, |
| "learning_rate": 9.985096107423925e-06, |
| "loss": 0.0128, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.16081561989035298, |
| "grad_norm": 0.7800815105438232, |
| "learning_rate": 9.985001975353835e-06, |
| "loss": 0.0117, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.16120034625372703, |
| "grad_norm": 1.6513843536376953, |
| "learning_rate": 9.984907547399132e-06, |
| "loss": 0.0232, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.1615850726171011, |
| "grad_norm": 2.340419292449951, |
| "learning_rate": 9.984812823565417e-06, |
| "loss": 0.0282, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.16196979898047514, |
| "grad_norm": 0.8637623190879822, |
| "learning_rate": 9.984717803858312e-06, |
| "loss": 0.0224, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.1623545253438492, |
| "grad_norm": 1.3352808952331543, |
| "learning_rate": 9.98462248828346e-06, |
| "loss": 0.0161, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.16273925170722323, |
| "grad_norm": 1.0792741775512695, |
| "learning_rate": 9.984526876846517e-06, |
| "loss": 0.0353, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.16312397807059728, |
| "grad_norm": 1.0091253519058228, |
| "learning_rate": 9.984430969553155e-06, |
| "loss": 0.0114, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.16350870443397134, |
| "grad_norm": 1.8374234437942505, |
| "learning_rate": 9.984334766409072e-06, |
| "loss": 0.0186, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.1638934307973454, |
| "grad_norm": 0.6512305736541748, |
| "learning_rate": 9.984238267419974e-06, |
| "loss": 0.014, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.16427815716071945, |
| "grad_norm": 1.6066893339157104, |
| "learning_rate": 9.984141472591591e-06, |
| "loss": 0.0154, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.1646628835240935, |
| "grad_norm": 2.240696430206299, |
| "learning_rate": 9.984044381929667e-06, |
| "loss": 0.0127, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.16504760988746753, |
| "grad_norm": 1.4211338758468628, |
| "learning_rate": 9.983946995439964e-06, |
| "loss": 0.0148, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.16543233625084158, |
| "grad_norm": 0.8117442727088928, |
| "learning_rate": 9.983849313128265e-06, |
| "loss": 0.0161, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.16581706261421564, |
| "grad_norm": 1.8538448810577393, |
| "learning_rate": 9.983751335000365e-06, |
| "loss": 0.0168, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.1662017889775897, |
| "grad_norm": 0.8695465326309204, |
| "learning_rate": 9.983653061062084e-06, |
| "loss": 0.0075, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.16658651534096375, |
| "grad_norm": 1.9306254386901855, |
| "learning_rate": 9.983554491319248e-06, |
| "loss": 0.0211, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.1669712417043378, |
| "grad_norm": 0.9681547284126282, |
| "learning_rate": 9.983455625777713e-06, |
| "loss": 0.0219, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.16735596806771183, |
| "grad_norm": 1.8259018659591675, |
| "learning_rate": 9.983356464443349e-06, |
| "loss": 0.0388, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.16774069443108588, |
| "grad_norm": 0.9157015681266785, |
| "learning_rate": 9.983257007322033e-06, |
| "loss": 0.0082, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.16812542079445994, |
| "grad_norm": 2.4303884506225586, |
| "learning_rate": 9.983157254419678e-06, |
| "loss": 0.0507, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.168510147157834, |
| "grad_norm": 1.1336638927459717, |
| "learning_rate": 9.983057205742199e-06, |
| "loss": 0.012, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.16889487352120805, |
| "grad_norm": 1.2588491439819336, |
| "learning_rate": 9.982956861295536e-06, |
| "loss": 0.0236, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.1692795998845821, |
| "grad_norm": 0.6215165853500366, |
| "learning_rate": 9.982856221085644e-06, |
| "loss": 0.0135, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.16966432624795613, |
| "grad_norm": 0.6819311380386353, |
| "learning_rate": 9.982755285118499e-06, |
| "loss": 0.0089, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.1700490526113302, |
| "grad_norm": 0.6803324818611145, |
| "learning_rate": 9.982654053400089e-06, |
| "loss": 0.0073, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.17043377897470424, |
| "grad_norm": 1.0501588582992554, |
| "learning_rate": 9.982552525936425e-06, |
| "loss": 0.0118, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.1708185053380783, |
| "grad_norm": 0.6693577766418457, |
| "learning_rate": 9.982450702733532e-06, |
| "loss": 0.0085, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.17120323170145235, |
| "grad_norm": 1.0826107263565063, |
| "learning_rate": 9.982348583797454e-06, |
| "loss": 0.0165, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.17158795806482638, |
| "grad_norm": 0.35750290751457214, |
| "learning_rate": 9.982246169134251e-06, |
| "loss": 0.0037, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.17197268442820043, |
| "grad_norm": 0.6661262512207031, |
| "learning_rate": 9.982143458750005e-06, |
| "loss": 0.0073, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.1723574107915745, |
| "grad_norm": 0.6900920867919922, |
| "learning_rate": 9.98204045265081e-06, |
| "loss": 0.0157, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.17274213715494854, |
| "grad_norm": 2.380176067352295, |
| "learning_rate": 9.98193715084278e-06, |
| "loss": 0.0271, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.1731268635183226, |
| "grad_norm": 0.618948221206665, |
| "learning_rate": 9.981833553332045e-06, |
| "loss": 0.0089, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.17351158988169665, |
| "grad_norm": 0.5611693859100342, |
| "learning_rate": 9.981729660124759e-06, |
| "loss": 0.0062, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.17389631624507068, |
| "grad_norm": 1.119604229927063, |
| "learning_rate": 9.981625471227083e-06, |
| "loss": 0.0155, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.17428104260844474, |
| "grad_norm": 1.747314453125, |
| "learning_rate": 9.981520986645204e-06, |
| "loss": 0.0235, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.1746657689718188, |
| "grad_norm": 2.7685446739196777, |
| "learning_rate": 9.981416206385323e-06, |
| "loss": 0.0571, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.17505049533519285, |
| "grad_norm": 0.5638480186462402, |
| "learning_rate": 9.98131113045366e-06, |
| "loss": 0.0047, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.1754352216985669, |
| "grad_norm": 1.8126139640808105, |
| "learning_rate": 9.981205758856452e-06, |
| "loss": 0.0121, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.17581994806194096, |
| "grad_norm": 2.3808107376098633, |
| "learning_rate": 9.98110009159995e-06, |
| "loss": 0.0255, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.17620467442531498, |
| "grad_norm": 1.3327685594558716, |
| "learning_rate": 9.98099412869043e-06, |
| "loss": 0.0135, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.17658940078868904, |
| "grad_norm": 1.5651696920394897, |
| "learning_rate": 9.980887870134181e-06, |
| "loss": 0.0261, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.1769741271520631, |
| "grad_norm": 0.8582300543785095, |
| "learning_rate": 9.980781315937507e-06, |
| "loss": 0.0088, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.17735885351543715, |
| "grad_norm": 2.216838836669922, |
| "learning_rate": 9.980674466106735e-06, |
| "loss": 0.022, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.1777435798788112, |
| "grad_norm": 1.1542237997055054, |
| "learning_rate": 9.980567320648207e-06, |
| "loss": 0.0095, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.17812830624218526, |
| "grad_norm": 0.5303864479064941, |
| "learning_rate": 9.98045987956828e-06, |
| "loss": 0.0145, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.17851303260555929, |
| "grad_norm": 0.49043771624565125, |
| "learning_rate": 9.980352142873335e-06, |
| "loss": 0.0057, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.17889775896893334, |
| "grad_norm": 0.9760215878486633, |
| "learning_rate": 9.980244110569765e-06, |
| "loss": 0.0102, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.1792824853323074, |
| "grad_norm": 0.7870310544967651, |
| "learning_rate": 9.980135782663981e-06, |
| "loss": 0.0113, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.17966721169568145, |
| "grad_norm": 0.9848241806030273, |
| "learning_rate": 9.980027159162415e-06, |
| "loss": 0.0224, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.1800519380590555, |
| "grad_norm": 0.3691968321800232, |
| "learning_rate": 9.979918240071512e-06, |
| "loss": 0.0029, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.18043666442242956, |
| "grad_norm": 1.2080693244934082, |
| "learning_rate": 9.97980902539774e-06, |
| "loss": 0.0133, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.1808213907858036, |
| "grad_norm": 1.9378516674041748, |
| "learning_rate": 9.979699515147579e-06, |
| "loss": 0.0285, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.18120611714917764, |
| "grad_norm": 0.5868281126022339, |
| "learning_rate": 9.979589709327528e-06, |
| "loss": 0.0104, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.1815908435125517, |
| "grad_norm": 1.4102672338485718, |
| "learning_rate": 9.979479607944107e-06, |
| "loss": 0.0129, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.18197556987592575, |
| "grad_norm": 1.0837740898132324, |
| "learning_rate": 9.97936921100385e-06, |
| "loss": 0.0078, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.1823602962392998, |
| "grad_norm": 0.7247804403305054, |
| "learning_rate": 9.97925851851331e-06, |
| "loss": 0.0138, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.18274502260267383, |
| "grad_norm": 1.0309706926345825, |
| "learning_rate": 9.979147530479057e-06, |
| "loss": 0.0214, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.1831297489660479, |
| "grad_norm": 1.6426202058792114, |
| "learning_rate": 9.979036246907679e-06, |
| "loss": 0.0194, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.18351447532942194, |
| "grad_norm": 1.030686855316162, |
| "learning_rate": 9.97892466780578e-06, |
| "loss": 0.0083, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.183899201692796, |
| "grad_norm": 1.575244665145874, |
| "learning_rate": 9.978812793179985e-06, |
| "loss": 0.0125, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.18428392805617005, |
| "grad_norm": 0.7389955520629883, |
| "learning_rate": 9.97870062303693e-06, |
| "loss": 0.0123, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.1846686544195441, |
| "grad_norm": 1.0610774755477905, |
| "learning_rate": 9.978588157383277e-06, |
| "loss": 0.0101, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.18505338078291814, |
| "grad_norm": 0.9599776268005371, |
| "learning_rate": 9.978475396225702e-06, |
| "loss": 0.0161, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.1854381071462922, |
| "grad_norm": 1.2432290315628052, |
| "learning_rate": 9.978362339570896e-06, |
| "loss": 0.0251, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.18582283350966625, |
| "grad_norm": 1.463511347770691, |
| "learning_rate": 9.978248987425567e-06, |
| "loss": 0.0152, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.1862075598730403, |
| "grad_norm": 1.1043009757995605, |
| "learning_rate": 9.978135339796448e-06, |
| "loss": 0.0188, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.18659228623641436, |
| "grad_norm": 0.21234546601772308, |
| "learning_rate": 9.97802139669028e-06, |
| "loss": 0.0031, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.1869770125997884, |
| "grad_norm": 1.1203808784484863, |
| "learning_rate": 9.977907158113832e-06, |
| "loss": 0.0165, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.18736173896316244, |
| "grad_norm": 3.042773962020874, |
| "learning_rate": 9.977792624073876e-06, |
| "loss": 0.03, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.1877464653265365, |
| "grad_norm": 1.725350260734558, |
| "learning_rate": 9.977677794577218e-06, |
| "loss": 0.023, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.18813119168991055, |
| "grad_norm": 1.014220118522644, |
| "learning_rate": 9.977562669630669e-06, |
| "loss": 0.0071, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.1885159180532846, |
| "grad_norm": 1.6001348495483398, |
| "learning_rate": 9.977447249241065e-06, |
| "loss": 0.0195, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.18890064441665866, |
| "grad_norm": 0.877295732498169, |
| "learning_rate": 9.977331533415256e-06, |
| "loss": 0.0085, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.1892853707800327, |
| "grad_norm": 0.8194282650947571, |
| "learning_rate": 9.97721552216011e-06, |
| "loss": 0.0067, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.18967009714340674, |
| "grad_norm": 3.308549642562866, |
| "learning_rate": 9.977099215482512e-06, |
| "loss": 0.0431, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.1900548235067808, |
| "grad_norm": 0.5826371908187866, |
| "learning_rate": 9.976982613389368e-06, |
| "loss": 0.0135, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.19043954987015485, |
| "grad_norm": 1.85649836063385, |
| "learning_rate": 9.976865715887595e-06, |
| "loss": 0.0217, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.1908242762335289, |
| "grad_norm": 1.886124849319458, |
| "learning_rate": 9.976748522984137e-06, |
| "loss": 0.0193, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.19120900259690296, |
| "grad_norm": 1.2473396062850952, |
| "learning_rate": 9.976631034685943e-06, |
| "loss": 0.0107, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.19159372896027702, |
| "grad_norm": 0.38580647110939026, |
| "learning_rate": 9.97651325099999e-06, |
| "loss": 0.009, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.19197845532365104, |
| "grad_norm": 1.0286208391189575, |
| "learning_rate": 9.976395171933271e-06, |
| "loss": 0.0104, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.1923631816870251, |
| "grad_norm": 9.139874458312988, |
| "learning_rate": 9.976276797492793e-06, |
| "loss": 0.0105, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.19274790805039915, |
| "grad_norm": 1.5034836530685425, |
| "learning_rate": 9.976158127685583e-06, |
| "loss": 0.0255, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.1931326344137732, |
| "grad_norm": 3.2329092025756836, |
| "learning_rate": 9.976039162518681e-06, |
| "loss": 0.0406, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.19351736077714726, |
| "grad_norm": 3.273045301437378, |
| "learning_rate": 9.97591990199915e-06, |
| "loss": 0.0409, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.19390208714052132, |
| "grad_norm": 1.2689261436462402, |
| "learning_rate": 9.975800346134071e-06, |
| "loss": 0.0186, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.19428681350389534, |
| "grad_norm": 2.2513368129730225, |
| "learning_rate": 9.975680494930538e-06, |
| "loss": 0.0263, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.1946715398672694, |
| "grad_norm": 0.4429526925086975, |
| "learning_rate": 9.975560348395666e-06, |
| "loss": 0.0078, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.19505626623064345, |
| "grad_norm": 11.008428573608398, |
| "learning_rate": 9.975439906536586e-06, |
| "loss": 0.0458, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.1954409925940175, |
| "grad_norm": 1.4643393754959106, |
| "learning_rate": 9.975319169360446e-06, |
| "loss": 0.0394, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.19582571895739156, |
| "grad_norm": 2.285447120666504, |
| "learning_rate": 9.975198136874412e-06, |
| "loss": 0.0195, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.1962104453207656, |
| "grad_norm": 2.2482571601867676, |
| "learning_rate": 9.97507680908567e-06, |
| "loss": 0.0158, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.19659517168413965, |
| "grad_norm": 2.518343210220337, |
| "learning_rate": 9.974955186001419e-06, |
| "loss": 0.026, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.1969798980475137, |
| "grad_norm": 2.389946222305298, |
| "learning_rate": 9.974833267628879e-06, |
| "loss": 0.0156, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.19736462441088776, |
| "grad_norm": 1.022152066230774, |
| "learning_rate": 9.974711053975287e-06, |
| "loss": 0.0127, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.1977493507742618, |
| "grad_norm": 0.8410608768463135, |
| "learning_rate": 9.974588545047897e-06, |
| "loss": 0.0125, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.19813407713763587, |
| "grad_norm": 0.7129068374633789, |
| "learning_rate": 9.974465740853981e-06, |
| "loss": 0.0156, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.1985188035010099, |
| "grad_norm": 0.5806570649147034, |
| "learning_rate": 9.974342641400826e-06, |
| "loss": 0.0086, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.19890352986438395, |
| "grad_norm": 1.541094183921814, |
| "learning_rate": 9.974219246695737e-06, |
| "loss": 0.0315, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.199288256227758, |
| "grad_norm": 1.3908828496932983, |
| "learning_rate": 9.974095556746043e-06, |
| "loss": 0.0148, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.19967298259113206, |
| "grad_norm": 1.4197914600372314, |
| "learning_rate": 9.973971571559085e-06, |
| "loss": 0.0184, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.2000577089545061, |
| "grad_norm": 0.2611946761608124, |
| "learning_rate": 9.973847291142218e-06, |
| "loss": 0.0033, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.20044243531788017, |
| "grad_norm": 0.8428516983985901, |
| "learning_rate": 9.973722715502821e-06, |
| "loss": 0.0103, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.2008271616812542, |
| "grad_norm": 0.4746531844139099, |
| "learning_rate": 9.973597844648291e-06, |
| "loss": 0.0061, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.20121188804462825, |
| "grad_norm": 1.7854464054107666, |
| "learning_rate": 9.973472678586033e-06, |
| "loss": 0.0143, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.2015966144080023, |
| "grad_norm": 1.9014302492141724, |
| "learning_rate": 9.973347217323484e-06, |
| "loss": 0.0221, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.20198134077137636, |
| "grad_norm": 1.26873779296875, |
| "learning_rate": 9.973221460868086e-06, |
| "loss": 0.0129, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.20236606713475042, |
| "grad_norm": 2.484241485595703, |
| "learning_rate": 9.973095409227303e-06, |
| "loss": 0.0089, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.20275079349812447, |
| "grad_norm": 1.4181262254714966, |
| "learning_rate": 9.972969062408618e-06, |
| "loss": 0.0322, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.2031355198614985, |
| "grad_norm": 1.1394925117492676, |
| "learning_rate": 9.97284242041953e-06, |
| "loss": 0.0171, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.20352024622487255, |
| "grad_norm": 0.7076565623283386, |
| "learning_rate": 9.972715483267558e-06, |
| "loss": 0.0132, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.2039049725882466, |
| "grad_norm": 0.8092791438102722, |
| "learning_rate": 9.972588250960235e-06, |
| "loss": 0.0069, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.20428969895162066, |
| "grad_norm": 1.4082260131835938, |
| "learning_rate": 9.97246072350511e-06, |
| "loss": 0.0193, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.20467442531499472, |
| "grad_norm": 1.6323784589767456, |
| "learning_rate": 9.972332900909755e-06, |
| "loss": 0.0198, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.20505915167836877, |
| "grad_norm": 1.7675199508666992, |
| "learning_rate": 9.972204783181759e-06, |
| "loss": 0.0139, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.2054438780417428, |
| "grad_norm": 1.2901581525802612, |
| "learning_rate": 9.972076370328722e-06, |
| "loss": 0.0125, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.20582860440511686, |
| "grad_norm": 0.7784814834594727, |
| "learning_rate": 9.971947662358271e-06, |
| "loss": 0.0062, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.2062133307684909, |
| "grad_norm": 0.5156156420707703, |
| "learning_rate": 9.97181865927804e-06, |
| "loss": 0.0091, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.20659805713186497, |
| "grad_norm": 1.0497115850448608, |
| "learning_rate": 9.971689361095688e-06, |
| "loss": 0.0074, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.20698278349523902, |
| "grad_norm": 1.1292210817337036, |
| "learning_rate": 9.971559767818891e-06, |
| "loss": 0.0176, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.20736750985861307, |
| "grad_norm": 0.7185404300689697, |
| "learning_rate": 9.971429879455339e-06, |
| "loss": 0.0081, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.2077522362219871, |
| "grad_norm": 1.4201236963272095, |
| "learning_rate": 9.971299696012744e-06, |
| "loss": 0.0239, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.20813696258536116, |
| "grad_norm": 1.1339606046676636, |
| "learning_rate": 9.97116921749883e-06, |
| "loss": 0.0212, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.2085216889487352, |
| "grad_norm": 0.13454781472682953, |
| "learning_rate": 9.971038443921344e-06, |
| "loss": 0.0014, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.20890641531210927, |
| "grad_norm": 1.0006049871444702, |
| "learning_rate": 9.970907375288048e-06, |
| "loss": 0.0161, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.20929114167548332, |
| "grad_norm": 1.2062994241714478, |
| "learning_rate": 9.97077601160672e-06, |
| "loss": 0.023, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.20967586803885735, |
| "grad_norm": 1.2538203001022339, |
| "learning_rate": 9.970644352885157e-06, |
| "loss": 0.0412, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.2100605944022314, |
| "grad_norm": 2.4046273231506348, |
| "learning_rate": 9.970512399131174e-06, |
| "loss": 0.0337, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.21044532076560546, |
| "grad_norm": 0.40798303484916687, |
| "learning_rate": 9.970380150352606e-06, |
| "loss": 0.0109, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.21083004712897951, |
| "grad_norm": 0.9853060841560364, |
| "learning_rate": 9.970247606557297e-06, |
| "loss": 0.0344, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.21121477349235357, |
| "grad_norm": 0.3079826533794403, |
| "learning_rate": 9.97011476775312e-06, |
| "loss": 0.0116, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.21159949985572762, |
| "grad_norm": 0.7892830967903137, |
| "learning_rate": 9.969981633947956e-06, |
| "loss": 0.0163, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.21198422621910165, |
| "grad_norm": 1.4530398845672607, |
| "learning_rate": 9.969848205149706e-06, |
| "loss": 0.0211, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.2123689525824757, |
| "grad_norm": 0.7890651226043701, |
| "learning_rate": 9.969714481366295e-06, |
| "loss": 0.0172, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.21275367894584976, |
| "grad_norm": 1.151655912399292, |
| "learning_rate": 9.969580462605656e-06, |
| "loss": 0.0228, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.21313840530922382, |
| "grad_norm": 0.722616970539093, |
| "learning_rate": 9.969446148875743e-06, |
| "loss": 0.0157, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.21352313167259787, |
| "grad_norm": 0.3728174567222595, |
| "learning_rate": 9.969311540184532e-06, |
| "loss": 0.0133, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.21390785803597193, |
| "grad_norm": 1.246963620185852, |
| "learning_rate": 9.969176636540007e-06, |
| "loss": 0.0166, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.21429258439934595, |
| "grad_norm": 0.4768514037132263, |
| "learning_rate": 9.969041437950182e-06, |
| "loss": 0.0077, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.21467731076272, |
| "grad_norm": 0.5575240254402161, |
| "learning_rate": 9.968905944423077e-06, |
| "loss": 0.0065, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.21506203712609406, |
| "grad_norm": 0.7773118615150452, |
| "learning_rate": 9.968770155966736e-06, |
| "loss": 0.0159, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.21544676348946812, |
| "grad_norm": 0.2891775369644165, |
| "learning_rate": 9.968634072589219e-06, |
| "loss": 0.0025, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.21583148985284217, |
| "grad_norm": 0.7732624411582947, |
| "learning_rate": 9.968497694298602e-06, |
| "loss": 0.0112, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.21621621621621623, |
| "grad_norm": 1.4917421340942383, |
| "learning_rate": 9.96836102110298e-06, |
| "loss": 0.0087, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.21660094257959026, |
| "grad_norm": 1.883601188659668, |
| "learning_rate": 9.968224053010464e-06, |
| "loss": 0.0375, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.2169856689429643, |
| "grad_norm": 1.3303236961364746, |
| "learning_rate": 9.968086790029187e-06, |
| "loss": 0.0315, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.21737039530633837, |
| "grad_norm": 2.322463035583496, |
| "learning_rate": 9.967949232167295e-06, |
| "loss": 0.0343, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.21775512166971242, |
| "grad_norm": 3.1290431022644043, |
| "learning_rate": 9.96781137943295e-06, |
| "loss": 0.0359, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.21813984803308648, |
| "grad_norm": 2.3872196674346924, |
| "learning_rate": 9.967673231834338e-06, |
| "loss": 0.036, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.21852457439646053, |
| "grad_norm": 2.2352898120880127, |
| "learning_rate": 9.967534789379657e-06, |
| "loss": 0.0206, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.21890930075983456, |
| "grad_norm": 1.576250672340393, |
| "learning_rate": 9.967396052077125e-06, |
| "loss": 0.0196, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.2192940271232086, |
| "grad_norm": 1.1456630229949951, |
| "learning_rate": 9.967257019934976e-06, |
| "loss": 0.0155, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.21967875348658267, |
| "grad_norm": 0.7318146824836731, |
| "learning_rate": 9.96711769296146e-06, |
| "loss": 0.0108, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.22006347984995672, |
| "grad_norm": 0.5068544745445251, |
| "learning_rate": 9.966978071164851e-06, |
| "loss": 0.0064, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.22044820621333078, |
| "grad_norm": 1.3462088108062744, |
| "learning_rate": 9.966838154553436e-06, |
| "loss": 0.0208, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.22083293257670483, |
| "grad_norm": 1.4697437286376953, |
| "learning_rate": 9.966697943135516e-06, |
| "loss": 0.0245, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.22121765894007886, |
| "grad_norm": 0.5085150003433228, |
| "learning_rate": 9.966557436919416e-06, |
| "loss": 0.0104, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.22160238530345291, |
| "grad_norm": 0.722129225730896, |
| "learning_rate": 9.966416635913475e-06, |
| "loss": 0.0219, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.22198711166682697, |
| "grad_norm": 1.4348595142364502, |
| "learning_rate": 9.96627554012605e-06, |
| "loss": 0.0209, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.22237183803020102, |
| "grad_norm": 0.4099920988082886, |
| "learning_rate": 9.966134149565518e-06, |
| "loss": 0.0075, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.22275656439357508, |
| "grad_norm": 0.4294058680534363, |
| "learning_rate": 9.965992464240268e-06, |
| "loss": 0.0114, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.2231412907569491, |
| "grad_norm": 0.6493693590164185, |
| "learning_rate": 9.96585048415871e-06, |
| "loss": 0.0092, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.22352601712032316, |
| "grad_norm": 0.8157480955123901, |
| "learning_rate": 9.965708209329275e-06, |
| "loss": 0.013, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.22391074348369722, |
| "grad_norm": 1.7782557010650635, |
| "learning_rate": 9.965565639760405e-06, |
| "loss": 0.0147, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.22429546984707127, |
| "grad_norm": 1.609440803527832, |
| "learning_rate": 9.965422775460559e-06, |
| "loss": 0.0287, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.22468019621044533, |
| "grad_norm": 1.303137183189392, |
| "learning_rate": 9.96527961643822e-06, |
| "loss": 0.0135, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.22506492257381938, |
| "grad_norm": 0.9957038760185242, |
| "learning_rate": 9.965136162701889e-06, |
| "loss": 0.0088, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.2254496489371934, |
| "grad_norm": 1.0103319883346558, |
| "learning_rate": 9.964992414260076e-06, |
| "loss": 0.0215, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.22583437530056746, |
| "grad_norm": 2.134277582168579, |
| "learning_rate": 9.964848371121312e-06, |
| "loss": 0.0378, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.22621910166394152, |
| "grad_norm": 1.829238772392273, |
| "learning_rate": 9.964704033294148e-06, |
| "loss": 0.0334, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.22660382802731557, |
| "grad_norm": 0.39848384261131287, |
| "learning_rate": 9.964559400787155e-06, |
| "loss": 0.0055, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.22698855439068963, |
| "grad_norm": 0.5261602997779846, |
| "learning_rate": 9.964414473608912e-06, |
| "loss": 0.008, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.22737328075406368, |
| "grad_norm": 0.9430747032165527, |
| "learning_rate": 9.964269251768025e-06, |
| "loss": 0.0125, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.2277580071174377, |
| "grad_norm": 0.9117157459259033, |
| "learning_rate": 9.964123735273112e-06, |
| "loss": 0.0205, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.22814273348081177, |
| "grad_norm": 1.4581772089004517, |
| "learning_rate": 9.96397792413281e-06, |
| "loss": 0.008, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.22852745984418582, |
| "grad_norm": 0.778039276599884, |
| "learning_rate": 9.963831818355774e-06, |
| "loss": 0.0113, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.22891218620755988, |
| "grad_norm": 0.2719285190105438, |
| "learning_rate": 9.963685417950678e-06, |
| "loss": 0.0081, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.22929691257093393, |
| "grad_norm": 0.4310326874256134, |
| "learning_rate": 9.963538722926208e-06, |
| "loss": 0.0079, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.22968163893430799, |
| "grad_norm": 1.2294691801071167, |
| "learning_rate": 9.963391733291072e-06, |
| "loss": 0.0131, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.230066365297682, |
| "grad_norm": 0.7488526701927185, |
| "learning_rate": 9.963244449053997e-06, |
| "loss": 0.0067, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.23045109166105607, |
| "grad_norm": 0.5428204536437988, |
| "learning_rate": 9.963096870223722e-06, |
| "loss": 0.0177, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.23083581802443012, |
| "grad_norm": 0.5847753882408142, |
| "learning_rate": 9.962948996809008e-06, |
| "loss": 0.0073, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.23122054438780418, |
| "grad_norm": 0.7159871459007263, |
| "learning_rate": 9.962800828818633e-06, |
| "loss": 0.0084, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.23160527075117823, |
| "grad_norm": 0.6076216101646423, |
| "learning_rate": 9.962652366261392e-06, |
| "loss": 0.0042, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.2319899971145523, |
| "grad_norm": 0.7141630053520203, |
| "learning_rate": 9.962503609146092e-06, |
| "loss": 0.0111, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.23237472347792631, |
| "grad_norm": 0.9941124320030212, |
| "learning_rate": 9.962354557481569e-06, |
| "loss": 0.0178, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.23275944984130037, |
| "grad_norm": 1.804352879524231, |
| "learning_rate": 9.962205211276666e-06, |
| "loss": 0.0134, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.23314417620467442, |
| "grad_norm": 2.7440221309661865, |
| "learning_rate": 9.962055570540247e-06, |
| "loss": 0.0163, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.23352890256804848, |
| "grad_norm": 1.269745945930481, |
| "learning_rate": 9.961905635281196e-06, |
| "loss": 0.0036, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.23391362893142253, |
| "grad_norm": 1.0230756998062134, |
| "learning_rate": 9.961755405508413e-06, |
| "loss": 0.014, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.2342983552947966, |
| "grad_norm": 1.520411729812622, |
| "learning_rate": 9.961604881230812e-06, |
| "loss": 0.0092, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.23468308165817062, |
| "grad_norm": 0.7422340512275696, |
| "learning_rate": 9.96145406245733e-06, |
| "loss": 0.0048, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.23506780802154467, |
| "grad_norm": 1.6586474180221558, |
| "learning_rate": 9.961302949196916e-06, |
| "loss": 0.0196, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.23545253438491873, |
| "grad_norm": 0.3466293215751648, |
| "learning_rate": 9.961151541458542e-06, |
| "loss": 0.0023, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.23583726074829278, |
| "grad_norm": 1.8197641372680664, |
| "learning_rate": 9.960999839251195e-06, |
| "loss": 0.0392, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.23622198711166684, |
| "grad_norm": 1.4489648342132568, |
| "learning_rate": 9.960847842583878e-06, |
| "loss": 0.0147, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.23660671347504086, |
| "grad_norm": 1.529948353767395, |
| "learning_rate": 9.96069555146561e-06, |
| "loss": 0.0199, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.23699143983841492, |
| "grad_norm": 1.676714539527893, |
| "learning_rate": 9.960542965905438e-06, |
| "loss": 0.0135, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.23737616620178897, |
| "grad_norm": 1.0007433891296387, |
| "learning_rate": 9.96039008591241e-06, |
| "loss": 0.0235, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.23776089256516303, |
| "grad_norm": 0.8955550193786621, |
| "learning_rate": 9.960236911495605e-06, |
| "loss": 0.016, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.23814561892853708, |
| "grad_norm": 0.43766599893569946, |
| "learning_rate": 9.960083442664114e-06, |
| "loss": 0.0078, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.23853034529191114, |
| "grad_norm": 6.031861782073975, |
| "learning_rate": 9.959929679427047e-06, |
| "loss": 0.0163, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.23891507165528517, |
| "grad_norm": 0.5251021981239319, |
| "learning_rate": 9.959775621793528e-06, |
| "loss": 0.0101, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.23929979801865922, |
| "grad_norm": 1.166603922843933, |
| "learning_rate": 9.959621269772704e-06, |
| "loss": 0.016, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.23968452438203328, |
| "grad_norm": 1.8840091228485107, |
| "learning_rate": 9.959466623373732e-06, |
| "loss": 0.0244, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.24006925074540733, |
| "grad_norm": 1.1539957523345947, |
| "learning_rate": 9.959311682605797e-06, |
| "loss": 0.0163, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.24045397710878139, |
| "grad_norm": 0.8867762088775635, |
| "learning_rate": 9.959156447478091e-06, |
| "loss": 0.006, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.24083870347215544, |
| "grad_norm": 1.2318607568740845, |
| "learning_rate": 9.959000917999831e-06, |
| "loss": 0.0096, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.24122342983552947, |
| "grad_norm": 2.0298075675964355, |
| "learning_rate": 9.958845094180247e-06, |
| "loss": 0.023, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.24160815619890352, |
| "grad_norm": 0.8165722489356995, |
| "learning_rate": 9.958688976028588e-06, |
| "loss": 0.0216, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.24199288256227758, |
| "grad_norm": 0.2466529905796051, |
| "learning_rate": 9.958532563554119e-06, |
| "loss": 0.0041, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.24237760892565163, |
| "grad_norm": 0.6395401358604431, |
| "learning_rate": 9.958375856766128e-06, |
| "loss": 0.0192, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.2427623352890257, |
| "grad_norm": 1.0328831672668457, |
| "learning_rate": 9.95821885567391e-06, |
| "loss": 0.0236, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.24314706165239974, |
| "grad_norm": 0.6815221905708313, |
| "learning_rate": 9.95806156028679e-06, |
| "loss": 0.0158, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.24353178801577377, |
| "grad_norm": 0.6181775331497192, |
| "learning_rate": 9.9579039706141e-06, |
| "loss": 0.0087, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.24391651437914783, |
| "grad_norm": 1.2611945867538452, |
| "learning_rate": 9.957746086665196e-06, |
| "loss": 0.0137, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.24430124074252188, |
| "grad_norm": 0.1974414438009262, |
| "learning_rate": 9.957587908449448e-06, |
| "loss": 0.0035, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.24468596710589594, |
| "grad_norm": 0.9455735683441162, |
| "learning_rate": 9.957429435976245e-06, |
| "loss": 0.0063, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.24507069346927, |
| "grad_norm": 0.764569103717804, |
| "learning_rate": 9.957270669254994e-06, |
| "loss": 0.0133, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.24545541983264405, |
| "grad_norm": 0.3373180031776428, |
| "learning_rate": 9.957111608295119e-06, |
| "loss": 0.0055, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.24584014619601807, |
| "grad_norm": 0.24801532924175262, |
| "learning_rate": 9.956952253106059e-06, |
| "loss": 0.0056, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.24622487255939213, |
| "grad_norm": 0.1746530830860138, |
| "learning_rate": 9.956792603697274e-06, |
| "loss": 0.003, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.24660959892276618, |
| "grad_norm": 0.575825035572052, |
| "learning_rate": 9.956632660078239e-06, |
| "loss": 0.0099, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.24699432528614024, |
| "grad_norm": 1.7585387229919434, |
| "learning_rate": 9.956472422258447e-06, |
| "loss": 0.0477, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.2473790516495143, |
| "grad_norm": 1.1238574981689453, |
| "learning_rate": 9.956311890247411e-06, |
| "loss": 0.0328, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.24776377801288832, |
| "grad_norm": 1.6307287216186523, |
| "learning_rate": 9.956151064054658e-06, |
| "loss": 0.0134, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.24814850437626237, |
| "grad_norm": 0.32868483662605286, |
| "learning_rate": 9.955989943689734e-06, |
| "loss": 0.0017, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.24853323073963643, |
| "grad_norm": 0.8028136491775513, |
| "learning_rate": 9.955828529162201e-06, |
| "loss": 0.0056, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.24891795710301048, |
| "grad_norm": 1.284749984741211, |
| "learning_rate": 9.955666820481645e-06, |
| "loss": 0.0103, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.24930268346638454, |
| "grad_norm": 1.407700777053833, |
| "learning_rate": 9.955504817657656e-06, |
| "loss": 0.0311, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.2496874098297586, |
| "grad_norm": 0.9083035588264465, |
| "learning_rate": 9.955342520699856e-06, |
| "loss": 0.0156, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.2500721361931326, |
| "grad_norm": 1.101758599281311, |
| "learning_rate": 9.955179929617875e-06, |
| "loss": 0.018, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.2504568625565067, |
| "grad_norm": 2.050755023956299, |
| "learning_rate": 9.955017044421368e-06, |
| "loss": 0.0167, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.25084158891988073, |
| "grad_norm": 0.8650450706481934, |
| "learning_rate": 9.954853865119996e-06, |
| "loss": 0.0108, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.2512263152832548, |
| "grad_norm": 1.0544840097427368, |
| "learning_rate": 9.95469039172345e-06, |
| "loss": 0.0154, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.25161104164662884, |
| "grad_norm": 5.561423301696777, |
| "learning_rate": 9.954526624241429e-06, |
| "loss": 0.0283, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.2519957680100029, |
| "grad_norm": 1.3561756610870361, |
| "learning_rate": 9.954362562683658e-06, |
| "loss": 0.023, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.25238049437337695, |
| "grad_norm": 1.6020667552947998, |
| "learning_rate": 9.954198207059872e-06, |
| "loss": 0.0175, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.252765220736751, |
| "grad_norm": 0.3961346745491028, |
| "learning_rate": 9.954033557379826e-06, |
| "loss": 0.0073, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.25314994710012506, |
| "grad_norm": 1.5307796001434326, |
| "learning_rate": 9.953868613653295e-06, |
| "loss": 0.0092, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.25353467346349906, |
| "grad_norm": 1.0183442831039429, |
| "learning_rate": 9.953703375890067e-06, |
| "loss": 0.0445, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.2539193998268731, |
| "grad_norm": 0.954535722732544, |
| "learning_rate": 9.95353784409995e-06, |
| "loss": 0.0095, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.25430412619024717, |
| "grad_norm": 0.8993096351623535, |
| "learning_rate": 9.953372018292771e-06, |
| "loss": 0.0151, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.2546888525536212, |
| "grad_norm": 0.7639009952545166, |
| "learning_rate": 9.95320589847837e-06, |
| "loss": 0.0082, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.2550735789169953, |
| "grad_norm": 0.5023514032363892, |
| "learning_rate": 9.953039484666607e-06, |
| "loss": 0.0065, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.25545830528036934, |
| "grad_norm": 0.5645049214363098, |
| "learning_rate": 9.952872776867365e-06, |
| "loss": 0.0074, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.2558430316437434, |
| "grad_norm": 0.6301894783973694, |
| "learning_rate": 9.95270577509053e-06, |
| "loss": 0.0108, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.25622775800711745, |
| "grad_norm": 0.7627872824668884, |
| "learning_rate": 9.952538479346022e-06, |
| "loss": 0.0235, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.2566124843704915, |
| "grad_norm": 1.0123822689056396, |
| "learning_rate": 9.952370889643766e-06, |
| "loss": 0.0149, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.25699721073386556, |
| "grad_norm": 0.6568852663040161, |
| "learning_rate": 9.952203005993713e-06, |
| "loss": 0.0112, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.2573819370972396, |
| "grad_norm": 0.11951254308223724, |
| "learning_rate": 9.952034828405824e-06, |
| "loss": 0.0024, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.25776666346061367, |
| "grad_norm": 1.049953579902649, |
| "learning_rate": 9.951866356890084e-06, |
| "loss": 0.0118, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.25815138982398766, |
| "grad_norm": 0.5883866548538208, |
| "learning_rate": 9.951697591456493e-06, |
| "loss": 0.0048, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.2585361161873617, |
| "grad_norm": 0.340031236410141, |
| "learning_rate": 9.951528532115065e-06, |
| "loss": 0.0086, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.2589208425507358, |
| "grad_norm": 1.1816928386688232, |
| "learning_rate": 9.951359178875837e-06, |
| "loss": 0.0075, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.25930556891410983, |
| "grad_norm": 1.2891496419906616, |
| "learning_rate": 9.95118953174886e-06, |
| "loss": 0.008, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.2596902952774839, |
| "grad_norm": 0.6510321497917175, |
| "learning_rate": 9.951019590744203e-06, |
| "loss": 0.0109, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.26007502164085794, |
| "grad_norm": 1.740363597869873, |
| "learning_rate": 9.950849355871954e-06, |
| "loss": 0.0136, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.260459748004232, |
| "grad_norm": 0.9862236976623535, |
| "learning_rate": 9.950678827142218e-06, |
| "loss": 0.0126, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.26084447436760605, |
| "grad_norm": 1.5269832611083984, |
| "learning_rate": 9.950508004565114e-06, |
| "loss": 0.0197, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.2612292007309801, |
| "grad_norm": 0.9901081919670105, |
| "learning_rate": 9.950336888150781e-06, |
| "loss": 0.0091, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.26161392709435416, |
| "grad_norm": 0.7796134352684021, |
| "learning_rate": 9.95016547790938e-06, |
| "loss": 0.008, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.2619986534577282, |
| "grad_norm": 1.631371259689331, |
| "learning_rate": 9.949993773851082e-06, |
| "loss": 0.0254, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.2623833798211022, |
| "grad_norm": 0.6020075678825378, |
| "learning_rate": 9.949821775986078e-06, |
| "loss": 0.0136, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.26276810618447627, |
| "grad_norm": 0.6335283517837524, |
| "learning_rate": 9.949649484324579e-06, |
| "loss": 0.0145, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.2631528325478503, |
| "grad_norm": 1.1868330240249634, |
| "learning_rate": 9.949476898876808e-06, |
| "loss": 0.014, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.2635375589112244, |
| "grad_norm": 0.5912204384803772, |
| "learning_rate": 9.949304019653012e-06, |
| "loss": 0.0053, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.26392228527459843, |
| "grad_norm": 0.32141926884651184, |
| "learning_rate": 9.949130846663451e-06, |
| "loss": 0.0041, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.2643070116379725, |
| "grad_norm": 0.2612176835536957, |
| "learning_rate": 9.948957379918405e-06, |
| "loss": 0.0041, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.26469173800134654, |
| "grad_norm": 1.4371662139892578, |
| "learning_rate": 9.948783619428168e-06, |
| "loss": 0.0122, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.2650764643647206, |
| "grad_norm": 0.8866683840751648, |
| "learning_rate": 9.948609565203054e-06, |
| "loss": 0.0147, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.26546119072809465, |
| "grad_norm": 0.2633998692035675, |
| "learning_rate": 9.948435217253394e-06, |
| "loss": 0.0022, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.2658459170914687, |
| "grad_norm": 0.6015472412109375, |
| "learning_rate": 9.948260575589538e-06, |
| "loss": 0.0134, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.26623064345484276, |
| "grad_norm": 0.7728354930877686, |
| "learning_rate": 9.94808564022185e-06, |
| "loss": 0.0286, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.2666153698182168, |
| "grad_norm": 1.7141587734222412, |
| "learning_rate": 9.947910411160715e-06, |
| "loss": 0.0105, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.2670000961815908, |
| "grad_norm": 0.2638167142868042, |
| "learning_rate": 9.947734888416532e-06, |
| "loss": 0.0143, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.2673848225449649, |
| "grad_norm": 3.141570806503296, |
| "learning_rate": 9.947559071999719e-06, |
| "loss": 0.0379, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.26776954890833893, |
| "grad_norm": 2.544487953186035, |
| "learning_rate": 9.947382961920713e-06, |
| "loss": 0.028, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.268154275271713, |
| "grad_norm": 0.16364744305610657, |
| "learning_rate": 9.947206558189967e-06, |
| "loss": 0.0024, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.26853900163508704, |
| "grad_norm": 0.9908127188682556, |
| "learning_rate": 9.94702986081795e-06, |
| "loss": 0.0092, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.2689237279984611, |
| "grad_norm": 1.1755777597427368, |
| "learning_rate": 9.946852869815152e-06, |
| "loss": 0.0178, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.26930845436183515, |
| "grad_norm": 1.6439647674560547, |
| "learning_rate": 9.946675585192076e-06, |
| "loss": 0.0174, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.2696931807252092, |
| "grad_norm": 1.5522323846817017, |
| "learning_rate": 9.946498006959246e-06, |
| "loss": 0.0265, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.27007790708858326, |
| "grad_norm": 0.6794743537902832, |
| "learning_rate": 9.946320135127203e-06, |
| "loss": 0.022, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.2704626334519573, |
| "grad_norm": 0.8453839421272278, |
| "learning_rate": 9.946141969706501e-06, |
| "loss": 0.0339, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.27084735981533137, |
| "grad_norm": 2.207174301147461, |
| "learning_rate": 9.94596351070772e-06, |
| "loss": 0.0112, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.2712320861787054, |
| "grad_norm": 0.6483219265937805, |
| "learning_rate": 9.945784758141449e-06, |
| "loss": 0.0156, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.2716168125420794, |
| "grad_norm": 1.7637494802474976, |
| "learning_rate": 9.9456057120183e-06, |
| "loss": 0.0187, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.2720015389054535, |
| "grad_norm": 0.9741355776786804, |
| "learning_rate": 9.945426372348896e-06, |
| "loss": 0.014, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.27238626526882753, |
| "grad_norm": 0.42849379777908325, |
| "learning_rate": 9.945246739143888e-06, |
| "loss": 0.0147, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.2727709916322016, |
| "grad_norm": 0.6075026392936707, |
| "learning_rate": 9.945066812413932e-06, |
| "loss": 0.0105, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.27315571799557564, |
| "grad_norm": 0.5715228319168091, |
| "learning_rate": 9.944886592169712e-06, |
| "loss": 0.0049, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.2735404443589497, |
| "grad_norm": 0.5022349953651428, |
| "learning_rate": 9.944706078421923e-06, |
| "loss": 0.0154, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.27392517072232375, |
| "grad_norm": 3.6961655616760254, |
| "learning_rate": 9.94452527118128e-06, |
| "loss": 0.1265, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.2743098970856978, |
| "grad_norm": 0.6201106905937195, |
| "learning_rate": 9.944344170458516e-06, |
| "loss": 0.0051, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.27469462344907186, |
| "grad_norm": 0.5065118670463562, |
| "learning_rate": 9.944162776264376e-06, |
| "loss": 0.0041, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.2750793498124459, |
| "grad_norm": 0.09867949038743973, |
| "learning_rate": 9.94398108860963e-06, |
| "loss": 0.001, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.27546407617581997, |
| "grad_norm": 0.9551201462745667, |
| "learning_rate": 9.943799107505063e-06, |
| "loss": 0.0142, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.27584880253919397, |
| "grad_norm": 1.8438011407852173, |
| "learning_rate": 9.943616832961475e-06, |
| "loss": 0.0185, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.276233528902568, |
| "grad_norm": 1.110871434211731, |
| "learning_rate": 9.943434264989684e-06, |
| "loss": 0.0099, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.2766182552659421, |
| "grad_norm": 0.585672914981842, |
| "learning_rate": 9.943251403600526e-06, |
| "loss": 0.0171, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.27700298162931614, |
| "grad_norm": 0.41809943318367004, |
| "learning_rate": 9.94306824880486e-06, |
| "loss": 0.0034, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.2773877079926902, |
| "grad_norm": 2.4627175331115723, |
| "learning_rate": 9.94288480061355e-06, |
| "loss": 0.0175, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.27777243435606425, |
| "grad_norm": 0.7623758316040039, |
| "learning_rate": 9.942701059037487e-06, |
| "loss": 0.0114, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.2781571607194383, |
| "grad_norm": 0.40503352880477905, |
| "learning_rate": 9.942517024087579e-06, |
| "loss": 0.0042, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.27854188708281236, |
| "grad_norm": 1.7551190853118896, |
| "learning_rate": 9.942332695774747e-06, |
| "loss": 0.0115, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.2789266134461864, |
| "grad_norm": 0.7830015420913696, |
| "learning_rate": 9.942148074109934e-06, |
| "loss": 0.0067, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.27931133980956047, |
| "grad_norm": 2.3621063232421875, |
| "learning_rate": 9.941963159104095e-06, |
| "loss": 0.0261, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.2796960661729345, |
| "grad_norm": 1.2633928060531616, |
| "learning_rate": 9.94177795076821e-06, |
| "loss": 0.0084, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.2800807925363086, |
| "grad_norm": 0.08101649582386017, |
| "learning_rate": 9.941592449113268e-06, |
| "loss": 0.0008, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.2804655188996826, |
| "grad_norm": 2.2081878185272217, |
| "learning_rate": 9.941406654150283e-06, |
| "loss": 0.0216, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.28085024526305663, |
| "grad_norm": 2.1128742694854736, |
| "learning_rate": 9.94122056589028e-06, |
| "loss": 0.0111, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.2812349716264307, |
| "grad_norm": 0.06335221230983734, |
| "learning_rate": 9.941034184344305e-06, |
| "loss": 0.0011, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.28161969798980474, |
| "grad_norm": 0.8481754064559937, |
| "learning_rate": 9.940847509523422e-06, |
| "loss": 0.0065, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.2820044243531788, |
| "grad_norm": 1.116052269935608, |
| "learning_rate": 9.940660541438708e-06, |
| "loss": 0.0156, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.28238915071655285, |
| "grad_norm": 1.1566013097763062, |
| "learning_rate": 9.940473280101263e-06, |
| "loss": 0.0182, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.2827738770799269, |
| "grad_norm": 2.7523629665374756, |
| "learning_rate": 9.940285725522203e-06, |
| "loss": 0.0417, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.28315860344330096, |
| "grad_norm": 2.2533061504364014, |
| "learning_rate": 9.940097877712659e-06, |
| "loss": 0.027, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.283543329806675, |
| "grad_norm": 1.0717406272888184, |
| "learning_rate": 9.939909736683778e-06, |
| "loss": 0.0092, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.28392805617004907, |
| "grad_norm": 0.7087541818618774, |
| "learning_rate": 9.93972130244673e-06, |
| "loss": 0.0081, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.2843127825334231, |
| "grad_norm": 1.6095730066299438, |
| "learning_rate": 9.939532575012698e-06, |
| "loss": 0.0172, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.2846975088967972, |
| "grad_norm": 1.3097370862960815, |
| "learning_rate": 9.939343554392887e-06, |
| "loss": 0.0129, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.2850822352601712, |
| "grad_norm": 0.4617692828178406, |
| "learning_rate": 9.939154240598513e-06, |
| "loss": 0.0056, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.28546696162354523, |
| "grad_norm": 0.5378155708312988, |
| "learning_rate": 9.938964633640815e-06, |
| "loss": 0.0061, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.2858516879869193, |
| "grad_norm": 0.8007967472076416, |
| "learning_rate": 9.938774733531045e-06, |
| "loss": 0.0115, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.28623641435029334, |
| "grad_norm": 1.1707913875579834, |
| "learning_rate": 9.938584540280477e-06, |
| "loss": 0.0214, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.2866211407136674, |
| "grad_norm": 0.751804769039154, |
| "learning_rate": 9.938394053900396e-06, |
| "loss": 0.006, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.28700586707704145, |
| "grad_norm": 0.6133852005004883, |
| "learning_rate": 9.938203274402113e-06, |
| "loss": 0.0175, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.2873905934404155, |
| "grad_norm": 1.4026976823806763, |
| "learning_rate": 9.938012201796948e-06, |
| "loss": 0.0157, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.28777531980378956, |
| "grad_norm": 0.9868363738059998, |
| "learning_rate": 9.937820836096244e-06, |
| "loss": 0.0076, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.2881600461671636, |
| "grad_norm": 0.4354103207588196, |
| "learning_rate": 9.937629177311359e-06, |
| "loss": 0.0065, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.2885447725305377, |
| "grad_norm": 0.6645623445510864, |
| "learning_rate": 9.937437225453669e-06, |
| "loss": 0.0067, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.28892949889391173, |
| "grad_norm": 1.250994086265564, |
| "learning_rate": 9.937244980534568e-06, |
| "loss": 0.0182, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.28931422525728573, |
| "grad_norm": 0.5992980003356934, |
| "learning_rate": 9.937052442565464e-06, |
| "loss": 0.0033, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.2896989516206598, |
| "grad_norm": 0.38558635115623474, |
| "learning_rate": 9.93685961155779e-06, |
| "loss": 0.0051, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.29008367798403384, |
| "grad_norm": 7.663323879241943, |
| "learning_rate": 9.936666487522985e-06, |
| "loss": 0.0408, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.2904684043474079, |
| "grad_norm": 0.9667056202888489, |
| "learning_rate": 9.93647307047252e-06, |
| "loss": 0.0104, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.29085313071078195, |
| "grad_norm": 1.2200483083724976, |
| "learning_rate": 9.936279360417866e-06, |
| "loss": 0.0277, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.291237857074156, |
| "grad_norm": 0.49670878052711487, |
| "learning_rate": 9.93608535737053e-06, |
| "loss": 0.0094, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.29162258343753006, |
| "grad_norm": 1.3144538402557373, |
| "learning_rate": 9.935891061342017e-06, |
| "loss": 0.0274, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.2920073098009041, |
| "grad_norm": 0.2239675670862198, |
| "learning_rate": 9.935696472343867e-06, |
| "loss": 0.0035, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.29239203616427817, |
| "grad_norm": 3.239274740219116, |
| "learning_rate": 9.935501590387629e-06, |
| "loss": 0.0383, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.2927767625276522, |
| "grad_norm": 0.9120214581489563, |
| "learning_rate": 9.935306415484868e-06, |
| "loss": 0.0074, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.2931614888910263, |
| "grad_norm": 0.6452634334564209, |
| "learning_rate": 9.935110947647168e-06, |
| "loss": 0.0117, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.29354621525440033, |
| "grad_norm": 0.6486735343933105, |
| "learning_rate": 9.934915186886136e-06, |
| "loss": 0.0061, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.29393094161777433, |
| "grad_norm": 1.0978666543960571, |
| "learning_rate": 9.934719133213383e-06, |
| "loss": 0.0099, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.2943156679811484, |
| "grad_norm": 0.5332254767417908, |
| "learning_rate": 9.934522786640555e-06, |
| "loss": 0.0091, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.29470039434452244, |
| "grad_norm": 1.3437743186950684, |
| "learning_rate": 9.9343261471793e-06, |
| "loss": 0.0092, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.2950851207078965, |
| "grad_norm": 1.3608458042144775, |
| "learning_rate": 9.93412921484129e-06, |
| "loss": 0.0118, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.29546984707127055, |
| "grad_norm": 3.0785293579101562, |
| "learning_rate": 9.933931989638216e-06, |
| "loss": 0.0292, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.2958545734346446, |
| "grad_norm": 2.402841806411743, |
| "learning_rate": 9.933734471581784e-06, |
| "loss": 0.0345, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.29623929979801866, |
| "grad_norm": 3.3671507835388184, |
| "learning_rate": 9.933536660683718e-06, |
| "loss": 0.0718, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.2966240261613927, |
| "grad_norm": 0.6962792277336121, |
| "learning_rate": 9.933338556955756e-06, |
| "loss": 0.0066, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.2970087525247668, |
| "grad_norm": 1.287801742553711, |
| "learning_rate": 9.933140160409659e-06, |
| "loss": 0.0117, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.2973934788881408, |
| "grad_norm": 0.6097240447998047, |
| "learning_rate": 9.932941471057202e-06, |
| "loss": 0.0112, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.2977782052515149, |
| "grad_norm": 0.5029175281524658, |
| "learning_rate": 9.93274248891018e-06, |
| "loss": 0.0096, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.29816293161488894, |
| "grad_norm": 0.8087600469589233, |
| "learning_rate": 9.932543213980402e-06, |
| "loss": 0.0059, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.29854765797826294, |
| "grad_norm": 1.2778784036636353, |
| "learning_rate": 9.932343646279697e-06, |
| "loss": 0.0087, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.298932384341637, |
| "grad_norm": 2.181772470474243, |
| "learning_rate": 9.932143785819908e-06, |
| "loss": 0.0397, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.29931711070501105, |
| "grad_norm": 2.003788709640503, |
| "learning_rate": 9.931943632612897e-06, |
| "loss": 0.0455, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.2997018370683851, |
| "grad_norm": 2.2337660789489746, |
| "learning_rate": 9.93174318667055e-06, |
| "loss": 0.0294, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.30008656343175916, |
| "grad_norm": 1.9997296333312988, |
| "learning_rate": 9.93154244800476e-06, |
| "loss": 0.0365, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.3004712897951332, |
| "grad_norm": 1.1483068466186523, |
| "learning_rate": 9.931341416627443e-06, |
| "loss": 0.0236, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.30085601615850727, |
| "grad_norm": 0.22705046832561493, |
| "learning_rate": 9.931140092550528e-06, |
| "loss": 0.0022, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.3012407425218813, |
| "grad_norm": 1.8457714319229126, |
| "learning_rate": 9.93093847578597e-06, |
| "loss": 0.0226, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.3016254688852554, |
| "grad_norm": 1.0760217905044556, |
| "learning_rate": 9.930736566345732e-06, |
| "loss": 0.0051, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.30201019524862943, |
| "grad_norm": 0.5447891354560852, |
| "learning_rate": 9.930534364241801e-06, |
| "loss": 0.0029, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.3023949216120035, |
| "grad_norm": 1.1627178192138672, |
| "learning_rate": 9.930331869486176e-06, |
| "loss": 0.0058, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.3027796479753775, |
| "grad_norm": 0.5428863167762756, |
| "learning_rate": 9.930129082090878e-06, |
| "loss": 0.0084, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.30316437433875154, |
| "grad_norm": 0.6576860547065735, |
| "learning_rate": 9.929926002067944e-06, |
| "loss": 0.0056, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.3035491007021256, |
| "grad_norm": 0.17010360956192017, |
| "learning_rate": 9.929722629429425e-06, |
| "loss": 0.0032, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.30393382706549965, |
| "grad_norm": 0.7153604626655579, |
| "learning_rate": 9.929518964187395e-06, |
| "loss": 0.0183, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.3043185534288737, |
| "grad_norm": 0.2032863050699234, |
| "learning_rate": 9.92931500635394e-06, |
| "loss": 0.0014, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.30470327979224776, |
| "grad_norm": 0.4419695734977722, |
| "learning_rate": 9.929110755941168e-06, |
| "loss": 0.0032, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.3050880061556218, |
| "grad_norm": 1.048749327659607, |
| "learning_rate": 9.928906212961202e-06, |
| "loss": 0.0072, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.30547273251899587, |
| "grad_norm": 0.09899991005659103, |
| "learning_rate": 9.928701377426182e-06, |
| "loss": 0.0012, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.3058574588823699, |
| "grad_norm": 0.7091645002365112, |
| "learning_rate": 9.928496249348265e-06, |
| "loss": 0.0268, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.306242185245744, |
| "grad_norm": 0.4991755485534668, |
| "learning_rate": 9.928290828739631e-06, |
| "loss": 0.0064, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.30662691160911804, |
| "grad_norm": 0.48444482684135437, |
| "learning_rate": 9.928085115612465e-06, |
| "loss": 0.0037, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.3070116379724921, |
| "grad_norm": 0.4473024606704712, |
| "learning_rate": 9.927879109978984e-06, |
| "loss": 0.0049, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.3073963643358661, |
| "grad_norm": 0.7630549669265747, |
| "learning_rate": 9.927672811851412e-06, |
| "loss": 0.015, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.30778109069924015, |
| "grad_norm": 0.3428303301334381, |
| "learning_rate": 9.927466221241995e-06, |
| "loss": 0.0037, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.3081658170626142, |
| "grad_norm": 1.1008957624435425, |
| "learning_rate": 9.927259338162995e-06, |
| "loss": 0.0114, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.30855054342598826, |
| "grad_norm": 0.953392505645752, |
| "learning_rate": 9.927052162626693e-06, |
| "loss": 0.0061, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.3089352697893623, |
| "grad_norm": 0.8205415606498718, |
| "learning_rate": 9.926844694645382e-06, |
| "loss": 0.0109, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.30931999615273637, |
| "grad_norm": 0.1952117681503296, |
| "learning_rate": 9.92663693423138e-06, |
| "loss": 0.0016, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.3097047225161104, |
| "grad_norm": 0.8701672554016113, |
| "learning_rate": 9.926428881397015e-06, |
| "loss": 0.0074, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.3100894488794845, |
| "grad_norm": 0.9610002040863037, |
| "learning_rate": 9.92622053615464e-06, |
| "loss": 0.0093, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.31047417524285853, |
| "grad_norm": 0.9083012938499451, |
| "learning_rate": 9.926011898516619e-06, |
| "loss": 0.0211, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.3108589016062326, |
| "grad_norm": 0.12275370955467224, |
| "learning_rate": 9.925802968495337e-06, |
| "loss": 0.0022, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.31124362796960664, |
| "grad_norm": 0.48980191349983215, |
| "learning_rate": 9.925593746103193e-06, |
| "loss": 0.0033, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.3116283543329807, |
| "grad_norm": 1.5572385787963867, |
| "learning_rate": 9.925384231352607e-06, |
| "loss": 0.0246, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.3120130806963547, |
| "grad_norm": 2.358025312423706, |
| "learning_rate": 9.925174424256015e-06, |
| "loss": 0.034, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.31239780705972875, |
| "grad_norm": 4.787092685699463, |
| "learning_rate": 9.924964324825867e-06, |
| "loss": 0.0058, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.3127825334231028, |
| "grad_norm": 0.6324902176856995, |
| "learning_rate": 9.924753933074637e-06, |
| "loss": 0.0055, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.31316725978647686, |
| "grad_norm": 1.22360360622406, |
| "learning_rate": 9.924543249014814e-06, |
| "loss": 0.0376, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.3135519861498509, |
| "grad_norm": 0.9349619746208191, |
| "learning_rate": 9.9243322726589e-06, |
| "loss": 0.0217, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.31393671251322497, |
| "grad_norm": 1.0631873607635498, |
| "learning_rate": 9.924121004019416e-06, |
| "loss": 0.0106, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.314321438876599, |
| "grad_norm": 0.6873127222061157, |
| "learning_rate": 9.923909443108906e-06, |
| "loss": 0.015, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.3147061652399731, |
| "grad_norm": 1.320493459701538, |
| "learning_rate": 9.923697589939925e-06, |
| "loss": 0.0195, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.31509089160334713, |
| "grad_norm": 1.1849138736724854, |
| "learning_rate": 9.923485444525047e-06, |
| "loss": 0.017, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.3154756179667212, |
| "grad_norm": 0.3958847224712372, |
| "learning_rate": 9.923273006876865e-06, |
| "loss": 0.0108, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.31586034433009524, |
| "grad_norm": 1.8317941427230835, |
| "learning_rate": 9.923060277007987e-06, |
| "loss": 0.0222, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.31624507069346924, |
| "grad_norm": 0.6959751844406128, |
| "learning_rate": 9.922847254931043e-06, |
| "loss": 0.0132, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.3166297970568433, |
| "grad_norm": 0.4210129976272583, |
| "learning_rate": 9.922633940658674e-06, |
| "loss": 0.0056, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.31701452342021735, |
| "grad_norm": 0.5277615785598755, |
| "learning_rate": 9.922420334203539e-06, |
| "loss": 0.0107, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.3173992497835914, |
| "grad_norm": 1.4433057308197021, |
| "learning_rate": 9.922206435578324e-06, |
| "loss": 0.0203, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.31778397614696546, |
| "grad_norm": 0.3783744275569916, |
| "learning_rate": 9.921992244795716e-06, |
| "loss": 0.003, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.3181687025103395, |
| "grad_norm": 0.7307786345481873, |
| "learning_rate": 9.921777761868434e-06, |
| "loss": 0.0119, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.3185534288737136, |
| "grad_norm": 0.5341587066650391, |
| "learning_rate": 9.921562986809207e-06, |
| "loss": 0.0075, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.31893815523708763, |
| "grad_norm": 0.9260216355323792, |
| "learning_rate": 9.921347919630784e-06, |
| "loss": 0.0315, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.3193228816004617, |
| "grad_norm": 1.7915147542953491, |
| "learning_rate": 9.92113256034593e-06, |
| "loss": 0.0187, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.31970760796383574, |
| "grad_norm": 0.3053416609764099, |
| "learning_rate": 9.920916908967424e-06, |
| "loss": 0.0114, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.3200923343272098, |
| "grad_norm": 1.0637885332107544, |
| "learning_rate": 9.920700965508072e-06, |
| "loss": 0.0129, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.32047706069058385, |
| "grad_norm": 0.7841233015060425, |
| "learning_rate": 9.920484729980689e-06, |
| "loss": 0.0066, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.32086178705395785, |
| "grad_norm": 0.5703051090240479, |
| "learning_rate": 9.920268202398107e-06, |
| "loss": 0.0138, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.3212465134173319, |
| "grad_norm": 0.6354265213012695, |
| "learning_rate": 9.920051382773179e-06, |
| "loss": 0.0127, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.32163123978070596, |
| "grad_norm": 0.2680123448371887, |
| "learning_rate": 9.919834271118778e-06, |
| "loss": 0.0023, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.32201596614408, |
| "grad_norm": 0.36873406171798706, |
| "learning_rate": 9.919616867447786e-06, |
| "loss": 0.0082, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.32240069250745407, |
| "grad_norm": 0.748493492603302, |
| "learning_rate": 9.91939917177311e-06, |
| "loss": 0.0138, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.3227854188708281, |
| "grad_norm": 1.3479591608047485, |
| "learning_rate": 9.91918118410767e-06, |
| "loss": 0.0135, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.3231701452342022, |
| "grad_norm": 0.6877148747444153, |
| "learning_rate": 9.918962904464406e-06, |
| "loss": 0.025, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.32355487159757623, |
| "grad_norm": 0.6824389696121216, |
| "learning_rate": 9.918744332856273e-06, |
| "loss": 0.0074, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.3239395979609503, |
| "grad_norm": 0.2313074767589569, |
| "learning_rate": 9.918525469296243e-06, |
| "loss": 0.0046, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.32432432432432434, |
| "grad_norm": 0.27155447006225586, |
| "learning_rate": 9.918306313797309e-06, |
| "loss": 0.003, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.3247090506876984, |
| "grad_norm": 1.2494877576828003, |
| "learning_rate": 9.918086866372475e-06, |
| "loss": 0.015, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.32509377705107245, |
| "grad_norm": 0.7449502944946289, |
| "learning_rate": 9.917867127034773e-06, |
| "loss": 0.0179, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.32547850341444645, |
| "grad_norm": 0.2732839584350586, |
| "learning_rate": 9.917647095797241e-06, |
| "loss": 0.0022, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.3258632297778205, |
| "grad_norm": 1.2635875940322876, |
| "learning_rate": 9.917426772672938e-06, |
| "loss": 0.0155, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.32624795614119456, |
| "grad_norm": 0.8429836630821228, |
| "learning_rate": 9.917206157674943e-06, |
| "loss": 0.0092, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.3266326825045686, |
| "grad_norm": 1.1740093231201172, |
| "learning_rate": 9.916985250816351e-06, |
| "loss": 0.0163, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.32701740886794267, |
| "grad_norm": 1.1592316627502441, |
| "learning_rate": 9.916764052110274e-06, |
| "loss": 0.015, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.3274021352313167, |
| "grad_norm": 0.21816864609718323, |
| "learning_rate": 9.916542561569843e-06, |
| "loss": 0.0027, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.3277868615946908, |
| "grad_norm": 0.8437023758888245, |
| "learning_rate": 9.916320779208199e-06, |
| "loss": 0.0039, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.32817158795806484, |
| "grad_norm": 0.7825914621353149, |
| "learning_rate": 9.91609870503851e-06, |
| "loss": 0.0086, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.3285563143214389, |
| "grad_norm": 0.21168985962867737, |
| "learning_rate": 9.915876339073955e-06, |
| "loss": 0.0045, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.32894104068481295, |
| "grad_norm": 0.4206426441669464, |
| "learning_rate": 9.915653681327736e-06, |
| "loss": 0.0077, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.329325767048187, |
| "grad_norm": 0.9891207814216614, |
| "learning_rate": 9.915430731813067e-06, |
| "loss": 0.0086, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.329710493411561, |
| "grad_norm": 0.5929882526397705, |
| "learning_rate": 9.915207490543179e-06, |
| "loss": 0.0101, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.33009521977493506, |
| "grad_norm": 0.8196332454681396, |
| "learning_rate": 9.914983957531327e-06, |
| "loss": 0.027, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.3304799461383091, |
| "grad_norm": 0.8502101302146912, |
| "learning_rate": 9.914760132790776e-06, |
| "loss": 0.0055, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.33086467250168317, |
| "grad_norm": 0.08041344583034515, |
| "learning_rate": 9.914536016334808e-06, |
| "loss": 0.0005, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.3312493988650572, |
| "grad_norm": 0.30731120705604553, |
| "learning_rate": 9.914311608176732e-06, |
| "loss": 0.0019, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.3316341252284313, |
| "grad_norm": 1.0212372541427612, |
| "learning_rate": 9.914086908329863e-06, |
| "loss": 0.0069, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.33201885159180533, |
| "grad_norm": 0.44622012972831726, |
| "learning_rate": 9.913861916807539e-06, |
| "loss": 0.0017, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.3324035779551794, |
| "grad_norm": 0.5307901501655579, |
| "learning_rate": 9.913636633623116e-06, |
| "loss": 0.003, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.33278830431855344, |
| "grad_norm": 0.32324591279029846, |
| "learning_rate": 9.913411058789964e-06, |
| "loss": 0.0009, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.3331730306819275, |
| "grad_norm": 5.367404937744141, |
| "learning_rate": 9.913185192321473e-06, |
| "loss": 0.0318, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.33355775704530155, |
| "grad_norm": 1.1971156597137451, |
| "learning_rate": 9.912959034231049e-06, |
| "loss": 0.0189, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.3339424834086756, |
| "grad_norm": 1.5217525959014893, |
| "learning_rate": 9.912732584532114e-06, |
| "loss": 0.0178, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.3343272097720496, |
| "grad_norm": 0.824066698551178, |
| "learning_rate": 9.912505843238112e-06, |
| "loss": 0.0118, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.33471193613542366, |
| "grad_norm": 1.85015070438385, |
| "learning_rate": 9.912278810362499e-06, |
| "loss": 0.0351, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.3350966624987977, |
| "grad_norm": 1.2828856706619263, |
| "learning_rate": 9.912051485918752e-06, |
| "loss": 0.0118, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.33548138886217177, |
| "grad_norm": 1.711953043937683, |
| "learning_rate": 9.911823869920362e-06, |
| "loss": 0.0453, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.3358661152255458, |
| "grad_norm": 0.9143729209899902, |
| "learning_rate": 9.91159596238084e-06, |
| "loss": 0.0303, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.3362508415889199, |
| "grad_norm": 1.732001543045044, |
| "learning_rate": 9.911367763313713e-06, |
| "loss": 0.043, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.33663556795229393, |
| "grad_norm": 0.790520429611206, |
| "learning_rate": 9.911139272732528e-06, |
| "loss": 0.0114, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.337020294315668, |
| "grad_norm": 1.3358290195465088, |
| "learning_rate": 9.910910490650844e-06, |
| "loss": 0.0199, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.33740502067904204, |
| "grad_norm": 0.4768337309360504, |
| "learning_rate": 9.910681417082241e-06, |
| "loss": 0.0097, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.3377897470424161, |
| "grad_norm": 1.01479971408844, |
| "learning_rate": 9.910452052040318e-06, |
| "loss": 0.0079, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.33817447340579015, |
| "grad_norm": 0.8050641417503357, |
| "learning_rate": 9.910222395538686e-06, |
| "loss": 0.0216, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.3385591997691642, |
| "grad_norm": 1.4165840148925781, |
| "learning_rate": 9.90999244759098e-06, |
| "loss": 0.0175, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.3389439261325382, |
| "grad_norm": 0.4710328280925751, |
| "learning_rate": 9.909762208210843e-06, |
| "loss": 0.0062, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.33932865249591226, |
| "grad_norm": 0.6688194870948792, |
| "learning_rate": 9.909531677411945e-06, |
| "loss": 0.0177, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.3397133788592863, |
| "grad_norm": 0.6211721301078796, |
| "learning_rate": 9.909300855207969e-06, |
| "loss": 0.012, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.3400981052226604, |
| "grad_norm": 0.8269939422607422, |
| "learning_rate": 9.909069741612614e-06, |
| "loss": 0.0116, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.34048283158603443, |
| "grad_norm": 1.0314085483551025, |
| "learning_rate": 9.908838336639597e-06, |
| "loss": 0.0155, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.3408675579494085, |
| "grad_norm": 1.0547674894332886, |
| "learning_rate": 9.908606640302656e-06, |
| "loss": 0.0167, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.34125228431278254, |
| "grad_norm": 1.4624102115631104, |
| "learning_rate": 9.90837465261554e-06, |
| "loss": 0.0138, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.3416370106761566, |
| "grad_norm": 0.5952054858207703, |
| "learning_rate": 9.908142373592022e-06, |
| "loss": 0.0148, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.34202173703953065, |
| "grad_norm": 1.6350924968719482, |
| "learning_rate": 9.907909803245887e-06, |
| "loss": 0.0161, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.3424064634029047, |
| "grad_norm": 1.3565099239349365, |
| "learning_rate": 9.90767694159094e-06, |
| "loss": 0.0231, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.34279118976627876, |
| "grad_norm": 0.9824076294898987, |
| "learning_rate": 9.907443788641e-06, |
| "loss": 0.0099, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.34317591612965276, |
| "grad_norm": 0.9306924343109131, |
| "learning_rate": 9.907210344409908e-06, |
| "loss": 0.0063, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.3435606424930268, |
| "grad_norm": 0.978356122970581, |
| "learning_rate": 9.906976608911521e-06, |
| "loss": 0.0139, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.34394536885640087, |
| "grad_norm": 0.7147358655929565, |
| "learning_rate": 9.90674258215971e-06, |
| "loss": 0.0054, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.3443300952197749, |
| "grad_norm": 1.1813998222351074, |
| "learning_rate": 9.906508264168366e-06, |
| "loss": 0.0207, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.344714821583149, |
| "grad_norm": 1.3888568878173828, |
| "learning_rate": 9.906273654951399e-06, |
| "loss": 0.0131, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.34509954794652303, |
| "grad_norm": 0.6323987245559692, |
| "learning_rate": 9.906038754522733e-06, |
| "loss": 0.0094, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.3454842743098971, |
| "grad_norm": 0.9774952530860901, |
| "learning_rate": 9.90580356289631e-06, |
| "loss": 0.0204, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.34586900067327114, |
| "grad_norm": 1.0801823139190674, |
| "learning_rate": 9.90556808008609e-06, |
| "loss": 0.0129, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.3462537270366452, |
| "grad_norm": 0.7844304442405701, |
| "learning_rate": 9.905332306106051e-06, |
| "loss": 0.0065, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.34663845340001925, |
| "grad_norm": 0.9649366736412048, |
| "learning_rate": 9.905096240970184e-06, |
| "loss": 0.0129, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.3470231797633933, |
| "grad_norm": 0.8109127283096313, |
| "learning_rate": 9.904859884692507e-06, |
| "loss": 0.0184, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.34740790612676736, |
| "grad_norm": 0.47789689898490906, |
| "learning_rate": 9.904623237287044e-06, |
| "loss": 0.0042, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.34779263249014136, |
| "grad_norm": 0.3855844736099243, |
| "learning_rate": 9.904386298767841e-06, |
| "loss": 0.0041, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.3481773588535154, |
| "grad_norm": 0.8192360401153564, |
| "learning_rate": 9.904149069148962e-06, |
| "loss": 0.0057, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.3485620852168895, |
| "grad_norm": 0.7672753930091858, |
| "learning_rate": 9.90391154844449e-06, |
| "loss": 0.0146, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.3489468115802635, |
| "grad_norm": 0.6804184913635254, |
| "learning_rate": 9.903673736668524e-06, |
| "loss": 0.0074, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.3493315379436376, |
| "grad_norm": 0.7533726096153259, |
| "learning_rate": 9.903435633835174e-06, |
| "loss": 0.0107, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.34971626430701164, |
| "grad_norm": 1.295393466949463, |
| "learning_rate": 9.903197239958578e-06, |
| "loss": 0.013, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.3501009906703857, |
| "grad_norm": 0.5216909646987915, |
| "learning_rate": 9.902958555052882e-06, |
| "loss": 0.0029, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.35048571703375975, |
| "grad_norm": 0.47120431065559387, |
| "learning_rate": 9.902719579132253e-06, |
| "loss": 0.0046, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.3508704433971338, |
| "grad_norm": 1.1378737688064575, |
| "learning_rate": 9.90248031221088e-06, |
| "loss": 0.0151, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.35125516976050786, |
| "grad_norm": 0.936591386795044, |
| "learning_rate": 9.90224075430296e-06, |
| "loss": 0.0106, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.3516398961238819, |
| "grad_norm": 0.3474515676498413, |
| "learning_rate": 9.902000905422712e-06, |
| "loss": 0.0049, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.3520246224872559, |
| "grad_norm": 0.588219404220581, |
| "learning_rate": 9.901760765584376e-06, |
| "loss": 0.0116, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.35240934885062997, |
| "grad_norm": 1.3973325490951538, |
| "learning_rate": 9.901520334802203e-06, |
| "loss": 0.0184, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.352794075214004, |
| "grad_norm": 0.41826605796813965, |
| "learning_rate": 9.901279613090464e-06, |
| "loss": 0.0095, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.3531788015773781, |
| "grad_norm": 1.216929316520691, |
| "learning_rate": 9.901038600463446e-06, |
| "loss": 0.0138, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.35356352794075213, |
| "grad_norm": 0.243361234664917, |
| "learning_rate": 9.900797296935455e-06, |
| "loss": 0.0043, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.3539482543041262, |
| "grad_norm": 0.4657389521598816, |
| "learning_rate": 9.900555702520817e-06, |
| "loss": 0.008, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.35433298066750024, |
| "grad_norm": 0.6215384006500244, |
| "learning_rate": 9.900313817233867e-06, |
| "loss": 0.0218, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.3547177070308743, |
| "grad_norm": 0.2648528516292572, |
| "learning_rate": 9.900071641088962e-06, |
| "loss": 0.0031, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.35510243339424835, |
| "grad_norm": 1.0400984287261963, |
| "learning_rate": 9.89982917410048e-06, |
| "loss": 0.0153, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.3554871597576224, |
| "grad_norm": 0.3454299867153168, |
| "learning_rate": 9.899586416282811e-06, |
| "loss": 0.0039, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.35587188612099646, |
| "grad_norm": 1.5531036853790283, |
| "learning_rate": 9.899343367650364e-06, |
| "loss": 0.0069, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.3562566124843705, |
| "grad_norm": 0.21192412078380585, |
| "learning_rate": 9.899100028217566e-06, |
| "loss": 0.0037, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.3566413388477445, |
| "grad_norm": 0.9417625665664673, |
| "learning_rate": 9.898856397998856e-06, |
| "loss": 0.0311, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.35702606521111857, |
| "grad_norm": 0.4254940450191498, |
| "learning_rate": 9.8986124770087e-06, |
| "loss": 0.0091, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.3574107915744926, |
| "grad_norm": 0.2836921811103821, |
| "learning_rate": 9.898368265261573e-06, |
| "loss": 0.0042, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.3577955179378667, |
| "grad_norm": 1.2444170713424683, |
| "learning_rate": 9.898123762771972e-06, |
| "loss": 0.0122, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.35818024430124074, |
| "grad_norm": 0.540425181388855, |
| "learning_rate": 9.897878969554407e-06, |
| "loss": 0.0045, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.3585649706646148, |
| "grad_norm": 0.6315858960151672, |
| "learning_rate": 9.89763388562341e-06, |
| "loss": 0.0142, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.35894969702798885, |
| "grad_norm": 0.6477653980255127, |
| "learning_rate": 9.897388510993527e-06, |
| "loss": 0.0071, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.3593344233913629, |
| "grad_norm": 0.7630776166915894, |
| "learning_rate": 9.897142845679325e-06, |
| "loss": 0.0163, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.35971914975473696, |
| "grad_norm": 1.7266794443130493, |
| "learning_rate": 9.896896889695377e-06, |
| "loss": 0.0233, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.360103876118111, |
| "grad_norm": 0.28014814853668213, |
| "learning_rate": 9.896650643056292e-06, |
| "loss": 0.0025, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.36048860248148507, |
| "grad_norm": 0.8066553473472595, |
| "learning_rate": 9.89640410577668e-06, |
| "loss": 0.0067, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.3608733288448591, |
| "grad_norm": 0.7131628394126892, |
| "learning_rate": 9.896157277871175e-06, |
| "loss": 0.0092, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.3612580552082331, |
| "grad_norm": 0.8047205209732056, |
| "learning_rate": 9.89591015935443e-06, |
| "loss": 0.0099, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.3616427815716072, |
| "grad_norm": 0.2616739273071289, |
| "learning_rate": 9.895662750241109e-06, |
| "loss": 0.0035, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.36202750793498123, |
| "grad_norm": 1.0631747245788574, |
| "learning_rate": 9.8954150505459e-06, |
| "loss": 0.0202, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.3624122342983553, |
| "grad_norm": 1.4130353927612305, |
| "learning_rate": 9.895167060283504e-06, |
| "loss": 0.0166, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.36279696066172934, |
| "grad_norm": 0.5478530526161194, |
| "learning_rate": 9.894918779468639e-06, |
| "loss": 0.0054, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.3631816870251034, |
| "grad_norm": 0.7579345107078552, |
| "learning_rate": 9.894670208116044e-06, |
| "loss": 0.017, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.36356641338847745, |
| "grad_norm": 1.7270785570144653, |
| "learning_rate": 9.894421346240472e-06, |
| "loss": 0.0252, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.3639511397518515, |
| "grad_norm": 0.6318998336791992, |
| "learning_rate": 9.894172193856695e-06, |
| "loss": 0.0111, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.36433586611522556, |
| "grad_norm": 0.5709639191627502, |
| "learning_rate": 9.8939227509795e-06, |
| "loss": 0.005, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.3647205924785996, |
| "grad_norm": 0.7520560622215271, |
| "learning_rate": 9.893673017623692e-06, |
| "loss": 0.014, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.36510531884197367, |
| "grad_norm": 1.3842604160308838, |
| "learning_rate": 9.893422993804097e-06, |
| "loss": 0.0186, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.36549004520534767, |
| "grad_norm": 0.60210782289505, |
| "learning_rate": 9.893172679535554e-06, |
| "loss": 0.0109, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.3658747715687217, |
| "grad_norm": 0.819170355796814, |
| "learning_rate": 9.892922074832918e-06, |
| "loss": 0.0109, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.3662594979320958, |
| "grad_norm": 1.0711147785186768, |
| "learning_rate": 9.892671179711067e-06, |
| "loss": 0.0079, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.36664422429546983, |
| "grad_norm": 0.4525466859340668, |
| "learning_rate": 9.89241999418489e-06, |
| "loss": 0.0065, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.3670289506588439, |
| "grad_norm": 1.9187066555023193, |
| "learning_rate": 9.892168518269298e-06, |
| "loss": 0.0113, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.36741367702221794, |
| "grad_norm": 0.6548601984977722, |
| "learning_rate": 9.891916751979218e-06, |
| "loss": 0.0086, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.367798403385592, |
| "grad_norm": 0.6366754174232483, |
| "learning_rate": 9.89166469532959e-06, |
| "loss": 0.0088, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.36818312974896605, |
| "grad_norm": 0.8541706800460815, |
| "learning_rate": 9.891412348335379e-06, |
| "loss": 0.0152, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.3685678561123401, |
| "grad_norm": 0.5631262063980103, |
| "learning_rate": 9.89115971101156e-06, |
| "loss": 0.0128, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.36895258247571416, |
| "grad_norm": 71.88910675048828, |
| "learning_rate": 9.890906783373131e-06, |
| "loss": 0.1686, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.3693373088390882, |
| "grad_norm": 1.38483464717865, |
| "learning_rate": 9.890653565435102e-06, |
| "loss": 0.019, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.3697220352024623, |
| "grad_norm": 0.4331851899623871, |
| "learning_rate": 9.890400057212504e-06, |
| "loss": 0.0058, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.3701067615658363, |
| "grad_norm": 1.5132745504379272, |
| "learning_rate": 9.890146258720384e-06, |
| "loss": 0.0167, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.37049148792921033, |
| "grad_norm": 1.5691161155700684, |
| "learning_rate": 9.889892169973806e-06, |
| "loss": 0.0106, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.3708762142925844, |
| "grad_norm": 3.956345319747925, |
| "learning_rate": 9.889637790987852e-06, |
| "loss": 0.0238, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.37126094065595844, |
| "grad_norm": 1.152163028717041, |
| "learning_rate": 9.889383121777618e-06, |
| "loss": 0.0066, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.3716456670193325, |
| "grad_norm": 0.507671058177948, |
| "learning_rate": 9.889128162358223e-06, |
| "loss": 0.0088, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.37203039338270655, |
| "grad_norm": 0.4464673101902008, |
| "learning_rate": 9.888872912744799e-06, |
| "loss": 0.0055, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.3724151197460806, |
| "grad_norm": 0.769550621509552, |
| "learning_rate": 9.888617372952497e-06, |
| "loss": 0.0073, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.37279984610945466, |
| "grad_norm": 2.157733917236328, |
| "learning_rate": 9.888361542996483e-06, |
| "loss": 0.0428, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.3731845724728287, |
| "grad_norm": 1.0443061590194702, |
| "learning_rate": 9.888105422891942e-06, |
| "loss": 0.0151, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.37356929883620277, |
| "grad_norm": 1.335445761680603, |
| "learning_rate": 9.887849012654079e-06, |
| "loss": 0.0193, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.3739540251995768, |
| "grad_norm": 0.2949625849723816, |
| "learning_rate": 9.887592312298108e-06, |
| "loss": 0.0024, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.3743387515629509, |
| "grad_norm": 1.2898743152618408, |
| "learning_rate": 9.88733532183927e-06, |
| "loss": 0.0109, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.3747234779263249, |
| "grad_norm": 0.6975098252296448, |
| "learning_rate": 9.887078041292818e-06, |
| "loss": 0.0108, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.37510820428969893, |
| "grad_norm": 0.5480782389640808, |
| "learning_rate": 9.88682047067402e-06, |
| "loss": 0.0033, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.375492930653073, |
| "grad_norm": 1.3251780271530151, |
| "learning_rate": 9.886562609998165e-06, |
| "loss": 0.0145, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.37587765701644704, |
| "grad_norm": 1.0030848979949951, |
| "learning_rate": 9.886304459280563e-06, |
| "loss": 0.0126, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.3762623833798211, |
| "grad_norm": 0.46078020334243774, |
| "learning_rate": 9.886046018536529e-06, |
| "loss": 0.0033, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.37664710974319515, |
| "grad_norm": 0.5685471296310425, |
| "learning_rate": 9.88578728778141e-06, |
| "loss": 0.0039, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.3770318361065692, |
| "grad_norm": 0.578025758266449, |
| "learning_rate": 9.885528267030556e-06, |
| "loss": 0.016, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.37741656246994326, |
| "grad_norm": 0.30404070019721985, |
| "learning_rate": 9.885268956299348e-06, |
| "loss": 0.0026, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.3778012888333173, |
| "grad_norm": 0.7305883169174194, |
| "learning_rate": 9.885009355603172e-06, |
| "loss": 0.0052, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.37818601519669137, |
| "grad_norm": 0.5870364904403687, |
| "learning_rate": 9.884749464957438e-06, |
| "loss": 0.0063, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.3785707415600654, |
| "grad_norm": 1.9455591440200806, |
| "learning_rate": 9.884489284377575e-06, |
| "loss": 0.0235, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.3789554679234394, |
| "grad_norm": 0.6894332766532898, |
| "learning_rate": 9.88422881387902e-06, |
| "loss": 0.0213, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.3793401942868135, |
| "grad_norm": 0.99710613489151, |
| "learning_rate": 9.88396805347724e-06, |
| "loss": 0.0058, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.37972492065018754, |
| "grad_norm": 1.2745070457458496, |
| "learning_rate": 9.883707003187708e-06, |
| "loss": 0.0103, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.3801096470135616, |
| "grad_norm": 0.676323652267456, |
| "learning_rate": 9.88344566302592e-06, |
| "loss": 0.0053, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.38049437337693565, |
| "grad_norm": 1.0734655857086182, |
| "learning_rate": 9.883184033007385e-06, |
| "loss": 0.0062, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.3808790997403097, |
| "grad_norm": 2.1206910610198975, |
| "learning_rate": 9.882922113147637e-06, |
| "loss": 0.0335, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.38126382610368376, |
| "grad_norm": 0.948715090751648, |
| "learning_rate": 9.88265990346222e-06, |
| "loss": 0.0198, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.3816485524670578, |
| "grad_norm": 0.38453373312950134, |
| "learning_rate": 9.882397403966696e-06, |
| "loss": 0.0024, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.38203327883043187, |
| "grad_norm": 2.032207727432251, |
| "learning_rate": 9.882134614676647e-06, |
| "loss": 0.0197, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.3824180051938059, |
| "grad_norm": 0.744788408279419, |
| "learning_rate": 9.881871535607672e-06, |
| "loss": 0.0091, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.38280273155718, |
| "grad_norm": 0.6455265879631042, |
| "learning_rate": 9.881608166775384e-06, |
| "loss": 0.0097, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.38318745792055403, |
| "grad_norm": 1.2942465543746948, |
| "learning_rate": 9.881344508195416e-06, |
| "loss": 0.0043, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.38357218428392803, |
| "grad_norm": 0.12089524418115616, |
| "learning_rate": 9.881080559883418e-06, |
| "loss": 0.0013, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.3839569106473021, |
| "grad_norm": 1.5285307168960571, |
| "learning_rate": 9.880816321855055e-06, |
| "loss": 0.028, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.38434163701067614, |
| "grad_norm": 0.778884768486023, |
| "learning_rate": 9.880551794126015e-06, |
| "loss": 0.0036, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.3847263633740502, |
| "grad_norm": 1.3341624736785889, |
| "learning_rate": 9.880286976711992e-06, |
| "loss": 0.0091, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.38511108973742425, |
| "grad_norm": 0.40356457233428955, |
| "learning_rate": 9.880021869628711e-06, |
| "loss": 0.0035, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.3854958161007983, |
| "grad_norm": 0.27904146909713745, |
| "learning_rate": 9.879756472891904e-06, |
| "loss": 0.0013, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.38588054246417236, |
| "grad_norm": 1.152552843093872, |
| "learning_rate": 9.879490786517326e-06, |
| "loss": 0.0176, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.3862652688275464, |
| "grad_norm": 0.23684851825237274, |
| "learning_rate": 9.879224810520743e-06, |
| "loss": 0.0013, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.38664999519092047, |
| "grad_norm": 0.44542622566223145, |
| "learning_rate": 9.878958544917943e-06, |
| "loss": 0.0025, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.3870347215542945, |
| "grad_norm": 0.21203474700450897, |
| "learning_rate": 9.878691989724734e-06, |
| "loss": 0.0022, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.3874194479176686, |
| "grad_norm": 0.6239928007125854, |
| "learning_rate": 9.878425144956933e-06, |
| "loss": 0.0058, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.38780417428104264, |
| "grad_norm": 1.1376228332519531, |
| "learning_rate": 9.87815801063038e-06, |
| "loss": 0.0116, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.38818890064441663, |
| "grad_norm": 1.0717668533325195, |
| "learning_rate": 9.877890586760932e-06, |
| "loss": 0.021, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.3885736270077907, |
| "grad_norm": 1.4471285343170166, |
| "learning_rate": 9.877622873364461e-06, |
| "loss": 0.0195, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.38895835337116474, |
| "grad_norm": 1.0057027339935303, |
| "learning_rate": 9.877354870456856e-06, |
| "loss": 0.007, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.3893430797345388, |
| "grad_norm": 0.5728893280029297, |
| "learning_rate": 9.877086578054026e-06, |
| "loss": 0.0068, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.38972780609791285, |
| "grad_norm": 0.2538914382457733, |
| "learning_rate": 9.876817996171895e-06, |
| "loss": 0.0028, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.3901125324612869, |
| "grad_norm": 1.154489517211914, |
| "learning_rate": 9.876549124826405e-06, |
| "loss": 0.0227, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.39049725882466096, |
| "grad_norm": 0.3190124034881592, |
| "learning_rate": 9.876279964033513e-06, |
| "loss": 0.0025, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.390881985188035, |
| "grad_norm": 1.2935168743133545, |
| "learning_rate": 9.876010513809195e-06, |
| "loss": 0.0095, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.3912667115514091, |
| "grad_norm": 0.6997138261795044, |
| "learning_rate": 9.875740774169449e-06, |
| "loss": 0.0052, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.39165143791478313, |
| "grad_norm": 1.2706716060638428, |
| "learning_rate": 9.87547074513028e-06, |
| "loss": 0.0135, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.3920361642781572, |
| "grad_norm": 0.1183147132396698, |
| "learning_rate": 9.875200426707718e-06, |
| "loss": 0.0018, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.3924208906415312, |
| "grad_norm": 0.41291019320487976, |
| "learning_rate": 9.874929818917806e-06, |
| "loss": 0.008, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.39280561700490524, |
| "grad_norm": 0.35821545124053955, |
| "learning_rate": 9.874658921776609e-06, |
| "loss": 0.0033, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.3931903433682793, |
| "grad_norm": 0.6069442629814148, |
| "learning_rate": 9.874387735300204e-06, |
| "loss": 0.0067, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.39357506973165335, |
| "grad_norm": 1.0225082635879517, |
| "learning_rate": 9.874116259504687e-06, |
| "loss": 0.0093, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.3939597960950274, |
| "grad_norm": 0.4544816315174103, |
| "learning_rate": 9.873844494406173e-06, |
| "loss": 0.0047, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.39434452245840146, |
| "grad_norm": 0.37393704056739807, |
| "learning_rate": 9.873572440020792e-06, |
| "loss": 0.0028, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.3947292488217755, |
| "grad_norm": 0.9215484261512756, |
| "learning_rate": 9.873300096364688e-06, |
| "loss": 0.0143, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.39511397518514957, |
| "grad_norm": 0.4308564066886902, |
| "learning_rate": 9.873027463454032e-06, |
| "loss": 0.0123, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.3954987015485236, |
| "grad_norm": 0.06539972871541977, |
| "learning_rate": 9.872754541305003e-06, |
| "loss": 0.0007, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.3958834279118977, |
| "grad_norm": 1.0676727294921875, |
| "learning_rate": 9.8724813299338e-06, |
| "loss": 0.0177, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.39626815427527173, |
| "grad_norm": 1.1856839656829834, |
| "learning_rate": 9.872207829356641e-06, |
| "loss": 0.0129, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.3966528806386458, |
| "grad_norm": 0.5124572515487671, |
| "learning_rate": 9.871934039589758e-06, |
| "loss": 0.0047, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.3970376070020198, |
| "grad_norm": 0.4818362295627594, |
| "learning_rate": 9.871659960649402e-06, |
| "loss": 0.0085, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.39742233336539384, |
| "grad_norm": 0.4717956483364105, |
| "learning_rate": 9.871385592551843e-06, |
| "loss": 0.005, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.3978070597287679, |
| "grad_norm": 0.8839301466941833, |
| "learning_rate": 9.871110935313364e-06, |
| "loss": 0.011, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.39819178609214195, |
| "grad_norm": 0.579458475112915, |
| "learning_rate": 9.87083598895027e-06, |
| "loss": 0.0048, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.398576512455516, |
| "grad_norm": 0.11299765110015869, |
| "learning_rate": 9.870560753478875e-06, |
| "loss": 0.0016, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.39896123881889006, |
| "grad_norm": 0.35690218210220337, |
| "learning_rate": 9.87028522891552e-06, |
| "loss": 0.0067, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.3993459651822641, |
| "grad_norm": 1.820987582206726, |
| "learning_rate": 9.870009415276557e-06, |
| "loss": 0.0232, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.3997306915456382, |
| "grad_norm": 0.9093754291534424, |
| "learning_rate": 9.86973331257836e-06, |
| "loss": 0.0087, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.4001154179090122, |
| "grad_norm": 0.2319750189781189, |
| "learning_rate": 9.869456920837312e-06, |
| "loss": 0.0021, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.4005001442723863, |
| "grad_norm": 0.22607071697711945, |
| "learning_rate": 9.869180240069822e-06, |
| "loss": 0.0085, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.40088487063576034, |
| "grad_norm": 1.4031366109848022, |
| "learning_rate": 9.868903270292311e-06, |
| "loss": 0.0204, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.4012695969991344, |
| "grad_norm": 0.7285005450248718, |
| "learning_rate": 9.868626011521219e-06, |
| "loss": 0.0076, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.4016543233625084, |
| "grad_norm": 0.4827679395675659, |
| "learning_rate": 9.868348463773003e-06, |
| "loss": 0.0069, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.40203904972588245, |
| "grad_norm": 0.32623255252838135, |
| "learning_rate": 9.868070627064135e-06, |
| "loss": 0.002, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.4024237760892565, |
| "grad_norm": 0.22089290618896484, |
| "learning_rate": 9.867792501411108e-06, |
| "loss": 0.0017, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.40280850245263056, |
| "grad_norm": 0.9565873146057129, |
| "learning_rate": 9.86751408683043e-06, |
| "loss": 0.0077, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.4031932288160046, |
| "grad_norm": 0.15053868293762207, |
| "learning_rate": 9.867235383338625e-06, |
| "loss": 0.0017, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.40357795517937867, |
| "grad_norm": 1.419770359992981, |
| "learning_rate": 9.866956390952236e-06, |
| "loss": 0.0205, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.4039626815427527, |
| "grad_norm": 0.6458820700645447, |
| "learning_rate": 9.866677109687823e-06, |
| "loss": 0.0047, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.4043474079061268, |
| "grad_norm": 0.46830523014068604, |
| "learning_rate": 9.866397539561962e-06, |
| "loss": 0.004, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.40473213426950083, |
| "grad_norm": 0.7895929217338562, |
| "learning_rate": 9.866117680591248e-06, |
| "loss": 0.0274, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.4051168606328749, |
| "grad_norm": 0.4475204646587372, |
| "learning_rate": 9.86583753279229e-06, |
| "loss": 0.0033, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.40550158699624894, |
| "grad_norm": 0.9515151977539062, |
| "learning_rate": 9.865557096181718e-06, |
| "loss": 0.0072, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.40588631335962294, |
| "grad_norm": 1.6716892719268799, |
| "learning_rate": 9.865276370776178e-06, |
| "loss": 0.0156, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.406271039722997, |
| "grad_norm": 0.8594703674316406, |
| "learning_rate": 9.86499535659233e-06, |
| "loss": 0.0177, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.40665576608637105, |
| "grad_norm": 0.5373953580856323, |
| "learning_rate": 9.864714053646856e-06, |
| "loss": 0.0108, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.4070404924497451, |
| "grad_norm": 1.1601587533950806, |
| "learning_rate": 9.86443246195645e-06, |
| "loss": 0.0148, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.40742521881311916, |
| "grad_norm": 0.06577706336975098, |
| "learning_rate": 9.864150581537828e-06, |
| "loss": 0.001, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.4078099451764932, |
| "grad_norm": 0.7323742508888245, |
| "learning_rate": 9.863868412407721e-06, |
| "loss": 0.0077, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.40819467153986727, |
| "grad_norm": 0.14481770992279053, |
| "learning_rate": 9.863585954582876e-06, |
| "loss": 0.0022, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.4085793979032413, |
| "grad_norm": 0.33891233801841736, |
| "learning_rate": 9.86330320808006e-06, |
| "loss": 0.0077, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.4089641242666154, |
| "grad_norm": 0.4427297115325928, |
| "learning_rate": 9.863020172916054e-06, |
| "loss": 0.0064, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.40934885062998944, |
| "grad_norm": 0.6941469311714172, |
| "learning_rate": 9.862736849107656e-06, |
| "loss": 0.006, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.4097335769933635, |
| "grad_norm": 0.2557992935180664, |
| "learning_rate": 9.862453236671685e-06, |
| "loss": 0.0171, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.41011830335673755, |
| "grad_norm": 0.44381460547447205, |
| "learning_rate": 9.862169335624976e-06, |
| "loss": 0.007, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.41050302972011155, |
| "grad_norm": 1.5972293615341187, |
| "learning_rate": 9.861885145984377e-06, |
| "loss": 0.0077, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.4108877560834856, |
| "grad_norm": 2.6916608810424805, |
| "learning_rate": 9.861600667766758e-06, |
| "loss": 0.0161, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.41127248244685966, |
| "grad_norm": 0.8969532251358032, |
| "learning_rate": 9.861315900989001e-06, |
| "loss": 0.0019, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.4116572088102337, |
| "grad_norm": 0.25799357891082764, |
| "learning_rate": 9.861030845668014e-06, |
| "loss": 0.0029, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.41204193517360777, |
| "grad_norm": 0.9042412638664246, |
| "learning_rate": 9.860745501820712e-06, |
| "loss": 0.0098, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.4124266615369818, |
| "grad_norm": 0.4798334538936615, |
| "learning_rate": 9.860459869464032e-06, |
| "loss": 0.0032, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.4128113879003559, |
| "grad_norm": 0.728665828704834, |
| "learning_rate": 9.860173948614929e-06, |
| "loss": 0.005, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.41319611426372993, |
| "grad_norm": 0.6425368189811707, |
| "learning_rate": 9.859887739290375e-06, |
| "loss": 0.0149, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.413580840627104, |
| "grad_norm": 0.6536305546760559, |
| "learning_rate": 9.859601241507354e-06, |
| "loss": 0.0055, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.41396556699047804, |
| "grad_norm": 1.2144076824188232, |
| "learning_rate": 9.859314455282873e-06, |
| "loss": 0.0102, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.4143502933538521, |
| "grad_norm": 0.04166639596223831, |
| "learning_rate": 9.859027380633956e-06, |
| "loss": 0.0005, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.41473501971722615, |
| "grad_norm": 0.9971767067909241, |
| "learning_rate": 9.858740017577642e-06, |
| "loss": 0.0052, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.41511974608060015, |
| "grad_norm": 0.9212055206298828, |
| "learning_rate": 9.858452366130983e-06, |
| "loss": 0.0186, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.4155044724439742, |
| "grad_norm": 0.07661807537078857, |
| "learning_rate": 9.858164426311059e-06, |
| "loss": 0.0007, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.41588919880734826, |
| "grad_norm": 1.0191240310668945, |
| "learning_rate": 9.857876198134957e-06, |
| "loss": 0.0183, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.4162739251707223, |
| "grad_norm": 0.6929414868354797, |
| "learning_rate": 9.857587681619784e-06, |
| "loss": 0.0115, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.41665865153409637, |
| "grad_norm": 0.6691508293151855, |
| "learning_rate": 9.857298876782666e-06, |
| "loss": 0.0113, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.4170433778974704, |
| "grad_norm": 1.7734836339950562, |
| "learning_rate": 9.857009783640746e-06, |
| "loss": 0.0107, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.4174281042608445, |
| "grad_norm": 0.8787226676940918, |
| "learning_rate": 9.856720402211182e-06, |
| "loss": 0.0037, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.41781283062421853, |
| "grad_norm": 0.6904507279396057, |
| "learning_rate": 9.85643073251115e-06, |
| "loss": 0.0177, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.4181975569875926, |
| "grad_norm": 0.550852358341217, |
| "learning_rate": 9.856140774557843e-06, |
| "loss": 0.0062, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.41858228335096664, |
| "grad_norm": 0.6680750250816345, |
| "learning_rate": 9.855850528368473e-06, |
| "loss": 0.0093, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.4189670097143407, |
| "grad_norm": 0.508996307849884, |
| "learning_rate": 9.855559993960269e-06, |
| "loss": 0.0017, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.4193517360777147, |
| "grad_norm": 0.9837608933448792, |
| "learning_rate": 9.855269171350471e-06, |
| "loss": 0.0054, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.41973646244108875, |
| "grad_norm": 0.5428667068481445, |
| "learning_rate": 9.854978060556343e-06, |
| "loss": 0.0048, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.4201211888044628, |
| "grad_norm": 1.060753345489502, |
| "learning_rate": 9.854686661595166e-06, |
| "loss": 0.0093, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.42050591516783686, |
| "grad_norm": 0.2443462312221527, |
| "learning_rate": 9.854394974484233e-06, |
| "loss": 0.0015, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.4208906415312109, |
| "grad_norm": 0.8732754588127136, |
| "learning_rate": 9.854102999240858e-06, |
| "loss": 0.0077, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.421275367894585, |
| "grad_norm": 1.7188763618469238, |
| "learning_rate": 9.853810735882371e-06, |
| "loss": 0.0202, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.42166009425795903, |
| "grad_norm": 1.274489164352417, |
| "learning_rate": 9.85351818442612e-06, |
| "loss": 0.0182, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.4220448206213331, |
| "grad_norm": 1.6367267370224, |
| "learning_rate": 9.85322534488947e-06, |
| "loss": 0.011, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.42242954698470714, |
| "grad_norm": 1.1013633012771606, |
| "learning_rate": 9.852932217289798e-06, |
| "loss": 0.0096, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.4228142733480812, |
| "grad_norm": 0.5000425577163696, |
| "learning_rate": 9.852638801644509e-06, |
| "loss": 0.0049, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.42319899971145525, |
| "grad_norm": 1.0980358123779297, |
| "learning_rate": 9.852345097971017e-06, |
| "loss": 0.0102, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.4235837260748293, |
| "grad_norm": 1.013664960861206, |
| "learning_rate": 9.85205110628675e-06, |
| "loss": 0.0202, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.4239684524382033, |
| "grad_norm": 0.8884641528129578, |
| "learning_rate": 9.851756826609164e-06, |
| "loss": 0.0173, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.42435317880157736, |
| "grad_norm": 0.13429048657417297, |
| "learning_rate": 9.851462258955722e-06, |
| "loss": 0.001, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.4247379051649514, |
| "grad_norm": 0.36847949028015137, |
| "learning_rate": 9.851167403343911e-06, |
| "loss": 0.0101, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.42512263152832547, |
| "grad_norm": 1.019138216972351, |
| "learning_rate": 9.850872259791228e-06, |
| "loss": 0.009, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.4255073578916995, |
| "grad_norm": 0.6060003042221069, |
| "learning_rate": 9.850576828315196e-06, |
| "loss": 0.0054, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.4258920842550736, |
| "grad_norm": 1.8947292566299438, |
| "learning_rate": 9.850281108933346e-06, |
| "loss": 0.0138, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.42627681061844763, |
| "grad_norm": 1.506212592124939, |
| "learning_rate": 9.849985101663235e-06, |
| "loss": 0.021, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.4266615369818217, |
| "grad_norm": 1.6545723676681519, |
| "learning_rate": 9.849688806522428e-06, |
| "loss": 0.0157, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.42704626334519574, |
| "grad_norm": 0.5730804800987244, |
| "learning_rate": 9.849392223528514e-06, |
| "loss": 0.0055, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.4274309897085698, |
| "grad_norm": 0.5096026659011841, |
| "learning_rate": 9.849095352699096e-06, |
| "loss": 0.0033, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.42781571607194385, |
| "grad_norm": 2.3961026668548584, |
| "learning_rate": 9.848798194051797e-06, |
| "loss": 0.0082, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.4282004424353179, |
| "grad_norm": 0.8801290988922119, |
| "learning_rate": 9.84850074760425e-06, |
| "loss": 0.0119, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.4285851687986919, |
| "grad_norm": 3.352750778198242, |
| "learning_rate": 9.848203013374113e-06, |
| "loss": 0.0191, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.42896989516206596, |
| "grad_norm": 1.3553608655929565, |
| "learning_rate": 9.847904991379061e-06, |
| "loss": 0.0211, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.42935462152544, |
| "grad_norm": 2.213486671447754, |
| "learning_rate": 9.847606681636776e-06, |
| "loss": 0.0346, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.42973934788881407, |
| "grad_norm": 0.18214163184165955, |
| "learning_rate": 9.84730808416497e-06, |
| "loss": 0.0013, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.4301240742521881, |
| "grad_norm": 0.08196653425693512, |
| "learning_rate": 9.847009198981364e-06, |
| "loss": 0.0009, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.4305088006155622, |
| "grad_norm": 0.6414639353752136, |
| "learning_rate": 9.846710026103698e-06, |
| "loss": 0.0187, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.43089352697893624, |
| "grad_norm": 0.3984313905239105, |
| "learning_rate": 9.846410565549732e-06, |
| "loss": 0.0026, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.4312782533423103, |
| "grad_norm": 0.411411851644516, |
| "learning_rate": 9.846110817337237e-06, |
| "loss": 0.0023, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.43166297970568435, |
| "grad_norm": 0.3851987421512604, |
| "learning_rate": 9.845810781484005e-06, |
| "loss": 0.0032, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.4320477060690584, |
| "grad_norm": 2.438159942626953, |
| "learning_rate": 9.845510458007848e-06, |
| "loss": 0.0126, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.43243243243243246, |
| "grad_norm": 0.19791530072689056, |
| "learning_rate": 9.845209846926587e-06, |
| "loss": 0.001, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.43281715879580646, |
| "grad_norm": 0.5351514220237732, |
| "learning_rate": 9.844908948258067e-06, |
| "loss": 0.0074, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.4332018851591805, |
| "grad_norm": 0.5280462503433228, |
| "learning_rate": 9.84460776202015e-06, |
| "loss": 0.0113, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.43358661152255457, |
| "grad_norm": 1.4508618116378784, |
| "learning_rate": 9.844306288230709e-06, |
| "loss": 0.0255, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.4339713378859286, |
| "grad_norm": 1.2629048824310303, |
| "learning_rate": 9.84400452690764e-06, |
| "loss": 0.0135, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.4343560642493027, |
| "grad_norm": 1.1086742877960205, |
| "learning_rate": 9.843702478068855e-06, |
| "loss": 0.0168, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.43474079061267673, |
| "grad_norm": 0.42978718876838684, |
| "learning_rate": 9.84340014173228e-06, |
| "loss": 0.0031, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.4351255169760508, |
| "grad_norm": 0.5256184339523315, |
| "learning_rate": 9.84309751791586e-06, |
| "loss": 0.003, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.43551024333942484, |
| "grad_norm": 0.3392198085784912, |
| "learning_rate": 9.84279460663756e-06, |
| "loss": 0.0025, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.4358949697027989, |
| "grad_norm": 0.4952617287635803, |
| "learning_rate": 9.842491407915358e-06, |
| "loss": 0.0038, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.43627969606617295, |
| "grad_norm": 1.2189668416976929, |
| "learning_rate": 9.842187921767248e-06, |
| "loss": 0.0062, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.436664422429547, |
| "grad_norm": 0.4146406650543213, |
| "learning_rate": 9.841884148211248e-06, |
| "loss": 0.0205, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.43704914879292106, |
| "grad_norm": 0.19388249516487122, |
| "learning_rate": 9.841580087265384e-06, |
| "loss": 0.0013, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.43743387515629506, |
| "grad_norm": 0.3461119532585144, |
| "learning_rate": 9.841275738947704e-06, |
| "loss": 0.004, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.4378186015196691, |
| "grad_norm": 0.6838563084602356, |
| "learning_rate": 9.840971103276276e-06, |
| "loss": 0.0099, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.43820332788304317, |
| "grad_norm": 0.8505207896232605, |
| "learning_rate": 9.840666180269178e-06, |
| "loss": 0.0071, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.4385880542464172, |
| "grad_norm": 0.705162525177002, |
| "learning_rate": 9.840360969944511e-06, |
| "loss": 0.0037, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.4389727806097913, |
| "grad_norm": 0.9783769845962524, |
| "learning_rate": 9.84005547232039e-06, |
| "loss": 0.006, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.43935750697316533, |
| "grad_norm": 1.1325137615203857, |
| "learning_rate": 9.839749687414947e-06, |
| "loss": 0.0202, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.4397422333365394, |
| "grad_norm": 1.34829580783844, |
| "learning_rate": 9.839443615246334e-06, |
| "loss": 0.0029, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.44012695969991344, |
| "grad_norm": 0.5483993291854858, |
| "learning_rate": 9.839137255832715e-06, |
| "loss": 0.0047, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.4405116860632875, |
| "grad_norm": 0.7500666975975037, |
| "learning_rate": 9.838830609192277e-06, |
| "loss": 0.0193, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.44089641242666155, |
| "grad_norm": 0.3582031726837158, |
| "learning_rate": 9.83852367534322e-06, |
| "loss": 0.0035, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.4412811387900356, |
| "grad_norm": 1.813923954963684, |
| "learning_rate": 9.83821645430376e-06, |
| "loss": 0.0267, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.44166586515340966, |
| "grad_norm": 1.7281469106674194, |
| "learning_rate": 9.837908946092134e-06, |
| "loss": 0.0104, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.44205059151678366, |
| "grad_norm": 4.751184463500977, |
| "learning_rate": 9.837601150726594e-06, |
| "loss": 0.0282, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.4424353178801577, |
| "grad_norm": 0.6789389252662659, |
| "learning_rate": 9.837293068225408e-06, |
| "loss": 0.0082, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.4428200442435318, |
| "grad_norm": 0.36475417017936707, |
| "learning_rate": 9.836984698606865e-06, |
| "loss": 0.0105, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.44320477060690583, |
| "grad_norm": 2.3141651153564453, |
| "learning_rate": 9.836676041889265e-06, |
| "loss": 0.0109, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.4435894969702799, |
| "grad_norm": 0.6193933486938477, |
| "learning_rate": 9.836367098090931e-06, |
| "loss": 0.0074, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.44397422333365394, |
| "grad_norm": 0.4467495083808899, |
| "learning_rate": 9.836057867230198e-06, |
| "loss": 0.0107, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.444358949697028, |
| "grad_norm": 0.3161061108112335, |
| "learning_rate": 9.835748349325423e-06, |
| "loss": 0.009, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.44474367606040205, |
| "grad_norm": 0.33670666813850403, |
| "learning_rate": 9.835438544394973e-06, |
| "loss": 0.0061, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.4451284024237761, |
| "grad_norm": 1.2605667114257812, |
| "learning_rate": 9.835128452457241e-06, |
| "loss": 0.0127, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.44551312878715016, |
| "grad_norm": 0.40594059228897095, |
| "learning_rate": 9.834818073530632e-06, |
| "loss": 0.0066, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.4458978551505242, |
| "grad_norm": 1.4011011123657227, |
| "learning_rate": 9.834507407633567e-06, |
| "loss": 0.0164, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.4462825815138982, |
| "grad_norm": 0.4081282615661621, |
| "learning_rate": 9.834196454784485e-06, |
| "loss": 0.0077, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.44666730787727227, |
| "grad_norm": 0.34367305040359497, |
| "learning_rate": 9.833885215001844e-06, |
| "loss": 0.0023, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.4470520342406463, |
| "grad_norm": 0.45217686891555786, |
| "learning_rate": 9.833573688304117e-06, |
| "loss": 0.0052, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.4474367606040204, |
| "grad_norm": 1.1429780721664429, |
| "learning_rate": 9.833261874709794e-06, |
| "loss": 0.0444, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.44782148696739443, |
| "grad_norm": 0.1226806491613388, |
| "learning_rate": 9.832949774237385e-06, |
| "loss": 0.0014, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.4482062133307685, |
| "grad_norm": 1.016471266746521, |
| "learning_rate": 9.832637386905413e-06, |
| "loss": 0.0053, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.44859093969414254, |
| "grad_norm": 0.697564959526062, |
| "learning_rate": 9.832324712732419e-06, |
| "loss": 0.0151, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.4489756660575166, |
| "grad_norm": 0.4925800561904907, |
| "learning_rate": 9.832011751736965e-06, |
| "loss": 0.0059, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.44936039242089065, |
| "grad_norm": 0.31300026178359985, |
| "learning_rate": 9.831698503937623e-06, |
| "loss": 0.003, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.4497451187842647, |
| "grad_norm": 1.6890839338302612, |
| "learning_rate": 9.831384969352985e-06, |
| "loss": 0.0255, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.45012984514763876, |
| "grad_norm": 0.5058892965316772, |
| "learning_rate": 9.831071148001668e-06, |
| "loss": 0.0036, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.4505145715110128, |
| "grad_norm": 0.48593196272850037, |
| "learning_rate": 9.83075703990229e-06, |
| "loss": 0.0039, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.4508992978743868, |
| "grad_norm": 0.8874308466911316, |
| "learning_rate": 9.8304426450735e-06, |
| "loss": 0.015, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.4512840242377609, |
| "grad_norm": 0.7723665237426758, |
| "learning_rate": 9.83012796353396e-06, |
| "loss": 0.006, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.4516687506011349, |
| "grad_norm": 2.9279704093933105, |
| "learning_rate": 9.829812995302344e-06, |
| "loss": 0.0163, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.452053476964509, |
| "grad_norm": 0.4482285976409912, |
| "learning_rate": 9.829497740397349e-06, |
| "loss": 0.0053, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.45243820332788304, |
| "grad_norm": 0.8719068765640259, |
| "learning_rate": 9.829182198837686e-06, |
| "loss": 0.0078, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.4528229296912571, |
| "grad_norm": 0.6514776349067688, |
| "learning_rate": 9.828866370642086e-06, |
| "loss": 0.0116, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.45320765605463115, |
| "grad_norm": 1.1655035018920898, |
| "learning_rate": 9.828550255829291e-06, |
| "loss": 0.013, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.4535923824180052, |
| "grad_norm": 0.6634146571159363, |
| "learning_rate": 9.828233854418067e-06, |
| "loss": 0.0074, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.45397710878137926, |
| "grad_norm": 1.4584119319915771, |
| "learning_rate": 9.827917166427196e-06, |
| "loss": 0.0141, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.4543618351447533, |
| "grad_norm": 2.0052123069763184, |
| "learning_rate": 9.82760019187547e-06, |
| "loss": 0.0076, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.45474656150812737, |
| "grad_norm": 0.17516352236270905, |
| "learning_rate": 9.827282930781706e-06, |
| "loss": 0.0012, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.4551312878715014, |
| "grad_norm": 0.6905176043510437, |
| "learning_rate": 9.826965383164736e-06, |
| "loss": 0.0053, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.4555160142348754, |
| "grad_norm": 1.4227741956710815, |
| "learning_rate": 9.826647549043404e-06, |
| "loss": 0.0105, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.4559007405982495, |
| "grad_norm": 0.27875468134880066, |
| "learning_rate": 9.82632942843658e-06, |
| "loss": 0.002, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.45628546696162353, |
| "grad_norm": 0.11191385239362717, |
| "learning_rate": 9.826011021363142e-06, |
| "loss": 0.001, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.4566701933249976, |
| "grad_norm": 1.0875827074050903, |
| "learning_rate": 9.825692327841991e-06, |
| "loss": 0.0186, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.45705491968837164, |
| "grad_norm": 1.3509730100631714, |
| "learning_rate": 9.825373347892044e-06, |
| "loss": 0.0168, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.4574396460517457, |
| "grad_norm": 0.4952923357486725, |
| "learning_rate": 9.825054081532233e-06, |
| "loss": 0.0044, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.45782437241511975, |
| "grad_norm": 0.09577671438455582, |
| "learning_rate": 9.824734528781506e-06, |
| "loss": 0.0014, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.4582090987784938, |
| "grad_norm": 1.6081347465515137, |
| "learning_rate": 9.824414689658832e-06, |
| "loss": 0.0132, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.45859382514186786, |
| "grad_norm": 1.3940715789794922, |
| "learning_rate": 9.824094564183194e-06, |
| "loss": 0.0075, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.4589785515052419, |
| "grad_norm": 0.5964284539222717, |
| "learning_rate": 9.823774152373597e-06, |
| "loss": 0.0093, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.45936327786861597, |
| "grad_norm": 1.5253177881240845, |
| "learning_rate": 9.823453454249055e-06, |
| "loss": 0.0132, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.45974800423198997, |
| "grad_norm": 1.2830257415771484, |
| "learning_rate": 9.823132469828603e-06, |
| "loss": 0.0131, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.460132730595364, |
| "grad_norm": 0.6056346893310547, |
| "learning_rate": 9.822811199131293e-06, |
| "loss": 0.0123, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.4605174569587381, |
| "grad_norm": 0.48189786076545715, |
| "learning_rate": 9.822489642176195e-06, |
| "loss": 0.0078, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.46090218332211214, |
| "grad_norm": 0.28781089186668396, |
| "learning_rate": 9.822167798982398e-06, |
| "loss": 0.0061, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.4612869096854862, |
| "grad_norm": 0.8235536217689514, |
| "learning_rate": 9.821845669569e-06, |
| "loss": 0.0105, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.46167163604886025, |
| "grad_norm": 0.42384400963783264, |
| "learning_rate": 9.821523253955123e-06, |
| "loss": 0.0117, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.4620563624122343, |
| "grad_norm": 0.3422315716743469, |
| "learning_rate": 9.821200552159906e-06, |
| "loss": 0.0085, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.46244108877560836, |
| "grad_norm": 0.5151508450508118, |
| "learning_rate": 9.820877564202498e-06, |
| "loss": 0.0067, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.4628258151389824, |
| "grad_norm": 0.6236201524734497, |
| "learning_rate": 9.820554290102074e-06, |
| "loss": 0.0079, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.46321054150235647, |
| "grad_norm": 0.8919386267662048, |
| "learning_rate": 9.82023072987782e-06, |
| "loss": 0.0066, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.4635952678657305, |
| "grad_norm": 1.0158860683441162, |
| "learning_rate": 9.819906883548943e-06, |
| "loss": 0.0149, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.4639799942291046, |
| "grad_norm": 0.2793009579181671, |
| "learning_rate": 9.819582751134663e-06, |
| "loss": 0.0022, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.4643647205924786, |
| "grad_norm": 0.1821567565202713, |
| "learning_rate": 9.81925833265422e-06, |
| "loss": 0.0026, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.46474944695585263, |
| "grad_norm": 0.7648979425430298, |
| "learning_rate": 9.818933628126867e-06, |
| "loss": 0.0064, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.4651341733192267, |
| "grad_norm": 0.1448449194431305, |
| "learning_rate": 9.818608637571882e-06, |
| "loss": 0.0022, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.46551889968260074, |
| "grad_norm": 5.033487796783447, |
| "learning_rate": 9.81828336100855e-06, |
| "loss": 0.0216, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.4659036260459748, |
| "grad_norm": 1.1414070129394531, |
| "learning_rate": 9.817957798456181e-06, |
| "loss": 0.011, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.46628835240934885, |
| "grad_norm": 3.526010513305664, |
| "learning_rate": 9.817631949934096e-06, |
| "loss": 0.0065, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.4666730787727229, |
| "grad_norm": 0.4803553819656372, |
| "learning_rate": 9.81730581546164e-06, |
| "loss": 0.0084, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.46705780513609696, |
| "grad_norm": 0.8895028233528137, |
| "learning_rate": 9.816979395058164e-06, |
| "loss": 0.0048, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.467442531499471, |
| "grad_norm": 0.428845077753067, |
| "learning_rate": 9.81665268874305e-06, |
| "loss": 0.0015, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.46782725786284507, |
| "grad_norm": 13.160736083984375, |
| "learning_rate": 9.816325696535684e-06, |
| "loss": 0.0737, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.4682119842262191, |
| "grad_norm": 1.7152873277664185, |
| "learning_rate": 9.815998418455477e-06, |
| "loss": 0.0029, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.4685967105895932, |
| "grad_norm": 0.5329907536506653, |
| "learning_rate": 9.815670854521855e-06, |
| "loss": 0.0038, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.4689814369529672, |
| "grad_norm": 1.1879760026931763, |
| "learning_rate": 9.815343004754259e-06, |
| "loss": 0.0045, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.46936616331634123, |
| "grad_norm": 0.43010175228118896, |
| "learning_rate": 9.81501486917215e-06, |
| "loss": 0.0023, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.4697508896797153, |
| "grad_norm": 1.874674916267395, |
| "learning_rate": 9.814686447795004e-06, |
| "loss": 0.0243, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.47013561604308934, |
| "grad_norm": 1.4021985530853271, |
| "learning_rate": 9.814357740642314e-06, |
| "loss": 0.0359, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.4705203424064634, |
| "grad_norm": 0.5732538104057312, |
| "learning_rate": 9.81402874773359e-06, |
| "loss": 0.0143, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.47090506876983745, |
| "grad_norm": 0.4352494776248932, |
| "learning_rate": 9.813699469088362e-06, |
| "loss": 0.0037, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.4712897951332115, |
| "grad_norm": 0.9380084276199341, |
| "learning_rate": 9.81336990472617e-06, |
| "loss": 0.0078, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.47167452149658556, |
| "grad_norm": 1.202060580253601, |
| "learning_rate": 9.81304005466658e-06, |
| "loss": 0.0107, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.4720592478599596, |
| "grad_norm": 1.4367294311523438, |
| "learning_rate": 9.812709918929168e-06, |
| "loss": 0.0051, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.4724439742233337, |
| "grad_norm": 1.4543755054473877, |
| "learning_rate": 9.812379497533528e-06, |
| "loss": 0.0135, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.47282870058670773, |
| "grad_norm": 2.6267175674438477, |
| "learning_rate": 9.812048790499273e-06, |
| "loss": 0.0133, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.47321342695008173, |
| "grad_norm": 2.746267795562744, |
| "learning_rate": 9.811717797846035e-06, |
| "loss": 0.0191, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.4735981533134558, |
| "grad_norm": 3.3681936264038086, |
| "learning_rate": 9.811386519593455e-06, |
| "loss": 0.0129, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.47398287967682984, |
| "grad_norm": 1.2645982503890991, |
| "learning_rate": 9.811054955761199e-06, |
| "loss": 0.0094, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.4743676060402039, |
| "grad_norm": 1.5508323907852173, |
| "learning_rate": 9.810723106368946e-06, |
| "loss": 0.0171, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.47475233240357795, |
| "grad_norm": 0.6971696615219116, |
| "learning_rate": 9.810390971436393e-06, |
| "loss": 0.0053, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.475137058766952, |
| "grad_norm": 0.4388929009437561, |
| "learning_rate": 9.810058550983255e-06, |
| "loss": 0.005, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.47552178513032606, |
| "grad_norm": 1.5642904043197632, |
| "learning_rate": 9.809725845029262e-06, |
| "loss": 0.0123, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.4759065114937001, |
| "grad_norm": 0.6748471260070801, |
| "learning_rate": 9.809392853594162e-06, |
| "loss": 0.0049, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.47629123785707417, |
| "grad_norm": 1.3276309967041016, |
| "learning_rate": 9.809059576697719e-06, |
| "loss": 0.0311, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.4766759642204482, |
| "grad_norm": 4.248703479766846, |
| "learning_rate": 9.808726014359715e-06, |
| "loss": 0.0061, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.4770606905838223, |
| "grad_norm": 1.4735746383666992, |
| "learning_rate": 9.808392166599948e-06, |
| "loss": 0.0173, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.47744541694719633, |
| "grad_norm": 1.083099603652954, |
| "learning_rate": 9.808058033438235e-06, |
| "loss": 0.0171, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.47783014331057033, |
| "grad_norm": 0.7577223777770996, |
| "learning_rate": 9.807723614894407e-06, |
| "loss": 0.0066, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.4782148696739444, |
| "grad_norm": 0.8862943053245544, |
| "learning_rate": 9.807388910988316e-06, |
| "loss": 0.0111, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.47859959603731844, |
| "grad_norm": 0.418156236410141, |
| "learning_rate": 9.807053921739825e-06, |
| "loss": 0.0041, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.4789843224006925, |
| "grad_norm": 0.23023249208927155, |
| "learning_rate": 9.806718647168818e-06, |
| "loss": 0.0035, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.47936904876406655, |
| "grad_norm": 0.7867752909660339, |
| "learning_rate": 9.806383087295197e-06, |
| "loss": 0.0177, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.4797537751274406, |
| "grad_norm": 0.9785721302032471, |
| "learning_rate": 9.806047242138877e-06, |
| "loss": 0.0087, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.48013850149081466, |
| "grad_norm": 0.673989474773407, |
| "learning_rate": 9.805711111719794e-06, |
| "loss": 0.0146, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.4805232278541887, |
| "grad_norm": 0.9172596335411072, |
| "learning_rate": 9.805374696057896e-06, |
| "loss": 0.0097, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.48090795421756277, |
| "grad_norm": 0.4231598377227783, |
| "learning_rate": 9.805037995173156e-06, |
| "loss": 0.0043, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.4812926805809368, |
| "grad_norm": 1.4501237869262695, |
| "learning_rate": 9.804701009085554e-06, |
| "loss": 0.0126, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.4816774069443109, |
| "grad_norm": 0.2911331355571747, |
| "learning_rate": 9.804363737815095e-06, |
| "loss": 0.0038, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.48206213330768494, |
| "grad_norm": 1.0575670003890991, |
| "learning_rate": 9.804026181381796e-06, |
| "loss": 0.0202, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.48244685967105894, |
| "grad_norm": 0.6805642247200012, |
| "learning_rate": 9.803688339805693e-06, |
| "loss": 0.0041, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.482831586034433, |
| "grad_norm": 0.6432686448097229, |
| "learning_rate": 9.803350213106837e-06, |
| "loss": 0.0074, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.48321631239780705, |
| "grad_norm": 0.19403788447380066, |
| "learning_rate": 9.8030118013053e-06, |
| "loss": 0.0015, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.4836010387611811, |
| "grad_norm": 0.8219681978225708, |
| "learning_rate": 9.80267310442117e-06, |
| "loss": 0.0182, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.48398576512455516, |
| "grad_norm": 0.2699289321899414, |
| "learning_rate": 9.802334122474544e-06, |
| "loss": 0.0024, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.4843704914879292, |
| "grad_norm": 0.44587215781211853, |
| "learning_rate": 9.801994855485549e-06, |
| "loss": 0.0026, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.48475521785130327, |
| "grad_norm": 4.586838245391846, |
| "learning_rate": 9.801655303474319e-06, |
| "loss": 0.0091, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.4851399442146773, |
| "grad_norm": 1.5810905694961548, |
| "learning_rate": 9.801315466461008e-06, |
| "loss": 0.013, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.4855246705780514, |
| "grad_norm": 1.4810349941253662, |
| "learning_rate": 9.800975344465787e-06, |
| "loss": 0.0247, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.48590939694142543, |
| "grad_norm": 0.39250168204307556, |
| "learning_rate": 9.800634937508846e-06, |
| "loss": 0.0024, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.4862941233047995, |
| "grad_norm": 0.8914512395858765, |
| "learning_rate": 9.800294245610387e-06, |
| "loss": 0.0248, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.4866788496681735, |
| "grad_norm": 0.3135848641395569, |
| "learning_rate": 9.799953268790633e-06, |
| "loss": 0.0024, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.48706357603154754, |
| "grad_norm": 2.137585163116455, |
| "learning_rate": 9.799612007069823e-06, |
| "loss": 0.012, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.4874483023949216, |
| "grad_norm": 1.12441086769104, |
| "learning_rate": 9.79927046046821e-06, |
| "loss": 0.0097, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.48783302875829565, |
| "grad_norm": 0.8531357645988464, |
| "learning_rate": 9.798928629006072e-06, |
| "loss": 0.0139, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.4882177551216697, |
| "grad_norm": 0.7545097470283508, |
| "learning_rate": 9.798586512703695e-06, |
| "loss": 0.0027, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.48860248148504376, |
| "grad_norm": 2.2054431438446045, |
| "learning_rate": 9.798244111581382e-06, |
| "loss": 0.0124, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.4889872078484178, |
| "grad_norm": 1.043981671333313, |
| "learning_rate": 9.797901425659463e-06, |
| "loss": 0.0118, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.48937193421179187, |
| "grad_norm": 0.8491412401199341, |
| "learning_rate": 9.797558454958273e-06, |
| "loss": 0.0054, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.4897566605751659, |
| "grad_norm": 0.9706763625144958, |
| "learning_rate": 9.797215199498171e-06, |
| "loss": 0.0053, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.49014138693854, |
| "grad_norm": 3.596212148666382, |
| "learning_rate": 9.796871659299531e-06, |
| "loss": 0.0406, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.49052611330191404, |
| "grad_norm": 1.1573026180267334, |
| "learning_rate": 9.796527834382745e-06, |
| "loss": 0.0057, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.4909108396652881, |
| "grad_norm": 5.570591449737549, |
| "learning_rate": 9.796183724768218e-06, |
| "loss": 0.015, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.4912955660286621, |
| "grad_norm": 3.9478743076324463, |
| "learning_rate": 9.795839330476376e-06, |
| "loss": 0.0391, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.49168029239203614, |
| "grad_norm": 2.5740485191345215, |
| "learning_rate": 9.795494651527658e-06, |
| "loss": 0.0173, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.4920650187554102, |
| "grad_norm": 2.615067958831787, |
| "learning_rate": 9.795149687942527e-06, |
| "loss": 0.0221, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.49244974511878425, |
| "grad_norm": 0.9380441308021545, |
| "learning_rate": 9.794804439741455e-06, |
| "loss": 0.0072, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.4928344714821583, |
| "grad_norm": 1.8455308675765991, |
| "learning_rate": 9.794458906944937e-06, |
| "loss": 0.0049, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.49321919784553236, |
| "grad_norm": 0.48025840520858765, |
| "learning_rate": 9.794113089573479e-06, |
| "loss": 0.003, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.4936039242089064, |
| "grad_norm": 0.4569655656814575, |
| "learning_rate": 9.793766987647607e-06, |
| "loss": 0.0064, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.4939886505722805, |
| "grad_norm": 0.36443641781806946, |
| "learning_rate": 9.793420601187867e-06, |
| "loss": 0.0026, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.49437337693565453, |
| "grad_norm": 0.8915921449661255, |
| "learning_rate": 9.793073930214817e-06, |
| "loss": 0.0093, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.4947581032990286, |
| "grad_norm": 1.146666169166565, |
| "learning_rate": 9.792726974749032e-06, |
| "loss": 0.0083, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.49514282966240264, |
| "grad_norm": 0.8065195679664612, |
| "learning_rate": 9.792379734811108e-06, |
| "loss": 0.0099, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.49552755602577664, |
| "grad_norm": 0.5037975907325745, |
| "learning_rate": 9.792032210421656e-06, |
| "loss": 0.0117, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.4959122823891507, |
| "grad_norm": 1.7066353559494019, |
| "learning_rate": 9.7916844016013e-06, |
| "loss": 0.0153, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.49629700875252475, |
| "grad_norm": 0.36341947317123413, |
| "learning_rate": 9.791336308370687e-06, |
| "loss": 0.0019, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.4966817351158988, |
| "grad_norm": 2.803046464920044, |
| "learning_rate": 9.790987930750475e-06, |
| "loss": 0.009, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.49706646147927286, |
| "grad_norm": 0.07424652576446533, |
| "learning_rate": 9.790639268761346e-06, |
| "loss": 0.0011, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.4974511878426469, |
| "grad_norm": 0.5972120761871338, |
| "learning_rate": 9.790290322423992e-06, |
| "loss": 0.0054, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.49783591420602097, |
| "grad_norm": 1.204017996788025, |
| "learning_rate": 9.789941091759125e-06, |
| "loss": 0.0055, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.498220640569395, |
| "grad_norm": 0.9379375576972961, |
| "learning_rate": 9.789591576787476e-06, |
| "loss": 0.0036, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.4986053669327691, |
| "grad_norm": 0.4606468975543976, |
| "learning_rate": 9.789241777529787e-06, |
| "loss": 0.0057, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.49899009329614313, |
| "grad_norm": 1.5546983480453491, |
| "learning_rate": 9.78889169400682e-06, |
| "loss": 0.0143, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.4993748196595172, |
| "grad_norm": 4.400365352630615, |
| "learning_rate": 9.788541326239361e-06, |
| "loss": 0.0086, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.49975954602289124, |
| "grad_norm": 0.3501478135585785, |
| "learning_rate": 9.788190674248197e-06, |
| "loss": 0.0006, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.5001442723862652, |
| "grad_norm": 0.5171528458595276, |
| "learning_rate": 9.787839738054147e-06, |
| "loss": 0.0045, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.5001442723862652, |
| "eval_loss": 0.015632135793566704, |
| "eval_runtime": 232.3635, |
| "eval_samples_per_second": 0.826, |
| "eval_steps_per_second": 0.413, |
| "step": 1300 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 12995, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 1300, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.9457475174465536e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|