| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 5.987052551408987, |
| "eval_steps": 500, |
| "global_step": 2622, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002284843869002285, |
| "grad_norm": 3.668196201324463, |
| "learning_rate": 5.0000000000000004e-08, |
| "loss": 1.5687, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.00456968773800457, |
| "grad_norm": 3.6277146339416504, |
| "learning_rate": 1.0000000000000001e-07, |
| "loss": 1.5714, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.006854531607006854, |
| "grad_norm": 3.813422918319702, |
| "learning_rate": 1.5000000000000002e-07, |
| "loss": 1.58, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.00913937547600914, |
| "grad_norm": 3.4566409587860107, |
| "learning_rate": 2.0000000000000002e-07, |
| "loss": 1.5604, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.011424219345011425, |
| "grad_norm": 3.287661552429199, |
| "learning_rate": 2.5000000000000004e-07, |
| "loss": 1.5425, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.013709063214013708, |
| "grad_norm": 3.318340301513672, |
| "learning_rate": 3.0000000000000004e-07, |
| "loss": 1.5477, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.015993907083015995, |
| "grad_norm": 3.407221555709839, |
| "learning_rate": 3.5000000000000004e-07, |
| "loss": 1.5848, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.01827875095201828, |
| "grad_norm": 3.732999563217163, |
| "learning_rate": 4.0000000000000003e-07, |
| "loss": 1.5884, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.020563594821020565, |
| "grad_norm": 3.532766580581665, |
| "learning_rate": 4.5000000000000003e-07, |
| "loss": 1.5892, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.02284843869002285, |
| "grad_norm": 3.5676348209381104, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 1.5619, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02513328255902513, |
| "grad_norm": 3.1015849113464355, |
| "learning_rate": 5.5e-07, |
| "loss": 1.5649, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.027418126428027417, |
| "grad_norm": 3.163240909576416, |
| "learning_rate": 6.000000000000001e-07, |
| "loss": 1.5807, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0297029702970297, |
| "grad_norm": 2.894922971725464, |
| "learning_rate": 6.5e-07, |
| "loss": 1.5454, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.03198781416603199, |
| "grad_norm": 2.8211843967437744, |
| "learning_rate": 7.000000000000001e-07, |
| "loss": 1.5801, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.03427265803503427, |
| "grad_norm": 2.676609516143799, |
| "learning_rate": 7.5e-07, |
| "loss": 1.5446, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.03655750190403656, |
| "grad_norm": 2.6186320781707764, |
| "learning_rate": 8.000000000000001e-07, |
| "loss": 1.5443, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.03884234577303884, |
| "grad_norm": 2.460139513015747, |
| "learning_rate": 8.500000000000001e-07, |
| "loss": 1.5489, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.04112718964204113, |
| "grad_norm": 2.368126630783081, |
| "learning_rate": 9.000000000000001e-07, |
| "loss": 1.5317, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.04341203351104341, |
| "grad_norm": 2.244192123413086, |
| "learning_rate": 9.500000000000001e-07, |
| "loss": 1.4805, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0456968773800457, |
| "grad_norm": 2.242701292037964, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 1.5478, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.04798172124904798, |
| "grad_norm": 2.13895583152771, |
| "learning_rate": 1.0500000000000001e-06, |
| "loss": 1.5194, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.05026656511805026, |
| "grad_norm": 2.0152103900909424, |
| "learning_rate": 1.1e-06, |
| "loss": 1.5067, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.05255140898705255, |
| "grad_norm": 1.9156895875930786, |
| "learning_rate": 1.1500000000000002e-06, |
| "loss": 1.5145, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.05483625285605483, |
| "grad_norm": 1.7710504531860352, |
| "learning_rate": 1.2000000000000002e-06, |
| "loss": 1.5147, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.05712109672505712, |
| "grad_norm": 1.807431936264038, |
| "learning_rate": 1.25e-06, |
| "loss": 1.5357, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.0594059405940594, |
| "grad_norm": 1.6638832092285156, |
| "learning_rate": 1.3e-06, |
| "loss": 1.489, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.06169078446306169, |
| "grad_norm": 1.5708481073379517, |
| "learning_rate": 1.3500000000000002e-06, |
| "loss": 1.4768, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.06397562833206398, |
| "grad_norm": 1.615577220916748, |
| "learning_rate": 1.4000000000000001e-06, |
| "loss": 1.5159, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.06626047220106626, |
| "grad_norm": 1.5125129222869873, |
| "learning_rate": 1.45e-06, |
| "loss": 1.4972, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.06854531607006854, |
| "grad_norm": 1.479811668395996, |
| "learning_rate": 1.5e-06, |
| "loss": 1.4674, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.07083015993907082, |
| "grad_norm": 1.4502017498016357, |
| "learning_rate": 1.5500000000000002e-06, |
| "loss": 1.4811, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.07311500380807312, |
| "grad_norm": 1.3617135286331177, |
| "learning_rate": 1.6000000000000001e-06, |
| "loss": 1.4872, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.0753998476770754, |
| "grad_norm": 1.367607831954956, |
| "learning_rate": 1.6500000000000003e-06, |
| "loss": 1.4699, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.07768469154607768, |
| "grad_norm": 1.3374927043914795, |
| "learning_rate": 1.7000000000000002e-06, |
| "loss": 1.4659, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.07996953541507996, |
| "grad_norm": 1.354506254196167, |
| "learning_rate": 1.75e-06, |
| "loss": 1.4351, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.08225437928408226, |
| "grad_norm": 1.2532024383544922, |
| "learning_rate": 1.8000000000000001e-06, |
| "loss": 1.4358, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.08453922315308454, |
| "grad_norm": 1.2684043645858765, |
| "learning_rate": 1.85e-06, |
| "loss": 1.4534, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.08682406702208682, |
| "grad_norm": 1.2418140172958374, |
| "learning_rate": 1.9000000000000002e-06, |
| "loss": 1.4624, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.0891089108910891, |
| "grad_norm": 1.2266045808792114, |
| "learning_rate": 1.9500000000000004e-06, |
| "loss": 1.4282, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.0913937547600914, |
| "grad_norm": 1.180330753326416, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 1.4107, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.09367859862909368, |
| "grad_norm": 1.1651424169540405, |
| "learning_rate": 2.05e-06, |
| "loss": 1.4041, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.09596344249809596, |
| "grad_norm": 1.181652307510376, |
| "learning_rate": 2.1000000000000002e-06, |
| "loss": 1.4558, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.09824828636709824, |
| "grad_norm": 1.2221183776855469, |
| "learning_rate": 2.15e-06, |
| "loss": 1.4449, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.10053313023610053, |
| "grad_norm": 1.085172414779663, |
| "learning_rate": 2.2e-06, |
| "loss": 1.4235, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.10281797410510282, |
| "grad_norm": 1.0497649908065796, |
| "learning_rate": 2.25e-06, |
| "loss": 1.3891, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.1051028179741051, |
| "grad_norm": 1.0502350330352783, |
| "learning_rate": 2.3000000000000004e-06, |
| "loss": 1.4048, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.10738766184310738, |
| "grad_norm": 1.0798920392990112, |
| "learning_rate": 2.35e-06, |
| "loss": 1.4383, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.10967250571210967, |
| "grad_norm": 1.067581057548523, |
| "learning_rate": 2.4000000000000003e-06, |
| "loss": 1.4128, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.11195734958111196, |
| "grad_norm": 1.062606930732727, |
| "learning_rate": 2.4500000000000003e-06, |
| "loss": 1.4438, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.11424219345011424, |
| "grad_norm": 1.0157577991485596, |
| "learning_rate": 2.5e-06, |
| "loss": 1.4257, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.11652703731911652, |
| "grad_norm": 1.0165379047393799, |
| "learning_rate": 2.55e-06, |
| "loss": 1.407, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.1188118811881188, |
| "grad_norm": 1.0268282890319824, |
| "learning_rate": 2.6e-06, |
| "loss": 1.3942, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.1210967250571211, |
| "grad_norm": 1.0133647918701172, |
| "learning_rate": 2.6500000000000005e-06, |
| "loss": 1.3737, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.12338156892612338, |
| "grad_norm": 1.0097134113311768, |
| "learning_rate": 2.7000000000000004e-06, |
| "loss": 1.3994, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.12566641279512566, |
| "grad_norm": 1.1268850564956665, |
| "learning_rate": 2.7500000000000004e-06, |
| "loss": 1.3676, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.12795125666412796, |
| "grad_norm": 0.981015682220459, |
| "learning_rate": 2.8000000000000003e-06, |
| "loss": 1.3819, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.13023610053313023, |
| "grad_norm": 1.0456632375717163, |
| "learning_rate": 2.85e-06, |
| "loss": 1.4031, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.13252094440213252, |
| "grad_norm": 1.0366231203079224, |
| "learning_rate": 2.9e-06, |
| "loss": 1.4017, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.13480578827113482, |
| "grad_norm": 0.9980257749557495, |
| "learning_rate": 2.95e-06, |
| "loss": 1.4261, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.1370906321401371, |
| "grad_norm": 0.990281879901886, |
| "learning_rate": 3e-06, |
| "loss": 1.3699, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.13937547600913938, |
| "grad_norm": 1.0530250072479248, |
| "learning_rate": 3.05e-06, |
| "loss": 1.3656, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.14166031987814165, |
| "grad_norm": 0.9878147840499878, |
| "learning_rate": 3.1000000000000004e-06, |
| "loss": 1.3712, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.14394516374714394, |
| "grad_norm": 0.9554497599601746, |
| "learning_rate": 3.1500000000000003e-06, |
| "loss": 1.3507, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.14623000761614624, |
| "grad_norm": 1.0152994394302368, |
| "learning_rate": 3.2000000000000003e-06, |
| "loss": 1.3531, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.1485148514851485, |
| "grad_norm": 0.9816209077835083, |
| "learning_rate": 3.2500000000000002e-06, |
| "loss": 1.3733, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.1507996953541508, |
| "grad_norm": 1.014113187789917, |
| "learning_rate": 3.3000000000000006e-06, |
| "loss": 1.3798, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.15308453922315307, |
| "grad_norm": 1.005303978919983, |
| "learning_rate": 3.3500000000000005e-06, |
| "loss": 1.3877, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.15536938309215537, |
| "grad_norm": 1.109976887702942, |
| "learning_rate": 3.4000000000000005e-06, |
| "loss": 1.4184, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.15765422696115766, |
| "grad_norm": 1.033060908317566, |
| "learning_rate": 3.45e-06, |
| "loss": 1.4043, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.15993907083015993, |
| "grad_norm": 0.9719234108924866, |
| "learning_rate": 3.5e-06, |
| "loss": 1.3481, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.16222391469916222, |
| "grad_norm": 1.0430618524551392, |
| "learning_rate": 3.5500000000000003e-06, |
| "loss": 1.3227, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.16450875856816452, |
| "grad_norm": 1.0481953620910645, |
| "learning_rate": 3.6000000000000003e-06, |
| "loss": 1.3174, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.1667936024371668, |
| "grad_norm": 0.9868738055229187, |
| "learning_rate": 3.65e-06, |
| "loss": 1.356, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.16907844630616908, |
| "grad_norm": 1.0015943050384521, |
| "learning_rate": 3.7e-06, |
| "loss": 1.3462, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.17136329017517135, |
| "grad_norm": 1.0458308458328247, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 1.3962, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.17364813404417365, |
| "grad_norm": 1.0376830101013184, |
| "learning_rate": 3.8000000000000005e-06, |
| "loss": 1.3523, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.17593297791317594, |
| "grad_norm": 0.9821555018424988, |
| "learning_rate": 3.85e-06, |
| "loss": 1.3559, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.1782178217821782, |
| "grad_norm": 0.9579638838768005, |
| "learning_rate": 3.900000000000001e-06, |
| "loss": 1.3073, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.1805026656511805, |
| "grad_norm": 0.9736194014549255, |
| "learning_rate": 3.95e-06, |
| "loss": 1.3494, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.1827875095201828, |
| "grad_norm": 1.0055922269821167, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 1.3697, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.18507235338918507, |
| "grad_norm": 0.9767876267433167, |
| "learning_rate": 4.05e-06, |
| "loss": 1.3225, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.18735719725818736, |
| "grad_norm": 1.003092885017395, |
| "learning_rate": 4.1e-06, |
| "loss": 1.335, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.18964204112718963, |
| "grad_norm": 0.9898741245269775, |
| "learning_rate": 4.15e-06, |
| "loss": 1.3103, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.19192688499619193, |
| "grad_norm": 0.9903189539909363, |
| "learning_rate": 4.2000000000000004e-06, |
| "loss": 1.3741, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.19421172886519422, |
| "grad_norm": 0.9661535620689392, |
| "learning_rate": 4.25e-06, |
| "loss": 1.3381, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.1964965727341965, |
| "grad_norm": 0.9668599367141724, |
| "learning_rate": 4.3e-06, |
| "loss": 1.3511, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.19878141660319879, |
| "grad_norm": 0.9633579254150391, |
| "learning_rate": 4.350000000000001e-06, |
| "loss": 1.3841, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.20106626047220105, |
| "grad_norm": 0.9665766358375549, |
| "learning_rate": 4.4e-06, |
| "loss": 1.3211, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.20335110434120335, |
| "grad_norm": 1.0263577699661255, |
| "learning_rate": 4.450000000000001e-06, |
| "loss": 1.3398, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.20563594821020564, |
| "grad_norm": 1.0054337978363037, |
| "learning_rate": 4.5e-06, |
| "loss": 1.3598, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.2079207920792079, |
| "grad_norm": 0.9768564701080322, |
| "learning_rate": 4.5500000000000005e-06, |
| "loss": 1.3386, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.2102056359482102, |
| "grad_norm": 0.9710814356803894, |
| "learning_rate": 4.600000000000001e-06, |
| "loss": 1.306, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.2124904798172125, |
| "grad_norm": 0.9943618774414062, |
| "learning_rate": 4.65e-06, |
| "loss": 1.3368, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.21477532368621477, |
| "grad_norm": 1.0000272989273071, |
| "learning_rate": 4.7e-06, |
| "loss": 1.3561, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.21706016755521707, |
| "grad_norm": 0.9748716950416565, |
| "learning_rate": 4.75e-06, |
| "loss": 1.3216, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.21934501142421933, |
| "grad_norm": 0.977959930896759, |
| "learning_rate": 4.800000000000001e-06, |
| "loss": 1.3275, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.22162985529322163, |
| "grad_norm": 0.9991240501403809, |
| "learning_rate": 4.85e-06, |
| "loss": 1.3143, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.22391469916222392, |
| "grad_norm": 1.0590916872024536, |
| "learning_rate": 4.9000000000000005e-06, |
| "loss": 1.3467, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.2261995430312262, |
| "grad_norm": 0.9592604041099548, |
| "learning_rate": 4.95e-06, |
| "loss": 1.3568, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.2284843869002285, |
| "grad_norm": 0.9900586605072021, |
| "learning_rate": 5e-06, |
| "loss": 1.3162, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.23076923076923078, |
| "grad_norm": 0.9882398843765259, |
| "learning_rate": 4.999998060367119e-06, |
| "loss": 1.3348, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.23305407463823305, |
| "grad_norm": 0.9522809982299805, |
| "learning_rate": 4.999992241471486e-06, |
| "loss": 1.3004, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.23533891850723535, |
| "grad_norm": 0.9822378754615784, |
| "learning_rate": 4.9999825433221295e-06, |
| "loss": 1.3326, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.2376237623762376, |
| "grad_norm": 0.9944847822189331, |
| "learning_rate": 4.999968965934098e-06, |
| "loss": 1.3429, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.2399086062452399, |
| "grad_norm": 1.052456021308899, |
| "learning_rate": 4.9999515093284605e-06, |
| "loss": 1.3476, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.2421934501142422, |
| "grad_norm": 0.9862610697746277, |
| "learning_rate": 4.999930173532304e-06, |
| "loss": 1.3638, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.24447829398324447, |
| "grad_norm": 0.9718945622444153, |
| "learning_rate": 4.999904958578735e-06, |
| "loss": 1.3013, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.24676313785224677, |
| "grad_norm": 0.9535952210426331, |
| "learning_rate": 4.9998758645068805e-06, |
| "loss": 1.3317, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.24904798172124903, |
| "grad_norm": 1.1905543804168701, |
| "learning_rate": 4.999842891361885e-06, |
| "loss": 1.3325, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.25133282559025133, |
| "grad_norm": 1.0306485891342163, |
| "learning_rate": 4.9998060391949145e-06, |
| "loss": 1.3198, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.2536176694592536, |
| "grad_norm": 1.0334984064102173, |
| "learning_rate": 4.999765308063152e-06, |
| "loss": 1.3075, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.2559025133282559, |
| "grad_norm": 1.0020740032196045, |
| "learning_rate": 4.9997206980298e-06, |
| "loss": 1.3324, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.25818735719725816, |
| "grad_norm": 0.9771923422813416, |
| "learning_rate": 4.9996722091640805e-06, |
| "loss": 1.3072, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.26047220106626046, |
| "grad_norm": 0.9955299496650696, |
| "learning_rate": 4.999619841541234e-06, |
| "loss": 1.3501, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.26275704493526275, |
| "grad_norm": 1.0125700235366821, |
| "learning_rate": 4.9995635952425205e-06, |
| "loss": 1.3387, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.26504188880426505, |
| "grad_norm": 1.005936622619629, |
| "learning_rate": 4.999503470355215e-06, |
| "loss": 1.342, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.26732673267326734, |
| "grad_norm": 0.9978262782096863, |
| "learning_rate": 4.999439466972616e-06, |
| "loss": 1.2954, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.26961157654226964, |
| "grad_norm": 0.9668537974357605, |
| "learning_rate": 4.999371585194039e-06, |
| "loss": 1.3318, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.2718964204112719, |
| "grad_norm": 1.0156077146530151, |
| "learning_rate": 4.999299825124814e-06, |
| "loss": 1.2681, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.2741812642802742, |
| "grad_norm": 0.99967360496521, |
| "learning_rate": 4.999224186876293e-06, |
| "loss": 1.2666, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.27646610814927647, |
| "grad_norm": 1.0085562467575073, |
| "learning_rate": 4.999144670565842e-06, |
| "loss": 1.3261, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.27875095201827876, |
| "grad_norm": 1.0338691473007202, |
| "learning_rate": 4.999061276316851e-06, |
| "loss": 1.2943, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.28103579588728106, |
| "grad_norm": 0.9880859851837158, |
| "learning_rate": 4.99897400425872e-06, |
| "loss": 1.3035, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.2833206397562833, |
| "grad_norm": 0.9832742810249329, |
| "learning_rate": 4.998882854526872e-06, |
| "loss": 1.3015, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.2856054836252856, |
| "grad_norm": 0.976040780544281, |
| "learning_rate": 4.998787827262743e-06, |
| "loss": 1.3325, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.2878903274942879, |
| "grad_norm": 1.0309007167816162, |
| "learning_rate": 4.998688922613788e-06, |
| "loss": 1.2998, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.2901751713632902, |
| "grad_norm": 1.0828396081924438, |
| "learning_rate": 4.998586140733477e-06, |
| "loss": 1.3093, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.2924600152322925, |
| "grad_norm": 0.9725452661514282, |
| "learning_rate": 4.998479481781299e-06, |
| "loss": 1.2811, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.2947448591012947, |
| "grad_norm": 0.9891279339790344, |
| "learning_rate": 4.998368945922757e-06, |
| "loss": 1.3104, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.297029702970297, |
| "grad_norm": 1.022490382194519, |
| "learning_rate": 4.998254533329369e-06, |
| "loss": 1.3425, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.2993145468392993, |
| "grad_norm": 1.00505530834198, |
| "learning_rate": 4.99813624417867e-06, |
| "loss": 1.3494, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.3015993907083016, |
| "grad_norm": 1.033308982849121, |
| "learning_rate": 4.998014078654211e-06, |
| "loss": 1.278, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.3038842345773039, |
| "grad_norm": 1.0194460153579712, |
| "learning_rate": 4.997888036945556e-06, |
| "loss": 1.2963, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.30616907844630614, |
| "grad_norm": 1.005299687385559, |
| "learning_rate": 4.997758119248286e-06, |
| "loss": 1.3187, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.30845392231530844, |
| "grad_norm": 1.0271679162979126, |
| "learning_rate": 4.997624325763994e-06, |
| "loss": 1.3106, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.31073876618431073, |
| "grad_norm": 1.0343165397644043, |
| "learning_rate": 4.997486656700289e-06, |
| "loss": 1.3355, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.31302361005331303, |
| "grad_norm": 1.0498188734054565, |
| "learning_rate": 4.997345112270792e-06, |
| "loss": 1.3126, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.3153084539223153, |
| "grad_norm": 0.9742498993873596, |
| "learning_rate": 4.997199692695138e-06, |
| "loss": 1.3006, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.3175932977913176, |
| "grad_norm": 1.0044124126434326, |
| "learning_rate": 4.997050398198977e-06, |
| "loss": 1.3298, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.31987814166031986, |
| "grad_norm": 1.0173184871673584, |
| "learning_rate": 4.99689722901397e-06, |
| "loss": 1.3286, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.32216298552932215, |
| "grad_norm": 0.9835124611854553, |
| "learning_rate": 4.99674018537779e-06, |
| "loss": 1.2937, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.32444782939832445, |
| "grad_norm": 1.0389831066131592, |
| "learning_rate": 4.996579267534122e-06, |
| "loss": 1.3077, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.32673267326732675, |
| "grad_norm": 1.0412015914916992, |
| "learning_rate": 4.996414475732664e-06, |
| "loss": 1.3131, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.32901751713632904, |
| "grad_norm": 1.0527534484863281, |
| "learning_rate": 4.9962458102291254e-06, |
| "loss": 1.3075, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.3313023610053313, |
| "grad_norm": 1.036034345626831, |
| "learning_rate": 4.9960732712852236e-06, |
| "loss": 1.3198, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.3335872048743336, |
| "grad_norm": 1.0121785402297974, |
| "learning_rate": 4.99589685916869e-06, |
| "loss": 1.3346, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.33587204874333587, |
| "grad_norm": 1.0597130060195923, |
| "learning_rate": 4.9957165741532635e-06, |
| "loss": 1.3025, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.33815689261233817, |
| "grad_norm": 1.0982815027236938, |
| "learning_rate": 4.995532416518693e-06, |
| "loss": 1.3177, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.34044173648134046, |
| "grad_norm": 1.012061357498169, |
| "learning_rate": 4.995344386550738e-06, |
| "loss": 1.2905, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.3427265803503427, |
| "grad_norm": 1.0748074054718018, |
| "learning_rate": 4.995152484541166e-06, |
| "loss": 1.3191, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.345011424219345, |
| "grad_norm": 1.0346341133117676, |
| "learning_rate": 4.994956710787752e-06, |
| "loss": 1.2923, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.3472962680883473, |
| "grad_norm": 1.0333645343780518, |
| "learning_rate": 4.99475706559428e-06, |
| "loss": 1.3272, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.3495811119573496, |
| "grad_norm": 1.0411094427108765, |
| "learning_rate": 4.9945535492705385e-06, |
| "loss": 1.3102, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.3518659558263519, |
| "grad_norm": 1.0394591093063354, |
| "learning_rate": 4.994346162132329e-06, |
| "loss": 1.2912, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.3541507996953541, |
| "grad_norm": 1.1258337497711182, |
| "learning_rate": 4.994134904501452e-06, |
| "loss": 1.295, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.3564356435643564, |
| "grad_norm": 1.0196075439453125, |
| "learning_rate": 4.993919776705718e-06, |
| "loss": 1.2935, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.3587204874333587, |
| "grad_norm": 1.020180583000183, |
| "learning_rate": 4.993700779078943e-06, |
| "loss": 1.3118, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.361005331302361, |
| "grad_norm": 1.1170531511306763, |
| "learning_rate": 4.993477911960948e-06, |
| "loss": 1.2924, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.3632901751713633, |
| "grad_norm": 1.0637717247009277, |
| "learning_rate": 4.993251175697554e-06, |
| "loss": 1.2797, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.3655750190403656, |
| "grad_norm": 1.046305775642395, |
| "learning_rate": 4.993020570640592e-06, |
| "loss": 1.3142, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.36785986290936784, |
| "grad_norm": 1.039476752281189, |
| "learning_rate": 4.992786097147892e-06, |
| "loss": 1.2773, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.37014470677837014, |
| "grad_norm": 1.0379183292388916, |
| "learning_rate": 4.992547755583288e-06, |
| "loss": 1.3057, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.37242955064737243, |
| "grad_norm": 1.0063403844833374, |
| "learning_rate": 4.992305546316617e-06, |
| "loss": 1.3108, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.3747143945163747, |
| "grad_norm": 1.0467029809951782, |
| "learning_rate": 4.992059469723716e-06, |
| "loss": 1.2675, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.376999238385377, |
| "grad_norm": 0.9822115898132324, |
| "learning_rate": 4.991809526186424e-06, |
| "loss": 1.2987, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.37928408225437926, |
| "grad_norm": 0.9957991242408752, |
| "learning_rate": 4.9915557160925795e-06, |
| "loss": 1.2927, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.38156892612338156, |
| "grad_norm": 1.020486831665039, |
| "learning_rate": 4.991298039836021e-06, |
| "loss": 1.2891, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.38385376999238385, |
| "grad_norm": 0.9941042065620422, |
| "learning_rate": 4.991036497816587e-06, |
| "loss": 1.3279, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.38613861386138615, |
| "grad_norm": 1.030573844909668, |
| "learning_rate": 4.990771090440114e-06, |
| "loss": 1.2715, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.38842345773038844, |
| "grad_norm": 0.9810742735862732, |
| "learning_rate": 4.990501818118436e-06, |
| "loss": 1.2808, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.3907083015993907, |
| "grad_norm": 1.0300201177597046, |
| "learning_rate": 4.990228681269383e-06, |
| "loss": 1.3079, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.392993145468393, |
| "grad_norm": 1.0107353925704956, |
| "learning_rate": 4.989951680316787e-06, |
| "loss": 1.2872, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.3952779893373953, |
| "grad_norm": 1.0361515283584595, |
| "learning_rate": 4.989670815690469e-06, |
| "loss": 1.2784, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.39756283320639757, |
| "grad_norm": 1.0452970266342163, |
| "learning_rate": 4.989386087826248e-06, |
| "loss": 1.2976, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.39984767707539987, |
| "grad_norm": 1.0585196018218994, |
| "learning_rate": 4.9890974971659405e-06, |
| "loss": 1.2921, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.4021325209444021, |
| "grad_norm": 1.018211007118225, |
| "learning_rate": 4.988805044157353e-06, |
| "loss": 1.3046, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.4044173648134044, |
| "grad_norm": 1.0587507486343384, |
| "learning_rate": 4.9885087292542865e-06, |
| "loss": 1.2901, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.4067022086824067, |
| "grad_norm": 1.0261503458023071, |
| "learning_rate": 4.988208552916535e-06, |
| "loss": 1.3081, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.408987052551409, |
| "grad_norm": 1.0412943363189697, |
| "learning_rate": 4.9879045156098846e-06, |
| "loss": 1.3052, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.4112718964204113, |
| "grad_norm": 1.0323666334152222, |
| "learning_rate": 4.987596617806111e-06, |
| "loss": 1.3048, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.4135567402894136, |
| "grad_norm": 1.0095067024230957, |
| "learning_rate": 4.9872848599829825e-06, |
| "loss": 1.3292, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.4158415841584158, |
| "grad_norm": 0.9761002659797668, |
| "learning_rate": 4.986969242624254e-06, |
| "loss": 1.2884, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.4181264280274181, |
| "grad_norm": 1.0436338186264038, |
| "learning_rate": 4.986649766219671e-06, |
| "loss": 1.3211, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.4204112718964204, |
| "grad_norm": 1.0505225658416748, |
| "learning_rate": 4.986326431264969e-06, |
| "loss": 1.2863, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.4226961157654227, |
| "grad_norm": 1.006611943244934, |
| "learning_rate": 4.985999238261867e-06, |
| "loss": 1.2812, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.424980959634425, |
| "grad_norm": 1.0494719743728638, |
| "learning_rate": 4.985668187718073e-06, |
| "loss": 1.3105, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.42726580350342724, |
| "grad_norm": 0.9847164750099182, |
| "learning_rate": 4.985333280147281e-06, |
| "loss": 1.2811, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.42955064737242954, |
| "grad_norm": 1.0337165594100952, |
| "learning_rate": 4.984994516069168e-06, |
| "loss": 1.2876, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.43183549124143183, |
| "grad_norm": 1.0178074836730957, |
| "learning_rate": 4.984651896009396e-06, |
| "loss": 1.2597, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.43412033511043413, |
| "grad_norm": 1.0170668363571167, |
| "learning_rate": 4.984305420499612e-06, |
| "loss": 1.2916, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.4364051789794364, |
| "grad_norm": 1.0148853063583374, |
| "learning_rate": 4.983955090077445e-06, |
| "loss": 1.2785, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.43869002284843867, |
| "grad_norm": 1.0563602447509766, |
| "learning_rate": 4.983600905286502e-06, |
| "loss": 1.295, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.44097486671744096, |
| "grad_norm": 0.9817858338356018, |
| "learning_rate": 4.983242866676376e-06, |
| "loss": 1.2832, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.44325971058644326, |
| "grad_norm": 1.0299488306045532, |
| "learning_rate": 4.982880974802638e-06, |
| "loss": 1.2952, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.44554455445544555, |
| "grad_norm": 0.9951279163360596, |
| "learning_rate": 4.982515230226837e-06, |
| "loss": 1.2901, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.44782939832444785, |
| "grad_norm": 1.0001885890960693, |
| "learning_rate": 4.982145633516501e-06, |
| "loss": 1.2554, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.4501142421934501, |
| "grad_norm": 1.0821017026901245, |
| "learning_rate": 4.981772185245135e-06, |
| "loss": 1.2903, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.4523990860624524, |
| "grad_norm": 1.0269831418991089, |
| "learning_rate": 4.981394885992223e-06, |
| "loss": 1.3077, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.4546839299314547, |
| "grad_norm": 1.025965929031372, |
| "learning_rate": 4.981013736343221e-06, |
| "loss": 1.2771, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.456968773800457, |
| "grad_norm": 0.9828860759735107, |
| "learning_rate": 4.980628736889562e-06, |
| "loss": 1.2788, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.45925361766945927, |
| "grad_norm": 1.077913761138916, |
| "learning_rate": 4.9802398882286515e-06, |
| "loss": 1.2815, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.46153846153846156, |
| "grad_norm": 1.1024688482284546, |
| "learning_rate": 4.97984719096387e-06, |
| "loss": 1.3135, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.4638233054074638, |
| "grad_norm": 1.0494202375411987, |
| "learning_rate": 4.979450645704567e-06, |
| "loss": 1.3027, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.4661081492764661, |
| "grad_norm": 1.0050199031829834, |
| "learning_rate": 4.979050253066064e-06, |
| "loss": 1.3016, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.4683929931454684, |
| "grad_norm": 1.0264744758605957, |
| "learning_rate": 4.978646013669652e-06, |
| "loss": 1.343, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.4706778370144707, |
| "grad_norm": 1.001989722251892, |
| "learning_rate": 4.978237928142594e-06, |
| "loss": 1.3088, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.472962680883473, |
| "grad_norm": 1.0501984357833862, |
| "learning_rate": 4.977825997118119e-06, |
| "loss": 1.2875, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.4752475247524752, |
| "grad_norm": 1.0487364530563354, |
| "learning_rate": 4.977410221235421e-06, |
| "loss": 1.2917, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.4775323686214775, |
| "grad_norm": 1.0768541097640991, |
| "learning_rate": 4.976990601139662e-06, |
| "loss": 1.3, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.4798172124904798, |
| "grad_norm": 0.9696170687675476, |
| "learning_rate": 4.9765671374819715e-06, |
| "loss": 1.2822, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.4821020563594821, |
| "grad_norm": 0.9987464547157288, |
| "learning_rate": 4.9761398309194385e-06, |
| "loss": 1.3076, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.4843869002284844, |
| "grad_norm": 1.0254422426223755, |
| "learning_rate": 4.975708682115118e-06, |
| "loss": 1.281, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.48667174409748665, |
| "grad_norm": 1.0040076971054077, |
| "learning_rate": 4.9752736917380274e-06, |
| "loss": 1.2821, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.48895658796648894, |
| "grad_norm": 1.004184365272522, |
| "learning_rate": 4.9748348604631416e-06, |
| "loss": 1.2641, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.49124143183549124, |
| "grad_norm": 1.0694694519042969, |
| "learning_rate": 4.9743921889714005e-06, |
| "loss": 1.2853, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.49352627570449353, |
| "grad_norm": 1.0564874410629272, |
| "learning_rate": 4.973945677949699e-06, |
| "loss": 1.2882, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.49581111957349583, |
| "grad_norm": 1.0076894760131836, |
| "learning_rate": 4.973495328090891e-06, |
| "loss": 1.2868, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.49809596344249807, |
| "grad_norm": 1.0476043224334717, |
| "learning_rate": 4.973041140093786e-06, |
| "loss": 1.2642, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.5003808073115004, |
| "grad_norm": 1.050991415977478, |
| "learning_rate": 4.972583114663153e-06, |
| "loss": 1.2751, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.5026656511805027, |
| "grad_norm": 0.9902971386909485, |
| "learning_rate": 4.972121252509712e-06, |
| "loss": 1.2685, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.504950495049505, |
| "grad_norm": 1.0011303424835205, |
| "learning_rate": 4.971655554350137e-06, |
| "loss": 1.2829, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.5072353389185073, |
| "grad_norm": 1.010233998298645, |
| "learning_rate": 4.971186020907054e-06, |
| "loss": 1.277, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.5095201827875095, |
| "grad_norm": 1.0275652408599854, |
| "learning_rate": 4.970712652909042e-06, |
| "loss": 1.2971, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.5118050266565118, |
| "grad_norm": 1.0285537242889404, |
| "learning_rate": 4.970235451090629e-06, |
| "loss": 1.231, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.5140898705255141, |
| "grad_norm": 1.0604579448699951, |
| "learning_rate": 4.969754416192292e-06, |
| "loss": 1.269, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.5163747143945163, |
| "grad_norm": 1.0375958681106567, |
| "learning_rate": 4.969269548960456e-06, |
| "loss": 1.2712, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.5186595582635186, |
| "grad_norm": 1.037304401397705, |
| "learning_rate": 4.9687808501474925e-06, |
| "loss": 1.2826, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.5209444021325209, |
| "grad_norm": 1.0280749797821045, |
| "learning_rate": 4.968288320511718e-06, |
| "loss": 1.2726, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.5232292460015232, |
| "grad_norm": 1.0595530271530151, |
| "learning_rate": 4.967791960817395e-06, |
| "loss": 1.281, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.5255140898705255, |
| "grad_norm": 0.9964226484298706, |
| "learning_rate": 4.967291771834727e-06, |
| "loss": 1.3188, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.5277989337395278, |
| "grad_norm": 1.0433804988861084, |
| "learning_rate": 4.966787754339861e-06, |
| "loss": 1.274, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.5300837776085301, |
| "grad_norm": 1.079641580581665, |
| "learning_rate": 4.966279909114883e-06, |
| "loss": 1.2991, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.5323686214775324, |
| "grad_norm": 1.0351816415786743, |
| "learning_rate": 4.965768236947821e-06, |
| "loss": 1.2659, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.5346534653465347, |
| "grad_norm": 1.0495244264602661, |
| "learning_rate": 4.96525273863264e-06, |
| "loss": 1.2898, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.536938309215537, |
| "grad_norm": 1.0479910373687744, |
| "learning_rate": 4.964733414969241e-06, |
| "loss": 1.2536, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.5392231530845393, |
| "grad_norm": 1.0365879535675049, |
| "learning_rate": 4.964210266763461e-06, |
| "loss": 1.2369, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.5415079969535415, |
| "grad_norm": 1.0398730039596558, |
| "learning_rate": 4.9636832948270745e-06, |
| "loss": 1.2669, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.5437928408225438, |
| "grad_norm": 1.0146657228469849, |
| "learning_rate": 4.963152499977786e-06, |
| "loss": 1.2893, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.546077684691546, |
| "grad_norm": 1.0974043607711792, |
| "learning_rate": 4.962617883039233e-06, |
| "loss": 1.2452, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.5483625285605483, |
| "grad_norm": 0.9900649189949036, |
| "learning_rate": 4.962079444840985e-06, |
| "loss": 1.2215, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.5506473724295506, |
| "grad_norm": 1.003464937210083, |
| "learning_rate": 4.9615371862185394e-06, |
| "loss": 1.2744, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.5529322162985529, |
| "grad_norm": 1.004382848739624, |
| "learning_rate": 4.960991108013322e-06, |
| "loss": 1.271, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.5552170601675552, |
| "grad_norm": 1.0129280090332031, |
| "learning_rate": 4.960441211072686e-06, |
| "loss": 1.2874, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.5575019040365575, |
| "grad_norm": 1.040189266204834, |
| "learning_rate": 4.9598874962499096e-06, |
| "loss": 1.2918, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.5597867479055598, |
| "grad_norm": 1.0145982503890991, |
| "learning_rate": 4.959329964404197e-06, |
| "loss": 1.2713, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.5620715917745621, |
| "grad_norm": 1.0469987392425537, |
| "learning_rate": 4.958768616400672e-06, |
| "loss": 1.2689, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.5643564356435643, |
| "grad_norm": 1.0191642045974731, |
| "learning_rate": 4.958203453110384e-06, |
| "loss": 1.2718, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.5666412795125666, |
| "grad_norm": 1.0718231201171875, |
| "learning_rate": 4.957634475410298e-06, |
| "loss": 1.3128, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.5689261233815689, |
| "grad_norm": 1.0109634399414062, |
| "learning_rate": 4.957061684183301e-06, |
| "loss": 1.2586, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.5712109672505712, |
| "grad_norm": 0.9942657947540283, |
| "learning_rate": 4.956485080318198e-06, |
| "loss": 1.328, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.5734958111195735, |
| "grad_norm": 1.0184757709503174, |
| "learning_rate": 4.955904664709707e-06, |
| "loss": 1.2815, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.5757806549885758, |
| "grad_norm": 1.015625, |
| "learning_rate": 4.955320438258465e-06, |
| "loss": 1.2585, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.5780654988575781, |
| "grad_norm": 0.9848981499671936, |
| "learning_rate": 4.954732401871018e-06, |
| "loss": 1.2866, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.5803503427265804, |
| "grad_norm": 1.0482749938964844, |
| "learning_rate": 4.954140556459826e-06, |
| "loss": 1.2732, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.5826351865955827, |
| "grad_norm": 1.0250680446624756, |
| "learning_rate": 4.95354490294326e-06, |
| "loss": 1.3053, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.584920030464585, |
| "grad_norm": 1.0545597076416016, |
| "learning_rate": 4.952945442245598e-06, |
| "loss": 1.2638, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.5872048743335873, |
| "grad_norm": 1.044873833656311, |
| "learning_rate": 4.952342175297028e-06, |
| "loss": 1.2683, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.5894897182025894, |
| "grad_norm": 1.0361744165420532, |
| "learning_rate": 4.951735103033644e-06, |
| "loss": 1.2887, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.5917745620715917, |
| "grad_norm": 1.0238685607910156, |
| "learning_rate": 4.951124226397441e-06, |
| "loss": 1.2736, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.594059405940594, |
| "grad_norm": 1.0217833518981934, |
| "learning_rate": 4.950509546336323e-06, |
| "loss": 1.2681, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.5963442498095963, |
| "grad_norm": 1.0546188354492188, |
| "learning_rate": 4.949891063804091e-06, |
| "loss": 1.2582, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.5986290936785986, |
| "grad_norm": 1.0834907293319702, |
| "learning_rate": 4.94926877976045e-06, |
| "loss": 1.2487, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.6009139375476009, |
| "grad_norm": 1.062184453010559, |
| "learning_rate": 4.948642695171e-06, |
| "loss": 1.3188, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.6031987814166032, |
| "grad_norm": 1.0373252630233765, |
| "learning_rate": 4.948012811007242e-06, |
| "loss": 1.277, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.6054836252856055, |
| "grad_norm": 1.0140316486358643, |
| "learning_rate": 4.947379128246571e-06, |
| "loss": 1.2617, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.6077684691546078, |
| "grad_norm": 1.054410696029663, |
| "learning_rate": 4.946741647872277e-06, |
| "loss": 1.238, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.6100533130236101, |
| "grad_norm": 1.0967663526535034, |
| "learning_rate": 4.94610037087354e-06, |
| "loss": 1.2682, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.6123381568926123, |
| "grad_norm": 1.043338656425476, |
| "learning_rate": 4.945455298245436e-06, |
| "loss": 1.2572, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.6146230007616146, |
| "grad_norm": 1.0187970399856567, |
| "learning_rate": 4.944806430988927e-06, |
| "loss": 1.2613, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.6169078446306169, |
| "grad_norm": 1.0666472911834717, |
| "learning_rate": 4.9441537701108654e-06, |
| "loss": 1.2611, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.6191926884996192, |
| "grad_norm": 1.0025635957717896, |
| "learning_rate": 4.943497316623988e-06, |
| "loss": 1.2519, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.6214775323686215, |
| "grad_norm": 1.0135135650634766, |
| "learning_rate": 4.942837071546919e-06, |
| "loss": 1.2759, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.6237623762376238, |
| "grad_norm": 0.9985151886940002, |
| "learning_rate": 4.942173035904164e-06, |
| "loss": 1.2844, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.6260472201066261, |
| "grad_norm": 0.9952817559242249, |
| "learning_rate": 4.941505210726112e-06, |
| "loss": 1.2356, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.6283320639756284, |
| "grad_norm": 1.0448962450027466, |
| "learning_rate": 4.9408335970490305e-06, |
| "loss": 1.2587, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.6306169078446306, |
| "grad_norm": 1.011099100112915, |
| "learning_rate": 4.940158195915067e-06, |
| "loss": 1.2729, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.6329017517136329, |
| "grad_norm": 1.052904725074768, |
| "learning_rate": 4.939479008372247e-06, |
| "loss": 1.2536, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.6351865955826352, |
| "grad_norm": 1.058173418045044, |
| "learning_rate": 4.938796035474469e-06, |
| "loss": 1.2807, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.6374714394516374, |
| "grad_norm": 1.022147536277771, |
| "learning_rate": 4.938109278281506e-06, |
| "loss": 1.2887, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.6397562833206397, |
| "grad_norm": 1.0064011812210083, |
| "learning_rate": 4.937418737859004e-06, |
| "loss": 1.2192, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.642041127189642, |
| "grad_norm": 1.0092360973358154, |
| "learning_rate": 4.936724415278479e-06, |
| "loss": 1.3159, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.6443259710586443, |
| "grad_norm": 1.076401710510254, |
| "learning_rate": 4.936026311617316e-06, |
| "loss": 1.2872, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.6466108149276466, |
| "grad_norm": 1.057209849357605, |
| "learning_rate": 4.935324427958766e-06, |
| "loss": 1.257, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.6488956587966489, |
| "grad_norm": 1.1738762855529785, |
| "learning_rate": 4.934618765391946e-06, |
| "loss": 1.2547, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.6511805026656512, |
| "grad_norm": 1.0405137538909912, |
| "learning_rate": 4.933909325011838e-06, |
| "loss": 1.2766, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.6534653465346535, |
| "grad_norm": 1.0377894639968872, |
| "learning_rate": 4.933196107919286e-06, |
| "loss": 1.2624, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.6557501904036558, |
| "grad_norm": 1.032714605331421, |
| "learning_rate": 4.932479115220991e-06, |
| "loss": 1.2527, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.6580350342726581, |
| "grad_norm": 1.0755581855773926, |
| "learning_rate": 4.9317583480295175e-06, |
| "loss": 1.2966, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.6603198781416603, |
| "grad_norm": 1.0262556076049805, |
| "learning_rate": 4.931033807463283e-06, |
| "loss": 1.2585, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.6626047220106626, |
| "grad_norm": 1.0510430335998535, |
| "learning_rate": 4.930305494646562e-06, |
| "loss": 1.2662, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.6648895658796649, |
| "grad_norm": 1.035854458808899, |
| "learning_rate": 4.9295734107094825e-06, |
| "loss": 1.2346, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.6671744097486672, |
| "grad_norm": 1.0485846996307373, |
| "learning_rate": 4.928837556788023e-06, |
| "loss": 1.2978, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.6694592536176694, |
| "grad_norm": 1.02550208568573, |
| "learning_rate": 4.928097934024013e-06, |
| "loss": 1.2478, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.6717440974866717, |
| "grad_norm": 1.0328837633132935, |
| "learning_rate": 4.927354543565131e-06, |
| "loss": 1.2788, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.674028941355674, |
| "grad_norm": 0.9913997054100037, |
| "learning_rate": 4.926607386564898e-06, |
| "loss": 1.2423, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.6763137852246763, |
| "grad_norm": 1.0034306049346924, |
| "learning_rate": 4.925856464182685e-06, |
| "loss": 1.2562, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.6785986290936786, |
| "grad_norm": 1.0546495914459229, |
| "learning_rate": 4.925101777583701e-06, |
| "loss": 1.2598, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.6808834729626809, |
| "grad_norm": 1.0412935018539429, |
| "learning_rate": 4.924343327938999e-06, |
| "loss": 1.2744, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.6831683168316832, |
| "grad_norm": 1.0731669664382935, |
| "learning_rate": 4.923581116425471e-06, |
| "loss": 1.2912, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.6854531607006854, |
| "grad_norm": 1.0394880771636963, |
| "learning_rate": 4.922815144225843e-06, |
| "loss": 1.276, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6877380045696877, |
| "grad_norm": 1.0383579730987549, |
| "learning_rate": 4.92204541252868e-06, |
| "loss": 1.255, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.69002284843869, |
| "grad_norm": 1.0251744985580444, |
| "learning_rate": 4.92127192252838e-06, |
| "loss": 1.2688, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.6923076923076923, |
| "grad_norm": 1.017650842666626, |
| "learning_rate": 4.9204946754251724e-06, |
| "loss": 1.2818, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.6945925361766946, |
| "grad_norm": 1.0219080448150635, |
| "learning_rate": 4.919713672425116e-06, |
| "loss": 1.2828, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.6968773800456969, |
| "grad_norm": 1.0862151384353638, |
| "learning_rate": 4.918928914740098e-06, |
| "loss": 1.2514, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.6991622239146992, |
| "grad_norm": 1.0639281272888184, |
| "learning_rate": 4.918140403587831e-06, |
| "loss": 1.2739, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.7014470677837015, |
| "grad_norm": 1.0512444972991943, |
| "learning_rate": 4.9173481401918556e-06, |
| "loss": 1.2576, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.7037319116527038, |
| "grad_norm": 1.0291866064071655, |
| "learning_rate": 4.916552125781529e-06, |
| "loss": 1.2934, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.7060167555217061, |
| "grad_norm": 1.0338629484176636, |
| "learning_rate": 4.915752361592032e-06, |
| "loss": 1.263, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.7083015993907082, |
| "grad_norm": 1.0358542203903198, |
| "learning_rate": 4.914948848864365e-06, |
| "loss": 1.2453, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.7105864432597105, |
| "grad_norm": 1.1184923648834229, |
| "learning_rate": 4.914141588845344e-06, |
| "loss": 1.2653, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.7128712871287128, |
| "grad_norm": 1.0791000127792358, |
| "learning_rate": 4.913330582787598e-06, |
| "loss": 1.2659, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.7151561309977151, |
| "grad_norm": 1.0901819467544556, |
| "learning_rate": 4.912515831949571e-06, |
| "loss": 1.2208, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.7174409748667174, |
| "grad_norm": 1.0219902992248535, |
| "learning_rate": 4.9116973375955166e-06, |
| "loss": 1.2711, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.7197258187357197, |
| "grad_norm": 1.014364242553711, |
| "learning_rate": 4.910875100995499e-06, |
| "loss": 1.2877, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.722010662604722, |
| "grad_norm": 1.0699234008789062, |
| "learning_rate": 4.910049123425386e-06, |
| "loss": 1.2425, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.7242955064737243, |
| "grad_norm": 1.0614267587661743, |
| "learning_rate": 4.9092194061668535e-06, |
| "loss": 1.2475, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.7265803503427266, |
| "grad_norm": 1.0620336532592773, |
| "learning_rate": 4.908385950507378e-06, |
| "loss": 1.2618, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.7288651942117289, |
| "grad_norm": 1.0389032363891602, |
| "learning_rate": 4.90754875774024e-06, |
| "loss": 1.2742, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.7311500380807312, |
| "grad_norm": 0.9754124879837036, |
| "learning_rate": 4.9067078291645144e-06, |
| "loss": 1.25, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.7334348819497334, |
| "grad_norm": 1.056058406829834, |
| "learning_rate": 4.905863166085076e-06, |
| "loss": 1.2451, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.7357197258187357, |
| "grad_norm": 1.0641580820083618, |
| "learning_rate": 4.9050147698125944e-06, |
| "loss": 1.2532, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.738004569687738, |
| "grad_norm": 1.0407251119613647, |
| "learning_rate": 4.904162641663532e-06, |
| "loss": 1.3103, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.7402894135567403, |
| "grad_norm": 1.0477187633514404, |
| "learning_rate": 4.9033067829601385e-06, |
| "loss": 1.2658, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.7425742574257426, |
| "grad_norm": 1.0202401876449585, |
| "learning_rate": 4.902447195030459e-06, |
| "loss": 1.2569, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.7448591012947449, |
| "grad_norm": 1.0629253387451172, |
| "learning_rate": 4.9015838792083196e-06, |
| "loss": 1.247, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.7471439451637472, |
| "grad_norm": 1.0284748077392578, |
| "learning_rate": 4.900716836833333e-06, |
| "loss": 1.2659, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.7494287890327495, |
| "grad_norm": 1.0653586387634277, |
| "learning_rate": 4.899846069250894e-06, |
| "loss": 1.2673, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.7517136329017517, |
| "grad_norm": 1.0795682668685913, |
| "learning_rate": 4.898971577812179e-06, |
| "loss": 1.2778, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.753998476770754, |
| "grad_norm": 1.0359232425689697, |
| "learning_rate": 4.8980933638741426e-06, |
| "loss": 1.2732, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.7562833206397562, |
| "grad_norm": 1.0286237001419067, |
| "learning_rate": 4.897211428799512e-06, |
| "loss": 1.2455, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.7585681645087585, |
| "grad_norm": 1.0179105997085571, |
| "learning_rate": 4.896325773956793e-06, |
| "loss": 1.2413, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.7608530083777608, |
| "grad_norm": 1.0381865501403809, |
| "learning_rate": 4.895436400720264e-06, |
| "loss": 1.2409, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.7631378522467631, |
| "grad_norm": 0.9918906688690186, |
| "learning_rate": 4.894543310469968e-06, |
| "loss": 1.2556, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.7654226961157654, |
| "grad_norm": 1.0300416946411133, |
| "learning_rate": 4.8936465045917204e-06, |
| "loss": 1.2325, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.7677075399847677, |
| "grad_norm": 1.052534580230713, |
| "learning_rate": 4.8927459844770995e-06, |
| "loss": 1.2561, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.76999238385377, |
| "grad_norm": 1.0454604625701904, |
| "learning_rate": 4.891841751523448e-06, |
| "loss": 1.2845, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.7722772277227723, |
| "grad_norm": 1.0518709421157837, |
| "learning_rate": 4.8909338071338706e-06, |
| "loss": 1.2485, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.7745620715917746, |
| "grad_norm": 1.0326422452926636, |
| "learning_rate": 4.890022152717231e-06, |
| "loss": 1.2757, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.7768469154607769, |
| "grad_norm": 1.2617943286895752, |
| "learning_rate": 4.889106789688148e-06, |
| "loss": 1.2656, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.7791317593297792, |
| "grad_norm": 1.0038459300994873, |
| "learning_rate": 4.888187719466996e-06, |
| "loss": 1.2636, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.7814166031987814, |
| "grad_norm": 1.1393420696258545, |
| "learning_rate": 4.887264943479903e-06, |
| "loss": 1.2621, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.7837014470677837, |
| "grad_norm": 1.0969446897506714, |
| "learning_rate": 4.8863384631587446e-06, |
| "loss": 1.2208, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.785986290936786, |
| "grad_norm": 1.034393310546875, |
| "learning_rate": 4.885408279941148e-06, |
| "loss": 1.2101, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.7882711348057883, |
| "grad_norm": 1.1397764682769775, |
| "learning_rate": 4.884474395270484e-06, |
| "loss": 1.2823, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.7905559786747905, |
| "grad_norm": 1.1488789319992065, |
| "learning_rate": 4.883536810595867e-06, |
| "loss": 1.2615, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.7928408225437928, |
| "grad_norm": 1.0274580717086792, |
| "learning_rate": 4.8825955273721524e-06, |
| "loss": 1.2334, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.7951256664127951, |
| "grad_norm": 1.0355713367462158, |
| "learning_rate": 4.8816505470599365e-06, |
| "loss": 1.2224, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.7974105102817974, |
| "grad_norm": 1.0540703535079956, |
| "learning_rate": 4.880701871125551e-06, |
| "loss": 1.262, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.7996953541507997, |
| "grad_norm": 1.0765819549560547, |
| "learning_rate": 4.879749501041062e-06, |
| "loss": 1.2731, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.801980198019802, |
| "grad_norm": 1.0639638900756836, |
| "learning_rate": 4.878793438284268e-06, |
| "loss": 1.2673, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.8042650418888042, |
| "grad_norm": 1.0149368047714233, |
| "learning_rate": 4.877833684338698e-06, |
| "loss": 1.2479, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.8065498857578065, |
| "grad_norm": 1.1710435152053833, |
| "learning_rate": 4.876870240693608e-06, |
| "loss": 1.2775, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.8088347296268088, |
| "grad_norm": 1.1317570209503174, |
| "learning_rate": 4.875903108843979e-06, |
| "loss": 1.2732, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.8111195734958111, |
| "grad_norm": 1.0417158603668213, |
| "learning_rate": 4.874932290290517e-06, |
| "loss": 1.2647, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.8134044173648134, |
| "grad_norm": 1.073765516281128, |
| "learning_rate": 4.873957786539646e-06, |
| "loss": 1.2738, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.8156892612338157, |
| "grad_norm": 1.018481731414795, |
| "learning_rate": 4.872979599103511e-06, |
| "loss": 1.2509, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.817974105102818, |
| "grad_norm": 1.0737470388412476, |
| "learning_rate": 4.8719977294999695e-06, |
| "loss": 1.232, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.8202589489718203, |
| "grad_norm": 1.0921229124069214, |
| "learning_rate": 4.871012179252597e-06, |
| "loss": 1.2342, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.8225437928408226, |
| "grad_norm": 1.0502641201019287, |
| "learning_rate": 4.870022949890676e-06, |
| "loss": 1.2463, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.8248286367098249, |
| "grad_norm": 1.1755155324935913, |
| "learning_rate": 4.869030042949202e-06, |
| "loss": 1.2625, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.8271134805788272, |
| "grad_norm": 1.0167341232299805, |
| "learning_rate": 4.868033459968874e-06, |
| "loss": 1.2563, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.8293983244478293, |
| "grad_norm": 1.0481575727462769, |
| "learning_rate": 4.8670332024960954e-06, |
| "loss": 1.2541, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.8316831683168316, |
| "grad_norm": 1.0657804012298584, |
| "learning_rate": 4.866029272082973e-06, |
| "loss": 1.2444, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.8339680121858339, |
| "grad_norm": 1.0473397970199585, |
| "learning_rate": 4.865021670287311e-06, |
| "loss": 1.2356, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.8362528560548362, |
| "grad_norm": 1.011077880859375, |
| "learning_rate": 4.864010398672612e-06, |
| "loss": 1.2417, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.8385376999238385, |
| "grad_norm": 1.0485464334487915, |
| "learning_rate": 4.862995458808073e-06, |
| "loss": 1.2728, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.8408225437928408, |
| "grad_norm": 1.0683908462524414, |
| "learning_rate": 4.861976852268582e-06, |
| "loss": 1.2354, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.8431073876618431, |
| "grad_norm": 1.0323604345321655, |
| "learning_rate": 4.860954580634718e-06, |
| "loss": 1.2665, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.8453922315308454, |
| "grad_norm": 1.024782419204712, |
| "learning_rate": 4.859928645492746e-06, |
| "loss": 1.2515, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.8476770753998477, |
| "grad_norm": 1.02902090549469, |
| "learning_rate": 4.858899048434614e-06, |
| "loss": 1.2274, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.84996191926885, |
| "grad_norm": 1.0355148315429688, |
| "learning_rate": 4.857865791057957e-06, |
| "loss": 1.2289, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.8522467631378522, |
| "grad_norm": 1.0638132095336914, |
| "learning_rate": 4.856828874966086e-06, |
| "loss": 1.2245, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.8545316070068545, |
| "grad_norm": 1.0459909439086914, |
| "learning_rate": 4.8557883017679895e-06, |
| "loss": 1.2347, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.8568164508758568, |
| "grad_norm": 1.0818232297897339, |
| "learning_rate": 4.854744073078333e-06, |
| "loss": 1.2564, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.8591012947448591, |
| "grad_norm": 1.0551162958145142, |
| "learning_rate": 4.853696190517452e-06, |
| "loss": 1.2809, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.8613861386138614, |
| "grad_norm": 1.0419256687164307, |
| "learning_rate": 4.8526446557113525e-06, |
| "loss": 1.2532, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.8636709824828637, |
| "grad_norm": 1.058478832244873, |
| "learning_rate": 4.851589470291707e-06, |
| "loss": 1.229, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.865955826351866, |
| "grad_norm": 1.0275694131851196, |
| "learning_rate": 4.850530635895854e-06, |
| "loss": 1.2555, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.8682406702208683, |
| "grad_norm": 1.0653144121170044, |
| "learning_rate": 4.849468154166794e-06, |
| "loss": 1.2397, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.8705255140898706, |
| "grad_norm": 1.0227371454238892, |
| "learning_rate": 4.8484020267531855e-06, |
| "loss": 1.2568, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.8728103579588729, |
| "grad_norm": 1.0583505630493164, |
| "learning_rate": 4.847332255309346e-06, |
| "loss": 1.2489, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.8750952018278751, |
| "grad_norm": 1.0397239923477173, |
| "learning_rate": 4.846258841495246e-06, |
| "loss": 1.273, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.8773800456968773, |
| "grad_norm": 1.020776391029358, |
| "learning_rate": 4.845181786976509e-06, |
| "loss": 1.2257, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.8796648895658796, |
| "grad_norm": 1.0420705080032349, |
| "learning_rate": 4.844101093424407e-06, |
| "loss": 1.296, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.8819497334348819, |
| "grad_norm": 1.0465624332427979, |
| "learning_rate": 4.84301676251586e-06, |
| "loss": 1.2514, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.8842345773038842, |
| "grad_norm": 1.0915330648422241, |
| "learning_rate": 4.841928795933429e-06, |
| "loss": 1.2664, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.8865194211728865, |
| "grad_norm": 1.0246195793151855, |
| "learning_rate": 4.84083719536532e-06, |
| "loss": 1.2499, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.8888042650418888, |
| "grad_norm": 1.0145692825317383, |
| "learning_rate": 4.839741962505376e-06, |
| "loss": 1.2638, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.8910891089108911, |
| "grad_norm": 1.05404531955719, |
| "learning_rate": 4.838643099053077e-06, |
| "loss": 1.1875, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.8933739527798934, |
| "grad_norm": 1.1422752141952515, |
| "learning_rate": 4.837540606713538e-06, |
| "loss": 1.2496, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.8956587966488957, |
| "grad_norm": 1.0648959875106812, |
| "learning_rate": 4.8364344871975e-06, |
| "loss": 1.2375, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.897943640517898, |
| "grad_norm": 1.0459322929382324, |
| "learning_rate": 4.835324742221338e-06, |
| "loss": 1.2419, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.9002284843869002, |
| "grad_norm": 1.0693044662475586, |
| "learning_rate": 4.834211373507048e-06, |
| "loss": 1.2485, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.9025133282559025, |
| "grad_norm": 1.0930724143981934, |
| "learning_rate": 4.833094382782255e-06, |
| "loss": 1.2389, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.9047981721249048, |
| "grad_norm": 1.1270296573638916, |
| "learning_rate": 4.831973771780197e-06, |
| "loss": 1.2033, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.9070830159939071, |
| "grad_norm": 1.044074535369873, |
| "learning_rate": 4.830849542239735e-06, |
| "loss": 1.2464, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.9093678598629094, |
| "grad_norm": 1.0138458013534546, |
| "learning_rate": 4.829721695905343e-06, |
| "loss": 1.2473, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.9116527037319117, |
| "grad_norm": 1.1201279163360596, |
| "learning_rate": 4.828590234527107e-06, |
| "loss": 1.2729, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.913937547600914, |
| "grad_norm": 1.0771571397781372, |
| "learning_rate": 4.8274551598607214e-06, |
| "loss": 1.2665, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.9162223914699162, |
| "grad_norm": 1.0691912174224854, |
| "learning_rate": 4.8263164736674905e-06, |
| "loss": 1.2094, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.9185072353389185, |
| "grad_norm": 1.0740418434143066, |
| "learning_rate": 4.8251741777143205e-06, |
| "loss": 1.2879, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.9207920792079208, |
| "grad_norm": 1.0185081958770752, |
| "learning_rate": 4.824028273773719e-06, |
| "loss": 1.2459, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.9230769230769231, |
| "grad_norm": 1.0672869682312012, |
| "learning_rate": 4.822878763623792e-06, |
| "loss": 1.2394, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.9253617669459253, |
| "grad_norm": 1.08120858669281, |
| "learning_rate": 4.821725649048242e-06, |
| "loss": 1.2918, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.9276466108149276, |
| "grad_norm": 1.0407681465148926, |
| "learning_rate": 4.820568931836364e-06, |
| "loss": 1.2443, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.9299314546839299, |
| "grad_norm": 1.0847117900848389, |
| "learning_rate": 4.8194086137830445e-06, |
| "loss": 1.2505, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.9322162985529322, |
| "grad_norm": 1.0484883785247803, |
| "learning_rate": 4.818244696688754e-06, |
| "loss": 1.2469, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.9345011424219345, |
| "grad_norm": 1.0654011964797974, |
| "learning_rate": 4.817077182359553e-06, |
| "loss": 1.2544, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.9367859862909368, |
| "grad_norm": 1.108176589012146, |
| "learning_rate": 4.815906072607079e-06, |
| "loss": 1.2387, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.9390708301599391, |
| "grad_norm": 1.0624432563781738, |
| "learning_rate": 4.8147313692485495e-06, |
| "loss": 1.2488, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.9413556740289414, |
| "grad_norm": 1.0391454696655273, |
| "learning_rate": 4.813553074106761e-06, |
| "loss": 1.2514, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.9436405178979437, |
| "grad_norm": 1.1086232662200928, |
| "learning_rate": 4.812371189010081e-06, |
| "loss": 1.2694, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.945925361766946, |
| "grad_norm": 1.0448237657546997, |
| "learning_rate": 4.8111857157924465e-06, |
| "loss": 1.2366, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.9482102056359482, |
| "grad_norm": 1.0393203496932983, |
| "learning_rate": 4.809996656293367e-06, |
| "loss": 1.2747, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.9504950495049505, |
| "grad_norm": 1.083590030670166, |
| "learning_rate": 4.8088040123579106e-06, |
| "loss": 1.2167, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.9527798933739527, |
| "grad_norm": 1.071567177772522, |
| "learning_rate": 4.807607785836711e-06, |
| "loss": 1.2108, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.955064737242955, |
| "grad_norm": 1.0953818559646606, |
| "learning_rate": 4.8064079785859615e-06, |
| "loss": 1.2381, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.9573495811119573, |
| "grad_norm": 1.0628875494003296, |
| "learning_rate": 4.8052045924674105e-06, |
| "loss": 1.232, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.9596344249809596, |
| "grad_norm": 1.0838161706924438, |
| "learning_rate": 4.803997629348359e-06, |
| "loss": 1.2699, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.9619192688499619, |
| "grad_norm": 0.9980992078781128, |
| "learning_rate": 4.802787091101659e-06, |
| "loss": 1.2473, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.9642041127189642, |
| "grad_norm": 1.094283103942871, |
| "learning_rate": 4.801572979605712e-06, |
| "loss": 1.2656, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.9664889565879665, |
| "grad_norm": 1.0554611682891846, |
| "learning_rate": 4.800355296744461e-06, |
| "loss": 1.2584, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.9687738004569688, |
| "grad_norm": 1.1019188165664673, |
| "learning_rate": 4.799134044407392e-06, |
| "loss": 1.2877, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.9710586443259711, |
| "grad_norm": 1.087965726852417, |
| "learning_rate": 4.797909224489531e-06, |
| "loss": 1.2662, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.9733434881949733, |
| "grad_norm": 1.08269202709198, |
| "learning_rate": 4.796680838891438e-06, |
| "loss": 1.2419, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.9756283320639756, |
| "grad_norm": 1.071199893951416, |
| "learning_rate": 4.795448889519207e-06, |
| "loss": 1.2489, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.9779131759329779, |
| "grad_norm": 1.0306544303894043, |
| "learning_rate": 4.794213378284462e-06, |
| "loss": 1.2467, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.9801980198019802, |
| "grad_norm": 1.0567327737808228, |
| "learning_rate": 4.792974307104353e-06, |
| "loss": 1.2637, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.9824828636709825, |
| "grad_norm": 1.0448797941207886, |
| "learning_rate": 4.7917316779015554e-06, |
| "loss": 1.2244, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.9847677075399848, |
| "grad_norm": 1.0123138427734375, |
| "learning_rate": 4.790485492604264e-06, |
| "loss": 1.2326, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.9870525514089871, |
| "grad_norm": 1.0484559535980225, |
| "learning_rate": 4.789235753146192e-06, |
| "loss": 1.2436, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.9893373952779894, |
| "grad_norm": 1.0161617994308472, |
| "learning_rate": 4.787982461466568e-06, |
| "loss": 1.2185, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.9916222391469917, |
| "grad_norm": 1.0779787302017212, |
| "learning_rate": 4.786725619510134e-06, |
| "loss": 1.2256, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.993907083015994, |
| "grad_norm": 1.061590552330017, |
| "learning_rate": 4.785465229227139e-06, |
| "loss": 1.2747, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.9961919268849961, |
| "grad_norm": 1.102403163909912, |
| "learning_rate": 4.784201292573337e-06, |
| "loss": 1.2561, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.9984767707539984, |
| "grad_norm": 0.9936567544937134, |
| "learning_rate": 4.782933811509988e-06, |
| "loss": 1.2409, |
| "step": 437 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.9936567544937134, |
| "learning_rate": 4.781662788003851e-06, |
| "loss": 1.2271, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.0022848438690022, |
| "grad_norm": 1.4983975887298584, |
| "learning_rate": 4.780388224027179e-06, |
| "loss": 1.2312, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.0045696877380046, |
| "grad_norm": 1.0163486003875732, |
| "learning_rate": 4.779110121557723e-06, |
| "loss": 1.1992, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.0068545316070068, |
| "grad_norm": 1.0127511024475098, |
| "learning_rate": 4.777828482578722e-06, |
| "loss": 1.2135, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.0091393754760092, |
| "grad_norm": 1.0328449010849, |
| "learning_rate": 4.776543309078903e-06, |
| "loss": 1.2143, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.0114242193450114, |
| "grad_norm": 1.1579132080078125, |
| "learning_rate": 4.7752546030524775e-06, |
| "loss": 1.2051, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.0137090632140138, |
| "grad_norm": 1.0556671619415283, |
| "learning_rate": 4.77396236649914e-06, |
| "loss": 1.2136, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.015993907083016, |
| "grad_norm": 1.0315356254577637, |
| "learning_rate": 4.772666601424061e-06, |
| "loss": 1.2444, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.0182787509520184, |
| "grad_norm": 1.0836431980133057, |
| "learning_rate": 4.771367309837888e-06, |
| "loss": 1.1967, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.0205635948210205, |
| "grad_norm": 1.0516763925552368, |
| "learning_rate": 4.7700644937567385e-06, |
| "loss": 1.2012, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.022848438690023, |
| "grad_norm": 1.0447187423706055, |
| "learning_rate": 4.768758155202202e-06, |
| "loss": 1.2281, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.0251332825590251, |
| "grad_norm": 1.0647971630096436, |
| "learning_rate": 4.767448296201332e-06, |
| "loss": 1.1907, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.0274181264280273, |
| "grad_norm": 1.0792133808135986, |
| "learning_rate": 4.766134918786646e-06, |
| "loss": 1.2346, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.0297029702970297, |
| "grad_norm": 1.1311832666397095, |
| "learning_rate": 4.764818024996117e-06, |
| "loss": 1.2652, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.031987814166032, |
| "grad_norm": 1.090455174446106, |
| "learning_rate": 4.763497616873181e-06, |
| "loss": 1.2258, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.0342726580350343, |
| "grad_norm": 1.0815131664276123, |
| "learning_rate": 4.7621736964667204e-06, |
| "loss": 1.2233, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.0365575019040365, |
| "grad_norm": 1.112673044204712, |
| "learning_rate": 4.760846265831073e-06, |
| "loss": 1.2136, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.038842345773039, |
| "grad_norm": 1.068178653717041, |
| "learning_rate": 4.759515327026019e-06, |
| "loss": 1.214, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.041127189642041, |
| "grad_norm": 1.079059362411499, |
| "learning_rate": 4.758180882116788e-06, |
| "loss": 1.2024, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.0434120335110435, |
| "grad_norm": 1.1010431051254272, |
| "learning_rate": 4.756842933174044e-06, |
| "loss": 1.2239, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.0456968773800457, |
| "grad_norm": 1.1039162874221802, |
| "learning_rate": 4.755501482273892e-06, |
| "loss": 1.2212, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.047981721249048, |
| "grad_norm": 1.0536257028579712, |
| "learning_rate": 4.754156531497869e-06, |
| "loss": 1.1672, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.0502665651180503, |
| "grad_norm": 1.085410475730896, |
| "learning_rate": 4.752808082932943e-06, |
| "loss": 1.2471, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.0525514089870525, |
| "grad_norm": 1.0432335138320923, |
| "learning_rate": 4.751456138671512e-06, |
| "loss": 1.2345, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.0548362528560549, |
| "grad_norm": 1.072708249092102, |
| "learning_rate": 4.750100700811395e-06, |
| "loss": 1.2328, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.057121096725057, |
| "grad_norm": 1.0852991342544556, |
| "learning_rate": 4.748741771455835e-06, |
| "loss": 1.19, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.0594059405940595, |
| "grad_norm": 1.0906398296356201, |
| "learning_rate": 4.747379352713489e-06, |
| "loss": 1.229, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.0616907844630616, |
| "grad_norm": 1.055309772491455, |
| "learning_rate": 4.746013446698432e-06, |
| "loss": 1.2419, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.063975628332064, |
| "grad_norm": 1.0667710304260254, |
| "learning_rate": 4.744644055530149e-06, |
| "loss": 1.1943, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.0662604722010662, |
| "grad_norm": 1.091098427772522, |
| "learning_rate": 4.743271181333533e-06, |
| "loss": 1.171, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.0685453160700686, |
| "grad_norm": 1.0701195001602173, |
| "learning_rate": 4.741894826238882e-06, |
| "loss": 1.2163, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.0708301599390708, |
| "grad_norm": 1.0526652336120605, |
| "learning_rate": 4.740514992381893e-06, |
| "loss": 1.2329, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.073115003808073, |
| "grad_norm": 1.0725401639938354, |
| "learning_rate": 4.739131681903666e-06, |
| "loss": 1.1793, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.0753998476770754, |
| "grad_norm": 1.126091480255127, |
| "learning_rate": 4.737744896950689e-06, |
| "loss": 1.1769, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.0776846915460776, |
| "grad_norm": 1.039844036102295, |
| "learning_rate": 4.736354639674847e-06, |
| "loss": 1.205, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.07996953541508, |
| "grad_norm": 1.0432695150375366, |
| "learning_rate": 4.734960912233411e-06, |
| "loss": 1.1909, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.0822543792840822, |
| "grad_norm": 1.0890403985977173, |
| "learning_rate": 4.7335637167890366e-06, |
| "loss": 1.1928, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.0845392231530846, |
| "grad_norm": 1.0710458755493164, |
| "learning_rate": 4.732163055509759e-06, |
| "loss": 1.2402, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.0868240670220868, |
| "grad_norm": 1.0940525531768799, |
| "learning_rate": 4.730758930568997e-06, |
| "loss": 1.181, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.0891089108910892, |
| "grad_norm": 1.177641749382019, |
| "learning_rate": 4.729351344145536e-06, |
| "loss": 1.2232, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.0913937547600914, |
| "grad_norm": 1.1401522159576416, |
| "learning_rate": 4.72794029842354e-06, |
| "loss": 1.1878, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.0936785986290938, |
| "grad_norm": 1.0874228477478027, |
| "learning_rate": 4.726525795592535e-06, |
| "loss": 1.1658, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.095963442498096, |
| "grad_norm": 1.0325064659118652, |
| "learning_rate": 4.725107837847414e-06, |
| "loss": 1.2084, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.0982482863670981, |
| "grad_norm": 1.0492074489593506, |
| "learning_rate": 4.723686427388434e-06, |
| "loss": 1.2208, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.1005331302361006, |
| "grad_norm": 1.1399495601654053, |
| "learning_rate": 4.722261566421204e-06, |
| "loss": 1.2158, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.1028179741051027, |
| "grad_norm": 1.1156668663024902, |
| "learning_rate": 4.72083325715669e-06, |
| "loss": 1.2252, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.1051028179741051, |
| "grad_norm": 1.072943091392517, |
| "learning_rate": 4.719401501811209e-06, |
| "loss": 1.2381, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.1073876618431073, |
| "grad_norm": 1.0337257385253906, |
| "learning_rate": 4.717966302606424e-06, |
| "loss": 1.1782, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.1096725057121097, |
| "grad_norm": 1.0744901895523071, |
| "learning_rate": 4.716527661769344e-06, |
| "loss": 1.2412, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.111957349581112, |
| "grad_norm": 1.0331535339355469, |
| "learning_rate": 4.715085581532316e-06, |
| "loss": 1.1869, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.1142421934501143, |
| "grad_norm": 1.0795518159866333, |
| "learning_rate": 4.7136400641330245e-06, |
| "loss": 1.214, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.1165270373191165, |
| "grad_norm": 1.084125280380249, |
| "learning_rate": 4.71219111181449e-06, |
| "loss": 1.2049, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.118811881188119, |
| "grad_norm": 1.1166882514953613, |
| "learning_rate": 4.710738726825059e-06, |
| "loss": 1.2143, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.121096725057121, |
| "grad_norm": 1.0764187574386597, |
| "learning_rate": 4.709282911418408e-06, |
| "loss": 1.2301, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.1233815689261233, |
| "grad_norm": 1.069149374961853, |
| "learning_rate": 4.7078236678535335e-06, |
| "loss": 1.2094, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.1256664127951257, |
| "grad_norm": 1.0744988918304443, |
| "learning_rate": 4.7063609983947535e-06, |
| "loss": 1.1893, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.1279512566641279, |
| "grad_norm": 1.090267300605774, |
| "learning_rate": 4.704894905311701e-06, |
| "loss": 1.1575, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.1302361005331303, |
| "grad_norm": 1.067543625831604, |
| "learning_rate": 4.703425390879323e-06, |
| "loss": 1.1801, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.1325209444021325, |
| "grad_norm": 1.0365897417068481, |
| "learning_rate": 4.701952457377874e-06, |
| "loss": 1.2197, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.1348057882711349, |
| "grad_norm": 1.066163420677185, |
| "learning_rate": 4.700476107092913e-06, |
| "loss": 1.2156, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.137090632140137, |
| "grad_norm": 1.1297317743301392, |
| "learning_rate": 4.698996342315303e-06, |
| "loss": 1.2064, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.1393754760091395, |
| "grad_norm": 1.069610834121704, |
| "learning_rate": 4.697513165341204e-06, |
| "loss": 1.1986, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.1416603198781416, |
| "grad_norm": 1.0844234228134155, |
| "learning_rate": 4.696026578472073e-06, |
| "loss": 1.1892, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.1439451637471438, |
| "grad_norm": 1.079389214515686, |
| "learning_rate": 4.694536584014653e-06, |
| "loss": 1.201, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.1462300076161462, |
| "grad_norm": 1.075682520866394, |
| "learning_rate": 4.693043184280978e-06, |
| "loss": 1.2261, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.1485148514851484, |
| "grad_norm": 1.0244231224060059, |
| "learning_rate": 4.69154638158837e-06, |
| "loss": 1.2048, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.1507996953541508, |
| "grad_norm": 1.0907280445098877, |
| "learning_rate": 4.690046178259423e-06, |
| "loss": 1.2202, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.153084539223153, |
| "grad_norm": 1.097701907157898, |
| "learning_rate": 4.688542576622013e-06, |
| "loss": 1.1781, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.1553693830921554, |
| "grad_norm": 1.0993037223815918, |
| "learning_rate": 4.687035579009288e-06, |
| "loss": 1.2113, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.1576542269611576, |
| "grad_norm": 1.085300087928772, |
| "learning_rate": 4.685525187759666e-06, |
| "loss": 1.1996, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.15993907083016, |
| "grad_norm": 1.0483977794647217, |
| "learning_rate": 4.684011405216832e-06, |
| "loss": 1.2343, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.1622239146991622, |
| "grad_norm": 1.064441442489624, |
| "learning_rate": 4.682494233729729e-06, |
| "loss": 1.2405, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.1645087585681646, |
| "grad_norm": 1.05643630027771, |
| "learning_rate": 4.680973675652564e-06, |
| "loss": 1.2112, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.1667936024371668, |
| "grad_norm": 1.0908793210983276, |
| "learning_rate": 4.679449733344796e-06, |
| "loss": 1.2077, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.1690784463061692, |
| "grad_norm": 1.1248105764389038, |
| "learning_rate": 4.677922409171136e-06, |
| "loss": 1.1987, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.1713632901751714, |
| "grad_norm": 1.0605143308639526, |
| "learning_rate": 4.6763917055015414e-06, |
| "loss": 1.2027, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.1736481340441736, |
| "grad_norm": 1.0496442317962646, |
| "learning_rate": 4.674857624711216e-06, |
| "loss": 1.2259, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.175932977913176, |
| "grad_norm": 1.1550832986831665, |
| "learning_rate": 4.673320169180601e-06, |
| "loss": 1.2418, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.1782178217821782, |
| "grad_norm": 1.1532083749771118, |
| "learning_rate": 4.671779341295378e-06, |
| "loss": 1.2265, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.1805026656511806, |
| "grad_norm": 1.081101417541504, |
| "learning_rate": 4.670235143446457e-06, |
| "loss": 1.2078, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.1827875095201827, |
| "grad_norm": 1.0701441764831543, |
| "learning_rate": 4.668687578029983e-06, |
| "loss": 1.2252, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.1850723533891852, |
| "grad_norm": 1.0859651565551758, |
| "learning_rate": 4.667136647447319e-06, |
| "loss": 1.2131, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.1873571972581873, |
| "grad_norm": 1.122533917427063, |
| "learning_rate": 4.6655823541050575e-06, |
| "loss": 1.1608, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.1896420411271897, |
| "grad_norm": 1.0594309568405151, |
| "learning_rate": 4.664024700415002e-06, |
| "loss": 1.2151, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.191926884996192, |
| "grad_norm": 1.1281721591949463, |
| "learning_rate": 4.662463688794175e-06, |
| "loss": 1.2197, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.194211728865194, |
| "grad_norm": 1.1049555540084839, |
| "learning_rate": 4.660899321664808e-06, |
| "loss": 1.2416, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.1964965727341965, |
| "grad_norm": 1.1022320985794067, |
| "learning_rate": 4.65933160145434e-06, |
| "loss": 1.1842, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.1987814166031987, |
| "grad_norm": 1.1182475090026855, |
| "learning_rate": 4.657760530595411e-06, |
| "loss": 1.1417, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.201066260472201, |
| "grad_norm": 1.1111788749694824, |
| "learning_rate": 4.656186111525863e-06, |
| "loss": 1.2092, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.2033511043412033, |
| "grad_norm": 1.0595399141311646, |
| "learning_rate": 4.654608346688731e-06, |
| "loss": 1.1549, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.2056359482102057, |
| "grad_norm": 1.1151765584945679, |
| "learning_rate": 4.6530272385322426e-06, |
| "loss": 1.2469, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.2079207920792079, |
| "grad_norm": 1.0334808826446533, |
| "learning_rate": 4.651442789509813e-06, |
| "loss": 1.229, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.2102056359482103, |
| "grad_norm": 1.0765459537506104, |
| "learning_rate": 4.649855002080044e-06, |
| "loss": 1.2163, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.2124904798172125, |
| "grad_norm": 1.1509029865264893, |
| "learning_rate": 4.648263878706712e-06, |
| "loss": 1.2101, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.2147753236862147, |
| "grad_norm": 1.1111629009246826, |
| "learning_rate": 4.646669421858776e-06, |
| "loss": 1.2379, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.217060167555217, |
| "grad_norm": 1.1683619022369385, |
| "learning_rate": 4.645071634010363e-06, |
| "loss": 1.2011, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.2193450114242192, |
| "grad_norm": 1.2373298406600952, |
| "learning_rate": 4.643470517640772e-06, |
| "loss": 1.1502, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.2216298552932217, |
| "grad_norm": 1.080675482749939, |
| "learning_rate": 4.641866075234463e-06, |
| "loss": 1.2173, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.2239146991622238, |
| "grad_norm": 1.0971184968948364, |
| "learning_rate": 4.640258309281062e-06, |
| "loss": 1.2117, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.2261995430312262, |
| "grad_norm": 1.183856725692749, |
| "learning_rate": 4.638647222275349e-06, |
| "loss": 1.2137, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.2284843869002284, |
| "grad_norm": 1.2277085781097412, |
| "learning_rate": 4.637032816717256e-06, |
| "loss": 1.1977, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.2307692307692308, |
| "grad_norm": 1.1087970733642578, |
| "learning_rate": 4.6354150951118676e-06, |
| "loss": 1.2256, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.233054074638233, |
| "grad_norm": 1.0706229209899902, |
| "learning_rate": 4.633794059969413e-06, |
| "loss": 1.2429, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.2353389185072354, |
| "grad_norm": 1.1261042356491089, |
| "learning_rate": 4.632169713805262e-06, |
| "loss": 1.219, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.2376237623762376, |
| "grad_norm": 1.1817506551742554, |
| "learning_rate": 4.630542059139923e-06, |
| "loss": 1.2367, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.23990860624524, |
| "grad_norm": 1.1145075559616089, |
| "learning_rate": 4.628911098499039e-06, |
| "loss": 1.2029, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.2421934501142422, |
| "grad_norm": 1.1309086084365845, |
| "learning_rate": 4.62727683441338e-06, |
| "loss": 1.2374, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.2444782939832444, |
| "grad_norm": 1.0949103832244873, |
| "learning_rate": 4.6256392694188445e-06, |
| "loss": 1.2204, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.2467631378522468, |
| "grad_norm": 1.2004865407943726, |
| "learning_rate": 4.6239984060564535e-06, |
| "loss": 1.2327, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.249047981721249, |
| "grad_norm": 1.286232829093933, |
| "learning_rate": 4.622354246872344e-06, |
| "loss": 1.1838, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.2513328255902514, |
| "grad_norm": 1.0974533557891846, |
| "learning_rate": 4.620706794417769e-06, |
| "loss": 1.1678, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.2536176694592536, |
| "grad_norm": 1.0960924625396729, |
| "learning_rate": 4.61905605124909e-06, |
| "loss": 1.2314, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.255902513328256, |
| "grad_norm": 1.1535454988479614, |
| "learning_rate": 4.617402019927776e-06, |
| "loss": 1.1928, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.2581873571972582, |
| "grad_norm": 1.2693071365356445, |
| "learning_rate": 4.615744703020396e-06, |
| "loss": 1.1966, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.2604722010662606, |
| "grad_norm": 1.1645997762680054, |
| "learning_rate": 4.614084103098623e-06, |
| "loss": 1.2251, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.2627570449352628, |
| "grad_norm": 1.1186461448669434, |
| "learning_rate": 4.6124202227392175e-06, |
| "loss": 1.2037, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.265041888804265, |
| "grad_norm": 1.1100102663040161, |
| "learning_rate": 4.610753064524034e-06, |
| "loss": 1.2011, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.2673267326732673, |
| "grad_norm": 1.1173806190490723, |
| "learning_rate": 4.609082631040012e-06, |
| "loss": 1.1871, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.2696115765422697, |
| "grad_norm": 1.1128157377243042, |
| "learning_rate": 4.6074089248791735e-06, |
| "loss": 1.1965, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.271896420411272, |
| "grad_norm": 1.0940717458724976, |
| "learning_rate": 4.60573194863862e-06, |
| "loss": 1.193, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.2741812642802741, |
| "grad_norm": 1.0955843925476074, |
| "learning_rate": 4.604051704920526e-06, |
| "loss": 1.187, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.2764661081492765, |
| "grad_norm": 1.0802319049835205, |
| "learning_rate": 4.602368196332134e-06, |
| "loss": 1.1753, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.2787509520182787, |
| "grad_norm": 1.1677578687667847, |
| "learning_rate": 4.600681425485757e-06, |
| "loss": 1.1964, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.2810357958872811, |
| "grad_norm": 1.0803853273391724, |
| "learning_rate": 4.598991394998768e-06, |
| "loss": 1.2052, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.2833206397562833, |
| "grad_norm": 1.128667950630188, |
| "learning_rate": 4.5972981074935975e-06, |
| "loss": 1.1774, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.2856054836252855, |
| "grad_norm": 1.0685629844665527, |
| "learning_rate": 4.59560156559773e-06, |
| "loss": 1.1897, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.287890327494288, |
| "grad_norm": 1.1464303731918335, |
| "learning_rate": 4.593901771943702e-06, |
| "loss": 1.1809, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.2901751713632903, |
| "grad_norm": 1.1095281839370728, |
| "learning_rate": 4.592198729169091e-06, |
| "loss": 1.2118, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.2924600152322925, |
| "grad_norm": 1.0897274017333984, |
| "learning_rate": 4.5904924399165215e-06, |
| "loss": 1.177, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.2947448591012947, |
| "grad_norm": 1.0702495574951172, |
| "learning_rate": 4.588782906833653e-06, |
| "loss": 1.1872, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.297029702970297, |
| "grad_norm": 1.0990184545516968, |
| "learning_rate": 4.587070132573178e-06, |
| "loss": 1.1903, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.2993145468392993, |
| "grad_norm": 1.121097207069397, |
| "learning_rate": 4.58535411979282e-06, |
| "loss": 1.2469, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.3015993907083017, |
| "grad_norm": 1.0787534713745117, |
| "learning_rate": 4.583634871155326e-06, |
| "loss": 1.1995, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.3038842345773038, |
| "grad_norm": 1.0721417665481567, |
| "learning_rate": 4.581912389328466e-06, |
| "loss": 1.1889, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.306169078446306, |
| "grad_norm": 1.1017696857452393, |
| "learning_rate": 4.580186676985024e-06, |
| "loss": 1.2133, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.3084539223153084, |
| "grad_norm": 1.1040468215942383, |
| "learning_rate": 4.578457736802801e-06, |
| "loss": 1.1894, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.3107387661843108, |
| "grad_norm": 1.0856465101242065, |
| "learning_rate": 4.576725571464604e-06, |
| "loss": 1.2234, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.313023610053313, |
| "grad_norm": 1.0786073207855225, |
| "learning_rate": 4.574990183658244e-06, |
| "loss": 1.1989, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.3153084539223152, |
| "grad_norm": 1.0701881647109985, |
| "learning_rate": 4.573251576076532e-06, |
| "loss": 1.2095, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.3175932977913176, |
| "grad_norm": 1.0697689056396484, |
| "learning_rate": 4.5715097514172794e-06, |
| "loss": 1.2198, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.3198781416603198, |
| "grad_norm": 1.1303515434265137, |
| "learning_rate": 4.569764712383284e-06, |
| "loss": 1.2456, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.3221629855293222, |
| "grad_norm": 1.1471296548843384, |
| "learning_rate": 4.5680164616823355e-06, |
| "loss": 1.2155, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.3244478293983244, |
| "grad_norm": 1.0679783821105957, |
| "learning_rate": 4.566265002027204e-06, |
| "loss": 1.2346, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.3267326732673268, |
| "grad_norm": 1.087063193321228, |
| "learning_rate": 4.564510336135642e-06, |
| "loss": 1.1735, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.329017517136329, |
| "grad_norm": 1.190617322921753, |
| "learning_rate": 4.562752466730374e-06, |
| "loss": 1.2472, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.3313023610053314, |
| "grad_norm": 1.0759129524230957, |
| "learning_rate": 4.560991396539099e-06, |
| "loss": 1.2263, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.3335872048743336, |
| "grad_norm": 1.080640196800232, |
| "learning_rate": 4.559227128294479e-06, |
| "loss": 1.1773, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.3358720487433358, |
| "grad_norm": 1.0868074893951416, |
| "learning_rate": 4.5574596647341414e-06, |
| "loss": 1.254, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.3381568926123382, |
| "grad_norm": 1.0621445178985596, |
| "learning_rate": 4.55568900860067e-06, |
| "loss": 1.2091, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.3404417364813406, |
| "grad_norm": 1.1124675273895264, |
| "learning_rate": 4.553915162641602e-06, |
| "loss": 1.2093, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.3427265803503428, |
| "grad_norm": 1.0877987146377563, |
| "learning_rate": 4.552138129609428e-06, |
| "loss": 1.2399, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.345011424219345, |
| "grad_norm": 1.1441737413406372, |
| "learning_rate": 4.550357912261579e-06, |
| "loss": 1.2274, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.3472962680883473, |
| "grad_norm": 1.131813645362854, |
| "learning_rate": 4.548574513360431e-06, |
| "loss": 1.2296, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.3495811119573495, |
| "grad_norm": 1.0901340246200562, |
| "learning_rate": 4.546787935673294e-06, |
| "loss": 1.2151, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.351865955826352, |
| "grad_norm": 1.1450812816619873, |
| "learning_rate": 4.544998181972412e-06, |
| "loss": 1.2054, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.3541507996953541, |
| "grad_norm": 1.0988374948501587, |
| "learning_rate": 4.543205255034958e-06, |
| "loss": 1.2133, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.3564356435643563, |
| "grad_norm": 1.0787577629089355, |
| "learning_rate": 4.541409157643027e-06, |
| "loss": 1.1972, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.3587204874333587, |
| "grad_norm": 1.0694243907928467, |
| "learning_rate": 4.539609892583637e-06, |
| "loss": 1.2182, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.3610053313023611, |
| "grad_norm": 1.065865397453308, |
| "learning_rate": 4.537807462648716e-06, |
| "loss": 1.2057, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.3632901751713633, |
| "grad_norm": 1.0816274881362915, |
| "learning_rate": 4.5360018706351075e-06, |
| "loss": 1.1846, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.3655750190403655, |
| "grad_norm": 1.105301022529602, |
| "learning_rate": 4.5341931193445585e-06, |
| "loss": 1.2219, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.367859862909368, |
| "grad_norm": 1.1194454431533813, |
| "learning_rate": 4.5323812115837215e-06, |
| "loss": 1.2021, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.37014470677837, |
| "grad_norm": 1.0899710655212402, |
| "learning_rate": 4.530566150164145e-06, |
| "loss": 1.173, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.3724295506473725, |
| "grad_norm": 1.0824511051177979, |
| "learning_rate": 4.528747937902271e-06, |
| "loss": 1.2131, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.3747143945163747, |
| "grad_norm": 1.0796427726745605, |
| "learning_rate": 4.52692657761943e-06, |
| "loss": 1.1911, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.376999238385377, |
| "grad_norm": 1.1047371625900269, |
| "learning_rate": 4.525102072141839e-06, |
| "loss": 1.1734, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.3792840822543793, |
| "grad_norm": 1.1101821660995483, |
| "learning_rate": 4.523274424300596e-06, |
| "loss": 1.2274, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.3815689261233817, |
| "grad_norm": 1.1410820484161377, |
| "learning_rate": 4.521443636931671e-06, |
| "loss": 1.2, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.3838537699923839, |
| "grad_norm": 1.0687155723571777, |
| "learning_rate": 4.5196097128759095e-06, |
| "loss": 1.2028, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.386138613861386, |
| "grad_norm": 1.0923937559127808, |
| "learning_rate": 4.517772654979024e-06, |
| "loss": 1.2522, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.3884234577303884, |
| "grad_norm": 1.1132218837738037, |
| "learning_rate": 4.515932466091587e-06, |
| "loss": 1.1797, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.3907083015993906, |
| "grad_norm": 1.182809591293335, |
| "learning_rate": 4.514089149069033e-06, |
| "loss": 1.1885, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.392993145468393, |
| "grad_norm": 1.064723253250122, |
| "learning_rate": 4.512242706771647e-06, |
| "loss": 1.174, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.3952779893373952, |
| "grad_norm": 1.1065499782562256, |
| "learning_rate": 4.510393142064567e-06, |
| "loss": 1.1919, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.3975628332063976, |
| "grad_norm": 1.0989713668823242, |
| "learning_rate": 4.508540457817772e-06, |
| "loss": 1.1835, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.3998476770753998, |
| "grad_norm": 1.0850595235824585, |
| "learning_rate": 4.506684656906085e-06, |
| "loss": 1.1945, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.4021325209444022, |
| "grad_norm": 1.1323665380477905, |
| "learning_rate": 4.5048257422091655e-06, |
| "loss": 1.209, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.4044173648134044, |
| "grad_norm": 1.1112160682678223, |
| "learning_rate": 4.5029637166115e-06, |
| "loss": 1.1742, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.4067022086824066, |
| "grad_norm": 1.1052254438400269, |
| "learning_rate": 4.5010985830024086e-06, |
| "loss": 1.1916, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.408987052551409, |
| "grad_norm": 1.0695083141326904, |
| "learning_rate": 4.4992303442760286e-06, |
| "loss": 1.1829, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.4112718964204114, |
| "grad_norm": 1.0871409177780151, |
| "learning_rate": 4.497359003331318e-06, |
| "loss": 1.2053, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.4135567402894136, |
| "grad_norm": 1.093496322631836, |
| "learning_rate": 4.495484563072049e-06, |
| "loss": 1.1825, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.4158415841584158, |
| "grad_norm": 1.0716845989227295, |
| "learning_rate": 4.493607026406802e-06, |
| "loss": 1.1911, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.4181264280274182, |
| "grad_norm": 1.1274534463882446, |
| "learning_rate": 4.4917263962489635e-06, |
| "loss": 1.1737, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.4204112718964204, |
| "grad_norm": 1.082309603691101, |
| "learning_rate": 4.489842675516718e-06, |
| "loss": 1.1986, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.4226961157654228, |
| "grad_norm": 1.0890616178512573, |
| "learning_rate": 4.487955867133047e-06, |
| "loss": 1.2273, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.424980959634425, |
| "grad_norm": 1.0633172988891602, |
| "learning_rate": 4.486065974025723e-06, |
| "loss": 1.1834, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.4272658035034271, |
| "grad_norm": 1.0931994915008545, |
| "learning_rate": 4.484172999127305e-06, |
| "loss": 1.1976, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.4295506473724295, |
| "grad_norm": 1.1375906467437744, |
| "learning_rate": 4.482276945375135e-06, |
| "loss": 1.2093, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.431835491241432, |
| "grad_norm": 1.3243980407714844, |
| "learning_rate": 4.480377815711331e-06, |
| "loss": 1.2102, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.4341203351104341, |
| "grad_norm": 1.0940284729003906, |
| "learning_rate": 4.478475613082783e-06, |
| "loss": 1.1888, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.4364051789794363, |
| "grad_norm": 1.1363506317138672, |
| "learning_rate": 4.4765703404411534e-06, |
| "loss": 1.1833, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.4386900228484387, |
| "grad_norm": 1.1287343502044678, |
| "learning_rate": 4.474662000742864e-06, |
| "loss": 1.2344, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.440974866717441, |
| "grad_norm": 1.3280748128890991, |
| "learning_rate": 4.472750596949098e-06, |
| "loss": 1.2025, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.4432597105864433, |
| "grad_norm": 1.1119890213012695, |
| "learning_rate": 4.470836132025793e-06, |
| "loss": 1.1586, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.4455445544554455, |
| "grad_norm": 1.066416621208191, |
| "learning_rate": 4.4689186089436365e-06, |
| "loss": 1.1717, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.447829398324448, |
| "grad_norm": 1.0481845140457153, |
| "learning_rate": 4.4669980306780605e-06, |
| "loss": 1.1949, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.45011424219345, |
| "grad_norm": 1.094254732131958, |
| "learning_rate": 4.4650744002092384e-06, |
| "loss": 1.2005, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.4523990860624525, |
| "grad_norm": 1.1029901504516602, |
| "learning_rate": 4.46314772052208e-06, |
| "loss": 1.1956, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.4546839299314547, |
| "grad_norm": 1.129492163658142, |
| "learning_rate": 4.461217994606225e-06, |
| "loss": 1.2053, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.4569687738004569, |
| "grad_norm": 1.1537097692489624, |
| "learning_rate": 4.459285225456044e-06, |
| "loss": 1.1668, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.4592536176694593, |
| "grad_norm": 1.0732276439666748, |
| "learning_rate": 4.457349416070626e-06, |
| "loss": 1.2107, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.4615384615384617, |
| "grad_norm": 1.186018943786621, |
| "learning_rate": 4.455410569453777e-06, |
| "loss": 1.1789, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.4638233054074639, |
| "grad_norm": 1.1642903089523315, |
| "learning_rate": 4.453468688614019e-06, |
| "loss": 1.2169, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.466108149276466, |
| "grad_norm": 1.1415472030639648, |
| "learning_rate": 4.451523776564581e-06, |
| "loss": 1.1716, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.4683929931454685, |
| "grad_norm": 1.1644552946090698, |
| "learning_rate": 4.449575836323394e-06, |
| "loss": 1.1497, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.4706778370144706, |
| "grad_norm": 1.2200912237167358, |
| "learning_rate": 4.447624870913091e-06, |
| "loss": 1.2289, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.472962680883473, |
| "grad_norm": 1.1080158948898315, |
| "learning_rate": 4.445670883360996e-06, |
| "loss": 1.1378, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.4752475247524752, |
| "grad_norm": 1.1372804641723633, |
| "learning_rate": 4.443713876699124e-06, |
| "loss": 1.1639, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.4775323686214774, |
| "grad_norm": 1.1383754014968872, |
| "learning_rate": 4.441753853964174e-06, |
| "loss": 1.1558, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.4798172124904798, |
| "grad_norm": 1.1565297842025757, |
| "learning_rate": 4.439790818197527e-06, |
| "loss": 1.242, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.4821020563594822, |
| "grad_norm": 1.156384825706482, |
| "learning_rate": 4.4378247724452375e-06, |
| "loss": 1.2241, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.4843869002284844, |
| "grad_norm": 1.2158401012420654, |
| "learning_rate": 4.43585571975803e-06, |
| "loss": 1.1977, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.4866717440974866, |
| "grad_norm": 1.2885035276412964, |
| "learning_rate": 4.433883663191297e-06, |
| "loss": 1.1916, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.488956587966489, |
| "grad_norm": 1.1530733108520508, |
| "learning_rate": 4.431908605805092e-06, |
| "loss": 1.2362, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.4912414318354912, |
| "grad_norm": 1.1524220705032349, |
| "learning_rate": 4.429930550664121e-06, |
| "loss": 1.2263, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.4935262757044936, |
| "grad_norm": 1.1547584533691406, |
| "learning_rate": 4.427949500837749e-06, |
| "loss": 1.1478, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.4958111195734958, |
| "grad_norm": 1.1083858013153076, |
| "learning_rate": 4.425965459399979e-06, |
| "loss": 1.2183, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.498095963442498, |
| "grad_norm": 1.130506992340088, |
| "learning_rate": 4.423978429429463e-06, |
| "loss": 1.1923, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.5003808073115004, |
| "grad_norm": 1.106553077697754, |
| "learning_rate": 4.421988414009488e-06, |
| "loss": 1.192, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.5026656511805028, |
| "grad_norm": 1.186131477355957, |
| "learning_rate": 4.419995416227973e-06, |
| "loss": 1.19, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.504950495049505, |
| "grad_norm": 1.0756375789642334, |
| "learning_rate": 4.417999439177465e-06, |
| "loss": 1.1992, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.5072353389185071, |
| "grad_norm": 1.1160579919815063, |
| "learning_rate": 4.416000485955135e-06, |
| "loss": 1.1747, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.5095201827875095, |
| "grad_norm": 1.1178874969482422, |
| "learning_rate": 4.413998559662771e-06, |
| "loss": 1.1654, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.511805026656512, |
| "grad_norm": 1.0919098854064941, |
| "learning_rate": 4.411993663406774e-06, |
| "loss": 1.2013, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.5140898705255141, |
| "grad_norm": 1.058582067489624, |
| "learning_rate": 4.409985800298155e-06, |
| "loss": 1.1823, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.5163747143945163, |
| "grad_norm": 1.0792784690856934, |
| "learning_rate": 4.407974973452527e-06, |
| "loss": 1.2013, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.5186595582635185, |
| "grad_norm": 1.112774133682251, |
| "learning_rate": 4.405961185990103e-06, |
| "loss": 1.2005, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.520944402132521, |
| "grad_norm": 1.1190800666809082, |
| "learning_rate": 4.403944441035691e-06, |
| "loss": 1.2146, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.5232292460015233, |
| "grad_norm": 1.1045669317245483, |
| "learning_rate": 4.401924741718685e-06, |
| "loss": 1.2217, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.5255140898705255, |
| "grad_norm": 1.1048752069473267, |
| "learning_rate": 4.399902091173065e-06, |
| "loss": 1.1944, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.5277989337395277, |
| "grad_norm": 1.0909706354141235, |
| "learning_rate": 4.397876492537392e-06, |
| "loss": 1.2058, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.53008377760853, |
| "grad_norm": 1.1354328393936157, |
| "learning_rate": 4.3958479489548e-06, |
| "loss": 1.2164, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.5323686214775325, |
| "grad_norm": 1.1490201950073242, |
| "learning_rate": 4.393816463572993e-06, |
| "loss": 1.182, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.5346534653465347, |
| "grad_norm": 1.1395319700241089, |
| "learning_rate": 4.391782039544239e-06, |
| "loss": 1.2201, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.5369383092155369, |
| "grad_norm": 1.0788644552230835, |
| "learning_rate": 4.389744680025366e-06, |
| "loss": 1.2212, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.5392231530845393, |
| "grad_norm": 1.0663102865219116, |
| "learning_rate": 4.387704388177759e-06, |
| "loss": 1.1872, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.5415079969535415, |
| "grad_norm": 1.1177600622177124, |
| "learning_rate": 4.3856611671673505e-06, |
| "loss": 1.2032, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.5437928408225439, |
| "grad_norm": 1.1109418869018555, |
| "learning_rate": 4.383615020164621e-06, |
| "loss": 1.2041, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.546077684691546, |
| "grad_norm": 1.096182107925415, |
| "learning_rate": 4.3815659503445875e-06, |
| "loss": 1.1988, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.5483625285605482, |
| "grad_norm": 1.2027829885482788, |
| "learning_rate": 4.379513960886807e-06, |
| "loss": 1.1812, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.5506473724295506, |
| "grad_norm": 1.0674421787261963, |
| "learning_rate": 4.377459054975363e-06, |
| "loss": 1.1948, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.552932216298553, |
| "grad_norm": 1.0463448762893677, |
| "learning_rate": 4.375401235798866e-06, |
| "loss": 1.2174, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.5552170601675552, |
| "grad_norm": 1.1295356750488281, |
| "learning_rate": 4.373340506550447e-06, |
| "loss": 1.2013, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.5575019040365574, |
| "grad_norm": 1.116245150566101, |
| "learning_rate": 4.3712768704277535e-06, |
| "loss": 1.1983, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.5597867479055598, |
| "grad_norm": 1.1043322086334229, |
| "learning_rate": 4.369210330632942e-06, |
| "loss": 1.2042, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.5620715917745622, |
| "grad_norm": 1.1363909244537354, |
| "learning_rate": 4.367140890372674e-06, |
| "loss": 1.1793, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.5643564356435644, |
| "grad_norm": 1.099576473236084, |
| "learning_rate": 4.365068552858116e-06, |
| "loss": 1.1849, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.5666412795125666, |
| "grad_norm": 1.0956041812896729, |
| "learning_rate": 4.3629933213049245e-06, |
| "loss": 1.169, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.5689261233815688, |
| "grad_norm": 1.1022474765777588, |
| "learning_rate": 4.36091519893325e-06, |
| "loss": 1.2376, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.5712109672505712, |
| "grad_norm": 1.1013610363006592, |
| "learning_rate": 4.35883418896773e-06, |
| "loss": 1.1665, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.5734958111195736, |
| "grad_norm": 1.1273926496505737, |
| "learning_rate": 4.356750294637478e-06, |
| "loss": 1.1723, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.5757806549885758, |
| "grad_norm": 1.1341313123703003, |
| "learning_rate": 4.3546635191760875e-06, |
| "loss": 1.1813, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.578065498857578, |
| "grad_norm": 1.0935291051864624, |
| "learning_rate": 4.352573865821621e-06, |
| "loss": 1.1932, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.5803503427265804, |
| "grad_norm": 1.189144492149353, |
| "learning_rate": 4.350481337816606e-06, |
| "loss": 1.1798, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.5826351865955828, |
| "grad_norm": 1.2104555368423462, |
| "learning_rate": 4.348385938408033e-06, |
| "loss": 1.1895, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.584920030464585, |
| "grad_norm": 1.1346546411514282, |
| "learning_rate": 4.346287670847345e-06, |
| "loss": 1.1896, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.5872048743335871, |
| "grad_norm": 1.131858468055725, |
| "learning_rate": 4.344186538390438e-06, |
| "loss": 1.1895, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.5894897182025893, |
| "grad_norm": 1.1175106763839722, |
| "learning_rate": 4.342082544297652e-06, |
| "loss": 1.2215, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.5917745620715917, |
| "grad_norm": 1.1056832075119019, |
| "learning_rate": 4.3399756918337675e-06, |
| "loss": 1.1617, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.5940594059405941, |
| "grad_norm": 1.0777372121810913, |
| "learning_rate": 4.337865984268002e-06, |
| "loss": 1.2716, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.5963442498095963, |
| "grad_norm": 1.1472179889678955, |
| "learning_rate": 4.335753424874e-06, |
| "loss": 1.1842, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.5986290936785985, |
| "grad_norm": 1.0825904607772827, |
| "learning_rate": 4.333638016929835e-06, |
| "loss": 1.2377, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.600913937547601, |
| "grad_norm": 1.0884586572647095, |
| "learning_rate": 4.331519763717998e-06, |
| "loss": 1.2003, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.6031987814166033, |
| "grad_norm": 1.132827639579773, |
| "learning_rate": 4.329398668525396e-06, |
| "loss": 1.186, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.6054836252856055, |
| "grad_norm": 1.1931272745132446, |
| "learning_rate": 4.327274734643346e-06, |
| "loss": 1.21, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.6077684691546077, |
| "grad_norm": 1.060774803161621, |
| "learning_rate": 4.3251479653675705e-06, |
| "loss": 1.1893, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.61005331302361, |
| "grad_norm": 1.0615568161010742, |
| "learning_rate": 4.323018363998189e-06, |
| "loss": 1.1814, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.6123381568926123, |
| "grad_norm": 1.0800261497497559, |
| "learning_rate": 4.320885933839718e-06, |
| "loss": 1.1995, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.6146230007616147, |
| "grad_norm": 1.1502355337142944, |
| "learning_rate": 4.318750678201064e-06, |
| "loss": 1.219, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.6169078446306169, |
| "grad_norm": 1.07515287399292, |
| "learning_rate": 4.316612600395515e-06, |
| "loss": 1.1787, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.619192688499619, |
| "grad_norm": 1.075150728225708, |
| "learning_rate": 4.31447170374074e-06, |
| "loss": 1.2062, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.6214775323686215, |
| "grad_norm": 1.1458789110183716, |
| "learning_rate": 4.312327991558782e-06, |
| "loss": 1.2009, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.6237623762376239, |
| "grad_norm": 1.1655830144882202, |
| "learning_rate": 4.3101814671760546e-06, |
| "loss": 1.2004, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.626047220106626, |
| "grad_norm": 1.1394225358963013, |
| "learning_rate": 4.30803213392333e-06, |
| "loss": 1.2059, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.6283320639756282, |
| "grad_norm": 1.109095573425293, |
| "learning_rate": 4.305879995135745e-06, |
| "loss": 1.1727, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.6306169078446306, |
| "grad_norm": 1.072530746459961, |
| "learning_rate": 4.303725054152785e-06, |
| "loss": 1.2059, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.632901751713633, |
| "grad_norm": 1.079399824142456, |
| "learning_rate": 4.3015673143182864e-06, |
| "loss": 1.1929, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.6351865955826352, |
| "grad_norm": 1.1387250423431396, |
| "learning_rate": 4.299406778980428e-06, |
| "loss": 1.1924, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.6374714394516374, |
| "grad_norm": 1.1101268529891968, |
| "learning_rate": 4.297243451491724e-06, |
| "loss": 1.1678, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.6397562833206396, |
| "grad_norm": 1.0745279788970947, |
| "learning_rate": 4.295077335209027e-06, |
| "loss": 1.1632, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.642041127189642, |
| "grad_norm": 1.1062053442001343, |
| "learning_rate": 4.29290843349351e-06, |
| "loss": 1.183, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.6443259710586444, |
| "grad_norm": 1.1918151378631592, |
| "learning_rate": 4.290736749710672e-06, |
| "loss": 1.2, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.6466108149276466, |
| "grad_norm": 1.1596369743347168, |
| "learning_rate": 4.28856228723033e-06, |
| "loss": 1.2232, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.6488956587966488, |
| "grad_norm": 1.1261903047561646, |
| "learning_rate": 4.28638504942661e-06, |
| "loss": 1.1614, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.6511805026656512, |
| "grad_norm": 1.1628916263580322, |
| "learning_rate": 4.284205039677946e-06, |
| "loss": 1.1866, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.6534653465346536, |
| "grad_norm": 1.114009976387024, |
| "learning_rate": 4.282022261367074e-06, |
| "loss": 1.2027, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.6557501904036558, |
| "grad_norm": 1.2100892066955566, |
| "learning_rate": 4.279836717881022e-06, |
| "loss": 1.1922, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.658035034272658, |
| "grad_norm": 1.122144103050232, |
| "learning_rate": 4.277648412611114e-06, |
| "loss": 1.178, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.6603198781416602, |
| "grad_norm": 1.0997899770736694, |
| "learning_rate": 4.275457348952955e-06, |
| "loss": 1.2276, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.6626047220106626, |
| "grad_norm": 1.1784963607788086, |
| "learning_rate": 4.273263530306435e-06, |
| "loss": 1.1889, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.664889565879665, |
| "grad_norm": 1.1602816581726074, |
| "learning_rate": 4.271066960075715e-06, |
| "loss": 1.1671, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.6671744097486672, |
| "grad_norm": 1.132406234741211, |
| "learning_rate": 4.268867641669225e-06, |
| "loss": 1.2017, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.6694592536176693, |
| "grad_norm": 1.1267459392547607, |
| "learning_rate": 4.266665578499664e-06, |
| "loss": 1.2135, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.6717440974866717, |
| "grad_norm": 1.0951600074768066, |
| "learning_rate": 4.2644607739839875e-06, |
| "loss": 1.2463, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.6740289413556741, |
| "grad_norm": 1.073875069618225, |
| "learning_rate": 4.262253231543401e-06, |
| "loss": 1.1879, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.6763137852246763, |
| "grad_norm": 1.1010491847991943, |
| "learning_rate": 4.260042954603366e-06, |
| "loss": 1.1812, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.6785986290936785, |
| "grad_norm": 1.1129403114318848, |
| "learning_rate": 4.2578299465935805e-06, |
| "loss": 1.2281, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.680883472962681, |
| "grad_norm": 1.3570629358291626, |
| "learning_rate": 4.255614210947985e-06, |
| "loss": 1.2013, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.6831683168316833, |
| "grad_norm": 1.104535460472107, |
| "learning_rate": 4.2533957511047485e-06, |
| "loss": 1.1708, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.6854531607006855, |
| "grad_norm": 1.1040364503860474, |
| "learning_rate": 4.25117457050627e-06, |
| "loss": 1.2154, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.6877380045696877, |
| "grad_norm": 1.0932183265686035, |
| "learning_rate": 4.24895067259917e-06, |
| "loss": 1.2182, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.6900228484386899, |
| "grad_norm": 1.0946629047393799, |
| "learning_rate": 4.246724060834284e-06, |
| "loss": 1.2058, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.6923076923076923, |
| "grad_norm": 1.060051679611206, |
| "learning_rate": 4.24449473866666e-06, |
| "loss": 1.1919, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.6945925361766947, |
| "grad_norm": 1.0483872890472412, |
| "learning_rate": 4.242262709555552e-06, |
| "loss": 1.1638, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.6968773800456969, |
| "grad_norm": 1.0956077575683594, |
| "learning_rate": 4.240027976964412e-06, |
| "loss": 1.1805, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.699162223914699, |
| "grad_norm": 1.1185762882232666, |
| "learning_rate": 4.237790544360889e-06, |
| "loss": 1.1923, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.7014470677837015, |
| "grad_norm": 1.0720428228378296, |
| "learning_rate": 4.2355504152168235e-06, |
| "loss": 1.1895, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.7037319116527039, |
| "grad_norm": 1.1020833253860474, |
| "learning_rate": 4.2333075930082345e-06, |
| "loss": 1.1845, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.706016755521706, |
| "grad_norm": 1.107071876525879, |
| "learning_rate": 4.231062081215326e-06, |
| "loss": 1.1751, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.7083015993907082, |
| "grad_norm": 1.1301578283309937, |
| "learning_rate": 4.228813883322472e-06, |
| "loss": 1.151, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.7105864432597104, |
| "grad_norm": 1.096433401107788, |
| "learning_rate": 4.226563002818215e-06, |
| "loss": 1.1728, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.7128712871287128, |
| "grad_norm": 1.1047178506851196, |
| "learning_rate": 4.224309443195261e-06, |
| "loss": 1.1947, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.7151561309977152, |
| "grad_norm": 1.110417127609253, |
| "learning_rate": 4.222053207950472e-06, |
| "loss": 1.2186, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.7174409748667174, |
| "grad_norm": 1.1027036905288696, |
| "learning_rate": 4.219794300584863e-06, |
| "loss": 1.2164, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.7197258187357196, |
| "grad_norm": 1.109299898147583, |
| "learning_rate": 4.217532724603595e-06, |
| "loss": 1.2202, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.722010662604722, |
| "grad_norm": 1.143134593963623, |
| "learning_rate": 4.2152684835159695e-06, |
| "loss": 1.1837, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.7242955064737244, |
| "grad_norm": 1.0689709186553955, |
| "learning_rate": 4.213001580835423e-06, |
| "loss": 1.1874, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.7265803503427266, |
| "grad_norm": 1.128243327140808, |
| "learning_rate": 4.2107320200795236e-06, |
| "loss": 1.1756, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.7288651942117288, |
| "grad_norm": 1.067436933517456, |
| "learning_rate": 4.208459804769963e-06, |
| "loss": 1.2212, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.7311500380807312, |
| "grad_norm": 1.1413803100585938, |
| "learning_rate": 4.206184938432552e-06, |
| "loss": 1.1491, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.7334348819497334, |
| "grad_norm": 1.141803503036499, |
| "learning_rate": 4.203907424597214e-06, |
| "loss": 1.2636, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.7357197258187358, |
| "grad_norm": 1.1099580526351929, |
| "learning_rate": 4.2016272667979814e-06, |
| "loss": 1.2192, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.738004569687738, |
| "grad_norm": 1.100486397743225, |
| "learning_rate": 4.199344468572992e-06, |
| "loss": 1.2044, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.7402894135567402, |
| "grad_norm": 1.0598186254501343, |
| "learning_rate": 4.197059033464476e-06, |
| "loss": 1.1983, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.7425742574257426, |
| "grad_norm": 1.0922818183898926, |
| "learning_rate": 4.194770965018758e-06, |
| "loss": 1.2194, |
| "step": 763 |
| }, |
| { |
| "epoch": 1.744859101294745, |
| "grad_norm": 1.0975127220153809, |
| "learning_rate": 4.1924802667862485e-06, |
| "loss": 1.1465, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.7471439451637472, |
| "grad_norm": 1.0934858322143555, |
| "learning_rate": 4.190186942321438e-06, |
| "loss": 1.1544, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.7494287890327493, |
| "grad_norm": 1.0712271928787231, |
| "learning_rate": 4.187890995182893e-06, |
| "loss": 1.1893, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.7517136329017517, |
| "grad_norm": 1.1157736778259277, |
| "learning_rate": 4.1855924289332485e-06, |
| "loss": 1.2362, |
| "step": 767 |
| }, |
| { |
| "epoch": 1.7539984767707542, |
| "grad_norm": 1.2225691080093384, |
| "learning_rate": 4.183291247139204e-06, |
| "loss": 1.22, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.7562833206397563, |
| "grad_norm": 1.1402082443237305, |
| "learning_rate": 4.180987453371519e-06, |
| "loss": 1.2024, |
| "step": 769 |
| }, |
| { |
| "epoch": 1.7585681645087585, |
| "grad_norm": 1.1124638319015503, |
| "learning_rate": 4.178681051205004e-06, |
| "loss": 1.17, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.7608530083777607, |
| "grad_norm": 1.1337512731552124, |
| "learning_rate": 4.176372044218519e-06, |
| "loss": 1.1862, |
| "step": 771 |
| }, |
| { |
| "epoch": 1.7631378522467631, |
| "grad_norm": 1.0702142715454102, |
| "learning_rate": 4.174060435994962e-06, |
| "loss": 1.2038, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.7654226961157655, |
| "grad_norm": 1.112242579460144, |
| "learning_rate": 4.171746230121273e-06, |
| "loss": 1.2146, |
| "step": 773 |
| }, |
| { |
| "epoch": 1.7677075399847677, |
| "grad_norm": 1.1164225339889526, |
| "learning_rate": 4.169429430188418e-06, |
| "loss": 1.1768, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.76999238385377, |
| "grad_norm": 1.091208577156067, |
| "learning_rate": 4.16711003979139e-06, |
| "loss": 1.193, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.7722772277227723, |
| "grad_norm": 1.138411283493042, |
| "learning_rate": 4.164788062529203e-06, |
| "loss": 1.203, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.7745620715917747, |
| "grad_norm": 1.168305516242981, |
| "learning_rate": 4.1624635020048835e-06, |
| "loss": 1.2154, |
| "step": 777 |
| }, |
| { |
| "epoch": 1.7768469154607769, |
| "grad_norm": 1.0742619037628174, |
| "learning_rate": 4.160136361825465e-06, |
| "loss": 1.214, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.779131759329779, |
| "grad_norm": 1.076762080192566, |
| "learning_rate": 4.1578066456019885e-06, |
| "loss": 1.1834, |
| "step": 779 |
| }, |
| { |
| "epoch": 1.7814166031987813, |
| "grad_norm": 1.1189744472503662, |
| "learning_rate": 4.155474356949487e-06, |
| "loss": 1.191, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.7837014470677837, |
| "grad_norm": 1.0916801691055298, |
| "learning_rate": 4.153139499486988e-06, |
| "loss": 1.2104, |
| "step": 781 |
| }, |
| { |
| "epoch": 1.785986290936786, |
| "grad_norm": 1.1265934705734253, |
| "learning_rate": 4.150802076837506e-06, |
| "loss": 1.2366, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.7882711348057883, |
| "grad_norm": 1.1008100509643555, |
| "learning_rate": 4.148462092628032e-06, |
| "loss": 1.1919, |
| "step": 783 |
| }, |
| { |
| "epoch": 1.7905559786747904, |
| "grad_norm": 1.5858978033065796, |
| "learning_rate": 4.146119550489536e-06, |
| "loss": 1.1927, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.7928408225437928, |
| "grad_norm": 1.1155521869659424, |
| "learning_rate": 4.143774454056954e-06, |
| "loss": 1.1948, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.7951256664127953, |
| "grad_norm": 1.1289353370666504, |
| "learning_rate": 4.141426806969189e-06, |
| "loss": 1.1719, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.7974105102817974, |
| "grad_norm": 1.1492801904678345, |
| "learning_rate": 4.139076612869098e-06, |
| "loss": 1.169, |
| "step": 787 |
| }, |
| { |
| "epoch": 1.7996953541507996, |
| "grad_norm": 1.0931838750839233, |
| "learning_rate": 4.1367238754034935e-06, |
| "loss": 1.1581, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.801980198019802, |
| "grad_norm": 1.0901176929473877, |
| "learning_rate": 4.134368598223132e-06, |
| "loss": 1.2223, |
| "step": 789 |
| }, |
| { |
| "epoch": 1.8042650418888042, |
| "grad_norm": 1.0907678604125977, |
| "learning_rate": 4.132010784982711e-06, |
| "loss": 1.1839, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.8065498857578066, |
| "grad_norm": 1.1389234066009521, |
| "learning_rate": 4.129650439340866e-06, |
| "loss": 1.1765, |
| "step": 791 |
| }, |
| { |
| "epoch": 1.8088347296268088, |
| "grad_norm": 1.0889054536819458, |
| "learning_rate": 4.12728756496016e-06, |
| "loss": 1.1913, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.811119573495811, |
| "grad_norm": 1.090705156326294, |
| "learning_rate": 4.12492216550708e-06, |
| "loss": 1.1692, |
| "step": 793 |
| }, |
| { |
| "epoch": 1.8134044173648134, |
| "grad_norm": 1.1290946006774902, |
| "learning_rate": 4.12255424465203e-06, |
| "loss": 1.2114, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.8156892612338158, |
| "grad_norm": 1.108325719833374, |
| "learning_rate": 4.120183806069328e-06, |
| "loss": 1.1941, |
| "step": 795 |
| }, |
| { |
| "epoch": 1.817974105102818, |
| "grad_norm": 1.0901302099227905, |
| "learning_rate": 4.1178108534371995e-06, |
| "loss": 1.1709, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.8202589489718202, |
| "grad_norm": 1.1386867761611938, |
| "learning_rate": 4.11543539043777e-06, |
| "loss": 1.2008, |
| "step": 797 |
| }, |
| { |
| "epoch": 1.8225437928408226, |
| "grad_norm": 1.1768696308135986, |
| "learning_rate": 4.11305742075706e-06, |
| "loss": 1.1735, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.824828636709825, |
| "grad_norm": 1.093137264251709, |
| "learning_rate": 4.1106769480849795e-06, |
| "loss": 1.1952, |
| "step": 799 |
| }, |
| { |
| "epoch": 1.8271134805788272, |
| "grad_norm": 1.12264084815979, |
| "learning_rate": 4.108293976115325e-06, |
| "loss": 1.2118, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.8293983244478293, |
| "grad_norm": 1.089824914932251, |
| "learning_rate": 4.105908508545766e-06, |
| "loss": 1.1856, |
| "step": 801 |
| }, |
| { |
| "epoch": 1.8316831683168315, |
| "grad_norm": 1.3425248861312866, |
| "learning_rate": 4.1035205490778505e-06, |
| "loss": 1.1942, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.833968012185834, |
| "grad_norm": 1.097115159034729, |
| "learning_rate": 4.101130101416988e-06, |
| "loss": 1.2083, |
| "step": 803 |
| }, |
| { |
| "epoch": 1.8362528560548363, |
| "grad_norm": 1.0973830223083496, |
| "learning_rate": 4.098737169272452e-06, |
| "loss": 1.2033, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.8385376999238385, |
| "grad_norm": 1.127233624458313, |
| "learning_rate": 4.096341756357371e-06, |
| "loss": 1.1941, |
| "step": 805 |
| }, |
| { |
| "epoch": 1.8408225437928407, |
| "grad_norm": 1.097070574760437, |
| "learning_rate": 4.093943866388723e-06, |
| "loss": 1.1971, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.8431073876618431, |
| "grad_norm": 1.0978144407272339, |
| "learning_rate": 4.091543503087327e-06, |
| "loss": 1.2029, |
| "step": 807 |
| }, |
| { |
| "epoch": 1.8453922315308455, |
| "grad_norm": 1.0643872022628784, |
| "learning_rate": 4.089140670177843e-06, |
| "loss": 1.1532, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.8476770753998477, |
| "grad_norm": 1.1128400564193726, |
| "learning_rate": 4.086735371388762e-06, |
| "loss": 1.1851, |
| "step": 809 |
| }, |
| { |
| "epoch": 1.84996191926885, |
| "grad_norm": 1.1439098119735718, |
| "learning_rate": 4.0843276104524e-06, |
| "loss": 1.1816, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.852246763137852, |
| "grad_norm": 1.1020617485046387, |
| "learning_rate": 4.0819173911048965e-06, |
| "loss": 1.2081, |
| "step": 811 |
| }, |
| { |
| "epoch": 1.8545316070068545, |
| "grad_norm": 1.0913503170013428, |
| "learning_rate": 4.079504717086203e-06, |
| "loss": 1.1892, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.856816450875857, |
| "grad_norm": 1.1332125663757324, |
| "learning_rate": 4.077089592140082e-06, |
| "loss": 1.182, |
| "step": 813 |
| }, |
| { |
| "epoch": 1.859101294744859, |
| "grad_norm": 1.0584102869033813, |
| "learning_rate": 4.074672020014098e-06, |
| "loss": 1.2169, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.8613861386138613, |
| "grad_norm": 1.1291085481643677, |
| "learning_rate": 4.072252004459612e-06, |
| "loss": 1.1796, |
| "step": 815 |
| }, |
| { |
| "epoch": 1.8636709824828637, |
| "grad_norm": 1.0868487358093262, |
| "learning_rate": 4.069829549231778e-06, |
| "loss": 1.1832, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.865955826351866, |
| "grad_norm": 1.1105777025222778, |
| "learning_rate": 4.067404658089535e-06, |
| "loss": 1.2242, |
| "step": 817 |
| }, |
| { |
| "epoch": 1.8682406702208683, |
| "grad_norm": 1.1203657388687134, |
| "learning_rate": 4.0649773347956005e-06, |
| "loss": 1.1755, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.8705255140898704, |
| "grad_norm": 1.1000312566757202, |
| "learning_rate": 4.062547583116469e-06, |
| "loss": 1.1829, |
| "step": 819 |
| }, |
| { |
| "epoch": 1.8728103579588729, |
| "grad_norm": 1.1125813722610474, |
| "learning_rate": 4.060115406822402e-06, |
| "loss": 1.2013, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.8750952018278753, |
| "grad_norm": 1.0868099927902222, |
| "learning_rate": 4.057680809687421e-06, |
| "loss": 1.1749, |
| "step": 821 |
| }, |
| { |
| "epoch": 1.8773800456968774, |
| "grad_norm": 1.1242718696594238, |
| "learning_rate": 4.055243795489307e-06, |
| "loss": 1.1601, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.8796648895658796, |
| "grad_norm": 1.1220780611038208, |
| "learning_rate": 4.052804368009589e-06, |
| "loss": 1.197, |
| "step": 823 |
| }, |
| { |
| "epoch": 1.8819497334348818, |
| "grad_norm": 1.0715032815933228, |
| "learning_rate": 4.050362531033545e-06, |
| "loss": 1.1834, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.8842345773038842, |
| "grad_norm": 1.1281424760818481, |
| "learning_rate": 4.0479182883501855e-06, |
| "loss": 1.1653, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.8865194211728866, |
| "grad_norm": 1.0727750062942505, |
| "learning_rate": 4.045471643752258e-06, |
| "loss": 1.1907, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.8888042650418888, |
| "grad_norm": 1.122889757156372, |
| "learning_rate": 4.043022601036238e-06, |
| "loss": 1.1935, |
| "step": 827 |
| }, |
| { |
| "epoch": 1.891089108910891, |
| "grad_norm": 1.1605268716812134, |
| "learning_rate": 4.040571164002319e-06, |
| "loss": 1.211, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.8933739527798934, |
| "grad_norm": 1.0915296077728271, |
| "learning_rate": 4.038117336454411e-06, |
| "loss": 1.1614, |
| "step": 829 |
| }, |
| { |
| "epoch": 1.8956587966488958, |
| "grad_norm": 1.1206241846084595, |
| "learning_rate": 4.035661122200135e-06, |
| "loss": 1.1592, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.897943640517898, |
| "grad_norm": 1.1132665872573853, |
| "learning_rate": 4.033202525050813e-06, |
| "loss": 1.1865, |
| "step": 831 |
| }, |
| { |
| "epoch": 1.9002284843869002, |
| "grad_norm": 1.0694245100021362, |
| "learning_rate": 4.0307415488214675e-06, |
| "loss": 1.1767, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.9025133282559024, |
| "grad_norm": 1.1312873363494873, |
| "learning_rate": 4.028278197330808e-06, |
| "loss": 1.2344, |
| "step": 833 |
| }, |
| { |
| "epoch": 1.9047981721249048, |
| "grad_norm": 1.0864746570587158, |
| "learning_rate": 4.025812474401236e-06, |
| "loss": 1.2146, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.9070830159939072, |
| "grad_norm": 1.0774086713790894, |
| "learning_rate": 4.023344383858826e-06, |
| "loss": 1.1496, |
| "step": 835 |
| }, |
| { |
| "epoch": 1.9093678598629094, |
| "grad_norm": 1.0805225372314453, |
| "learning_rate": 4.0208739295333314e-06, |
| "loss": 1.2098, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.9116527037319115, |
| "grad_norm": 1.0994813442230225, |
| "learning_rate": 4.018401115258172e-06, |
| "loss": 1.1881, |
| "step": 837 |
| }, |
| { |
| "epoch": 1.913937547600914, |
| "grad_norm": 1.0419007539749146, |
| "learning_rate": 4.015925944870428e-06, |
| "loss": 1.1935, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.9162223914699164, |
| "grad_norm": 1.1358449459075928, |
| "learning_rate": 4.013448422210838e-06, |
| "loss": 1.1989, |
| "step": 839 |
| }, |
| { |
| "epoch": 1.9185072353389185, |
| "grad_norm": 1.121999740600586, |
| "learning_rate": 4.010968551123788e-06, |
| "loss": 1.2108, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.9207920792079207, |
| "grad_norm": 1.1504106521606445, |
| "learning_rate": 4.008486335457312e-06, |
| "loss": 1.1768, |
| "step": 841 |
| }, |
| { |
| "epoch": 1.9230769230769231, |
| "grad_norm": 1.128138780593872, |
| "learning_rate": 4.006001779063078e-06, |
| "loss": 1.1992, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.9253617669459253, |
| "grad_norm": 1.1590732336044312, |
| "learning_rate": 4.003514885796388e-06, |
| "loss": 1.181, |
| "step": 843 |
| }, |
| { |
| "epoch": 1.9276466108149277, |
| "grad_norm": 1.0851722955703735, |
| "learning_rate": 4.001025659516171e-06, |
| "loss": 1.1711, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.92993145468393, |
| "grad_norm": 1.066331148147583, |
| "learning_rate": 3.998534104084974e-06, |
| "loss": 1.1728, |
| "step": 845 |
| }, |
| { |
| "epoch": 1.932216298552932, |
| "grad_norm": 1.110464096069336, |
| "learning_rate": 3.99604022336896e-06, |
| "loss": 1.178, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.9345011424219345, |
| "grad_norm": 1.1028679609298706, |
| "learning_rate": 3.993544021237899e-06, |
| "loss": 1.2122, |
| "step": 847 |
| }, |
| { |
| "epoch": 1.936785986290937, |
| "grad_norm": 1.1760601997375488, |
| "learning_rate": 3.991045501565163e-06, |
| "loss": 1.2103, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.939070830159939, |
| "grad_norm": 1.1260336637496948, |
| "learning_rate": 3.988544668227721e-06, |
| "loss": 1.1443, |
| "step": 849 |
| }, |
| { |
| "epoch": 1.9413556740289413, |
| "grad_norm": 1.1055935621261597, |
| "learning_rate": 3.9860415251061334e-06, |
| "loss": 1.1795, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.9436405178979437, |
| "grad_norm": 1.1292855739593506, |
| "learning_rate": 3.983536076084541e-06, |
| "loss": 1.182, |
| "step": 851 |
| }, |
| { |
| "epoch": 1.945925361766946, |
| "grad_norm": 1.1108464002609253, |
| "learning_rate": 3.981028325050667e-06, |
| "loss": 1.1876, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.9482102056359483, |
| "grad_norm": 1.1306401491165161, |
| "learning_rate": 3.978518275895802e-06, |
| "loss": 1.1645, |
| "step": 853 |
| }, |
| { |
| "epoch": 1.9504950495049505, |
| "grad_norm": 1.1031887531280518, |
| "learning_rate": 3.976005932514807e-06, |
| "loss": 1.2047, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.9527798933739526, |
| "grad_norm": 1.0953725576400757, |
| "learning_rate": 3.973491298806101e-06, |
| "loss": 1.1756, |
| "step": 855 |
| }, |
| { |
| "epoch": 1.955064737242955, |
| "grad_norm": 1.109553575515747, |
| "learning_rate": 3.970974378671656e-06, |
| "loss": 1.2228, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.9573495811119574, |
| "grad_norm": 1.1159707307815552, |
| "learning_rate": 3.968455176016993e-06, |
| "loss": 1.2037, |
| "step": 857 |
| }, |
| { |
| "epoch": 1.9596344249809596, |
| "grad_norm": 1.1045714616775513, |
| "learning_rate": 3.965933694751175e-06, |
| "loss": 1.196, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.9619192688499618, |
| "grad_norm": 1.110876202583313, |
| "learning_rate": 3.963409938786801e-06, |
| "loss": 1.1772, |
| "step": 859 |
| }, |
| { |
| "epoch": 1.9642041127189642, |
| "grad_norm": 1.1226321458816528, |
| "learning_rate": 3.9608839120399975e-06, |
| "loss": 1.1875, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.9664889565879666, |
| "grad_norm": 1.1401004791259766, |
| "learning_rate": 3.958355618430417e-06, |
| "loss": 1.2137, |
| "step": 861 |
| }, |
| { |
| "epoch": 1.9687738004569688, |
| "grad_norm": 1.0866281986236572, |
| "learning_rate": 3.95582506188123e-06, |
| "loss": 1.2001, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.971058644325971, |
| "grad_norm": 1.1426069736480713, |
| "learning_rate": 3.9532922463191145e-06, |
| "loss": 1.1794, |
| "step": 863 |
| }, |
| { |
| "epoch": 1.9733434881949732, |
| "grad_norm": 1.1191396713256836, |
| "learning_rate": 3.950757175674257e-06, |
| "loss": 1.2118, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.9756283320639756, |
| "grad_norm": 1.0993397235870361, |
| "learning_rate": 3.948219853880344e-06, |
| "loss": 1.2209, |
| "step": 865 |
| }, |
| { |
| "epoch": 1.977913175932978, |
| "grad_norm": 1.0973010063171387, |
| "learning_rate": 3.945680284874553e-06, |
| "loss": 1.1738, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.9801980198019802, |
| "grad_norm": 1.2131692171096802, |
| "learning_rate": 3.943138472597549e-06, |
| "loss": 1.1833, |
| "step": 867 |
| }, |
| { |
| "epoch": 1.9824828636709824, |
| "grad_norm": 1.1128953695297241, |
| "learning_rate": 3.940594420993479e-06, |
| "loss": 1.1925, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.9847677075399848, |
| "grad_norm": 1.0862925052642822, |
| "learning_rate": 3.938048134009962e-06, |
| "loss": 1.1965, |
| "step": 869 |
| }, |
| { |
| "epoch": 1.9870525514089872, |
| "grad_norm": 1.1464707851409912, |
| "learning_rate": 3.935499615598088e-06, |
| "loss": 1.1579, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.9893373952779894, |
| "grad_norm": 1.1059821844100952, |
| "learning_rate": 3.932948869712412e-06, |
| "loss": 1.169, |
| "step": 871 |
| }, |
| { |
| "epoch": 1.9916222391469915, |
| "grad_norm": 1.1403911113739014, |
| "learning_rate": 3.930395900310939e-06, |
| "loss": 1.1586, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.993907083015994, |
| "grad_norm": 1.0881669521331787, |
| "learning_rate": 3.9278407113551295e-06, |
| "loss": 1.2262, |
| "step": 873 |
| }, |
| { |
| "epoch": 1.9961919268849961, |
| "grad_norm": 1.1039564609527588, |
| "learning_rate": 3.925283306809885e-06, |
| "loss": 1.1951, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.9984767707539985, |
| "grad_norm": 1.1270387172698975, |
| "learning_rate": 3.9227236906435484e-06, |
| "loss": 1.1808, |
| "step": 875 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 1.307372808456421, |
| "learning_rate": 3.92016186682789e-06, |
| "loss": 1.1959, |
| "step": 876 |
| }, |
| { |
| "epoch": 2.002284843869002, |
| "grad_norm": 1.3203482627868652, |
| "learning_rate": 3.917597839338108e-06, |
| "loss": 1.1606, |
| "step": 877 |
| }, |
| { |
| "epoch": 2.0045696877380044, |
| "grad_norm": 1.1091164350509644, |
| "learning_rate": 3.915031612152823e-06, |
| "loss": 1.1532, |
| "step": 878 |
| }, |
| { |
| "epoch": 2.006854531607007, |
| "grad_norm": 1.1063039302825928, |
| "learning_rate": 3.912463189254063e-06, |
| "loss": 1.1635, |
| "step": 879 |
| }, |
| { |
| "epoch": 2.009139375476009, |
| "grad_norm": 1.1640090942382812, |
| "learning_rate": 3.909892574627267e-06, |
| "loss": 1.1174, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.0114242193450114, |
| "grad_norm": 1.147412657737732, |
| "learning_rate": 3.907319772261273e-06, |
| "loss": 1.1285, |
| "step": 881 |
| }, |
| { |
| "epoch": 2.0137090632140136, |
| "grad_norm": 1.1952067613601685, |
| "learning_rate": 3.904744786148316e-06, |
| "loss": 1.1657, |
| "step": 882 |
| }, |
| { |
| "epoch": 2.015993907083016, |
| "grad_norm": 1.1174241304397583, |
| "learning_rate": 3.902167620284017e-06, |
| "loss": 1.1424, |
| "step": 883 |
| }, |
| { |
| "epoch": 2.0182787509520184, |
| "grad_norm": 1.0976516008377075, |
| "learning_rate": 3.899588278667382e-06, |
| "loss": 1.1328, |
| "step": 884 |
| }, |
| { |
| "epoch": 2.0205635948210205, |
| "grad_norm": 1.1376157999038696, |
| "learning_rate": 3.897006765300791e-06, |
| "loss": 1.155, |
| "step": 885 |
| }, |
| { |
| "epoch": 2.0228484386900227, |
| "grad_norm": 1.175310730934143, |
| "learning_rate": 3.8944230841899935e-06, |
| "loss": 1.1799, |
| "step": 886 |
| }, |
| { |
| "epoch": 2.025133282559025, |
| "grad_norm": 1.1553736925125122, |
| "learning_rate": 3.8918372393441036e-06, |
| "loss": 1.1656, |
| "step": 887 |
| }, |
| { |
| "epoch": 2.0274181264280275, |
| "grad_norm": 1.0809762477874756, |
| "learning_rate": 3.889249234775596e-06, |
| "loss": 1.158, |
| "step": 888 |
| }, |
| { |
| "epoch": 2.0297029702970297, |
| "grad_norm": 1.2475727796554565, |
| "learning_rate": 3.886659074500291e-06, |
| "loss": 1.1958, |
| "step": 889 |
| }, |
| { |
| "epoch": 2.031987814166032, |
| "grad_norm": 1.2172832489013672, |
| "learning_rate": 3.884066762537357e-06, |
| "loss": 1.1703, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.034272658035034, |
| "grad_norm": 1.1374081373214722, |
| "learning_rate": 3.8814723029093014e-06, |
| "loss": 1.1384, |
| "step": 891 |
| }, |
| { |
| "epoch": 2.0365575019040367, |
| "grad_norm": 1.1220778226852417, |
| "learning_rate": 3.878875699641964e-06, |
| "loss": 1.1368, |
| "step": 892 |
| }, |
| { |
| "epoch": 2.038842345773039, |
| "grad_norm": 1.0943710803985596, |
| "learning_rate": 3.876276956764509e-06, |
| "loss": 1.1345, |
| "step": 893 |
| }, |
| { |
| "epoch": 2.041127189642041, |
| "grad_norm": 1.079895257949829, |
| "learning_rate": 3.873676078309423e-06, |
| "loss": 1.1469, |
| "step": 894 |
| }, |
| { |
| "epoch": 2.0434120335110433, |
| "grad_norm": 1.164766550064087, |
| "learning_rate": 3.871073068312506e-06, |
| "loss": 1.1458, |
| "step": 895 |
| }, |
| { |
| "epoch": 2.045696877380046, |
| "grad_norm": 1.1422792673110962, |
| "learning_rate": 3.868467930812864e-06, |
| "loss": 1.1286, |
| "step": 896 |
| }, |
| { |
| "epoch": 2.047981721249048, |
| "grad_norm": 1.104988694190979, |
| "learning_rate": 3.865860669852906e-06, |
| "loss": 1.1316, |
| "step": 897 |
| }, |
| { |
| "epoch": 2.0502665651180503, |
| "grad_norm": 1.157842755317688, |
| "learning_rate": 3.8632512894783345e-06, |
| "loss": 1.1515, |
| "step": 898 |
| }, |
| { |
| "epoch": 2.0525514089870525, |
| "grad_norm": 1.255118727684021, |
| "learning_rate": 3.860639793738143e-06, |
| "loss": 1.1806, |
| "step": 899 |
| }, |
| { |
| "epoch": 2.0548362528560546, |
| "grad_norm": 1.2516144514083862, |
| "learning_rate": 3.858026186684604e-06, |
| "loss": 1.1973, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.0571210967250573, |
| "grad_norm": 1.143211007118225, |
| "learning_rate": 3.85541047237327e-06, |
| "loss": 1.1654, |
| "step": 901 |
| }, |
| { |
| "epoch": 2.0594059405940595, |
| "grad_norm": 1.1019172668457031, |
| "learning_rate": 3.852792654862959e-06, |
| "loss": 1.1534, |
| "step": 902 |
| }, |
| { |
| "epoch": 2.0616907844630616, |
| "grad_norm": 1.1939572095870972, |
| "learning_rate": 3.850172738215757e-06, |
| "loss": 1.1145, |
| "step": 903 |
| }, |
| { |
| "epoch": 2.063975628332064, |
| "grad_norm": 1.2124632596969604, |
| "learning_rate": 3.847550726497004e-06, |
| "loss": 1.1037, |
| "step": 904 |
| }, |
| { |
| "epoch": 2.0662604722010665, |
| "grad_norm": 1.144073486328125, |
| "learning_rate": 3.844926623775293e-06, |
| "loss": 1.1605, |
| "step": 905 |
| }, |
| { |
| "epoch": 2.0685453160700686, |
| "grad_norm": 1.1347826719284058, |
| "learning_rate": 3.84230043412246e-06, |
| "loss": 1.1421, |
| "step": 906 |
| }, |
| { |
| "epoch": 2.070830159939071, |
| "grad_norm": 1.1252721548080444, |
| "learning_rate": 3.8396721616135805e-06, |
| "loss": 1.1767, |
| "step": 907 |
| }, |
| { |
| "epoch": 2.073115003808073, |
| "grad_norm": 1.224308729171753, |
| "learning_rate": 3.837041810326961e-06, |
| "loss": 1.1325, |
| "step": 908 |
| }, |
| { |
| "epoch": 2.075399847677075, |
| "grad_norm": 1.25150728225708, |
| "learning_rate": 3.8344093843441345e-06, |
| "loss": 1.1235, |
| "step": 909 |
| }, |
| { |
| "epoch": 2.077684691546078, |
| "grad_norm": 1.244081974029541, |
| "learning_rate": 3.831774887749854e-06, |
| "loss": 1.2037, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.07996953541508, |
| "grad_norm": 1.14827561378479, |
| "learning_rate": 3.829138324632082e-06, |
| "loss": 1.1378, |
| "step": 911 |
| }, |
| { |
| "epoch": 2.082254379284082, |
| "grad_norm": 1.1389387845993042, |
| "learning_rate": 3.826499699081992e-06, |
| "loss": 1.1785, |
| "step": 912 |
| }, |
| { |
| "epoch": 2.0845392231530844, |
| "grad_norm": 1.2572706937789917, |
| "learning_rate": 3.823859015193957e-06, |
| "loss": 1.1474, |
| "step": 913 |
| }, |
| { |
| "epoch": 2.086824067022087, |
| "grad_norm": 1.1964038610458374, |
| "learning_rate": 3.8212162770655405e-06, |
| "loss": 1.1508, |
| "step": 914 |
| }, |
| { |
| "epoch": 2.089108910891089, |
| "grad_norm": 1.2023462057113647, |
| "learning_rate": 3.818571488797496e-06, |
| "loss": 1.1401, |
| "step": 915 |
| }, |
| { |
| "epoch": 2.0913937547600914, |
| "grad_norm": 1.1288152933120728, |
| "learning_rate": 3.815924654493759e-06, |
| "loss": 1.1395, |
| "step": 916 |
| }, |
| { |
| "epoch": 2.0936785986290936, |
| "grad_norm": 1.15272855758667, |
| "learning_rate": 3.8132757782614405e-06, |
| "loss": 1.1357, |
| "step": 917 |
| }, |
| { |
| "epoch": 2.095963442498096, |
| "grad_norm": 1.176313877105713, |
| "learning_rate": 3.810624864210816e-06, |
| "loss": 1.1529, |
| "step": 918 |
| }, |
| { |
| "epoch": 2.0982482863670984, |
| "grad_norm": 1.2011158466339111, |
| "learning_rate": 3.807971916455325e-06, |
| "loss": 1.1301, |
| "step": 919 |
| }, |
| { |
| "epoch": 2.1005331302361006, |
| "grad_norm": 1.135551929473877, |
| "learning_rate": 3.8053169391115665e-06, |
| "loss": 1.1545, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.1028179741051027, |
| "grad_norm": 1.1023021936416626, |
| "learning_rate": 3.802659936299283e-06, |
| "loss": 1.1619, |
| "step": 921 |
| }, |
| { |
| "epoch": 2.105102817974105, |
| "grad_norm": 1.1410151720046997, |
| "learning_rate": 3.800000912141363e-06, |
| "loss": 1.168, |
| "step": 922 |
| }, |
| { |
| "epoch": 2.1073876618431076, |
| "grad_norm": 1.1780641078948975, |
| "learning_rate": 3.797339870763831e-06, |
| "loss": 1.1895, |
| "step": 923 |
| }, |
| { |
| "epoch": 2.1096725057121097, |
| "grad_norm": 1.13074791431427, |
| "learning_rate": 3.7946768162958424e-06, |
| "loss": 1.1786, |
| "step": 924 |
| }, |
| { |
| "epoch": 2.111957349581112, |
| "grad_norm": 1.1389769315719604, |
| "learning_rate": 3.792011752869676e-06, |
| "loss": 1.1401, |
| "step": 925 |
| }, |
| { |
| "epoch": 2.114242193450114, |
| "grad_norm": 1.1424660682678223, |
| "learning_rate": 3.7893446846207254e-06, |
| "loss": 1.1269, |
| "step": 926 |
| }, |
| { |
| "epoch": 2.1165270373191167, |
| "grad_norm": 1.2282761335372925, |
| "learning_rate": 3.7866756156874996e-06, |
| "loss": 1.1739, |
| "step": 927 |
| }, |
| { |
| "epoch": 2.118811881188119, |
| "grad_norm": 1.1365275382995605, |
| "learning_rate": 3.7840045502116073e-06, |
| "loss": 1.1506, |
| "step": 928 |
| }, |
| { |
| "epoch": 2.121096725057121, |
| "grad_norm": 1.1741012334823608, |
| "learning_rate": 3.7813314923377603e-06, |
| "loss": 1.1436, |
| "step": 929 |
| }, |
| { |
| "epoch": 2.1233815689261233, |
| "grad_norm": 1.1063398122787476, |
| "learning_rate": 3.778656446213757e-06, |
| "loss": 1.1567, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.1256664127951255, |
| "grad_norm": 1.1437269449234009, |
| "learning_rate": 3.775979415990485e-06, |
| "loss": 1.1524, |
| "step": 931 |
| }, |
| { |
| "epoch": 2.127951256664128, |
| "grad_norm": 1.180048942565918, |
| "learning_rate": 3.773300405821908e-06, |
| "loss": 1.1283, |
| "step": 932 |
| }, |
| { |
| "epoch": 2.1302361005331303, |
| "grad_norm": 1.179042100906372, |
| "learning_rate": 3.7706194198650635e-06, |
| "loss": 1.1285, |
| "step": 933 |
| }, |
| { |
| "epoch": 2.1325209444021325, |
| "grad_norm": 1.1634424924850464, |
| "learning_rate": 3.767936462280054e-06, |
| "loss": 1.1491, |
| "step": 934 |
| }, |
| { |
| "epoch": 2.1348057882711347, |
| "grad_norm": 1.1401522159576416, |
| "learning_rate": 3.7652515372300415e-06, |
| "loss": 1.1256, |
| "step": 935 |
| }, |
| { |
| "epoch": 2.1370906321401373, |
| "grad_norm": 1.240664005279541, |
| "learning_rate": 3.762564648881242e-06, |
| "loss": 1.1735, |
| "step": 936 |
| }, |
| { |
| "epoch": 2.1393754760091395, |
| "grad_norm": 1.23284113407135, |
| "learning_rate": 3.7598758014029158e-06, |
| "loss": 1.1421, |
| "step": 937 |
| }, |
| { |
| "epoch": 2.1416603198781416, |
| "grad_norm": 1.1896634101867676, |
| "learning_rate": 3.757184998967366e-06, |
| "loss": 1.1041, |
| "step": 938 |
| }, |
| { |
| "epoch": 2.143945163747144, |
| "grad_norm": 1.1241672039031982, |
| "learning_rate": 3.7544922457499256e-06, |
| "loss": 1.1566, |
| "step": 939 |
| }, |
| { |
| "epoch": 2.146230007616146, |
| "grad_norm": 1.1452815532684326, |
| "learning_rate": 3.751797545928959e-06, |
| "loss": 1.1952, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.1485148514851486, |
| "grad_norm": 1.1131870746612549, |
| "learning_rate": 3.7491009036858483e-06, |
| "loss": 1.14, |
| "step": 941 |
| }, |
| { |
| "epoch": 2.150799695354151, |
| "grad_norm": 1.16527259349823, |
| "learning_rate": 3.7464023232049895e-06, |
| "loss": 1.1648, |
| "step": 942 |
| }, |
| { |
| "epoch": 2.153084539223153, |
| "grad_norm": 1.1289446353912354, |
| "learning_rate": 3.7437018086737876e-06, |
| "loss": 1.179, |
| "step": 943 |
| }, |
| { |
| "epoch": 2.155369383092155, |
| "grad_norm": 1.1474318504333496, |
| "learning_rate": 3.740999364282647e-06, |
| "loss": 1.2018, |
| "step": 944 |
| }, |
| { |
| "epoch": 2.157654226961158, |
| "grad_norm": 1.177672266960144, |
| "learning_rate": 3.7382949942249695e-06, |
| "loss": 1.1756, |
| "step": 945 |
| }, |
| { |
| "epoch": 2.15993907083016, |
| "grad_norm": 1.152292251586914, |
| "learning_rate": 3.7355887026971417e-06, |
| "loss": 1.1387, |
| "step": 946 |
| }, |
| { |
| "epoch": 2.162223914699162, |
| "grad_norm": 1.1584752798080444, |
| "learning_rate": 3.7328804938985335e-06, |
| "loss": 1.1648, |
| "step": 947 |
| }, |
| { |
| "epoch": 2.1645087585681644, |
| "grad_norm": 1.1106222867965698, |
| "learning_rate": 3.7301703720314897e-06, |
| "loss": 1.1875, |
| "step": 948 |
| }, |
| { |
| "epoch": 2.166793602437167, |
| "grad_norm": 1.177903413772583, |
| "learning_rate": 3.727458341301324e-06, |
| "loss": 1.1488, |
| "step": 949 |
| }, |
| { |
| "epoch": 2.169078446306169, |
| "grad_norm": 1.149046778678894, |
| "learning_rate": 3.7247444059163106e-06, |
| "loss": 1.1601, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.1713632901751714, |
| "grad_norm": 1.1731359958648682, |
| "learning_rate": 3.7220285700876812e-06, |
| "loss": 1.1743, |
| "step": 951 |
| }, |
| { |
| "epoch": 2.1736481340441736, |
| "grad_norm": 1.1620594263076782, |
| "learning_rate": 3.719310838029615e-06, |
| "loss": 1.2199, |
| "step": 952 |
| }, |
| { |
| "epoch": 2.1759329779131757, |
| "grad_norm": 1.1532787084579468, |
| "learning_rate": 3.716591213959234e-06, |
| "loss": 1.1403, |
| "step": 953 |
| }, |
| { |
| "epoch": 2.1782178217821784, |
| "grad_norm": 1.1255860328674316, |
| "learning_rate": 3.7138697020965945e-06, |
| "loss": 1.1262, |
| "step": 954 |
| }, |
| { |
| "epoch": 2.1805026656511806, |
| "grad_norm": 1.1715703010559082, |
| "learning_rate": 3.7111463066646858e-06, |
| "loss": 1.1371, |
| "step": 955 |
| }, |
| { |
| "epoch": 2.1827875095201827, |
| "grad_norm": 1.121799111366272, |
| "learning_rate": 3.7084210318894177e-06, |
| "loss": 1.1942, |
| "step": 956 |
| }, |
| { |
| "epoch": 2.185072353389185, |
| "grad_norm": 1.1630092859268188, |
| "learning_rate": 3.7056938819996146e-06, |
| "loss": 1.1928, |
| "step": 957 |
| }, |
| { |
| "epoch": 2.1873571972581876, |
| "grad_norm": 1.132199764251709, |
| "learning_rate": 3.702964861227013e-06, |
| "loss": 1.1706, |
| "step": 958 |
| }, |
| { |
| "epoch": 2.1896420411271897, |
| "grad_norm": 1.1396610736846924, |
| "learning_rate": 3.7002339738062513e-06, |
| "loss": 1.1628, |
| "step": 959 |
| }, |
| { |
| "epoch": 2.191926884996192, |
| "grad_norm": 1.0978549718856812, |
| "learning_rate": 3.6975012239748664e-06, |
| "loss": 1.1966, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.194211728865194, |
| "grad_norm": 1.1913878917694092, |
| "learning_rate": 3.694766615973281e-06, |
| "loss": 1.159, |
| "step": 961 |
| }, |
| { |
| "epoch": 2.1964965727341963, |
| "grad_norm": 1.1240078210830688, |
| "learning_rate": 3.6920301540448054e-06, |
| "loss": 1.1767, |
| "step": 962 |
| }, |
| { |
| "epoch": 2.198781416603199, |
| "grad_norm": 1.2754088640213013, |
| "learning_rate": 3.6892918424356238e-06, |
| "loss": 1.1382, |
| "step": 963 |
| }, |
| { |
| "epoch": 2.201066260472201, |
| "grad_norm": 1.1081241369247437, |
| "learning_rate": 3.6865516853947923e-06, |
| "loss": 1.1634, |
| "step": 964 |
| }, |
| { |
| "epoch": 2.2033511043412033, |
| "grad_norm": 1.1287072896957397, |
| "learning_rate": 3.683809687174229e-06, |
| "loss": 1.1354, |
| "step": 965 |
| }, |
| { |
| "epoch": 2.2056359482102055, |
| "grad_norm": 1.1273839473724365, |
| "learning_rate": 3.6810658520287106e-06, |
| "loss": 1.147, |
| "step": 966 |
| }, |
| { |
| "epoch": 2.207920792079208, |
| "grad_norm": 1.2143324613571167, |
| "learning_rate": 3.6783201842158633e-06, |
| "loss": 1.1793, |
| "step": 967 |
| }, |
| { |
| "epoch": 2.2102056359482103, |
| "grad_norm": 1.2204785346984863, |
| "learning_rate": 3.6755726879961575e-06, |
| "loss": 1.1915, |
| "step": 968 |
| }, |
| { |
| "epoch": 2.2124904798172125, |
| "grad_norm": 1.168456792831421, |
| "learning_rate": 3.6728233676328988e-06, |
| "loss": 1.1502, |
| "step": 969 |
| }, |
| { |
| "epoch": 2.2147753236862147, |
| "grad_norm": 1.1369385719299316, |
| "learning_rate": 3.670072227392226e-06, |
| "loss": 1.1787, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.217060167555217, |
| "grad_norm": 1.1360613107681274, |
| "learning_rate": 3.6673192715431016e-06, |
| "loss": 1.1279, |
| "step": 971 |
| }, |
| { |
| "epoch": 2.2193450114242195, |
| "grad_norm": 1.2034990787506104, |
| "learning_rate": 3.6645645043573044e-06, |
| "loss": 1.1586, |
| "step": 972 |
| }, |
| { |
| "epoch": 2.2216298552932217, |
| "grad_norm": 1.133101224899292, |
| "learning_rate": 3.661807930109422e-06, |
| "loss": 1.1521, |
| "step": 973 |
| }, |
| { |
| "epoch": 2.223914699162224, |
| "grad_norm": 1.185314655303955, |
| "learning_rate": 3.6590495530768493e-06, |
| "loss": 1.1347, |
| "step": 974 |
| }, |
| { |
| "epoch": 2.226199543031226, |
| "grad_norm": 1.1746361255645752, |
| "learning_rate": 3.656289377539778e-06, |
| "loss": 1.1749, |
| "step": 975 |
| }, |
| { |
| "epoch": 2.2284843869002287, |
| "grad_norm": 1.1611051559448242, |
| "learning_rate": 3.65352740778119e-06, |
| "loss": 1.1399, |
| "step": 976 |
| }, |
| { |
| "epoch": 2.230769230769231, |
| "grad_norm": 1.1828947067260742, |
| "learning_rate": 3.650763648086849e-06, |
| "loss": 1.1815, |
| "step": 977 |
| }, |
| { |
| "epoch": 2.233054074638233, |
| "grad_norm": 1.1289913654327393, |
| "learning_rate": 3.6479981027453002e-06, |
| "loss": 1.1524, |
| "step": 978 |
| }, |
| { |
| "epoch": 2.235338918507235, |
| "grad_norm": 1.1266071796417236, |
| "learning_rate": 3.6452307760478583e-06, |
| "loss": 1.1502, |
| "step": 979 |
| }, |
| { |
| "epoch": 2.237623762376238, |
| "grad_norm": 1.128389596939087, |
| "learning_rate": 3.6424616722886004e-06, |
| "loss": 1.1611, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.23990860624524, |
| "grad_norm": 1.216874122619629, |
| "learning_rate": 3.6396907957643623e-06, |
| "loss": 1.1693, |
| "step": 981 |
| }, |
| { |
| "epoch": 2.242193450114242, |
| "grad_norm": 1.1498830318450928, |
| "learning_rate": 3.6369181507747305e-06, |
| "loss": 1.1304, |
| "step": 982 |
| }, |
| { |
| "epoch": 2.2444782939832444, |
| "grad_norm": 1.1365691423416138, |
| "learning_rate": 3.634143741622036e-06, |
| "loss": 1.1673, |
| "step": 983 |
| }, |
| { |
| "epoch": 2.2467631378522466, |
| "grad_norm": 1.1243813037872314, |
| "learning_rate": 3.631367572611348e-06, |
| "loss": 1.1665, |
| "step": 984 |
| }, |
| { |
| "epoch": 2.249047981721249, |
| "grad_norm": 1.1227095127105713, |
| "learning_rate": 3.6285896480504633e-06, |
| "loss": 1.1566, |
| "step": 985 |
| }, |
| { |
| "epoch": 2.2513328255902514, |
| "grad_norm": 1.1476339101791382, |
| "learning_rate": 3.6258099722499063e-06, |
| "loss": 1.1759, |
| "step": 986 |
| }, |
| { |
| "epoch": 2.2536176694592536, |
| "grad_norm": 1.130340814590454, |
| "learning_rate": 3.623028549522918e-06, |
| "loss": 1.1835, |
| "step": 987 |
| }, |
| { |
| "epoch": 2.2559025133282558, |
| "grad_norm": 1.1270296573638916, |
| "learning_rate": 3.620245384185448e-06, |
| "loss": 1.177, |
| "step": 988 |
| }, |
| { |
| "epoch": 2.258187357197258, |
| "grad_norm": 1.170242190361023, |
| "learning_rate": 3.6174604805561524e-06, |
| "loss": 1.1589, |
| "step": 989 |
| }, |
| { |
| "epoch": 2.2604722010662606, |
| "grad_norm": 1.1653361320495605, |
| "learning_rate": 3.6146738429563837e-06, |
| "loss": 1.1349, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.2627570449352628, |
| "grad_norm": 1.195779800415039, |
| "learning_rate": 3.6118854757101855e-06, |
| "loss": 1.1427, |
| "step": 991 |
| }, |
| { |
| "epoch": 2.265041888804265, |
| "grad_norm": 1.1496593952178955, |
| "learning_rate": 3.609095383144284e-06, |
| "loss": 1.1922, |
| "step": 992 |
| }, |
| { |
| "epoch": 2.2673267326732676, |
| "grad_norm": 1.1693689823150635, |
| "learning_rate": 3.6063035695880838e-06, |
| "loss": 1.1701, |
| "step": 993 |
| }, |
| { |
| "epoch": 2.2696115765422697, |
| "grad_norm": 1.1333836317062378, |
| "learning_rate": 3.60351003937366e-06, |
| "loss": 1.163, |
| "step": 994 |
| }, |
| { |
| "epoch": 2.271896420411272, |
| "grad_norm": 1.1533620357513428, |
| "learning_rate": 3.6007147968357505e-06, |
| "loss": 1.1117, |
| "step": 995 |
| }, |
| { |
| "epoch": 2.274181264280274, |
| "grad_norm": 1.1433489322662354, |
| "learning_rate": 3.5979178463117505e-06, |
| "loss": 1.1871, |
| "step": 996 |
| }, |
| { |
| "epoch": 2.2764661081492763, |
| "grad_norm": 1.1923670768737793, |
| "learning_rate": 3.5951191921417063e-06, |
| "loss": 1.1502, |
| "step": 997 |
| }, |
| { |
| "epoch": 2.278750952018279, |
| "grad_norm": 1.13682222366333, |
| "learning_rate": 3.5923188386683067e-06, |
| "loss": 1.1314, |
| "step": 998 |
| }, |
| { |
| "epoch": 2.281035795887281, |
| "grad_norm": 1.2247021198272705, |
| "learning_rate": 3.589516790236879e-06, |
| "loss": 1.1392, |
| "step": 999 |
| }, |
| { |
| "epoch": 2.2833206397562833, |
| "grad_norm": 1.1454575061798096, |
| "learning_rate": 3.586713051195378e-06, |
| "loss": 1.1473, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.2856054836252855, |
| "grad_norm": 1.1719251871109009, |
| "learning_rate": 3.583907625894384e-06, |
| "loss": 1.1642, |
| "step": 1001 |
| }, |
| { |
| "epoch": 2.2878903274942877, |
| "grad_norm": 1.171013355255127, |
| "learning_rate": 3.5811005186870927e-06, |
| "loss": 1.1811, |
| "step": 1002 |
| }, |
| { |
| "epoch": 2.2901751713632903, |
| "grad_norm": 1.1246569156646729, |
| "learning_rate": 3.578291733929311e-06, |
| "loss": 1.1172, |
| "step": 1003 |
| }, |
| { |
| "epoch": 2.2924600152322925, |
| "grad_norm": 1.1219955682754517, |
| "learning_rate": 3.5754812759794465e-06, |
| "loss": 1.1369, |
| "step": 1004 |
| }, |
| { |
| "epoch": 2.2947448591012947, |
| "grad_norm": 1.1373074054718018, |
| "learning_rate": 3.572669149198506e-06, |
| "loss": 1.1703, |
| "step": 1005 |
| }, |
| { |
| "epoch": 2.297029702970297, |
| "grad_norm": 1.204938530921936, |
| "learning_rate": 3.569855357950084e-06, |
| "loss": 1.1089, |
| "step": 1006 |
| }, |
| { |
| "epoch": 2.2993145468392995, |
| "grad_norm": 1.1538746356964111, |
| "learning_rate": 3.567039906600357e-06, |
| "loss": 1.1508, |
| "step": 1007 |
| }, |
| { |
| "epoch": 2.3015993907083017, |
| "grad_norm": 1.135554313659668, |
| "learning_rate": 3.5642227995180787e-06, |
| "loss": 1.1507, |
| "step": 1008 |
| }, |
| { |
| "epoch": 2.303884234577304, |
| "grad_norm": 1.1385624408721924, |
| "learning_rate": 3.5614040410745737e-06, |
| "loss": 1.1578, |
| "step": 1009 |
| }, |
| { |
| "epoch": 2.306169078446306, |
| "grad_norm": 1.144099473953247, |
| "learning_rate": 3.5585836356437266e-06, |
| "loss": 1.1549, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.3084539223153087, |
| "grad_norm": 1.1651406288146973, |
| "learning_rate": 3.555761587601976e-06, |
| "loss": 1.1613, |
| "step": 1011 |
| }, |
| { |
| "epoch": 2.310738766184311, |
| "grad_norm": 1.1579418182373047, |
| "learning_rate": 3.552937901328315e-06, |
| "loss": 1.1261, |
| "step": 1012 |
| }, |
| { |
| "epoch": 2.313023610053313, |
| "grad_norm": 1.2125619649887085, |
| "learning_rate": 3.550112581204273e-06, |
| "loss": 1.1778, |
| "step": 1013 |
| }, |
| { |
| "epoch": 2.315308453922315, |
| "grad_norm": 1.2006595134735107, |
| "learning_rate": 3.5472856316139193e-06, |
| "loss": 1.1755, |
| "step": 1014 |
| }, |
| { |
| "epoch": 2.3175932977913174, |
| "grad_norm": 1.1417663097381592, |
| "learning_rate": 3.5444570569438465e-06, |
| "loss": 1.1604, |
| "step": 1015 |
| }, |
| { |
| "epoch": 2.31987814166032, |
| "grad_norm": 1.1562373638153076, |
| "learning_rate": 3.5416268615831737e-06, |
| "loss": 1.17, |
| "step": 1016 |
| }, |
| { |
| "epoch": 2.322162985529322, |
| "grad_norm": 1.4638527631759644, |
| "learning_rate": 3.5387950499235323e-06, |
| "loss": 1.159, |
| "step": 1017 |
| }, |
| { |
| "epoch": 2.3244478293983244, |
| "grad_norm": 1.1422604322433472, |
| "learning_rate": 3.5359616263590637e-06, |
| "loss": 1.1391, |
| "step": 1018 |
| }, |
| { |
| "epoch": 2.3267326732673266, |
| "grad_norm": 1.1229259967803955, |
| "learning_rate": 3.5331265952864065e-06, |
| "loss": 1.2002, |
| "step": 1019 |
| }, |
| { |
| "epoch": 2.329017517136329, |
| "grad_norm": 1.1740734577178955, |
| "learning_rate": 3.530289961104698e-06, |
| "loss": 1.1347, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.3313023610053314, |
| "grad_norm": 1.1417142152786255, |
| "learning_rate": 3.527451728215561e-06, |
| "loss": 1.1979, |
| "step": 1021 |
| }, |
| { |
| "epoch": 2.3335872048743336, |
| "grad_norm": 1.1507670879364014, |
| "learning_rate": 3.5246119010230994e-06, |
| "loss": 1.1522, |
| "step": 1022 |
| }, |
| { |
| "epoch": 2.3358720487433358, |
| "grad_norm": 1.1694669723510742, |
| "learning_rate": 3.521770483933891e-06, |
| "loss": 1.1215, |
| "step": 1023 |
| }, |
| { |
| "epoch": 2.3381568926123384, |
| "grad_norm": 1.134131669998169, |
| "learning_rate": 3.5189274813569807e-06, |
| "loss": 1.1648, |
| "step": 1024 |
| }, |
| { |
| "epoch": 2.3404417364813406, |
| "grad_norm": 1.144411325454712, |
| "learning_rate": 3.516082897703873e-06, |
| "loss": 1.1526, |
| "step": 1025 |
| }, |
| { |
| "epoch": 2.3427265803503428, |
| "grad_norm": 1.1161473989486694, |
| "learning_rate": 3.5132367373885267e-06, |
| "loss": 1.1495, |
| "step": 1026 |
| }, |
| { |
| "epoch": 2.345011424219345, |
| "grad_norm": 1.143911600112915, |
| "learning_rate": 3.5103890048273464e-06, |
| "loss": 1.1724, |
| "step": 1027 |
| }, |
| { |
| "epoch": 2.347296268088347, |
| "grad_norm": 1.2014847993850708, |
| "learning_rate": 3.507539704439177e-06, |
| "loss": 1.1622, |
| "step": 1028 |
| }, |
| { |
| "epoch": 2.3495811119573498, |
| "grad_norm": 1.2054967880249023, |
| "learning_rate": 3.5046888406452966e-06, |
| "loss": 1.1539, |
| "step": 1029 |
| }, |
| { |
| "epoch": 2.351865955826352, |
| "grad_norm": 1.1581631898880005, |
| "learning_rate": 3.5018364178694077e-06, |
| "loss": 1.16, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.354150799695354, |
| "grad_norm": 1.1760412454605103, |
| "learning_rate": 3.4989824405376314e-06, |
| "loss": 1.1736, |
| "step": 1031 |
| }, |
| { |
| "epoch": 2.3564356435643563, |
| "grad_norm": 1.1309703588485718, |
| "learning_rate": 3.4961269130785047e-06, |
| "loss": 1.166, |
| "step": 1032 |
| }, |
| { |
| "epoch": 2.3587204874333585, |
| "grad_norm": 1.181168556213379, |
| "learning_rate": 3.493269839922967e-06, |
| "loss": 1.1701, |
| "step": 1033 |
| }, |
| { |
| "epoch": 2.361005331302361, |
| "grad_norm": 1.1288000345230103, |
| "learning_rate": 3.490411225504355e-06, |
| "loss": 1.1546, |
| "step": 1034 |
| }, |
| { |
| "epoch": 2.3632901751713633, |
| "grad_norm": 1.1293566226959229, |
| "learning_rate": 3.4875510742584006e-06, |
| "loss": 1.1389, |
| "step": 1035 |
| }, |
| { |
| "epoch": 2.3655750190403655, |
| "grad_norm": 1.161147952079773, |
| "learning_rate": 3.484689390623218e-06, |
| "loss": 1.2149, |
| "step": 1036 |
| }, |
| { |
| "epoch": 2.3678598629093677, |
| "grad_norm": 1.1328603029251099, |
| "learning_rate": 3.4818261790393e-06, |
| "loss": 1.1373, |
| "step": 1037 |
| }, |
| { |
| "epoch": 2.3701447067783703, |
| "grad_norm": 1.14400315284729, |
| "learning_rate": 3.478961443949509e-06, |
| "loss": 1.1547, |
| "step": 1038 |
| }, |
| { |
| "epoch": 2.3724295506473725, |
| "grad_norm": 1.1662416458129883, |
| "learning_rate": 3.4760951897990734e-06, |
| "loss": 1.1395, |
| "step": 1039 |
| }, |
| { |
| "epoch": 2.3747143945163747, |
| "grad_norm": 1.3636192083358765, |
| "learning_rate": 3.473227421035578e-06, |
| "loss": 1.1528, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.376999238385377, |
| "grad_norm": 1.1440818309783936, |
| "learning_rate": 3.4703581421089566e-06, |
| "loss": 1.1655, |
| "step": 1041 |
| }, |
| { |
| "epoch": 2.3792840822543795, |
| "grad_norm": 1.2090609073638916, |
| "learning_rate": 3.4674873574714886e-06, |
| "loss": 1.0997, |
| "step": 1042 |
| }, |
| { |
| "epoch": 2.3815689261233817, |
| "grad_norm": 1.1551830768585205, |
| "learning_rate": 3.464615071577788e-06, |
| "loss": 1.1652, |
| "step": 1043 |
| }, |
| { |
| "epoch": 2.383853769992384, |
| "grad_norm": 1.1676479578018188, |
| "learning_rate": 3.4617412888847984e-06, |
| "loss": 1.1596, |
| "step": 1044 |
| }, |
| { |
| "epoch": 2.386138613861386, |
| "grad_norm": 1.1927589178085327, |
| "learning_rate": 3.458866013851788e-06, |
| "loss": 1.1462, |
| "step": 1045 |
| }, |
| { |
| "epoch": 2.388423457730388, |
| "grad_norm": 1.2201225757598877, |
| "learning_rate": 3.455989250940338e-06, |
| "loss": 1.1703, |
| "step": 1046 |
| }, |
| { |
| "epoch": 2.390708301599391, |
| "grad_norm": 1.1762723922729492, |
| "learning_rate": 3.45311100461434e-06, |
| "loss": 1.1706, |
| "step": 1047 |
| }, |
| { |
| "epoch": 2.392993145468393, |
| "grad_norm": 1.163713812828064, |
| "learning_rate": 3.4502312793399873e-06, |
| "loss": 1.1208, |
| "step": 1048 |
| }, |
| { |
| "epoch": 2.395277989337395, |
| "grad_norm": 1.3621329069137573, |
| "learning_rate": 3.4473500795857674e-06, |
| "loss": 1.1493, |
| "step": 1049 |
| }, |
| { |
| "epoch": 2.3975628332063974, |
| "grad_norm": 1.1427867412567139, |
| "learning_rate": 3.4444674098224555e-06, |
| "loss": 1.1454, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.3998476770754, |
| "grad_norm": 1.1881464719772339, |
| "learning_rate": 3.4415832745231092e-06, |
| "loss": 1.1094, |
| "step": 1051 |
| }, |
| { |
| "epoch": 2.402132520944402, |
| "grad_norm": 1.134605050086975, |
| "learning_rate": 3.4386976781630594e-06, |
| "loss": 1.1676, |
| "step": 1052 |
| }, |
| { |
| "epoch": 2.4044173648134044, |
| "grad_norm": 1.1633696556091309, |
| "learning_rate": 3.4358106252199043e-06, |
| "loss": 1.1258, |
| "step": 1053 |
| }, |
| { |
| "epoch": 2.4067022086824066, |
| "grad_norm": 1.1574831008911133, |
| "learning_rate": 3.4329221201735015e-06, |
| "loss": 1.1499, |
| "step": 1054 |
| }, |
| { |
| "epoch": 2.408987052551409, |
| "grad_norm": 1.169659972190857, |
| "learning_rate": 3.430032167505962e-06, |
| "loss": 1.1197, |
| "step": 1055 |
| }, |
| { |
| "epoch": 2.4112718964204114, |
| "grad_norm": 1.191874623298645, |
| "learning_rate": 3.4271407717016456e-06, |
| "loss": 1.1673, |
| "step": 1056 |
| }, |
| { |
| "epoch": 2.4135567402894136, |
| "grad_norm": 1.1627485752105713, |
| "learning_rate": 3.424247937247148e-06, |
| "loss": 1.1634, |
| "step": 1057 |
| }, |
| { |
| "epoch": 2.4158415841584158, |
| "grad_norm": 1.1143652200698853, |
| "learning_rate": 3.421353668631299e-06, |
| "loss": 1.1509, |
| "step": 1058 |
| }, |
| { |
| "epoch": 2.418126428027418, |
| "grad_norm": 1.1409255266189575, |
| "learning_rate": 3.418457970345153e-06, |
| "loss": 1.1155, |
| "step": 1059 |
| }, |
| { |
| "epoch": 2.4204112718964206, |
| "grad_norm": 1.1744626760482788, |
| "learning_rate": 3.415560846881984e-06, |
| "loss": 1.176, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.4226961157654228, |
| "grad_norm": 1.1737017631530762, |
| "learning_rate": 3.4126623027372763e-06, |
| "loss": 1.1526, |
| "step": 1061 |
| }, |
| { |
| "epoch": 2.424980959634425, |
| "grad_norm": 1.1165390014648438, |
| "learning_rate": 3.4097623424087196e-06, |
| "loss": 1.1041, |
| "step": 1062 |
| }, |
| { |
| "epoch": 2.427265803503427, |
| "grad_norm": 1.1141180992126465, |
| "learning_rate": 3.4068609703961997e-06, |
| "loss": 1.153, |
| "step": 1063 |
| }, |
| { |
| "epoch": 2.4295506473724293, |
| "grad_norm": 1.193962574005127, |
| "learning_rate": 3.4039581912017946e-06, |
| "loss": 1.1109, |
| "step": 1064 |
| }, |
| { |
| "epoch": 2.431835491241432, |
| "grad_norm": 1.154571533203125, |
| "learning_rate": 3.401054009329765e-06, |
| "loss": 1.1416, |
| "step": 1065 |
| }, |
| { |
| "epoch": 2.434120335110434, |
| "grad_norm": 1.2508164644241333, |
| "learning_rate": 3.398148429286547e-06, |
| "loss": 1.122, |
| "step": 1066 |
| }, |
| { |
| "epoch": 2.4364051789794363, |
| "grad_norm": 1.1385433673858643, |
| "learning_rate": 3.3952414555807493e-06, |
| "loss": 1.167, |
| "step": 1067 |
| }, |
| { |
| "epoch": 2.4386900228484385, |
| "grad_norm": 1.1891573667526245, |
| "learning_rate": 3.392333092723141e-06, |
| "loss": 1.1732, |
| "step": 1068 |
| }, |
| { |
| "epoch": 2.440974866717441, |
| "grad_norm": 1.230272889137268, |
| "learning_rate": 3.389423345226647e-06, |
| "loss": 1.1714, |
| "step": 1069 |
| }, |
| { |
| "epoch": 2.4432597105864433, |
| "grad_norm": 1.1934008598327637, |
| "learning_rate": 3.386512217606339e-06, |
| "loss": 1.1613, |
| "step": 1070 |
| }, |
| { |
| "epoch": 2.4455445544554455, |
| "grad_norm": 1.1615201234817505, |
| "learning_rate": 3.383599714379435e-06, |
| "loss": 1.1551, |
| "step": 1071 |
| }, |
| { |
| "epoch": 2.4478293983244477, |
| "grad_norm": 1.1495729684829712, |
| "learning_rate": 3.3806858400652825e-06, |
| "loss": 1.1311, |
| "step": 1072 |
| }, |
| { |
| "epoch": 2.4501142421934503, |
| "grad_norm": 1.1871975660324097, |
| "learning_rate": 3.37777059918536e-06, |
| "loss": 1.1294, |
| "step": 1073 |
| }, |
| { |
| "epoch": 2.4523990860624525, |
| "grad_norm": 1.1563098430633545, |
| "learning_rate": 3.374853996263264e-06, |
| "loss": 1.1412, |
| "step": 1074 |
| }, |
| { |
| "epoch": 2.4546839299314547, |
| "grad_norm": 1.27406644821167, |
| "learning_rate": 3.3719360358247054e-06, |
| "loss": 1.1515, |
| "step": 1075 |
| }, |
| { |
| "epoch": 2.456968773800457, |
| "grad_norm": 1.227537751197815, |
| "learning_rate": 3.369016722397504e-06, |
| "loss": 1.1525, |
| "step": 1076 |
| }, |
| { |
| "epoch": 2.459253617669459, |
| "grad_norm": 1.1642208099365234, |
| "learning_rate": 3.366096060511575e-06, |
| "loss": 1.1476, |
| "step": 1077 |
| }, |
| { |
| "epoch": 2.4615384615384617, |
| "grad_norm": 1.1597189903259277, |
| "learning_rate": 3.363174054698928e-06, |
| "loss": 1.1567, |
| "step": 1078 |
| }, |
| { |
| "epoch": 2.463823305407464, |
| "grad_norm": 1.2041665315628052, |
| "learning_rate": 3.3602507094936576e-06, |
| "loss": 1.1265, |
| "step": 1079 |
| }, |
| { |
| "epoch": 2.466108149276466, |
| "grad_norm": 1.2899173498153687, |
| "learning_rate": 3.357326029431939e-06, |
| "loss": 1.157, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.4683929931454682, |
| "grad_norm": 1.2047252655029297, |
| "learning_rate": 3.3544000190520144e-06, |
| "loss": 1.1466, |
| "step": 1081 |
| }, |
| { |
| "epoch": 2.470677837014471, |
| "grad_norm": 1.2135494947433472, |
| "learning_rate": 3.351472682894193e-06, |
| "loss": 1.1474, |
| "step": 1082 |
| }, |
| { |
| "epoch": 2.472962680883473, |
| "grad_norm": 1.1497875452041626, |
| "learning_rate": 3.348544025500841e-06, |
| "loss": 1.0876, |
| "step": 1083 |
| }, |
| { |
| "epoch": 2.4752475247524752, |
| "grad_norm": 1.260158658027649, |
| "learning_rate": 3.3456140514163756e-06, |
| "loss": 1.1698, |
| "step": 1084 |
| }, |
| { |
| "epoch": 2.4775323686214774, |
| "grad_norm": 1.2946076393127441, |
| "learning_rate": 3.342682765187254e-06, |
| "loss": 1.1504, |
| "step": 1085 |
| }, |
| { |
| "epoch": 2.47981721249048, |
| "grad_norm": 1.2317827939987183, |
| "learning_rate": 3.3397501713619736e-06, |
| "loss": 1.121, |
| "step": 1086 |
| }, |
| { |
| "epoch": 2.4821020563594822, |
| "grad_norm": 1.1697484254837036, |
| "learning_rate": 3.336816274491057e-06, |
| "loss": 1.185, |
| "step": 1087 |
| }, |
| { |
| "epoch": 2.4843869002284844, |
| "grad_norm": 1.1670335531234741, |
| "learning_rate": 3.333881079127052e-06, |
| "loss": 1.1294, |
| "step": 1088 |
| }, |
| { |
| "epoch": 2.4866717440974866, |
| "grad_norm": 1.2509000301361084, |
| "learning_rate": 3.3309445898245184e-06, |
| "loss": 1.178, |
| "step": 1089 |
| }, |
| { |
| "epoch": 2.4889565879664888, |
| "grad_norm": 1.178087592124939, |
| "learning_rate": 3.328006811140026e-06, |
| "loss": 1.1516, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.4912414318354914, |
| "grad_norm": 1.2256996631622314, |
| "learning_rate": 3.3250677476321442e-06, |
| "loss": 1.1598, |
| "step": 1091 |
| }, |
| { |
| "epoch": 2.4935262757044936, |
| "grad_norm": 1.1501520872116089, |
| "learning_rate": 3.322127403861437e-06, |
| "loss": 1.1678, |
| "step": 1092 |
| }, |
| { |
| "epoch": 2.4958111195734958, |
| "grad_norm": 1.2015535831451416, |
| "learning_rate": 3.319185784390453e-06, |
| "loss": 1.159, |
| "step": 1093 |
| }, |
| { |
| "epoch": 2.498095963442498, |
| "grad_norm": 1.203966736793518, |
| "learning_rate": 3.3162428937837233e-06, |
| "loss": 1.1335, |
| "step": 1094 |
| }, |
| { |
| "epoch": 2.5003808073115, |
| "grad_norm": 1.2212433815002441, |
| "learning_rate": 3.313298736607748e-06, |
| "loss": 1.1801, |
| "step": 1095 |
| }, |
| { |
| "epoch": 2.5026656511805028, |
| "grad_norm": 1.222063422203064, |
| "learning_rate": 3.3103533174309967e-06, |
| "loss": 1.1536, |
| "step": 1096 |
| }, |
| { |
| "epoch": 2.504950495049505, |
| "grad_norm": 1.1464064121246338, |
| "learning_rate": 3.3074066408238927e-06, |
| "loss": 1.0955, |
| "step": 1097 |
| }, |
| { |
| "epoch": 2.507235338918507, |
| "grad_norm": 1.2135506868362427, |
| "learning_rate": 3.3044587113588134e-06, |
| "loss": 1.1545, |
| "step": 1098 |
| }, |
| { |
| "epoch": 2.5095201827875098, |
| "grad_norm": 1.208592414855957, |
| "learning_rate": 3.3015095336100795e-06, |
| "loss": 1.1504, |
| "step": 1099 |
| }, |
| { |
| "epoch": 2.511805026656512, |
| "grad_norm": 1.119957685470581, |
| "learning_rate": 3.2985591121539495e-06, |
| "loss": 1.1504, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.514089870525514, |
| "grad_norm": 1.1869513988494873, |
| "learning_rate": 3.2956074515686105e-06, |
| "loss": 1.1694, |
| "step": 1101 |
| }, |
| { |
| "epoch": 2.5163747143945163, |
| "grad_norm": 1.1720670461654663, |
| "learning_rate": 3.2926545564341715e-06, |
| "loss": 1.1518, |
| "step": 1102 |
| }, |
| { |
| "epoch": 2.5186595582635185, |
| "grad_norm": 1.1721378564834595, |
| "learning_rate": 3.2897004313326608e-06, |
| "loss": 1.1388, |
| "step": 1103 |
| }, |
| { |
| "epoch": 2.520944402132521, |
| "grad_norm": 1.1609101295471191, |
| "learning_rate": 3.2867450808480115e-06, |
| "loss": 1.1309, |
| "step": 1104 |
| }, |
| { |
| "epoch": 2.5232292460015233, |
| "grad_norm": 1.1172316074371338, |
| "learning_rate": 3.2837885095660598e-06, |
| "loss": 1.1626, |
| "step": 1105 |
| }, |
| { |
| "epoch": 2.5255140898705255, |
| "grad_norm": 1.2131626605987549, |
| "learning_rate": 3.280830722074536e-06, |
| "loss": 1.1458, |
| "step": 1106 |
| }, |
| { |
| "epoch": 2.5277989337395277, |
| "grad_norm": 1.2087211608886719, |
| "learning_rate": 3.2778717229630584e-06, |
| "loss": 1.1665, |
| "step": 1107 |
| }, |
| { |
| "epoch": 2.53008377760853, |
| "grad_norm": 1.171183705329895, |
| "learning_rate": 3.2749115168231238e-06, |
| "loss": 1.1922, |
| "step": 1108 |
| }, |
| { |
| "epoch": 2.5323686214775325, |
| "grad_norm": 1.1399853229522705, |
| "learning_rate": 3.271950108248102e-06, |
| "loss": 1.1292, |
| "step": 1109 |
| }, |
| { |
| "epoch": 2.5346534653465347, |
| "grad_norm": 1.1595377922058105, |
| "learning_rate": 3.268987501833231e-06, |
| "loss": 1.1481, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.536938309215537, |
| "grad_norm": 1.2080212831497192, |
| "learning_rate": 3.2660237021756047e-06, |
| "loss": 1.17, |
| "step": 1111 |
| }, |
| { |
| "epoch": 2.5392231530845395, |
| "grad_norm": 1.150943398475647, |
| "learning_rate": 3.26305871387417e-06, |
| "loss": 1.1193, |
| "step": 1112 |
| }, |
| { |
| "epoch": 2.5415079969535412, |
| "grad_norm": 1.1977055072784424, |
| "learning_rate": 3.260092541529718e-06, |
| "loss": 1.1656, |
| "step": 1113 |
| }, |
| { |
| "epoch": 2.543792840822544, |
| "grad_norm": 1.1943557262420654, |
| "learning_rate": 3.257125189744877e-06, |
| "loss": 1.1436, |
| "step": 1114 |
| }, |
| { |
| "epoch": 2.546077684691546, |
| "grad_norm": 1.1852405071258545, |
| "learning_rate": 3.254156663124106e-06, |
| "loss": 1.1743, |
| "step": 1115 |
| }, |
| { |
| "epoch": 2.5483625285605482, |
| "grad_norm": 1.1327954530715942, |
| "learning_rate": 3.2511869662736855e-06, |
| "loss": 1.1275, |
| "step": 1116 |
| }, |
| { |
| "epoch": 2.550647372429551, |
| "grad_norm": 1.1748192310333252, |
| "learning_rate": 3.248216103801713e-06, |
| "loss": 1.1277, |
| "step": 1117 |
| }, |
| { |
| "epoch": 2.552932216298553, |
| "grad_norm": 1.2016124725341797, |
| "learning_rate": 3.2452440803180953e-06, |
| "loss": 1.1692, |
| "step": 1118 |
| }, |
| { |
| "epoch": 2.5552170601675552, |
| "grad_norm": 1.1545820236206055, |
| "learning_rate": 3.24227090043454e-06, |
| "loss": 1.1335, |
| "step": 1119 |
| }, |
| { |
| "epoch": 2.5575019040365574, |
| "grad_norm": 1.168172836303711, |
| "learning_rate": 3.239296568764547e-06, |
| "loss": 1.1515, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.5597867479055596, |
| "grad_norm": 1.1570290327072144, |
| "learning_rate": 3.236321089923408e-06, |
| "loss": 1.1921, |
| "step": 1121 |
| }, |
| { |
| "epoch": 2.5620715917745622, |
| "grad_norm": 1.1722872257232666, |
| "learning_rate": 3.233344468528192e-06, |
| "loss": 1.1842, |
| "step": 1122 |
| }, |
| { |
| "epoch": 2.5643564356435644, |
| "grad_norm": 1.2346643209457397, |
| "learning_rate": 3.2303667091977397e-06, |
| "loss": 1.1987, |
| "step": 1123 |
| }, |
| { |
| "epoch": 2.5666412795125666, |
| "grad_norm": 1.1846752166748047, |
| "learning_rate": 3.2273878165526603e-06, |
| "loss": 1.1672, |
| "step": 1124 |
| }, |
| { |
| "epoch": 2.568926123381569, |
| "grad_norm": 1.1800742149353027, |
| "learning_rate": 3.224407795215319e-06, |
| "loss": 1.1405, |
| "step": 1125 |
| }, |
| { |
| "epoch": 2.571210967250571, |
| "grad_norm": 1.2667362689971924, |
| "learning_rate": 3.2214266498098357e-06, |
| "loss": 1.097, |
| "step": 1126 |
| }, |
| { |
| "epoch": 2.5734958111195736, |
| "grad_norm": 1.1848291158676147, |
| "learning_rate": 3.218444384962071e-06, |
| "loss": 1.1309, |
| "step": 1127 |
| }, |
| { |
| "epoch": 2.575780654988576, |
| "grad_norm": 1.2490592002868652, |
| "learning_rate": 3.215461005299624e-06, |
| "loss": 1.1677, |
| "step": 1128 |
| }, |
| { |
| "epoch": 2.578065498857578, |
| "grad_norm": 1.1780728101730347, |
| "learning_rate": 3.2124765154518245e-06, |
| "loss": 1.1438, |
| "step": 1129 |
| }, |
| { |
| "epoch": 2.5803503427265806, |
| "grad_norm": 1.1854690313339233, |
| "learning_rate": 3.209490920049724e-06, |
| "loss": 1.1854, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.5826351865955828, |
| "grad_norm": 1.1640013456344604, |
| "learning_rate": 3.2065042237260897e-06, |
| "loss": 1.1421, |
| "step": 1131 |
| }, |
| { |
| "epoch": 2.584920030464585, |
| "grad_norm": 1.1875327825546265, |
| "learning_rate": 3.2035164311153967e-06, |
| "loss": 1.1617, |
| "step": 1132 |
| }, |
| { |
| "epoch": 2.587204874333587, |
| "grad_norm": 1.188707709312439, |
| "learning_rate": 3.200527546853822e-06, |
| "loss": 1.1618, |
| "step": 1133 |
| }, |
| { |
| "epoch": 2.5894897182025893, |
| "grad_norm": 1.1984432935714722, |
| "learning_rate": 3.1975375755792358e-06, |
| "loss": 1.1647, |
| "step": 1134 |
| }, |
| { |
| "epoch": 2.591774562071592, |
| "grad_norm": 1.2000738382339478, |
| "learning_rate": 3.1945465219311964e-06, |
| "loss": 1.1555, |
| "step": 1135 |
| }, |
| { |
| "epoch": 2.594059405940594, |
| "grad_norm": 1.1691455841064453, |
| "learning_rate": 3.19155439055094e-06, |
| "loss": 1.1337, |
| "step": 1136 |
| }, |
| { |
| "epoch": 2.5963442498095963, |
| "grad_norm": 1.3328726291656494, |
| "learning_rate": 3.1885611860813747e-06, |
| "loss": 1.1662, |
| "step": 1137 |
| }, |
| { |
| "epoch": 2.5986290936785985, |
| "grad_norm": 1.159832239151001, |
| "learning_rate": 3.185566913167076e-06, |
| "loss": 1.1231, |
| "step": 1138 |
| }, |
| { |
| "epoch": 2.6009139375476007, |
| "grad_norm": 1.139963984489441, |
| "learning_rate": 3.1825715764542765e-06, |
| "loss": 1.1348, |
| "step": 1139 |
| }, |
| { |
| "epoch": 2.6031987814166033, |
| "grad_norm": 1.1485956907272339, |
| "learning_rate": 3.1795751805908578e-06, |
| "loss": 1.1157, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.6054836252856055, |
| "grad_norm": 1.1575555801391602, |
| "learning_rate": 3.1765777302263464e-06, |
| "loss": 1.1738, |
| "step": 1141 |
| }, |
| { |
| "epoch": 2.6077684691546077, |
| "grad_norm": 1.1840925216674805, |
| "learning_rate": 3.173579230011905e-06, |
| "loss": 1.1345, |
| "step": 1142 |
| }, |
| { |
| "epoch": 2.6100533130236103, |
| "grad_norm": 1.144516944885254, |
| "learning_rate": 3.1705796846003267e-06, |
| "loss": 1.1219, |
| "step": 1143 |
| }, |
| { |
| "epoch": 2.612338156892612, |
| "grad_norm": 1.1362788677215576, |
| "learning_rate": 3.1675790986460233e-06, |
| "loss": 1.1382, |
| "step": 1144 |
| }, |
| { |
| "epoch": 2.6146230007616147, |
| "grad_norm": 1.2047488689422607, |
| "learning_rate": 3.1645774768050224e-06, |
| "loss": 1.1427, |
| "step": 1145 |
| }, |
| { |
| "epoch": 2.616907844630617, |
| "grad_norm": 1.1238912343978882, |
| "learning_rate": 3.1615748237349626e-06, |
| "loss": 1.1298, |
| "step": 1146 |
| }, |
| { |
| "epoch": 2.619192688499619, |
| "grad_norm": 1.1865835189819336, |
| "learning_rate": 3.158571144095076e-06, |
| "loss": 1.1537, |
| "step": 1147 |
| }, |
| { |
| "epoch": 2.6214775323686217, |
| "grad_norm": 1.1614654064178467, |
| "learning_rate": 3.155566442546194e-06, |
| "loss": 1.1608, |
| "step": 1148 |
| }, |
| { |
| "epoch": 2.623762376237624, |
| "grad_norm": 1.1808875799179077, |
| "learning_rate": 3.1525607237507296e-06, |
| "loss": 1.1168, |
| "step": 1149 |
| }, |
| { |
| "epoch": 2.626047220106626, |
| "grad_norm": 1.2047436237335205, |
| "learning_rate": 3.1495539923726757e-06, |
| "loss": 1.1402, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.6283320639756282, |
| "grad_norm": 1.190082311630249, |
| "learning_rate": 3.146546253077597e-06, |
| "loss": 1.1257, |
| "step": 1151 |
| }, |
| { |
| "epoch": 2.6306169078446304, |
| "grad_norm": 1.1529537439346313, |
| "learning_rate": 3.1435375105326198e-06, |
| "loss": 1.1332, |
| "step": 1152 |
| }, |
| { |
| "epoch": 2.632901751713633, |
| "grad_norm": 1.1505589485168457, |
| "learning_rate": 3.1405277694064306e-06, |
| "loss": 1.1654, |
| "step": 1153 |
| }, |
| { |
| "epoch": 2.6351865955826352, |
| "grad_norm": 1.1244786977767944, |
| "learning_rate": 3.1375170343692642e-06, |
| "loss": 1.1625, |
| "step": 1154 |
| }, |
| { |
| "epoch": 2.6374714394516374, |
| "grad_norm": 1.2105226516723633, |
| "learning_rate": 3.134505310092895e-06, |
| "loss": 1.1743, |
| "step": 1155 |
| }, |
| { |
| "epoch": 2.6397562833206396, |
| "grad_norm": 1.1880476474761963, |
| "learning_rate": 3.131492601250636e-06, |
| "loss": 1.1119, |
| "step": 1156 |
| }, |
| { |
| "epoch": 2.642041127189642, |
| "grad_norm": 1.1480690240859985, |
| "learning_rate": 3.1284789125173257e-06, |
| "loss": 1.128, |
| "step": 1157 |
| }, |
| { |
| "epoch": 2.6443259710586444, |
| "grad_norm": 1.174415111541748, |
| "learning_rate": 3.1254642485693255e-06, |
| "loss": 1.1545, |
| "step": 1158 |
| }, |
| { |
| "epoch": 2.6466108149276466, |
| "grad_norm": 1.1595776081085205, |
| "learning_rate": 3.1224486140845063e-06, |
| "loss": 1.1502, |
| "step": 1159 |
| }, |
| { |
| "epoch": 2.648895658796649, |
| "grad_norm": 1.1877551078796387, |
| "learning_rate": 3.1194320137422483e-06, |
| "loss": 1.1487, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.6511805026656514, |
| "grad_norm": 1.1842609643936157, |
| "learning_rate": 3.116414452223429e-06, |
| "loss": 1.1329, |
| "step": 1161 |
| }, |
| { |
| "epoch": 2.6534653465346536, |
| "grad_norm": 1.1747876405715942, |
| "learning_rate": 3.1133959342104186e-06, |
| "loss": 1.138, |
| "step": 1162 |
| }, |
| { |
| "epoch": 2.655750190403656, |
| "grad_norm": 1.1728745698928833, |
| "learning_rate": 3.110376464387069e-06, |
| "loss": 1.1593, |
| "step": 1163 |
| }, |
| { |
| "epoch": 2.658035034272658, |
| "grad_norm": 1.1547205448150635, |
| "learning_rate": 3.1073560474387114e-06, |
| "loss": 1.1244, |
| "step": 1164 |
| }, |
| { |
| "epoch": 2.66031987814166, |
| "grad_norm": 1.1600390672683716, |
| "learning_rate": 3.1043346880521456e-06, |
| "loss": 1.1664, |
| "step": 1165 |
| }, |
| { |
| "epoch": 2.662604722010663, |
| "grad_norm": 1.1489025354385376, |
| "learning_rate": 3.1013123909156347e-06, |
| "loss": 1.1869, |
| "step": 1166 |
| }, |
| { |
| "epoch": 2.664889565879665, |
| "grad_norm": 1.1556684970855713, |
| "learning_rate": 3.0982891607188948e-06, |
| "loss": 1.1408, |
| "step": 1167 |
| }, |
| { |
| "epoch": 2.667174409748667, |
| "grad_norm": 1.1727960109710693, |
| "learning_rate": 3.095265002153092e-06, |
| "loss": 1.1676, |
| "step": 1168 |
| }, |
| { |
| "epoch": 2.6694592536176693, |
| "grad_norm": 1.1960148811340332, |
| "learning_rate": 3.0922399199108326e-06, |
| "loss": 1.138, |
| "step": 1169 |
| }, |
| { |
| "epoch": 2.6717440974866715, |
| "grad_norm": 1.1238528490066528, |
| "learning_rate": 3.0892139186861563e-06, |
| "loss": 1.1308, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.674028941355674, |
| "grad_norm": 1.1751792430877686, |
| "learning_rate": 3.0861870031745266e-06, |
| "loss": 1.1518, |
| "step": 1171 |
| }, |
| { |
| "epoch": 2.6763137852246763, |
| "grad_norm": 1.1722700595855713, |
| "learning_rate": 3.0831591780728282e-06, |
| "loss": 1.0687, |
| "step": 1172 |
| }, |
| { |
| "epoch": 2.6785986290936785, |
| "grad_norm": 1.1473584175109863, |
| "learning_rate": 3.0801304480793563e-06, |
| "loss": 1.1313, |
| "step": 1173 |
| }, |
| { |
| "epoch": 2.680883472962681, |
| "grad_norm": 1.1742531061172485, |
| "learning_rate": 3.0771008178938112e-06, |
| "loss": 1.1293, |
| "step": 1174 |
| }, |
| { |
| "epoch": 2.6831683168316833, |
| "grad_norm": 1.1747832298278809, |
| "learning_rate": 3.074070292217288e-06, |
| "loss": 1.1334, |
| "step": 1175 |
| }, |
| { |
| "epoch": 2.6854531607006855, |
| "grad_norm": 1.1738357543945312, |
| "learning_rate": 3.0710388757522724e-06, |
| "loss": 1.1663, |
| "step": 1176 |
| }, |
| { |
| "epoch": 2.6877380045696877, |
| "grad_norm": 1.1576931476593018, |
| "learning_rate": 3.068006573202634e-06, |
| "loss": 1.1508, |
| "step": 1177 |
| }, |
| { |
| "epoch": 2.69002284843869, |
| "grad_norm": 1.1925418376922607, |
| "learning_rate": 3.0649733892736143e-06, |
| "loss": 1.135, |
| "step": 1178 |
| }, |
| { |
| "epoch": 2.6923076923076925, |
| "grad_norm": 1.1554484367370605, |
| "learning_rate": 3.061939328671824e-06, |
| "loss": 1.1328, |
| "step": 1179 |
| }, |
| { |
| "epoch": 2.6945925361766947, |
| "grad_norm": 1.153615117073059, |
| "learning_rate": 3.0589043961052344e-06, |
| "loss": 1.1356, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.696877380045697, |
| "grad_norm": 1.1935803890228271, |
| "learning_rate": 3.05586859628317e-06, |
| "loss": 1.1266, |
| "step": 1181 |
| }, |
| { |
| "epoch": 2.699162223914699, |
| "grad_norm": 1.1908665895462036, |
| "learning_rate": 3.0528319339163003e-06, |
| "loss": 1.1393, |
| "step": 1182 |
| }, |
| { |
| "epoch": 2.7014470677837013, |
| "grad_norm": 1.194982647895813, |
| "learning_rate": 3.0497944137166326e-06, |
| "loss": 1.1349, |
| "step": 1183 |
| }, |
| { |
| "epoch": 2.703731911652704, |
| "grad_norm": 1.1403220891952515, |
| "learning_rate": 3.0467560403975066e-06, |
| "loss": 1.1269, |
| "step": 1184 |
| }, |
| { |
| "epoch": 2.706016755521706, |
| "grad_norm": 1.1594319343566895, |
| "learning_rate": 3.043716818673586e-06, |
| "loss": 1.1316, |
| "step": 1185 |
| }, |
| { |
| "epoch": 2.7083015993907082, |
| "grad_norm": 1.1441702842712402, |
| "learning_rate": 3.0406767532608495e-06, |
| "loss": 1.1605, |
| "step": 1186 |
| }, |
| { |
| "epoch": 2.7105864432597104, |
| "grad_norm": 1.2065651416778564, |
| "learning_rate": 3.0376358488765863e-06, |
| "loss": 1.1708, |
| "step": 1187 |
| }, |
| { |
| "epoch": 2.7128712871287126, |
| "grad_norm": 1.1729774475097656, |
| "learning_rate": 3.034594110239386e-06, |
| "loss": 1.1563, |
| "step": 1188 |
| }, |
| { |
| "epoch": 2.7151561309977152, |
| "grad_norm": 1.1797168254852295, |
| "learning_rate": 3.0315515420691354e-06, |
| "loss": 1.1732, |
| "step": 1189 |
| }, |
| { |
| "epoch": 2.7174409748667174, |
| "grad_norm": 1.1579010486602783, |
| "learning_rate": 3.0285081490870057e-06, |
| "loss": 1.1375, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.7197258187357196, |
| "grad_norm": 1.1517558097839355, |
| "learning_rate": 3.0254639360154475e-06, |
| "loss": 1.1594, |
| "step": 1191 |
| }, |
| { |
| "epoch": 2.7220106626047222, |
| "grad_norm": 1.2115283012390137, |
| "learning_rate": 3.0224189075781886e-06, |
| "loss": 1.1334, |
| "step": 1192 |
| }, |
| { |
| "epoch": 2.7242955064737244, |
| "grad_norm": 1.1372671127319336, |
| "learning_rate": 3.0193730685002153e-06, |
| "loss": 1.2064, |
| "step": 1193 |
| }, |
| { |
| "epoch": 2.7265803503427266, |
| "grad_norm": 1.2300207614898682, |
| "learning_rate": 3.0163264235077777e-06, |
| "loss": 1.1767, |
| "step": 1194 |
| }, |
| { |
| "epoch": 2.728865194211729, |
| "grad_norm": 1.1949130296707153, |
| "learning_rate": 3.0132789773283734e-06, |
| "loss": 1.1474, |
| "step": 1195 |
| }, |
| { |
| "epoch": 2.731150038080731, |
| "grad_norm": 1.1411316394805908, |
| "learning_rate": 3.0102307346907442e-06, |
| "loss": 1.1565, |
| "step": 1196 |
| }, |
| { |
| "epoch": 2.7334348819497336, |
| "grad_norm": 1.153688669204712, |
| "learning_rate": 3.0071817003248667e-06, |
| "loss": 1.1368, |
| "step": 1197 |
| }, |
| { |
| "epoch": 2.735719725818736, |
| "grad_norm": 1.2030675411224365, |
| "learning_rate": 3.0041318789619465e-06, |
| "loss": 1.1567, |
| "step": 1198 |
| }, |
| { |
| "epoch": 2.738004569687738, |
| "grad_norm": 1.1593271493911743, |
| "learning_rate": 3.001081275334412e-06, |
| "loss": 1.161, |
| "step": 1199 |
| }, |
| { |
| "epoch": 2.74028941355674, |
| "grad_norm": 1.181596279144287, |
| "learning_rate": 2.9980298941759035e-06, |
| "loss": 1.122, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.7425742574257423, |
| "grad_norm": 1.1973023414611816, |
| "learning_rate": 2.9949777402212677e-06, |
| "loss": 1.157, |
| "step": 1201 |
| }, |
| { |
| "epoch": 2.744859101294745, |
| "grad_norm": 1.1745750904083252, |
| "learning_rate": 2.9919248182065512e-06, |
| "loss": 1.1843, |
| "step": 1202 |
| }, |
| { |
| "epoch": 2.747143945163747, |
| "grad_norm": 1.1595338582992554, |
| "learning_rate": 2.9888711328689933e-06, |
| "loss": 1.1741, |
| "step": 1203 |
| }, |
| { |
| "epoch": 2.7494287890327493, |
| "grad_norm": 1.165958046913147, |
| "learning_rate": 2.985816688947017e-06, |
| "loss": 1.1217, |
| "step": 1204 |
| }, |
| { |
| "epoch": 2.751713632901752, |
| "grad_norm": 1.1976659297943115, |
| "learning_rate": 2.9827614911802205e-06, |
| "loss": 1.1443, |
| "step": 1205 |
| }, |
| { |
| "epoch": 2.753998476770754, |
| "grad_norm": 1.1795979738235474, |
| "learning_rate": 2.9797055443093744e-06, |
| "loss": 1.1624, |
| "step": 1206 |
| }, |
| { |
| "epoch": 2.7562833206397563, |
| "grad_norm": 1.1748056411743164, |
| "learning_rate": 2.9766488530764105e-06, |
| "loss": 1.1715, |
| "step": 1207 |
| }, |
| { |
| "epoch": 2.7585681645087585, |
| "grad_norm": 1.191728115081787, |
| "learning_rate": 2.9735914222244165e-06, |
| "loss": 1.1472, |
| "step": 1208 |
| }, |
| { |
| "epoch": 2.7608530083777607, |
| "grad_norm": 1.168042540550232, |
| "learning_rate": 2.970533256497627e-06, |
| "loss": 1.1389, |
| "step": 1209 |
| }, |
| { |
| "epoch": 2.7631378522467633, |
| "grad_norm": 1.2145792245864868, |
| "learning_rate": 2.9674743606414163e-06, |
| "loss": 1.1975, |
| "step": 1210 |
| }, |
| { |
| "epoch": 2.7654226961157655, |
| "grad_norm": 1.1970194578170776, |
| "learning_rate": 2.9644147394022925e-06, |
| "loss": 1.1911, |
| "step": 1211 |
| }, |
| { |
| "epoch": 2.7677075399847677, |
| "grad_norm": 1.181572675704956, |
| "learning_rate": 2.96135439752789e-06, |
| "loss": 1.1384, |
| "step": 1212 |
| }, |
| { |
| "epoch": 2.76999238385377, |
| "grad_norm": 1.1687877178192139, |
| "learning_rate": 2.95829333976696e-06, |
| "loss": 1.118, |
| "step": 1213 |
| }, |
| { |
| "epoch": 2.772277227722772, |
| "grad_norm": 1.171147108078003, |
| "learning_rate": 2.955231570869365e-06, |
| "loss": 1.1387, |
| "step": 1214 |
| }, |
| { |
| "epoch": 2.7745620715917747, |
| "grad_norm": 1.184873104095459, |
| "learning_rate": 2.9521690955860715e-06, |
| "loss": 1.1535, |
| "step": 1215 |
| }, |
| { |
| "epoch": 2.776846915460777, |
| "grad_norm": 1.1838406324386597, |
| "learning_rate": 2.9491059186691416e-06, |
| "loss": 1.157, |
| "step": 1216 |
| }, |
| { |
| "epoch": 2.779131759329779, |
| "grad_norm": 1.1575403213500977, |
| "learning_rate": 2.9460420448717264e-06, |
| "loss": 1.1179, |
| "step": 1217 |
| }, |
| { |
| "epoch": 2.7814166031987813, |
| "grad_norm": 1.1447091102600098, |
| "learning_rate": 2.9429774789480576e-06, |
| "loss": 1.1745, |
| "step": 1218 |
| }, |
| { |
| "epoch": 2.7837014470677834, |
| "grad_norm": 1.1797469854354858, |
| "learning_rate": 2.9399122256534412e-06, |
| "loss": 1.1278, |
| "step": 1219 |
| }, |
| { |
| "epoch": 2.785986290936786, |
| "grad_norm": 1.1838300228118896, |
| "learning_rate": 2.936846289744252e-06, |
| "loss": 1.1605, |
| "step": 1220 |
| }, |
| { |
| "epoch": 2.7882711348057883, |
| "grad_norm": 1.1908303499221802, |
| "learning_rate": 2.9337796759779197e-06, |
| "loss": 1.1133, |
| "step": 1221 |
| }, |
| { |
| "epoch": 2.7905559786747904, |
| "grad_norm": 1.192145824432373, |
| "learning_rate": 2.930712389112929e-06, |
| "loss": 1.1247, |
| "step": 1222 |
| }, |
| { |
| "epoch": 2.792840822543793, |
| "grad_norm": 1.1842783689498901, |
| "learning_rate": 2.9276444339088095e-06, |
| "loss": 1.0794, |
| "step": 1223 |
| }, |
| { |
| "epoch": 2.7951256664127953, |
| "grad_norm": 1.152113914489746, |
| "learning_rate": 2.924575815126125e-06, |
| "loss": 1.1397, |
| "step": 1224 |
| }, |
| { |
| "epoch": 2.7974105102817974, |
| "grad_norm": 1.220105767250061, |
| "learning_rate": 2.921506537526471e-06, |
| "loss": 1.1426, |
| "step": 1225 |
| }, |
| { |
| "epoch": 2.7996953541507996, |
| "grad_norm": 1.2473626136779785, |
| "learning_rate": 2.9184366058724655e-06, |
| "loss": 1.1645, |
| "step": 1226 |
| }, |
| { |
| "epoch": 2.801980198019802, |
| "grad_norm": 1.2051730155944824, |
| "learning_rate": 2.915366024927741e-06, |
| "loss": 1.1498, |
| "step": 1227 |
| }, |
| { |
| "epoch": 2.8042650418888044, |
| "grad_norm": 1.2148674726486206, |
| "learning_rate": 2.912294799456936e-06, |
| "loss": 1.1334, |
| "step": 1228 |
| }, |
| { |
| "epoch": 2.8065498857578066, |
| "grad_norm": 1.176007866859436, |
| "learning_rate": 2.9092229342256915e-06, |
| "loss": 1.1177, |
| "step": 1229 |
| }, |
| { |
| "epoch": 2.808834729626809, |
| "grad_norm": 1.1746013164520264, |
| "learning_rate": 2.90615043400064e-06, |
| "loss": 1.1708, |
| "step": 1230 |
| }, |
| { |
| "epoch": 2.811119573495811, |
| "grad_norm": 1.1968902349472046, |
| "learning_rate": 2.9030773035493997e-06, |
| "loss": 1.1497, |
| "step": 1231 |
| }, |
| { |
| "epoch": 2.813404417364813, |
| "grad_norm": 1.1996639966964722, |
| "learning_rate": 2.9000035476405657e-06, |
| "loss": 1.1179, |
| "step": 1232 |
| }, |
| { |
| "epoch": 2.815689261233816, |
| "grad_norm": 1.168249487876892, |
| "learning_rate": 2.8969291710437054e-06, |
| "loss": 1.1528, |
| "step": 1233 |
| }, |
| { |
| "epoch": 2.817974105102818, |
| "grad_norm": 1.1561291217803955, |
| "learning_rate": 2.893854178529347e-06, |
| "loss": 1.1591, |
| "step": 1234 |
| }, |
| { |
| "epoch": 2.82025894897182, |
| "grad_norm": 1.1894656419754028, |
| "learning_rate": 2.890778574868977e-06, |
| "loss": 1.1606, |
| "step": 1235 |
| }, |
| { |
| "epoch": 2.822543792840823, |
| "grad_norm": 1.1667678356170654, |
| "learning_rate": 2.8877023648350284e-06, |
| "loss": 1.1799, |
| "step": 1236 |
| }, |
| { |
| "epoch": 2.824828636709825, |
| "grad_norm": 1.176679253578186, |
| "learning_rate": 2.884625553200876e-06, |
| "loss": 1.1994, |
| "step": 1237 |
| }, |
| { |
| "epoch": 2.827113480578827, |
| "grad_norm": 1.1774452924728394, |
| "learning_rate": 2.8815481447408273e-06, |
| "loss": 1.1716, |
| "step": 1238 |
| }, |
| { |
| "epoch": 2.8293983244478293, |
| "grad_norm": 1.2223371267318726, |
| "learning_rate": 2.878470144230118e-06, |
| "loss": 1.131, |
| "step": 1239 |
| }, |
| { |
| "epoch": 2.8316831683168315, |
| "grad_norm": 1.1785866022109985, |
| "learning_rate": 2.875391556444898e-06, |
| "loss": 1.1411, |
| "step": 1240 |
| }, |
| { |
| "epoch": 2.833968012185834, |
| "grad_norm": 1.1710691452026367, |
| "learning_rate": 2.8723123861622338e-06, |
| "loss": 1.1718, |
| "step": 1241 |
| }, |
| { |
| "epoch": 2.8362528560548363, |
| "grad_norm": 1.1571224927902222, |
| "learning_rate": 2.8692326381600926e-06, |
| "loss": 1.1529, |
| "step": 1242 |
| }, |
| { |
| "epoch": 2.8385376999238385, |
| "grad_norm": 1.1815292835235596, |
| "learning_rate": 2.8661523172173392e-06, |
| "loss": 1.1522, |
| "step": 1243 |
| }, |
| { |
| "epoch": 2.8408225437928407, |
| "grad_norm": 1.1292341947555542, |
| "learning_rate": 2.8630714281137263e-06, |
| "loss": 1.1394, |
| "step": 1244 |
| }, |
| { |
| "epoch": 2.843107387661843, |
| "grad_norm": 1.1632921695709229, |
| "learning_rate": 2.8599899756298887e-06, |
| "loss": 1.1778, |
| "step": 1245 |
| }, |
| { |
| "epoch": 2.8453922315308455, |
| "grad_norm": 1.2025562524795532, |
| "learning_rate": 2.856907964547337e-06, |
| "loss": 1.1442, |
| "step": 1246 |
| }, |
| { |
| "epoch": 2.8476770753998477, |
| "grad_norm": 1.2091012001037598, |
| "learning_rate": 2.8538253996484465e-06, |
| "loss": 1.1388, |
| "step": 1247 |
| }, |
| { |
| "epoch": 2.84996191926885, |
| "grad_norm": 1.1544710397720337, |
| "learning_rate": 2.8507422857164523e-06, |
| "loss": 1.1402, |
| "step": 1248 |
| }, |
| { |
| "epoch": 2.852246763137852, |
| "grad_norm": 1.1689878702163696, |
| "learning_rate": 2.847658627535442e-06, |
| "loss": 1.1477, |
| "step": 1249 |
| }, |
| { |
| "epoch": 2.8545316070068543, |
| "grad_norm": 1.173471212387085, |
| "learning_rate": 2.844574429890347e-06, |
| "loss": 1.1661, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.856816450875857, |
| "grad_norm": 1.1636780500411987, |
| "learning_rate": 2.8414896975669374e-06, |
| "loss": 1.1465, |
| "step": 1251 |
| }, |
| { |
| "epoch": 2.859101294744859, |
| "grad_norm": 1.1479815244674683, |
| "learning_rate": 2.8384044353518104e-06, |
| "loss": 1.1568, |
| "step": 1252 |
| }, |
| { |
| "epoch": 2.8613861386138613, |
| "grad_norm": 1.2065167427062988, |
| "learning_rate": 2.835318648032388e-06, |
| "loss": 1.1585, |
| "step": 1253 |
| }, |
| { |
| "epoch": 2.863670982482864, |
| "grad_norm": 1.1619449853897095, |
| "learning_rate": 2.832232340396904e-06, |
| "loss": 1.1238, |
| "step": 1254 |
| }, |
| { |
| "epoch": 2.865955826351866, |
| "grad_norm": 1.17509126663208, |
| "learning_rate": 2.8291455172344045e-06, |
| "loss": 1.1614, |
| "step": 1255 |
| }, |
| { |
| "epoch": 2.8682406702208683, |
| "grad_norm": 1.1738674640655518, |
| "learning_rate": 2.82605818333473e-06, |
| "loss": 1.1523, |
| "step": 1256 |
| }, |
| { |
| "epoch": 2.8705255140898704, |
| "grad_norm": 1.1643424034118652, |
| "learning_rate": 2.8229703434885165e-06, |
| "loss": 1.1565, |
| "step": 1257 |
| }, |
| { |
| "epoch": 2.8728103579588726, |
| "grad_norm": 1.1405576467514038, |
| "learning_rate": 2.819882002487185e-06, |
| "loss": 1.1638, |
| "step": 1258 |
| }, |
| { |
| "epoch": 2.8750952018278753, |
| "grad_norm": 1.1445550918579102, |
| "learning_rate": 2.816793165122933e-06, |
| "loss": 1.129, |
| "step": 1259 |
| }, |
| { |
| "epoch": 2.8773800456968774, |
| "grad_norm": 1.1620489358901978, |
| "learning_rate": 2.8137038361887297e-06, |
| "loss": 1.128, |
| "step": 1260 |
| }, |
| { |
| "epoch": 2.8796648895658796, |
| "grad_norm": 1.2187519073486328, |
| "learning_rate": 2.8106140204783054e-06, |
| "loss": 1.1312, |
| "step": 1261 |
| }, |
| { |
| "epoch": 2.881949733434882, |
| "grad_norm": 1.178515076637268, |
| "learning_rate": 2.8075237227861475e-06, |
| "loss": 1.1075, |
| "step": 1262 |
| }, |
| { |
| "epoch": 2.884234577303884, |
| "grad_norm": 1.1716185808181763, |
| "learning_rate": 2.80443294790749e-06, |
| "loss": 1.1553, |
| "step": 1263 |
| }, |
| { |
| "epoch": 2.8865194211728866, |
| "grad_norm": 1.2317765951156616, |
| "learning_rate": 2.8013417006383078e-06, |
| "loss": 1.1377, |
| "step": 1264 |
| }, |
| { |
| "epoch": 2.888804265041889, |
| "grad_norm": 1.1073942184448242, |
| "learning_rate": 2.798249985775309e-06, |
| "loss": 1.1507, |
| "step": 1265 |
| }, |
| { |
| "epoch": 2.891089108910891, |
| "grad_norm": 1.149707317352295, |
| "learning_rate": 2.795157808115927e-06, |
| "loss": 1.1161, |
| "step": 1266 |
| }, |
| { |
| "epoch": 2.8933739527798936, |
| "grad_norm": 1.2311087846755981, |
| "learning_rate": 2.7920651724583124e-06, |
| "loss": 1.1481, |
| "step": 1267 |
| }, |
| { |
| "epoch": 2.895658796648896, |
| "grad_norm": 1.1760973930358887, |
| "learning_rate": 2.788972083601329e-06, |
| "loss": 1.16, |
| "step": 1268 |
| }, |
| { |
| "epoch": 2.897943640517898, |
| "grad_norm": 1.149901270866394, |
| "learning_rate": 2.785878546344541e-06, |
| "loss": 1.1187, |
| "step": 1269 |
| }, |
| { |
| "epoch": 2.9002284843869, |
| "grad_norm": 1.2037584781646729, |
| "learning_rate": 2.7827845654882112e-06, |
| "loss": 1.1508, |
| "step": 1270 |
| }, |
| { |
| "epoch": 2.9025133282559024, |
| "grad_norm": 1.2114837169647217, |
| "learning_rate": 2.7796901458332877e-06, |
| "loss": 1.1243, |
| "step": 1271 |
| }, |
| { |
| "epoch": 2.904798172124905, |
| "grad_norm": 1.179283857345581, |
| "learning_rate": 2.776595292181401e-06, |
| "loss": 1.1351, |
| "step": 1272 |
| }, |
| { |
| "epoch": 2.907083015993907, |
| "grad_norm": 1.2145215272903442, |
| "learning_rate": 2.7735000093348556e-06, |
| "loss": 1.1295, |
| "step": 1273 |
| }, |
| { |
| "epoch": 2.9093678598629094, |
| "grad_norm": 1.1994370222091675, |
| "learning_rate": 2.7704043020966222e-06, |
| "loss": 1.1405, |
| "step": 1274 |
| }, |
| { |
| "epoch": 2.9116527037319115, |
| "grad_norm": 1.218436360359192, |
| "learning_rate": 2.7673081752703275e-06, |
| "loss": 1.163, |
| "step": 1275 |
| }, |
| { |
| "epoch": 2.9139375476009137, |
| "grad_norm": 1.1954008340835571, |
| "learning_rate": 2.764211633660252e-06, |
| "loss": 1.1209, |
| "step": 1276 |
| }, |
| { |
| "epoch": 2.9162223914699164, |
| "grad_norm": 1.1329401731491089, |
| "learning_rate": 2.7611146820713187e-06, |
| "loss": 1.1394, |
| "step": 1277 |
| }, |
| { |
| "epoch": 2.9185072353389185, |
| "grad_norm": 1.153887391090393, |
| "learning_rate": 2.7580173253090876e-06, |
| "loss": 1.1236, |
| "step": 1278 |
| }, |
| { |
| "epoch": 2.9207920792079207, |
| "grad_norm": 1.2343658208847046, |
| "learning_rate": 2.754919568179746e-06, |
| "loss": 1.1582, |
| "step": 1279 |
| }, |
| { |
| "epoch": 2.9230769230769234, |
| "grad_norm": 1.1790735721588135, |
| "learning_rate": 2.7518214154901025e-06, |
| "loss": 1.148, |
| "step": 1280 |
| }, |
| { |
| "epoch": 2.925361766945925, |
| "grad_norm": 1.1787333488464355, |
| "learning_rate": 2.7487228720475812e-06, |
| "loss": 1.0793, |
| "step": 1281 |
| }, |
| { |
| "epoch": 2.9276466108149277, |
| "grad_norm": 1.1795237064361572, |
| "learning_rate": 2.745623942660211e-06, |
| "loss": 1.1811, |
| "step": 1282 |
| }, |
| { |
| "epoch": 2.92993145468393, |
| "grad_norm": 1.1928547620773315, |
| "learning_rate": 2.7425246321366205e-06, |
| "loss": 1.1207, |
| "step": 1283 |
| }, |
| { |
| "epoch": 2.932216298552932, |
| "grad_norm": 1.2683132886886597, |
| "learning_rate": 2.7394249452860296e-06, |
| "loss": 1.1868, |
| "step": 1284 |
| }, |
| { |
| "epoch": 2.9345011424219347, |
| "grad_norm": 1.2315943241119385, |
| "learning_rate": 2.7363248869182407e-06, |
| "loss": 1.1732, |
| "step": 1285 |
| }, |
| { |
| "epoch": 2.936785986290937, |
| "grad_norm": 1.178771734237671, |
| "learning_rate": 2.7332244618436355e-06, |
| "loss": 1.1377, |
| "step": 1286 |
| }, |
| { |
| "epoch": 2.939070830159939, |
| "grad_norm": 1.1433113813400269, |
| "learning_rate": 2.7301236748731623e-06, |
| "loss": 1.1556, |
| "step": 1287 |
| }, |
| { |
| "epoch": 2.9413556740289413, |
| "grad_norm": 1.1987029314041138, |
| "learning_rate": 2.7270225308183318e-06, |
| "loss": 1.16, |
| "step": 1288 |
| }, |
| { |
| "epoch": 2.9436405178979435, |
| "grad_norm": 1.1818667650222778, |
| "learning_rate": 2.7239210344912085e-06, |
| "loss": 1.1443, |
| "step": 1289 |
| }, |
| { |
| "epoch": 2.945925361766946, |
| "grad_norm": 1.2523951530456543, |
| "learning_rate": 2.720819190704405e-06, |
| "loss": 1.1512, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.9482102056359483, |
| "grad_norm": 1.186334490776062, |
| "learning_rate": 2.7177170042710706e-06, |
| "loss": 1.1347, |
| "step": 1291 |
| }, |
| { |
| "epoch": 2.9504950495049505, |
| "grad_norm": 1.1355229616165161, |
| "learning_rate": 2.714614480004888e-06, |
| "loss": 1.1717, |
| "step": 1292 |
| }, |
| { |
| "epoch": 2.9527798933739526, |
| "grad_norm": 1.1879637241363525, |
| "learning_rate": 2.7115116227200634e-06, |
| "loss": 1.1737, |
| "step": 1293 |
| }, |
| { |
| "epoch": 2.955064737242955, |
| "grad_norm": 1.160522699356079, |
| "learning_rate": 2.7084084372313207e-06, |
| "loss": 1.1603, |
| "step": 1294 |
| }, |
| { |
| "epoch": 2.9573495811119574, |
| "grad_norm": 1.15951406955719, |
| "learning_rate": 2.705304928353892e-06, |
| "loss": 1.1809, |
| "step": 1295 |
| }, |
| { |
| "epoch": 2.9596344249809596, |
| "grad_norm": 1.132550835609436, |
| "learning_rate": 2.702201100903511e-06, |
| "loss": 1.1537, |
| "step": 1296 |
| }, |
| { |
| "epoch": 2.961919268849962, |
| "grad_norm": 1.1578466892242432, |
| "learning_rate": 2.6990969596964066e-06, |
| "loss": 1.1578, |
| "step": 1297 |
| }, |
| { |
| "epoch": 2.9642041127189644, |
| "grad_norm": 1.1534026861190796, |
| "learning_rate": 2.6959925095492957e-06, |
| "loss": 1.1392, |
| "step": 1298 |
| }, |
| { |
| "epoch": 2.9664889565879666, |
| "grad_norm": 1.1865522861480713, |
| "learning_rate": 2.6928877552793716e-06, |
| "loss": 1.1292, |
| "step": 1299 |
| }, |
| { |
| "epoch": 2.968773800456969, |
| "grad_norm": 1.1986700296401978, |
| "learning_rate": 2.689782701704301e-06, |
| "loss": 1.1591, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.971058644325971, |
| "grad_norm": 1.1916639804840088, |
| "learning_rate": 2.6866773536422157e-06, |
| "loss": 1.1757, |
| "step": 1301 |
| }, |
| { |
| "epoch": 2.973343488194973, |
| "grad_norm": 1.1540440320968628, |
| "learning_rate": 2.6835717159117044e-06, |
| "loss": 1.132, |
| "step": 1302 |
| }, |
| { |
| "epoch": 2.975628332063976, |
| "grad_norm": 1.175986647605896, |
| "learning_rate": 2.6804657933318035e-06, |
| "loss": 1.158, |
| "step": 1303 |
| }, |
| { |
| "epoch": 2.977913175932978, |
| "grad_norm": 1.1727259159088135, |
| "learning_rate": 2.6773595907219937e-06, |
| "loss": 1.1454, |
| "step": 1304 |
| }, |
| { |
| "epoch": 2.98019801980198, |
| "grad_norm": 1.1897666454315186, |
| "learning_rate": 2.674253112902189e-06, |
| "loss": 1.1385, |
| "step": 1305 |
| }, |
| { |
| "epoch": 2.9824828636709824, |
| "grad_norm": 1.2285549640655518, |
| "learning_rate": 2.6711463646927296e-06, |
| "loss": 1.1057, |
| "step": 1306 |
| }, |
| { |
| "epoch": 2.9847677075399845, |
| "grad_norm": 1.1508749723434448, |
| "learning_rate": 2.668039350914377e-06, |
| "loss": 1.1537, |
| "step": 1307 |
| }, |
| { |
| "epoch": 2.987052551408987, |
| "grad_norm": 1.1747997999191284, |
| "learning_rate": 2.6649320763883045e-06, |
| "loss": 1.1447, |
| "step": 1308 |
| }, |
| { |
| "epoch": 2.9893373952779894, |
| "grad_norm": 1.1757659912109375, |
| "learning_rate": 2.6618245459360896e-06, |
| "loss": 1.1312, |
| "step": 1309 |
| }, |
| { |
| "epoch": 2.9916222391469915, |
| "grad_norm": 1.1855767965316772, |
| "learning_rate": 2.658716764379706e-06, |
| "loss": 1.1559, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.993907083015994, |
| "grad_norm": 1.1734507083892822, |
| "learning_rate": 2.6556087365415183e-06, |
| "loss": 1.1291, |
| "step": 1311 |
| }, |
| { |
| "epoch": 2.996191926884996, |
| "grad_norm": 1.1714792251586914, |
| "learning_rate": 2.6525004672442734e-06, |
| "loss": 1.1225, |
| "step": 1312 |
| }, |
| { |
| "epoch": 2.9984767707539985, |
| "grad_norm": 1.1722139120101929, |
| "learning_rate": 2.6493919613110923e-06, |
| "loss": 1.103, |
| "step": 1313 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 1.6373590230941772, |
| "learning_rate": 2.6462832235654625e-06, |
| "loss": 1.1512, |
| "step": 1314 |
| }, |
| { |
| "epoch": 3.002284843869002, |
| "grad_norm": 1.1781227588653564, |
| "learning_rate": 2.643174258831232e-06, |
| "loss": 1.1117, |
| "step": 1315 |
| }, |
| { |
| "epoch": 3.0045696877380044, |
| "grad_norm": 1.1707483530044556, |
| "learning_rate": 2.6400650719326014e-06, |
| "loss": 1.0957, |
| "step": 1316 |
| }, |
| { |
| "epoch": 3.006854531607007, |
| "grad_norm": 1.192963719367981, |
| "learning_rate": 2.6369556676941165e-06, |
| "loss": 1.1014, |
| "step": 1317 |
| }, |
| { |
| "epoch": 3.009139375476009, |
| "grad_norm": 1.1995853185653687, |
| "learning_rate": 2.6338460509406577e-06, |
| "loss": 1.1066, |
| "step": 1318 |
| }, |
| { |
| "epoch": 3.0114242193450114, |
| "grad_norm": 1.1763046979904175, |
| "learning_rate": 2.630736226497438e-06, |
| "loss": 1.1006, |
| "step": 1319 |
| }, |
| { |
| "epoch": 3.0137090632140136, |
| "grad_norm": 1.2747207880020142, |
| "learning_rate": 2.6276261991899915e-06, |
| "loss": 1.1031, |
| "step": 1320 |
| }, |
| { |
| "epoch": 3.015993907083016, |
| "grad_norm": 1.2675756216049194, |
| "learning_rate": 2.6245159738441673e-06, |
| "loss": 1.1416, |
| "step": 1321 |
| }, |
| { |
| "epoch": 3.0182787509520184, |
| "grad_norm": 1.1917039155960083, |
| "learning_rate": 2.6214055552861213e-06, |
| "loss": 1.1025, |
| "step": 1322 |
| }, |
| { |
| "epoch": 3.0205635948210205, |
| "grad_norm": 1.153493881225586, |
| "learning_rate": 2.618294948342309e-06, |
| "loss": 1.0846, |
| "step": 1323 |
| }, |
| { |
| "epoch": 3.0228484386900227, |
| "grad_norm": 1.2151880264282227, |
| "learning_rate": 2.61518415783948e-06, |
| "loss": 1.0835, |
| "step": 1324 |
| }, |
| { |
| "epoch": 3.025133282559025, |
| "grad_norm": 1.2547942399978638, |
| "learning_rate": 2.6120731886046673e-06, |
| "loss": 1.1196, |
| "step": 1325 |
| }, |
| { |
| "epoch": 3.0274181264280275, |
| "grad_norm": 1.2668670415878296, |
| "learning_rate": 2.608962045465181e-06, |
| "loss": 1.0744, |
| "step": 1326 |
| }, |
| { |
| "epoch": 3.0297029702970297, |
| "grad_norm": 1.2131301164627075, |
| "learning_rate": 2.6058507332486014e-06, |
| "loss": 1.1415, |
| "step": 1327 |
| }, |
| { |
| "epoch": 3.031987814166032, |
| "grad_norm": 1.230054259300232, |
| "learning_rate": 2.602739256782772e-06, |
| "loss": 1.0854, |
| "step": 1328 |
| }, |
| { |
| "epoch": 3.034272658035034, |
| "grad_norm": 1.1794737577438354, |
| "learning_rate": 2.599627620895791e-06, |
| "loss": 1.1035, |
| "step": 1329 |
| }, |
| { |
| "epoch": 3.0365575019040367, |
| "grad_norm": 1.1808216571807861, |
| "learning_rate": 2.5965158304160017e-06, |
| "loss": 1.1099, |
| "step": 1330 |
| }, |
| { |
| "epoch": 3.038842345773039, |
| "grad_norm": 1.1999027729034424, |
| "learning_rate": 2.593403890171989e-06, |
| "loss": 1.0721, |
| "step": 1331 |
| }, |
| { |
| "epoch": 3.041127189642041, |
| "grad_norm": 1.2132296562194824, |
| "learning_rate": 2.5902918049925718e-06, |
| "loss": 1.0796, |
| "step": 1332 |
| }, |
| { |
| "epoch": 3.0434120335110433, |
| "grad_norm": 1.2564518451690674, |
| "learning_rate": 2.5871795797067924e-06, |
| "loss": 1.1248, |
| "step": 1333 |
| }, |
| { |
| "epoch": 3.045696877380046, |
| "grad_norm": 1.2996151447296143, |
| "learning_rate": 2.584067219143908e-06, |
| "loss": 1.1724, |
| "step": 1334 |
| }, |
| { |
| "epoch": 3.047981721249048, |
| "grad_norm": 1.2047386169433594, |
| "learning_rate": 2.5809547281333904e-06, |
| "loss": 1.1231, |
| "step": 1335 |
| }, |
| { |
| "epoch": 3.0502665651180503, |
| "grad_norm": 1.1555238962173462, |
| "learning_rate": 2.57784211150491e-06, |
| "loss": 1.1404, |
| "step": 1336 |
| }, |
| { |
| "epoch": 3.0525514089870525, |
| "grad_norm": 1.1934109926223755, |
| "learning_rate": 2.5747293740883355e-06, |
| "loss": 1.1408, |
| "step": 1337 |
| }, |
| { |
| "epoch": 3.0548362528560546, |
| "grad_norm": 1.2881134748458862, |
| "learning_rate": 2.5716165207137196e-06, |
| "loss": 1.0963, |
| "step": 1338 |
| }, |
| { |
| "epoch": 3.0571210967250573, |
| "grad_norm": 1.2293163537979126, |
| "learning_rate": 2.5685035562112964e-06, |
| "loss": 1.0969, |
| "step": 1339 |
| }, |
| { |
| "epoch": 3.0594059405940595, |
| "grad_norm": 1.1550770998001099, |
| "learning_rate": 2.5653904854114735e-06, |
| "loss": 1.0955, |
| "step": 1340 |
| }, |
| { |
| "epoch": 3.0616907844630616, |
| "grad_norm": 1.1883620023727417, |
| "learning_rate": 2.562277313144823e-06, |
| "loss": 1.1117, |
| "step": 1341 |
| }, |
| { |
| "epoch": 3.063975628332064, |
| "grad_norm": 1.179228663444519, |
| "learning_rate": 2.559164044242072e-06, |
| "loss": 1.1012, |
| "step": 1342 |
| }, |
| { |
| "epoch": 3.0662604722010665, |
| "grad_norm": 1.3026498556137085, |
| "learning_rate": 2.5560506835341003e-06, |
| "loss": 1.1422, |
| "step": 1343 |
| }, |
| { |
| "epoch": 3.0685453160700686, |
| "grad_norm": 1.2500083446502686, |
| "learning_rate": 2.5529372358519307e-06, |
| "loss": 1.1184, |
| "step": 1344 |
| }, |
| { |
| "epoch": 3.070830159939071, |
| "grad_norm": 1.2166169881820679, |
| "learning_rate": 2.549823706026717e-06, |
| "loss": 1.0977, |
| "step": 1345 |
| }, |
| { |
| "epoch": 3.073115003808073, |
| "grad_norm": 1.183247685432434, |
| "learning_rate": 2.546710098889746e-06, |
| "loss": 1.1148, |
| "step": 1346 |
| }, |
| { |
| "epoch": 3.075399847677075, |
| "grad_norm": 1.185694694519043, |
| "learning_rate": 2.543596419272419e-06, |
| "loss": 1.1361, |
| "step": 1347 |
| }, |
| { |
| "epoch": 3.077684691546078, |
| "grad_norm": 1.1907778978347778, |
| "learning_rate": 2.5404826720062544e-06, |
| "loss": 1.1144, |
| "step": 1348 |
| }, |
| { |
| "epoch": 3.07996953541508, |
| "grad_norm": 1.2550450563430786, |
| "learning_rate": 2.5373688619228713e-06, |
| "loss": 1.1247, |
| "step": 1349 |
| }, |
| { |
| "epoch": 3.082254379284082, |
| "grad_norm": 1.2171252965927124, |
| "learning_rate": 2.53425499385399e-06, |
| "loss": 1.116, |
| "step": 1350 |
| }, |
| { |
| "epoch": 3.0845392231530844, |
| "grad_norm": 1.1736383438110352, |
| "learning_rate": 2.5311410726314183e-06, |
| "loss": 1.1364, |
| "step": 1351 |
| }, |
| { |
| "epoch": 3.086824067022087, |
| "grad_norm": 1.1960982084274292, |
| "learning_rate": 2.528027103087049e-06, |
| "loss": 1.0907, |
| "step": 1352 |
| }, |
| { |
| "epoch": 3.089108910891089, |
| "grad_norm": 1.1903541088104248, |
| "learning_rate": 2.5249130900528464e-06, |
| "loss": 1.08, |
| "step": 1353 |
| }, |
| { |
| "epoch": 3.0913937547600914, |
| "grad_norm": 1.2436158657073975, |
| "learning_rate": 2.5217990383608448e-06, |
| "loss": 1.0973, |
| "step": 1354 |
| }, |
| { |
| "epoch": 3.0936785986290936, |
| "grad_norm": 1.2662099599838257, |
| "learning_rate": 2.518684952843138e-06, |
| "loss": 1.1526, |
| "step": 1355 |
| }, |
| { |
| "epoch": 3.095963442498096, |
| "grad_norm": 1.1999155282974243, |
| "learning_rate": 2.5155708383318726e-06, |
| "loss": 1.1421, |
| "step": 1356 |
| }, |
| { |
| "epoch": 3.0982482863670984, |
| "grad_norm": 1.2298964262008667, |
| "learning_rate": 2.5124566996592388e-06, |
| "loss": 1.1227, |
| "step": 1357 |
| }, |
| { |
| "epoch": 3.1005331302361006, |
| "grad_norm": 1.2199616432189941, |
| "learning_rate": 2.509342541657465e-06, |
| "loss": 1.111, |
| "step": 1358 |
| }, |
| { |
| "epoch": 3.1028179741051027, |
| "grad_norm": 1.1966962814331055, |
| "learning_rate": 2.506228369158811e-06, |
| "loss": 1.0991, |
| "step": 1359 |
| }, |
| { |
| "epoch": 3.105102817974105, |
| "grad_norm": 1.2209211587905884, |
| "learning_rate": 2.503114186995558e-06, |
| "loss": 1.0993, |
| "step": 1360 |
| }, |
| { |
| "epoch": 3.1073876618431076, |
| "grad_norm": 1.1826435327529907, |
| "learning_rate": 2.5e-06, |
| "loss": 1.1074, |
| "step": 1361 |
| }, |
| { |
| "epoch": 3.1096725057121097, |
| "grad_norm": 1.1526274681091309, |
| "learning_rate": 2.496885813004443e-06, |
| "loss": 1.106, |
| "step": 1362 |
| }, |
| { |
| "epoch": 3.111957349581112, |
| "grad_norm": 1.248950481414795, |
| "learning_rate": 2.4937716308411896e-06, |
| "loss": 1.1285, |
| "step": 1363 |
| }, |
| { |
| "epoch": 3.114242193450114, |
| "grad_norm": 1.2252432107925415, |
| "learning_rate": 2.4906574583425357e-06, |
| "loss": 1.1145, |
| "step": 1364 |
| }, |
| { |
| "epoch": 3.1165270373191167, |
| "grad_norm": 1.2389261722564697, |
| "learning_rate": 2.487543300340762e-06, |
| "loss": 1.1149, |
| "step": 1365 |
| }, |
| { |
| "epoch": 3.118811881188119, |
| "grad_norm": 1.1963584423065186, |
| "learning_rate": 2.4844291616681283e-06, |
| "loss": 1.1128, |
| "step": 1366 |
| }, |
| { |
| "epoch": 3.121096725057121, |
| "grad_norm": 1.1800618171691895, |
| "learning_rate": 2.4813150471568624e-06, |
| "loss": 1.1178, |
| "step": 1367 |
| }, |
| { |
| "epoch": 3.1233815689261233, |
| "grad_norm": 1.2248040437698364, |
| "learning_rate": 2.4782009616391556e-06, |
| "loss": 1.1204, |
| "step": 1368 |
| }, |
| { |
| "epoch": 3.1256664127951255, |
| "grad_norm": 1.2025740146636963, |
| "learning_rate": 2.475086909947154e-06, |
| "loss": 1.1018, |
| "step": 1369 |
| }, |
| { |
| "epoch": 3.127951256664128, |
| "grad_norm": 1.1733136177062988, |
| "learning_rate": 2.4719728969129517e-06, |
| "loss": 1.0994, |
| "step": 1370 |
| }, |
| { |
| "epoch": 3.1302361005331303, |
| "grad_norm": 1.2059153318405151, |
| "learning_rate": 2.468858927368582e-06, |
| "loss": 1.1036, |
| "step": 1371 |
| }, |
| { |
| "epoch": 3.1325209444021325, |
| "grad_norm": 1.1862396001815796, |
| "learning_rate": 2.465745006146011e-06, |
| "loss": 1.1116, |
| "step": 1372 |
| }, |
| { |
| "epoch": 3.1348057882711347, |
| "grad_norm": 1.163080096244812, |
| "learning_rate": 2.462631138077129e-06, |
| "loss": 1.1233, |
| "step": 1373 |
| }, |
| { |
| "epoch": 3.1370906321401373, |
| "grad_norm": 1.2319633960723877, |
| "learning_rate": 2.4595173279937464e-06, |
| "loss": 1.1132, |
| "step": 1374 |
| }, |
| { |
| "epoch": 3.1393754760091395, |
| "grad_norm": 1.2363816499710083, |
| "learning_rate": 2.456403580727582e-06, |
| "loss": 1.1246, |
| "step": 1375 |
| }, |
| { |
| "epoch": 3.1416603198781416, |
| "grad_norm": 1.272760272026062, |
| "learning_rate": 2.4532899011102553e-06, |
| "loss": 1.1538, |
| "step": 1376 |
| }, |
| { |
| "epoch": 3.143945163747144, |
| "grad_norm": 1.2700350284576416, |
| "learning_rate": 2.4501762939732834e-06, |
| "loss": 1.1289, |
| "step": 1377 |
| }, |
| { |
| "epoch": 3.146230007616146, |
| "grad_norm": 1.213341474533081, |
| "learning_rate": 2.4470627641480705e-06, |
| "loss": 1.0985, |
| "step": 1378 |
| }, |
| { |
| "epoch": 3.1485148514851486, |
| "grad_norm": 1.1804118156433105, |
| "learning_rate": 2.4439493164659005e-06, |
| "loss": 1.1479, |
| "step": 1379 |
| }, |
| { |
| "epoch": 3.150799695354151, |
| "grad_norm": 1.2232515811920166, |
| "learning_rate": 2.4408359557579294e-06, |
| "loss": 1.1518, |
| "step": 1380 |
| }, |
| { |
| "epoch": 3.153084539223153, |
| "grad_norm": 1.1940504312515259, |
| "learning_rate": 2.4377226868551783e-06, |
| "loss": 1.1409, |
| "step": 1381 |
| }, |
| { |
| "epoch": 3.155369383092155, |
| "grad_norm": 1.1823115348815918, |
| "learning_rate": 2.434609514588527e-06, |
| "loss": 1.0937, |
| "step": 1382 |
| }, |
| { |
| "epoch": 3.157654226961158, |
| "grad_norm": 1.2245404720306396, |
| "learning_rate": 2.4314964437887044e-06, |
| "loss": 1.1219, |
| "step": 1383 |
| }, |
| { |
| "epoch": 3.15993907083016, |
| "grad_norm": 1.1697916984558105, |
| "learning_rate": 2.4283834792862817e-06, |
| "loss": 1.1052, |
| "step": 1384 |
| }, |
| { |
| "epoch": 3.162223914699162, |
| "grad_norm": 1.2614245414733887, |
| "learning_rate": 2.4252706259116657e-06, |
| "loss": 1.1095, |
| "step": 1385 |
| }, |
| { |
| "epoch": 3.1645087585681644, |
| "grad_norm": 1.2271556854248047, |
| "learning_rate": 2.4221578884950903e-06, |
| "loss": 1.1381, |
| "step": 1386 |
| }, |
| { |
| "epoch": 3.166793602437167, |
| "grad_norm": 1.2251800298690796, |
| "learning_rate": 2.419045271866611e-06, |
| "loss": 1.1286, |
| "step": 1387 |
| }, |
| { |
| "epoch": 3.169078446306169, |
| "grad_norm": 1.2880722284317017, |
| "learning_rate": 2.415932780856093e-06, |
| "loss": 1.1236, |
| "step": 1388 |
| }, |
| { |
| "epoch": 3.1713632901751714, |
| "grad_norm": 1.240476131439209, |
| "learning_rate": 2.4128204202932093e-06, |
| "loss": 1.1369, |
| "step": 1389 |
| }, |
| { |
| "epoch": 3.1736481340441736, |
| "grad_norm": 1.246504545211792, |
| "learning_rate": 2.409708195007429e-06, |
| "loss": 1.1193, |
| "step": 1390 |
| }, |
| { |
| "epoch": 3.1759329779131757, |
| "grad_norm": 1.251848578453064, |
| "learning_rate": 2.406596109828012e-06, |
| "loss": 1.1451, |
| "step": 1391 |
| }, |
| { |
| "epoch": 3.1782178217821784, |
| "grad_norm": 1.2193578481674194, |
| "learning_rate": 2.403484169584e-06, |
| "loss": 1.1143, |
| "step": 1392 |
| }, |
| { |
| "epoch": 3.1805026656511806, |
| "grad_norm": 1.217989206314087, |
| "learning_rate": 2.4003723791042107e-06, |
| "loss": 1.0959, |
| "step": 1393 |
| }, |
| { |
| "epoch": 3.1827875095201827, |
| "grad_norm": 1.2069993019104004, |
| "learning_rate": 2.3972607432172283e-06, |
| "loss": 1.1417, |
| "step": 1394 |
| }, |
| { |
| "epoch": 3.185072353389185, |
| "grad_norm": 1.21833074092865, |
| "learning_rate": 2.3941492667514e-06, |
| "loss": 1.1575, |
| "step": 1395 |
| }, |
| { |
| "epoch": 3.1873571972581876, |
| "grad_norm": 1.2144832611083984, |
| "learning_rate": 2.3910379545348203e-06, |
| "loss": 1.1602, |
| "step": 1396 |
| }, |
| { |
| "epoch": 3.1896420411271897, |
| "grad_norm": 1.229985237121582, |
| "learning_rate": 2.387926811395334e-06, |
| "loss": 1.1053, |
| "step": 1397 |
| }, |
| { |
| "epoch": 3.191926884996192, |
| "grad_norm": 1.237524151802063, |
| "learning_rate": 2.384815842160521e-06, |
| "loss": 1.1359, |
| "step": 1398 |
| }, |
| { |
| "epoch": 3.194211728865194, |
| "grad_norm": 1.2233461141586304, |
| "learning_rate": 2.381705051657692e-06, |
| "loss": 1.1003, |
| "step": 1399 |
| }, |
| { |
| "epoch": 3.1964965727341963, |
| "grad_norm": 1.2777819633483887, |
| "learning_rate": 2.3785944447138804e-06, |
| "loss": 1.1138, |
| "step": 1400 |
| }, |
| { |
| "epoch": 3.198781416603199, |
| "grad_norm": 1.2171168327331543, |
| "learning_rate": 2.3754840261558336e-06, |
| "loss": 1.1149, |
| "step": 1401 |
| }, |
| { |
| "epoch": 3.201066260472201, |
| "grad_norm": 1.2730437517166138, |
| "learning_rate": 2.372373800810009e-06, |
| "loss": 1.1022, |
| "step": 1402 |
| }, |
| { |
| "epoch": 3.2033511043412033, |
| "grad_norm": 1.2138127088546753, |
| "learning_rate": 2.369263773502562e-06, |
| "loss": 1.1267, |
| "step": 1403 |
| }, |
| { |
| "epoch": 3.2056359482102055, |
| "grad_norm": 1.2073695659637451, |
| "learning_rate": 2.3661539490593423e-06, |
| "loss": 1.1151, |
| "step": 1404 |
| }, |
| { |
| "epoch": 3.207920792079208, |
| "grad_norm": 1.2361245155334473, |
| "learning_rate": 2.3630443323058843e-06, |
| "loss": 1.1224, |
| "step": 1405 |
| }, |
| { |
| "epoch": 3.2102056359482103, |
| "grad_norm": 1.2753217220306396, |
| "learning_rate": 2.3599349280673985e-06, |
| "loss": 1.1328, |
| "step": 1406 |
| }, |
| { |
| "epoch": 3.2124904798172125, |
| "grad_norm": 1.182378888130188, |
| "learning_rate": 2.356825741168768e-06, |
| "loss": 1.1256, |
| "step": 1407 |
| }, |
| { |
| "epoch": 3.2147753236862147, |
| "grad_norm": 1.20160710811615, |
| "learning_rate": 2.353716776434538e-06, |
| "loss": 1.1306, |
| "step": 1408 |
| }, |
| { |
| "epoch": 3.217060167555217, |
| "grad_norm": 1.1793500185012817, |
| "learning_rate": 2.3506080386889086e-06, |
| "loss": 1.0768, |
| "step": 1409 |
| }, |
| { |
| "epoch": 3.2193450114242195, |
| "grad_norm": 1.2438910007476807, |
| "learning_rate": 2.3474995327557266e-06, |
| "loss": 1.1582, |
| "step": 1410 |
| }, |
| { |
| "epoch": 3.2216298552932217, |
| "grad_norm": 1.2653754949569702, |
| "learning_rate": 2.3443912634584817e-06, |
| "loss": 1.0914, |
| "step": 1411 |
| }, |
| { |
| "epoch": 3.223914699162224, |
| "grad_norm": 1.237338662147522, |
| "learning_rate": 2.3412832356202943e-06, |
| "loss": 1.0945, |
| "step": 1412 |
| }, |
| { |
| "epoch": 3.226199543031226, |
| "grad_norm": 1.262833595275879, |
| "learning_rate": 2.3381754540639108e-06, |
| "loss": 1.1204, |
| "step": 1413 |
| }, |
| { |
| "epoch": 3.2284843869002287, |
| "grad_norm": 1.2164596319198608, |
| "learning_rate": 2.3350679236116955e-06, |
| "loss": 1.1442, |
| "step": 1414 |
| }, |
| { |
| "epoch": 3.230769230769231, |
| "grad_norm": 1.1981042623519897, |
| "learning_rate": 2.331960649085623e-06, |
| "loss": 1.0826, |
| "step": 1415 |
| }, |
| { |
| "epoch": 3.233054074638233, |
| "grad_norm": 1.289568305015564, |
| "learning_rate": 2.3288536353072704e-06, |
| "loss": 1.1027, |
| "step": 1416 |
| }, |
| { |
| "epoch": 3.235338918507235, |
| "grad_norm": 1.3058327436447144, |
| "learning_rate": 2.325746887097812e-06, |
| "loss": 1.1548, |
| "step": 1417 |
| }, |
| { |
| "epoch": 3.237623762376238, |
| "grad_norm": 1.2601237297058105, |
| "learning_rate": 2.3226404092780067e-06, |
| "loss": 1.1143, |
| "step": 1418 |
| }, |
| { |
| "epoch": 3.23990860624524, |
| "grad_norm": 1.2124943733215332, |
| "learning_rate": 2.3195342066681965e-06, |
| "loss": 1.1185, |
| "step": 1419 |
| }, |
| { |
| "epoch": 3.242193450114242, |
| "grad_norm": 1.2472761869430542, |
| "learning_rate": 2.316428284088296e-06, |
| "loss": 1.1504, |
| "step": 1420 |
| }, |
| { |
| "epoch": 3.2444782939832444, |
| "grad_norm": 1.2425949573516846, |
| "learning_rate": 2.3133226463577847e-06, |
| "loss": 1.1339, |
| "step": 1421 |
| }, |
| { |
| "epoch": 3.2467631378522466, |
| "grad_norm": 1.3503245115280151, |
| "learning_rate": 2.3102172982957e-06, |
| "loss": 1.1633, |
| "step": 1422 |
| }, |
| { |
| "epoch": 3.249047981721249, |
| "grad_norm": 1.2171080112457275, |
| "learning_rate": 2.3071122447206292e-06, |
| "loss": 1.0951, |
| "step": 1423 |
| }, |
| { |
| "epoch": 3.2513328255902514, |
| "grad_norm": 1.2225638628005981, |
| "learning_rate": 2.3040074904507047e-06, |
| "loss": 1.0744, |
| "step": 1424 |
| }, |
| { |
| "epoch": 3.2536176694592536, |
| "grad_norm": 1.209750771522522, |
| "learning_rate": 2.300903040303594e-06, |
| "loss": 1.1328, |
| "step": 1425 |
| }, |
| { |
| "epoch": 3.2559025133282558, |
| "grad_norm": 1.207655668258667, |
| "learning_rate": 2.29779889909649e-06, |
| "loss": 1.1473, |
| "step": 1426 |
| }, |
| { |
| "epoch": 3.258187357197258, |
| "grad_norm": 1.1904206275939941, |
| "learning_rate": 2.2946950716461094e-06, |
| "loss": 1.1165, |
| "step": 1427 |
| }, |
| { |
| "epoch": 3.2604722010662606, |
| "grad_norm": 1.2216424942016602, |
| "learning_rate": 2.2915915627686797e-06, |
| "loss": 1.1054, |
| "step": 1428 |
| }, |
| { |
| "epoch": 3.2627570449352628, |
| "grad_norm": 1.2471741437911987, |
| "learning_rate": 2.2884883772799375e-06, |
| "loss": 1.1189, |
| "step": 1429 |
| }, |
| { |
| "epoch": 3.265041888804265, |
| "grad_norm": 1.2008448839187622, |
| "learning_rate": 2.285385519995113e-06, |
| "loss": 1.1131, |
| "step": 1430 |
| }, |
| { |
| "epoch": 3.2673267326732676, |
| "grad_norm": 1.2054619789123535, |
| "learning_rate": 2.2822829957289303e-06, |
| "loss": 1.0846, |
| "step": 1431 |
| }, |
| { |
| "epoch": 3.2696115765422697, |
| "grad_norm": 1.2204397916793823, |
| "learning_rate": 2.2791808092955957e-06, |
| "loss": 1.1471, |
| "step": 1432 |
| }, |
| { |
| "epoch": 3.271896420411272, |
| "grad_norm": 1.2435766458511353, |
| "learning_rate": 2.2760789655087923e-06, |
| "loss": 1.1167, |
| "step": 1433 |
| }, |
| { |
| "epoch": 3.274181264280274, |
| "grad_norm": 1.2288284301757812, |
| "learning_rate": 2.272977469181669e-06, |
| "loss": 1.1297, |
| "step": 1434 |
| }, |
| { |
| "epoch": 3.2764661081492763, |
| "grad_norm": 1.2264825105667114, |
| "learning_rate": 2.2698763251268386e-06, |
| "loss": 1.1011, |
| "step": 1435 |
| }, |
| { |
| "epoch": 3.278750952018279, |
| "grad_norm": 1.2367379665374756, |
| "learning_rate": 2.266775538156365e-06, |
| "loss": 1.1069, |
| "step": 1436 |
| }, |
| { |
| "epoch": 3.281035795887281, |
| "grad_norm": 1.201027274131775, |
| "learning_rate": 2.26367511308176e-06, |
| "loss": 1.1203, |
| "step": 1437 |
| }, |
| { |
| "epoch": 3.2833206397562833, |
| "grad_norm": 1.2541130781173706, |
| "learning_rate": 2.2605750547139716e-06, |
| "loss": 1.1234, |
| "step": 1438 |
| }, |
| { |
| "epoch": 3.2856054836252855, |
| "grad_norm": 1.2395600080490112, |
| "learning_rate": 2.25747536786338e-06, |
| "loss": 1.1031, |
| "step": 1439 |
| }, |
| { |
| "epoch": 3.2878903274942877, |
| "grad_norm": 1.2372959852218628, |
| "learning_rate": 2.254376057339789e-06, |
| "loss": 1.155, |
| "step": 1440 |
| }, |
| { |
| "epoch": 3.2901751713632903, |
| "grad_norm": 1.2344226837158203, |
| "learning_rate": 2.25127712795242e-06, |
| "loss": 1.1239, |
| "step": 1441 |
| }, |
| { |
| "epoch": 3.2924600152322925, |
| "grad_norm": 1.1589802503585815, |
| "learning_rate": 2.2481785845098988e-06, |
| "loss": 1.088, |
| "step": 1442 |
| }, |
| { |
| "epoch": 3.2947448591012947, |
| "grad_norm": 1.2531473636627197, |
| "learning_rate": 2.245080431820255e-06, |
| "loss": 1.1412, |
| "step": 1443 |
| }, |
| { |
| "epoch": 3.297029702970297, |
| "grad_norm": 1.2219151258468628, |
| "learning_rate": 2.241982674690913e-06, |
| "loss": 1.1138, |
| "step": 1444 |
| }, |
| { |
| "epoch": 3.2993145468392995, |
| "grad_norm": 1.2261440753936768, |
| "learning_rate": 2.2388853179286817e-06, |
| "loss": 1.102, |
| "step": 1445 |
| }, |
| { |
| "epoch": 3.3015993907083017, |
| "grad_norm": 1.2558192014694214, |
| "learning_rate": 2.2357883663397485e-06, |
| "loss": 1.159, |
| "step": 1446 |
| }, |
| { |
| "epoch": 3.303884234577304, |
| "grad_norm": 1.2133665084838867, |
| "learning_rate": 2.232691824729673e-06, |
| "loss": 1.1163, |
| "step": 1447 |
| }, |
| { |
| "epoch": 3.306169078446306, |
| "grad_norm": 1.212631106376648, |
| "learning_rate": 2.2295956979033786e-06, |
| "loss": 1.102, |
| "step": 1448 |
| }, |
| { |
| "epoch": 3.3084539223153087, |
| "grad_norm": 1.2700281143188477, |
| "learning_rate": 2.226499990665145e-06, |
| "loss": 1.0938, |
| "step": 1449 |
| }, |
| { |
| "epoch": 3.310738766184311, |
| "grad_norm": 1.2251685857772827, |
| "learning_rate": 2.2234047078185997e-06, |
| "loss": 1.1142, |
| "step": 1450 |
| }, |
| { |
| "epoch": 3.313023610053313, |
| "grad_norm": 1.223984956741333, |
| "learning_rate": 2.2203098541667136e-06, |
| "loss": 1.1267, |
| "step": 1451 |
| }, |
| { |
| "epoch": 3.315308453922315, |
| "grad_norm": 1.2254488468170166, |
| "learning_rate": 2.2172154345117896e-06, |
| "loss": 1.1202, |
| "step": 1452 |
| }, |
| { |
| "epoch": 3.3175932977913174, |
| "grad_norm": 1.2008295059204102, |
| "learning_rate": 2.2141214536554594e-06, |
| "loss": 1.1317, |
| "step": 1453 |
| }, |
| { |
| "epoch": 3.31987814166032, |
| "grad_norm": 1.2000782489776611, |
| "learning_rate": 2.211027916398672e-06, |
| "loss": 1.1056, |
| "step": 1454 |
| }, |
| { |
| "epoch": 3.322162985529322, |
| "grad_norm": 1.2368084192276, |
| "learning_rate": 2.207934827541688e-06, |
| "loss": 1.1003, |
| "step": 1455 |
| }, |
| { |
| "epoch": 3.3244478293983244, |
| "grad_norm": 1.246311068534851, |
| "learning_rate": 2.204842191884074e-06, |
| "loss": 1.1092, |
| "step": 1456 |
| }, |
| { |
| "epoch": 3.3267326732673266, |
| "grad_norm": 1.2252134084701538, |
| "learning_rate": 2.201750014224692e-06, |
| "loss": 1.116, |
| "step": 1457 |
| }, |
| { |
| "epoch": 3.329017517136329, |
| "grad_norm": 1.2413280010223389, |
| "learning_rate": 2.1986582993616926e-06, |
| "loss": 1.1387, |
| "step": 1458 |
| }, |
| { |
| "epoch": 3.3313023610053314, |
| "grad_norm": 1.2302745580673218, |
| "learning_rate": 2.1955670520925105e-06, |
| "loss": 1.0876, |
| "step": 1459 |
| }, |
| { |
| "epoch": 3.3335872048743336, |
| "grad_norm": 1.2019201517105103, |
| "learning_rate": 2.192476277213853e-06, |
| "loss": 1.0869, |
| "step": 1460 |
| }, |
| { |
| "epoch": 3.3358720487433358, |
| "grad_norm": 1.2414172887802124, |
| "learning_rate": 2.189385979521696e-06, |
| "loss": 1.113, |
| "step": 1461 |
| }, |
| { |
| "epoch": 3.3381568926123384, |
| "grad_norm": 1.2234618663787842, |
| "learning_rate": 2.186296163811272e-06, |
| "loss": 1.0967, |
| "step": 1462 |
| }, |
| { |
| "epoch": 3.3404417364813406, |
| "grad_norm": 1.2113620042800903, |
| "learning_rate": 2.183206834877068e-06, |
| "loss": 1.1307, |
| "step": 1463 |
| }, |
| { |
| "epoch": 3.3427265803503428, |
| "grad_norm": 1.2364552021026611, |
| "learning_rate": 2.1801179975128162e-06, |
| "loss": 1.0811, |
| "step": 1464 |
| }, |
| { |
| "epoch": 3.345011424219345, |
| "grad_norm": 1.2303386926651, |
| "learning_rate": 2.1770296565114847e-06, |
| "loss": 1.1306, |
| "step": 1465 |
| }, |
| { |
| "epoch": 3.347296268088347, |
| "grad_norm": 1.2606472969055176, |
| "learning_rate": 2.1739418166652712e-06, |
| "loss": 1.1429, |
| "step": 1466 |
| }, |
| { |
| "epoch": 3.3495811119573498, |
| "grad_norm": 1.211198091506958, |
| "learning_rate": 2.1708544827655963e-06, |
| "loss": 1.1043, |
| "step": 1467 |
| }, |
| { |
| "epoch": 3.351865955826352, |
| "grad_norm": 1.2446391582489014, |
| "learning_rate": 2.1677676596030963e-06, |
| "loss": 1.1265, |
| "step": 1468 |
| }, |
| { |
| "epoch": 3.354150799695354, |
| "grad_norm": 1.1910406351089478, |
| "learning_rate": 2.1646813519676136e-06, |
| "loss": 1.0555, |
| "step": 1469 |
| }, |
| { |
| "epoch": 3.3564356435643563, |
| "grad_norm": 1.2461347579956055, |
| "learning_rate": 2.1615955646481905e-06, |
| "loss": 1.1319, |
| "step": 1470 |
| }, |
| { |
| "epoch": 3.3587204874333585, |
| "grad_norm": 1.2559690475463867, |
| "learning_rate": 2.158510302433064e-06, |
| "loss": 1.1291, |
| "step": 1471 |
| }, |
| { |
| "epoch": 3.361005331302361, |
| "grad_norm": 1.209735631942749, |
| "learning_rate": 2.155425570109654e-06, |
| "loss": 1.1482, |
| "step": 1472 |
| }, |
| { |
| "epoch": 3.3632901751713633, |
| "grad_norm": 1.2300963401794434, |
| "learning_rate": 2.1523413724645593e-06, |
| "loss": 1.1061, |
| "step": 1473 |
| }, |
| { |
| "epoch": 3.3655750190403655, |
| "grad_norm": 1.2133387327194214, |
| "learning_rate": 2.149257714283549e-06, |
| "loss": 1.1067, |
| "step": 1474 |
| }, |
| { |
| "epoch": 3.3678598629093677, |
| "grad_norm": 1.228406310081482, |
| "learning_rate": 2.1461746003515547e-06, |
| "loss": 1.1392, |
| "step": 1475 |
| }, |
| { |
| "epoch": 3.3701447067783703, |
| "grad_norm": 1.2098608016967773, |
| "learning_rate": 2.143092035452664e-06, |
| "loss": 1.1148, |
| "step": 1476 |
| }, |
| { |
| "epoch": 3.3724295506473725, |
| "grad_norm": 1.2088247537612915, |
| "learning_rate": 2.140010024370112e-06, |
| "loss": 1.0957, |
| "step": 1477 |
| }, |
| { |
| "epoch": 3.3747143945163747, |
| "grad_norm": 1.2421760559082031, |
| "learning_rate": 2.136928571886275e-06, |
| "loss": 1.1514, |
| "step": 1478 |
| }, |
| { |
| "epoch": 3.376999238385377, |
| "grad_norm": 1.2158358097076416, |
| "learning_rate": 2.133847682782662e-06, |
| "loss": 1.1306, |
| "step": 1479 |
| }, |
| { |
| "epoch": 3.3792840822543795, |
| "grad_norm": 1.242077350616455, |
| "learning_rate": 2.1307673618399078e-06, |
| "loss": 1.1236, |
| "step": 1480 |
| }, |
| { |
| "epoch": 3.3815689261233817, |
| "grad_norm": 1.2471917867660522, |
| "learning_rate": 2.1276876138377662e-06, |
| "loss": 1.1039, |
| "step": 1481 |
| }, |
| { |
| "epoch": 3.383853769992384, |
| "grad_norm": 1.2467808723449707, |
| "learning_rate": 2.124608443555102e-06, |
| "loss": 1.1047, |
| "step": 1482 |
| }, |
| { |
| "epoch": 3.386138613861386, |
| "grad_norm": 1.2468382120132446, |
| "learning_rate": 2.1215298557698834e-06, |
| "loss": 1.1494, |
| "step": 1483 |
| }, |
| { |
| "epoch": 3.388423457730388, |
| "grad_norm": 1.211882472038269, |
| "learning_rate": 2.118451855259173e-06, |
| "loss": 1.0893, |
| "step": 1484 |
| }, |
| { |
| "epoch": 3.390708301599391, |
| "grad_norm": 1.219880223274231, |
| "learning_rate": 2.1153744467991243e-06, |
| "loss": 1.1203, |
| "step": 1485 |
| }, |
| { |
| "epoch": 3.392993145468393, |
| "grad_norm": 1.2225430011749268, |
| "learning_rate": 2.112297635164972e-06, |
| "loss": 1.1183, |
| "step": 1486 |
| }, |
| { |
| "epoch": 3.395277989337395, |
| "grad_norm": 1.229330062866211, |
| "learning_rate": 2.1092214251310235e-06, |
| "loss": 1.129, |
| "step": 1487 |
| }, |
| { |
| "epoch": 3.3975628332063974, |
| "grad_norm": 1.2349112033843994, |
| "learning_rate": 2.1061458214706536e-06, |
| "loss": 1.1212, |
| "step": 1488 |
| }, |
| { |
| "epoch": 3.3998476770754, |
| "grad_norm": 1.222354531288147, |
| "learning_rate": 2.103070828956295e-06, |
| "loss": 1.1216, |
| "step": 1489 |
| }, |
| { |
| "epoch": 3.402132520944402, |
| "grad_norm": 1.21956205368042, |
| "learning_rate": 2.0999964523594343e-06, |
| "loss": 1.1289, |
| "step": 1490 |
| }, |
| { |
| "epoch": 3.4044173648134044, |
| "grad_norm": 1.2225059270858765, |
| "learning_rate": 2.0969226964506007e-06, |
| "loss": 1.0957, |
| "step": 1491 |
| }, |
| { |
| "epoch": 3.4067022086824066, |
| "grad_norm": 1.2281376123428345, |
| "learning_rate": 2.09384956599936e-06, |
| "loss": 1.1346, |
| "step": 1492 |
| }, |
| { |
| "epoch": 3.408987052551409, |
| "grad_norm": 1.262031078338623, |
| "learning_rate": 2.090777065774308e-06, |
| "loss": 1.1269, |
| "step": 1493 |
| }, |
| { |
| "epoch": 3.4112718964204114, |
| "grad_norm": 1.2495571374893188, |
| "learning_rate": 2.087705200543064e-06, |
| "loss": 1.0793, |
| "step": 1494 |
| }, |
| { |
| "epoch": 3.4135567402894136, |
| "grad_norm": 1.1917531490325928, |
| "learning_rate": 2.0846339750722596e-06, |
| "loss": 1.0997, |
| "step": 1495 |
| }, |
| { |
| "epoch": 3.4158415841584158, |
| "grad_norm": 1.2409162521362305, |
| "learning_rate": 2.0815633941275345e-06, |
| "loss": 1.1022, |
| "step": 1496 |
| }, |
| { |
| "epoch": 3.418126428027418, |
| "grad_norm": 1.2216984033584595, |
| "learning_rate": 2.078493462473529e-06, |
| "loss": 1.1114, |
| "step": 1497 |
| }, |
| { |
| "epoch": 3.4204112718964206, |
| "grad_norm": 1.3940880298614502, |
| "learning_rate": 2.0754241848738755e-06, |
| "loss": 1.1224, |
| "step": 1498 |
| }, |
| { |
| "epoch": 3.4226961157654228, |
| "grad_norm": 1.1798889636993408, |
| "learning_rate": 2.0723555660911914e-06, |
| "loss": 1.1202, |
| "step": 1499 |
| }, |
| { |
| "epoch": 3.424980959634425, |
| "grad_norm": 1.2345131635665894, |
| "learning_rate": 2.0692876108870713e-06, |
| "loss": 1.1367, |
| "step": 1500 |
| }, |
| { |
| "epoch": 3.427265803503427, |
| "grad_norm": 1.2564736604690552, |
| "learning_rate": 2.066220324022081e-06, |
| "loss": 1.1096, |
| "step": 1501 |
| }, |
| { |
| "epoch": 3.4295506473724293, |
| "grad_norm": 1.2020221948623657, |
| "learning_rate": 2.063153710255749e-06, |
| "loss": 1.1245, |
| "step": 1502 |
| }, |
| { |
| "epoch": 3.431835491241432, |
| "grad_norm": 1.217174768447876, |
| "learning_rate": 2.0600877743465596e-06, |
| "loss": 1.0887, |
| "step": 1503 |
| }, |
| { |
| "epoch": 3.434120335110434, |
| "grad_norm": 1.3399721384048462, |
| "learning_rate": 2.0570225210519433e-06, |
| "loss": 1.1233, |
| "step": 1504 |
| }, |
| { |
| "epoch": 3.4364051789794363, |
| "grad_norm": 1.2029287815093994, |
| "learning_rate": 2.0539579551282745e-06, |
| "loss": 1.1148, |
| "step": 1505 |
| }, |
| { |
| "epoch": 3.4386900228484385, |
| "grad_norm": 1.2201212644577026, |
| "learning_rate": 2.050894081330859e-06, |
| "loss": 1.1215, |
| "step": 1506 |
| }, |
| { |
| "epoch": 3.440974866717441, |
| "grad_norm": 1.2642061710357666, |
| "learning_rate": 2.0478309044139293e-06, |
| "loss": 1.1203, |
| "step": 1507 |
| }, |
| { |
| "epoch": 3.4432597105864433, |
| "grad_norm": 1.225867748260498, |
| "learning_rate": 2.0447684291306354e-06, |
| "loss": 1.1241, |
| "step": 1508 |
| }, |
| { |
| "epoch": 3.4455445544554455, |
| "grad_norm": 1.2447335720062256, |
| "learning_rate": 2.0417066602330402e-06, |
| "loss": 1.1254, |
| "step": 1509 |
| }, |
| { |
| "epoch": 3.4478293983244477, |
| "grad_norm": 1.2231181859970093, |
| "learning_rate": 2.0386456024721102e-06, |
| "loss": 1.1325, |
| "step": 1510 |
| }, |
| { |
| "epoch": 3.4501142421934503, |
| "grad_norm": 1.2640868425369263, |
| "learning_rate": 2.035585260597708e-06, |
| "loss": 1.094, |
| "step": 1511 |
| }, |
| { |
| "epoch": 3.4523990860624525, |
| "grad_norm": 1.2731873989105225, |
| "learning_rate": 2.0325256393585846e-06, |
| "loss": 1.1422, |
| "step": 1512 |
| }, |
| { |
| "epoch": 3.4546839299314547, |
| "grad_norm": 1.2461851835250854, |
| "learning_rate": 2.029466743502374e-06, |
| "loss": 1.1663, |
| "step": 1513 |
| }, |
| { |
| "epoch": 3.456968773800457, |
| "grad_norm": 1.2134253978729248, |
| "learning_rate": 2.026408577775584e-06, |
| "loss": 1.114, |
| "step": 1514 |
| }, |
| { |
| "epoch": 3.459253617669459, |
| "grad_norm": 1.2513582706451416, |
| "learning_rate": 2.02335114692359e-06, |
| "loss": 1.1553, |
| "step": 1515 |
| }, |
| { |
| "epoch": 3.4615384615384617, |
| "grad_norm": 1.226331114768982, |
| "learning_rate": 2.0202944556906265e-06, |
| "loss": 1.15, |
| "step": 1516 |
| }, |
| { |
| "epoch": 3.463823305407464, |
| "grad_norm": 1.1947654485702515, |
| "learning_rate": 2.0172385088197804e-06, |
| "loss": 1.139, |
| "step": 1517 |
| }, |
| { |
| "epoch": 3.466108149276466, |
| "grad_norm": 1.2045434713363647, |
| "learning_rate": 2.014183311052984e-06, |
| "loss": 1.0923, |
| "step": 1518 |
| }, |
| { |
| "epoch": 3.4683929931454682, |
| "grad_norm": 1.2555713653564453, |
| "learning_rate": 2.011128867131007e-06, |
| "loss": 1.1475, |
| "step": 1519 |
| }, |
| { |
| "epoch": 3.470677837014471, |
| "grad_norm": 1.2348402738571167, |
| "learning_rate": 2.008075181793449e-06, |
| "loss": 1.1061, |
| "step": 1520 |
| }, |
| { |
| "epoch": 3.472962680883473, |
| "grad_norm": 1.3485833406448364, |
| "learning_rate": 2.005022259778733e-06, |
| "loss": 1.1452, |
| "step": 1521 |
| }, |
| { |
| "epoch": 3.4752475247524752, |
| "grad_norm": 1.2618969678878784, |
| "learning_rate": 2.0019701058240973e-06, |
| "loss": 1.1021, |
| "step": 1522 |
| }, |
| { |
| "epoch": 3.4775323686214774, |
| "grad_norm": 1.262932300567627, |
| "learning_rate": 1.998918724665589e-06, |
| "loss": 1.1169, |
| "step": 1523 |
| }, |
| { |
| "epoch": 3.47981721249048, |
| "grad_norm": 1.190786600112915, |
| "learning_rate": 1.995868121038054e-06, |
| "loss": 1.1142, |
| "step": 1524 |
| }, |
| { |
| "epoch": 3.4821020563594822, |
| "grad_norm": 1.2386054992675781, |
| "learning_rate": 1.992818299675134e-06, |
| "loss": 1.1276, |
| "step": 1525 |
| }, |
| { |
| "epoch": 3.4843869002284844, |
| "grad_norm": 1.3026626110076904, |
| "learning_rate": 1.9897692653092566e-06, |
| "loss": 1.1079, |
| "step": 1526 |
| }, |
| { |
| "epoch": 3.4866717440974866, |
| "grad_norm": 1.2284817695617676, |
| "learning_rate": 1.986721022671627e-06, |
| "loss": 1.1326, |
| "step": 1527 |
| }, |
| { |
| "epoch": 3.4889565879664888, |
| "grad_norm": 1.2360379695892334, |
| "learning_rate": 1.983673576492223e-06, |
| "loss": 1.1193, |
| "step": 1528 |
| }, |
| { |
| "epoch": 3.4912414318354914, |
| "grad_norm": 1.232100248336792, |
| "learning_rate": 1.980626931499785e-06, |
| "loss": 1.1155, |
| "step": 1529 |
| }, |
| { |
| "epoch": 3.4935262757044936, |
| "grad_norm": 1.261894702911377, |
| "learning_rate": 1.9775810924218126e-06, |
| "loss": 1.1127, |
| "step": 1530 |
| }, |
| { |
| "epoch": 3.4958111195734958, |
| "grad_norm": 1.3262462615966797, |
| "learning_rate": 1.974536063984553e-06, |
| "loss": 1.1199, |
| "step": 1531 |
| }, |
| { |
| "epoch": 3.498095963442498, |
| "grad_norm": 1.255900502204895, |
| "learning_rate": 1.9714918509129956e-06, |
| "loss": 1.1239, |
| "step": 1532 |
| }, |
| { |
| "epoch": 3.5003808073115, |
| "grad_norm": 1.2368223667144775, |
| "learning_rate": 1.968448457930865e-06, |
| "loss": 1.1097, |
| "step": 1533 |
| }, |
| { |
| "epoch": 3.5026656511805028, |
| "grad_norm": 1.2056398391723633, |
| "learning_rate": 1.9654058897606143e-06, |
| "loss": 1.091, |
| "step": 1534 |
| }, |
| { |
| "epoch": 3.504950495049505, |
| "grad_norm": 1.2451571226119995, |
| "learning_rate": 1.9623641511234145e-06, |
| "loss": 1.0939, |
| "step": 1535 |
| }, |
| { |
| "epoch": 3.507235338918507, |
| "grad_norm": 1.2372757196426392, |
| "learning_rate": 1.959323246739151e-06, |
| "loss": 1.0783, |
| "step": 1536 |
| }, |
| { |
| "epoch": 3.5095201827875098, |
| "grad_norm": 1.2244229316711426, |
| "learning_rate": 1.956283181326415e-06, |
| "loss": 1.0861, |
| "step": 1537 |
| }, |
| { |
| "epoch": 3.511805026656512, |
| "grad_norm": 1.2195265293121338, |
| "learning_rate": 1.9532439596024942e-06, |
| "loss": 1.1434, |
| "step": 1538 |
| }, |
| { |
| "epoch": 3.514089870525514, |
| "grad_norm": 1.2447067499160767, |
| "learning_rate": 1.9502055862833686e-06, |
| "loss": 1.103, |
| "step": 1539 |
| }, |
| { |
| "epoch": 3.5163747143945163, |
| "grad_norm": 1.2313573360443115, |
| "learning_rate": 1.947168066083701e-06, |
| "loss": 1.1518, |
| "step": 1540 |
| }, |
| { |
| "epoch": 3.5186595582635185, |
| "grad_norm": 1.2975883483886719, |
| "learning_rate": 1.9441314037168307e-06, |
| "loss": 1.1102, |
| "step": 1541 |
| }, |
| { |
| "epoch": 3.520944402132521, |
| "grad_norm": 1.2976816892623901, |
| "learning_rate": 1.941095603894767e-06, |
| "loss": 1.1408, |
| "step": 1542 |
| }, |
| { |
| "epoch": 3.5232292460015233, |
| "grad_norm": 1.3002218008041382, |
| "learning_rate": 1.9380606713281773e-06, |
| "loss": 1.125, |
| "step": 1543 |
| }, |
| { |
| "epoch": 3.5255140898705255, |
| "grad_norm": 1.225427508354187, |
| "learning_rate": 1.935026610726387e-06, |
| "loss": 1.0761, |
| "step": 1544 |
| }, |
| { |
| "epoch": 3.5277989337395277, |
| "grad_norm": 1.235769510269165, |
| "learning_rate": 1.931993426797367e-06, |
| "loss": 1.122, |
| "step": 1545 |
| }, |
| { |
| "epoch": 3.53008377760853, |
| "grad_norm": 1.2742570638656616, |
| "learning_rate": 1.9289611242477284e-06, |
| "loss": 1.0982, |
| "step": 1546 |
| }, |
| { |
| "epoch": 3.5323686214775325, |
| "grad_norm": 1.2655776739120483, |
| "learning_rate": 1.9259297077827134e-06, |
| "loss": 1.1124, |
| "step": 1547 |
| }, |
| { |
| "epoch": 3.5346534653465347, |
| "grad_norm": 1.1953189373016357, |
| "learning_rate": 1.92289918210619e-06, |
| "loss": 1.0874, |
| "step": 1548 |
| }, |
| { |
| "epoch": 3.536938309215537, |
| "grad_norm": 1.21706223487854, |
| "learning_rate": 1.9198695519206446e-06, |
| "loss": 1.1172, |
| "step": 1549 |
| }, |
| { |
| "epoch": 3.5392231530845395, |
| "grad_norm": 1.2621902227401733, |
| "learning_rate": 1.916840821927173e-06, |
| "loss": 1.1191, |
| "step": 1550 |
| }, |
| { |
| "epoch": 3.5415079969535412, |
| "grad_norm": 1.2300255298614502, |
| "learning_rate": 1.913812996825475e-06, |
| "loss": 1.1316, |
| "step": 1551 |
| }, |
| { |
| "epoch": 3.543792840822544, |
| "grad_norm": 1.238046407699585, |
| "learning_rate": 1.910786081313845e-06, |
| "loss": 1.1013, |
| "step": 1552 |
| }, |
| { |
| "epoch": 3.546077684691546, |
| "grad_norm": 1.2778140306472778, |
| "learning_rate": 1.907760080089168e-06, |
| "loss": 1.1419, |
| "step": 1553 |
| }, |
| { |
| "epoch": 3.5483625285605482, |
| "grad_norm": 1.211096167564392, |
| "learning_rate": 1.904734997846909e-06, |
| "loss": 1.1061, |
| "step": 1554 |
| }, |
| { |
| "epoch": 3.550647372429551, |
| "grad_norm": 1.293352484703064, |
| "learning_rate": 1.9017108392811065e-06, |
| "loss": 1.1339, |
| "step": 1555 |
| }, |
| { |
| "epoch": 3.552932216298553, |
| "grad_norm": 1.2381699085235596, |
| "learning_rate": 1.8986876090843668e-06, |
| "loss": 1.1017, |
| "step": 1556 |
| }, |
| { |
| "epoch": 3.5552170601675552, |
| "grad_norm": 1.2436529397964478, |
| "learning_rate": 1.8956653119478552e-06, |
| "loss": 1.1027, |
| "step": 1557 |
| }, |
| { |
| "epoch": 3.5575019040365574, |
| "grad_norm": 1.2499804496765137, |
| "learning_rate": 1.89264395256129e-06, |
| "loss": 1.1445, |
| "step": 1558 |
| }, |
| { |
| "epoch": 3.5597867479055596, |
| "grad_norm": 1.194018840789795, |
| "learning_rate": 1.889623535612931e-06, |
| "loss": 1.11, |
| "step": 1559 |
| }, |
| { |
| "epoch": 3.5620715917745622, |
| "grad_norm": 1.1932462453842163, |
| "learning_rate": 1.8866040657895816e-06, |
| "loss": 1.1377, |
| "step": 1560 |
| }, |
| { |
| "epoch": 3.5643564356435644, |
| "grad_norm": 1.254319667816162, |
| "learning_rate": 1.883585547776571e-06, |
| "loss": 1.1212, |
| "step": 1561 |
| }, |
| { |
| "epoch": 3.5666412795125666, |
| "grad_norm": 1.2139774560928345, |
| "learning_rate": 1.8805679862577519e-06, |
| "loss": 1.0974, |
| "step": 1562 |
| }, |
| { |
| "epoch": 3.568926123381569, |
| "grad_norm": 1.2384382486343384, |
| "learning_rate": 1.8775513859154937e-06, |
| "loss": 1.1471, |
| "step": 1563 |
| }, |
| { |
| "epoch": 3.571210967250571, |
| "grad_norm": 1.2261179685592651, |
| "learning_rate": 1.8745357514306754e-06, |
| "loss": 1.1437, |
| "step": 1564 |
| }, |
| { |
| "epoch": 3.5734958111195736, |
| "grad_norm": 1.2182419300079346, |
| "learning_rate": 1.8715210874826745e-06, |
| "loss": 1.1039, |
| "step": 1565 |
| }, |
| { |
| "epoch": 3.575780654988576, |
| "grad_norm": 1.2260403633117676, |
| "learning_rate": 1.868507398749364e-06, |
| "loss": 1.1445, |
| "step": 1566 |
| }, |
| { |
| "epoch": 3.578065498857578, |
| "grad_norm": 1.255858302116394, |
| "learning_rate": 1.8654946899071052e-06, |
| "loss": 1.1592, |
| "step": 1567 |
| }, |
| { |
| "epoch": 3.5803503427265806, |
| "grad_norm": 1.248203992843628, |
| "learning_rate": 1.8624829656307364e-06, |
| "loss": 1.0864, |
| "step": 1568 |
| }, |
| { |
| "epoch": 3.5826351865955828, |
| "grad_norm": 1.2393451929092407, |
| "learning_rate": 1.8594722305935691e-06, |
| "loss": 1.1392, |
| "step": 1569 |
| }, |
| { |
| "epoch": 3.584920030464585, |
| "grad_norm": 1.317401647567749, |
| "learning_rate": 1.85646248946738e-06, |
| "loss": 1.0891, |
| "step": 1570 |
| }, |
| { |
| "epoch": 3.587204874333587, |
| "grad_norm": 1.198999285697937, |
| "learning_rate": 1.8534537469224035e-06, |
| "loss": 1.1002, |
| "step": 1571 |
| }, |
| { |
| "epoch": 3.5894897182025893, |
| "grad_norm": 1.2434310913085938, |
| "learning_rate": 1.8504460076273247e-06, |
| "loss": 1.1502, |
| "step": 1572 |
| }, |
| { |
| "epoch": 3.591774562071592, |
| "grad_norm": 1.1986984014511108, |
| "learning_rate": 1.847439276249271e-06, |
| "loss": 1.0861, |
| "step": 1573 |
| }, |
| { |
| "epoch": 3.594059405940594, |
| "grad_norm": 1.2537792921066284, |
| "learning_rate": 1.8444335574538063e-06, |
| "loss": 1.12, |
| "step": 1574 |
| }, |
| { |
| "epoch": 3.5963442498095963, |
| "grad_norm": 1.2118167877197266, |
| "learning_rate": 1.8414288559049236e-06, |
| "loss": 1.1455, |
| "step": 1575 |
| }, |
| { |
| "epoch": 3.5986290936785985, |
| "grad_norm": 1.2423104047775269, |
| "learning_rate": 1.8384251762650384e-06, |
| "loss": 1.1503, |
| "step": 1576 |
| }, |
| { |
| "epoch": 3.6009139375476007, |
| "grad_norm": 1.2497063875198364, |
| "learning_rate": 1.8354225231949773e-06, |
| "loss": 1.1163, |
| "step": 1577 |
| }, |
| { |
| "epoch": 3.6031987814166033, |
| "grad_norm": 1.2626383304595947, |
| "learning_rate": 1.8324209013539775e-06, |
| "loss": 1.0694, |
| "step": 1578 |
| }, |
| { |
| "epoch": 3.6054836252856055, |
| "grad_norm": 1.2420138120651245, |
| "learning_rate": 1.8294203153996739e-06, |
| "loss": 1.1202, |
| "step": 1579 |
| }, |
| { |
| "epoch": 3.6077684691546077, |
| "grad_norm": 1.1990013122558594, |
| "learning_rate": 1.8264207699880953e-06, |
| "loss": 1.1155, |
| "step": 1580 |
| }, |
| { |
| "epoch": 3.6100533130236103, |
| "grad_norm": 1.2583009004592896, |
| "learning_rate": 1.8234222697736542e-06, |
| "loss": 1.1337, |
| "step": 1581 |
| }, |
| { |
| "epoch": 3.612338156892612, |
| "grad_norm": 1.257130742073059, |
| "learning_rate": 1.8204248194091429e-06, |
| "loss": 1.1192, |
| "step": 1582 |
| }, |
| { |
| "epoch": 3.6146230007616147, |
| "grad_norm": 1.2139629125595093, |
| "learning_rate": 1.817428423545724e-06, |
| "loss": 1.1196, |
| "step": 1583 |
| }, |
| { |
| "epoch": 3.616907844630617, |
| "grad_norm": 1.2340987920761108, |
| "learning_rate": 1.8144330868329244e-06, |
| "loss": 1.1476, |
| "step": 1584 |
| }, |
| { |
| "epoch": 3.619192688499619, |
| "grad_norm": 1.2045873403549194, |
| "learning_rate": 1.8114388139186257e-06, |
| "loss": 1.1168, |
| "step": 1585 |
| }, |
| { |
| "epoch": 3.6214775323686217, |
| "grad_norm": 1.2221472263336182, |
| "learning_rate": 1.8084456094490607e-06, |
| "loss": 1.1362, |
| "step": 1586 |
| }, |
| { |
| "epoch": 3.623762376237624, |
| "grad_norm": 1.1829921007156372, |
| "learning_rate": 1.8054534780688038e-06, |
| "loss": 1.0699, |
| "step": 1587 |
| }, |
| { |
| "epoch": 3.626047220106626, |
| "grad_norm": 1.2308509349822998, |
| "learning_rate": 1.8024624244207644e-06, |
| "loss": 1.0782, |
| "step": 1588 |
| }, |
| { |
| "epoch": 3.6283320639756282, |
| "grad_norm": 1.2337130308151245, |
| "learning_rate": 1.7994724531461788e-06, |
| "loss": 1.1058, |
| "step": 1589 |
| }, |
| { |
| "epoch": 3.6306169078446304, |
| "grad_norm": 1.219441533088684, |
| "learning_rate": 1.796483568884604e-06, |
| "loss": 1.0977, |
| "step": 1590 |
| }, |
| { |
| "epoch": 3.632901751713633, |
| "grad_norm": 1.2322226762771606, |
| "learning_rate": 1.7934957762739108e-06, |
| "loss": 1.1019, |
| "step": 1591 |
| }, |
| { |
| "epoch": 3.6351865955826352, |
| "grad_norm": 1.271531105041504, |
| "learning_rate": 1.7905090799502766e-06, |
| "loss": 1.1092, |
| "step": 1592 |
| }, |
| { |
| "epoch": 3.6374714394516374, |
| "grad_norm": 1.269434928894043, |
| "learning_rate": 1.7875234845481761e-06, |
| "loss": 1.149, |
| "step": 1593 |
| }, |
| { |
| "epoch": 3.6397562833206396, |
| "grad_norm": 1.2015925645828247, |
| "learning_rate": 1.7845389947003766e-06, |
| "loss": 1.1274, |
| "step": 1594 |
| }, |
| { |
| "epoch": 3.642041127189642, |
| "grad_norm": 1.217633843421936, |
| "learning_rate": 1.7815556150379298e-06, |
| "loss": 1.1014, |
| "step": 1595 |
| }, |
| { |
| "epoch": 3.6443259710586444, |
| "grad_norm": 1.2365998029708862, |
| "learning_rate": 1.7785733501901654e-06, |
| "loss": 1.1053, |
| "step": 1596 |
| }, |
| { |
| "epoch": 3.6466108149276466, |
| "grad_norm": 1.193575143814087, |
| "learning_rate": 1.7755922047846812e-06, |
| "loss": 1.1157, |
| "step": 1597 |
| }, |
| { |
| "epoch": 3.648895658796649, |
| "grad_norm": 1.2818313837051392, |
| "learning_rate": 1.7726121834473406e-06, |
| "loss": 1.0953, |
| "step": 1598 |
| }, |
| { |
| "epoch": 3.6511805026656514, |
| "grad_norm": 1.2339566946029663, |
| "learning_rate": 1.7696332908022607e-06, |
| "loss": 1.1119, |
| "step": 1599 |
| }, |
| { |
| "epoch": 3.6534653465346536, |
| "grad_norm": 1.2497464418411255, |
| "learning_rate": 1.7666555314718093e-06, |
| "loss": 1.1107, |
| "step": 1600 |
| }, |
| { |
| "epoch": 3.655750190403656, |
| "grad_norm": 1.2073878049850464, |
| "learning_rate": 1.7636789100765922e-06, |
| "loss": 1.0952, |
| "step": 1601 |
| }, |
| { |
| "epoch": 3.658035034272658, |
| "grad_norm": 1.287691354751587, |
| "learning_rate": 1.7607034312354531e-06, |
| "loss": 1.1212, |
| "step": 1602 |
| }, |
| { |
| "epoch": 3.66031987814166, |
| "grad_norm": 1.2657358646392822, |
| "learning_rate": 1.7577290995654611e-06, |
| "loss": 1.0946, |
| "step": 1603 |
| }, |
| { |
| "epoch": 3.662604722010663, |
| "grad_norm": 1.2619253396987915, |
| "learning_rate": 1.7547559196819056e-06, |
| "loss": 1.1182, |
| "step": 1604 |
| }, |
| { |
| "epoch": 3.664889565879665, |
| "grad_norm": 1.2667503356933594, |
| "learning_rate": 1.7517838961982876e-06, |
| "loss": 1.1519, |
| "step": 1605 |
| }, |
| { |
| "epoch": 3.667174409748667, |
| "grad_norm": 1.3911538124084473, |
| "learning_rate": 1.7488130337263153e-06, |
| "loss": 1.1396, |
| "step": 1606 |
| }, |
| { |
| "epoch": 3.6694592536176693, |
| "grad_norm": 1.267244815826416, |
| "learning_rate": 1.745843336875895e-06, |
| "loss": 1.1137, |
| "step": 1607 |
| }, |
| { |
| "epoch": 3.6717440974866715, |
| "grad_norm": 1.242616057395935, |
| "learning_rate": 1.7428748102551237e-06, |
| "loss": 1.1537, |
| "step": 1608 |
| }, |
| { |
| "epoch": 3.674028941355674, |
| "grad_norm": 1.3466277122497559, |
| "learning_rate": 1.7399074584702827e-06, |
| "loss": 1.1017, |
| "step": 1609 |
| }, |
| { |
| "epoch": 3.6763137852246763, |
| "grad_norm": 1.269391655921936, |
| "learning_rate": 1.7369412861258307e-06, |
| "loss": 1.1189, |
| "step": 1610 |
| }, |
| { |
| "epoch": 3.6785986290936785, |
| "grad_norm": 1.2533386945724487, |
| "learning_rate": 1.733976297824396e-06, |
| "loss": 1.151, |
| "step": 1611 |
| }, |
| { |
| "epoch": 3.680883472962681, |
| "grad_norm": 1.2193639278411865, |
| "learning_rate": 1.73101249816677e-06, |
| "loss": 1.1344, |
| "step": 1612 |
| }, |
| { |
| "epoch": 3.6831683168316833, |
| "grad_norm": 1.24978768825531, |
| "learning_rate": 1.7280498917518987e-06, |
| "loss": 1.1214, |
| "step": 1613 |
| }, |
| { |
| "epoch": 3.6854531607006855, |
| "grad_norm": 1.272676706314087, |
| "learning_rate": 1.7250884831768773e-06, |
| "loss": 1.1371, |
| "step": 1614 |
| }, |
| { |
| "epoch": 3.6877380045696877, |
| "grad_norm": 1.2939724922180176, |
| "learning_rate": 1.722128277036942e-06, |
| "loss": 1.153, |
| "step": 1615 |
| }, |
| { |
| "epoch": 3.69002284843869, |
| "grad_norm": 1.2304813861846924, |
| "learning_rate": 1.7191692779254646e-06, |
| "loss": 1.0707, |
| "step": 1616 |
| }, |
| { |
| "epoch": 3.6923076923076925, |
| "grad_norm": 1.2000212669372559, |
| "learning_rate": 1.716211490433941e-06, |
| "loss": 1.1075, |
| "step": 1617 |
| }, |
| { |
| "epoch": 3.6945925361766947, |
| "grad_norm": 1.2244328260421753, |
| "learning_rate": 1.7132549191519893e-06, |
| "loss": 1.0832, |
| "step": 1618 |
| }, |
| { |
| "epoch": 3.696877380045697, |
| "grad_norm": 1.2140921354293823, |
| "learning_rate": 1.7102995686673399e-06, |
| "loss": 1.1065, |
| "step": 1619 |
| }, |
| { |
| "epoch": 3.699162223914699, |
| "grad_norm": 1.2332017421722412, |
| "learning_rate": 1.7073454435658294e-06, |
| "loss": 1.134, |
| "step": 1620 |
| }, |
| { |
| "epoch": 3.7014470677837013, |
| "grad_norm": 1.306863784790039, |
| "learning_rate": 1.7043925484313911e-06, |
| "loss": 1.1334, |
| "step": 1621 |
| }, |
| { |
| "epoch": 3.703731911652704, |
| "grad_norm": 1.248136281967163, |
| "learning_rate": 1.7014408878460514e-06, |
| "loss": 1.1586, |
| "step": 1622 |
| }, |
| { |
| "epoch": 3.706016755521706, |
| "grad_norm": 1.2233672142028809, |
| "learning_rate": 1.698490466389921e-06, |
| "loss": 1.1328, |
| "step": 1623 |
| }, |
| { |
| "epoch": 3.7083015993907082, |
| "grad_norm": 1.2379913330078125, |
| "learning_rate": 1.6955412886411882e-06, |
| "loss": 1.1071, |
| "step": 1624 |
| }, |
| { |
| "epoch": 3.7105864432597104, |
| "grad_norm": 1.2285481691360474, |
| "learning_rate": 1.692593359176109e-06, |
| "loss": 1.1009, |
| "step": 1625 |
| }, |
| { |
| "epoch": 3.7128712871287126, |
| "grad_norm": 1.2411075830459595, |
| "learning_rate": 1.689646682569005e-06, |
| "loss": 1.149, |
| "step": 1626 |
| }, |
| { |
| "epoch": 3.7151561309977152, |
| "grad_norm": 1.2382875680923462, |
| "learning_rate": 1.6867012633922524e-06, |
| "loss": 1.1003, |
| "step": 1627 |
| }, |
| { |
| "epoch": 3.7174409748667174, |
| "grad_norm": 1.2481483221054077, |
| "learning_rate": 1.6837571062162783e-06, |
| "loss": 1.0967, |
| "step": 1628 |
| }, |
| { |
| "epoch": 3.7197258187357196, |
| "grad_norm": 1.2253104448318481, |
| "learning_rate": 1.680814215609548e-06, |
| "loss": 1.0991, |
| "step": 1629 |
| }, |
| { |
| "epoch": 3.7220106626047222, |
| "grad_norm": 1.240035057067871, |
| "learning_rate": 1.6778725961385642e-06, |
| "loss": 1.1111, |
| "step": 1630 |
| }, |
| { |
| "epoch": 3.7242955064737244, |
| "grad_norm": 1.254558801651001, |
| "learning_rate": 1.6749322523678562e-06, |
| "loss": 1.1436, |
| "step": 1631 |
| }, |
| { |
| "epoch": 3.7265803503427266, |
| "grad_norm": 1.2620891332626343, |
| "learning_rate": 1.6719931888599749e-06, |
| "loss": 1.1519, |
| "step": 1632 |
| }, |
| { |
| "epoch": 3.728865194211729, |
| "grad_norm": 1.2564220428466797, |
| "learning_rate": 1.6690554101754824e-06, |
| "loss": 1.1506, |
| "step": 1633 |
| }, |
| { |
| "epoch": 3.731150038080731, |
| "grad_norm": 1.1939340829849243, |
| "learning_rate": 1.6661189208729492e-06, |
| "loss": 1.1191, |
| "step": 1634 |
| }, |
| { |
| "epoch": 3.7334348819497336, |
| "grad_norm": 1.2243472337722778, |
| "learning_rate": 1.6631837255089437e-06, |
| "loss": 1.1417, |
| "step": 1635 |
| }, |
| { |
| "epoch": 3.735719725818736, |
| "grad_norm": 1.261639952659607, |
| "learning_rate": 1.660249828638028e-06, |
| "loss": 1.0566, |
| "step": 1636 |
| }, |
| { |
| "epoch": 3.738004569687738, |
| "grad_norm": 1.2218812704086304, |
| "learning_rate": 1.657317234812746e-06, |
| "loss": 1.0979, |
| "step": 1637 |
| }, |
| { |
| "epoch": 3.74028941355674, |
| "grad_norm": 1.243190884590149, |
| "learning_rate": 1.6543859485836252e-06, |
| "loss": 1.0964, |
| "step": 1638 |
| }, |
| { |
| "epoch": 3.7425742574257423, |
| "grad_norm": 1.2348755598068237, |
| "learning_rate": 1.6514559744991592e-06, |
| "loss": 1.1623, |
| "step": 1639 |
| }, |
| { |
| "epoch": 3.744859101294745, |
| "grad_norm": 1.1991803646087646, |
| "learning_rate": 1.6485273171058074e-06, |
| "loss": 1.1372, |
| "step": 1640 |
| }, |
| { |
| "epoch": 3.747143945163747, |
| "grad_norm": 1.2339205741882324, |
| "learning_rate": 1.645599980947986e-06, |
| "loss": 1.1225, |
| "step": 1641 |
| }, |
| { |
| "epoch": 3.7494287890327493, |
| "grad_norm": 1.2044298648834229, |
| "learning_rate": 1.642673970568062e-06, |
| "loss": 1.1325, |
| "step": 1642 |
| }, |
| { |
| "epoch": 3.751713632901752, |
| "grad_norm": 1.242209792137146, |
| "learning_rate": 1.6397492905063423e-06, |
| "loss": 1.1234, |
| "step": 1643 |
| }, |
| { |
| "epoch": 3.753998476770754, |
| "grad_norm": 1.2190120220184326, |
| "learning_rate": 1.6368259453010723e-06, |
| "loss": 1.1428, |
| "step": 1644 |
| }, |
| { |
| "epoch": 3.7562833206397563, |
| "grad_norm": 1.2304496765136719, |
| "learning_rate": 1.6339039394884254e-06, |
| "loss": 1.1203, |
| "step": 1645 |
| }, |
| { |
| "epoch": 3.7585681645087585, |
| "grad_norm": 1.220682144165039, |
| "learning_rate": 1.6309832776024964e-06, |
| "loss": 1.1342, |
| "step": 1646 |
| }, |
| { |
| "epoch": 3.7608530083777607, |
| "grad_norm": 1.2088897228240967, |
| "learning_rate": 1.6280639641752944e-06, |
| "loss": 1.1027, |
| "step": 1647 |
| }, |
| { |
| "epoch": 3.7631378522467633, |
| "grad_norm": 1.2930717468261719, |
| "learning_rate": 1.6251460037367368e-06, |
| "loss": 1.1363, |
| "step": 1648 |
| }, |
| { |
| "epoch": 3.7654226961157655, |
| "grad_norm": 1.2154346704483032, |
| "learning_rate": 1.6222294008146405e-06, |
| "loss": 1.1303, |
| "step": 1649 |
| }, |
| { |
| "epoch": 3.7677075399847677, |
| "grad_norm": 1.2410285472869873, |
| "learning_rate": 1.619314159934718e-06, |
| "loss": 1.0836, |
| "step": 1650 |
| }, |
| { |
| "epoch": 3.76999238385377, |
| "grad_norm": 1.2346444129943848, |
| "learning_rate": 1.6164002856205657e-06, |
| "loss": 1.0972, |
| "step": 1651 |
| }, |
| { |
| "epoch": 3.772277227722772, |
| "grad_norm": 1.241414189338684, |
| "learning_rate": 1.613487782393661e-06, |
| "loss": 1.1629, |
| "step": 1652 |
| }, |
| { |
| "epoch": 3.7745620715917747, |
| "grad_norm": 1.216896891593933, |
| "learning_rate": 1.6105766547733537e-06, |
| "loss": 1.1095, |
| "step": 1653 |
| }, |
| { |
| "epoch": 3.776846915460777, |
| "grad_norm": 1.2589776515960693, |
| "learning_rate": 1.6076669072768597e-06, |
| "loss": 1.0993, |
| "step": 1654 |
| }, |
| { |
| "epoch": 3.779131759329779, |
| "grad_norm": 1.238924503326416, |
| "learning_rate": 1.6047585444192509e-06, |
| "loss": 1.1178, |
| "step": 1655 |
| }, |
| { |
| "epoch": 3.7814166031987813, |
| "grad_norm": 1.2369179725646973, |
| "learning_rate": 1.601851570713453e-06, |
| "loss": 1.1309, |
| "step": 1656 |
| }, |
| { |
| "epoch": 3.7837014470677834, |
| "grad_norm": 1.211485743522644, |
| "learning_rate": 1.598945990670236e-06, |
| "loss": 1.1047, |
| "step": 1657 |
| }, |
| { |
| "epoch": 3.785986290936786, |
| "grad_norm": 1.2450000047683716, |
| "learning_rate": 1.5960418087982067e-06, |
| "loss": 1.1116, |
| "step": 1658 |
| }, |
| { |
| "epoch": 3.7882711348057883, |
| "grad_norm": 1.2489582300186157, |
| "learning_rate": 1.5931390296038014e-06, |
| "loss": 1.1134, |
| "step": 1659 |
| }, |
| { |
| "epoch": 3.7905559786747904, |
| "grad_norm": 1.234943151473999, |
| "learning_rate": 1.5902376575912815e-06, |
| "loss": 1.0784, |
| "step": 1660 |
| }, |
| { |
| "epoch": 3.792840822543793, |
| "grad_norm": 1.2086708545684814, |
| "learning_rate": 1.587337697262724e-06, |
| "loss": 1.1164, |
| "step": 1661 |
| }, |
| { |
| "epoch": 3.7951256664127953, |
| "grad_norm": 1.2318179607391357, |
| "learning_rate": 1.5844391531180167e-06, |
| "loss": 1.1039, |
| "step": 1662 |
| }, |
| { |
| "epoch": 3.7974105102817974, |
| "grad_norm": 1.2397037744522095, |
| "learning_rate": 1.5815420296548476e-06, |
| "loss": 1.1214, |
| "step": 1663 |
| }, |
| { |
| "epoch": 3.7996953541507996, |
| "grad_norm": 1.2473440170288086, |
| "learning_rate": 1.5786463313687016e-06, |
| "loss": 1.1327, |
| "step": 1664 |
| }, |
| { |
| "epoch": 3.801980198019802, |
| "grad_norm": 1.2378629446029663, |
| "learning_rate": 1.5757520627528522e-06, |
| "loss": 1.1323, |
| "step": 1665 |
| }, |
| { |
| "epoch": 3.8042650418888044, |
| "grad_norm": 1.2788701057434082, |
| "learning_rate": 1.5728592282983552e-06, |
| "loss": 1.0891, |
| "step": 1666 |
| }, |
| { |
| "epoch": 3.8065498857578066, |
| "grad_norm": 1.208809733390808, |
| "learning_rate": 1.569967832494038e-06, |
| "loss": 1.1107, |
| "step": 1667 |
| }, |
| { |
| "epoch": 3.808834729626809, |
| "grad_norm": 1.2094557285308838, |
| "learning_rate": 1.5670778798264997e-06, |
| "loss": 1.0916, |
| "step": 1668 |
| }, |
| { |
| "epoch": 3.811119573495811, |
| "grad_norm": 1.2201573848724365, |
| "learning_rate": 1.5641893747800968e-06, |
| "loss": 1.1196, |
| "step": 1669 |
| }, |
| { |
| "epoch": 3.813404417364813, |
| "grad_norm": 1.2208703756332397, |
| "learning_rate": 1.5613023218369416e-06, |
| "loss": 1.1172, |
| "step": 1670 |
| }, |
| { |
| "epoch": 3.815689261233816, |
| "grad_norm": 1.2877635955810547, |
| "learning_rate": 1.5584167254768918e-06, |
| "loss": 1.0968, |
| "step": 1671 |
| }, |
| { |
| "epoch": 3.817974105102818, |
| "grad_norm": 1.2119520902633667, |
| "learning_rate": 1.5555325901775453e-06, |
| "loss": 1.0905, |
| "step": 1672 |
| }, |
| { |
| "epoch": 3.82025894897182, |
| "grad_norm": 1.286763072013855, |
| "learning_rate": 1.5526499204142332e-06, |
| "loss": 1.1188, |
| "step": 1673 |
| }, |
| { |
| "epoch": 3.822543792840823, |
| "grad_norm": 1.273573637008667, |
| "learning_rate": 1.5497687206600134e-06, |
| "loss": 1.1156, |
| "step": 1674 |
| }, |
| { |
| "epoch": 3.824828636709825, |
| "grad_norm": 1.231566071510315, |
| "learning_rate": 1.5468889953856602e-06, |
| "loss": 1.0922, |
| "step": 1675 |
| }, |
| { |
| "epoch": 3.827113480578827, |
| "grad_norm": 1.228342890739441, |
| "learning_rate": 1.5440107490596623e-06, |
| "loss": 1.112, |
| "step": 1676 |
| }, |
| { |
| "epoch": 3.8293983244478293, |
| "grad_norm": 1.2194006443023682, |
| "learning_rate": 1.5411339861482121e-06, |
| "loss": 1.0727, |
| "step": 1677 |
| }, |
| { |
| "epoch": 3.8316831683168315, |
| "grad_norm": 1.2403062582015991, |
| "learning_rate": 1.538258711115202e-06, |
| "loss": 1.1327, |
| "step": 1678 |
| }, |
| { |
| "epoch": 3.833968012185834, |
| "grad_norm": 1.281432867050171, |
| "learning_rate": 1.5353849284222128e-06, |
| "loss": 1.1665, |
| "step": 1679 |
| }, |
| { |
| "epoch": 3.8362528560548363, |
| "grad_norm": 1.2705928087234497, |
| "learning_rate": 1.5325126425285122e-06, |
| "loss": 1.1195, |
| "step": 1680 |
| }, |
| { |
| "epoch": 3.8385376999238385, |
| "grad_norm": 1.2461146116256714, |
| "learning_rate": 1.5296418578910438e-06, |
| "loss": 1.1043, |
| "step": 1681 |
| }, |
| { |
| "epoch": 3.8408225437928407, |
| "grad_norm": 1.3064427375793457, |
| "learning_rate": 1.5267725789644232e-06, |
| "loss": 1.0931, |
| "step": 1682 |
| }, |
| { |
| "epoch": 3.843107387661843, |
| "grad_norm": 1.23982834815979, |
| "learning_rate": 1.5239048102009275e-06, |
| "loss": 1.1033, |
| "step": 1683 |
| }, |
| { |
| "epoch": 3.8453922315308455, |
| "grad_norm": 1.236802339553833, |
| "learning_rate": 1.5210385560504914e-06, |
| "loss": 1.1129, |
| "step": 1684 |
| }, |
| { |
| "epoch": 3.8476770753998477, |
| "grad_norm": 1.2742433547973633, |
| "learning_rate": 1.5181738209607006e-06, |
| "loss": 1.1648, |
| "step": 1685 |
| }, |
| { |
| "epoch": 3.84996191926885, |
| "grad_norm": 1.2181257009506226, |
| "learning_rate": 1.5153106093767827e-06, |
| "loss": 1.1038, |
| "step": 1686 |
| }, |
| { |
| "epoch": 3.852246763137852, |
| "grad_norm": 1.2690078020095825, |
| "learning_rate": 1.5124489257415998e-06, |
| "loss": 1.0908, |
| "step": 1687 |
| }, |
| { |
| "epoch": 3.8545316070068543, |
| "grad_norm": 1.2566394805908203, |
| "learning_rate": 1.509588774495645e-06, |
| "loss": 1.1146, |
| "step": 1688 |
| }, |
| { |
| "epoch": 3.856816450875857, |
| "grad_norm": 1.2618845701217651, |
| "learning_rate": 1.5067301600770337e-06, |
| "loss": 1.0876, |
| "step": 1689 |
| }, |
| { |
| "epoch": 3.859101294744859, |
| "grad_norm": 1.197676181793213, |
| "learning_rate": 1.5038730869214957e-06, |
| "loss": 1.1181, |
| "step": 1690 |
| }, |
| { |
| "epoch": 3.8613861386138613, |
| "grad_norm": 1.2773524522781372, |
| "learning_rate": 1.5010175594623688e-06, |
| "loss": 1.1646, |
| "step": 1691 |
| }, |
| { |
| "epoch": 3.863670982482864, |
| "grad_norm": 1.2414312362670898, |
| "learning_rate": 1.4981635821305934e-06, |
| "loss": 1.0974, |
| "step": 1692 |
| }, |
| { |
| "epoch": 3.865955826351866, |
| "grad_norm": 1.2599167823791504, |
| "learning_rate": 1.4953111593547038e-06, |
| "loss": 1.1338, |
| "step": 1693 |
| }, |
| { |
| "epoch": 3.8682406702208683, |
| "grad_norm": 1.2631725072860718, |
| "learning_rate": 1.4924602955608232e-06, |
| "loss": 1.1158, |
| "step": 1694 |
| }, |
| { |
| "epoch": 3.8705255140898704, |
| "grad_norm": 1.2844053506851196, |
| "learning_rate": 1.4896109951726542e-06, |
| "loss": 1.1067, |
| "step": 1695 |
| }, |
| { |
| "epoch": 3.8728103579588726, |
| "grad_norm": 1.236351490020752, |
| "learning_rate": 1.4867632626114742e-06, |
| "loss": 1.08, |
| "step": 1696 |
| }, |
| { |
| "epoch": 3.8750952018278753, |
| "grad_norm": 1.2237566709518433, |
| "learning_rate": 1.4839171022961279e-06, |
| "loss": 1.0924, |
| "step": 1697 |
| }, |
| { |
| "epoch": 3.8773800456968774, |
| "grad_norm": 1.2487142086029053, |
| "learning_rate": 1.4810725186430208e-06, |
| "loss": 1.1441, |
| "step": 1698 |
| }, |
| { |
| "epoch": 3.8796648895658796, |
| "grad_norm": 1.2455718517303467, |
| "learning_rate": 1.4782295160661103e-06, |
| "loss": 1.1043, |
| "step": 1699 |
| }, |
| { |
| "epoch": 3.881949733434882, |
| "grad_norm": 1.2438057661056519, |
| "learning_rate": 1.4753880989769014e-06, |
| "loss": 1.1375, |
| "step": 1700 |
| }, |
| { |
| "epoch": 3.884234577303884, |
| "grad_norm": 1.1872096061706543, |
| "learning_rate": 1.4725482717844397e-06, |
| "loss": 1.1017, |
| "step": 1701 |
| }, |
| { |
| "epoch": 3.8865194211728866, |
| "grad_norm": 1.303624153137207, |
| "learning_rate": 1.4697100388953033e-06, |
| "loss": 1.1154, |
| "step": 1702 |
| }, |
| { |
| "epoch": 3.888804265041889, |
| "grad_norm": 1.2645589113235474, |
| "learning_rate": 1.4668734047135947e-06, |
| "loss": 1.1316, |
| "step": 1703 |
| }, |
| { |
| "epoch": 3.891089108910891, |
| "grad_norm": 1.2637914419174194, |
| "learning_rate": 1.464038373640938e-06, |
| "loss": 1.1156, |
| "step": 1704 |
| }, |
| { |
| "epoch": 3.8933739527798936, |
| "grad_norm": 1.2358670234680176, |
| "learning_rate": 1.4612049500764685e-06, |
| "loss": 1.1031, |
| "step": 1705 |
| }, |
| { |
| "epoch": 3.895658796648896, |
| "grad_norm": 1.2059921026229858, |
| "learning_rate": 1.4583731384168275e-06, |
| "loss": 1.1147, |
| "step": 1706 |
| }, |
| { |
| "epoch": 3.897943640517898, |
| "grad_norm": 1.2406716346740723, |
| "learning_rate": 1.4555429430561552e-06, |
| "loss": 1.1354, |
| "step": 1707 |
| }, |
| { |
| "epoch": 3.9002284843869, |
| "grad_norm": 1.2390797138214111, |
| "learning_rate": 1.4527143683860822e-06, |
| "loss": 1.1389, |
| "step": 1708 |
| }, |
| { |
| "epoch": 3.9025133282559024, |
| "grad_norm": 1.236459732055664, |
| "learning_rate": 1.449887418795728e-06, |
| "loss": 1.0942, |
| "step": 1709 |
| }, |
| { |
| "epoch": 3.904798172124905, |
| "grad_norm": 1.2225269079208374, |
| "learning_rate": 1.4470620986716857e-06, |
| "loss": 1.105, |
| "step": 1710 |
| }, |
| { |
| "epoch": 3.907083015993907, |
| "grad_norm": 1.2086801528930664, |
| "learning_rate": 1.4442384123980247e-06, |
| "loss": 1.0923, |
| "step": 1711 |
| }, |
| { |
| "epoch": 3.9093678598629094, |
| "grad_norm": 1.2351250648498535, |
| "learning_rate": 1.4414163643562755e-06, |
| "loss": 1.1468, |
| "step": 1712 |
| }, |
| { |
| "epoch": 3.9116527037319115, |
| "grad_norm": 1.251083493232727, |
| "learning_rate": 1.4385959589254271e-06, |
| "loss": 1.104, |
| "step": 1713 |
| }, |
| { |
| "epoch": 3.9139375476009137, |
| "grad_norm": 1.2320407629013062, |
| "learning_rate": 1.4357772004819221e-06, |
| "loss": 1.0572, |
| "step": 1714 |
| }, |
| { |
| "epoch": 3.9162223914699164, |
| "grad_norm": 1.2320278882980347, |
| "learning_rate": 1.4329600933996452e-06, |
| "loss": 1.1187, |
| "step": 1715 |
| }, |
| { |
| "epoch": 3.9185072353389185, |
| "grad_norm": 1.242316722869873, |
| "learning_rate": 1.4301446420499165e-06, |
| "loss": 1.1347, |
| "step": 1716 |
| }, |
| { |
| "epoch": 3.9207920792079207, |
| "grad_norm": 1.2707537412643433, |
| "learning_rate": 1.4273308508014943e-06, |
| "loss": 1.0991, |
| "step": 1717 |
| }, |
| { |
| "epoch": 3.9230769230769234, |
| "grad_norm": 1.2760437726974487, |
| "learning_rate": 1.4245187240205537e-06, |
| "loss": 1.1327, |
| "step": 1718 |
| }, |
| { |
| "epoch": 3.925361766945925, |
| "grad_norm": 1.2317287921905518, |
| "learning_rate": 1.4217082660706893e-06, |
| "loss": 1.1126, |
| "step": 1719 |
| }, |
| { |
| "epoch": 3.9276466108149277, |
| "grad_norm": 1.2433048486709595, |
| "learning_rate": 1.4188994813129075e-06, |
| "loss": 1.0856, |
| "step": 1720 |
| }, |
| { |
| "epoch": 3.92993145468393, |
| "grad_norm": 1.241335391998291, |
| "learning_rate": 1.4160923741056159e-06, |
| "loss": 1.1551, |
| "step": 1721 |
| }, |
| { |
| "epoch": 3.932216298552932, |
| "grad_norm": 1.2815061807632446, |
| "learning_rate": 1.4132869488046224e-06, |
| "loss": 1.1107, |
| "step": 1722 |
| }, |
| { |
| "epoch": 3.9345011424219347, |
| "grad_norm": 1.237248420715332, |
| "learning_rate": 1.410483209763122e-06, |
| "loss": 1.1473, |
| "step": 1723 |
| }, |
| { |
| "epoch": 3.936785986290937, |
| "grad_norm": 1.2185921669006348, |
| "learning_rate": 1.4076811613316933e-06, |
| "loss": 1.1024, |
| "step": 1724 |
| }, |
| { |
| "epoch": 3.939070830159939, |
| "grad_norm": 1.2314651012420654, |
| "learning_rate": 1.4048808078582943e-06, |
| "loss": 1.1163, |
| "step": 1725 |
| }, |
| { |
| "epoch": 3.9413556740289413, |
| "grad_norm": 1.220934271812439, |
| "learning_rate": 1.4020821536882503e-06, |
| "loss": 1.1442, |
| "step": 1726 |
| }, |
| { |
| "epoch": 3.9436405178979435, |
| "grad_norm": 1.239525556564331, |
| "learning_rate": 1.3992852031642497e-06, |
| "loss": 1.1055, |
| "step": 1727 |
| }, |
| { |
| "epoch": 3.945925361766946, |
| "grad_norm": 1.2437052726745605, |
| "learning_rate": 1.3964899606263405e-06, |
| "loss": 1.1038, |
| "step": 1728 |
| }, |
| { |
| "epoch": 3.9482102056359483, |
| "grad_norm": 1.2381411790847778, |
| "learning_rate": 1.3936964304119158e-06, |
| "loss": 1.1116, |
| "step": 1729 |
| }, |
| { |
| "epoch": 3.9504950495049505, |
| "grad_norm": 1.2382675409317017, |
| "learning_rate": 1.390904616855716e-06, |
| "loss": 1.089, |
| "step": 1730 |
| }, |
| { |
| "epoch": 3.9527798933739526, |
| "grad_norm": 1.287566900253296, |
| "learning_rate": 1.388114524289815e-06, |
| "loss": 1.1342, |
| "step": 1731 |
| }, |
| { |
| "epoch": 3.955064737242955, |
| "grad_norm": 1.223980188369751, |
| "learning_rate": 1.385326157043616e-06, |
| "loss": 1.1262, |
| "step": 1732 |
| }, |
| { |
| "epoch": 3.9573495811119574, |
| "grad_norm": 1.2570685148239136, |
| "learning_rate": 1.3825395194438478e-06, |
| "loss": 1.1184, |
| "step": 1733 |
| }, |
| { |
| "epoch": 3.9596344249809596, |
| "grad_norm": 1.213205337524414, |
| "learning_rate": 1.379754615814553e-06, |
| "loss": 1.1161, |
| "step": 1734 |
| }, |
| { |
| "epoch": 3.961919268849962, |
| "grad_norm": 1.2163954973220825, |
| "learning_rate": 1.3769714504770825e-06, |
| "loss": 1.0887, |
| "step": 1735 |
| }, |
| { |
| "epoch": 3.9642041127189644, |
| "grad_norm": 1.2263234853744507, |
| "learning_rate": 1.374190027750094e-06, |
| "loss": 1.0997, |
| "step": 1736 |
| }, |
| { |
| "epoch": 3.9664889565879666, |
| "grad_norm": 1.2027472257614136, |
| "learning_rate": 1.3714103519495378e-06, |
| "loss": 1.0897, |
| "step": 1737 |
| }, |
| { |
| "epoch": 3.968773800456969, |
| "grad_norm": 1.250954270362854, |
| "learning_rate": 1.3686324273886531e-06, |
| "loss": 1.1171, |
| "step": 1738 |
| }, |
| { |
| "epoch": 3.971058644325971, |
| "grad_norm": 1.2648512125015259, |
| "learning_rate": 1.3658562583779644e-06, |
| "loss": 1.0782, |
| "step": 1739 |
| }, |
| { |
| "epoch": 3.973343488194973, |
| "grad_norm": 1.2568615674972534, |
| "learning_rate": 1.3630818492252695e-06, |
| "loss": 1.1025, |
| "step": 1740 |
| }, |
| { |
| "epoch": 3.975628332063976, |
| "grad_norm": 1.304031491279602, |
| "learning_rate": 1.3603092042356387e-06, |
| "loss": 1.0991, |
| "step": 1741 |
| }, |
| { |
| "epoch": 3.977913175932978, |
| "grad_norm": 1.263646125793457, |
| "learning_rate": 1.3575383277114007e-06, |
| "loss": 1.149, |
| "step": 1742 |
| }, |
| { |
| "epoch": 3.98019801980198, |
| "grad_norm": 1.2489157915115356, |
| "learning_rate": 1.354769223952142e-06, |
| "loss": 1.1303, |
| "step": 1743 |
| }, |
| { |
| "epoch": 3.9824828636709824, |
| "grad_norm": 1.2458829879760742, |
| "learning_rate": 1.3520018972547e-06, |
| "loss": 1.1047, |
| "step": 1744 |
| }, |
| { |
| "epoch": 3.9847677075399845, |
| "grad_norm": 1.2482655048370361, |
| "learning_rate": 1.3492363519131519e-06, |
| "loss": 1.1002, |
| "step": 1745 |
| }, |
| { |
| "epoch": 3.987052551408987, |
| "grad_norm": 1.2878859043121338, |
| "learning_rate": 1.3464725922188109e-06, |
| "loss": 1.1344, |
| "step": 1746 |
| }, |
| { |
| "epoch": 3.9893373952779894, |
| "grad_norm": 1.227795124053955, |
| "learning_rate": 1.3437106224602226e-06, |
| "loss": 1.1131, |
| "step": 1747 |
| }, |
| { |
| "epoch": 3.9916222391469915, |
| "grad_norm": 1.2362041473388672, |
| "learning_rate": 1.3409504469231507e-06, |
| "loss": 1.1014, |
| "step": 1748 |
| }, |
| { |
| "epoch": 3.993907083015994, |
| "grad_norm": 1.2575008869171143, |
| "learning_rate": 1.3381920698905788e-06, |
| "loss": 1.0759, |
| "step": 1749 |
| }, |
| { |
| "epoch": 3.996191926884996, |
| "grad_norm": 1.225038766860962, |
| "learning_rate": 1.3354354956426973e-06, |
| "loss": 1.1333, |
| "step": 1750 |
| }, |
| { |
| "epoch": 3.9984767707539985, |
| "grad_norm": 1.2729361057281494, |
| "learning_rate": 1.3326807284568984e-06, |
| "loss": 1.0851, |
| "step": 1751 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 1.2729361057281494, |
| "learning_rate": 1.3299277726077742e-06, |
| "loss": 1.1285, |
| "step": 1752 |
| }, |
| { |
| "epoch": 4.002284843869003, |
| "grad_norm": 1.7938593626022339, |
| "learning_rate": 1.327176632367102e-06, |
| "loss": 1.1102, |
| "step": 1753 |
| }, |
| { |
| "epoch": 4.004569687738004, |
| "grad_norm": 1.2422128915786743, |
| "learning_rate": 1.3244273120038434e-06, |
| "loss": 1.0829, |
| "step": 1754 |
| }, |
| { |
| "epoch": 4.006854531607007, |
| "grad_norm": 1.2498319149017334, |
| "learning_rate": 1.3216798157841373e-06, |
| "loss": 1.0981, |
| "step": 1755 |
| }, |
| { |
| "epoch": 4.009139375476009, |
| "grad_norm": 1.2300645112991333, |
| "learning_rate": 1.3189341479712892e-06, |
| "loss": 1.0846, |
| "step": 1756 |
| }, |
| { |
| "epoch": 4.011424219345011, |
| "grad_norm": 1.227906346321106, |
| "learning_rate": 1.3161903128257714e-06, |
| "loss": 1.1038, |
| "step": 1757 |
| }, |
| { |
| "epoch": 4.013709063214014, |
| "grad_norm": 1.248955488204956, |
| "learning_rate": 1.3134483146052088e-06, |
| "loss": 1.0592, |
| "step": 1758 |
| }, |
| { |
| "epoch": 4.015993907083016, |
| "grad_norm": 1.2333277463912964, |
| "learning_rate": 1.3107081575643766e-06, |
| "loss": 1.0929, |
| "step": 1759 |
| }, |
| { |
| "epoch": 4.018278750952018, |
| "grad_norm": 1.2562737464904785, |
| "learning_rate": 1.3079698459551955e-06, |
| "loss": 1.1445, |
| "step": 1760 |
| }, |
| { |
| "epoch": 4.020563594821021, |
| "grad_norm": 1.2237579822540283, |
| "learning_rate": 1.30523338402672e-06, |
| "loss": 1.1176, |
| "step": 1761 |
| }, |
| { |
| "epoch": 4.022848438690023, |
| "grad_norm": 1.2542140483856201, |
| "learning_rate": 1.3024987760251345e-06, |
| "loss": 1.0441, |
| "step": 1762 |
| }, |
| { |
| "epoch": 4.025133282559025, |
| "grad_norm": 1.2324272394180298, |
| "learning_rate": 1.2997660261937489e-06, |
| "loss": 1.0705, |
| "step": 1763 |
| }, |
| { |
| "epoch": 4.027418126428027, |
| "grad_norm": 1.271546721458435, |
| "learning_rate": 1.2970351387729875e-06, |
| "loss": 1.0677, |
| "step": 1764 |
| }, |
| { |
| "epoch": 4.02970297029703, |
| "grad_norm": 1.2349035739898682, |
| "learning_rate": 1.2943061180003862e-06, |
| "loss": 1.0884, |
| "step": 1765 |
| }, |
| { |
| "epoch": 4.031987814166032, |
| "grad_norm": 1.266993761062622, |
| "learning_rate": 1.291578968110584e-06, |
| "loss": 1.1132, |
| "step": 1766 |
| }, |
| { |
| "epoch": 4.034272658035034, |
| "grad_norm": 1.2842568159103394, |
| "learning_rate": 1.288853693335314e-06, |
| "loss": 1.0574, |
| "step": 1767 |
| }, |
| { |
| "epoch": 4.036557501904037, |
| "grad_norm": 1.2419517040252686, |
| "learning_rate": 1.286130297903406e-06, |
| "loss": 1.0794, |
| "step": 1768 |
| }, |
| { |
| "epoch": 4.0388423457730385, |
| "grad_norm": 1.3006045818328857, |
| "learning_rate": 1.2834087860407679e-06, |
| "loss": 1.1689, |
| "step": 1769 |
| }, |
| { |
| "epoch": 4.041127189642041, |
| "grad_norm": 1.2670701742172241, |
| "learning_rate": 1.2806891619703858e-06, |
| "loss": 1.091, |
| "step": 1770 |
| }, |
| { |
| "epoch": 4.043412033511044, |
| "grad_norm": 1.2653870582580566, |
| "learning_rate": 1.2779714299123194e-06, |
| "loss": 1.0717, |
| "step": 1771 |
| }, |
| { |
| "epoch": 4.0456968773800455, |
| "grad_norm": 1.2377089262008667, |
| "learning_rate": 1.2752555940836891e-06, |
| "loss": 1.1001, |
| "step": 1772 |
| }, |
| { |
| "epoch": 4.047981721249048, |
| "grad_norm": 1.2586807012557983, |
| "learning_rate": 1.2725416586986766e-06, |
| "loss": 1.1034, |
| "step": 1773 |
| }, |
| { |
| "epoch": 4.05026656511805, |
| "grad_norm": 1.2842795848846436, |
| "learning_rate": 1.2698296279685113e-06, |
| "loss": 1.0828, |
| "step": 1774 |
| }, |
| { |
| "epoch": 4.0525514089870525, |
| "grad_norm": 1.2472583055496216, |
| "learning_rate": 1.267119506101467e-06, |
| "loss": 1.0991, |
| "step": 1775 |
| }, |
| { |
| "epoch": 4.054836252856055, |
| "grad_norm": 1.280466914176941, |
| "learning_rate": 1.2644112973028593e-06, |
| "loss": 1.0746, |
| "step": 1776 |
| }, |
| { |
| "epoch": 4.057121096725057, |
| "grad_norm": 1.2883317470550537, |
| "learning_rate": 1.2617050057750322e-06, |
| "loss": 1.0711, |
| "step": 1777 |
| }, |
| { |
| "epoch": 4.0594059405940595, |
| "grad_norm": 1.2602671384811401, |
| "learning_rate": 1.2590006357173535e-06, |
| "loss": 1.0842, |
| "step": 1778 |
| }, |
| { |
| "epoch": 4.061690784463062, |
| "grad_norm": 1.245571494102478, |
| "learning_rate": 1.256298191326214e-06, |
| "loss": 1.0934, |
| "step": 1779 |
| }, |
| { |
| "epoch": 4.063975628332064, |
| "grad_norm": 1.2416950464248657, |
| "learning_rate": 1.2535976767950111e-06, |
| "loss": 1.0815, |
| "step": 1780 |
| }, |
| { |
| "epoch": 4.0662604722010665, |
| "grad_norm": 1.2669044733047485, |
| "learning_rate": 1.2508990963141527e-06, |
| "loss": 1.0961, |
| "step": 1781 |
| }, |
| { |
| "epoch": 4.068545316070068, |
| "grad_norm": 1.2506545782089233, |
| "learning_rate": 1.248202454071042e-06, |
| "loss": 1.0364, |
| "step": 1782 |
| }, |
| { |
| "epoch": 4.070830159939071, |
| "grad_norm": 1.232606291770935, |
| "learning_rate": 1.2455077542500748e-06, |
| "loss": 1.0741, |
| "step": 1783 |
| }, |
| { |
| "epoch": 4.0731150038080735, |
| "grad_norm": 1.2768986225128174, |
| "learning_rate": 1.2428150010326356e-06, |
| "loss": 1.1313, |
| "step": 1784 |
| }, |
| { |
| "epoch": 4.075399847677075, |
| "grad_norm": 1.2605336904525757, |
| "learning_rate": 1.2401241985970857e-06, |
| "loss": 1.1041, |
| "step": 1785 |
| }, |
| { |
| "epoch": 4.077684691546078, |
| "grad_norm": 1.264955759048462, |
| "learning_rate": 1.2374353511187593e-06, |
| "loss": 1.0845, |
| "step": 1786 |
| }, |
| { |
| "epoch": 4.07996953541508, |
| "grad_norm": 1.2806904315948486, |
| "learning_rate": 1.23474846276996e-06, |
| "loss": 1.0702, |
| "step": 1787 |
| }, |
| { |
| "epoch": 4.082254379284082, |
| "grad_norm": 1.280799150466919, |
| "learning_rate": 1.2320635377199469e-06, |
| "loss": 1.1091, |
| "step": 1788 |
| }, |
| { |
| "epoch": 4.084539223153085, |
| "grad_norm": 1.2656049728393555, |
| "learning_rate": 1.2293805801349375e-06, |
| "loss": 1.104, |
| "step": 1789 |
| }, |
| { |
| "epoch": 4.086824067022087, |
| "grad_norm": 1.304160714149475, |
| "learning_rate": 1.2266995941780934e-06, |
| "loss": 1.0992, |
| "step": 1790 |
| }, |
| { |
| "epoch": 4.089108910891089, |
| "grad_norm": 1.2726844549179077, |
| "learning_rate": 1.224020584009516e-06, |
| "loss": 1.0866, |
| "step": 1791 |
| }, |
| { |
| "epoch": 4.091393754760092, |
| "grad_norm": 1.2950150966644287, |
| "learning_rate": 1.221343553786244e-06, |
| "loss": 1.0824, |
| "step": 1792 |
| }, |
| { |
| "epoch": 4.093678598629094, |
| "grad_norm": 1.2644156217575073, |
| "learning_rate": 1.2186685076622416e-06, |
| "loss": 1.0926, |
| "step": 1793 |
| }, |
| { |
| "epoch": 4.095963442498096, |
| "grad_norm": 1.2528104782104492, |
| "learning_rate": 1.2159954497883933e-06, |
| "loss": 1.0754, |
| "step": 1794 |
| }, |
| { |
| "epoch": 4.098248286367098, |
| "grad_norm": 1.2485560178756714, |
| "learning_rate": 1.2133243843125012e-06, |
| "loss": 1.0922, |
| "step": 1795 |
| }, |
| { |
| "epoch": 4.1005331302361006, |
| "grad_norm": 1.2408583164215088, |
| "learning_rate": 1.210655315379275e-06, |
| "loss": 1.0541, |
| "step": 1796 |
| }, |
| { |
| "epoch": 4.102817974105103, |
| "grad_norm": 1.2372719049453735, |
| "learning_rate": 1.2079882471303244e-06, |
| "loss": 1.1071, |
| "step": 1797 |
| }, |
| { |
| "epoch": 4.105102817974105, |
| "grad_norm": 1.2654283046722412, |
| "learning_rate": 1.2053231837041576e-06, |
| "loss": 1.1031, |
| "step": 1798 |
| }, |
| { |
| "epoch": 4.1073876618431076, |
| "grad_norm": 1.2387542724609375, |
| "learning_rate": 1.2026601292361696e-06, |
| "loss": 1.0945, |
| "step": 1799 |
| }, |
| { |
| "epoch": 4.109672505712109, |
| "grad_norm": 1.2538166046142578, |
| "learning_rate": 1.1999990878586374e-06, |
| "loss": 1.1081, |
| "step": 1800 |
| }, |
| { |
| "epoch": 4.111957349581112, |
| "grad_norm": 1.2714624404907227, |
| "learning_rate": 1.1973400637007179e-06, |
| "loss": 1.119, |
| "step": 1801 |
| }, |
| { |
| "epoch": 4.1142421934501145, |
| "grad_norm": 1.2416982650756836, |
| "learning_rate": 1.1946830608884335e-06, |
| "loss": 1.1051, |
| "step": 1802 |
| }, |
| { |
| "epoch": 4.116527037319116, |
| "grad_norm": 1.2721920013427734, |
| "learning_rate": 1.192028083544675e-06, |
| "loss": 1.1145, |
| "step": 1803 |
| }, |
| { |
| "epoch": 4.118811881188119, |
| "grad_norm": 1.3008158206939697, |
| "learning_rate": 1.1893751357891854e-06, |
| "loss": 1.0883, |
| "step": 1804 |
| }, |
| { |
| "epoch": 4.1210967250571215, |
| "grad_norm": 1.2460572719573975, |
| "learning_rate": 1.1867242217385599e-06, |
| "loss": 1.1278, |
| "step": 1805 |
| }, |
| { |
| "epoch": 4.123381568926123, |
| "grad_norm": 1.2478859424591064, |
| "learning_rate": 1.1840753455062407e-06, |
| "loss": 1.1538, |
| "step": 1806 |
| }, |
| { |
| "epoch": 4.125666412795126, |
| "grad_norm": 1.2536922693252563, |
| "learning_rate": 1.1814285112025044e-06, |
| "loss": 1.0893, |
| "step": 1807 |
| }, |
| { |
| "epoch": 4.127951256664128, |
| "grad_norm": 1.2688579559326172, |
| "learning_rate": 1.1787837229344602e-06, |
| "loss": 1.0969, |
| "step": 1808 |
| }, |
| { |
| "epoch": 4.13023610053313, |
| "grad_norm": 1.2402559518814087, |
| "learning_rate": 1.1761409848060438e-06, |
| "loss": 1.0974, |
| "step": 1809 |
| }, |
| { |
| "epoch": 4.132520944402133, |
| "grad_norm": 1.2625503540039062, |
| "learning_rate": 1.173500300918007e-06, |
| "loss": 1.1134, |
| "step": 1810 |
| }, |
| { |
| "epoch": 4.134805788271135, |
| "grad_norm": 1.3044812679290771, |
| "learning_rate": 1.1708616753679177e-06, |
| "loss": 1.0311, |
| "step": 1811 |
| }, |
| { |
| "epoch": 4.137090632140137, |
| "grad_norm": 1.2588051557540894, |
| "learning_rate": 1.168225112250147e-06, |
| "loss": 1.0805, |
| "step": 1812 |
| }, |
| { |
| "epoch": 4.139375476009139, |
| "grad_norm": 1.3075844049453735, |
| "learning_rate": 1.1655906156558653e-06, |
| "loss": 1.1115, |
| "step": 1813 |
| }, |
| { |
| "epoch": 4.141660319878142, |
| "grad_norm": 1.283777117729187, |
| "learning_rate": 1.1629581896730394e-06, |
| "loss": 1.0912, |
| "step": 1814 |
| }, |
| { |
| "epoch": 4.143945163747144, |
| "grad_norm": 1.2859684228897095, |
| "learning_rate": 1.1603278383864206e-06, |
| "loss": 1.0894, |
| "step": 1815 |
| }, |
| { |
| "epoch": 4.146230007616146, |
| "grad_norm": 1.2679686546325684, |
| "learning_rate": 1.1576995658775405e-06, |
| "loss": 1.0892, |
| "step": 1816 |
| }, |
| { |
| "epoch": 4.148514851485149, |
| "grad_norm": 1.3373825550079346, |
| "learning_rate": 1.155073376224708e-06, |
| "loss": 1.0529, |
| "step": 1817 |
| }, |
| { |
| "epoch": 4.15079969535415, |
| "grad_norm": 1.2458816766738892, |
| "learning_rate": 1.152449273502996e-06, |
| "loss": 1.098, |
| "step": 1818 |
| }, |
| { |
| "epoch": 4.153084539223153, |
| "grad_norm": 1.270310878753662, |
| "learning_rate": 1.1498272617842438e-06, |
| "loss": 1.1336, |
| "step": 1819 |
| }, |
| { |
| "epoch": 4.155369383092156, |
| "grad_norm": 1.2812644243240356, |
| "learning_rate": 1.147207345137042e-06, |
| "loss": 1.0748, |
| "step": 1820 |
| }, |
| { |
| "epoch": 4.157654226961157, |
| "grad_norm": 1.2596019506454468, |
| "learning_rate": 1.1445895276267307e-06, |
| "loss": 1.08, |
| "step": 1821 |
| }, |
| { |
| "epoch": 4.15993907083016, |
| "grad_norm": 1.231399416923523, |
| "learning_rate": 1.1419738133153963e-06, |
| "loss": 1.1007, |
| "step": 1822 |
| }, |
| { |
| "epoch": 4.162223914699163, |
| "grad_norm": 1.272637963294983, |
| "learning_rate": 1.139360206261858e-06, |
| "loss": 1.0918, |
| "step": 1823 |
| }, |
| { |
| "epoch": 4.164508758568164, |
| "grad_norm": 1.2707762718200684, |
| "learning_rate": 1.1367487105216655e-06, |
| "loss": 1.1117, |
| "step": 1824 |
| }, |
| { |
| "epoch": 4.166793602437167, |
| "grad_norm": 1.2958250045776367, |
| "learning_rate": 1.1341393301470945e-06, |
| "loss": 1.0975, |
| "step": 1825 |
| }, |
| { |
| "epoch": 4.169078446306169, |
| "grad_norm": 1.2849308252334595, |
| "learning_rate": 1.131532069187136e-06, |
| "loss": 1.1389, |
| "step": 1826 |
| }, |
| { |
| "epoch": 4.171363290175171, |
| "grad_norm": 1.2934991121292114, |
| "learning_rate": 1.1289269316874944e-06, |
| "loss": 1.1119, |
| "step": 1827 |
| }, |
| { |
| "epoch": 4.173648134044174, |
| "grad_norm": 1.2864172458648682, |
| "learning_rate": 1.1263239216905775e-06, |
| "loss": 1.0708, |
| "step": 1828 |
| }, |
| { |
| "epoch": 4.175932977913176, |
| "grad_norm": 1.262462854385376, |
| "learning_rate": 1.1237230432354912e-06, |
| "loss": 1.0952, |
| "step": 1829 |
| }, |
| { |
| "epoch": 4.178217821782178, |
| "grad_norm": 1.2699599266052246, |
| "learning_rate": 1.1211243003580368e-06, |
| "loss": 1.1096, |
| "step": 1830 |
| }, |
| { |
| "epoch": 4.18050266565118, |
| "grad_norm": 1.2328681945800781, |
| "learning_rate": 1.1185276970906994e-06, |
| "loss": 1.086, |
| "step": 1831 |
| }, |
| { |
| "epoch": 4.182787509520183, |
| "grad_norm": 1.2541395425796509, |
| "learning_rate": 1.1159332374626434e-06, |
| "loss": 1.1087, |
| "step": 1832 |
| }, |
| { |
| "epoch": 4.185072353389185, |
| "grad_norm": 1.269580364227295, |
| "learning_rate": 1.11334092549971e-06, |
| "loss": 1.0761, |
| "step": 1833 |
| }, |
| { |
| "epoch": 4.187357197258187, |
| "grad_norm": 1.2778130769729614, |
| "learning_rate": 1.1107507652244043e-06, |
| "loss": 1.0812, |
| "step": 1834 |
| }, |
| { |
| "epoch": 4.18964204112719, |
| "grad_norm": 1.2541184425354004, |
| "learning_rate": 1.1081627606558964e-06, |
| "loss": 1.0944, |
| "step": 1835 |
| }, |
| { |
| "epoch": 4.191926884996192, |
| "grad_norm": 1.2608321905136108, |
| "learning_rate": 1.105576915810008e-06, |
| "loss": 1.1131, |
| "step": 1836 |
| }, |
| { |
| "epoch": 4.194211728865194, |
| "grad_norm": 1.2532767057418823, |
| "learning_rate": 1.1029932346992101e-06, |
| "loss": 1.0743, |
| "step": 1837 |
| }, |
| { |
| "epoch": 4.196496572734197, |
| "grad_norm": 1.2791781425476074, |
| "learning_rate": 1.1004117213326188e-06, |
| "loss": 1.0794, |
| "step": 1838 |
| }, |
| { |
| "epoch": 4.1987814166031985, |
| "grad_norm": 1.2686764001846313, |
| "learning_rate": 1.0978323797159837e-06, |
| "loss": 1.0632, |
| "step": 1839 |
| }, |
| { |
| "epoch": 4.201066260472201, |
| "grad_norm": 1.2395896911621094, |
| "learning_rate": 1.0952552138516844e-06, |
| "loss": 1.1195, |
| "step": 1840 |
| }, |
| { |
| "epoch": 4.203351104341204, |
| "grad_norm": 1.2782765626907349, |
| "learning_rate": 1.0926802277387278e-06, |
| "loss": 1.0806, |
| "step": 1841 |
| }, |
| { |
| "epoch": 4.2056359482102055, |
| "grad_norm": 1.289858341217041, |
| "learning_rate": 1.0901074253727338e-06, |
| "loss": 1.0537, |
| "step": 1842 |
| }, |
| { |
| "epoch": 4.207920792079208, |
| "grad_norm": 1.2895413637161255, |
| "learning_rate": 1.0875368107459377e-06, |
| "loss": 1.0683, |
| "step": 1843 |
| }, |
| { |
| "epoch": 4.21020563594821, |
| "grad_norm": 1.2379080057144165, |
| "learning_rate": 1.0849683878471778e-06, |
| "loss": 1.1203, |
| "step": 1844 |
| }, |
| { |
| "epoch": 4.2124904798172125, |
| "grad_norm": 1.2541252374649048, |
| "learning_rate": 1.0824021606618915e-06, |
| "loss": 1.1216, |
| "step": 1845 |
| }, |
| { |
| "epoch": 4.214775323686215, |
| "grad_norm": 1.2726837396621704, |
| "learning_rate": 1.079838133172111e-06, |
| "loss": 1.1148, |
| "step": 1846 |
| }, |
| { |
| "epoch": 4.217060167555217, |
| "grad_norm": 1.256637692451477, |
| "learning_rate": 1.077276309356453e-06, |
| "loss": 1.0731, |
| "step": 1847 |
| }, |
| { |
| "epoch": 4.2193450114242195, |
| "grad_norm": 1.2752294540405273, |
| "learning_rate": 1.0747166931901152e-06, |
| "loss": 1.1408, |
| "step": 1848 |
| }, |
| { |
| "epoch": 4.221629855293221, |
| "grad_norm": 1.3284248113632202, |
| "learning_rate": 1.0721592886448718e-06, |
| "loss": 1.0814, |
| "step": 1849 |
| }, |
| { |
| "epoch": 4.223914699162224, |
| "grad_norm": 1.2901482582092285, |
| "learning_rate": 1.0696040996890613e-06, |
| "loss": 1.0844, |
| "step": 1850 |
| }, |
| { |
| "epoch": 4.2261995430312265, |
| "grad_norm": 1.2776188850402832, |
| "learning_rate": 1.0670511302875892e-06, |
| "loss": 1.1033, |
| "step": 1851 |
| }, |
| { |
| "epoch": 4.228484386900228, |
| "grad_norm": 1.312633991241455, |
| "learning_rate": 1.0645003844019123e-06, |
| "loss": 1.0575, |
| "step": 1852 |
| }, |
| { |
| "epoch": 4.230769230769231, |
| "grad_norm": 1.2696623802185059, |
| "learning_rate": 1.0619518659900387e-06, |
| "loss": 1.0974, |
| "step": 1853 |
| }, |
| { |
| "epoch": 4.2330540746382335, |
| "grad_norm": 1.32041597366333, |
| "learning_rate": 1.0594055790065228e-06, |
| "loss": 1.0983, |
| "step": 1854 |
| }, |
| { |
| "epoch": 4.235338918507235, |
| "grad_norm": 1.2588789463043213, |
| "learning_rate": 1.0568615274024521e-06, |
| "loss": 1.1283, |
| "step": 1855 |
| }, |
| { |
| "epoch": 4.237623762376238, |
| "grad_norm": 1.2674574851989746, |
| "learning_rate": 1.0543197151254475e-06, |
| "loss": 1.1213, |
| "step": 1856 |
| }, |
| { |
| "epoch": 4.23990860624524, |
| "grad_norm": 1.2549033164978027, |
| "learning_rate": 1.0517801461196568e-06, |
| "loss": 1.0895, |
| "step": 1857 |
| }, |
| { |
| "epoch": 4.242193450114242, |
| "grad_norm": 1.3073495626449585, |
| "learning_rate": 1.049242824325743e-06, |
| "loss": 1.0966, |
| "step": 1858 |
| }, |
| { |
| "epoch": 4.244478293983245, |
| "grad_norm": 1.2715238332748413, |
| "learning_rate": 1.0467077536808867e-06, |
| "loss": 1.0803, |
| "step": 1859 |
| }, |
| { |
| "epoch": 4.246763137852247, |
| "grad_norm": 1.2352854013442993, |
| "learning_rate": 1.0441749381187718e-06, |
| "loss": 1.0713, |
| "step": 1860 |
| }, |
| { |
| "epoch": 4.249047981721249, |
| "grad_norm": 1.235982894897461, |
| "learning_rate": 1.0416443815695831e-06, |
| "loss": 1.0961, |
| "step": 1861 |
| }, |
| { |
| "epoch": 4.251332825590251, |
| "grad_norm": 1.288275122642517, |
| "learning_rate": 1.0391160879600034e-06, |
| "loss": 1.1036, |
| "step": 1862 |
| }, |
| { |
| "epoch": 4.253617669459254, |
| "grad_norm": 1.2842768430709839, |
| "learning_rate": 1.0365900612132007e-06, |
| "loss": 1.0729, |
| "step": 1863 |
| }, |
| { |
| "epoch": 4.255902513328256, |
| "grad_norm": 1.2533109188079834, |
| "learning_rate": 1.0340663052488257e-06, |
| "loss": 1.1035, |
| "step": 1864 |
| }, |
| { |
| "epoch": 4.258187357197258, |
| "grad_norm": 1.3068199157714844, |
| "learning_rate": 1.0315448239830083e-06, |
| "loss": 1.0985, |
| "step": 1865 |
| }, |
| { |
| "epoch": 4.260472201066261, |
| "grad_norm": 1.2762842178344727, |
| "learning_rate": 1.029025621328345e-06, |
| "loss": 1.0613, |
| "step": 1866 |
| }, |
| { |
| "epoch": 4.262757044935263, |
| "grad_norm": 1.3192155361175537, |
| "learning_rate": 1.0265087011939004e-06, |
| "loss": 1.0855, |
| "step": 1867 |
| }, |
| { |
| "epoch": 4.265041888804265, |
| "grad_norm": 1.2514667510986328, |
| "learning_rate": 1.0239940674851943e-06, |
| "loss": 1.0835, |
| "step": 1868 |
| }, |
| { |
| "epoch": 4.267326732673268, |
| "grad_norm": 1.2197521924972534, |
| "learning_rate": 1.0214817241041985e-06, |
| "loss": 1.0858, |
| "step": 1869 |
| }, |
| { |
| "epoch": 4.269611576542269, |
| "grad_norm": 1.271289348602295, |
| "learning_rate": 1.0189716749493345e-06, |
| "loss": 1.1279, |
| "step": 1870 |
| }, |
| { |
| "epoch": 4.271896420411272, |
| "grad_norm": 1.274689793586731, |
| "learning_rate": 1.0164639239154603e-06, |
| "loss": 1.0582, |
| "step": 1871 |
| }, |
| { |
| "epoch": 4.274181264280275, |
| "grad_norm": 1.2924543619155884, |
| "learning_rate": 1.0139584748938678e-06, |
| "loss": 1.0721, |
| "step": 1872 |
| }, |
| { |
| "epoch": 4.276466108149276, |
| "grad_norm": 1.2583580017089844, |
| "learning_rate": 1.01145533177228e-06, |
| "loss": 1.0925, |
| "step": 1873 |
| }, |
| { |
| "epoch": 4.278750952018279, |
| "grad_norm": 1.3069345951080322, |
| "learning_rate": 1.0089544984348378e-06, |
| "loss": 1.1106, |
| "step": 1874 |
| }, |
| { |
| "epoch": 4.281035795887281, |
| "grad_norm": 1.2459675073623657, |
| "learning_rate": 1.0064559787621018e-06, |
| "loss": 1.0756, |
| "step": 1875 |
| }, |
| { |
| "epoch": 4.283320639756283, |
| "grad_norm": 1.2553149461746216, |
| "learning_rate": 1.0039597766310408e-06, |
| "loss": 1.1231, |
| "step": 1876 |
| }, |
| { |
| "epoch": 4.285605483625286, |
| "grad_norm": 1.2836211919784546, |
| "learning_rate": 1.0014658959150267e-06, |
| "loss": 1.0919, |
| "step": 1877 |
| }, |
| { |
| "epoch": 4.287890327494288, |
| "grad_norm": 1.303640365600586, |
| "learning_rate": 9.98974340483829e-07, |
| "loss": 1.1021, |
| "step": 1878 |
| }, |
| { |
| "epoch": 4.29017517136329, |
| "grad_norm": 1.2877558469772339, |
| "learning_rate": 9.96485114203612e-07, |
| "loss": 1.103, |
| "step": 1879 |
| }, |
| { |
| "epoch": 4.292460015232292, |
| "grad_norm": 1.2467522621154785, |
| "learning_rate": 9.93998220936922e-07, |
| "loss": 1.0964, |
| "step": 1880 |
| }, |
| { |
| "epoch": 4.294744859101295, |
| "grad_norm": 1.29486083984375, |
| "learning_rate": 9.915136645426885e-07, |
| "loss": 1.0804, |
| "step": 1881 |
| }, |
| { |
| "epoch": 4.297029702970297, |
| "grad_norm": 1.274101734161377, |
| "learning_rate": 9.890314488762123e-07, |
| "loss": 1.1205, |
| "step": 1882 |
| }, |
| { |
| "epoch": 4.299314546839299, |
| "grad_norm": 1.2509732246398926, |
| "learning_rate": 9.865515777891626e-07, |
| "loss": 1.0654, |
| "step": 1883 |
| }, |
| { |
| "epoch": 4.301599390708302, |
| "grad_norm": 1.2939627170562744, |
| "learning_rate": 9.840740551295728e-07, |
| "loss": 1.1175, |
| "step": 1884 |
| }, |
| { |
| "epoch": 4.303884234577304, |
| "grad_norm": 1.2567757368087769, |
| "learning_rate": 9.815988847418292e-07, |
| "loss": 1.1102, |
| "step": 1885 |
| }, |
| { |
| "epoch": 4.306169078446306, |
| "grad_norm": 1.2803035974502563, |
| "learning_rate": 9.791260704666688e-07, |
| "loss": 1.0835, |
| "step": 1886 |
| }, |
| { |
| "epoch": 4.308453922315309, |
| "grad_norm": 1.264268398284912, |
| "learning_rate": 9.766556161411744e-07, |
| "loss": 1.0827, |
| "step": 1887 |
| }, |
| { |
| "epoch": 4.31073876618431, |
| "grad_norm": 1.2779146432876587, |
| "learning_rate": 9.741875255987643e-07, |
| "loss": 1.0971, |
| "step": 1888 |
| }, |
| { |
| "epoch": 4.313023610053313, |
| "grad_norm": 1.2511680126190186, |
| "learning_rate": 9.717218026691917e-07, |
| "loss": 1.0842, |
| "step": 1889 |
| }, |
| { |
| "epoch": 4.315308453922316, |
| "grad_norm": 1.2907341718673706, |
| "learning_rate": 9.692584511785333e-07, |
| "loss": 1.083, |
| "step": 1890 |
| }, |
| { |
| "epoch": 4.317593297791317, |
| "grad_norm": 1.30160653591156, |
| "learning_rate": 9.667974749491865e-07, |
| "loss": 1.1403, |
| "step": 1891 |
| }, |
| { |
| "epoch": 4.31987814166032, |
| "grad_norm": 1.2925405502319336, |
| "learning_rate": 9.643388777998652e-07, |
| "loss": 1.0918, |
| "step": 1892 |
| }, |
| { |
| "epoch": 4.322162985529322, |
| "grad_norm": 1.2780344486236572, |
| "learning_rate": 9.618826635455898e-07, |
| "loss": 1.0845, |
| "step": 1893 |
| }, |
| { |
| "epoch": 4.324447829398324, |
| "grad_norm": 1.2781273126602173, |
| "learning_rate": 9.594288359976817e-07, |
| "loss": 1.1027, |
| "step": 1894 |
| }, |
| { |
| "epoch": 4.326732673267327, |
| "grad_norm": 1.2914685010910034, |
| "learning_rate": 9.569773989637628e-07, |
| "loss": 1.0967, |
| "step": 1895 |
| }, |
| { |
| "epoch": 4.329017517136329, |
| "grad_norm": 1.2581839561462402, |
| "learning_rate": 9.54528356247742e-07, |
| "loss": 1.1195, |
| "step": 1896 |
| }, |
| { |
| "epoch": 4.331302361005331, |
| "grad_norm": 1.286125659942627, |
| "learning_rate": 9.520817116498154e-07, |
| "loss": 1.097, |
| "step": 1897 |
| }, |
| { |
| "epoch": 4.333587204874334, |
| "grad_norm": 1.2758541107177734, |
| "learning_rate": 9.496374689664564e-07, |
| "loss": 1.1183, |
| "step": 1898 |
| }, |
| { |
| "epoch": 4.335872048743336, |
| "grad_norm": 1.2484683990478516, |
| "learning_rate": 9.471956319904105e-07, |
| "loss": 1.0762, |
| "step": 1899 |
| }, |
| { |
| "epoch": 4.338156892612338, |
| "grad_norm": 1.2502278089523315, |
| "learning_rate": 9.447562045106939e-07, |
| "loss": 1.1314, |
| "step": 1900 |
| }, |
| { |
| "epoch": 4.34044173648134, |
| "grad_norm": 1.322770118713379, |
| "learning_rate": 9.423191903125803e-07, |
| "loss": 1.116, |
| "step": 1901 |
| }, |
| { |
| "epoch": 4.342726580350343, |
| "grad_norm": 1.2737778425216675, |
| "learning_rate": 9.398845931775991e-07, |
| "loss": 1.0773, |
| "step": 1902 |
| }, |
| { |
| "epoch": 4.345011424219345, |
| "grad_norm": 1.2634154558181763, |
| "learning_rate": 9.374524168835319e-07, |
| "loss": 1.1247, |
| "step": 1903 |
| }, |
| { |
| "epoch": 4.347296268088347, |
| "grad_norm": 1.2655125856399536, |
| "learning_rate": 9.350226652044001e-07, |
| "loss": 1.0569, |
| "step": 1904 |
| }, |
| { |
| "epoch": 4.34958111195735, |
| "grad_norm": 1.3117061853408813, |
| "learning_rate": 9.325953419104666e-07, |
| "loss": 1.1352, |
| "step": 1905 |
| }, |
| { |
| "epoch": 4.3518659558263515, |
| "grad_norm": 1.3022981882095337, |
| "learning_rate": 9.301704507682236e-07, |
| "loss": 1.0685, |
| "step": 1906 |
| }, |
| { |
| "epoch": 4.354150799695354, |
| "grad_norm": 1.2460218667984009, |
| "learning_rate": 9.277479955403887e-07, |
| "loss": 1.0802, |
| "step": 1907 |
| }, |
| { |
| "epoch": 4.356435643564357, |
| "grad_norm": 1.2989197969436646, |
| "learning_rate": 9.253279799859027e-07, |
| "loss": 1.0962, |
| "step": 1908 |
| }, |
| { |
| "epoch": 4.3587204874333585, |
| "grad_norm": 1.275721788406372, |
| "learning_rate": 9.229104078599186e-07, |
| "loss": 1.0964, |
| "step": 1909 |
| }, |
| { |
| "epoch": 4.361005331302361, |
| "grad_norm": 1.2741754055023193, |
| "learning_rate": 9.204952829137967e-07, |
| "loss": 1.0707, |
| "step": 1910 |
| }, |
| { |
| "epoch": 4.363290175171363, |
| "grad_norm": 1.2817518711090088, |
| "learning_rate": 9.180826088951039e-07, |
| "loss": 1.1041, |
| "step": 1911 |
| }, |
| { |
| "epoch": 4.3655750190403655, |
| "grad_norm": 1.2775477170944214, |
| "learning_rate": 9.156723895476007e-07, |
| "loss": 1.1187, |
| "step": 1912 |
| }, |
| { |
| "epoch": 4.367859862909368, |
| "grad_norm": 1.2695951461791992, |
| "learning_rate": 9.13264628611239e-07, |
| "loss": 1.0301, |
| "step": 1913 |
| }, |
| { |
| "epoch": 4.37014470677837, |
| "grad_norm": 1.2773480415344238, |
| "learning_rate": 9.10859329822158e-07, |
| "loss": 1.1125, |
| "step": 1914 |
| }, |
| { |
| "epoch": 4.3724295506473725, |
| "grad_norm": 1.297052025794983, |
| "learning_rate": 9.084564969126733e-07, |
| "loss": 1.0619, |
| "step": 1915 |
| }, |
| { |
| "epoch": 4.374714394516375, |
| "grad_norm": 1.2900358438491821, |
| "learning_rate": 9.060561336112775e-07, |
| "loss": 1.0316, |
| "step": 1916 |
| }, |
| { |
| "epoch": 4.376999238385377, |
| "grad_norm": 1.2947112321853638, |
| "learning_rate": 9.036582436426289e-07, |
| "loss": 1.0878, |
| "step": 1917 |
| }, |
| { |
| "epoch": 4.3792840822543795, |
| "grad_norm": 1.2751237154006958, |
| "learning_rate": 9.012628307275478e-07, |
| "loss": 1.1047, |
| "step": 1918 |
| }, |
| { |
| "epoch": 4.381568926123381, |
| "grad_norm": 1.2209362983703613, |
| "learning_rate": 8.988698985830127e-07, |
| "loss": 1.0891, |
| "step": 1919 |
| }, |
| { |
| "epoch": 4.383853769992384, |
| "grad_norm": 1.2782630920410156, |
| "learning_rate": 8.964794509221508e-07, |
| "loss": 1.1201, |
| "step": 1920 |
| }, |
| { |
| "epoch": 4.3861386138613865, |
| "grad_norm": 1.3166882991790771, |
| "learning_rate": 8.940914914542342e-07, |
| "loss": 1.1204, |
| "step": 1921 |
| }, |
| { |
| "epoch": 4.388423457730388, |
| "grad_norm": 1.2406049966812134, |
| "learning_rate": 8.917060238846764e-07, |
| "loss": 1.0871, |
| "step": 1922 |
| }, |
| { |
| "epoch": 4.390708301599391, |
| "grad_norm": 1.3386300802230835, |
| "learning_rate": 8.893230519150203e-07, |
| "loss": 1.1023, |
| "step": 1923 |
| }, |
| { |
| "epoch": 4.392993145468393, |
| "grad_norm": 1.345792531967163, |
| "learning_rate": 8.869425792429406e-07, |
| "loss": 1.081, |
| "step": 1924 |
| }, |
| { |
| "epoch": 4.395277989337395, |
| "grad_norm": 1.264641284942627, |
| "learning_rate": 8.84564609562231e-07, |
| "loss": 1.1115, |
| "step": 1925 |
| }, |
| { |
| "epoch": 4.397562833206398, |
| "grad_norm": 1.2565428018569946, |
| "learning_rate": 8.821891465628008e-07, |
| "loss": 1.0721, |
| "step": 1926 |
| }, |
| { |
| "epoch": 4.3998476770754, |
| "grad_norm": 1.252386450767517, |
| "learning_rate": 8.798161939306726e-07, |
| "loss": 1.1019, |
| "step": 1927 |
| }, |
| { |
| "epoch": 4.402132520944402, |
| "grad_norm": 1.2885632514953613, |
| "learning_rate": 8.774457553479717e-07, |
| "loss": 1.1058, |
| "step": 1928 |
| }, |
| { |
| "epoch": 4.404417364813405, |
| "grad_norm": 1.2969319820404053, |
| "learning_rate": 8.750778344929211e-07, |
| "loss": 1.1088, |
| "step": 1929 |
| }, |
| { |
| "epoch": 4.406702208682407, |
| "grad_norm": 1.285504698753357, |
| "learning_rate": 8.727124350398409e-07, |
| "loss": 1.0635, |
| "step": 1930 |
| }, |
| { |
| "epoch": 4.408987052551409, |
| "grad_norm": 1.2720234394073486, |
| "learning_rate": 8.703495606591339e-07, |
| "loss": 1.1078, |
| "step": 1931 |
| }, |
| { |
| "epoch": 4.411271896420411, |
| "grad_norm": 1.2709497213363647, |
| "learning_rate": 8.679892150172895e-07, |
| "loss": 1.1214, |
| "step": 1932 |
| }, |
| { |
| "epoch": 4.413556740289414, |
| "grad_norm": 1.2678945064544678, |
| "learning_rate": 8.656314017768694e-07, |
| "loss": 1.106, |
| "step": 1933 |
| }, |
| { |
| "epoch": 4.415841584158416, |
| "grad_norm": 1.2668631076812744, |
| "learning_rate": 8.632761245965071e-07, |
| "loss": 1.1026, |
| "step": 1934 |
| }, |
| { |
| "epoch": 4.418126428027418, |
| "grad_norm": 1.313750147819519, |
| "learning_rate": 8.609233871309025e-07, |
| "loss": 1.0727, |
| "step": 1935 |
| }, |
| { |
| "epoch": 4.420411271896421, |
| "grad_norm": 1.2804429531097412, |
| "learning_rate": 8.58573193030812e-07, |
| "loss": 1.0807, |
| "step": 1936 |
| }, |
| { |
| "epoch": 4.422696115765422, |
| "grad_norm": 1.3013975620269775, |
| "learning_rate": 8.562255459430463e-07, |
| "loss": 1.0885, |
| "step": 1937 |
| }, |
| { |
| "epoch": 4.424980959634425, |
| "grad_norm": 1.257980465888977, |
| "learning_rate": 8.538804495104653e-07, |
| "loss": 1.1049, |
| "step": 1938 |
| }, |
| { |
| "epoch": 4.427265803503428, |
| "grad_norm": 1.2810685634613037, |
| "learning_rate": 8.515379073719687e-07, |
| "loss": 1.0769, |
| "step": 1939 |
| }, |
| { |
| "epoch": 4.429550647372429, |
| "grad_norm": 1.2664494514465332, |
| "learning_rate": 8.491979231624956e-07, |
| "loss": 1.0969, |
| "step": 1940 |
| }, |
| { |
| "epoch": 4.431835491241432, |
| "grad_norm": 1.228659987449646, |
| "learning_rate": 8.468605005130129e-07, |
| "loss": 1.1036, |
| "step": 1941 |
| }, |
| { |
| "epoch": 4.434120335110434, |
| "grad_norm": 1.2892229557037354, |
| "learning_rate": 8.445256430505136e-07, |
| "loss": 1.0789, |
| "step": 1942 |
| }, |
| { |
| "epoch": 4.436405178979436, |
| "grad_norm": 1.253233790397644, |
| "learning_rate": 8.421933543980126e-07, |
| "loss": 1.0987, |
| "step": 1943 |
| }, |
| { |
| "epoch": 4.438690022848439, |
| "grad_norm": 1.2383204698562622, |
| "learning_rate": 8.398636381745356e-07, |
| "loss": 1.0822, |
| "step": 1944 |
| }, |
| { |
| "epoch": 4.440974866717441, |
| "grad_norm": 1.2798322439193726, |
| "learning_rate": 8.375364979951176e-07, |
| "loss": 1.0943, |
| "step": 1945 |
| }, |
| { |
| "epoch": 4.443259710586443, |
| "grad_norm": 1.2920564413070679, |
| "learning_rate": 8.352119374707979e-07, |
| "loss": 1.1353, |
| "step": 1946 |
| }, |
| { |
| "epoch": 4.445544554455446, |
| "grad_norm": 1.282945156097412, |
| "learning_rate": 8.328899602086105e-07, |
| "loss": 1.0711, |
| "step": 1947 |
| }, |
| { |
| "epoch": 4.447829398324448, |
| "grad_norm": 1.2641621828079224, |
| "learning_rate": 8.305705698115835e-07, |
| "loss": 1.082, |
| "step": 1948 |
| }, |
| { |
| "epoch": 4.45011424219345, |
| "grad_norm": 1.2811444997787476, |
| "learning_rate": 8.282537698787288e-07, |
| "loss": 1.0967, |
| "step": 1949 |
| }, |
| { |
| "epoch": 4.452399086062452, |
| "grad_norm": 1.280187964439392, |
| "learning_rate": 8.259395640050385e-07, |
| "loss": 1.0791, |
| "step": 1950 |
| }, |
| { |
| "epoch": 4.454683929931455, |
| "grad_norm": 1.2966340780258179, |
| "learning_rate": 8.236279557814828e-07, |
| "loss": 1.099, |
| "step": 1951 |
| }, |
| { |
| "epoch": 4.456968773800457, |
| "grad_norm": 1.2679872512817383, |
| "learning_rate": 8.213189487949972e-07, |
| "loss": 1.0891, |
| "step": 1952 |
| }, |
| { |
| "epoch": 4.459253617669459, |
| "grad_norm": 1.289146900177002, |
| "learning_rate": 8.19012546628481e-07, |
| "loss": 1.0827, |
| "step": 1953 |
| }, |
| { |
| "epoch": 4.461538461538462, |
| "grad_norm": 1.2679554224014282, |
| "learning_rate": 8.167087528607961e-07, |
| "loss": 1.0916, |
| "step": 1954 |
| }, |
| { |
| "epoch": 4.463823305407463, |
| "grad_norm": 1.2724610567092896, |
| "learning_rate": 8.144075710667524e-07, |
| "loss": 1.0855, |
| "step": 1955 |
| }, |
| { |
| "epoch": 4.466108149276466, |
| "grad_norm": 1.2689306735992432, |
| "learning_rate": 8.121090048171074e-07, |
| "loss": 1.116, |
| "step": 1956 |
| }, |
| { |
| "epoch": 4.468392993145469, |
| "grad_norm": 1.3125536441802979, |
| "learning_rate": 8.098130576785623e-07, |
| "loss": 1.1, |
| "step": 1957 |
| }, |
| { |
| "epoch": 4.47067783701447, |
| "grad_norm": 1.2809594869613647, |
| "learning_rate": 8.075197332137522e-07, |
| "loss": 1.0517, |
| "step": 1958 |
| }, |
| { |
| "epoch": 4.472962680883473, |
| "grad_norm": 1.2979421615600586, |
| "learning_rate": 8.052290349812419e-07, |
| "loss": 1.0963, |
| "step": 1959 |
| }, |
| { |
| "epoch": 4.475247524752476, |
| "grad_norm": 1.252105951309204, |
| "learning_rate": 8.029409665355242e-07, |
| "loss": 1.0863, |
| "step": 1960 |
| }, |
| { |
| "epoch": 4.477532368621477, |
| "grad_norm": 1.2643629312515259, |
| "learning_rate": 8.006555314270079e-07, |
| "loss": 1.0525, |
| "step": 1961 |
| }, |
| { |
| "epoch": 4.47981721249048, |
| "grad_norm": 1.2668061256408691, |
| "learning_rate": 7.98372733202018e-07, |
| "loss": 1.1325, |
| "step": 1962 |
| }, |
| { |
| "epoch": 4.482102056359482, |
| "grad_norm": 1.292205572128296, |
| "learning_rate": 7.960925754027871e-07, |
| "loss": 1.0877, |
| "step": 1963 |
| }, |
| { |
| "epoch": 4.484386900228484, |
| "grad_norm": 1.2493971586227417, |
| "learning_rate": 7.938150615674487e-07, |
| "loss": 1.1041, |
| "step": 1964 |
| }, |
| { |
| "epoch": 4.486671744097487, |
| "grad_norm": 1.2565317153930664, |
| "learning_rate": 7.915401952300372e-07, |
| "loss": 1.0833, |
| "step": 1965 |
| }, |
| { |
| "epoch": 4.488956587966489, |
| "grad_norm": 1.3105082511901855, |
| "learning_rate": 7.892679799204766e-07, |
| "loss": 1.1045, |
| "step": 1966 |
| }, |
| { |
| "epoch": 4.491241431835491, |
| "grad_norm": 1.2809815406799316, |
| "learning_rate": 7.869984191645768e-07, |
| "loss": 1.1091, |
| "step": 1967 |
| }, |
| { |
| "epoch": 4.493526275704493, |
| "grad_norm": 1.2692221403121948, |
| "learning_rate": 7.847315164840311e-07, |
| "loss": 1.1174, |
| "step": 1968 |
| }, |
| { |
| "epoch": 4.495811119573496, |
| "grad_norm": 1.2772730588912964, |
| "learning_rate": 7.82467275396405e-07, |
| "loss": 1.0935, |
| "step": 1969 |
| }, |
| { |
| "epoch": 4.498095963442498, |
| "grad_norm": 1.2484936714172363, |
| "learning_rate": 7.802056994151369e-07, |
| "loss": 1.1058, |
| "step": 1970 |
| }, |
| { |
| "epoch": 4.5003808073115, |
| "grad_norm": 1.2626773118972778, |
| "learning_rate": 7.779467920495285e-07, |
| "loss": 1.0788, |
| "step": 1971 |
| }, |
| { |
| "epoch": 4.502665651180503, |
| "grad_norm": 1.2814165353775024, |
| "learning_rate": 7.756905568047393e-07, |
| "loss": 1.1073, |
| "step": 1972 |
| }, |
| { |
| "epoch": 4.5049504950495045, |
| "grad_norm": 1.2587292194366455, |
| "learning_rate": 7.734369971817854e-07, |
| "loss": 1.095, |
| "step": 1973 |
| }, |
| { |
| "epoch": 4.507235338918507, |
| "grad_norm": 1.2431440353393555, |
| "learning_rate": 7.711861166775289e-07, |
| "loss": 1.0936, |
| "step": 1974 |
| }, |
| { |
| "epoch": 4.50952018278751, |
| "grad_norm": 1.277116060256958, |
| "learning_rate": 7.689379187846743e-07, |
| "loss": 1.0982, |
| "step": 1975 |
| }, |
| { |
| "epoch": 4.5118050266565115, |
| "grad_norm": 1.2661057710647583, |
| "learning_rate": 7.666924069917659e-07, |
| "loss": 1.1299, |
| "step": 1976 |
| }, |
| { |
| "epoch": 4.514089870525514, |
| "grad_norm": 1.2476885318756104, |
| "learning_rate": 7.644495847831771e-07, |
| "loss": 1.0804, |
| "step": 1977 |
| }, |
| { |
| "epoch": 4.516374714394516, |
| "grad_norm": 1.2756448984146118, |
| "learning_rate": 7.622094556391108e-07, |
| "loss": 1.1115, |
| "step": 1978 |
| }, |
| { |
| "epoch": 4.5186595582635185, |
| "grad_norm": 1.2629826068878174, |
| "learning_rate": 7.599720230355889e-07, |
| "loss": 1.1049, |
| "step": 1979 |
| }, |
| { |
| "epoch": 4.520944402132521, |
| "grad_norm": 1.2404588460922241, |
| "learning_rate": 7.577372904444486e-07, |
| "loss": 1.0842, |
| "step": 1980 |
| }, |
| { |
| "epoch": 4.523229246001523, |
| "grad_norm": 1.2706893682479858, |
| "learning_rate": 7.555052613333402e-07, |
| "loss": 1.0619, |
| "step": 1981 |
| }, |
| { |
| "epoch": 4.5255140898705255, |
| "grad_norm": 1.2344950437545776, |
| "learning_rate": 7.532759391657168e-07, |
| "loss": 1.1276, |
| "step": 1982 |
| }, |
| { |
| "epoch": 4.527798933739528, |
| "grad_norm": 1.2568199634552002, |
| "learning_rate": 7.510493274008304e-07, |
| "loss": 1.1366, |
| "step": 1983 |
| }, |
| { |
| "epoch": 4.53008377760853, |
| "grad_norm": 1.330805778503418, |
| "learning_rate": 7.488254294937306e-07, |
| "loss": 1.0899, |
| "step": 1984 |
| }, |
| { |
| "epoch": 4.5323686214775325, |
| "grad_norm": 1.26802659034729, |
| "learning_rate": 7.466042488952521e-07, |
| "loss": 1.1331, |
| "step": 1985 |
| }, |
| { |
| "epoch": 4.534653465346535, |
| "grad_norm": 1.2709177732467651, |
| "learning_rate": 7.443857890520162e-07, |
| "loss": 1.1079, |
| "step": 1986 |
| }, |
| { |
| "epoch": 4.536938309215537, |
| "grad_norm": 1.2737971544265747, |
| "learning_rate": 7.421700534064202e-07, |
| "loss": 1.0821, |
| "step": 1987 |
| }, |
| { |
| "epoch": 4.5392231530845395, |
| "grad_norm": 1.2980129718780518, |
| "learning_rate": 7.399570453966346e-07, |
| "loss": 1.1353, |
| "step": 1988 |
| }, |
| { |
| "epoch": 4.541507996953541, |
| "grad_norm": 1.3661080598831177, |
| "learning_rate": 7.377467684565992e-07, |
| "loss": 1.1105, |
| "step": 1989 |
| }, |
| { |
| "epoch": 4.543792840822544, |
| "grad_norm": 1.2837845087051392, |
| "learning_rate": 7.355392260160141e-07, |
| "loss": 1.0746, |
| "step": 1990 |
| }, |
| { |
| "epoch": 4.5460776846915465, |
| "grad_norm": 1.3188300132751465, |
| "learning_rate": 7.333344215003358e-07, |
| "loss": 1.1099, |
| "step": 1991 |
| }, |
| { |
| "epoch": 4.548362528560548, |
| "grad_norm": 1.2677329778671265, |
| "learning_rate": 7.31132358330775e-07, |
| "loss": 1.0969, |
| "step": 1992 |
| }, |
| { |
| "epoch": 4.550647372429551, |
| "grad_norm": 1.2950503826141357, |
| "learning_rate": 7.289330399242859e-07, |
| "loss": 1.0845, |
| "step": 1993 |
| }, |
| { |
| "epoch": 4.552932216298553, |
| "grad_norm": 1.3180036544799805, |
| "learning_rate": 7.267364696935653e-07, |
| "loss": 1.0998, |
| "step": 1994 |
| }, |
| { |
| "epoch": 4.555217060167555, |
| "grad_norm": 1.3282592296600342, |
| "learning_rate": 7.245426510470452e-07, |
| "loss": 1.0614, |
| "step": 1995 |
| }, |
| { |
| "epoch": 4.557501904036558, |
| "grad_norm": 1.3138798475265503, |
| "learning_rate": 7.223515873888867e-07, |
| "loss": 1.1126, |
| "step": 1996 |
| }, |
| { |
| "epoch": 4.55978674790556, |
| "grad_norm": 1.268276572227478, |
| "learning_rate": 7.201632821189786e-07, |
| "loss": 1.1083, |
| "step": 1997 |
| }, |
| { |
| "epoch": 4.562071591774562, |
| "grad_norm": 1.2721282243728638, |
| "learning_rate": 7.179777386329276e-07, |
| "loss": 1.1111, |
| "step": 1998 |
| }, |
| { |
| "epoch": 4.564356435643564, |
| "grad_norm": 1.292134404182434, |
| "learning_rate": 7.157949603220537e-07, |
| "loss": 1.1061, |
| "step": 1999 |
| }, |
| { |
| "epoch": 4.566641279512567, |
| "grad_norm": 1.289412260055542, |
| "learning_rate": 7.136149505733902e-07, |
| "loss": 1.0958, |
| "step": 2000 |
| }, |
| { |
| "epoch": 4.568926123381569, |
| "grad_norm": 1.267577886581421, |
| "learning_rate": 7.1143771276967e-07, |
| "loss": 1.1199, |
| "step": 2001 |
| }, |
| { |
| "epoch": 4.571210967250571, |
| "grad_norm": 1.307735562324524, |
| "learning_rate": 7.092632502893282e-07, |
| "loss": 1.0818, |
| "step": 2002 |
| }, |
| { |
| "epoch": 4.573495811119574, |
| "grad_norm": 1.2823678255081177, |
| "learning_rate": 7.070915665064915e-07, |
| "loss": 1.0677, |
| "step": 2003 |
| }, |
| { |
| "epoch": 4.575780654988575, |
| "grad_norm": 1.2783132791519165, |
| "learning_rate": 7.049226647909744e-07, |
| "loss": 1.1186, |
| "step": 2004 |
| }, |
| { |
| "epoch": 4.578065498857578, |
| "grad_norm": 1.3067246675491333, |
| "learning_rate": 7.027565485082765e-07, |
| "loss": 1.1063, |
| "step": 2005 |
| }, |
| { |
| "epoch": 4.580350342726581, |
| "grad_norm": 1.2940545082092285, |
| "learning_rate": 7.005932210195738e-07, |
| "loss": 1.1105, |
| "step": 2006 |
| }, |
| { |
| "epoch": 4.582635186595582, |
| "grad_norm": 1.2702580690383911, |
| "learning_rate": 6.984326856817145e-07, |
| "loss": 1.1062, |
| "step": 2007 |
| }, |
| { |
| "epoch": 4.584920030464585, |
| "grad_norm": 1.3609583377838135, |
| "learning_rate": 6.96274945847216e-07, |
| "loss": 1.0907, |
| "step": 2008 |
| }, |
| { |
| "epoch": 4.587204874333588, |
| "grad_norm": 1.2673293352127075, |
| "learning_rate": 6.941200048642558e-07, |
| "loss": 1.0699, |
| "step": 2009 |
| }, |
| { |
| "epoch": 4.589489718202589, |
| "grad_norm": 1.26469886302948, |
| "learning_rate": 6.919678660766705e-07, |
| "loss": 1.079, |
| "step": 2010 |
| }, |
| { |
| "epoch": 4.591774562071592, |
| "grad_norm": 1.269822597503662, |
| "learning_rate": 6.898185328239468e-07, |
| "loss": 1.1097, |
| "step": 2011 |
| }, |
| { |
| "epoch": 4.594059405940594, |
| "grad_norm": 1.3368850946426392, |
| "learning_rate": 6.876720084412181e-07, |
| "loss": 1.1076, |
| "step": 2012 |
| }, |
| { |
| "epoch": 4.596344249809596, |
| "grad_norm": 1.2837021350860596, |
| "learning_rate": 6.85528296259261e-07, |
| "loss": 1.0565, |
| "step": 2013 |
| }, |
| { |
| "epoch": 4.598629093678599, |
| "grad_norm": 1.3289958238601685, |
| "learning_rate": 6.833873996044866e-07, |
| "loss": 1.0746, |
| "step": 2014 |
| }, |
| { |
| "epoch": 4.600913937547601, |
| "grad_norm": 1.3046926259994507, |
| "learning_rate": 6.812493217989368e-07, |
| "loss": 1.1318, |
| "step": 2015 |
| }, |
| { |
| "epoch": 4.603198781416603, |
| "grad_norm": 1.321399211883545, |
| "learning_rate": 6.791140661602826e-07, |
| "loss": 1.0987, |
| "step": 2016 |
| }, |
| { |
| "epoch": 4.605483625285606, |
| "grad_norm": 1.2870993614196777, |
| "learning_rate": 6.769816360018114e-07, |
| "loss": 1.0998, |
| "step": 2017 |
| }, |
| { |
| "epoch": 4.607768469154608, |
| "grad_norm": 1.2852782011032104, |
| "learning_rate": 6.748520346324302e-07, |
| "loss": 1.1106, |
| "step": 2018 |
| }, |
| { |
| "epoch": 4.61005331302361, |
| "grad_norm": 1.3015333414077759, |
| "learning_rate": 6.727252653566543e-07, |
| "loss": 1.1247, |
| "step": 2019 |
| }, |
| { |
| "epoch": 4.612338156892612, |
| "grad_norm": 1.2432022094726562, |
| "learning_rate": 6.70601331474604e-07, |
| "loss": 1.1086, |
| "step": 2020 |
| }, |
| { |
| "epoch": 4.614623000761615, |
| "grad_norm": 1.3117856979370117, |
| "learning_rate": 6.684802362820028e-07, |
| "loss": 1.0555, |
| "step": 2021 |
| }, |
| { |
| "epoch": 4.616907844630617, |
| "grad_norm": 1.3180614709854126, |
| "learning_rate": 6.663619830701662e-07, |
| "loss": 1.0653, |
| "step": 2022 |
| }, |
| { |
| "epoch": 4.619192688499619, |
| "grad_norm": 1.2816510200500488, |
| "learning_rate": 6.642465751260008e-07, |
| "loss": 1.1072, |
| "step": 2023 |
| }, |
| { |
| "epoch": 4.621477532368622, |
| "grad_norm": 1.2523847818374634, |
| "learning_rate": 6.621340157319998e-07, |
| "loss": 1.0875, |
| "step": 2024 |
| }, |
| { |
| "epoch": 4.623762376237623, |
| "grad_norm": 1.2979053258895874, |
| "learning_rate": 6.60024308166233e-07, |
| "loss": 1.1289, |
| "step": 2025 |
| }, |
| { |
| "epoch": 4.626047220106626, |
| "grad_norm": 1.3084304332733154, |
| "learning_rate": 6.57917455702349e-07, |
| "loss": 1.0658, |
| "step": 2026 |
| }, |
| { |
| "epoch": 4.628332063975629, |
| "grad_norm": 1.2778873443603516, |
| "learning_rate": 6.55813461609563e-07, |
| "loss": 1.1038, |
| "step": 2027 |
| }, |
| { |
| "epoch": 4.63061690784463, |
| "grad_norm": 1.283216953277588, |
| "learning_rate": 6.537123291526554e-07, |
| "loss": 1.0868, |
| "step": 2028 |
| }, |
| { |
| "epoch": 4.632901751713633, |
| "grad_norm": 1.2661612033843994, |
| "learning_rate": 6.516140615919677e-07, |
| "loss": 1.0919, |
| "step": 2029 |
| }, |
| { |
| "epoch": 4.635186595582635, |
| "grad_norm": 1.2835884094238281, |
| "learning_rate": 6.495186621833946e-07, |
| "loss": 1.0823, |
| "step": 2030 |
| }, |
| { |
| "epoch": 4.637471439451637, |
| "grad_norm": 1.2521917819976807, |
| "learning_rate": 6.474261341783791e-07, |
| "loss": 1.1061, |
| "step": 2031 |
| }, |
| { |
| "epoch": 4.63975628332064, |
| "grad_norm": 1.2880902290344238, |
| "learning_rate": 6.453364808239129e-07, |
| "loss": 1.1834, |
| "step": 2032 |
| }, |
| { |
| "epoch": 4.642041127189642, |
| "grad_norm": 1.284623384475708, |
| "learning_rate": 6.432497053625228e-07, |
| "loss": 1.1079, |
| "step": 2033 |
| }, |
| { |
| "epoch": 4.644325971058644, |
| "grad_norm": 1.3427263498306274, |
| "learning_rate": 6.411658110322708e-07, |
| "loss": 1.0893, |
| "step": 2034 |
| }, |
| { |
| "epoch": 4.646610814927646, |
| "grad_norm": 1.2741354703903198, |
| "learning_rate": 6.3908480106675e-07, |
| "loss": 1.0809, |
| "step": 2035 |
| }, |
| { |
| "epoch": 4.648895658796649, |
| "grad_norm": 1.3016525506973267, |
| "learning_rate": 6.370066786950766e-07, |
| "loss": 1.1164, |
| "step": 2036 |
| }, |
| { |
| "epoch": 4.651180502665651, |
| "grad_norm": 1.3607993125915527, |
| "learning_rate": 6.349314471418849e-07, |
| "loss": 1.051, |
| "step": 2037 |
| }, |
| { |
| "epoch": 4.653465346534653, |
| "grad_norm": 1.2685065269470215, |
| "learning_rate": 6.328591096273262e-07, |
| "loss": 1.0946, |
| "step": 2038 |
| }, |
| { |
| "epoch": 4.655750190403656, |
| "grad_norm": 1.3018466234207153, |
| "learning_rate": 6.307896693670587e-07, |
| "loss": 1.0744, |
| "step": 2039 |
| }, |
| { |
| "epoch": 4.658035034272658, |
| "grad_norm": 1.2309743165969849, |
| "learning_rate": 6.28723129572247e-07, |
| "loss": 1.062, |
| "step": 2040 |
| }, |
| { |
| "epoch": 4.66031987814166, |
| "grad_norm": 1.2842991352081299, |
| "learning_rate": 6.266594934495534e-07, |
| "loss": 1.1086, |
| "step": 2041 |
| }, |
| { |
| "epoch": 4.662604722010663, |
| "grad_norm": 1.304504156112671, |
| "learning_rate": 6.245987642011344e-07, |
| "loss": 1.085, |
| "step": 2042 |
| }, |
| { |
| "epoch": 4.6648895658796645, |
| "grad_norm": 1.3170430660247803, |
| "learning_rate": 6.225409450246378e-07, |
| "loss": 1.1018, |
| "step": 2043 |
| }, |
| { |
| "epoch": 4.667174409748667, |
| "grad_norm": 1.2923517227172852, |
| "learning_rate": 6.20486039113194e-07, |
| "loss": 1.0784, |
| "step": 2044 |
| }, |
| { |
| "epoch": 4.66945925361767, |
| "grad_norm": 1.282779335975647, |
| "learning_rate": 6.184340496554125e-07, |
| "loss": 1.1002, |
| "step": 2045 |
| }, |
| { |
| "epoch": 4.6717440974866715, |
| "grad_norm": 1.2920050621032715, |
| "learning_rate": 6.1638497983538e-07, |
| "loss": 1.1296, |
| "step": 2046 |
| }, |
| { |
| "epoch": 4.674028941355674, |
| "grad_norm": 1.3137156963348389, |
| "learning_rate": 6.143388328326494e-07, |
| "loss": 1.1436, |
| "step": 2047 |
| }, |
| { |
| "epoch": 4.676313785224677, |
| "grad_norm": 1.3416036367416382, |
| "learning_rate": 6.122956118222417e-07, |
| "loss": 1.0999, |
| "step": 2048 |
| }, |
| { |
| "epoch": 4.6785986290936785, |
| "grad_norm": 1.2436286211013794, |
| "learning_rate": 6.102553199746347e-07, |
| "loss": 1.0668, |
| "step": 2049 |
| }, |
| { |
| "epoch": 4.680883472962681, |
| "grad_norm": 1.2815945148468018, |
| "learning_rate": 6.082179604557617e-07, |
| "loss": 1.0659, |
| "step": 2050 |
| }, |
| { |
| "epoch": 4.683168316831683, |
| "grad_norm": 1.2817378044128418, |
| "learning_rate": 6.061835364270077e-07, |
| "loss": 1.0815, |
| "step": 2051 |
| }, |
| { |
| "epoch": 4.6854531607006855, |
| "grad_norm": 1.2972486019134521, |
| "learning_rate": 6.041520510452003e-07, |
| "loss": 1.0921, |
| "step": 2052 |
| }, |
| { |
| "epoch": 4.687738004569688, |
| "grad_norm": 1.270463466644287, |
| "learning_rate": 6.021235074626077e-07, |
| "loss": 1.0981, |
| "step": 2053 |
| }, |
| { |
| "epoch": 4.69002284843869, |
| "grad_norm": 1.2780388593673706, |
| "learning_rate": 6.000979088269354e-07, |
| "loss": 1.0588, |
| "step": 2054 |
| }, |
| { |
| "epoch": 4.6923076923076925, |
| "grad_norm": 1.2750800848007202, |
| "learning_rate": 5.980752582813157e-07, |
| "loss": 1.0553, |
| "step": 2055 |
| }, |
| { |
| "epoch": 4.694592536176694, |
| "grad_norm": 1.2434440851211548, |
| "learning_rate": 5.960555589643097e-07, |
| "loss": 1.1097, |
| "step": 2056 |
| }, |
| { |
| "epoch": 4.696877380045697, |
| "grad_norm": 1.2729369401931763, |
| "learning_rate": 5.940388140098971e-07, |
| "loss": 1.0809, |
| "step": 2057 |
| }, |
| { |
| "epoch": 4.6991622239146995, |
| "grad_norm": 1.2687339782714844, |
| "learning_rate": 5.92025026547473e-07, |
| "loss": 1.1042, |
| "step": 2058 |
| }, |
| { |
| "epoch": 4.701447067783701, |
| "grad_norm": 1.2984797954559326, |
| "learning_rate": 5.900141997018455e-07, |
| "loss": 1.091, |
| "step": 2059 |
| }, |
| { |
| "epoch": 4.703731911652704, |
| "grad_norm": 1.3156907558441162, |
| "learning_rate": 5.880063365932264e-07, |
| "loss": 1.0744, |
| "step": 2060 |
| }, |
| { |
| "epoch": 4.706016755521706, |
| "grad_norm": 1.2851028442382812, |
| "learning_rate": 5.860014403372291e-07, |
| "loss": 1.0939, |
| "step": 2061 |
| }, |
| { |
| "epoch": 4.708301599390708, |
| "grad_norm": 1.2899366617202759, |
| "learning_rate": 5.839995140448651e-07, |
| "loss": 1.0905, |
| "step": 2062 |
| }, |
| { |
| "epoch": 4.710586443259711, |
| "grad_norm": 1.2499885559082031, |
| "learning_rate": 5.820005608225345e-07, |
| "loss": 1.0711, |
| "step": 2063 |
| }, |
| { |
| "epoch": 4.712871287128713, |
| "grad_norm": 1.2643736600875854, |
| "learning_rate": 5.800045837720272e-07, |
| "loss": 1.1536, |
| "step": 2064 |
| }, |
| { |
| "epoch": 4.715156130997715, |
| "grad_norm": 1.31736421585083, |
| "learning_rate": 5.780115859905125e-07, |
| "loss": 1.0854, |
| "step": 2065 |
| }, |
| { |
| "epoch": 4.717440974866717, |
| "grad_norm": 1.2984356880187988, |
| "learning_rate": 5.760215705705371e-07, |
| "loss": 1.0978, |
| "step": 2066 |
| }, |
| { |
| "epoch": 4.71972581873572, |
| "grad_norm": 1.3062434196472168, |
| "learning_rate": 5.740345406000219e-07, |
| "loss": 1.1127, |
| "step": 2067 |
| }, |
| { |
| "epoch": 4.722010662604722, |
| "grad_norm": 1.2795484066009521, |
| "learning_rate": 5.720504991622528e-07, |
| "loss": 1.0881, |
| "step": 2068 |
| }, |
| { |
| "epoch": 4.724295506473724, |
| "grad_norm": 1.311261773109436, |
| "learning_rate": 5.700694493358791e-07, |
| "loss": 1.0802, |
| "step": 2069 |
| }, |
| { |
| "epoch": 4.726580350342727, |
| "grad_norm": 1.2954013347625732, |
| "learning_rate": 5.680913941949093e-07, |
| "loss": 1.0841, |
| "step": 2070 |
| }, |
| { |
| "epoch": 4.728865194211729, |
| "grad_norm": 1.2342921495437622, |
| "learning_rate": 5.66116336808703e-07, |
| "loss": 1.1164, |
| "step": 2071 |
| }, |
| { |
| "epoch": 4.731150038080731, |
| "grad_norm": 1.2950637340545654, |
| "learning_rate": 5.641442802419706e-07, |
| "loss": 1.0904, |
| "step": 2072 |
| }, |
| { |
| "epoch": 4.733434881949734, |
| "grad_norm": 1.2827471494674683, |
| "learning_rate": 5.621752275547637e-07, |
| "loss": 1.0693, |
| "step": 2073 |
| }, |
| { |
| "epoch": 4.735719725818735, |
| "grad_norm": 1.266324520111084, |
| "learning_rate": 5.602091818024732e-07, |
| "loss": 1.1204, |
| "step": 2074 |
| }, |
| { |
| "epoch": 4.738004569687738, |
| "grad_norm": 1.2678186893463135, |
| "learning_rate": 5.582461460358265e-07, |
| "loss": 1.0701, |
| "step": 2075 |
| }, |
| { |
| "epoch": 4.740289413556741, |
| "grad_norm": 1.2703529596328735, |
| "learning_rate": 5.562861233008774e-07, |
| "loss": 1.0974, |
| "step": 2076 |
| }, |
| { |
| "epoch": 4.742574257425742, |
| "grad_norm": 1.2783465385437012, |
| "learning_rate": 5.543291166390047e-07, |
| "loss": 1.109, |
| "step": 2077 |
| }, |
| { |
| "epoch": 4.744859101294745, |
| "grad_norm": 1.2746630907058716, |
| "learning_rate": 5.523751290869098e-07, |
| "loss": 1.0967, |
| "step": 2078 |
| }, |
| { |
| "epoch": 4.747143945163748, |
| "grad_norm": 1.2512940168380737, |
| "learning_rate": 5.504241636766064e-07, |
| "loss": 1.0544, |
| "step": 2079 |
| }, |
| { |
| "epoch": 4.749428789032749, |
| "grad_norm": 1.29820716381073, |
| "learning_rate": 5.484762234354196e-07, |
| "loss": 1.0888, |
| "step": 2080 |
| }, |
| { |
| "epoch": 4.751713632901752, |
| "grad_norm": 1.2650960683822632, |
| "learning_rate": 5.465313113859816e-07, |
| "loss": 1.0936, |
| "step": 2081 |
| }, |
| { |
| "epoch": 4.753998476770754, |
| "grad_norm": 1.2510480880737305, |
| "learning_rate": 5.445894305462235e-07, |
| "loss": 1.1078, |
| "step": 2082 |
| }, |
| { |
| "epoch": 4.756283320639756, |
| "grad_norm": 1.258803367614746, |
| "learning_rate": 5.426505839293752e-07, |
| "loss": 1.1022, |
| "step": 2083 |
| }, |
| { |
| "epoch": 4.758568164508759, |
| "grad_norm": 1.252769112586975, |
| "learning_rate": 5.407147745439567e-07, |
| "loss": 1.103, |
| "step": 2084 |
| }, |
| { |
| "epoch": 4.760853008377761, |
| "grad_norm": 1.2640199661254883, |
| "learning_rate": 5.387820053937751e-07, |
| "loss": 1.09, |
| "step": 2085 |
| }, |
| { |
| "epoch": 4.763137852246763, |
| "grad_norm": 1.3083833456039429, |
| "learning_rate": 5.368522794779213e-07, |
| "loss": 1.1041, |
| "step": 2086 |
| }, |
| { |
| "epoch": 4.765422696115765, |
| "grad_norm": 1.2743126153945923, |
| "learning_rate": 5.349255997907632e-07, |
| "loss": 1.1266, |
| "step": 2087 |
| }, |
| { |
| "epoch": 4.767707539984768, |
| "grad_norm": 1.2865676879882812, |
| "learning_rate": 5.330019693219405e-07, |
| "loss": 1.1083, |
| "step": 2088 |
| }, |
| { |
| "epoch": 4.76999238385377, |
| "grad_norm": 1.2562209367752075, |
| "learning_rate": 5.310813910563645e-07, |
| "loss": 1.1015, |
| "step": 2089 |
| }, |
| { |
| "epoch": 4.772277227722772, |
| "grad_norm": 1.2887158393859863, |
| "learning_rate": 5.29163867974207e-07, |
| "loss": 1.103, |
| "step": 2090 |
| }, |
| { |
| "epoch": 4.774562071591775, |
| "grad_norm": 1.2844069004058838, |
| "learning_rate": 5.272494030509023e-07, |
| "loss": 1.0665, |
| "step": 2091 |
| }, |
| { |
| "epoch": 4.776846915460776, |
| "grad_norm": 1.29599928855896, |
| "learning_rate": 5.253379992571367e-07, |
| "loss": 1.0937, |
| "step": 2092 |
| }, |
| { |
| "epoch": 4.779131759329779, |
| "grad_norm": 1.267896056175232, |
| "learning_rate": 5.23429659558847e-07, |
| "loss": 1.0889, |
| "step": 2093 |
| }, |
| { |
| "epoch": 4.781416603198782, |
| "grad_norm": 1.2771025896072388, |
| "learning_rate": 5.215243869172173e-07, |
| "loss": 1.1208, |
| "step": 2094 |
| }, |
| { |
| "epoch": 4.783701447067783, |
| "grad_norm": 1.2694123983383179, |
| "learning_rate": 5.196221842886704e-07, |
| "loss": 1.0945, |
| "step": 2095 |
| }, |
| { |
| "epoch": 4.785986290936786, |
| "grad_norm": 1.3127267360687256, |
| "learning_rate": 5.177230546248654e-07, |
| "loss": 1.0761, |
| "step": 2096 |
| }, |
| { |
| "epoch": 4.788271134805788, |
| "grad_norm": 1.2706137895584106, |
| "learning_rate": 5.158270008726954e-07, |
| "loss": 1.0699, |
| "step": 2097 |
| }, |
| { |
| "epoch": 4.79055597867479, |
| "grad_norm": 1.30772066116333, |
| "learning_rate": 5.139340259742776e-07, |
| "loss": 1.1368, |
| "step": 2098 |
| }, |
| { |
| "epoch": 4.792840822543793, |
| "grad_norm": 1.2849667072296143, |
| "learning_rate": 5.12044132866954e-07, |
| "loss": 1.0956, |
| "step": 2099 |
| }, |
| { |
| "epoch": 4.795125666412795, |
| "grad_norm": 1.3047093152999878, |
| "learning_rate": 5.101573244832833e-07, |
| "loss": 1.1047, |
| "step": 2100 |
| }, |
| { |
| "epoch": 4.797410510281797, |
| "grad_norm": 1.327357292175293, |
| "learning_rate": 5.082736037510371e-07, |
| "loss": 1.1343, |
| "step": 2101 |
| }, |
| { |
| "epoch": 4.7996953541508, |
| "grad_norm": 1.2878531217575073, |
| "learning_rate": 5.063929735931985e-07, |
| "loss": 1.1009, |
| "step": 2102 |
| }, |
| { |
| "epoch": 4.801980198019802, |
| "grad_norm": 1.2837164402008057, |
| "learning_rate": 5.04515436927952e-07, |
| "loss": 1.057, |
| "step": 2103 |
| }, |
| { |
| "epoch": 4.804265041888804, |
| "grad_norm": 1.3011730909347534, |
| "learning_rate": 5.026409966686827e-07, |
| "loss": 1.0795, |
| "step": 2104 |
| }, |
| { |
| "epoch": 4.806549885757806, |
| "grad_norm": 1.266371250152588, |
| "learning_rate": 5.007696557239727e-07, |
| "loss": 1.1324, |
| "step": 2105 |
| }, |
| { |
| "epoch": 4.808834729626809, |
| "grad_norm": 1.266931176185608, |
| "learning_rate": 4.989014169975919e-07, |
| "loss": 1.1123, |
| "step": 2106 |
| }, |
| { |
| "epoch": 4.811119573495811, |
| "grad_norm": 1.2623364925384521, |
| "learning_rate": 4.970362833885001e-07, |
| "loss": 1.1137, |
| "step": 2107 |
| }, |
| { |
| "epoch": 4.813404417364813, |
| "grad_norm": 1.2980166673660278, |
| "learning_rate": 4.951742577908355e-07, |
| "loss": 1.0762, |
| "step": 2108 |
| }, |
| { |
| "epoch": 4.815689261233816, |
| "grad_norm": 1.2445948123931885, |
| "learning_rate": 4.933153430939147e-07, |
| "loss": 1.0828, |
| "step": 2109 |
| }, |
| { |
| "epoch": 4.817974105102818, |
| "grad_norm": 1.2619681358337402, |
| "learning_rate": 4.91459542182228e-07, |
| "loss": 1.0586, |
| "step": 2110 |
| }, |
| { |
| "epoch": 4.82025894897182, |
| "grad_norm": 1.2932782173156738, |
| "learning_rate": 4.896068579354338e-07, |
| "loss": 1.0878, |
| "step": 2111 |
| }, |
| { |
| "epoch": 4.822543792840823, |
| "grad_norm": 1.2484990358352661, |
| "learning_rate": 4.877572932283528e-07, |
| "loss": 1.1227, |
| "step": 2112 |
| }, |
| { |
| "epoch": 4.8248286367098245, |
| "grad_norm": 1.3282291889190674, |
| "learning_rate": 4.859108509309676e-07, |
| "loss": 1.1142, |
| "step": 2113 |
| }, |
| { |
| "epoch": 4.827113480578827, |
| "grad_norm": 1.266524314880371, |
| "learning_rate": 4.840675339084136e-07, |
| "loss": 1.0458, |
| "step": 2114 |
| }, |
| { |
| "epoch": 4.82939832444783, |
| "grad_norm": 1.2742689847946167, |
| "learning_rate": 4.822273450209767e-07, |
| "loss": 1.1246, |
| "step": 2115 |
| }, |
| { |
| "epoch": 4.8316831683168315, |
| "grad_norm": 1.246359944343567, |
| "learning_rate": 4.803902871240909e-07, |
| "loss": 1.1243, |
| "step": 2116 |
| }, |
| { |
| "epoch": 4.833968012185834, |
| "grad_norm": 1.295499563217163, |
| "learning_rate": 4.785563630683301e-07, |
| "loss": 1.0989, |
| "step": 2117 |
| }, |
| { |
| "epoch": 4.836252856054836, |
| "grad_norm": 1.2707738876342773, |
| "learning_rate": 4.767255756994049e-07, |
| "loss": 1.132, |
| "step": 2118 |
| }, |
| { |
| "epoch": 4.8385376999238385, |
| "grad_norm": 1.2814133167266846, |
| "learning_rate": 4.7489792785816116e-07, |
| "loss": 1.0663, |
| "step": 2119 |
| }, |
| { |
| "epoch": 4.840822543792841, |
| "grad_norm": 1.3483997583389282, |
| "learning_rate": 4.7307342238056996e-07, |
| "loss": 1.0886, |
| "step": 2120 |
| }, |
| { |
| "epoch": 4.843107387661843, |
| "grad_norm": 1.3065505027770996, |
| "learning_rate": 4.7125206209772955e-07, |
| "loss": 1.0979, |
| "step": 2121 |
| }, |
| { |
| "epoch": 4.8453922315308455, |
| "grad_norm": 1.2689411640167236, |
| "learning_rate": 4.6943384983585553e-07, |
| "loss": 1.1147, |
| "step": 2122 |
| }, |
| { |
| "epoch": 4.847677075399847, |
| "grad_norm": 1.2981805801391602, |
| "learning_rate": 4.676187884162783e-07, |
| "loss": 1.0844, |
| "step": 2123 |
| }, |
| { |
| "epoch": 4.84996191926885, |
| "grad_norm": 1.2894749641418457, |
| "learning_rate": 4.6580688065544164e-07, |
| "loss": 1.1104, |
| "step": 2124 |
| }, |
| { |
| "epoch": 4.8522467631378525, |
| "grad_norm": 1.2899259328842163, |
| "learning_rate": 4.6399812936489376e-07, |
| "loss": 1.1248, |
| "step": 2125 |
| }, |
| { |
| "epoch": 4.854531607006854, |
| "grad_norm": 1.302597165107727, |
| "learning_rate": 4.621925373512845e-07, |
| "loss": 1.0847, |
| "step": 2126 |
| }, |
| { |
| "epoch": 4.856816450875857, |
| "grad_norm": 1.3532848358154297, |
| "learning_rate": 4.603901074163639e-07, |
| "loss": 1.1002, |
| "step": 2127 |
| }, |
| { |
| "epoch": 4.859101294744859, |
| "grad_norm": 1.2945899963378906, |
| "learning_rate": 4.5859084235697236e-07, |
| "loss": 1.067, |
| "step": 2128 |
| }, |
| { |
| "epoch": 4.861386138613861, |
| "grad_norm": 1.3042470216751099, |
| "learning_rate": 4.5679474496504227e-07, |
| "loss": 1.0435, |
| "step": 2129 |
| }, |
| { |
| "epoch": 4.863670982482864, |
| "grad_norm": 1.3194514513015747, |
| "learning_rate": 4.550018180275886e-07, |
| "loss": 1.1081, |
| "step": 2130 |
| }, |
| { |
| "epoch": 4.865955826351866, |
| "grad_norm": 1.285345196723938, |
| "learning_rate": 4.5321206432670684e-07, |
| "loss": 1.0663, |
| "step": 2131 |
| }, |
| { |
| "epoch": 4.868240670220868, |
| "grad_norm": 1.2676403522491455, |
| "learning_rate": 4.5142548663957006e-07, |
| "loss": 1.0603, |
| "step": 2132 |
| }, |
| { |
| "epoch": 4.870525514089871, |
| "grad_norm": 1.2659584283828735, |
| "learning_rate": 4.4964208773842167e-07, |
| "loss": 1.1275, |
| "step": 2133 |
| }, |
| { |
| "epoch": 4.872810357958873, |
| "grad_norm": 1.3056273460388184, |
| "learning_rate": 4.478618703905724e-07, |
| "loss": 1.0908, |
| "step": 2134 |
| }, |
| { |
| "epoch": 4.875095201827875, |
| "grad_norm": 1.2657009363174438, |
| "learning_rate": 4.460848373583981e-07, |
| "loss": 1.0994, |
| "step": 2135 |
| }, |
| { |
| "epoch": 4.877380045696877, |
| "grad_norm": 1.2820390462875366, |
| "learning_rate": 4.4431099139933043e-07, |
| "loss": 1.1124, |
| "step": 2136 |
| }, |
| { |
| "epoch": 4.87966488956588, |
| "grad_norm": 1.2907642126083374, |
| "learning_rate": 4.4254033526585917e-07, |
| "loss": 1.1237, |
| "step": 2137 |
| }, |
| { |
| "epoch": 4.881949733434882, |
| "grad_norm": 1.3053464889526367, |
| "learning_rate": 4.407728717055215e-07, |
| "loss": 1.1263, |
| "step": 2138 |
| }, |
| { |
| "epoch": 4.884234577303884, |
| "grad_norm": 1.2887327671051025, |
| "learning_rate": 4.3900860346090146e-07, |
| "loss": 1.1195, |
| "step": 2139 |
| }, |
| { |
| "epoch": 4.886519421172887, |
| "grad_norm": 1.331275463104248, |
| "learning_rate": 4.3724753326962634e-07, |
| "loss": 1.0566, |
| "step": 2140 |
| }, |
| { |
| "epoch": 4.888804265041889, |
| "grad_norm": 1.290602445602417, |
| "learning_rate": 4.354896638643591e-07, |
| "loss": 1.0698, |
| "step": 2141 |
| }, |
| { |
| "epoch": 4.891089108910891, |
| "grad_norm": 1.3247865438461304, |
| "learning_rate": 4.3373499797279646e-07, |
| "loss": 1.0968, |
| "step": 2142 |
| }, |
| { |
| "epoch": 4.893373952779894, |
| "grad_norm": 1.2982738018035889, |
| "learning_rate": 4.319835383176657e-07, |
| "loss": 1.1062, |
| "step": 2143 |
| }, |
| { |
| "epoch": 4.895658796648895, |
| "grad_norm": 1.2912477254867554, |
| "learning_rate": 4.3023528761671624e-07, |
| "loss": 1.0881, |
| "step": 2144 |
| }, |
| { |
| "epoch": 4.897943640517898, |
| "grad_norm": 1.2807631492614746, |
| "learning_rate": 4.284902485827214e-07, |
| "loss": 1.1088, |
| "step": 2145 |
| }, |
| { |
| "epoch": 4.900228484386901, |
| "grad_norm": 1.339166283607483, |
| "learning_rate": 4.2674842392346844e-07, |
| "loss": 1.0903, |
| "step": 2146 |
| }, |
| { |
| "epoch": 4.902513328255902, |
| "grad_norm": 1.2816892862319946, |
| "learning_rate": 4.250098163417571e-07, |
| "loss": 1.1297, |
| "step": 2147 |
| }, |
| { |
| "epoch": 4.904798172124905, |
| "grad_norm": 1.2935080528259277, |
| "learning_rate": 4.232744285353968e-07, |
| "loss": 1.1105, |
| "step": 2148 |
| }, |
| { |
| "epoch": 4.907083015993907, |
| "grad_norm": 1.3085383176803589, |
| "learning_rate": 4.215422631971994e-07, |
| "loss": 1.1381, |
| "step": 2149 |
| }, |
| { |
| "epoch": 4.909367859862909, |
| "grad_norm": 1.2890045642852783, |
| "learning_rate": 4.1981332301497563e-07, |
| "loss": 1.1191, |
| "step": 2150 |
| }, |
| { |
| "epoch": 4.911652703731912, |
| "grad_norm": 1.2732130289077759, |
| "learning_rate": 4.180876106715348e-07, |
| "loss": 1.0815, |
| "step": 2151 |
| }, |
| { |
| "epoch": 4.913937547600914, |
| "grad_norm": 1.2872875928878784, |
| "learning_rate": 4.1636512884467404e-07, |
| "loss": 1.1037, |
| "step": 2152 |
| }, |
| { |
| "epoch": 4.916222391469916, |
| "grad_norm": 1.283339023590088, |
| "learning_rate": 4.146458802071804e-07, |
| "loss": 1.0855, |
| "step": 2153 |
| }, |
| { |
| "epoch": 4.918507235338918, |
| "grad_norm": 1.2937120199203491, |
| "learning_rate": 4.129298674268226e-07, |
| "loss": 1.1246, |
| "step": 2154 |
| }, |
| { |
| "epoch": 4.920792079207921, |
| "grad_norm": 1.3152278661727905, |
| "learning_rate": 4.112170931663473e-07, |
| "loss": 1.1028, |
| "step": 2155 |
| }, |
| { |
| "epoch": 4.923076923076923, |
| "grad_norm": 1.2731553316116333, |
| "learning_rate": 4.0950756008347886e-07, |
| "loss": 1.102, |
| "step": 2156 |
| }, |
| { |
| "epoch": 4.925361766945925, |
| "grad_norm": 1.2939971685409546, |
| "learning_rate": 4.0780127083090956e-07, |
| "loss": 1.0797, |
| "step": 2157 |
| }, |
| { |
| "epoch": 4.927646610814928, |
| "grad_norm": 1.2959516048431396, |
| "learning_rate": 4.060982280562989e-07, |
| "loss": 1.0698, |
| "step": 2158 |
| }, |
| { |
| "epoch": 4.9299314546839295, |
| "grad_norm": 1.289335012435913, |
| "learning_rate": 4.0439843440227027e-07, |
| "loss": 1.0682, |
| "step": 2159 |
| }, |
| { |
| "epoch": 4.932216298552932, |
| "grad_norm": 1.2806504964828491, |
| "learning_rate": 4.0270189250640273e-07, |
| "loss": 1.0625, |
| "step": 2160 |
| }, |
| { |
| "epoch": 4.934501142421935, |
| "grad_norm": 1.2870630025863647, |
| "learning_rate": 4.010086050012324e-07, |
| "loss": 1.0861, |
| "step": 2161 |
| }, |
| { |
| "epoch": 4.9367859862909365, |
| "grad_norm": 1.2742153406143188, |
| "learning_rate": 3.993185745142436e-07, |
| "loss": 1.0794, |
| "step": 2162 |
| }, |
| { |
| "epoch": 4.939070830159939, |
| "grad_norm": 1.2898108959197998, |
| "learning_rate": 3.976318036678667e-07, |
| "loss": 1.086, |
| "step": 2163 |
| }, |
| { |
| "epoch": 4.941355674028942, |
| "grad_norm": 1.2475721836090088, |
| "learning_rate": 3.9594829507947573e-07, |
| "loss": 1.0961, |
| "step": 2164 |
| }, |
| { |
| "epoch": 4.9436405178979435, |
| "grad_norm": 1.2730660438537598, |
| "learning_rate": 3.9426805136138086e-07, |
| "loss": 1.0954, |
| "step": 2165 |
| }, |
| { |
| "epoch": 4.945925361766946, |
| "grad_norm": 1.2580710649490356, |
| "learning_rate": 3.9259107512082674e-07, |
| "loss": 1.073, |
| "step": 2166 |
| }, |
| { |
| "epoch": 4.948210205635948, |
| "grad_norm": 1.2623883485794067, |
| "learning_rate": 3.9091736895998907e-07, |
| "loss": 1.0568, |
| "step": 2167 |
| }, |
| { |
| "epoch": 4.9504950495049505, |
| "grad_norm": 1.280324101448059, |
| "learning_rate": 3.8924693547596647e-07, |
| "loss": 1.1061, |
| "step": 2168 |
| }, |
| { |
| "epoch": 4.952779893373953, |
| "grad_norm": 1.276982069015503, |
| "learning_rate": 3.875797772607831e-07, |
| "loss": 1.0939, |
| "step": 2169 |
| }, |
| { |
| "epoch": 4.955064737242955, |
| "grad_norm": 1.2728464603424072, |
| "learning_rate": 3.859158969013777e-07, |
| "loss": 1.0977, |
| "step": 2170 |
| }, |
| { |
| "epoch": 4.9573495811119574, |
| "grad_norm": 1.3238383531570435, |
| "learning_rate": 3.842552969796037e-07, |
| "loss": 1.0895, |
| "step": 2171 |
| }, |
| { |
| "epoch": 4.95963442498096, |
| "grad_norm": 1.2770859003067017, |
| "learning_rate": 3.8259798007222515e-07, |
| "loss": 1.0748, |
| "step": 2172 |
| }, |
| { |
| "epoch": 4.961919268849962, |
| "grad_norm": 1.2510464191436768, |
| "learning_rate": 3.8094394875091117e-07, |
| "loss": 1.1028, |
| "step": 2173 |
| }, |
| { |
| "epoch": 4.9642041127189644, |
| "grad_norm": 1.284297227859497, |
| "learning_rate": 3.7929320558223153e-07, |
| "loss": 1.0561, |
| "step": 2174 |
| }, |
| { |
| "epoch": 4.966488956587966, |
| "grad_norm": 1.3043745756149292, |
| "learning_rate": 3.7764575312765644e-07, |
| "loss": 1.0553, |
| "step": 2175 |
| }, |
| { |
| "epoch": 4.968773800456969, |
| "grad_norm": 1.2932723760604858, |
| "learning_rate": 3.7600159394354657e-07, |
| "loss": 1.1137, |
| "step": 2176 |
| }, |
| { |
| "epoch": 4.971058644325971, |
| "grad_norm": 1.2768579721450806, |
| "learning_rate": 3.743607305811556e-07, |
| "loss": 1.1118, |
| "step": 2177 |
| }, |
| { |
| "epoch": 4.973343488194973, |
| "grad_norm": 1.2791515588760376, |
| "learning_rate": 3.7272316558662094e-07, |
| "loss": 1.0952, |
| "step": 2178 |
| }, |
| { |
| "epoch": 4.975628332063976, |
| "grad_norm": 1.2823758125305176, |
| "learning_rate": 3.7108890150096166e-07, |
| "loss": 1.0373, |
| "step": 2179 |
| }, |
| { |
| "epoch": 4.9779131759329776, |
| "grad_norm": 1.3007476329803467, |
| "learning_rate": 3.6945794086007706e-07, |
| "loss": 1.066, |
| "step": 2180 |
| }, |
| { |
| "epoch": 4.98019801980198, |
| "grad_norm": 1.3126449584960938, |
| "learning_rate": 3.678302861947386e-07, |
| "loss": 1.077, |
| "step": 2181 |
| }, |
| { |
| "epoch": 4.982482863670983, |
| "grad_norm": 1.3028063774108887, |
| "learning_rate": 3.6620594003058756e-07, |
| "loss": 1.1003, |
| "step": 2182 |
| }, |
| { |
| "epoch": 4.9847677075399845, |
| "grad_norm": 1.2599045038223267, |
| "learning_rate": 3.645849048881331e-07, |
| "loss": 1.0912, |
| "step": 2183 |
| }, |
| { |
| "epoch": 4.987052551408987, |
| "grad_norm": 1.275844931602478, |
| "learning_rate": 3.629671832827447e-07, |
| "loss": 1.1011, |
| "step": 2184 |
| }, |
| { |
| "epoch": 4.989337395277989, |
| "grad_norm": 1.259772777557373, |
| "learning_rate": 3.6135277772465226e-07, |
| "loss": 1.0799, |
| "step": 2185 |
| }, |
| { |
| "epoch": 4.9916222391469915, |
| "grad_norm": 1.2567813396453857, |
| "learning_rate": 3.597416907189388e-07, |
| "loss": 1.12, |
| "step": 2186 |
| }, |
| { |
| "epoch": 4.993907083015994, |
| "grad_norm": 1.2930412292480469, |
| "learning_rate": 3.5813392476553714e-07, |
| "loss": 1.0947, |
| "step": 2187 |
| }, |
| { |
| "epoch": 4.996191926884996, |
| "grad_norm": 1.2559951543807983, |
| "learning_rate": 3.565294823592291e-07, |
| "loss": 1.0462, |
| "step": 2188 |
| }, |
| { |
| "epoch": 4.9984767707539985, |
| "grad_norm": 1.2923510074615479, |
| "learning_rate": 3.5492836598963715e-07, |
| "loss": 1.0844, |
| "step": 2189 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 1.6771284341812134, |
| "learning_rate": 3.5333057814122414e-07, |
| "loss": 1.074, |
| "step": 2190 |
| }, |
| { |
| "epoch": 5.002284843869003, |
| "grad_norm": 1.487131118774414, |
| "learning_rate": 3.517361212932879e-07, |
| "loss": 1.0851, |
| "step": 2191 |
| }, |
| { |
| "epoch": 5.004569687738004, |
| "grad_norm": 1.3028117418289185, |
| "learning_rate": 3.50144997919957e-07, |
| "loss": 1.0614, |
| "step": 2192 |
| }, |
| { |
| "epoch": 5.006854531607007, |
| "grad_norm": 1.3251090049743652, |
| "learning_rate": 3.485572104901869e-07, |
| "loss": 1.0994, |
| "step": 2193 |
| }, |
| { |
| "epoch": 5.009139375476009, |
| "grad_norm": 1.2535759210586548, |
| "learning_rate": 3.4697276146775817e-07, |
| "loss": 1.0573, |
| "step": 2194 |
| }, |
| { |
| "epoch": 5.011424219345011, |
| "grad_norm": 1.320404052734375, |
| "learning_rate": 3.4539165331127e-07, |
| "loss": 1.07, |
| "step": 2195 |
| }, |
| { |
| "epoch": 5.013709063214014, |
| "grad_norm": 1.2744762897491455, |
| "learning_rate": 3.438138884741377e-07, |
| "loss": 1.064, |
| "step": 2196 |
| }, |
| { |
| "epoch": 5.015993907083016, |
| "grad_norm": 1.2865326404571533, |
| "learning_rate": 3.422394694045894e-07, |
| "loss": 1.0918, |
| "step": 2197 |
| }, |
| { |
| "epoch": 5.018278750952018, |
| "grad_norm": 1.3152594566345215, |
| "learning_rate": 3.406683985456605e-07, |
| "loss": 1.0854, |
| "step": 2198 |
| }, |
| { |
| "epoch": 5.020563594821021, |
| "grad_norm": 1.290440320968628, |
| "learning_rate": 3.3910067833519215e-07, |
| "loss": 1.1513, |
| "step": 2199 |
| }, |
| { |
| "epoch": 5.022848438690023, |
| "grad_norm": 1.2581883668899536, |
| "learning_rate": 3.375363112058255e-07, |
| "loss": 1.0887, |
| "step": 2200 |
| }, |
| { |
| "epoch": 5.025133282559025, |
| "grad_norm": 1.3203215599060059, |
| "learning_rate": 3.359752995849985e-07, |
| "loss": 1.0834, |
| "step": 2201 |
| }, |
| { |
| "epoch": 5.027418126428027, |
| "grad_norm": 1.2735775709152222, |
| "learning_rate": 3.344176458949436e-07, |
| "loss": 1.0951, |
| "step": 2202 |
| }, |
| { |
| "epoch": 5.02970297029703, |
| "grad_norm": 1.2835285663604736, |
| "learning_rate": 3.328633525526812e-07, |
| "loss": 1.0732, |
| "step": 2203 |
| }, |
| { |
| "epoch": 5.031987814166032, |
| "grad_norm": 1.2780119180679321, |
| "learning_rate": 3.3131242197001754e-07, |
| "loss": 1.0888, |
| "step": 2204 |
| }, |
| { |
| "epoch": 5.034272658035034, |
| "grad_norm": 1.295435905456543, |
| "learning_rate": 3.2976485655354243e-07, |
| "loss": 1.0786, |
| "step": 2205 |
| }, |
| { |
| "epoch": 5.036557501904037, |
| "grad_norm": 1.2823036909103394, |
| "learning_rate": 3.2822065870462216e-07, |
| "loss": 1.0481, |
| "step": 2206 |
| }, |
| { |
| "epoch": 5.0388423457730385, |
| "grad_norm": 1.2593215703964233, |
| "learning_rate": 3.266798308193991e-07, |
| "loss": 1.0899, |
| "step": 2207 |
| }, |
| { |
| "epoch": 5.041127189642041, |
| "grad_norm": 1.272403359413147, |
| "learning_rate": 3.2514237528878503e-07, |
| "loss": 1.0549, |
| "step": 2208 |
| }, |
| { |
| "epoch": 5.043412033511044, |
| "grad_norm": 1.2577238082885742, |
| "learning_rate": 3.236082944984589e-07, |
| "loss": 1.1043, |
| "step": 2209 |
| }, |
| { |
| "epoch": 5.0456968773800455, |
| "grad_norm": 1.2967652082443237, |
| "learning_rate": 3.2207759082886454e-07, |
| "loss": 1.1109, |
| "step": 2210 |
| }, |
| { |
| "epoch": 5.047981721249048, |
| "grad_norm": 1.263169288635254, |
| "learning_rate": 3.205502666552041e-07, |
| "loss": 1.0551, |
| "step": 2211 |
| }, |
| { |
| "epoch": 5.05026656511805, |
| "grad_norm": 1.2983647584915161, |
| "learning_rate": 3.1902632434743563e-07, |
| "loss": 1.0928, |
| "step": 2212 |
| }, |
| { |
| "epoch": 5.0525514089870525, |
| "grad_norm": 1.308638095855713, |
| "learning_rate": 3.175057662702707e-07, |
| "loss": 1.0659, |
| "step": 2213 |
| }, |
| { |
| "epoch": 5.054836252856055, |
| "grad_norm": 1.3027294874191284, |
| "learning_rate": 3.1598859478316814e-07, |
| "loss": 1.0821, |
| "step": 2214 |
| }, |
| { |
| "epoch": 5.057121096725057, |
| "grad_norm": 1.280255675315857, |
| "learning_rate": 3.1447481224033376e-07, |
| "loss": 1.1082, |
| "step": 2215 |
| }, |
| { |
| "epoch": 5.0594059405940595, |
| "grad_norm": 1.2913918495178223, |
| "learning_rate": 3.1296442099071235e-07, |
| "loss": 1.0666, |
| "step": 2216 |
| }, |
| { |
| "epoch": 5.061690784463062, |
| "grad_norm": 1.2657502889633179, |
| "learning_rate": 3.114574233779877e-07, |
| "loss": 1.1096, |
| "step": 2217 |
| }, |
| { |
| "epoch": 5.063975628332064, |
| "grad_norm": 1.2814840078353882, |
| "learning_rate": 3.099538217405779e-07, |
| "loss": 1.0969, |
| "step": 2218 |
| }, |
| { |
| "epoch": 5.0662604722010665, |
| "grad_norm": 1.2882423400878906, |
| "learning_rate": 3.08453618411631e-07, |
| "loss": 1.0349, |
| "step": 2219 |
| }, |
| { |
| "epoch": 5.068545316070068, |
| "grad_norm": 1.3120388984680176, |
| "learning_rate": 3.069568157190214e-07, |
| "loss": 1.1251, |
| "step": 2220 |
| }, |
| { |
| "epoch": 5.070830159939071, |
| "grad_norm": 1.2447402477264404, |
| "learning_rate": 3.054634159853481e-07, |
| "loss": 1.092, |
| "step": 2221 |
| }, |
| { |
| "epoch": 5.0731150038080735, |
| "grad_norm": 1.2926756143569946, |
| "learning_rate": 3.03973421527928e-07, |
| "loss": 1.0963, |
| "step": 2222 |
| }, |
| { |
| "epoch": 5.075399847677075, |
| "grad_norm": 1.255759835243225, |
| "learning_rate": 3.0248683465879587e-07, |
| "loss": 1.0646, |
| "step": 2223 |
| }, |
| { |
| "epoch": 5.077684691546078, |
| "grad_norm": 1.2734875679016113, |
| "learning_rate": 3.0100365768469753e-07, |
| "loss": 1.0786, |
| "step": 2224 |
| }, |
| { |
| "epoch": 5.07996953541508, |
| "grad_norm": 1.299229621887207, |
| "learning_rate": 2.995238929070876e-07, |
| "loss": 1.0652, |
| "step": 2225 |
| }, |
| { |
| "epoch": 5.082254379284082, |
| "grad_norm": 1.2811152935028076, |
| "learning_rate": 2.980475426221269e-07, |
| "loss": 1.085, |
| "step": 2226 |
| }, |
| { |
| "epoch": 5.084539223153085, |
| "grad_norm": 1.2939437627792358, |
| "learning_rate": 2.9657460912067753e-07, |
| "loss": 1.1039, |
| "step": 2227 |
| }, |
| { |
| "epoch": 5.086824067022087, |
| "grad_norm": 1.2752275466918945, |
| "learning_rate": 2.951050946882991e-07, |
| "loss": 1.0901, |
| "step": 2228 |
| }, |
| { |
| "epoch": 5.089108910891089, |
| "grad_norm": 1.2718929052352905, |
| "learning_rate": 2.9363900160524734e-07, |
| "loss": 1.0727, |
| "step": 2229 |
| }, |
| { |
| "epoch": 5.091393754760092, |
| "grad_norm": 1.33417546749115, |
| "learning_rate": 2.9217633214646713e-07, |
| "loss": 1.0994, |
| "step": 2230 |
| }, |
| { |
| "epoch": 5.093678598629094, |
| "grad_norm": 1.3067365884780884, |
| "learning_rate": 2.907170885815927e-07, |
| "loss": 1.1239, |
| "step": 2231 |
| }, |
| { |
| "epoch": 5.095963442498096, |
| "grad_norm": 1.3246252536773682, |
| "learning_rate": 2.892612731749414e-07, |
| "loss": 1.1102, |
| "step": 2232 |
| }, |
| { |
| "epoch": 5.098248286367098, |
| "grad_norm": 1.3094791173934937, |
| "learning_rate": 2.8780888818551014e-07, |
| "loss": 1.0473, |
| "step": 2233 |
| }, |
| { |
| "epoch": 5.1005331302361006, |
| "grad_norm": 1.2624950408935547, |
| "learning_rate": 2.8635993586697555e-07, |
| "loss": 1.1001, |
| "step": 2234 |
| }, |
| { |
| "epoch": 5.102817974105103, |
| "grad_norm": 1.3205031156539917, |
| "learning_rate": 2.8491441846768497e-07, |
| "loss": 1.0823, |
| "step": 2235 |
| }, |
| { |
| "epoch": 5.105102817974105, |
| "grad_norm": 1.2623871564865112, |
| "learning_rate": 2.8347233823065644e-07, |
| "loss": 1.0909, |
| "step": 2236 |
| }, |
| { |
| "epoch": 5.1073876618431076, |
| "grad_norm": 1.262770414352417, |
| "learning_rate": 2.820336973935764e-07, |
| "loss": 1.078, |
| "step": 2237 |
| }, |
| { |
| "epoch": 5.109672505712109, |
| "grad_norm": 1.2645272016525269, |
| "learning_rate": 2.805984981887916e-07, |
| "loss": 1.0378, |
| "step": 2238 |
| }, |
| { |
| "epoch": 5.111957349581112, |
| "grad_norm": 1.2669461965560913, |
| "learning_rate": 2.791667428433106e-07, |
| "loss": 1.0587, |
| "step": 2239 |
| }, |
| { |
| "epoch": 5.1142421934501145, |
| "grad_norm": 1.270908236503601, |
| "learning_rate": 2.777384335787969e-07, |
| "loss": 1.0601, |
| "step": 2240 |
| }, |
| { |
| "epoch": 5.116527037319116, |
| "grad_norm": 1.3161948919296265, |
| "learning_rate": 2.763135726115662e-07, |
| "loss": 1.0927, |
| "step": 2241 |
| }, |
| { |
| "epoch": 5.118811881188119, |
| "grad_norm": 1.2516229152679443, |
| "learning_rate": 2.7489216215258574e-07, |
| "loss": 1.0928, |
| "step": 2242 |
| }, |
| { |
| "epoch": 5.1210967250571215, |
| "grad_norm": 1.303397297859192, |
| "learning_rate": 2.734742044074659e-07, |
| "loss": 1.1116, |
| "step": 2243 |
| }, |
| { |
| "epoch": 5.123381568926123, |
| "grad_norm": 1.3074421882629395, |
| "learning_rate": 2.720597015764609e-07, |
| "loss": 1.0671, |
| "step": 2244 |
| }, |
| { |
| "epoch": 5.125666412795126, |
| "grad_norm": 1.3157670497894287, |
| "learning_rate": 2.706486558544644e-07, |
| "loss": 1.0835, |
| "step": 2245 |
| }, |
| { |
| "epoch": 5.127951256664128, |
| "grad_norm": 1.2734421491622925, |
| "learning_rate": 2.692410694310035e-07, |
| "loss": 1.0818, |
| "step": 2246 |
| }, |
| { |
| "epoch": 5.13023610053313, |
| "grad_norm": 1.315611720085144, |
| "learning_rate": 2.678369444902407e-07, |
| "loss": 1.0806, |
| "step": 2247 |
| }, |
| { |
| "epoch": 5.132520944402133, |
| "grad_norm": 1.2910858392715454, |
| "learning_rate": 2.664362832109643e-07, |
| "loss": 1.044, |
| "step": 2248 |
| }, |
| { |
| "epoch": 5.134805788271135, |
| "grad_norm": 1.3554924726486206, |
| "learning_rate": 2.6503908776658956e-07, |
| "loss": 1.0666, |
| "step": 2249 |
| }, |
| { |
| "epoch": 5.137090632140137, |
| "grad_norm": 1.3038955926895142, |
| "learning_rate": 2.6364536032515346e-07, |
| "loss": 1.0704, |
| "step": 2250 |
| }, |
| { |
| "epoch": 5.139375476009139, |
| "grad_norm": 1.2819843292236328, |
| "learning_rate": 2.62255103049312e-07, |
| "loss": 1.107, |
| "step": 2251 |
| }, |
| { |
| "epoch": 5.141660319878142, |
| "grad_norm": 1.2703412771224976, |
| "learning_rate": 2.6086831809633535e-07, |
| "loss": 1.104, |
| "step": 2252 |
| }, |
| { |
| "epoch": 5.143945163747144, |
| "grad_norm": 1.2854193449020386, |
| "learning_rate": 2.594850076181074e-07, |
| "loss": 1.0775, |
| "step": 2253 |
| }, |
| { |
| "epoch": 5.146230007616146, |
| "grad_norm": 1.294018268585205, |
| "learning_rate": 2.581051737611193e-07, |
| "loss": 1.0748, |
| "step": 2254 |
| }, |
| { |
| "epoch": 5.148514851485149, |
| "grad_norm": 1.3650598526000977, |
| "learning_rate": 2.567288186664674e-07, |
| "loss": 1.1148, |
| "step": 2255 |
| }, |
| { |
| "epoch": 5.15079969535415, |
| "grad_norm": 1.281511664390564, |
| "learning_rate": 2.553559444698517e-07, |
| "loss": 1.0872, |
| "step": 2256 |
| }, |
| { |
| "epoch": 5.153084539223153, |
| "grad_norm": 1.2767432928085327, |
| "learning_rate": 2.539865533015684e-07, |
| "loss": 1.0875, |
| "step": 2257 |
| }, |
| { |
| "epoch": 5.155369383092156, |
| "grad_norm": 1.314460277557373, |
| "learning_rate": 2.52620647286512e-07, |
| "loss": 1.1094, |
| "step": 2258 |
| }, |
| { |
| "epoch": 5.157654226961157, |
| "grad_norm": 1.315940260887146, |
| "learning_rate": 2.5125822854416644e-07, |
| "loss": 1.0909, |
| "step": 2259 |
| }, |
| { |
| "epoch": 5.15993907083016, |
| "grad_norm": 1.3246798515319824, |
| "learning_rate": 2.498992991886054e-07, |
| "loss": 1.0659, |
| "step": 2260 |
| }, |
| { |
| "epoch": 5.162223914699163, |
| "grad_norm": 1.3555991649627686, |
| "learning_rate": 2.485438613284888e-07, |
| "loss": 1.0844, |
| "step": 2261 |
| }, |
| { |
| "epoch": 5.164508758568164, |
| "grad_norm": 1.320139765739441, |
| "learning_rate": 2.471919170670578e-07, |
| "loss": 1.0691, |
| "step": 2262 |
| }, |
| { |
| "epoch": 5.166793602437167, |
| "grad_norm": 1.2948224544525146, |
| "learning_rate": 2.4584346850213216e-07, |
| "loss": 1.0834, |
| "step": 2263 |
| }, |
| { |
| "epoch": 5.169078446306169, |
| "grad_norm": 1.2399736642837524, |
| "learning_rate": 2.4449851772610917e-07, |
| "loss": 1.0964, |
| "step": 2264 |
| }, |
| { |
| "epoch": 5.171363290175171, |
| "grad_norm": 1.3757954835891724, |
| "learning_rate": 2.4315706682595613e-07, |
| "loss": 1.099, |
| "step": 2265 |
| }, |
| { |
| "epoch": 5.173648134044174, |
| "grad_norm": 1.273930549621582, |
| "learning_rate": 2.4181911788321243e-07, |
| "loss": 1.0505, |
| "step": 2266 |
| }, |
| { |
| "epoch": 5.175932977913176, |
| "grad_norm": 1.2793792486190796, |
| "learning_rate": 2.40484672973981e-07, |
| "loss": 1.0856, |
| "step": 2267 |
| }, |
| { |
| "epoch": 5.178217821782178, |
| "grad_norm": 1.3076246976852417, |
| "learning_rate": 2.391537341689276e-07, |
| "loss": 1.105, |
| "step": 2268 |
| }, |
| { |
| "epoch": 5.18050266565118, |
| "grad_norm": 1.3009895086288452, |
| "learning_rate": 2.3782630353328007e-07, |
| "loss": 1.0957, |
| "step": 2269 |
| }, |
| { |
| "epoch": 5.182787509520183, |
| "grad_norm": 1.2714428901672363, |
| "learning_rate": 2.3650238312682012e-07, |
| "loss": 1.0758, |
| "step": 2270 |
| }, |
| { |
| "epoch": 5.185072353389185, |
| "grad_norm": 1.292037844657898, |
| "learning_rate": 2.3518197500388278e-07, |
| "loss": 1.0341, |
| "step": 2271 |
| }, |
| { |
| "epoch": 5.187357197258187, |
| "grad_norm": 1.3077787160873413, |
| "learning_rate": 2.3386508121335472e-07, |
| "loss": 1.0913, |
| "step": 2272 |
| }, |
| { |
| "epoch": 5.18964204112719, |
| "grad_norm": 1.2806161642074585, |
| "learning_rate": 2.3255170379866799e-07, |
| "loss": 1.092, |
| "step": 2273 |
| }, |
| { |
| "epoch": 5.191926884996192, |
| "grad_norm": 1.2643924951553345, |
| "learning_rate": 2.3124184479779767e-07, |
| "loss": 1.0798, |
| "step": 2274 |
| }, |
| { |
| "epoch": 5.194211728865194, |
| "grad_norm": 1.323236107826233, |
| "learning_rate": 2.2993550624326178e-07, |
| "loss": 1.1076, |
| "step": 2275 |
| }, |
| { |
| "epoch": 5.196496572734197, |
| "grad_norm": 1.2984707355499268, |
| "learning_rate": 2.2863269016211276e-07, |
| "loss": 1.1025, |
| "step": 2276 |
| }, |
| { |
| "epoch": 5.1987814166031985, |
| "grad_norm": 1.3057256937026978, |
| "learning_rate": 2.273333985759396e-07, |
| "loss": 1.0655, |
| "step": 2277 |
| }, |
| { |
| "epoch": 5.201066260472201, |
| "grad_norm": 1.284069299697876, |
| "learning_rate": 2.2603763350086071e-07, |
| "loss": 1.0813, |
| "step": 2278 |
| }, |
| { |
| "epoch": 5.203351104341204, |
| "grad_norm": 1.2818840742111206, |
| "learning_rate": 2.2474539694752245e-07, |
| "loss": 1.1045, |
| "step": 2279 |
| }, |
| { |
| "epoch": 5.2056359482102055, |
| "grad_norm": 1.304898977279663, |
| "learning_rate": 2.234566909210975e-07, |
| "loss": 1.086, |
| "step": 2280 |
| }, |
| { |
| "epoch": 5.207920792079208, |
| "grad_norm": 1.3013020753860474, |
| "learning_rate": 2.2217151742127851e-07, |
| "loss": 1.0749, |
| "step": 2281 |
| }, |
| { |
| "epoch": 5.21020563594821, |
| "grad_norm": 1.2945225238800049, |
| "learning_rate": 2.2088987844227695e-07, |
| "loss": 1.1133, |
| "step": 2282 |
| }, |
| { |
| "epoch": 5.2124904798172125, |
| "grad_norm": 1.2707927227020264, |
| "learning_rate": 2.1961177597282112e-07, |
| "loss": 1.0757, |
| "step": 2283 |
| }, |
| { |
| "epoch": 5.214775323686215, |
| "grad_norm": 1.3085978031158447, |
| "learning_rate": 2.1833721199614992e-07, |
| "loss": 1.0901, |
| "step": 2284 |
| }, |
| { |
| "epoch": 5.217060167555217, |
| "grad_norm": 1.2786146402359009, |
| "learning_rate": 2.1706618849001188e-07, |
| "loss": 1.0862, |
| "step": 2285 |
| }, |
| { |
| "epoch": 5.2193450114242195, |
| "grad_norm": 1.270601511001587, |
| "learning_rate": 2.1579870742666347e-07, |
| "loss": 1.0941, |
| "step": 2286 |
| }, |
| { |
| "epoch": 5.221629855293221, |
| "grad_norm": 1.3005554676055908, |
| "learning_rate": 2.145347707728618e-07, |
| "loss": 1.0716, |
| "step": 2287 |
| }, |
| { |
| "epoch": 5.223914699162224, |
| "grad_norm": 1.3076614141464233, |
| "learning_rate": 2.1327438048986627e-07, |
| "loss": 1.0895, |
| "step": 2288 |
| }, |
| { |
| "epoch": 5.2261995430312265, |
| "grad_norm": 1.3139954805374146, |
| "learning_rate": 2.120175385334322e-07, |
| "loss": 1.0546, |
| "step": 2289 |
| }, |
| { |
| "epoch": 5.228484386900228, |
| "grad_norm": 1.2856640815734863, |
| "learning_rate": 2.1076424685380848e-07, |
| "loss": 1.1181, |
| "step": 2290 |
| }, |
| { |
| "epoch": 5.230769230769231, |
| "grad_norm": 1.2663482427597046, |
| "learning_rate": 2.0951450739573664e-07, |
| "loss": 1.0945, |
| "step": 2291 |
| }, |
| { |
| "epoch": 5.2330540746382335, |
| "grad_norm": 1.2739810943603516, |
| "learning_rate": 2.082683220984452e-07, |
| "loss": 1.0752, |
| "step": 2292 |
| }, |
| { |
| "epoch": 5.235338918507235, |
| "grad_norm": 1.2725884914398193, |
| "learning_rate": 2.0702569289564683e-07, |
| "loss": 1.0558, |
| "step": 2293 |
| }, |
| { |
| "epoch": 5.237623762376238, |
| "grad_norm": 1.3000556230545044, |
| "learning_rate": 2.0578662171553798e-07, |
| "loss": 1.0901, |
| "step": 2294 |
| }, |
| { |
| "epoch": 5.23990860624524, |
| "grad_norm": 1.307210087776184, |
| "learning_rate": 2.0455111048079262e-07, |
| "loss": 1.0859, |
| "step": 2295 |
| }, |
| { |
| "epoch": 5.242193450114242, |
| "grad_norm": 1.265657663345337, |
| "learning_rate": 2.033191611085622e-07, |
| "loss": 1.1058, |
| "step": 2296 |
| }, |
| { |
| "epoch": 5.244478293983245, |
| "grad_norm": 1.2767589092254639, |
| "learning_rate": 2.020907755104698e-07, |
| "loss": 1.1249, |
| "step": 2297 |
| }, |
| { |
| "epoch": 5.246763137852247, |
| "grad_norm": 1.278253436088562, |
| "learning_rate": 2.0086595559260846e-07, |
| "loss": 1.0785, |
| "step": 2298 |
| }, |
| { |
| "epoch": 5.249047981721249, |
| "grad_norm": 1.2918927669525146, |
| "learning_rate": 1.9964470325554015e-07, |
| "loss": 1.0703, |
| "step": 2299 |
| }, |
| { |
| "epoch": 5.251332825590251, |
| "grad_norm": 1.2632293701171875, |
| "learning_rate": 1.9842702039428923e-07, |
| "loss": 1.0665, |
| "step": 2300 |
| }, |
| { |
| "epoch": 5.253617669459254, |
| "grad_norm": 1.3661508560180664, |
| "learning_rate": 1.9721290889834127e-07, |
| "loss": 1.0423, |
| "step": 2301 |
| }, |
| { |
| "epoch": 5.255902513328256, |
| "grad_norm": 1.307930827140808, |
| "learning_rate": 1.96002370651642e-07, |
| "loss": 1.0686, |
| "step": 2302 |
| }, |
| { |
| "epoch": 5.258187357197258, |
| "grad_norm": 1.2975925207138062, |
| "learning_rate": 1.9479540753259003e-07, |
| "loss": 1.0971, |
| "step": 2303 |
| }, |
| { |
| "epoch": 5.260472201066261, |
| "grad_norm": 1.295094609260559, |
| "learning_rate": 1.935920214140388e-07, |
| "loss": 1.0575, |
| "step": 2304 |
| }, |
| { |
| "epoch": 5.262757044935263, |
| "grad_norm": 1.3307294845581055, |
| "learning_rate": 1.9239221416328945e-07, |
| "loss": 1.099, |
| "step": 2305 |
| }, |
| { |
| "epoch": 5.265041888804265, |
| "grad_norm": 1.276889681816101, |
| "learning_rate": 1.911959876420902e-07, |
| "loss": 1.0865, |
| "step": 2306 |
| }, |
| { |
| "epoch": 5.267326732673268, |
| "grad_norm": 1.2554212808609009, |
| "learning_rate": 1.90003343706634e-07, |
| "loss": 1.0758, |
| "step": 2307 |
| }, |
| { |
| "epoch": 5.269611576542269, |
| "grad_norm": 1.3033947944641113, |
| "learning_rate": 1.8881428420755381e-07, |
| "loss": 1.0934, |
| "step": 2308 |
| }, |
| { |
| "epoch": 5.271896420411272, |
| "grad_norm": 1.3277416229248047, |
| "learning_rate": 1.876288109899199e-07, |
| "loss": 1.0907, |
| "step": 2309 |
| }, |
| { |
| "epoch": 5.274181264280275, |
| "grad_norm": 1.28794264793396, |
| "learning_rate": 1.864469258932397e-07, |
| "loss": 1.1042, |
| "step": 2310 |
| }, |
| { |
| "epoch": 5.276466108149276, |
| "grad_norm": 1.3302158117294312, |
| "learning_rate": 1.8526863075145091e-07, |
| "loss": 1.1262, |
| "step": 2311 |
| }, |
| { |
| "epoch": 5.278750952018279, |
| "grad_norm": 1.2931283712387085, |
| "learning_rate": 1.840939273929221e-07, |
| "loss": 1.0817, |
| "step": 2312 |
| }, |
| { |
| "epoch": 5.281035795887281, |
| "grad_norm": 1.305420994758606, |
| "learning_rate": 1.8292281764044794e-07, |
| "loss": 1.0748, |
| "step": 2313 |
| }, |
| { |
| "epoch": 5.283320639756283, |
| "grad_norm": 1.2627601623535156, |
| "learning_rate": 1.8175530331124598e-07, |
| "loss": 1.1136, |
| "step": 2314 |
| }, |
| { |
| "epoch": 5.285605483625286, |
| "grad_norm": 1.2889796495437622, |
| "learning_rate": 1.8059138621695643e-07, |
| "loss": 1.0994, |
| "step": 2315 |
| }, |
| { |
| "epoch": 5.287890327494288, |
| "grad_norm": 1.522375226020813, |
| "learning_rate": 1.794310681636366e-07, |
| "loss": 1.1194, |
| "step": 2316 |
| }, |
| { |
| "epoch": 5.29017517136329, |
| "grad_norm": 1.283361554145813, |
| "learning_rate": 1.7827435095175872e-07, |
| "loss": 1.109, |
| "step": 2317 |
| }, |
| { |
| "epoch": 5.292460015232292, |
| "grad_norm": 1.3082143068313599, |
| "learning_rate": 1.77121236376209e-07, |
| "loss": 1.0764, |
| "step": 2318 |
| }, |
| { |
| "epoch": 5.294744859101295, |
| "grad_norm": 1.32005774974823, |
| "learning_rate": 1.7597172622628193e-07, |
| "loss": 1.0917, |
| "step": 2319 |
| }, |
| { |
| "epoch": 5.297029702970297, |
| "grad_norm": 1.2927436828613281, |
| "learning_rate": 1.7482582228568052e-07, |
| "loss": 1.0659, |
| "step": 2320 |
| }, |
| { |
| "epoch": 5.299314546839299, |
| "grad_norm": 1.27031672000885, |
| "learning_rate": 1.736835263325104e-07, |
| "loss": 1.0974, |
| "step": 2321 |
| }, |
| { |
| "epoch": 5.301599390708302, |
| "grad_norm": 1.2902473211288452, |
| "learning_rate": 1.725448401392793e-07, |
| "loss": 1.0911, |
| "step": 2322 |
| }, |
| { |
| "epoch": 5.303884234577304, |
| "grad_norm": 1.2780988216400146, |
| "learning_rate": 1.7140976547289438e-07, |
| "loss": 1.0803, |
| "step": 2323 |
| }, |
| { |
| "epoch": 5.306169078446306, |
| "grad_norm": 1.2977415323257446, |
| "learning_rate": 1.702783040946579e-07, |
| "loss": 1.0697, |
| "step": 2324 |
| }, |
| { |
| "epoch": 5.308453922315309, |
| "grad_norm": 1.3391021490097046, |
| "learning_rate": 1.691504577602654e-07, |
| "loss": 1.0812, |
| "step": 2325 |
| }, |
| { |
| "epoch": 5.31073876618431, |
| "grad_norm": 1.2561603784561157, |
| "learning_rate": 1.6802622821980336e-07, |
| "loss": 1.0968, |
| "step": 2326 |
| }, |
| { |
| "epoch": 5.313023610053313, |
| "grad_norm": 1.3253194093704224, |
| "learning_rate": 1.669056172177455e-07, |
| "loss": 1.0868, |
| "step": 2327 |
| }, |
| { |
| "epoch": 5.315308453922316, |
| "grad_norm": 1.2892109155654907, |
| "learning_rate": 1.6578862649295173e-07, |
| "loss": 1.0838, |
| "step": 2328 |
| }, |
| { |
| "epoch": 5.317593297791317, |
| "grad_norm": 1.2944484949111938, |
| "learning_rate": 1.646752577786631e-07, |
| "loss": 1.081, |
| "step": 2329 |
| }, |
| { |
| "epoch": 5.31987814166032, |
| "grad_norm": 1.2977781295776367, |
| "learning_rate": 1.6356551280250072e-07, |
| "loss": 1.0709, |
| "step": 2330 |
| }, |
| { |
| "epoch": 5.322162985529322, |
| "grad_norm": 1.251643419265747, |
| "learning_rate": 1.6245939328646322e-07, |
| "loss": 1.0717, |
| "step": 2331 |
| }, |
| { |
| "epoch": 5.324447829398324, |
| "grad_norm": 1.2781786918640137, |
| "learning_rate": 1.613569009469232e-07, |
| "loss": 1.0875, |
| "step": 2332 |
| }, |
| { |
| "epoch": 5.326732673267327, |
| "grad_norm": 1.2685924768447876, |
| "learning_rate": 1.6025803749462415e-07, |
| "loss": 1.1008, |
| "step": 2333 |
| }, |
| { |
| "epoch": 5.329017517136329, |
| "grad_norm": 1.3481427431106567, |
| "learning_rate": 1.5916280463468077e-07, |
| "loss": 1.1083, |
| "step": 2334 |
| }, |
| { |
| "epoch": 5.331302361005331, |
| "grad_norm": 1.2659457921981812, |
| "learning_rate": 1.5807120406657161e-07, |
| "loss": 1.0551, |
| "step": 2335 |
| }, |
| { |
| "epoch": 5.333587204874334, |
| "grad_norm": 1.2692278623580933, |
| "learning_rate": 1.5698323748414123e-07, |
| "loss": 1.0878, |
| "step": 2336 |
| }, |
| { |
| "epoch": 5.335872048743336, |
| "grad_norm": 1.7504945993423462, |
| "learning_rate": 1.5589890657559337e-07, |
| "loss": 1.0552, |
| "step": 2337 |
| }, |
| { |
| "epoch": 5.338156892612338, |
| "grad_norm": 1.3026984930038452, |
| "learning_rate": 1.5481821302349125e-07, |
| "loss": 1.076, |
| "step": 2338 |
| }, |
| { |
| "epoch": 5.34044173648134, |
| "grad_norm": 1.316756248474121, |
| "learning_rate": 1.5374115850475436e-07, |
| "loss": 1.1045, |
| "step": 2339 |
| }, |
| { |
| "epoch": 5.342726580350343, |
| "grad_norm": 1.3154984712600708, |
| "learning_rate": 1.5266774469065494e-07, |
| "loss": 1.0935, |
| "step": 2340 |
| }, |
| { |
| "epoch": 5.345011424219345, |
| "grad_norm": 1.276450753211975, |
| "learning_rate": 1.515979732468151e-07, |
| "loss": 1.0826, |
| "step": 2341 |
| }, |
| { |
| "epoch": 5.347296268088347, |
| "grad_norm": 1.292073369026184, |
| "learning_rate": 1.5053184583320697e-07, |
| "loss": 1.0719, |
| "step": 2342 |
| }, |
| { |
| "epoch": 5.34958111195735, |
| "grad_norm": 1.280203938484192, |
| "learning_rate": 1.4946936410414643e-07, |
| "loss": 1.0969, |
| "step": 2343 |
| }, |
| { |
| "epoch": 5.3518659558263515, |
| "grad_norm": 1.3046621084213257, |
| "learning_rate": 1.4841052970829385e-07, |
| "loss": 1.1077, |
| "step": 2344 |
| }, |
| { |
| "epoch": 5.354150799695354, |
| "grad_norm": 1.3253684043884277, |
| "learning_rate": 1.4735534428864857e-07, |
| "loss": 1.1174, |
| "step": 2345 |
| }, |
| { |
| "epoch": 5.356435643564357, |
| "grad_norm": 1.2969188690185547, |
| "learning_rate": 1.4630380948254864e-07, |
| "loss": 1.0404, |
| "step": 2346 |
| }, |
| { |
| "epoch": 5.3587204874333585, |
| "grad_norm": 1.2858332395553589, |
| "learning_rate": 1.4525592692166717e-07, |
| "loss": 1.0949, |
| "step": 2347 |
| }, |
| { |
| "epoch": 5.361005331302361, |
| "grad_norm": 1.2641419172286987, |
| "learning_rate": 1.442116982320105e-07, |
| "loss": 1.0805, |
| "step": 2348 |
| }, |
| { |
| "epoch": 5.363290175171363, |
| "grad_norm": 1.2731258869171143, |
| "learning_rate": 1.4317112503391433e-07, |
| "loss": 1.0358, |
| "step": 2349 |
| }, |
| { |
| "epoch": 5.3655750190403655, |
| "grad_norm": 1.269422173500061, |
| "learning_rate": 1.4213420894204326e-07, |
| "loss": 1.0712, |
| "step": 2350 |
| }, |
| { |
| "epoch": 5.367859862909368, |
| "grad_norm": 1.2818864583969116, |
| "learning_rate": 1.4110095156538633e-07, |
| "loss": 1.092, |
| "step": 2351 |
| }, |
| { |
| "epoch": 5.37014470677837, |
| "grad_norm": 1.2838257551193237, |
| "learning_rate": 1.4007135450725518e-07, |
| "loss": 1.0933, |
| "step": 2352 |
| }, |
| { |
| "epoch": 5.3724295506473725, |
| "grad_norm": 1.3167815208435059, |
| "learning_rate": 1.3904541936528266e-07, |
| "loss": 1.0955, |
| "step": 2353 |
| }, |
| { |
| "epoch": 5.374714394516375, |
| "grad_norm": 1.3507665395736694, |
| "learning_rate": 1.380231477314181e-07, |
| "loss": 1.0991, |
| "step": 2354 |
| }, |
| { |
| "epoch": 5.376999238385377, |
| "grad_norm": 1.2883590459823608, |
| "learning_rate": 1.3700454119192714e-07, |
| "loss": 1.1111, |
| "step": 2355 |
| }, |
| { |
| "epoch": 5.3792840822543795, |
| "grad_norm": 1.3028126955032349, |
| "learning_rate": 1.3598960132738813e-07, |
| "loss": 1.0776, |
| "step": 2356 |
| }, |
| { |
| "epoch": 5.381568926123381, |
| "grad_norm": 1.3255176544189453, |
| "learning_rate": 1.3497832971268943e-07, |
| "loss": 1.0694, |
| "step": 2357 |
| }, |
| { |
| "epoch": 5.383853769992384, |
| "grad_norm": 1.275755763053894, |
| "learning_rate": 1.3397072791702798e-07, |
| "loss": 1.0773, |
| "step": 2358 |
| }, |
| { |
| "epoch": 5.3861386138613865, |
| "grad_norm": 1.298366665840149, |
| "learning_rate": 1.3296679750390533e-07, |
| "loss": 1.0787, |
| "step": 2359 |
| }, |
| { |
| "epoch": 5.388423457730388, |
| "grad_norm": 1.302127480506897, |
| "learning_rate": 1.3196654003112653e-07, |
| "loss": 1.1149, |
| "step": 2360 |
| }, |
| { |
| "epoch": 5.390708301599391, |
| "grad_norm": 1.2876914739608765, |
| "learning_rate": 1.309699570507983e-07, |
| "loss": 1.0791, |
| "step": 2361 |
| }, |
| { |
| "epoch": 5.392993145468393, |
| "grad_norm": 1.277055263519287, |
| "learning_rate": 1.2997705010932394e-07, |
| "loss": 1.1154, |
| "step": 2362 |
| }, |
| { |
| "epoch": 5.395277989337395, |
| "grad_norm": 1.2551474571228027, |
| "learning_rate": 1.289878207474035e-07, |
| "loss": 1.0869, |
| "step": 2363 |
| }, |
| { |
| "epoch": 5.397562833206398, |
| "grad_norm": 1.2961163520812988, |
| "learning_rate": 1.2800227050003056e-07, |
| "loss": 1.1141, |
| "step": 2364 |
| }, |
| { |
| "epoch": 5.3998476770754, |
| "grad_norm": 1.2772424221038818, |
| "learning_rate": 1.2702040089648954e-07, |
| "loss": 1.0393, |
| "step": 2365 |
| }, |
| { |
| "epoch": 5.402132520944402, |
| "grad_norm": 1.3062280416488647, |
| "learning_rate": 1.2604221346035422e-07, |
| "loss": 1.0961, |
| "step": 2366 |
| }, |
| { |
| "epoch": 5.404417364813405, |
| "grad_norm": 1.3183172941207886, |
| "learning_rate": 1.250677097094835e-07, |
| "loss": 1.0627, |
| "step": 2367 |
| }, |
| { |
| "epoch": 5.406702208682407, |
| "grad_norm": 1.2865747213363647, |
| "learning_rate": 1.2409689115602114e-07, |
| "loss": 1.0516, |
| "step": 2368 |
| }, |
| { |
| "epoch": 5.408987052551409, |
| "grad_norm": 1.300234317779541, |
| "learning_rate": 1.2312975930639293e-07, |
| "loss": 1.1068, |
| "step": 2369 |
| }, |
| { |
| "epoch": 5.411271896420411, |
| "grad_norm": 1.2960008382797241, |
| "learning_rate": 1.2216631566130288e-07, |
| "loss": 1.08, |
| "step": 2370 |
| }, |
| { |
| "epoch": 5.413556740289414, |
| "grad_norm": 1.243540644645691, |
| "learning_rate": 1.2120656171573236e-07, |
| "loss": 1.0736, |
| "step": 2371 |
| }, |
| { |
| "epoch": 5.415841584158416, |
| "grad_norm": 1.3193697929382324, |
| "learning_rate": 1.202504989589387e-07, |
| "loss": 1.0834, |
| "step": 2372 |
| }, |
| { |
| "epoch": 5.418126428027418, |
| "grad_norm": 1.3288146257400513, |
| "learning_rate": 1.1929812887444937e-07, |
| "loss": 1.0567, |
| "step": 2373 |
| }, |
| { |
| "epoch": 5.420411271896421, |
| "grad_norm": 1.3119516372680664, |
| "learning_rate": 1.1834945294006362e-07, |
| "loss": 1.0836, |
| "step": 2374 |
| }, |
| { |
| "epoch": 5.422696115765422, |
| "grad_norm": 1.295262098312378, |
| "learning_rate": 1.1740447262784782e-07, |
| "loss": 1.0935, |
| "step": 2375 |
| }, |
| { |
| "epoch": 5.424980959634425, |
| "grad_norm": 1.3412234783172607, |
| "learning_rate": 1.1646318940413375e-07, |
| "loss": 1.0805, |
| "step": 2376 |
| }, |
| { |
| "epoch": 5.427265803503428, |
| "grad_norm": 1.295854926109314, |
| "learning_rate": 1.1552560472951669e-07, |
| "loss": 1.0854, |
| "step": 2377 |
| }, |
| { |
| "epoch": 5.429550647372429, |
| "grad_norm": 1.2979471683502197, |
| "learning_rate": 1.1459172005885255e-07, |
| "loss": 1.0677, |
| "step": 2378 |
| }, |
| { |
| "epoch": 5.431835491241432, |
| "grad_norm": 1.291488528251648, |
| "learning_rate": 1.136615368412558e-07, |
| "loss": 1.0763, |
| "step": 2379 |
| }, |
| { |
| "epoch": 5.434120335110434, |
| "grad_norm": 1.2876052856445312, |
| "learning_rate": 1.1273505652009826e-07, |
| "loss": 1.0806, |
| "step": 2380 |
| }, |
| { |
| "epoch": 5.436405178979436, |
| "grad_norm": 1.2822387218475342, |
| "learning_rate": 1.1181228053300463e-07, |
| "loss": 1.0812, |
| "step": 2381 |
| }, |
| { |
| "epoch": 5.438690022848439, |
| "grad_norm": 1.2904083728790283, |
| "learning_rate": 1.1089321031185291e-07, |
| "loss": 1.0875, |
| "step": 2382 |
| }, |
| { |
| "epoch": 5.440974866717441, |
| "grad_norm": 1.2913849353790283, |
| "learning_rate": 1.0997784728276978e-07, |
| "loss": 1.109, |
| "step": 2383 |
| }, |
| { |
| "epoch": 5.443259710586443, |
| "grad_norm": 1.2997492551803589, |
| "learning_rate": 1.0906619286612935e-07, |
| "loss": 1.1058, |
| "step": 2384 |
| }, |
| { |
| "epoch": 5.445544554455446, |
| "grad_norm": 1.2887829542160034, |
| "learning_rate": 1.0815824847655226e-07, |
| "loss": 1.073, |
| "step": 2385 |
| }, |
| { |
| "epoch": 5.447829398324448, |
| "grad_norm": 1.3243333101272583, |
| "learning_rate": 1.0725401552290154e-07, |
| "loss": 1.0456, |
| "step": 2386 |
| }, |
| { |
| "epoch": 5.45011424219345, |
| "grad_norm": 1.2761950492858887, |
| "learning_rate": 1.0635349540828038e-07, |
| "loss": 1.064, |
| "step": 2387 |
| }, |
| { |
| "epoch": 5.452399086062452, |
| "grad_norm": 1.2706801891326904, |
| "learning_rate": 1.054566895300324e-07, |
| "loss": 1.0803, |
| "step": 2388 |
| }, |
| { |
| "epoch": 5.454683929931455, |
| "grad_norm": 1.2992305755615234, |
| "learning_rate": 1.0456359927973614e-07, |
| "loss": 1.0788, |
| "step": 2389 |
| }, |
| { |
| "epoch": 5.456968773800457, |
| "grad_norm": 1.3025919198989868, |
| "learning_rate": 1.0367422604320637e-07, |
| "loss": 1.0695, |
| "step": 2390 |
| }, |
| { |
| "epoch": 5.459253617669459, |
| "grad_norm": 1.3010618686676025, |
| "learning_rate": 1.0278857120048836e-07, |
| "loss": 1.0801, |
| "step": 2391 |
| }, |
| { |
| "epoch": 5.461538461538462, |
| "grad_norm": 1.2742735147476196, |
| "learning_rate": 1.0190663612585833e-07, |
| "loss": 1.0734, |
| "step": 2392 |
| }, |
| { |
| "epoch": 5.463823305407463, |
| "grad_norm": 1.289499282836914, |
| "learning_rate": 1.0102842218782105e-07, |
| "loss": 1.0861, |
| "step": 2393 |
| }, |
| { |
| "epoch": 5.466108149276466, |
| "grad_norm": 1.313836932182312, |
| "learning_rate": 1.0015393074910618e-07, |
| "loss": 1.103, |
| "step": 2394 |
| }, |
| { |
| "epoch": 5.468392993145469, |
| "grad_norm": 1.3196077346801758, |
| "learning_rate": 9.928316316666742e-08, |
| "loss": 1.1171, |
| "step": 2395 |
| }, |
| { |
| "epoch": 5.47067783701447, |
| "grad_norm": 1.2959681749343872, |
| "learning_rate": 9.841612079168089e-08, |
| "loss": 1.0958, |
| "step": 2396 |
| }, |
| { |
| "epoch": 5.472962680883473, |
| "grad_norm": 1.2794181108474731, |
| "learning_rate": 9.755280496954123e-08, |
| "loss": 1.0765, |
| "step": 2397 |
| }, |
| { |
| "epoch": 5.475247524752476, |
| "grad_norm": 1.3223141431808472, |
| "learning_rate": 9.669321703986157e-08, |
| "loss": 1.0987, |
| "step": 2398 |
| }, |
| { |
| "epoch": 5.477532368621477, |
| "grad_norm": 1.325971007347107, |
| "learning_rate": 9.58373583364694e-08, |
| "loss": 1.0707, |
| "step": 2399 |
| }, |
| { |
| "epoch": 5.47981721249048, |
| "grad_norm": 1.3212562799453735, |
| "learning_rate": 9.498523018740601e-08, |
| "loss": 1.0905, |
| "step": 2400 |
| }, |
| { |
| "epoch": 5.482102056359482, |
| "grad_norm": 1.2809903621673584, |
| "learning_rate": 9.413683391492456e-08, |
| "loss": 1.0842, |
| "step": 2401 |
| }, |
| { |
| "epoch": 5.484386900228484, |
| "grad_norm": 1.2972325086593628, |
| "learning_rate": 9.329217083548669e-08, |
| "loss": 1.0978, |
| "step": 2402 |
| }, |
| { |
| "epoch": 5.486671744097487, |
| "grad_norm": 1.2997273206710815, |
| "learning_rate": 9.245124225976093e-08, |
| "loss": 1.0782, |
| "step": 2403 |
| }, |
| { |
| "epoch": 5.488956587966489, |
| "grad_norm": 1.2999190092086792, |
| "learning_rate": 9.161404949262209e-08, |
| "loss": 1.0756, |
| "step": 2404 |
| }, |
| { |
| "epoch": 5.491241431835491, |
| "grad_norm": 1.2872076034545898, |
| "learning_rate": 9.078059383314685e-08, |
| "loss": 1.0967, |
| "step": 2405 |
| }, |
| { |
| "epoch": 5.493526275704493, |
| "grad_norm": 1.298041582107544, |
| "learning_rate": 8.99508765746146e-08, |
| "loss": 1.0547, |
| "step": 2406 |
| }, |
| { |
| "epoch": 5.495811119573496, |
| "grad_norm": 1.2661224603652954, |
| "learning_rate": 8.912489900450182e-08, |
| "loss": 1.1012, |
| "step": 2407 |
| }, |
| { |
| "epoch": 5.498095963442498, |
| "grad_norm": 1.3128606081008911, |
| "learning_rate": 8.83026624044836e-08, |
| "loss": 1.0688, |
| "step": 2408 |
| }, |
| { |
| "epoch": 5.5003808073115, |
| "grad_norm": 1.2731242179870605, |
| "learning_rate": 8.748416805042986e-08, |
| "loss": 1.0853, |
| "step": 2409 |
| }, |
| { |
| "epoch": 5.502665651180503, |
| "grad_norm": 1.2822498083114624, |
| "learning_rate": 8.666941721240301e-08, |
| "loss": 1.0503, |
| "step": 2410 |
| }, |
| { |
| "epoch": 5.5049504950495045, |
| "grad_norm": 1.3088481426239014, |
| "learning_rate": 8.585841115465676e-08, |
| "loss": 1.065, |
| "step": 2411 |
| }, |
| { |
| "epoch": 5.507235338918507, |
| "grad_norm": 1.3032389879226685, |
| "learning_rate": 8.505115113563528e-08, |
| "loss": 1.0494, |
| "step": 2412 |
| }, |
| { |
| "epoch": 5.50952018278751, |
| "grad_norm": 1.3076543807983398, |
| "learning_rate": 8.424763840796823e-08, |
| "loss": 1.1008, |
| "step": 2413 |
| }, |
| { |
| "epoch": 5.5118050266565115, |
| "grad_norm": 1.4247517585754395, |
| "learning_rate": 8.344787421847216e-08, |
| "loss": 1.066, |
| "step": 2414 |
| }, |
| { |
| "epoch": 5.514089870525514, |
| "grad_norm": 1.3126006126403809, |
| "learning_rate": 8.26518598081455e-08, |
| "loss": 1.0893, |
| "step": 2415 |
| }, |
| { |
| "epoch": 5.516374714394516, |
| "grad_norm": 1.2457414865493774, |
| "learning_rate": 8.185959641216878e-08, |
| "loss": 1.0683, |
| "step": 2416 |
| }, |
| { |
| "epoch": 5.5186595582635185, |
| "grad_norm": 1.3255144357681274, |
| "learning_rate": 8.107108525990281e-08, |
| "loss": 1.113, |
| "step": 2417 |
| }, |
| { |
| "epoch": 5.520944402132521, |
| "grad_norm": 1.3008490800857544, |
| "learning_rate": 8.028632757488469e-08, |
| "loss": 1.0738, |
| "step": 2418 |
| }, |
| { |
| "epoch": 5.523229246001523, |
| "grad_norm": 1.2679911851882935, |
| "learning_rate": 7.950532457482785e-08, |
| "loss": 1.0643, |
| "step": 2419 |
| }, |
| { |
| "epoch": 5.5255140898705255, |
| "grad_norm": 1.2714024782180786, |
| "learning_rate": 7.872807747162009e-08, |
| "loss": 1.0638, |
| "step": 2420 |
| }, |
| { |
| "epoch": 5.527798933739528, |
| "grad_norm": 1.3037550449371338, |
| "learning_rate": 7.795458747132028e-08, |
| "loss": 1.1542, |
| "step": 2421 |
| }, |
| { |
| "epoch": 5.53008377760853, |
| "grad_norm": 1.2944037914276123, |
| "learning_rate": 7.71848557741578e-08, |
| "loss": 1.0635, |
| "step": 2422 |
| }, |
| { |
| "epoch": 5.5323686214775325, |
| "grad_norm": 1.2680352926254272, |
| "learning_rate": 7.641888357452998e-08, |
| "loss": 1.085, |
| "step": 2423 |
| }, |
| { |
| "epoch": 5.534653465346535, |
| "grad_norm": 1.3408043384552002, |
| "learning_rate": 7.565667206100108e-08, |
| "loss": 1.093, |
| "step": 2424 |
| }, |
| { |
| "epoch": 5.536938309215537, |
| "grad_norm": 1.340598702430725, |
| "learning_rate": 7.48982224162989e-08, |
| "loss": 1.1008, |
| "step": 2425 |
| }, |
| { |
| "epoch": 5.5392231530845395, |
| "grad_norm": 1.3032371997833252, |
| "learning_rate": 7.414353581731536e-08, |
| "loss": 1.0971, |
| "step": 2426 |
| }, |
| { |
| "epoch": 5.541507996953541, |
| "grad_norm": 1.2875107526779175, |
| "learning_rate": 7.339261343510207e-08, |
| "loss": 1.0771, |
| "step": 2427 |
| }, |
| { |
| "epoch": 5.543792840822544, |
| "grad_norm": 1.2665351629257202, |
| "learning_rate": 7.264545643486997e-08, |
| "loss": 1.0736, |
| "step": 2428 |
| }, |
| { |
| "epoch": 5.5460776846915465, |
| "grad_norm": 1.3342937231063843, |
| "learning_rate": 7.190206597598726e-08, |
| "loss": 1.0984, |
| "step": 2429 |
| }, |
| { |
| "epoch": 5.548362528560548, |
| "grad_norm": 1.290933609008789, |
| "learning_rate": 7.116244321197729e-08, |
| "loss": 1.0919, |
| "step": 2430 |
| }, |
| { |
| "epoch": 5.550647372429551, |
| "grad_norm": 1.2763770818710327, |
| "learning_rate": 7.042658929051816e-08, |
| "loss": 1.0922, |
| "step": 2431 |
| }, |
| { |
| "epoch": 5.552932216298553, |
| "grad_norm": 1.307851791381836, |
| "learning_rate": 6.969450535343841e-08, |
| "loss": 1.1177, |
| "step": 2432 |
| }, |
| { |
| "epoch": 5.555217060167555, |
| "grad_norm": 1.2933450937271118, |
| "learning_rate": 6.896619253671743e-08, |
| "loss": 1.0963, |
| "step": 2433 |
| }, |
| { |
| "epoch": 5.557501904036558, |
| "grad_norm": 1.2725058794021606, |
| "learning_rate": 6.824165197048316e-08, |
| "loss": 1.0783, |
| "step": 2434 |
| }, |
| { |
| "epoch": 5.55978674790556, |
| "grad_norm": 1.2997236251831055, |
| "learning_rate": 6.752088477900903e-08, |
| "loss": 1.0688, |
| "step": 2435 |
| }, |
| { |
| "epoch": 5.562071591774562, |
| "grad_norm": 1.2684441804885864, |
| "learning_rate": 6.680389208071459e-08, |
| "loss": 1.1119, |
| "step": 2436 |
| }, |
| { |
| "epoch": 5.564356435643564, |
| "grad_norm": 1.2946056127548218, |
| "learning_rate": 6.609067498816207e-08, |
| "loss": 1.0673, |
| "step": 2437 |
| }, |
| { |
| "epoch": 5.566641279512567, |
| "grad_norm": 1.2829228639602661, |
| "learning_rate": 6.538123460805428e-08, |
| "loss": 1.0584, |
| "step": 2438 |
| }, |
| { |
| "epoch": 5.568926123381569, |
| "grad_norm": 1.2506768703460693, |
| "learning_rate": 6.467557204123508e-08, |
| "loss": 1.1196, |
| "step": 2439 |
| }, |
| { |
| "epoch": 5.571210967250571, |
| "grad_norm": 1.3308897018432617, |
| "learning_rate": 6.397368838268497e-08, |
| "loss": 1.105, |
| "step": 2440 |
| }, |
| { |
| "epoch": 5.573495811119574, |
| "grad_norm": 1.247656226158142, |
| "learning_rate": 6.327558472152134e-08, |
| "loss": 1.0863, |
| "step": 2441 |
| }, |
| { |
| "epoch": 5.575780654988575, |
| "grad_norm": 1.3870344161987305, |
| "learning_rate": 6.25812621409963e-08, |
| "loss": 1.127, |
| "step": 2442 |
| }, |
| { |
| "epoch": 5.578065498857578, |
| "grad_norm": 1.300175428390503, |
| "learning_rate": 6.189072171849414e-08, |
| "loss": 1.1026, |
| "step": 2443 |
| }, |
| { |
| "epoch": 5.580350342726581, |
| "grad_norm": 1.2674915790557861, |
| "learning_rate": 6.120396452553162e-08, |
| "loss": 1.1207, |
| "step": 2444 |
| }, |
| { |
| "epoch": 5.582635186595582, |
| "grad_norm": 1.3113480806350708, |
| "learning_rate": 6.052099162775327e-08, |
| "loss": 1.0931, |
| "step": 2445 |
| }, |
| { |
| "epoch": 5.584920030464585, |
| "grad_norm": 1.277754783630371, |
| "learning_rate": 5.984180408493273e-08, |
| "loss": 1.0537, |
| "step": 2446 |
| }, |
| { |
| "epoch": 5.587204874333588, |
| "grad_norm": 1.3144258260726929, |
| "learning_rate": 5.9166402950970035e-08, |
| "loss": 1.066, |
| "step": 2447 |
| }, |
| { |
| "epoch": 5.589489718202589, |
| "grad_norm": 1.2873685359954834, |
| "learning_rate": 5.8494789273888796e-08, |
| "loss": 1.0881, |
| "step": 2448 |
| }, |
| { |
| "epoch": 5.591774562071592, |
| "grad_norm": 1.2995277643203735, |
| "learning_rate": 5.782696409583649e-08, |
| "loss": 1.1082, |
| "step": 2449 |
| }, |
| { |
| "epoch": 5.594059405940594, |
| "grad_norm": 1.2935672998428345, |
| "learning_rate": 5.716292845308169e-08, |
| "loss": 1.1162, |
| "step": 2450 |
| }, |
| { |
| "epoch": 5.596344249809596, |
| "grad_norm": 1.2738548517227173, |
| "learning_rate": 5.650268337601239e-08, |
| "loss": 1.074, |
| "step": 2451 |
| }, |
| { |
| "epoch": 5.598629093678599, |
| "grad_norm": 1.2765862941741943, |
| "learning_rate": 5.584622988913546e-08, |
| "loss": 1.0489, |
| "step": 2452 |
| }, |
| { |
| "epoch": 5.600913937547601, |
| "grad_norm": 1.3241945505142212, |
| "learning_rate": 5.519356901107359e-08, |
| "loss": 1.0913, |
| "step": 2453 |
| }, |
| { |
| "epoch": 5.603198781416603, |
| "grad_norm": 1.292481780052185, |
| "learning_rate": 5.454470175456472e-08, |
| "loss": 1.0842, |
| "step": 2454 |
| }, |
| { |
| "epoch": 5.605483625285606, |
| "grad_norm": 1.2790048122406006, |
| "learning_rate": 5.3899629126460686e-08, |
| "loss": 1.0938, |
| "step": 2455 |
| }, |
| { |
| "epoch": 5.607768469154608, |
| "grad_norm": 1.2987381219863892, |
| "learning_rate": 5.32583521277244e-08, |
| "loss": 1.0684, |
| "step": 2456 |
| }, |
| { |
| "epoch": 5.61005331302361, |
| "grad_norm": 1.2727071046829224, |
| "learning_rate": 5.2620871753429335e-08, |
| "loss": 1.0597, |
| "step": 2457 |
| }, |
| { |
| "epoch": 5.612338156892612, |
| "grad_norm": 1.2885806560516357, |
| "learning_rate": 5.1987188992758396e-08, |
| "loss": 1.0773, |
| "step": 2458 |
| }, |
| { |
| "epoch": 5.614623000761615, |
| "grad_norm": 1.3235818147659302, |
| "learning_rate": 5.135730482900059e-08, |
| "loss": 1.0766, |
| "step": 2459 |
| }, |
| { |
| "epoch": 5.616907844630617, |
| "grad_norm": 1.3038285970687866, |
| "learning_rate": 5.073122023955101e-08, |
| "loss": 1.0565, |
| "step": 2460 |
| }, |
| { |
| "epoch": 5.619192688499619, |
| "grad_norm": 1.3045719861984253, |
| "learning_rate": 5.010893619590951e-08, |
| "loss": 1.1003, |
| "step": 2461 |
| }, |
| { |
| "epoch": 5.621477532368622, |
| "grad_norm": 1.3066750764846802, |
| "learning_rate": 4.949045366367783e-08, |
| "loss": 1.0772, |
| "step": 2462 |
| }, |
| { |
| "epoch": 5.623762376237623, |
| "grad_norm": 1.2741373777389526, |
| "learning_rate": 4.8875773602559404e-08, |
| "loss": 1.071, |
| "step": 2463 |
| }, |
| { |
| "epoch": 5.626047220106626, |
| "grad_norm": 1.2700215578079224, |
| "learning_rate": 4.8264896966357386e-08, |
| "loss": 1.1054, |
| "step": 2464 |
| }, |
| { |
| "epoch": 5.628332063975629, |
| "grad_norm": 1.3035249710083008, |
| "learning_rate": 4.7657824702972144e-08, |
| "loss": 1.1259, |
| "step": 2465 |
| }, |
| { |
| "epoch": 5.63061690784463, |
| "grad_norm": 1.3194633722305298, |
| "learning_rate": 4.705455775440237e-08, |
| "loss": 1.0753, |
| "step": 2466 |
| }, |
| { |
| "epoch": 5.632901751713633, |
| "grad_norm": 1.31345534324646, |
| "learning_rate": 4.645509705674095e-08, |
| "loss": 1.0838, |
| "step": 2467 |
| }, |
| { |
| "epoch": 5.635186595582635, |
| "grad_norm": 1.3296149969100952, |
| "learning_rate": 4.585944354017435e-08, |
| "loss": 1.0917, |
| "step": 2468 |
| }, |
| { |
| "epoch": 5.637471439451637, |
| "grad_norm": 1.316845417022705, |
| "learning_rate": 4.526759812898268e-08, |
| "loss": 1.0999, |
| "step": 2469 |
| }, |
| { |
| "epoch": 5.63975628332064, |
| "grad_norm": 1.2568460702896118, |
| "learning_rate": 4.46795617415352e-08, |
| "loss": 1.0767, |
| "step": 2470 |
| }, |
| { |
| "epoch": 5.642041127189642, |
| "grad_norm": 1.2605866193771362, |
| "learning_rate": 4.4095335290292865e-08, |
| "loss": 1.0988, |
| "step": 2471 |
| }, |
| { |
| "epoch": 5.644325971058644, |
| "grad_norm": 1.3232251405715942, |
| "learning_rate": 4.3514919681802714e-08, |
| "loss": 1.0881, |
| "step": 2472 |
| }, |
| { |
| "epoch": 5.646610814927646, |
| "grad_norm": 1.2982079982757568, |
| "learning_rate": 4.293831581669933e-08, |
| "loss": 1.0808, |
| "step": 2473 |
| }, |
| { |
| "epoch": 5.648895658796649, |
| "grad_norm": 1.2938297986984253, |
| "learning_rate": 4.236552458970311e-08, |
| "loss": 1.0641, |
| "step": 2474 |
| }, |
| { |
| "epoch": 5.651180502665651, |
| "grad_norm": 1.2853021621704102, |
| "learning_rate": 4.1796546889617265e-08, |
| "loss": 1.0893, |
| "step": 2475 |
| }, |
| { |
| "epoch": 5.653465346534653, |
| "grad_norm": 1.3274376392364502, |
| "learning_rate": 4.123138359932805e-08, |
| "loss": 1.1138, |
| "step": 2476 |
| }, |
| { |
| "epoch": 5.655750190403656, |
| "grad_norm": 1.296823501586914, |
| "learning_rate": 4.0670035595803684e-08, |
| "loss": 1.0647, |
| "step": 2477 |
| }, |
| { |
| "epoch": 5.658035034272658, |
| "grad_norm": 1.3049858808517456, |
| "learning_rate": 4.011250375009018e-08, |
| "loss": 1.0294, |
| "step": 2478 |
| }, |
| { |
| "epoch": 5.66031987814166, |
| "grad_norm": 1.2485992908477783, |
| "learning_rate": 3.955878892731441e-08, |
| "loss": 1.101, |
| "step": 2479 |
| }, |
| { |
| "epoch": 5.662604722010663, |
| "grad_norm": 1.3331776857376099, |
| "learning_rate": 3.900889198667851e-08, |
| "loss": 1.1112, |
| "step": 2480 |
| }, |
| { |
| "epoch": 5.6648895658796645, |
| "grad_norm": 1.3384984731674194, |
| "learning_rate": 3.846281378146105e-08, |
| "loss": 1.0834, |
| "step": 2481 |
| }, |
| { |
| "epoch": 5.667174409748667, |
| "grad_norm": 1.2879012823104858, |
| "learning_rate": 3.792055515901533e-08, |
| "loss": 1.0664, |
| "step": 2482 |
| }, |
| { |
| "epoch": 5.66945925361767, |
| "grad_norm": 1.2924339771270752, |
| "learning_rate": 3.738211696076716e-08, |
| "loss": 1.0791, |
| "step": 2483 |
| }, |
| { |
| "epoch": 5.6717440974866715, |
| "grad_norm": 1.2877463102340698, |
| "learning_rate": 3.6847500022214597e-08, |
| "loss": 1.0931, |
| "step": 2484 |
| }, |
| { |
| "epoch": 5.674028941355674, |
| "grad_norm": 1.2863349914550781, |
| "learning_rate": 3.631670517292629e-08, |
| "loss": 1.0637, |
| "step": 2485 |
| }, |
| { |
| "epoch": 5.676313785224677, |
| "grad_norm": 1.2646994590759277, |
| "learning_rate": 3.5789733236539505e-08, |
| "loss": 1.0603, |
| "step": 2486 |
| }, |
| { |
| "epoch": 5.6785986290936785, |
| "grad_norm": 1.2881466150283813, |
| "learning_rate": 3.5266585030760416e-08, |
| "loss": 1.1304, |
| "step": 2487 |
| }, |
| { |
| "epoch": 5.680883472962681, |
| "grad_norm": 1.2763415575027466, |
| "learning_rate": 3.474726136736106e-08, |
| "loss": 1.0914, |
| "step": 2488 |
| }, |
| { |
| "epoch": 5.683168316831683, |
| "grad_norm": 1.2673840522766113, |
| "learning_rate": 3.423176305217907e-08, |
| "loss": 1.0831, |
| "step": 2489 |
| }, |
| { |
| "epoch": 5.6854531607006855, |
| "grad_norm": 1.2843284606933594, |
| "learning_rate": 3.372009088511707e-08, |
| "loss": 1.0711, |
| "step": 2490 |
| }, |
| { |
| "epoch": 5.687738004569688, |
| "grad_norm": 1.3351730108261108, |
| "learning_rate": 3.3212245660139695e-08, |
| "loss": 1.1146, |
| "step": 2491 |
| }, |
| { |
| "epoch": 5.69002284843869, |
| "grad_norm": 1.3806109428405762, |
| "learning_rate": 3.270822816527325e-08, |
| "loss": 1.0438, |
| "step": 2492 |
| }, |
| { |
| "epoch": 5.6923076923076925, |
| "grad_norm": 1.3130910396575928, |
| "learning_rate": 3.2208039182605456e-08, |
| "loss": 1.1089, |
| "step": 2493 |
| }, |
| { |
| "epoch": 5.694592536176694, |
| "grad_norm": 1.2872965335845947, |
| "learning_rate": 3.1711679488282135e-08, |
| "loss": 1.1009, |
| "step": 2494 |
| }, |
| { |
| "epoch": 5.696877380045697, |
| "grad_norm": 1.2836264371871948, |
| "learning_rate": 3.121914985250801e-08, |
| "loss": 1.0712, |
| "step": 2495 |
| }, |
| { |
| "epoch": 5.6991622239146995, |
| "grad_norm": 1.3084354400634766, |
| "learning_rate": 3.0730451039544527e-08, |
| "loss": 1.0736, |
| "step": 2496 |
| }, |
| { |
| "epoch": 5.701447067783701, |
| "grad_norm": 1.3039108514785767, |
| "learning_rate": 3.0245583807708425e-08, |
| "loss": 1.1067, |
| "step": 2497 |
| }, |
| { |
| "epoch": 5.703731911652704, |
| "grad_norm": 1.291584849357605, |
| "learning_rate": 2.976454890937175e-08, |
| "loss": 1.1144, |
| "step": 2498 |
| }, |
| { |
| "epoch": 5.706016755521706, |
| "grad_norm": 1.3033367395401, |
| "learning_rate": 2.9287347090958816e-08, |
| "loss": 1.0828, |
| "step": 2499 |
| }, |
| { |
| "epoch": 5.708301599390708, |
| "grad_norm": 1.2630605697631836, |
| "learning_rate": 2.8813979092947032e-08, |
| "loss": 1.0922, |
| "step": 2500 |
| }, |
| { |
| "epoch": 5.710586443259711, |
| "grad_norm": 1.3124958276748657, |
| "learning_rate": 2.834444564986438e-08, |
| "loss": 1.0406, |
| "step": 2501 |
| }, |
| { |
| "epoch": 5.712871287128713, |
| "grad_norm": 1.2817994356155396, |
| "learning_rate": 2.7878747490288627e-08, |
| "loss": 1.0972, |
| "step": 2502 |
| }, |
| { |
| "epoch": 5.715156130997715, |
| "grad_norm": 1.2732033729553223, |
| "learning_rate": 2.7416885336847278e-08, |
| "loss": 1.0886, |
| "step": 2503 |
| }, |
| { |
| "epoch": 5.717440974866717, |
| "grad_norm": 1.3142942190170288, |
| "learning_rate": 2.6958859906213996e-08, |
| "loss": 1.0905, |
| "step": 2504 |
| }, |
| { |
| "epoch": 5.71972581873572, |
| "grad_norm": 1.2737411260604858, |
| "learning_rate": 2.6504671909109993e-08, |
| "loss": 1.0746, |
| "step": 2505 |
| }, |
| { |
| "epoch": 5.722010662604722, |
| "grad_norm": 1.3381272554397583, |
| "learning_rate": 2.6054322050301782e-08, |
| "loss": 1.0316, |
| "step": 2506 |
| }, |
| { |
| "epoch": 5.724295506473724, |
| "grad_norm": 1.3035916090011597, |
| "learning_rate": 2.560781102859983e-08, |
| "loss": 1.1255, |
| "step": 2507 |
| }, |
| { |
| "epoch": 5.726580350342727, |
| "grad_norm": 1.3344190120697021, |
| "learning_rate": 2.516513953685823e-08, |
| "loss": 1.08, |
| "step": 2508 |
| }, |
| { |
| "epoch": 5.728865194211729, |
| "grad_norm": 1.2997770309448242, |
| "learning_rate": 2.4726308261973366e-08, |
| "loss": 1.0356, |
| "step": 2509 |
| }, |
| { |
| "epoch": 5.731150038080731, |
| "grad_norm": 1.3305389881134033, |
| "learning_rate": 2.4291317884882205e-08, |
| "loss": 1.0781, |
| "step": 2510 |
| }, |
| { |
| "epoch": 5.733434881949734, |
| "grad_norm": 1.3318109512329102, |
| "learning_rate": 2.3860169080562046e-08, |
| "loss": 1.0829, |
| "step": 2511 |
| }, |
| { |
| "epoch": 5.735719725818735, |
| "grad_norm": 1.2981702089309692, |
| "learning_rate": 2.3432862518029397e-08, |
| "loss": 1.0837, |
| "step": 2512 |
| }, |
| { |
| "epoch": 5.738004569687738, |
| "grad_norm": 1.3197274208068848, |
| "learning_rate": 2.3009398860338315e-08, |
| "loss": 1.1062, |
| "step": 2513 |
| }, |
| { |
| "epoch": 5.740289413556741, |
| "grad_norm": 1.2988827228546143, |
| "learning_rate": 2.2589778764580128e-08, |
| "loss": 1.0966, |
| "step": 2514 |
| }, |
| { |
| "epoch": 5.742574257425742, |
| "grad_norm": 1.2950987815856934, |
| "learning_rate": 2.217400288188204e-08, |
| "loss": 1.0548, |
| "step": 2515 |
| }, |
| { |
| "epoch": 5.744859101294745, |
| "grad_norm": 1.3110990524291992, |
| "learning_rate": 2.176207185740603e-08, |
| "loss": 1.0633, |
| "step": 2516 |
| }, |
| { |
| "epoch": 5.747143945163748, |
| "grad_norm": 1.2705743312835693, |
| "learning_rate": 2.135398633034802e-08, |
| "loss": 1.0739, |
| "step": 2517 |
| }, |
| { |
| "epoch": 5.749428789032749, |
| "grad_norm": 1.2896722555160522, |
| "learning_rate": 2.094974693393731e-08, |
| "loss": 1.1057, |
| "step": 2518 |
| }, |
| { |
| "epoch": 5.751713632901752, |
| "grad_norm": 1.2686283588409424, |
| "learning_rate": 2.054935429543409e-08, |
| "loss": 1.0857, |
| "step": 2519 |
| }, |
| { |
| "epoch": 5.753998476770754, |
| "grad_norm": 1.2696059942245483, |
| "learning_rate": 2.0152809036130538e-08, |
| "loss": 1.0561, |
| "step": 2520 |
| }, |
| { |
| "epoch": 5.756283320639756, |
| "grad_norm": 1.2757072448730469, |
| "learning_rate": 1.9760111771348345e-08, |
| "loss": 1.0842, |
| "step": 2521 |
| }, |
| { |
| "epoch": 5.758568164508759, |
| "grad_norm": 1.275511384010315, |
| "learning_rate": 1.937126311043813e-08, |
| "loss": 1.1007, |
| "step": 2522 |
| }, |
| { |
| "epoch": 5.760853008377761, |
| "grad_norm": 1.3018161058425903, |
| "learning_rate": 1.8986263656779193e-08, |
| "loss": 1.0786, |
| "step": 2523 |
| }, |
| { |
| "epoch": 5.763137852246763, |
| "grad_norm": 1.3318055868148804, |
| "learning_rate": 1.8605114007777258e-08, |
| "loss": 1.0835, |
| "step": 2524 |
| }, |
| { |
| "epoch": 5.765422696115765, |
| "grad_norm": 1.3084681034088135, |
| "learning_rate": 1.822781475486507e-08, |
| "loss": 1.0443, |
| "step": 2525 |
| }, |
| { |
| "epoch": 5.767707539984768, |
| "grad_norm": 1.2735244035720825, |
| "learning_rate": 1.7854366483499863e-08, |
| "loss": 1.1164, |
| "step": 2526 |
| }, |
| { |
| "epoch": 5.76999238385377, |
| "grad_norm": 1.2862602472305298, |
| "learning_rate": 1.7484769773163657e-08, |
| "loss": 1.0733, |
| "step": 2527 |
| }, |
| { |
| "epoch": 5.772277227722772, |
| "grad_norm": 1.3294838666915894, |
| "learning_rate": 1.7119025197362416e-08, |
| "loss": 1.0981, |
| "step": 2528 |
| }, |
| { |
| "epoch": 5.774562071591775, |
| "grad_norm": 1.3161261081695557, |
| "learning_rate": 1.6757133323623832e-08, |
| "loss": 1.1003, |
| "step": 2529 |
| }, |
| { |
| "epoch": 5.776846915460776, |
| "grad_norm": 1.2910784482955933, |
| "learning_rate": 1.6399094713498154e-08, |
| "loss": 1.0992, |
| "step": 2530 |
| }, |
| { |
| "epoch": 5.779131759329779, |
| "grad_norm": 1.3026399612426758, |
| "learning_rate": 1.6044909922555973e-08, |
| "loss": 1.095, |
| "step": 2531 |
| }, |
| { |
| "epoch": 5.781416603198782, |
| "grad_norm": 1.3220399618148804, |
| "learning_rate": 1.569457950038794e-08, |
| "loss": 1.1457, |
| "step": 2532 |
| }, |
| { |
| "epoch": 5.783701447067783, |
| "grad_norm": 1.3009707927703857, |
| "learning_rate": 1.5348103990604214e-08, |
| "loss": 1.0756, |
| "step": 2533 |
| }, |
| { |
| "epoch": 5.785986290936786, |
| "grad_norm": 1.2853416204452515, |
| "learning_rate": 1.5005483930833066e-08, |
| "loss": 1.0826, |
| "step": 2534 |
| }, |
| { |
| "epoch": 5.788271134805788, |
| "grad_norm": 1.2637382745742798, |
| "learning_rate": 1.4666719852719779e-08, |
| "loss": 1.0563, |
| "step": 2535 |
| }, |
| { |
| "epoch": 5.79055597867479, |
| "grad_norm": 1.267993450164795, |
| "learning_rate": 1.4331812281927204e-08, |
| "loss": 1.0781, |
| "step": 2536 |
| }, |
| { |
| "epoch": 5.792840822543793, |
| "grad_norm": 1.2659962177276611, |
| "learning_rate": 1.4000761738133528e-08, |
| "loss": 1.0997, |
| "step": 2537 |
| }, |
| { |
| "epoch": 5.795125666412795, |
| "grad_norm": 1.311763882637024, |
| "learning_rate": 1.367356873503145e-08, |
| "loss": 1.0806, |
| "step": 2538 |
| }, |
| { |
| "epoch": 5.797410510281797, |
| "grad_norm": 1.2864328622817993, |
| "learning_rate": 1.3350233780329014e-08, |
| "loss": 1.0602, |
| "step": 2539 |
| }, |
| { |
| "epoch": 5.7996953541508, |
| "grad_norm": 1.2898355722427368, |
| "learning_rate": 1.303075737574655e-08, |
| "loss": 1.0884, |
| "step": 2540 |
| }, |
| { |
| "epoch": 5.801980198019802, |
| "grad_norm": 1.2957934141159058, |
| "learning_rate": 1.2715140017018346e-08, |
| "loss": 1.0537, |
| "step": 2541 |
| }, |
| { |
| "epoch": 5.804265041888804, |
| "grad_norm": 1.2888224124908447, |
| "learning_rate": 1.2403382193889036e-08, |
| "loss": 1.0742, |
| "step": 2542 |
| }, |
| { |
| "epoch": 5.806549885757806, |
| "grad_norm": 1.2837013006210327, |
| "learning_rate": 1.2095484390115541e-08, |
| "loss": 1.0882, |
| "step": 2543 |
| }, |
| { |
| "epoch": 5.808834729626809, |
| "grad_norm": 1.3324216604232788, |
| "learning_rate": 1.1791447083465136e-08, |
| "loss": 1.1201, |
| "step": 2544 |
| }, |
| { |
| "epoch": 5.811119573495811, |
| "grad_norm": 1.2848390340805054, |
| "learning_rate": 1.1491270745714044e-08, |
| "loss": 1.0827, |
| "step": 2545 |
| }, |
| { |
| "epoch": 5.813404417364813, |
| "grad_norm": 1.2867978811264038, |
| "learning_rate": 1.1194955842647736e-08, |
| "loss": 1.0474, |
| "step": 2546 |
| }, |
| { |
| "epoch": 5.815689261233816, |
| "grad_norm": 1.2755886316299438, |
| "learning_rate": 1.090250283406008e-08, |
| "loss": 1.083, |
| "step": 2547 |
| }, |
| { |
| "epoch": 5.817974105102818, |
| "grad_norm": 1.3513338565826416, |
| "learning_rate": 1.0613912173752239e-08, |
| "loss": 1.1062, |
| "step": 2548 |
| }, |
| { |
| "epoch": 5.82025894897182, |
| "grad_norm": 1.2771309614181519, |
| "learning_rate": 1.0329184309532114e-08, |
| "loss": 1.1171, |
| "step": 2549 |
| }, |
| { |
| "epoch": 5.822543792840823, |
| "grad_norm": 1.28047776222229, |
| "learning_rate": 1.0048319683213792e-08, |
| "loss": 1.0738, |
| "step": 2550 |
| }, |
| { |
| "epoch": 5.8248286367098245, |
| "grad_norm": 1.3358758687973022, |
| "learning_rate": 9.771318730616708e-09, |
| "loss": 1.1125, |
| "step": 2551 |
| }, |
| { |
| "epoch": 5.827113480578827, |
| "grad_norm": 1.2923208475112915, |
| "learning_rate": 9.498181881564816e-09, |
| "loss": 1.0802, |
| "step": 2552 |
| }, |
| { |
| "epoch": 5.82939832444783, |
| "grad_norm": 1.274404525756836, |
| "learning_rate": 9.22890955988659e-09, |
| "loss": 1.0557, |
| "step": 2553 |
| }, |
| { |
| "epoch": 5.8316831683168315, |
| "grad_norm": 1.318577527999878, |
| "learning_rate": 8.963502183413353e-09, |
| "loss": 1.0755, |
| "step": 2554 |
| }, |
| { |
| "epoch": 5.833968012185834, |
| "grad_norm": 1.2825225591659546, |
| "learning_rate": 8.701960163979283e-09, |
| "loss": 1.1189, |
| "step": 2555 |
| }, |
| { |
| "epoch": 5.836252856054836, |
| "grad_norm": 1.2995526790618896, |
| "learning_rate": 8.444283907421136e-09, |
| "loss": 1.1139, |
| "step": 2556 |
| }, |
| { |
| "epoch": 5.8385376999238385, |
| "grad_norm": 1.3120074272155762, |
| "learning_rate": 8.190473813576571e-09, |
| "loss": 1.093, |
| "step": 2557 |
| }, |
| { |
| "epoch": 5.840822543792841, |
| "grad_norm": 1.2919102907180786, |
| "learning_rate": 7.940530276284163e-09, |
| "loss": 1.0544, |
| "step": 2558 |
| }, |
| { |
| "epoch": 5.843107387661843, |
| "grad_norm": 1.2919046878814697, |
| "learning_rate": 7.694453683383396e-09, |
| "loss": 1.0666, |
| "step": 2559 |
| }, |
| { |
| "epoch": 5.8453922315308455, |
| "grad_norm": 1.2774494886398315, |
| "learning_rate": 7.452244416712162e-09, |
| "loss": 1.0835, |
| "step": 2560 |
| }, |
| { |
| "epoch": 5.847677075399847, |
| "grad_norm": 1.2975057363510132, |
| "learning_rate": 7.2139028521087114e-09, |
| "loss": 1.0652, |
| "step": 2561 |
| }, |
| { |
| "epoch": 5.84996191926885, |
| "grad_norm": 1.3106671571731567, |
| "learning_rate": 6.979429359408874e-09, |
| "loss": 1.0754, |
| "step": 2562 |
| }, |
| { |
| "epoch": 5.8522467631378525, |
| "grad_norm": 1.2943204641342163, |
| "learning_rate": 6.748824302446611e-09, |
| "loss": 1.0634, |
| "step": 2563 |
| }, |
| { |
| "epoch": 5.854531607006854, |
| "grad_norm": 1.2920920848846436, |
| "learning_rate": 6.522088039053187e-09, |
| "loss": 1.1051, |
| "step": 2564 |
| }, |
| { |
| "epoch": 5.856816450875857, |
| "grad_norm": 1.2778362035751343, |
| "learning_rate": 6.2992209210571695e-09, |
| "loss": 1.0863, |
| "step": 2565 |
| }, |
| { |
| "epoch": 5.859101294744859, |
| "grad_norm": 1.2808789014816284, |
| "learning_rate": 6.0802232942822056e-09, |
| "loss": 1.0971, |
| "step": 2566 |
| }, |
| { |
| "epoch": 5.861386138613861, |
| "grad_norm": 1.2962878942489624, |
| "learning_rate": 5.86509549854869e-09, |
| "loss": 1.1123, |
| "step": 2567 |
| }, |
| { |
| "epoch": 5.863670982482864, |
| "grad_norm": 1.2766149044036865, |
| "learning_rate": 5.653837867671819e-09, |
| "loss": 1.0895, |
| "step": 2568 |
| }, |
| { |
| "epoch": 5.865955826351866, |
| "grad_norm": 1.3137177228927612, |
| "learning_rate": 5.4464507294613165e-09, |
| "loss": 1.0533, |
| "step": 2569 |
| }, |
| { |
| "epoch": 5.868240670220868, |
| "grad_norm": 1.3188068866729736, |
| "learning_rate": 5.242934405720879e-09, |
| "loss": 1.0744, |
| "step": 2570 |
| }, |
| { |
| "epoch": 5.870525514089871, |
| "grad_norm": 1.277276635169983, |
| "learning_rate": 5.0432892122484476e-09, |
| "loss": 1.0666, |
| "step": 2571 |
| }, |
| { |
| "epoch": 5.872810357958873, |
| "grad_norm": 1.2813732624053955, |
| "learning_rate": 4.847515458834273e-09, |
| "loss": 1.0539, |
| "step": 2572 |
| }, |
| { |
| "epoch": 5.875095201827875, |
| "grad_norm": 1.2894470691680908, |
| "learning_rate": 4.655613449262298e-09, |
| "loss": 1.0949, |
| "step": 2573 |
| }, |
| { |
| "epoch": 5.877380045696877, |
| "grad_norm": 1.2572760581970215, |
| "learning_rate": 4.46758348130738e-09, |
| "loss": 1.0463, |
| "step": 2574 |
| }, |
| { |
| "epoch": 5.87966488956588, |
| "grad_norm": 1.3162351846694946, |
| "learning_rate": 4.283425846737521e-09, |
| "loss": 1.0896, |
| "step": 2575 |
| }, |
| { |
| "epoch": 5.881949733434882, |
| "grad_norm": 1.3233922719955444, |
| "learning_rate": 4.1031408313108035e-09, |
| "loss": 1.0681, |
| "step": 2576 |
| }, |
| { |
| "epoch": 5.884234577303884, |
| "grad_norm": 1.281401515007019, |
| "learning_rate": 3.926728714776784e-09, |
| "loss": 1.084, |
| "step": 2577 |
| }, |
| { |
| "epoch": 5.886519421172887, |
| "grad_norm": 1.2803399562835693, |
| "learning_rate": 3.754189770875383e-09, |
| "loss": 1.0735, |
| "step": 2578 |
| }, |
| { |
| "epoch": 5.888804265041889, |
| "grad_norm": 1.2727675437927246, |
| "learning_rate": 3.5855242673363267e-09, |
| "loss": 1.1046, |
| "step": 2579 |
| }, |
| { |
| "epoch": 5.891089108910891, |
| "grad_norm": 1.277085542678833, |
| "learning_rate": 3.420732465878596e-09, |
| "loss": 1.0548, |
| "step": 2580 |
| }, |
| { |
| "epoch": 5.893373952779894, |
| "grad_norm": 1.3056312799453735, |
| "learning_rate": 3.2598146222109773e-09, |
| "loss": 1.0506, |
| "step": 2581 |
| }, |
| { |
| "epoch": 5.895658796648895, |
| "grad_norm": 1.2820820808410645, |
| "learning_rate": 3.102770986030679e-09, |
| "loss": 1.0938, |
| "step": 2582 |
| }, |
| { |
| "epoch": 5.897943640517898, |
| "grad_norm": 1.3365559577941895, |
| "learning_rate": 2.9496018010233275e-09, |
| "loss": 1.1194, |
| "step": 2583 |
| }, |
| { |
| "epoch": 5.900228484386901, |
| "grad_norm": 1.2692946195602417, |
| "learning_rate": 2.8003073048621376e-09, |
| "loss": 1.0947, |
| "step": 2584 |
| }, |
| { |
| "epoch": 5.902513328255902, |
| "grad_norm": 1.3099660873413086, |
| "learning_rate": 2.6548877292090215e-09, |
| "loss": 1.0575, |
| "step": 2585 |
| }, |
| { |
| "epoch": 5.904798172124905, |
| "grad_norm": 1.2974201440811157, |
| "learning_rate": 2.5133432997118125e-09, |
| "loss": 1.0671, |
| "step": 2586 |
| }, |
| { |
| "epoch": 5.907083015993907, |
| "grad_norm": 1.2797954082489014, |
| "learning_rate": 2.3756742360062092e-09, |
| "loss": 1.0683, |
| "step": 2587 |
| }, |
| { |
| "epoch": 5.909367859862909, |
| "grad_norm": 1.284724473953247, |
| "learning_rate": 2.241880751714387e-09, |
| "loss": 1.0578, |
| "step": 2588 |
| }, |
| { |
| "epoch": 5.911652703731912, |
| "grad_norm": 1.2749953269958496, |
| "learning_rate": 2.1119630544438884e-09, |
| "loss": 1.0864, |
| "step": 2589 |
| }, |
| { |
| "epoch": 5.913937547600914, |
| "grad_norm": 1.2969084978103638, |
| "learning_rate": 1.9859213457892877e-09, |
| "loss": 1.1049, |
| "step": 2590 |
| }, |
| { |
| "epoch": 5.916222391469916, |
| "grad_norm": 1.273667573928833, |
| "learning_rate": 1.863755821330249e-09, |
| "loss": 1.072, |
| "step": 2591 |
| }, |
| { |
| "epoch": 5.918507235338918, |
| "grad_norm": 1.2989200353622437, |
| "learning_rate": 1.7454666706318014e-09, |
| "loss": 1.0686, |
| "step": 2592 |
| }, |
| { |
| "epoch": 5.920792079207921, |
| "grad_norm": 1.3151251077651978, |
| "learning_rate": 1.6310540772437877e-09, |
| "loss": 1.0944, |
| "step": 2593 |
| }, |
| { |
| "epoch": 5.923076923076923, |
| "grad_norm": 1.2775644063949585, |
| "learning_rate": 1.520518218701139e-09, |
| "loss": 1.0753, |
| "step": 2594 |
| }, |
| { |
| "epoch": 5.925361766945925, |
| "grad_norm": 1.2895221710205078, |
| "learning_rate": 1.4138592665230433e-09, |
| "loss": 1.0934, |
| "step": 2595 |
| }, |
| { |
| "epoch": 5.927646610814928, |
| "grad_norm": 1.3031322956085205, |
| "learning_rate": 1.3110773862126669e-09, |
| "loss": 1.0793, |
| "step": 2596 |
| }, |
| { |
| "epoch": 5.9299314546839295, |
| "grad_norm": 1.292992115020752, |
| "learning_rate": 1.2121727372574332e-09, |
| "loss": 1.0425, |
| "step": 2597 |
| }, |
| { |
| "epoch": 5.932216298552932, |
| "grad_norm": 1.2997634410858154, |
| "learning_rate": 1.117145473128467e-09, |
| "loss": 1.0975, |
| "step": 2598 |
| }, |
| { |
| "epoch": 5.934501142421935, |
| "grad_norm": 1.313364863395691, |
| "learning_rate": 1.0259957412800393e-09, |
| "loss": 1.064, |
| "step": 2599 |
| }, |
| { |
| "epoch": 5.9367859862909365, |
| "grad_norm": 1.288731336593628, |
| "learning_rate": 9.387236831495672e-10, |
| "loss": 1.0707, |
| "step": 2600 |
| }, |
| { |
| "epoch": 5.939070830159939, |
| "grad_norm": 1.3007681369781494, |
| "learning_rate": 8.553294341578921e-10, |
| "loss": 1.0797, |
| "step": 2601 |
| }, |
| { |
| "epoch": 5.941355674028942, |
| "grad_norm": 1.2979000806808472, |
| "learning_rate": 7.75813123708169e-10, |
| "loss": 1.0696, |
| "step": 2602 |
| }, |
| { |
| "epoch": 5.9436405178979435, |
| "grad_norm": 1.2918391227722168, |
| "learning_rate": 7.001748751866988e-10, |
| "loss": 1.1133, |
| "step": 2603 |
| }, |
| { |
| "epoch": 5.945925361766946, |
| "grad_norm": 1.3077112436294556, |
| "learning_rate": 6.284148059615413e-10, |
| "loss": 1.0668, |
| "step": 2604 |
| }, |
| { |
| "epoch": 5.948210205635948, |
| "grad_norm": 1.2914233207702637, |
| "learning_rate": 5.605330273836251e-10, |
| "loss": 1.0955, |
| "step": 2605 |
| }, |
| { |
| "epoch": 5.9504950495049505, |
| "grad_norm": 1.2764394283294678, |
| "learning_rate": 4.96529644785082e-10, |
| "loss": 1.0705, |
| "step": 2606 |
| }, |
| { |
| "epoch": 5.952779893373953, |
| "grad_norm": 1.3059388399124146, |
| "learning_rate": 4.364047574803576e-10, |
| "loss": 1.0532, |
| "step": 2607 |
| }, |
| { |
| "epoch": 5.955064737242955, |
| "grad_norm": 1.3395713567733765, |
| "learning_rate": 3.801584587659335e-10, |
| "loss": 1.0662, |
| "step": 2608 |
| }, |
| { |
| "epoch": 5.9573495811119574, |
| "grad_norm": 1.2883552312850952, |
| "learning_rate": 3.277908359194948e-10, |
| "loss": 1.0876, |
| "step": 2609 |
| }, |
| { |
| "epoch": 5.95963442498096, |
| "grad_norm": 1.2938013076782227, |
| "learning_rate": 2.7930197020020753e-10, |
| "loss": 1.0563, |
| "step": 2610 |
| }, |
| { |
| "epoch": 5.961919268849962, |
| "grad_norm": 1.2887256145477295, |
| "learning_rate": 2.3469193684844125e-10, |
| "loss": 1.1038, |
| "step": 2611 |
| }, |
| { |
| "epoch": 5.9642041127189644, |
| "grad_norm": 1.2994153499603271, |
| "learning_rate": 1.9396080508576887e-10, |
| "loss": 1.0705, |
| "step": 2612 |
| }, |
| { |
| "epoch": 5.966488956587966, |
| "grad_norm": 1.2864863872528076, |
| "learning_rate": 1.5710863811524424e-10, |
| "loss": 1.0907, |
| "step": 2613 |
| }, |
| { |
| "epoch": 5.968773800456969, |
| "grad_norm": 1.3223236799240112, |
| "learning_rate": 1.2413549312029204e-10, |
| "loss": 1.0387, |
| "step": 2614 |
| }, |
| { |
| "epoch": 5.971058644325971, |
| "grad_norm": 1.2479461431503296, |
| "learning_rate": 9.504142126581794e-11, |
| "loss": 1.0723, |
| "step": 2615 |
| }, |
| { |
| "epoch": 5.973343488194973, |
| "grad_norm": 1.2943627834320068, |
| "learning_rate": 6.982646769709833e-11, |
| "loss": 1.1221, |
| "step": 2616 |
| }, |
| { |
| "epoch": 5.975628332063976, |
| "grad_norm": 1.3019261360168457, |
| "learning_rate": 4.8490671540613045e-11, |
| "loss": 1.1253, |
| "step": 2617 |
| }, |
| { |
| "epoch": 5.9779131759329776, |
| "grad_norm": 1.3006083965301514, |
| "learning_rate": 3.103406590265756e-11, |
| "loss": 1.1149, |
| "step": 2618 |
| }, |
| { |
| "epoch": 5.98019801980198, |
| "grad_norm": 1.2991771697998047, |
| "learning_rate": 1.745667787128591e-11, |
| "loss": 1.0608, |
| "step": 2619 |
| }, |
| { |
| "epoch": 5.982482863670983, |
| "grad_norm": 1.355244517326355, |
| "learning_rate": 7.758528514645313e-12, |
| "loss": 1.0679, |
| "step": 2620 |
| }, |
| { |
| "epoch": 5.9847677075399845, |
| "grad_norm": 1.272252082824707, |
| "learning_rate": 1.9396328809762054e-12, |
| "loss": 1.0655, |
| "step": 2621 |
| }, |
| { |
| "epoch": 5.987052551408987, |
| "grad_norm": 1.2879427671432495, |
| "learning_rate": 0.0, |
| "loss": 1.1149, |
| "step": 2622 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 2622, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 437, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.618637151628165e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|