| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9961919268849961, |
| "eval_steps": 500, |
| "global_step": 874, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002284843869002285, |
| "grad_norm": 3.668196201324463, |
| "learning_rate": 5.0000000000000004e-08, |
| "loss": 1.5687, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.00456968773800457, |
| "grad_norm": 3.6277146339416504, |
| "learning_rate": 1.0000000000000001e-07, |
| "loss": 1.5714, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.006854531607006854, |
| "grad_norm": 3.813422918319702, |
| "learning_rate": 1.5000000000000002e-07, |
| "loss": 1.58, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.00913937547600914, |
| "grad_norm": 3.4566409587860107, |
| "learning_rate": 2.0000000000000002e-07, |
| "loss": 1.5604, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.011424219345011425, |
| "grad_norm": 3.287661552429199, |
| "learning_rate": 2.5000000000000004e-07, |
| "loss": 1.5425, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.013709063214013708, |
| "grad_norm": 3.318340301513672, |
| "learning_rate": 3.0000000000000004e-07, |
| "loss": 1.5477, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.015993907083015995, |
| "grad_norm": 3.407221555709839, |
| "learning_rate": 3.5000000000000004e-07, |
| "loss": 1.5848, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.01827875095201828, |
| "grad_norm": 3.732999563217163, |
| "learning_rate": 4.0000000000000003e-07, |
| "loss": 1.5884, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.020563594821020565, |
| "grad_norm": 3.532766580581665, |
| "learning_rate": 4.5000000000000003e-07, |
| "loss": 1.5892, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.02284843869002285, |
| "grad_norm": 3.5676348209381104, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 1.5619, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02513328255902513, |
| "grad_norm": 3.1015849113464355, |
| "learning_rate": 5.5e-07, |
| "loss": 1.5649, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.027418126428027417, |
| "grad_norm": 3.163240909576416, |
| "learning_rate": 6.000000000000001e-07, |
| "loss": 1.5807, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0297029702970297, |
| "grad_norm": 2.894922971725464, |
| "learning_rate": 6.5e-07, |
| "loss": 1.5454, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.03198781416603199, |
| "grad_norm": 2.8211843967437744, |
| "learning_rate": 7.000000000000001e-07, |
| "loss": 1.5801, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.03427265803503427, |
| "grad_norm": 2.676609516143799, |
| "learning_rate": 7.5e-07, |
| "loss": 1.5446, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.03655750190403656, |
| "grad_norm": 2.6186320781707764, |
| "learning_rate": 8.000000000000001e-07, |
| "loss": 1.5443, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.03884234577303884, |
| "grad_norm": 2.460139513015747, |
| "learning_rate": 8.500000000000001e-07, |
| "loss": 1.5489, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.04112718964204113, |
| "grad_norm": 2.368126630783081, |
| "learning_rate": 9.000000000000001e-07, |
| "loss": 1.5317, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.04341203351104341, |
| "grad_norm": 2.244192123413086, |
| "learning_rate": 9.500000000000001e-07, |
| "loss": 1.4805, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0456968773800457, |
| "grad_norm": 2.242701292037964, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 1.5478, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.04798172124904798, |
| "grad_norm": 2.13895583152771, |
| "learning_rate": 1.0500000000000001e-06, |
| "loss": 1.5194, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.05026656511805026, |
| "grad_norm": 2.0152103900909424, |
| "learning_rate": 1.1e-06, |
| "loss": 1.5067, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.05255140898705255, |
| "grad_norm": 1.9156895875930786, |
| "learning_rate": 1.1500000000000002e-06, |
| "loss": 1.5145, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.05483625285605483, |
| "grad_norm": 1.7710504531860352, |
| "learning_rate": 1.2000000000000002e-06, |
| "loss": 1.5147, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.05712109672505712, |
| "grad_norm": 1.807431936264038, |
| "learning_rate": 1.25e-06, |
| "loss": 1.5357, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.0594059405940594, |
| "grad_norm": 1.6638832092285156, |
| "learning_rate": 1.3e-06, |
| "loss": 1.489, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.06169078446306169, |
| "grad_norm": 1.5708481073379517, |
| "learning_rate": 1.3500000000000002e-06, |
| "loss": 1.4768, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.06397562833206398, |
| "grad_norm": 1.615577220916748, |
| "learning_rate": 1.4000000000000001e-06, |
| "loss": 1.5159, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.06626047220106626, |
| "grad_norm": 1.5125129222869873, |
| "learning_rate": 1.45e-06, |
| "loss": 1.4972, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.06854531607006854, |
| "grad_norm": 1.479811668395996, |
| "learning_rate": 1.5e-06, |
| "loss": 1.4674, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.07083015993907082, |
| "grad_norm": 1.4502017498016357, |
| "learning_rate": 1.5500000000000002e-06, |
| "loss": 1.4811, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.07311500380807312, |
| "grad_norm": 1.3617135286331177, |
| "learning_rate": 1.6000000000000001e-06, |
| "loss": 1.4872, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.0753998476770754, |
| "grad_norm": 1.367607831954956, |
| "learning_rate": 1.6500000000000003e-06, |
| "loss": 1.4699, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.07768469154607768, |
| "grad_norm": 1.3374927043914795, |
| "learning_rate": 1.7000000000000002e-06, |
| "loss": 1.4659, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.07996953541507996, |
| "grad_norm": 1.354506254196167, |
| "learning_rate": 1.75e-06, |
| "loss": 1.4351, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.08225437928408226, |
| "grad_norm": 1.2532024383544922, |
| "learning_rate": 1.8000000000000001e-06, |
| "loss": 1.4358, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.08453922315308454, |
| "grad_norm": 1.2684043645858765, |
| "learning_rate": 1.85e-06, |
| "loss": 1.4534, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.08682406702208682, |
| "grad_norm": 1.2418140172958374, |
| "learning_rate": 1.9000000000000002e-06, |
| "loss": 1.4624, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.0891089108910891, |
| "grad_norm": 1.2266045808792114, |
| "learning_rate": 1.9500000000000004e-06, |
| "loss": 1.4282, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.0913937547600914, |
| "grad_norm": 1.180330753326416, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 1.4107, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.09367859862909368, |
| "grad_norm": 1.1651424169540405, |
| "learning_rate": 2.05e-06, |
| "loss": 1.4041, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.09596344249809596, |
| "grad_norm": 1.181652307510376, |
| "learning_rate": 2.1000000000000002e-06, |
| "loss": 1.4558, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.09824828636709824, |
| "grad_norm": 1.2221183776855469, |
| "learning_rate": 2.15e-06, |
| "loss": 1.4449, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.10053313023610053, |
| "grad_norm": 1.085172414779663, |
| "learning_rate": 2.2e-06, |
| "loss": 1.4235, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.10281797410510282, |
| "grad_norm": 1.0497649908065796, |
| "learning_rate": 2.25e-06, |
| "loss": 1.3891, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.1051028179741051, |
| "grad_norm": 1.0502350330352783, |
| "learning_rate": 2.3000000000000004e-06, |
| "loss": 1.4048, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.10738766184310738, |
| "grad_norm": 1.0798920392990112, |
| "learning_rate": 2.35e-06, |
| "loss": 1.4383, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.10967250571210967, |
| "grad_norm": 1.067581057548523, |
| "learning_rate": 2.4000000000000003e-06, |
| "loss": 1.4128, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.11195734958111196, |
| "grad_norm": 1.062606930732727, |
| "learning_rate": 2.4500000000000003e-06, |
| "loss": 1.4438, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.11424219345011424, |
| "grad_norm": 1.0157577991485596, |
| "learning_rate": 2.5e-06, |
| "loss": 1.4257, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.11652703731911652, |
| "grad_norm": 1.0165379047393799, |
| "learning_rate": 2.55e-06, |
| "loss": 1.407, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.1188118811881188, |
| "grad_norm": 1.0268282890319824, |
| "learning_rate": 2.6e-06, |
| "loss": 1.3942, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.1210967250571211, |
| "grad_norm": 1.0133647918701172, |
| "learning_rate": 2.6500000000000005e-06, |
| "loss": 1.3737, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.12338156892612338, |
| "grad_norm": 1.0097134113311768, |
| "learning_rate": 2.7000000000000004e-06, |
| "loss": 1.3994, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.12566641279512566, |
| "grad_norm": 1.1268850564956665, |
| "learning_rate": 2.7500000000000004e-06, |
| "loss": 1.3676, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.12795125666412796, |
| "grad_norm": 0.981015682220459, |
| "learning_rate": 2.8000000000000003e-06, |
| "loss": 1.3819, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.13023610053313023, |
| "grad_norm": 1.0456632375717163, |
| "learning_rate": 2.85e-06, |
| "loss": 1.4031, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.13252094440213252, |
| "grad_norm": 1.0366231203079224, |
| "learning_rate": 2.9e-06, |
| "loss": 1.4017, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.13480578827113482, |
| "grad_norm": 0.9980257749557495, |
| "learning_rate": 2.95e-06, |
| "loss": 1.4261, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.1370906321401371, |
| "grad_norm": 0.990281879901886, |
| "learning_rate": 3e-06, |
| "loss": 1.3699, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.13937547600913938, |
| "grad_norm": 1.0530250072479248, |
| "learning_rate": 3.05e-06, |
| "loss": 1.3656, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.14166031987814165, |
| "grad_norm": 0.9878147840499878, |
| "learning_rate": 3.1000000000000004e-06, |
| "loss": 1.3712, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.14394516374714394, |
| "grad_norm": 0.9554497599601746, |
| "learning_rate": 3.1500000000000003e-06, |
| "loss": 1.3507, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.14623000761614624, |
| "grad_norm": 1.0152994394302368, |
| "learning_rate": 3.2000000000000003e-06, |
| "loss": 1.3531, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.1485148514851485, |
| "grad_norm": 0.9816209077835083, |
| "learning_rate": 3.2500000000000002e-06, |
| "loss": 1.3733, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.1507996953541508, |
| "grad_norm": 1.014113187789917, |
| "learning_rate": 3.3000000000000006e-06, |
| "loss": 1.3798, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.15308453922315307, |
| "grad_norm": 1.005303978919983, |
| "learning_rate": 3.3500000000000005e-06, |
| "loss": 1.3877, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.15536938309215537, |
| "grad_norm": 1.109976887702942, |
| "learning_rate": 3.4000000000000005e-06, |
| "loss": 1.4184, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.15765422696115766, |
| "grad_norm": 1.033060908317566, |
| "learning_rate": 3.45e-06, |
| "loss": 1.4043, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.15993907083015993, |
| "grad_norm": 0.9719234108924866, |
| "learning_rate": 3.5e-06, |
| "loss": 1.3481, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.16222391469916222, |
| "grad_norm": 1.0430618524551392, |
| "learning_rate": 3.5500000000000003e-06, |
| "loss": 1.3227, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.16450875856816452, |
| "grad_norm": 1.0481953620910645, |
| "learning_rate": 3.6000000000000003e-06, |
| "loss": 1.3174, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.1667936024371668, |
| "grad_norm": 0.9868738055229187, |
| "learning_rate": 3.65e-06, |
| "loss": 1.356, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.16907844630616908, |
| "grad_norm": 1.0015943050384521, |
| "learning_rate": 3.7e-06, |
| "loss": 1.3462, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.17136329017517135, |
| "grad_norm": 1.0458308458328247, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 1.3962, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.17364813404417365, |
| "grad_norm": 1.0376830101013184, |
| "learning_rate": 3.8000000000000005e-06, |
| "loss": 1.3523, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.17593297791317594, |
| "grad_norm": 0.9821555018424988, |
| "learning_rate": 3.85e-06, |
| "loss": 1.3559, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.1782178217821782, |
| "grad_norm": 0.9579638838768005, |
| "learning_rate": 3.900000000000001e-06, |
| "loss": 1.3073, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.1805026656511805, |
| "grad_norm": 0.9736194014549255, |
| "learning_rate": 3.95e-06, |
| "loss": 1.3494, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.1827875095201828, |
| "grad_norm": 1.0055922269821167, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 1.3697, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.18507235338918507, |
| "grad_norm": 0.9767876267433167, |
| "learning_rate": 4.05e-06, |
| "loss": 1.3225, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.18735719725818736, |
| "grad_norm": 1.003092885017395, |
| "learning_rate": 4.1e-06, |
| "loss": 1.335, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.18964204112718963, |
| "grad_norm": 0.9898741245269775, |
| "learning_rate": 4.15e-06, |
| "loss": 1.3103, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.19192688499619193, |
| "grad_norm": 0.9903189539909363, |
| "learning_rate": 4.2000000000000004e-06, |
| "loss": 1.3741, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.19421172886519422, |
| "grad_norm": 0.9661535620689392, |
| "learning_rate": 4.25e-06, |
| "loss": 1.3381, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.1964965727341965, |
| "grad_norm": 0.9668599367141724, |
| "learning_rate": 4.3e-06, |
| "loss": 1.3511, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.19878141660319879, |
| "grad_norm": 0.9633579254150391, |
| "learning_rate": 4.350000000000001e-06, |
| "loss": 1.3841, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.20106626047220105, |
| "grad_norm": 0.9665766358375549, |
| "learning_rate": 4.4e-06, |
| "loss": 1.3211, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.20335110434120335, |
| "grad_norm": 1.0263577699661255, |
| "learning_rate": 4.450000000000001e-06, |
| "loss": 1.3398, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.20563594821020564, |
| "grad_norm": 1.0054337978363037, |
| "learning_rate": 4.5e-06, |
| "loss": 1.3598, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.2079207920792079, |
| "grad_norm": 0.9768564701080322, |
| "learning_rate": 4.5500000000000005e-06, |
| "loss": 1.3386, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.2102056359482102, |
| "grad_norm": 0.9710814356803894, |
| "learning_rate": 4.600000000000001e-06, |
| "loss": 1.306, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.2124904798172125, |
| "grad_norm": 0.9943618774414062, |
| "learning_rate": 4.65e-06, |
| "loss": 1.3368, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.21477532368621477, |
| "grad_norm": 1.0000272989273071, |
| "learning_rate": 4.7e-06, |
| "loss": 1.3561, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.21706016755521707, |
| "grad_norm": 0.9748716950416565, |
| "learning_rate": 4.75e-06, |
| "loss": 1.3216, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.21934501142421933, |
| "grad_norm": 0.977959930896759, |
| "learning_rate": 4.800000000000001e-06, |
| "loss": 1.3275, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.22162985529322163, |
| "grad_norm": 0.9991240501403809, |
| "learning_rate": 4.85e-06, |
| "loss": 1.3143, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.22391469916222392, |
| "grad_norm": 1.0590916872024536, |
| "learning_rate": 4.9000000000000005e-06, |
| "loss": 1.3467, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.2261995430312262, |
| "grad_norm": 0.9592604041099548, |
| "learning_rate": 4.95e-06, |
| "loss": 1.3568, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.2284843869002285, |
| "grad_norm": 0.9900586605072021, |
| "learning_rate": 5e-06, |
| "loss": 1.3162, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.23076923076923078, |
| "grad_norm": 0.9882398843765259, |
| "learning_rate": 4.999998060367119e-06, |
| "loss": 1.3348, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.23305407463823305, |
| "grad_norm": 0.9522809982299805, |
| "learning_rate": 4.999992241471486e-06, |
| "loss": 1.3004, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.23533891850723535, |
| "grad_norm": 0.9822378754615784, |
| "learning_rate": 4.9999825433221295e-06, |
| "loss": 1.3326, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.2376237623762376, |
| "grad_norm": 0.9944847822189331, |
| "learning_rate": 4.999968965934098e-06, |
| "loss": 1.3429, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.2399086062452399, |
| "grad_norm": 1.052456021308899, |
| "learning_rate": 4.9999515093284605e-06, |
| "loss": 1.3476, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.2421934501142422, |
| "grad_norm": 0.9862610697746277, |
| "learning_rate": 4.999930173532304e-06, |
| "loss": 1.3638, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.24447829398324447, |
| "grad_norm": 0.9718945622444153, |
| "learning_rate": 4.999904958578735e-06, |
| "loss": 1.3013, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.24676313785224677, |
| "grad_norm": 0.9535952210426331, |
| "learning_rate": 4.9998758645068805e-06, |
| "loss": 1.3317, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.24904798172124903, |
| "grad_norm": 1.1905543804168701, |
| "learning_rate": 4.999842891361885e-06, |
| "loss": 1.3325, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.25133282559025133, |
| "grad_norm": 1.0306485891342163, |
| "learning_rate": 4.9998060391949145e-06, |
| "loss": 1.3198, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.2536176694592536, |
| "grad_norm": 1.0334984064102173, |
| "learning_rate": 4.999765308063152e-06, |
| "loss": 1.3075, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.2559025133282559, |
| "grad_norm": 1.0020740032196045, |
| "learning_rate": 4.9997206980298e-06, |
| "loss": 1.3324, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.25818735719725816, |
| "grad_norm": 0.9771923422813416, |
| "learning_rate": 4.9996722091640805e-06, |
| "loss": 1.3072, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.26047220106626046, |
| "grad_norm": 0.9955299496650696, |
| "learning_rate": 4.999619841541234e-06, |
| "loss": 1.3501, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.26275704493526275, |
| "grad_norm": 1.0125700235366821, |
| "learning_rate": 4.9995635952425205e-06, |
| "loss": 1.3387, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.26504188880426505, |
| "grad_norm": 1.005936622619629, |
| "learning_rate": 4.999503470355215e-06, |
| "loss": 1.342, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.26732673267326734, |
| "grad_norm": 0.9978262782096863, |
| "learning_rate": 4.999439466972616e-06, |
| "loss": 1.2954, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.26961157654226964, |
| "grad_norm": 0.9668537974357605, |
| "learning_rate": 4.999371585194039e-06, |
| "loss": 1.3318, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.2718964204112719, |
| "grad_norm": 1.0156077146530151, |
| "learning_rate": 4.999299825124814e-06, |
| "loss": 1.2681, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.2741812642802742, |
| "grad_norm": 0.99967360496521, |
| "learning_rate": 4.999224186876293e-06, |
| "loss": 1.2666, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.27646610814927647, |
| "grad_norm": 1.0085562467575073, |
| "learning_rate": 4.999144670565842e-06, |
| "loss": 1.3261, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.27875095201827876, |
| "grad_norm": 1.0338691473007202, |
| "learning_rate": 4.999061276316851e-06, |
| "loss": 1.2943, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.28103579588728106, |
| "grad_norm": 0.9880859851837158, |
| "learning_rate": 4.99897400425872e-06, |
| "loss": 1.3035, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.2833206397562833, |
| "grad_norm": 0.9832742810249329, |
| "learning_rate": 4.998882854526872e-06, |
| "loss": 1.3015, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.2856054836252856, |
| "grad_norm": 0.976040780544281, |
| "learning_rate": 4.998787827262743e-06, |
| "loss": 1.3325, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.2878903274942879, |
| "grad_norm": 1.0309007167816162, |
| "learning_rate": 4.998688922613788e-06, |
| "loss": 1.2998, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.2901751713632902, |
| "grad_norm": 1.0828396081924438, |
| "learning_rate": 4.998586140733477e-06, |
| "loss": 1.3093, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.2924600152322925, |
| "grad_norm": 0.9725452661514282, |
| "learning_rate": 4.998479481781299e-06, |
| "loss": 1.2811, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.2947448591012947, |
| "grad_norm": 0.9891279339790344, |
| "learning_rate": 4.998368945922757e-06, |
| "loss": 1.3104, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.297029702970297, |
| "grad_norm": 1.022490382194519, |
| "learning_rate": 4.998254533329369e-06, |
| "loss": 1.3425, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.2993145468392993, |
| "grad_norm": 1.00505530834198, |
| "learning_rate": 4.99813624417867e-06, |
| "loss": 1.3494, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.3015993907083016, |
| "grad_norm": 1.033308982849121, |
| "learning_rate": 4.998014078654211e-06, |
| "loss": 1.278, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.3038842345773039, |
| "grad_norm": 1.0194460153579712, |
| "learning_rate": 4.997888036945556e-06, |
| "loss": 1.2963, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.30616907844630614, |
| "grad_norm": 1.005299687385559, |
| "learning_rate": 4.997758119248286e-06, |
| "loss": 1.3187, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.30845392231530844, |
| "grad_norm": 1.0271679162979126, |
| "learning_rate": 4.997624325763994e-06, |
| "loss": 1.3106, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.31073876618431073, |
| "grad_norm": 1.0343165397644043, |
| "learning_rate": 4.997486656700289e-06, |
| "loss": 1.3355, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.31302361005331303, |
| "grad_norm": 1.0498188734054565, |
| "learning_rate": 4.997345112270792e-06, |
| "loss": 1.3126, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.3153084539223153, |
| "grad_norm": 0.9742498993873596, |
| "learning_rate": 4.997199692695138e-06, |
| "loss": 1.3006, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.3175932977913176, |
| "grad_norm": 1.0044124126434326, |
| "learning_rate": 4.997050398198977e-06, |
| "loss": 1.3298, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.31987814166031986, |
| "grad_norm": 1.0173184871673584, |
| "learning_rate": 4.99689722901397e-06, |
| "loss": 1.3286, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.32216298552932215, |
| "grad_norm": 0.9835124611854553, |
| "learning_rate": 4.99674018537779e-06, |
| "loss": 1.2937, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.32444782939832445, |
| "grad_norm": 1.0389831066131592, |
| "learning_rate": 4.996579267534122e-06, |
| "loss": 1.3077, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.32673267326732675, |
| "grad_norm": 1.0412015914916992, |
| "learning_rate": 4.996414475732664e-06, |
| "loss": 1.3131, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.32901751713632904, |
| "grad_norm": 1.0527534484863281, |
| "learning_rate": 4.9962458102291254e-06, |
| "loss": 1.3075, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.3313023610053313, |
| "grad_norm": 1.036034345626831, |
| "learning_rate": 4.9960732712852236e-06, |
| "loss": 1.3198, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.3335872048743336, |
| "grad_norm": 1.0121785402297974, |
| "learning_rate": 4.99589685916869e-06, |
| "loss": 1.3346, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.33587204874333587, |
| "grad_norm": 1.0597130060195923, |
| "learning_rate": 4.9957165741532635e-06, |
| "loss": 1.3025, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.33815689261233817, |
| "grad_norm": 1.0982815027236938, |
| "learning_rate": 4.995532416518693e-06, |
| "loss": 1.3177, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.34044173648134046, |
| "grad_norm": 1.012061357498169, |
| "learning_rate": 4.995344386550738e-06, |
| "loss": 1.2905, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.3427265803503427, |
| "grad_norm": 1.0748074054718018, |
| "learning_rate": 4.995152484541166e-06, |
| "loss": 1.3191, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.345011424219345, |
| "grad_norm": 1.0346341133117676, |
| "learning_rate": 4.994956710787752e-06, |
| "loss": 1.2923, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.3472962680883473, |
| "grad_norm": 1.0333645343780518, |
| "learning_rate": 4.99475706559428e-06, |
| "loss": 1.3272, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.3495811119573496, |
| "grad_norm": 1.0411094427108765, |
| "learning_rate": 4.9945535492705385e-06, |
| "loss": 1.3102, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.3518659558263519, |
| "grad_norm": 1.0394591093063354, |
| "learning_rate": 4.994346162132329e-06, |
| "loss": 1.2912, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.3541507996953541, |
| "grad_norm": 1.1258337497711182, |
| "learning_rate": 4.994134904501452e-06, |
| "loss": 1.295, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.3564356435643564, |
| "grad_norm": 1.0196075439453125, |
| "learning_rate": 4.993919776705718e-06, |
| "loss": 1.2935, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.3587204874333587, |
| "grad_norm": 1.020180583000183, |
| "learning_rate": 4.993700779078943e-06, |
| "loss": 1.3118, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.361005331302361, |
| "grad_norm": 1.1170531511306763, |
| "learning_rate": 4.993477911960948e-06, |
| "loss": 1.2924, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.3632901751713633, |
| "grad_norm": 1.0637717247009277, |
| "learning_rate": 4.993251175697554e-06, |
| "loss": 1.2797, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.3655750190403656, |
| "grad_norm": 1.046305775642395, |
| "learning_rate": 4.993020570640592e-06, |
| "loss": 1.3142, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.36785986290936784, |
| "grad_norm": 1.039476752281189, |
| "learning_rate": 4.992786097147892e-06, |
| "loss": 1.2773, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.37014470677837014, |
| "grad_norm": 1.0379183292388916, |
| "learning_rate": 4.992547755583288e-06, |
| "loss": 1.3057, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.37242955064737243, |
| "grad_norm": 1.0063403844833374, |
| "learning_rate": 4.992305546316617e-06, |
| "loss": 1.3108, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.3747143945163747, |
| "grad_norm": 1.0467029809951782, |
| "learning_rate": 4.992059469723716e-06, |
| "loss": 1.2675, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.376999238385377, |
| "grad_norm": 0.9822115898132324, |
| "learning_rate": 4.991809526186424e-06, |
| "loss": 1.2987, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.37928408225437926, |
| "grad_norm": 0.9957991242408752, |
| "learning_rate": 4.9915557160925795e-06, |
| "loss": 1.2927, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.38156892612338156, |
| "grad_norm": 1.020486831665039, |
| "learning_rate": 4.991298039836021e-06, |
| "loss": 1.2891, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.38385376999238385, |
| "grad_norm": 0.9941042065620422, |
| "learning_rate": 4.991036497816587e-06, |
| "loss": 1.3279, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.38613861386138615, |
| "grad_norm": 1.030573844909668, |
| "learning_rate": 4.990771090440114e-06, |
| "loss": 1.2715, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.38842345773038844, |
| "grad_norm": 0.9810742735862732, |
| "learning_rate": 4.990501818118436e-06, |
| "loss": 1.2808, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.3907083015993907, |
| "grad_norm": 1.0300201177597046, |
| "learning_rate": 4.990228681269383e-06, |
| "loss": 1.3079, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.392993145468393, |
| "grad_norm": 1.0107353925704956, |
| "learning_rate": 4.989951680316787e-06, |
| "loss": 1.2872, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.3952779893373953, |
| "grad_norm": 1.0361515283584595, |
| "learning_rate": 4.989670815690469e-06, |
| "loss": 1.2784, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.39756283320639757, |
| "grad_norm": 1.0452970266342163, |
| "learning_rate": 4.989386087826248e-06, |
| "loss": 1.2976, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.39984767707539987, |
| "grad_norm": 1.0585196018218994, |
| "learning_rate": 4.9890974971659405e-06, |
| "loss": 1.2921, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.4021325209444021, |
| "grad_norm": 1.018211007118225, |
| "learning_rate": 4.988805044157353e-06, |
| "loss": 1.3046, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.4044173648134044, |
| "grad_norm": 1.0587507486343384, |
| "learning_rate": 4.9885087292542865e-06, |
| "loss": 1.2901, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.4067022086824067, |
| "grad_norm": 1.0261503458023071, |
| "learning_rate": 4.988208552916535e-06, |
| "loss": 1.3081, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.408987052551409, |
| "grad_norm": 1.0412943363189697, |
| "learning_rate": 4.9879045156098846e-06, |
| "loss": 1.3052, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.4112718964204113, |
| "grad_norm": 1.0323666334152222, |
| "learning_rate": 4.987596617806111e-06, |
| "loss": 1.3048, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.4135567402894136, |
| "grad_norm": 1.0095067024230957, |
| "learning_rate": 4.9872848599829825e-06, |
| "loss": 1.3292, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.4158415841584158, |
| "grad_norm": 0.9761002659797668, |
| "learning_rate": 4.986969242624254e-06, |
| "loss": 1.2884, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.4181264280274181, |
| "grad_norm": 1.0436338186264038, |
| "learning_rate": 4.986649766219671e-06, |
| "loss": 1.3211, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.4204112718964204, |
| "grad_norm": 1.0505225658416748, |
| "learning_rate": 4.986326431264969e-06, |
| "loss": 1.2863, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.4226961157654227, |
| "grad_norm": 1.006611943244934, |
| "learning_rate": 4.985999238261867e-06, |
| "loss": 1.2812, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.424980959634425, |
| "grad_norm": 1.0494719743728638, |
| "learning_rate": 4.985668187718073e-06, |
| "loss": 1.3105, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.42726580350342724, |
| "grad_norm": 0.9847164750099182, |
| "learning_rate": 4.985333280147281e-06, |
| "loss": 1.2811, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.42955064737242954, |
| "grad_norm": 1.0337165594100952, |
| "learning_rate": 4.984994516069168e-06, |
| "loss": 1.2876, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.43183549124143183, |
| "grad_norm": 1.0178074836730957, |
| "learning_rate": 4.984651896009396e-06, |
| "loss": 1.2597, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.43412033511043413, |
| "grad_norm": 1.0170668363571167, |
| "learning_rate": 4.984305420499612e-06, |
| "loss": 1.2916, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.4364051789794364, |
| "grad_norm": 1.0148853063583374, |
| "learning_rate": 4.983955090077445e-06, |
| "loss": 1.2785, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.43869002284843867, |
| "grad_norm": 1.0563602447509766, |
| "learning_rate": 4.983600905286502e-06, |
| "loss": 1.295, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.44097486671744096, |
| "grad_norm": 0.9817858338356018, |
| "learning_rate": 4.983242866676376e-06, |
| "loss": 1.2832, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.44325971058644326, |
| "grad_norm": 1.0299488306045532, |
| "learning_rate": 4.982880974802638e-06, |
| "loss": 1.2952, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.44554455445544555, |
| "grad_norm": 0.9951279163360596, |
| "learning_rate": 4.982515230226837e-06, |
| "loss": 1.2901, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.44782939832444785, |
| "grad_norm": 1.0001885890960693, |
| "learning_rate": 4.982145633516501e-06, |
| "loss": 1.2554, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.4501142421934501, |
| "grad_norm": 1.0821017026901245, |
| "learning_rate": 4.981772185245135e-06, |
| "loss": 1.2903, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.4523990860624524, |
| "grad_norm": 1.0269831418991089, |
| "learning_rate": 4.981394885992223e-06, |
| "loss": 1.3077, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.4546839299314547, |
| "grad_norm": 1.025965929031372, |
| "learning_rate": 4.981013736343221e-06, |
| "loss": 1.2771, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.456968773800457, |
| "grad_norm": 0.9828860759735107, |
| "learning_rate": 4.980628736889562e-06, |
| "loss": 1.2788, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.45925361766945927, |
| "grad_norm": 1.077913761138916, |
| "learning_rate": 4.9802398882286515e-06, |
| "loss": 1.2815, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.46153846153846156, |
| "grad_norm": 1.1024688482284546, |
| "learning_rate": 4.97984719096387e-06, |
| "loss": 1.3135, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.4638233054074638, |
| "grad_norm": 1.0494202375411987, |
| "learning_rate": 4.979450645704567e-06, |
| "loss": 1.3027, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.4661081492764661, |
| "grad_norm": 1.0050199031829834, |
| "learning_rate": 4.979050253066064e-06, |
| "loss": 1.3016, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.4683929931454684, |
| "grad_norm": 1.0264744758605957, |
| "learning_rate": 4.978646013669652e-06, |
| "loss": 1.343, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.4706778370144707, |
| "grad_norm": 1.001989722251892, |
| "learning_rate": 4.978237928142594e-06, |
| "loss": 1.3088, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.472962680883473, |
| "grad_norm": 1.0501984357833862, |
| "learning_rate": 4.977825997118119e-06, |
| "loss": 1.2875, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.4752475247524752, |
| "grad_norm": 1.0487364530563354, |
| "learning_rate": 4.977410221235421e-06, |
| "loss": 1.2917, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.4775323686214775, |
| "grad_norm": 1.0768541097640991, |
| "learning_rate": 4.976990601139662e-06, |
| "loss": 1.3, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.4798172124904798, |
| "grad_norm": 0.9696170687675476, |
| "learning_rate": 4.9765671374819715e-06, |
| "loss": 1.2822, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.4821020563594821, |
| "grad_norm": 0.9987464547157288, |
| "learning_rate": 4.9761398309194385e-06, |
| "loss": 1.3076, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.4843869002284844, |
| "grad_norm": 1.0254422426223755, |
| "learning_rate": 4.975708682115118e-06, |
| "loss": 1.281, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.48667174409748665, |
| "grad_norm": 1.0040076971054077, |
| "learning_rate": 4.9752736917380274e-06, |
| "loss": 1.2821, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.48895658796648894, |
| "grad_norm": 1.004184365272522, |
| "learning_rate": 4.9748348604631416e-06, |
| "loss": 1.2641, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.49124143183549124, |
| "grad_norm": 1.0694694519042969, |
| "learning_rate": 4.9743921889714005e-06, |
| "loss": 1.2853, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.49352627570449353, |
| "grad_norm": 1.0564874410629272, |
| "learning_rate": 4.973945677949699e-06, |
| "loss": 1.2882, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.49581111957349583, |
| "grad_norm": 1.0076894760131836, |
| "learning_rate": 4.973495328090891e-06, |
| "loss": 1.2868, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.49809596344249807, |
| "grad_norm": 1.0476043224334717, |
| "learning_rate": 4.973041140093786e-06, |
| "loss": 1.2642, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.5003808073115004, |
| "grad_norm": 1.050991415977478, |
| "learning_rate": 4.972583114663153e-06, |
| "loss": 1.2751, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.5026656511805027, |
| "grad_norm": 0.9902971386909485, |
| "learning_rate": 4.972121252509712e-06, |
| "loss": 1.2685, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.504950495049505, |
| "grad_norm": 1.0011303424835205, |
| "learning_rate": 4.971655554350137e-06, |
| "loss": 1.2829, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.5072353389185073, |
| "grad_norm": 1.010233998298645, |
| "learning_rate": 4.971186020907054e-06, |
| "loss": 1.277, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.5095201827875095, |
| "grad_norm": 1.0275652408599854, |
| "learning_rate": 4.970712652909042e-06, |
| "loss": 1.2971, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.5118050266565118, |
| "grad_norm": 1.0285537242889404, |
| "learning_rate": 4.970235451090629e-06, |
| "loss": 1.231, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.5140898705255141, |
| "grad_norm": 1.0604579448699951, |
| "learning_rate": 4.969754416192292e-06, |
| "loss": 1.269, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.5163747143945163, |
| "grad_norm": 1.0375958681106567, |
| "learning_rate": 4.969269548960456e-06, |
| "loss": 1.2712, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.5186595582635186, |
| "grad_norm": 1.037304401397705, |
| "learning_rate": 4.9687808501474925e-06, |
| "loss": 1.2826, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.5209444021325209, |
| "grad_norm": 1.0280749797821045, |
| "learning_rate": 4.968288320511718e-06, |
| "loss": 1.2726, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.5232292460015232, |
| "grad_norm": 1.0595530271530151, |
| "learning_rate": 4.967791960817395e-06, |
| "loss": 1.281, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.5255140898705255, |
| "grad_norm": 0.9964226484298706, |
| "learning_rate": 4.967291771834727e-06, |
| "loss": 1.3188, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.5277989337395278, |
| "grad_norm": 1.0433804988861084, |
| "learning_rate": 4.966787754339861e-06, |
| "loss": 1.274, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.5300837776085301, |
| "grad_norm": 1.079641580581665, |
| "learning_rate": 4.966279909114883e-06, |
| "loss": 1.2991, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.5323686214775324, |
| "grad_norm": 1.0351816415786743, |
| "learning_rate": 4.965768236947821e-06, |
| "loss": 1.2659, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.5346534653465347, |
| "grad_norm": 1.0495244264602661, |
| "learning_rate": 4.96525273863264e-06, |
| "loss": 1.2898, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.536938309215537, |
| "grad_norm": 1.0479910373687744, |
| "learning_rate": 4.964733414969241e-06, |
| "loss": 1.2536, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.5392231530845393, |
| "grad_norm": 1.0365879535675049, |
| "learning_rate": 4.964210266763461e-06, |
| "loss": 1.2369, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.5415079969535415, |
| "grad_norm": 1.0398730039596558, |
| "learning_rate": 4.9636832948270745e-06, |
| "loss": 1.2669, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.5437928408225438, |
| "grad_norm": 1.0146657228469849, |
| "learning_rate": 4.963152499977786e-06, |
| "loss": 1.2893, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.546077684691546, |
| "grad_norm": 1.0974043607711792, |
| "learning_rate": 4.962617883039233e-06, |
| "loss": 1.2452, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.5483625285605483, |
| "grad_norm": 0.9900649189949036, |
| "learning_rate": 4.962079444840985e-06, |
| "loss": 1.2215, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.5506473724295506, |
| "grad_norm": 1.003464937210083, |
| "learning_rate": 4.9615371862185394e-06, |
| "loss": 1.2744, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.5529322162985529, |
| "grad_norm": 1.004382848739624, |
| "learning_rate": 4.960991108013322e-06, |
| "loss": 1.271, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.5552170601675552, |
| "grad_norm": 1.0129280090332031, |
| "learning_rate": 4.960441211072686e-06, |
| "loss": 1.2874, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.5575019040365575, |
| "grad_norm": 1.040189266204834, |
| "learning_rate": 4.9598874962499096e-06, |
| "loss": 1.2918, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.5597867479055598, |
| "grad_norm": 1.0145982503890991, |
| "learning_rate": 4.959329964404197e-06, |
| "loss": 1.2713, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.5620715917745621, |
| "grad_norm": 1.0469987392425537, |
| "learning_rate": 4.958768616400672e-06, |
| "loss": 1.2689, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.5643564356435643, |
| "grad_norm": 1.0191642045974731, |
| "learning_rate": 4.958203453110384e-06, |
| "loss": 1.2718, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.5666412795125666, |
| "grad_norm": 1.0718231201171875, |
| "learning_rate": 4.957634475410298e-06, |
| "loss": 1.3128, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.5689261233815689, |
| "grad_norm": 1.0109634399414062, |
| "learning_rate": 4.957061684183301e-06, |
| "loss": 1.2586, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.5712109672505712, |
| "grad_norm": 0.9942657947540283, |
| "learning_rate": 4.956485080318198e-06, |
| "loss": 1.328, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.5734958111195735, |
| "grad_norm": 1.0184757709503174, |
| "learning_rate": 4.955904664709707e-06, |
| "loss": 1.2815, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.5757806549885758, |
| "grad_norm": 1.015625, |
| "learning_rate": 4.955320438258465e-06, |
| "loss": 1.2585, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.5780654988575781, |
| "grad_norm": 0.9848981499671936, |
| "learning_rate": 4.954732401871018e-06, |
| "loss": 1.2866, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.5803503427265804, |
| "grad_norm": 1.0482749938964844, |
| "learning_rate": 4.954140556459826e-06, |
| "loss": 1.2732, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.5826351865955827, |
| "grad_norm": 1.0250680446624756, |
| "learning_rate": 4.95354490294326e-06, |
| "loss": 1.3053, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.584920030464585, |
| "grad_norm": 1.0545597076416016, |
| "learning_rate": 4.952945442245598e-06, |
| "loss": 1.2638, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.5872048743335873, |
| "grad_norm": 1.044873833656311, |
| "learning_rate": 4.952342175297028e-06, |
| "loss": 1.2683, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.5894897182025894, |
| "grad_norm": 1.0361744165420532, |
| "learning_rate": 4.951735103033644e-06, |
| "loss": 1.2887, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.5917745620715917, |
| "grad_norm": 1.0238685607910156, |
| "learning_rate": 4.951124226397441e-06, |
| "loss": 1.2736, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.594059405940594, |
| "grad_norm": 1.0217833518981934, |
| "learning_rate": 4.950509546336323e-06, |
| "loss": 1.2681, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.5963442498095963, |
| "grad_norm": 1.0546188354492188, |
| "learning_rate": 4.949891063804091e-06, |
| "loss": 1.2582, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.5986290936785986, |
| "grad_norm": 1.0834907293319702, |
| "learning_rate": 4.94926877976045e-06, |
| "loss": 1.2487, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.6009139375476009, |
| "grad_norm": 1.062184453010559, |
| "learning_rate": 4.948642695171e-06, |
| "loss": 1.3188, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.6031987814166032, |
| "grad_norm": 1.0373252630233765, |
| "learning_rate": 4.948012811007242e-06, |
| "loss": 1.277, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.6054836252856055, |
| "grad_norm": 1.0140316486358643, |
| "learning_rate": 4.947379128246571e-06, |
| "loss": 1.2617, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.6077684691546078, |
| "grad_norm": 1.054410696029663, |
| "learning_rate": 4.946741647872277e-06, |
| "loss": 1.238, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.6100533130236101, |
| "grad_norm": 1.0967663526535034, |
| "learning_rate": 4.94610037087354e-06, |
| "loss": 1.2682, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.6123381568926123, |
| "grad_norm": 1.043338656425476, |
| "learning_rate": 4.945455298245436e-06, |
| "loss": 1.2572, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.6146230007616146, |
| "grad_norm": 1.0187970399856567, |
| "learning_rate": 4.944806430988927e-06, |
| "loss": 1.2613, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.6169078446306169, |
| "grad_norm": 1.0666472911834717, |
| "learning_rate": 4.9441537701108654e-06, |
| "loss": 1.2611, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.6191926884996192, |
| "grad_norm": 1.0025635957717896, |
| "learning_rate": 4.943497316623988e-06, |
| "loss": 1.2519, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.6214775323686215, |
| "grad_norm": 1.0135135650634766, |
| "learning_rate": 4.942837071546919e-06, |
| "loss": 1.2759, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.6237623762376238, |
| "grad_norm": 0.9985151886940002, |
| "learning_rate": 4.942173035904164e-06, |
| "loss": 1.2844, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.6260472201066261, |
| "grad_norm": 0.9952817559242249, |
| "learning_rate": 4.941505210726112e-06, |
| "loss": 1.2356, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.6283320639756284, |
| "grad_norm": 1.0448962450027466, |
| "learning_rate": 4.9408335970490305e-06, |
| "loss": 1.2587, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.6306169078446306, |
| "grad_norm": 1.011099100112915, |
| "learning_rate": 4.940158195915067e-06, |
| "loss": 1.2729, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.6329017517136329, |
| "grad_norm": 1.052904725074768, |
| "learning_rate": 4.939479008372247e-06, |
| "loss": 1.2536, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.6351865955826352, |
| "grad_norm": 1.058173418045044, |
| "learning_rate": 4.938796035474469e-06, |
| "loss": 1.2807, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.6374714394516374, |
| "grad_norm": 1.022147536277771, |
| "learning_rate": 4.938109278281506e-06, |
| "loss": 1.2887, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.6397562833206397, |
| "grad_norm": 1.0064011812210083, |
| "learning_rate": 4.937418737859004e-06, |
| "loss": 1.2192, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.642041127189642, |
| "grad_norm": 1.0092360973358154, |
| "learning_rate": 4.936724415278479e-06, |
| "loss": 1.3159, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.6443259710586443, |
| "grad_norm": 1.076401710510254, |
| "learning_rate": 4.936026311617316e-06, |
| "loss": 1.2872, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.6466108149276466, |
| "grad_norm": 1.057209849357605, |
| "learning_rate": 4.935324427958766e-06, |
| "loss": 1.257, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.6488956587966489, |
| "grad_norm": 1.1738762855529785, |
| "learning_rate": 4.934618765391946e-06, |
| "loss": 1.2547, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.6511805026656512, |
| "grad_norm": 1.0405137538909912, |
| "learning_rate": 4.933909325011838e-06, |
| "loss": 1.2766, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.6534653465346535, |
| "grad_norm": 1.0377894639968872, |
| "learning_rate": 4.933196107919286e-06, |
| "loss": 1.2624, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.6557501904036558, |
| "grad_norm": 1.032714605331421, |
| "learning_rate": 4.932479115220991e-06, |
| "loss": 1.2527, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.6580350342726581, |
| "grad_norm": 1.0755581855773926, |
| "learning_rate": 4.9317583480295175e-06, |
| "loss": 1.2966, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.6603198781416603, |
| "grad_norm": 1.0262556076049805, |
| "learning_rate": 4.931033807463283e-06, |
| "loss": 1.2585, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.6626047220106626, |
| "grad_norm": 1.0510430335998535, |
| "learning_rate": 4.930305494646562e-06, |
| "loss": 1.2662, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.6648895658796649, |
| "grad_norm": 1.035854458808899, |
| "learning_rate": 4.9295734107094825e-06, |
| "loss": 1.2346, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.6671744097486672, |
| "grad_norm": 1.0485846996307373, |
| "learning_rate": 4.928837556788023e-06, |
| "loss": 1.2978, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.6694592536176694, |
| "grad_norm": 1.02550208568573, |
| "learning_rate": 4.928097934024013e-06, |
| "loss": 1.2478, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.6717440974866717, |
| "grad_norm": 1.0328837633132935, |
| "learning_rate": 4.927354543565131e-06, |
| "loss": 1.2788, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.674028941355674, |
| "grad_norm": 0.9913997054100037, |
| "learning_rate": 4.926607386564898e-06, |
| "loss": 1.2423, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.6763137852246763, |
| "grad_norm": 1.0034306049346924, |
| "learning_rate": 4.925856464182685e-06, |
| "loss": 1.2562, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.6785986290936786, |
| "grad_norm": 1.0546495914459229, |
| "learning_rate": 4.925101777583701e-06, |
| "loss": 1.2598, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.6808834729626809, |
| "grad_norm": 1.0412935018539429, |
| "learning_rate": 4.924343327938999e-06, |
| "loss": 1.2744, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.6831683168316832, |
| "grad_norm": 1.0731669664382935, |
| "learning_rate": 4.923581116425471e-06, |
| "loss": 1.2912, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.6854531607006854, |
| "grad_norm": 1.0394880771636963, |
| "learning_rate": 4.922815144225843e-06, |
| "loss": 1.276, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6877380045696877, |
| "grad_norm": 1.0383579730987549, |
| "learning_rate": 4.92204541252868e-06, |
| "loss": 1.255, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.69002284843869, |
| "grad_norm": 1.0251744985580444, |
| "learning_rate": 4.92127192252838e-06, |
| "loss": 1.2688, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.6923076923076923, |
| "grad_norm": 1.017650842666626, |
| "learning_rate": 4.9204946754251724e-06, |
| "loss": 1.2818, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.6945925361766946, |
| "grad_norm": 1.0219080448150635, |
| "learning_rate": 4.919713672425116e-06, |
| "loss": 1.2828, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.6968773800456969, |
| "grad_norm": 1.0862151384353638, |
| "learning_rate": 4.918928914740098e-06, |
| "loss": 1.2514, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.6991622239146992, |
| "grad_norm": 1.0639281272888184, |
| "learning_rate": 4.918140403587831e-06, |
| "loss": 1.2739, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.7014470677837015, |
| "grad_norm": 1.0512444972991943, |
| "learning_rate": 4.9173481401918556e-06, |
| "loss": 1.2576, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.7037319116527038, |
| "grad_norm": 1.0291866064071655, |
| "learning_rate": 4.916552125781529e-06, |
| "loss": 1.2934, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.7060167555217061, |
| "grad_norm": 1.0338629484176636, |
| "learning_rate": 4.915752361592032e-06, |
| "loss": 1.263, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.7083015993907082, |
| "grad_norm": 1.0358542203903198, |
| "learning_rate": 4.914948848864365e-06, |
| "loss": 1.2453, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.7105864432597105, |
| "grad_norm": 1.1184923648834229, |
| "learning_rate": 4.914141588845344e-06, |
| "loss": 1.2653, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.7128712871287128, |
| "grad_norm": 1.0791000127792358, |
| "learning_rate": 4.913330582787598e-06, |
| "loss": 1.2659, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.7151561309977151, |
| "grad_norm": 1.0901819467544556, |
| "learning_rate": 4.912515831949571e-06, |
| "loss": 1.2208, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.7174409748667174, |
| "grad_norm": 1.0219902992248535, |
| "learning_rate": 4.9116973375955166e-06, |
| "loss": 1.2711, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.7197258187357197, |
| "grad_norm": 1.014364242553711, |
| "learning_rate": 4.910875100995499e-06, |
| "loss": 1.2877, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.722010662604722, |
| "grad_norm": 1.0699234008789062, |
| "learning_rate": 4.910049123425386e-06, |
| "loss": 1.2425, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.7242955064737243, |
| "grad_norm": 1.0614267587661743, |
| "learning_rate": 4.9092194061668535e-06, |
| "loss": 1.2475, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.7265803503427266, |
| "grad_norm": 1.0620336532592773, |
| "learning_rate": 4.908385950507378e-06, |
| "loss": 1.2618, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.7288651942117289, |
| "grad_norm": 1.0389032363891602, |
| "learning_rate": 4.90754875774024e-06, |
| "loss": 1.2742, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.7311500380807312, |
| "grad_norm": 0.9754124879837036, |
| "learning_rate": 4.9067078291645144e-06, |
| "loss": 1.25, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.7334348819497334, |
| "grad_norm": 1.056058406829834, |
| "learning_rate": 4.905863166085076e-06, |
| "loss": 1.2451, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.7357197258187357, |
| "grad_norm": 1.0641580820083618, |
| "learning_rate": 4.9050147698125944e-06, |
| "loss": 1.2532, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.738004569687738, |
| "grad_norm": 1.0407251119613647, |
| "learning_rate": 4.904162641663532e-06, |
| "loss": 1.3103, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.7402894135567403, |
| "grad_norm": 1.0477187633514404, |
| "learning_rate": 4.9033067829601385e-06, |
| "loss": 1.2658, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.7425742574257426, |
| "grad_norm": 1.0202401876449585, |
| "learning_rate": 4.902447195030459e-06, |
| "loss": 1.2569, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.7448591012947449, |
| "grad_norm": 1.0629253387451172, |
| "learning_rate": 4.9015838792083196e-06, |
| "loss": 1.247, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.7471439451637472, |
| "grad_norm": 1.0284748077392578, |
| "learning_rate": 4.900716836833333e-06, |
| "loss": 1.2659, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.7494287890327495, |
| "grad_norm": 1.0653586387634277, |
| "learning_rate": 4.899846069250894e-06, |
| "loss": 1.2673, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.7517136329017517, |
| "grad_norm": 1.0795682668685913, |
| "learning_rate": 4.898971577812179e-06, |
| "loss": 1.2778, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.753998476770754, |
| "grad_norm": 1.0359232425689697, |
| "learning_rate": 4.8980933638741426e-06, |
| "loss": 1.2732, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.7562833206397562, |
| "grad_norm": 1.0286237001419067, |
| "learning_rate": 4.897211428799512e-06, |
| "loss": 1.2455, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.7585681645087585, |
| "grad_norm": 1.0179105997085571, |
| "learning_rate": 4.896325773956793e-06, |
| "loss": 1.2413, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.7608530083777608, |
| "grad_norm": 1.0381865501403809, |
| "learning_rate": 4.895436400720264e-06, |
| "loss": 1.2409, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.7631378522467631, |
| "grad_norm": 0.9918906688690186, |
| "learning_rate": 4.894543310469968e-06, |
| "loss": 1.2556, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.7654226961157654, |
| "grad_norm": 1.0300416946411133, |
| "learning_rate": 4.8936465045917204e-06, |
| "loss": 1.2325, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.7677075399847677, |
| "grad_norm": 1.052534580230713, |
| "learning_rate": 4.8927459844770995e-06, |
| "loss": 1.2561, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.76999238385377, |
| "grad_norm": 1.0454604625701904, |
| "learning_rate": 4.891841751523448e-06, |
| "loss": 1.2845, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.7722772277227723, |
| "grad_norm": 1.0518709421157837, |
| "learning_rate": 4.8909338071338706e-06, |
| "loss": 1.2485, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.7745620715917746, |
| "grad_norm": 1.0326422452926636, |
| "learning_rate": 4.890022152717231e-06, |
| "loss": 1.2757, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.7768469154607769, |
| "grad_norm": 1.2617943286895752, |
| "learning_rate": 4.889106789688148e-06, |
| "loss": 1.2656, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.7791317593297792, |
| "grad_norm": 1.0038459300994873, |
| "learning_rate": 4.888187719466996e-06, |
| "loss": 1.2636, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.7814166031987814, |
| "grad_norm": 1.1393420696258545, |
| "learning_rate": 4.887264943479903e-06, |
| "loss": 1.2621, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.7837014470677837, |
| "grad_norm": 1.0969446897506714, |
| "learning_rate": 4.8863384631587446e-06, |
| "loss": 1.2208, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.785986290936786, |
| "grad_norm": 1.034393310546875, |
| "learning_rate": 4.885408279941148e-06, |
| "loss": 1.2101, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.7882711348057883, |
| "grad_norm": 1.1397764682769775, |
| "learning_rate": 4.884474395270484e-06, |
| "loss": 1.2823, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.7905559786747905, |
| "grad_norm": 1.1488789319992065, |
| "learning_rate": 4.883536810595867e-06, |
| "loss": 1.2615, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.7928408225437928, |
| "grad_norm": 1.0274580717086792, |
| "learning_rate": 4.8825955273721524e-06, |
| "loss": 1.2334, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.7951256664127951, |
| "grad_norm": 1.0355713367462158, |
| "learning_rate": 4.8816505470599365e-06, |
| "loss": 1.2224, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.7974105102817974, |
| "grad_norm": 1.0540703535079956, |
| "learning_rate": 4.880701871125551e-06, |
| "loss": 1.262, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.7996953541507997, |
| "grad_norm": 1.0765819549560547, |
| "learning_rate": 4.879749501041062e-06, |
| "loss": 1.2731, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.801980198019802, |
| "grad_norm": 1.0639638900756836, |
| "learning_rate": 4.878793438284268e-06, |
| "loss": 1.2673, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.8042650418888042, |
| "grad_norm": 1.0149368047714233, |
| "learning_rate": 4.877833684338698e-06, |
| "loss": 1.2479, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.8065498857578065, |
| "grad_norm": 1.1710435152053833, |
| "learning_rate": 4.876870240693608e-06, |
| "loss": 1.2775, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.8088347296268088, |
| "grad_norm": 1.1317570209503174, |
| "learning_rate": 4.875903108843979e-06, |
| "loss": 1.2732, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.8111195734958111, |
| "grad_norm": 1.0417158603668213, |
| "learning_rate": 4.874932290290517e-06, |
| "loss": 1.2647, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.8134044173648134, |
| "grad_norm": 1.073765516281128, |
| "learning_rate": 4.873957786539646e-06, |
| "loss": 1.2738, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.8156892612338157, |
| "grad_norm": 1.018481731414795, |
| "learning_rate": 4.872979599103511e-06, |
| "loss": 1.2509, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.817974105102818, |
| "grad_norm": 1.0737470388412476, |
| "learning_rate": 4.8719977294999695e-06, |
| "loss": 1.232, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.8202589489718203, |
| "grad_norm": 1.0921229124069214, |
| "learning_rate": 4.871012179252597e-06, |
| "loss": 1.2342, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.8225437928408226, |
| "grad_norm": 1.0502641201019287, |
| "learning_rate": 4.870022949890676e-06, |
| "loss": 1.2463, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.8248286367098249, |
| "grad_norm": 1.1755155324935913, |
| "learning_rate": 4.869030042949202e-06, |
| "loss": 1.2625, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.8271134805788272, |
| "grad_norm": 1.0167341232299805, |
| "learning_rate": 4.868033459968874e-06, |
| "loss": 1.2563, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.8293983244478293, |
| "grad_norm": 1.0481575727462769, |
| "learning_rate": 4.8670332024960954e-06, |
| "loss": 1.2541, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.8316831683168316, |
| "grad_norm": 1.0657804012298584, |
| "learning_rate": 4.866029272082973e-06, |
| "loss": 1.2444, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.8339680121858339, |
| "grad_norm": 1.0473397970199585, |
| "learning_rate": 4.865021670287311e-06, |
| "loss": 1.2356, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.8362528560548362, |
| "grad_norm": 1.011077880859375, |
| "learning_rate": 4.864010398672612e-06, |
| "loss": 1.2417, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.8385376999238385, |
| "grad_norm": 1.0485464334487915, |
| "learning_rate": 4.862995458808073e-06, |
| "loss": 1.2728, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.8408225437928408, |
| "grad_norm": 1.0683908462524414, |
| "learning_rate": 4.861976852268582e-06, |
| "loss": 1.2354, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.8431073876618431, |
| "grad_norm": 1.0323604345321655, |
| "learning_rate": 4.860954580634718e-06, |
| "loss": 1.2665, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.8453922315308454, |
| "grad_norm": 1.024782419204712, |
| "learning_rate": 4.859928645492746e-06, |
| "loss": 1.2515, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.8476770753998477, |
| "grad_norm": 1.02902090549469, |
| "learning_rate": 4.858899048434614e-06, |
| "loss": 1.2274, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.84996191926885, |
| "grad_norm": 1.0355148315429688, |
| "learning_rate": 4.857865791057957e-06, |
| "loss": 1.2289, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.8522467631378522, |
| "grad_norm": 1.0638132095336914, |
| "learning_rate": 4.856828874966086e-06, |
| "loss": 1.2245, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.8545316070068545, |
| "grad_norm": 1.0459909439086914, |
| "learning_rate": 4.8557883017679895e-06, |
| "loss": 1.2347, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.8568164508758568, |
| "grad_norm": 1.0818232297897339, |
| "learning_rate": 4.854744073078333e-06, |
| "loss": 1.2564, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.8591012947448591, |
| "grad_norm": 1.0551162958145142, |
| "learning_rate": 4.853696190517452e-06, |
| "loss": 1.2809, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.8613861386138614, |
| "grad_norm": 1.0419256687164307, |
| "learning_rate": 4.8526446557113525e-06, |
| "loss": 1.2532, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.8636709824828637, |
| "grad_norm": 1.058478832244873, |
| "learning_rate": 4.851589470291707e-06, |
| "loss": 1.229, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.865955826351866, |
| "grad_norm": 1.0275694131851196, |
| "learning_rate": 4.850530635895854e-06, |
| "loss": 1.2555, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.8682406702208683, |
| "grad_norm": 1.0653144121170044, |
| "learning_rate": 4.849468154166794e-06, |
| "loss": 1.2397, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.8705255140898706, |
| "grad_norm": 1.0227371454238892, |
| "learning_rate": 4.8484020267531855e-06, |
| "loss": 1.2568, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.8728103579588729, |
| "grad_norm": 1.0583505630493164, |
| "learning_rate": 4.847332255309346e-06, |
| "loss": 1.2489, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.8750952018278751, |
| "grad_norm": 1.0397239923477173, |
| "learning_rate": 4.846258841495246e-06, |
| "loss": 1.273, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.8773800456968773, |
| "grad_norm": 1.020776391029358, |
| "learning_rate": 4.845181786976509e-06, |
| "loss": 1.2257, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.8796648895658796, |
| "grad_norm": 1.0420705080032349, |
| "learning_rate": 4.844101093424407e-06, |
| "loss": 1.296, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.8819497334348819, |
| "grad_norm": 1.0465624332427979, |
| "learning_rate": 4.84301676251586e-06, |
| "loss": 1.2514, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.8842345773038842, |
| "grad_norm": 1.0915330648422241, |
| "learning_rate": 4.841928795933429e-06, |
| "loss": 1.2664, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.8865194211728865, |
| "grad_norm": 1.0246195793151855, |
| "learning_rate": 4.84083719536532e-06, |
| "loss": 1.2499, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.8888042650418888, |
| "grad_norm": 1.0145692825317383, |
| "learning_rate": 4.839741962505376e-06, |
| "loss": 1.2638, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.8910891089108911, |
| "grad_norm": 1.05404531955719, |
| "learning_rate": 4.838643099053077e-06, |
| "loss": 1.1875, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.8933739527798934, |
| "grad_norm": 1.1422752141952515, |
| "learning_rate": 4.837540606713538e-06, |
| "loss": 1.2496, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.8956587966488957, |
| "grad_norm": 1.0648959875106812, |
| "learning_rate": 4.8364344871975e-06, |
| "loss": 1.2375, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.897943640517898, |
| "grad_norm": 1.0459322929382324, |
| "learning_rate": 4.835324742221338e-06, |
| "loss": 1.2419, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.9002284843869002, |
| "grad_norm": 1.0693044662475586, |
| "learning_rate": 4.834211373507048e-06, |
| "loss": 1.2485, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.9025133282559025, |
| "grad_norm": 1.0930724143981934, |
| "learning_rate": 4.833094382782255e-06, |
| "loss": 1.2389, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.9047981721249048, |
| "grad_norm": 1.1270296573638916, |
| "learning_rate": 4.831973771780197e-06, |
| "loss": 1.2033, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.9070830159939071, |
| "grad_norm": 1.044074535369873, |
| "learning_rate": 4.830849542239735e-06, |
| "loss": 1.2464, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.9093678598629094, |
| "grad_norm": 1.0138458013534546, |
| "learning_rate": 4.829721695905343e-06, |
| "loss": 1.2473, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.9116527037319117, |
| "grad_norm": 1.1201279163360596, |
| "learning_rate": 4.828590234527107e-06, |
| "loss": 1.2729, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.913937547600914, |
| "grad_norm": 1.0771571397781372, |
| "learning_rate": 4.8274551598607214e-06, |
| "loss": 1.2665, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.9162223914699162, |
| "grad_norm": 1.0691912174224854, |
| "learning_rate": 4.8263164736674905e-06, |
| "loss": 1.2094, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.9185072353389185, |
| "grad_norm": 1.0740418434143066, |
| "learning_rate": 4.8251741777143205e-06, |
| "loss": 1.2879, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.9207920792079208, |
| "grad_norm": 1.0185081958770752, |
| "learning_rate": 4.824028273773719e-06, |
| "loss": 1.2459, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.9230769230769231, |
| "grad_norm": 1.0672869682312012, |
| "learning_rate": 4.822878763623792e-06, |
| "loss": 1.2394, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.9253617669459253, |
| "grad_norm": 1.08120858669281, |
| "learning_rate": 4.821725649048242e-06, |
| "loss": 1.2918, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.9276466108149276, |
| "grad_norm": 1.0407681465148926, |
| "learning_rate": 4.820568931836364e-06, |
| "loss": 1.2443, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.9299314546839299, |
| "grad_norm": 1.0847117900848389, |
| "learning_rate": 4.8194086137830445e-06, |
| "loss": 1.2505, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.9322162985529322, |
| "grad_norm": 1.0484883785247803, |
| "learning_rate": 4.818244696688754e-06, |
| "loss": 1.2469, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.9345011424219345, |
| "grad_norm": 1.0654011964797974, |
| "learning_rate": 4.817077182359553e-06, |
| "loss": 1.2544, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.9367859862909368, |
| "grad_norm": 1.108176589012146, |
| "learning_rate": 4.815906072607079e-06, |
| "loss": 1.2387, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.9390708301599391, |
| "grad_norm": 1.0624432563781738, |
| "learning_rate": 4.8147313692485495e-06, |
| "loss": 1.2488, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.9413556740289414, |
| "grad_norm": 1.0391454696655273, |
| "learning_rate": 4.813553074106761e-06, |
| "loss": 1.2514, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.9436405178979437, |
| "grad_norm": 1.1086232662200928, |
| "learning_rate": 4.812371189010081e-06, |
| "loss": 1.2694, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.945925361766946, |
| "grad_norm": 1.0448237657546997, |
| "learning_rate": 4.8111857157924465e-06, |
| "loss": 1.2366, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.9482102056359482, |
| "grad_norm": 1.0393203496932983, |
| "learning_rate": 4.809996656293367e-06, |
| "loss": 1.2747, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.9504950495049505, |
| "grad_norm": 1.083590030670166, |
| "learning_rate": 4.8088040123579106e-06, |
| "loss": 1.2167, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.9527798933739527, |
| "grad_norm": 1.071567177772522, |
| "learning_rate": 4.807607785836711e-06, |
| "loss": 1.2108, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.955064737242955, |
| "grad_norm": 1.0953818559646606, |
| "learning_rate": 4.8064079785859615e-06, |
| "loss": 1.2381, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.9573495811119573, |
| "grad_norm": 1.0628875494003296, |
| "learning_rate": 4.8052045924674105e-06, |
| "loss": 1.232, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.9596344249809596, |
| "grad_norm": 1.0838161706924438, |
| "learning_rate": 4.803997629348359e-06, |
| "loss": 1.2699, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.9619192688499619, |
| "grad_norm": 0.9980992078781128, |
| "learning_rate": 4.802787091101659e-06, |
| "loss": 1.2473, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.9642041127189642, |
| "grad_norm": 1.094283103942871, |
| "learning_rate": 4.801572979605712e-06, |
| "loss": 1.2656, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.9664889565879665, |
| "grad_norm": 1.0554611682891846, |
| "learning_rate": 4.800355296744461e-06, |
| "loss": 1.2584, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.9687738004569688, |
| "grad_norm": 1.1019188165664673, |
| "learning_rate": 4.799134044407392e-06, |
| "loss": 1.2877, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.9710586443259711, |
| "grad_norm": 1.087965726852417, |
| "learning_rate": 4.797909224489531e-06, |
| "loss": 1.2662, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.9733434881949733, |
| "grad_norm": 1.08269202709198, |
| "learning_rate": 4.796680838891438e-06, |
| "loss": 1.2419, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.9756283320639756, |
| "grad_norm": 1.071199893951416, |
| "learning_rate": 4.795448889519207e-06, |
| "loss": 1.2489, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.9779131759329779, |
| "grad_norm": 1.0306544303894043, |
| "learning_rate": 4.794213378284462e-06, |
| "loss": 1.2467, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.9801980198019802, |
| "grad_norm": 1.0567327737808228, |
| "learning_rate": 4.792974307104353e-06, |
| "loss": 1.2637, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.9824828636709825, |
| "grad_norm": 1.0448797941207886, |
| "learning_rate": 4.7917316779015554e-06, |
| "loss": 1.2244, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.9847677075399848, |
| "grad_norm": 1.0123138427734375, |
| "learning_rate": 4.790485492604264e-06, |
| "loss": 1.2326, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.9870525514089871, |
| "grad_norm": 1.0484559535980225, |
| "learning_rate": 4.789235753146192e-06, |
| "loss": 1.2436, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.9893373952779894, |
| "grad_norm": 1.0161617994308472, |
| "learning_rate": 4.787982461466568e-06, |
| "loss": 1.2185, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.9916222391469917, |
| "grad_norm": 1.0779787302017212, |
| "learning_rate": 4.786725619510134e-06, |
| "loss": 1.2256, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.993907083015994, |
| "grad_norm": 1.061590552330017, |
| "learning_rate": 4.785465229227139e-06, |
| "loss": 1.2747, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.9961919268849961, |
| "grad_norm": 1.102403163909912, |
| "learning_rate": 4.784201292573337e-06, |
| "loss": 1.2561, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.9984767707539984, |
| "grad_norm": 0.9936567544937134, |
| "learning_rate": 4.782933811509988e-06, |
| "loss": 1.2409, |
| "step": 437 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.9936567544937134, |
| "learning_rate": 4.781662788003851e-06, |
| "loss": 1.2271, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.0022848438690022, |
| "grad_norm": 1.4983975887298584, |
| "learning_rate": 4.780388224027179e-06, |
| "loss": 1.2312, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.0045696877380046, |
| "grad_norm": 1.0163486003875732, |
| "learning_rate": 4.779110121557723e-06, |
| "loss": 1.1992, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.0068545316070068, |
| "grad_norm": 1.0127511024475098, |
| "learning_rate": 4.777828482578722e-06, |
| "loss": 1.2135, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.0091393754760092, |
| "grad_norm": 1.0328449010849, |
| "learning_rate": 4.776543309078903e-06, |
| "loss": 1.2143, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.0114242193450114, |
| "grad_norm": 1.1579132080078125, |
| "learning_rate": 4.7752546030524775e-06, |
| "loss": 1.2051, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.0137090632140138, |
| "grad_norm": 1.0556671619415283, |
| "learning_rate": 4.77396236649914e-06, |
| "loss": 1.2136, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.015993907083016, |
| "grad_norm": 1.0315356254577637, |
| "learning_rate": 4.772666601424061e-06, |
| "loss": 1.2444, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.0182787509520184, |
| "grad_norm": 1.0836431980133057, |
| "learning_rate": 4.771367309837888e-06, |
| "loss": 1.1967, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.0205635948210205, |
| "grad_norm": 1.0516763925552368, |
| "learning_rate": 4.7700644937567385e-06, |
| "loss": 1.2012, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.022848438690023, |
| "grad_norm": 1.0447187423706055, |
| "learning_rate": 4.768758155202202e-06, |
| "loss": 1.2281, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.0251332825590251, |
| "grad_norm": 1.0647971630096436, |
| "learning_rate": 4.767448296201332e-06, |
| "loss": 1.1907, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.0274181264280273, |
| "grad_norm": 1.0792133808135986, |
| "learning_rate": 4.766134918786646e-06, |
| "loss": 1.2346, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.0297029702970297, |
| "grad_norm": 1.1311832666397095, |
| "learning_rate": 4.764818024996117e-06, |
| "loss": 1.2652, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.031987814166032, |
| "grad_norm": 1.090455174446106, |
| "learning_rate": 4.763497616873181e-06, |
| "loss": 1.2258, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.0342726580350343, |
| "grad_norm": 1.0815131664276123, |
| "learning_rate": 4.7621736964667204e-06, |
| "loss": 1.2233, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.0365575019040365, |
| "grad_norm": 1.112673044204712, |
| "learning_rate": 4.760846265831073e-06, |
| "loss": 1.2136, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.038842345773039, |
| "grad_norm": 1.068178653717041, |
| "learning_rate": 4.759515327026019e-06, |
| "loss": 1.214, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.041127189642041, |
| "grad_norm": 1.079059362411499, |
| "learning_rate": 4.758180882116788e-06, |
| "loss": 1.2024, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.0434120335110435, |
| "grad_norm": 1.1010431051254272, |
| "learning_rate": 4.756842933174044e-06, |
| "loss": 1.2239, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.0456968773800457, |
| "grad_norm": 1.1039162874221802, |
| "learning_rate": 4.755501482273892e-06, |
| "loss": 1.2212, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.047981721249048, |
| "grad_norm": 1.0536257028579712, |
| "learning_rate": 4.754156531497869e-06, |
| "loss": 1.1672, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.0502665651180503, |
| "grad_norm": 1.085410475730896, |
| "learning_rate": 4.752808082932943e-06, |
| "loss": 1.2471, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.0525514089870525, |
| "grad_norm": 1.0432335138320923, |
| "learning_rate": 4.751456138671512e-06, |
| "loss": 1.2345, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.0548362528560549, |
| "grad_norm": 1.072708249092102, |
| "learning_rate": 4.750100700811395e-06, |
| "loss": 1.2328, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.057121096725057, |
| "grad_norm": 1.0852991342544556, |
| "learning_rate": 4.748741771455835e-06, |
| "loss": 1.19, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.0594059405940595, |
| "grad_norm": 1.0906398296356201, |
| "learning_rate": 4.747379352713489e-06, |
| "loss": 1.229, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.0616907844630616, |
| "grad_norm": 1.055309772491455, |
| "learning_rate": 4.746013446698432e-06, |
| "loss": 1.2419, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.063975628332064, |
| "grad_norm": 1.0667710304260254, |
| "learning_rate": 4.744644055530149e-06, |
| "loss": 1.1943, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.0662604722010662, |
| "grad_norm": 1.091098427772522, |
| "learning_rate": 4.743271181333533e-06, |
| "loss": 1.171, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.0685453160700686, |
| "grad_norm": 1.0701195001602173, |
| "learning_rate": 4.741894826238882e-06, |
| "loss": 1.2163, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.0708301599390708, |
| "grad_norm": 1.0526652336120605, |
| "learning_rate": 4.740514992381893e-06, |
| "loss": 1.2329, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.073115003808073, |
| "grad_norm": 1.0725401639938354, |
| "learning_rate": 4.739131681903666e-06, |
| "loss": 1.1793, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.0753998476770754, |
| "grad_norm": 1.126091480255127, |
| "learning_rate": 4.737744896950689e-06, |
| "loss": 1.1769, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.0776846915460776, |
| "grad_norm": 1.039844036102295, |
| "learning_rate": 4.736354639674847e-06, |
| "loss": 1.205, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.07996953541508, |
| "grad_norm": 1.0432695150375366, |
| "learning_rate": 4.734960912233411e-06, |
| "loss": 1.1909, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.0822543792840822, |
| "grad_norm": 1.0890403985977173, |
| "learning_rate": 4.7335637167890366e-06, |
| "loss": 1.1928, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.0845392231530846, |
| "grad_norm": 1.0710458755493164, |
| "learning_rate": 4.732163055509759e-06, |
| "loss": 1.2402, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.0868240670220868, |
| "grad_norm": 1.0940525531768799, |
| "learning_rate": 4.730758930568997e-06, |
| "loss": 1.181, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.0891089108910892, |
| "grad_norm": 1.177641749382019, |
| "learning_rate": 4.729351344145536e-06, |
| "loss": 1.2232, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.0913937547600914, |
| "grad_norm": 1.1401522159576416, |
| "learning_rate": 4.72794029842354e-06, |
| "loss": 1.1878, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.0936785986290938, |
| "grad_norm": 1.0874228477478027, |
| "learning_rate": 4.726525795592535e-06, |
| "loss": 1.1658, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.095963442498096, |
| "grad_norm": 1.0325064659118652, |
| "learning_rate": 4.725107837847414e-06, |
| "loss": 1.2084, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.0982482863670981, |
| "grad_norm": 1.0492074489593506, |
| "learning_rate": 4.723686427388434e-06, |
| "loss": 1.2208, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.1005331302361006, |
| "grad_norm": 1.1399495601654053, |
| "learning_rate": 4.722261566421204e-06, |
| "loss": 1.2158, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.1028179741051027, |
| "grad_norm": 1.1156668663024902, |
| "learning_rate": 4.72083325715669e-06, |
| "loss": 1.2252, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.1051028179741051, |
| "grad_norm": 1.072943091392517, |
| "learning_rate": 4.719401501811209e-06, |
| "loss": 1.2381, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.1073876618431073, |
| "grad_norm": 1.0337257385253906, |
| "learning_rate": 4.717966302606424e-06, |
| "loss": 1.1782, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.1096725057121097, |
| "grad_norm": 1.0744901895523071, |
| "learning_rate": 4.716527661769344e-06, |
| "loss": 1.2412, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.111957349581112, |
| "grad_norm": 1.0331535339355469, |
| "learning_rate": 4.715085581532316e-06, |
| "loss": 1.1869, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.1142421934501143, |
| "grad_norm": 1.0795518159866333, |
| "learning_rate": 4.7136400641330245e-06, |
| "loss": 1.214, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.1165270373191165, |
| "grad_norm": 1.084125280380249, |
| "learning_rate": 4.71219111181449e-06, |
| "loss": 1.2049, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.118811881188119, |
| "grad_norm": 1.1166882514953613, |
| "learning_rate": 4.710738726825059e-06, |
| "loss": 1.2143, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.121096725057121, |
| "grad_norm": 1.0764187574386597, |
| "learning_rate": 4.709282911418408e-06, |
| "loss": 1.2301, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.1233815689261233, |
| "grad_norm": 1.069149374961853, |
| "learning_rate": 4.7078236678535335e-06, |
| "loss": 1.2094, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.1256664127951257, |
| "grad_norm": 1.0744988918304443, |
| "learning_rate": 4.7063609983947535e-06, |
| "loss": 1.1893, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.1279512566641279, |
| "grad_norm": 1.090267300605774, |
| "learning_rate": 4.704894905311701e-06, |
| "loss": 1.1575, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.1302361005331303, |
| "grad_norm": 1.067543625831604, |
| "learning_rate": 4.703425390879323e-06, |
| "loss": 1.1801, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.1325209444021325, |
| "grad_norm": 1.0365897417068481, |
| "learning_rate": 4.701952457377874e-06, |
| "loss": 1.2197, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.1348057882711349, |
| "grad_norm": 1.066163420677185, |
| "learning_rate": 4.700476107092913e-06, |
| "loss": 1.2156, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.137090632140137, |
| "grad_norm": 1.1297317743301392, |
| "learning_rate": 4.698996342315303e-06, |
| "loss": 1.2064, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.1393754760091395, |
| "grad_norm": 1.069610834121704, |
| "learning_rate": 4.697513165341204e-06, |
| "loss": 1.1986, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.1416603198781416, |
| "grad_norm": 1.0844234228134155, |
| "learning_rate": 4.696026578472073e-06, |
| "loss": 1.1892, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.1439451637471438, |
| "grad_norm": 1.079389214515686, |
| "learning_rate": 4.694536584014653e-06, |
| "loss": 1.201, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.1462300076161462, |
| "grad_norm": 1.075682520866394, |
| "learning_rate": 4.693043184280978e-06, |
| "loss": 1.2261, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.1485148514851484, |
| "grad_norm": 1.0244231224060059, |
| "learning_rate": 4.69154638158837e-06, |
| "loss": 1.2048, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.1507996953541508, |
| "grad_norm": 1.0907280445098877, |
| "learning_rate": 4.690046178259423e-06, |
| "loss": 1.2202, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.153084539223153, |
| "grad_norm": 1.097701907157898, |
| "learning_rate": 4.688542576622013e-06, |
| "loss": 1.1781, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.1553693830921554, |
| "grad_norm": 1.0993037223815918, |
| "learning_rate": 4.687035579009288e-06, |
| "loss": 1.2113, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.1576542269611576, |
| "grad_norm": 1.085300087928772, |
| "learning_rate": 4.685525187759666e-06, |
| "loss": 1.1996, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.15993907083016, |
| "grad_norm": 1.0483977794647217, |
| "learning_rate": 4.684011405216832e-06, |
| "loss": 1.2343, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.1622239146991622, |
| "grad_norm": 1.064441442489624, |
| "learning_rate": 4.682494233729729e-06, |
| "loss": 1.2405, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.1645087585681646, |
| "grad_norm": 1.05643630027771, |
| "learning_rate": 4.680973675652564e-06, |
| "loss": 1.2112, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.1667936024371668, |
| "grad_norm": 1.0908793210983276, |
| "learning_rate": 4.679449733344796e-06, |
| "loss": 1.2077, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.1690784463061692, |
| "grad_norm": 1.1248105764389038, |
| "learning_rate": 4.677922409171136e-06, |
| "loss": 1.1987, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.1713632901751714, |
| "grad_norm": 1.0605143308639526, |
| "learning_rate": 4.6763917055015414e-06, |
| "loss": 1.2027, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.1736481340441736, |
| "grad_norm": 1.0496442317962646, |
| "learning_rate": 4.674857624711216e-06, |
| "loss": 1.2259, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.175932977913176, |
| "grad_norm": 1.1550832986831665, |
| "learning_rate": 4.673320169180601e-06, |
| "loss": 1.2418, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.1782178217821782, |
| "grad_norm": 1.1532083749771118, |
| "learning_rate": 4.671779341295378e-06, |
| "loss": 1.2265, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.1805026656511806, |
| "grad_norm": 1.081101417541504, |
| "learning_rate": 4.670235143446457e-06, |
| "loss": 1.2078, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.1827875095201827, |
| "grad_norm": 1.0701441764831543, |
| "learning_rate": 4.668687578029983e-06, |
| "loss": 1.2252, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.1850723533891852, |
| "grad_norm": 1.0859651565551758, |
| "learning_rate": 4.667136647447319e-06, |
| "loss": 1.2131, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.1873571972581873, |
| "grad_norm": 1.122533917427063, |
| "learning_rate": 4.6655823541050575e-06, |
| "loss": 1.1608, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.1896420411271897, |
| "grad_norm": 1.0594309568405151, |
| "learning_rate": 4.664024700415002e-06, |
| "loss": 1.2151, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.191926884996192, |
| "grad_norm": 1.1281721591949463, |
| "learning_rate": 4.662463688794175e-06, |
| "loss": 1.2197, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.194211728865194, |
| "grad_norm": 1.1049555540084839, |
| "learning_rate": 4.660899321664808e-06, |
| "loss": 1.2416, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.1964965727341965, |
| "grad_norm": 1.1022320985794067, |
| "learning_rate": 4.65933160145434e-06, |
| "loss": 1.1842, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.1987814166031987, |
| "grad_norm": 1.1182475090026855, |
| "learning_rate": 4.657760530595411e-06, |
| "loss": 1.1417, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.201066260472201, |
| "grad_norm": 1.1111788749694824, |
| "learning_rate": 4.656186111525863e-06, |
| "loss": 1.2092, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.2033511043412033, |
| "grad_norm": 1.0595399141311646, |
| "learning_rate": 4.654608346688731e-06, |
| "loss": 1.1549, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.2056359482102057, |
| "grad_norm": 1.1151765584945679, |
| "learning_rate": 4.6530272385322426e-06, |
| "loss": 1.2469, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.2079207920792079, |
| "grad_norm": 1.0334808826446533, |
| "learning_rate": 4.651442789509813e-06, |
| "loss": 1.229, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.2102056359482103, |
| "grad_norm": 1.0765459537506104, |
| "learning_rate": 4.649855002080044e-06, |
| "loss": 1.2163, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.2124904798172125, |
| "grad_norm": 1.1509029865264893, |
| "learning_rate": 4.648263878706712e-06, |
| "loss": 1.2101, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.2147753236862147, |
| "grad_norm": 1.1111629009246826, |
| "learning_rate": 4.646669421858776e-06, |
| "loss": 1.2379, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.217060167555217, |
| "grad_norm": 1.1683619022369385, |
| "learning_rate": 4.645071634010363e-06, |
| "loss": 1.2011, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.2193450114242192, |
| "grad_norm": 1.2373298406600952, |
| "learning_rate": 4.643470517640772e-06, |
| "loss": 1.1502, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.2216298552932217, |
| "grad_norm": 1.080675482749939, |
| "learning_rate": 4.641866075234463e-06, |
| "loss": 1.2173, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.2239146991622238, |
| "grad_norm": 1.0971184968948364, |
| "learning_rate": 4.640258309281062e-06, |
| "loss": 1.2117, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.2261995430312262, |
| "grad_norm": 1.183856725692749, |
| "learning_rate": 4.638647222275349e-06, |
| "loss": 1.2137, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.2284843869002284, |
| "grad_norm": 1.2277085781097412, |
| "learning_rate": 4.637032816717256e-06, |
| "loss": 1.1977, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.2307692307692308, |
| "grad_norm": 1.1087970733642578, |
| "learning_rate": 4.6354150951118676e-06, |
| "loss": 1.2256, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.233054074638233, |
| "grad_norm": 1.0706229209899902, |
| "learning_rate": 4.633794059969413e-06, |
| "loss": 1.2429, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.2353389185072354, |
| "grad_norm": 1.1261042356491089, |
| "learning_rate": 4.632169713805262e-06, |
| "loss": 1.219, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.2376237623762376, |
| "grad_norm": 1.1817506551742554, |
| "learning_rate": 4.630542059139923e-06, |
| "loss": 1.2367, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.23990860624524, |
| "grad_norm": 1.1145075559616089, |
| "learning_rate": 4.628911098499039e-06, |
| "loss": 1.2029, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.2421934501142422, |
| "grad_norm": 1.1309086084365845, |
| "learning_rate": 4.62727683441338e-06, |
| "loss": 1.2374, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.2444782939832444, |
| "grad_norm": 1.0949103832244873, |
| "learning_rate": 4.6256392694188445e-06, |
| "loss": 1.2204, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.2467631378522468, |
| "grad_norm": 1.2004865407943726, |
| "learning_rate": 4.6239984060564535e-06, |
| "loss": 1.2327, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.249047981721249, |
| "grad_norm": 1.286232829093933, |
| "learning_rate": 4.622354246872344e-06, |
| "loss": 1.1838, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.2513328255902514, |
| "grad_norm": 1.0974533557891846, |
| "learning_rate": 4.620706794417769e-06, |
| "loss": 1.1678, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.2536176694592536, |
| "grad_norm": 1.0960924625396729, |
| "learning_rate": 4.61905605124909e-06, |
| "loss": 1.2314, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.255902513328256, |
| "grad_norm": 1.1535454988479614, |
| "learning_rate": 4.617402019927776e-06, |
| "loss": 1.1928, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.2581873571972582, |
| "grad_norm": 1.2693071365356445, |
| "learning_rate": 4.615744703020396e-06, |
| "loss": 1.1966, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.2604722010662606, |
| "grad_norm": 1.1645997762680054, |
| "learning_rate": 4.614084103098623e-06, |
| "loss": 1.2251, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.2627570449352628, |
| "grad_norm": 1.1186461448669434, |
| "learning_rate": 4.6124202227392175e-06, |
| "loss": 1.2037, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.265041888804265, |
| "grad_norm": 1.1100102663040161, |
| "learning_rate": 4.610753064524034e-06, |
| "loss": 1.2011, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.2673267326732673, |
| "grad_norm": 1.1173806190490723, |
| "learning_rate": 4.609082631040012e-06, |
| "loss": 1.1871, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.2696115765422697, |
| "grad_norm": 1.1128157377243042, |
| "learning_rate": 4.6074089248791735e-06, |
| "loss": 1.1965, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.271896420411272, |
| "grad_norm": 1.0940717458724976, |
| "learning_rate": 4.60573194863862e-06, |
| "loss": 1.193, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.2741812642802741, |
| "grad_norm": 1.0955843925476074, |
| "learning_rate": 4.604051704920526e-06, |
| "loss": 1.187, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.2764661081492765, |
| "grad_norm": 1.0802319049835205, |
| "learning_rate": 4.602368196332134e-06, |
| "loss": 1.1753, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.2787509520182787, |
| "grad_norm": 1.1677578687667847, |
| "learning_rate": 4.600681425485757e-06, |
| "loss": 1.1964, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.2810357958872811, |
| "grad_norm": 1.0803853273391724, |
| "learning_rate": 4.598991394998768e-06, |
| "loss": 1.2052, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.2833206397562833, |
| "grad_norm": 1.128667950630188, |
| "learning_rate": 4.5972981074935975e-06, |
| "loss": 1.1774, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.2856054836252855, |
| "grad_norm": 1.0685629844665527, |
| "learning_rate": 4.59560156559773e-06, |
| "loss": 1.1897, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.287890327494288, |
| "grad_norm": 1.1464303731918335, |
| "learning_rate": 4.593901771943702e-06, |
| "loss": 1.1809, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.2901751713632903, |
| "grad_norm": 1.1095281839370728, |
| "learning_rate": 4.592198729169091e-06, |
| "loss": 1.2118, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.2924600152322925, |
| "grad_norm": 1.0897274017333984, |
| "learning_rate": 4.5904924399165215e-06, |
| "loss": 1.177, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.2947448591012947, |
| "grad_norm": 1.0702495574951172, |
| "learning_rate": 4.588782906833653e-06, |
| "loss": 1.1872, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.297029702970297, |
| "grad_norm": 1.0990184545516968, |
| "learning_rate": 4.587070132573178e-06, |
| "loss": 1.1903, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.2993145468392993, |
| "grad_norm": 1.121097207069397, |
| "learning_rate": 4.58535411979282e-06, |
| "loss": 1.2469, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.3015993907083017, |
| "grad_norm": 1.0787534713745117, |
| "learning_rate": 4.583634871155326e-06, |
| "loss": 1.1995, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.3038842345773038, |
| "grad_norm": 1.0721417665481567, |
| "learning_rate": 4.581912389328466e-06, |
| "loss": 1.1889, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.306169078446306, |
| "grad_norm": 1.1017696857452393, |
| "learning_rate": 4.580186676985024e-06, |
| "loss": 1.2133, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.3084539223153084, |
| "grad_norm": 1.1040468215942383, |
| "learning_rate": 4.578457736802801e-06, |
| "loss": 1.1894, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.3107387661843108, |
| "grad_norm": 1.0856465101242065, |
| "learning_rate": 4.576725571464604e-06, |
| "loss": 1.2234, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.313023610053313, |
| "grad_norm": 1.0786073207855225, |
| "learning_rate": 4.574990183658244e-06, |
| "loss": 1.1989, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.3153084539223152, |
| "grad_norm": 1.0701881647109985, |
| "learning_rate": 4.573251576076532e-06, |
| "loss": 1.2095, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.3175932977913176, |
| "grad_norm": 1.0697689056396484, |
| "learning_rate": 4.5715097514172794e-06, |
| "loss": 1.2198, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.3198781416603198, |
| "grad_norm": 1.1303515434265137, |
| "learning_rate": 4.569764712383284e-06, |
| "loss": 1.2456, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.3221629855293222, |
| "grad_norm": 1.1471296548843384, |
| "learning_rate": 4.5680164616823355e-06, |
| "loss": 1.2155, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.3244478293983244, |
| "grad_norm": 1.0679783821105957, |
| "learning_rate": 4.566265002027204e-06, |
| "loss": 1.2346, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.3267326732673268, |
| "grad_norm": 1.087063193321228, |
| "learning_rate": 4.564510336135642e-06, |
| "loss": 1.1735, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.329017517136329, |
| "grad_norm": 1.190617322921753, |
| "learning_rate": 4.562752466730374e-06, |
| "loss": 1.2472, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.3313023610053314, |
| "grad_norm": 1.0759129524230957, |
| "learning_rate": 4.560991396539099e-06, |
| "loss": 1.2263, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.3335872048743336, |
| "grad_norm": 1.080640196800232, |
| "learning_rate": 4.559227128294479e-06, |
| "loss": 1.1773, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.3358720487433358, |
| "grad_norm": 1.0868074893951416, |
| "learning_rate": 4.5574596647341414e-06, |
| "loss": 1.254, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.3381568926123382, |
| "grad_norm": 1.0621445178985596, |
| "learning_rate": 4.55568900860067e-06, |
| "loss": 1.2091, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.3404417364813406, |
| "grad_norm": 1.1124675273895264, |
| "learning_rate": 4.553915162641602e-06, |
| "loss": 1.2093, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.3427265803503428, |
| "grad_norm": 1.0877987146377563, |
| "learning_rate": 4.552138129609428e-06, |
| "loss": 1.2399, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.345011424219345, |
| "grad_norm": 1.1441737413406372, |
| "learning_rate": 4.550357912261579e-06, |
| "loss": 1.2274, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.3472962680883473, |
| "grad_norm": 1.131813645362854, |
| "learning_rate": 4.548574513360431e-06, |
| "loss": 1.2296, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.3495811119573495, |
| "grad_norm": 1.0901340246200562, |
| "learning_rate": 4.546787935673294e-06, |
| "loss": 1.2151, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.351865955826352, |
| "grad_norm": 1.1450812816619873, |
| "learning_rate": 4.544998181972412e-06, |
| "loss": 1.2054, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.3541507996953541, |
| "grad_norm": 1.0988374948501587, |
| "learning_rate": 4.543205255034958e-06, |
| "loss": 1.2133, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.3564356435643563, |
| "grad_norm": 1.0787577629089355, |
| "learning_rate": 4.541409157643027e-06, |
| "loss": 1.1972, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.3587204874333587, |
| "grad_norm": 1.0694243907928467, |
| "learning_rate": 4.539609892583637e-06, |
| "loss": 1.2182, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.3610053313023611, |
| "grad_norm": 1.065865397453308, |
| "learning_rate": 4.537807462648716e-06, |
| "loss": 1.2057, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.3632901751713633, |
| "grad_norm": 1.0816274881362915, |
| "learning_rate": 4.5360018706351075e-06, |
| "loss": 1.1846, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.3655750190403655, |
| "grad_norm": 1.105301022529602, |
| "learning_rate": 4.5341931193445585e-06, |
| "loss": 1.2219, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.367859862909368, |
| "grad_norm": 1.1194454431533813, |
| "learning_rate": 4.5323812115837215e-06, |
| "loss": 1.2021, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.37014470677837, |
| "grad_norm": 1.0899710655212402, |
| "learning_rate": 4.530566150164145e-06, |
| "loss": 1.173, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.3724295506473725, |
| "grad_norm": 1.0824511051177979, |
| "learning_rate": 4.528747937902271e-06, |
| "loss": 1.2131, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.3747143945163747, |
| "grad_norm": 1.0796427726745605, |
| "learning_rate": 4.52692657761943e-06, |
| "loss": 1.1911, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.376999238385377, |
| "grad_norm": 1.1047371625900269, |
| "learning_rate": 4.525102072141839e-06, |
| "loss": 1.1734, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.3792840822543793, |
| "grad_norm": 1.1101821660995483, |
| "learning_rate": 4.523274424300596e-06, |
| "loss": 1.2274, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.3815689261233817, |
| "grad_norm": 1.1410820484161377, |
| "learning_rate": 4.521443636931671e-06, |
| "loss": 1.2, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.3838537699923839, |
| "grad_norm": 1.0687155723571777, |
| "learning_rate": 4.5196097128759095e-06, |
| "loss": 1.2028, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.386138613861386, |
| "grad_norm": 1.0923937559127808, |
| "learning_rate": 4.517772654979024e-06, |
| "loss": 1.2522, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.3884234577303884, |
| "grad_norm": 1.1132218837738037, |
| "learning_rate": 4.515932466091587e-06, |
| "loss": 1.1797, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.3907083015993906, |
| "grad_norm": 1.182809591293335, |
| "learning_rate": 4.514089149069033e-06, |
| "loss": 1.1885, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.392993145468393, |
| "grad_norm": 1.064723253250122, |
| "learning_rate": 4.512242706771647e-06, |
| "loss": 1.174, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.3952779893373952, |
| "grad_norm": 1.1065499782562256, |
| "learning_rate": 4.510393142064567e-06, |
| "loss": 1.1919, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.3975628332063976, |
| "grad_norm": 1.0989713668823242, |
| "learning_rate": 4.508540457817772e-06, |
| "loss": 1.1835, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.3998476770753998, |
| "grad_norm": 1.0850595235824585, |
| "learning_rate": 4.506684656906085e-06, |
| "loss": 1.1945, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.4021325209444022, |
| "grad_norm": 1.1323665380477905, |
| "learning_rate": 4.5048257422091655e-06, |
| "loss": 1.209, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.4044173648134044, |
| "grad_norm": 1.1112160682678223, |
| "learning_rate": 4.5029637166115e-06, |
| "loss": 1.1742, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.4067022086824066, |
| "grad_norm": 1.1052254438400269, |
| "learning_rate": 4.5010985830024086e-06, |
| "loss": 1.1916, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.408987052551409, |
| "grad_norm": 1.0695083141326904, |
| "learning_rate": 4.4992303442760286e-06, |
| "loss": 1.1829, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.4112718964204114, |
| "grad_norm": 1.0871409177780151, |
| "learning_rate": 4.497359003331318e-06, |
| "loss": 1.2053, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.4135567402894136, |
| "grad_norm": 1.093496322631836, |
| "learning_rate": 4.495484563072049e-06, |
| "loss": 1.1825, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.4158415841584158, |
| "grad_norm": 1.0716845989227295, |
| "learning_rate": 4.493607026406802e-06, |
| "loss": 1.1911, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.4181264280274182, |
| "grad_norm": 1.1274534463882446, |
| "learning_rate": 4.4917263962489635e-06, |
| "loss": 1.1737, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.4204112718964204, |
| "grad_norm": 1.082309603691101, |
| "learning_rate": 4.489842675516718e-06, |
| "loss": 1.1986, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.4226961157654228, |
| "grad_norm": 1.0890616178512573, |
| "learning_rate": 4.487955867133047e-06, |
| "loss": 1.2273, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.424980959634425, |
| "grad_norm": 1.0633172988891602, |
| "learning_rate": 4.486065974025723e-06, |
| "loss": 1.1834, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.4272658035034271, |
| "grad_norm": 1.0931994915008545, |
| "learning_rate": 4.484172999127305e-06, |
| "loss": 1.1976, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.4295506473724295, |
| "grad_norm": 1.1375906467437744, |
| "learning_rate": 4.482276945375135e-06, |
| "loss": 1.2093, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.431835491241432, |
| "grad_norm": 1.3243980407714844, |
| "learning_rate": 4.480377815711331e-06, |
| "loss": 1.2102, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.4341203351104341, |
| "grad_norm": 1.0940284729003906, |
| "learning_rate": 4.478475613082783e-06, |
| "loss": 1.1888, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.4364051789794363, |
| "grad_norm": 1.1363506317138672, |
| "learning_rate": 4.4765703404411534e-06, |
| "loss": 1.1833, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.4386900228484387, |
| "grad_norm": 1.1287343502044678, |
| "learning_rate": 4.474662000742864e-06, |
| "loss": 1.2344, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.440974866717441, |
| "grad_norm": 1.3280748128890991, |
| "learning_rate": 4.472750596949098e-06, |
| "loss": 1.2025, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.4432597105864433, |
| "grad_norm": 1.1119890213012695, |
| "learning_rate": 4.470836132025793e-06, |
| "loss": 1.1586, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.4455445544554455, |
| "grad_norm": 1.066416621208191, |
| "learning_rate": 4.4689186089436365e-06, |
| "loss": 1.1717, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.447829398324448, |
| "grad_norm": 1.0481845140457153, |
| "learning_rate": 4.4669980306780605e-06, |
| "loss": 1.1949, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.45011424219345, |
| "grad_norm": 1.094254732131958, |
| "learning_rate": 4.4650744002092384e-06, |
| "loss": 1.2005, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.4523990860624525, |
| "grad_norm": 1.1029901504516602, |
| "learning_rate": 4.46314772052208e-06, |
| "loss": 1.1956, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.4546839299314547, |
| "grad_norm": 1.129492163658142, |
| "learning_rate": 4.461217994606225e-06, |
| "loss": 1.2053, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.4569687738004569, |
| "grad_norm": 1.1537097692489624, |
| "learning_rate": 4.459285225456044e-06, |
| "loss": 1.1668, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.4592536176694593, |
| "grad_norm": 1.0732276439666748, |
| "learning_rate": 4.457349416070626e-06, |
| "loss": 1.2107, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.4615384615384617, |
| "grad_norm": 1.186018943786621, |
| "learning_rate": 4.455410569453777e-06, |
| "loss": 1.1789, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.4638233054074639, |
| "grad_norm": 1.1642903089523315, |
| "learning_rate": 4.453468688614019e-06, |
| "loss": 1.2169, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.466108149276466, |
| "grad_norm": 1.1415472030639648, |
| "learning_rate": 4.451523776564581e-06, |
| "loss": 1.1716, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.4683929931454685, |
| "grad_norm": 1.1644552946090698, |
| "learning_rate": 4.449575836323394e-06, |
| "loss": 1.1497, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.4706778370144706, |
| "grad_norm": 1.2200912237167358, |
| "learning_rate": 4.447624870913091e-06, |
| "loss": 1.2289, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.472962680883473, |
| "grad_norm": 1.1080158948898315, |
| "learning_rate": 4.445670883360996e-06, |
| "loss": 1.1378, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.4752475247524752, |
| "grad_norm": 1.1372804641723633, |
| "learning_rate": 4.443713876699124e-06, |
| "loss": 1.1639, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.4775323686214774, |
| "grad_norm": 1.1383754014968872, |
| "learning_rate": 4.441753853964174e-06, |
| "loss": 1.1558, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.4798172124904798, |
| "grad_norm": 1.1565297842025757, |
| "learning_rate": 4.439790818197527e-06, |
| "loss": 1.242, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.4821020563594822, |
| "grad_norm": 1.156384825706482, |
| "learning_rate": 4.4378247724452375e-06, |
| "loss": 1.2241, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.4843869002284844, |
| "grad_norm": 1.2158401012420654, |
| "learning_rate": 4.43585571975803e-06, |
| "loss": 1.1977, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.4866717440974866, |
| "grad_norm": 1.2885035276412964, |
| "learning_rate": 4.433883663191297e-06, |
| "loss": 1.1916, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.488956587966489, |
| "grad_norm": 1.1530733108520508, |
| "learning_rate": 4.431908605805092e-06, |
| "loss": 1.2362, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.4912414318354912, |
| "grad_norm": 1.1524220705032349, |
| "learning_rate": 4.429930550664121e-06, |
| "loss": 1.2263, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.4935262757044936, |
| "grad_norm": 1.1547584533691406, |
| "learning_rate": 4.427949500837749e-06, |
| "loss": 1.1478, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.4958111195734958, |
| "grad_norm": 1.1083858013153076, |
| "learning_rate": 4.425965459399979e-06, |
| "loss": 1.2183, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.498095963442498, |
| "grad_norm": 1.130506992340088, |
| "learning_rate": 4.423978429429463e-06, |
| "loss": 1.1923, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.5003808073115004, |
| "grad_norm": 1.106553077697754, |
| "learning_rate": 4.421988414009488e-06, |
| "loss": 1.192, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.5026656511805028, |
| "grad_norm": 1.186131477355957, |
| "learning_rate": 4.419995416227973e-06, |
| "loss": 1.19, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.504950495049505, |
| "grad_norm": 1.0756375789642334, |
| "learning_rate": 4.417999439177465e-06, |
| "loss": 1.1992, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.5072353389185071, |
| "grad_norm": 1.1160579919815063, |
| "learning_rate": 4.416000485955135e-06, |
| "loss": 1.1747, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.5095201827875095, |
| "grad_norm": 1.1178874969482422, |
| "learning_rate": 4.413998559662771e-06, |
| "loss": 1.1654, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.511805026656512, |
| "grad_norm": 1.0919098854064941, |
| "learning_rate": 4.411993663406774e-06, |
| "loss": 1.2013, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.5140898705255141, |
| "grad_norm": 1.058582067489624, |
| "learning_rate": 4.409985800298155e-06, |
| "loss": 1.1823, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.5163747143945163, |
| "grad_norm": 1.0792784690856934, |
| "learning_rate": 4.407974973452527e-06, |
| "loss": 1.2013, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.5186595582635185, |
| "grad_norm": 1.112774133682251, |
| "learning_rate": 4.405961185990103e-06, |
| "loss": 1.2005, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.520944402132521, |
| "grad_norm": 1.1190800666809082, |
| "learning_rate": 4.403944441035691e-06, |
| "loss": 1.2146, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.5232292460015233, |
| "grad_norm": 1.1045669317245483, |
| "learning_rate": 4.401924741718685e-06, |
| "loss": 1.2217, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.5255140898705255, |
| "grad_norm": 1.1048752069473267, |
| "learning_rate": 4.399902091173065e-06, |
| "loss": 1.1944, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.5277989337395277, |
| "grad_norm": 1.0909706354141235, |
| "learning_rate": 4.397876492537392e-06, |
| "loss": 1.2058, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.53008377760853, |
| "grad_norm": 1.1354328393936157, |
| "learning_rate": 4.3958479489548e-06, |
| "loss": 1.2164, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.5323686214775325, |
| "grad_norm": 1.1490201950073242, |
| "learning_rate": 4.393816463572993e-06, |
| "loss": 1.182, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.5346534653465347, |
| "grad_norm": 1.1395319700241089, |
| "learning_rate": 4.391782039544239e-06, |
| "loss": 1.2201, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.5369383092155369, |
| "grad_norm": 1.0788644552230835, |
| "learning_rate": 4.389744680025366e-06, |
| "loss": 1.2212, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.5392231530845393, |
| "grad_norm": 1.0663102865219116, |
| "learning_rate": 4.387704388177759e-06, |
| "loss": 1.1872, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.5415079969535415, |
| "grad_norm": 1.1177600622177124, |
| "learning_rate": 4.3856611671673505e-06, |
| "loss": 1.2032, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.5437928408225439, |
| "grad_norm": 1.1109418869018555, |
| "learning_rate": 4.383615020164621e-06, |
| "loss": 1.2041, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.546077684691546, |
| "grad_norm": 1.096182107925415, |
| "learning_rate": 4.3815659503445875e-06, |
| "loss": 1.1988, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.5483625285605482, |
| "grad_norm": 1.2027829885482788, |
| "learning_rate": 4.379513960886807e-06, |
| "loss": 1.1812, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.5506473724295506, |
| "grad_norm": 1.0674421787261963, |
| "learning_rate": 4.377459054975363e-06, |
| "loss": 1.1948, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.552932216298553, |
| "grad_norm": 1.0463448762893677, |
| "learning_rate": 4.375401235798866e-06, |
| "loss": 1.2174, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.5552170601675552, |
| "grad_norm": 1.1295356750488281, |
| "learning_rate": 4.373340506550447e-06, |
| "loss": 1.2013, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.5575019040365574, |
| "grad_norm": 1.116245150566101, |
| "learning_rate": 4.3712768704277535e-06, |
| "loss": 1.1983, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.5597867479055598, |
| "grad_norm": 1.1043322086334229, |
| "learning_rate": 4.369210330632942e-06, |
| "loss": 1.2042, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.5620715917745622, |
| "grad_norm": 1.1363909244537354, |
| "learning_rate": 4.367140890372674e-06, |
| "loss": 1.1793, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.5643564356435644, |
| "grad_norm": 1.099576473236084, |
| "learning_rate": 4.365068552858116e-06, |
| "loss": 1.1849, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.5666412795125666, |
| "grad_norm": 1.0956041812896729, |
| "learning_rate": 4.3629933213049245e-06, |
| "loss": 1.169, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.5689261233815688, |
| "grad_norm": 1.1022474765777588, |
| "learning_rate": 4.36091519893325e-06, |
| "loss": 1.2376, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.5712109672505712, |
| "grad_norm": 1.1013610363006592, |
| "learning_rate": 4.35883418896773e-06, |
| "loss": 1.1665, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.5734958111195736, |
| "grad_norm": 1.1273926496505737, |
| "learning_rate": 4.356750294637478e-06, |
| "loss": 1.1723, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.5757806549885758, |
| "grad_norm": 1.1341313123703003, |
| "learning_rate": 4.3546635191760875e-06, |
| "loss": 1.1813, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.578065498857578, |
| "grad_norm": 1.0935291051864624, |
| "learning_rate": 4.352573865821621e-06, |
| "loss": 1.1932, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.5803503427265804, |
| "grad_norm": 1.189144492149353, |
| "learning_rate": 4.350481337816606e-06, |
| "loss": 1.1798, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.5826351865955828, |
| "grad_norm": 1.2104555368423462, |
| "learning_rate": 4.348385938408033e-06, |
| "loss": 1.1895, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.584920030464585, |
| "grad_norm": 1.1346546411514282, |
| "learning_rate": 4.346287670847345e-06, |
| "loss": 1.1896, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.5872048743335871, |
| "grad_norm": 1.131858468055725, |
| "learning_rate": 4.344186538390438e-06, |
| "loss": 1.1895, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.5894897182025893, |
| "grad_norm": 1.1175106763839722, |
| "learning_rate": 4.342082544297652e-06, |
| "loss": 1.2215, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.5917745620715917, |
| "grad_norm": 1.1056832075119019, |
| "learning_rate": 4.3399756918337675e-06, |
| "loss": 1.1617, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.5940594059405941, |
| "grad_norm": 1.0777372121810913, |
| "learning_rate": 4.337865984268002e-06, |
| "loss": 1.2716, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.5963442498095963, |
| "grad_norm": 1.1472179889678955, |
| "learning_rate": 4.335753424874e-06, |
| "loss": 1.1842, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.5986290936785985, |
| "grad_norm": 1.0825904607772827, |
| "learning_rate": 4.333638016929835e-06, |
| "loss": 1.2377, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.600913937547601, |
| "grad_norm": 1.0884586572647095, |
| "learning_rate": 4.331519763717998e-06, |
| "loss": 1.2003, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.6031987814166033, |
| "grad_norm": 1.132827639579773, |
| "learning_rate": 4.329398668525396e-06, |
| "loss": 1.186, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.6054836252856055, |
| "grad_norm": 1.1931272745132446, |
| "learning_rate": 4.327274734643346e-06, |
| "loss": 1.21, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.6077684691546077, |
| "grad_norm": 1.060774803161621, |
| "learning_rate": 4.3251479653675705e-06, |
| "loss": 1.1893, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.61005331302361, |
| "grad_norm": 1.0615568161010742, |
| "learning_rate": 4.323018363998189e-06, |
| "loss": 1.1814, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.6123381568926123, |
| "grad_norm": 1.0800261497497559, |
| "learning_rate": 4.320885933839718e-06, |
| "loss": 1.1995, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.6146230007616147, |
| "grad_norm": 1.1502355337142944, |
| "learning_rate": 4.318750678201064e-06, |
| "loss": 1.219, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.6169078446306169, |
| "grad_norm": 1.07515287399292, |
| "learning_rate": 4.316612600395515e-06, |
| "loss": 1.1787, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.619192688499619, |
| "grad_norm": 1.075150728225708, |
| "learning_rate": 4.31447170374074e-06, |
| "loss": 1.2062, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.6214775323686215, |
| "grad_norm": 1.1458789110183716, |
| "learning_rate": 4.312327991558782e-06, |
| "loss": 1.2009, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.6237623762376239, |
| "grad_norm": 1.1655830144882202, |
| "learning_rate": 4.3101814671760546e-06, |
| "loss": 1.2004, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.626047220106626, |
| "grad_norm": 1.1394225358963013, |
| "learning_rate": 4.30803213392333e-06, |
| "loss": 1.2059, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.6283320639756282, |
| "grad_norm": 1.109095573425293, |
| "learning_rate": 4.305879995135745e-06, |
| "loss": 1.1727, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.6306169078446306, |
| "grad_norm": 1.072530746459961, |
| "learning_rate": 4.303725054152785e-06, |
| "loss": 1.2059, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.632901751713633, |
| "grad_norm": 1.079399824142456, |
| "learning_rate": 4.3015673143182864e-06, |
| "loss": 1.1929, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.6351865955826352, |
| "grad_norm": 1.1387250423431396, |
| "learning_rate": 4.299406778980428e-06, |
| "loss": 1.1924, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.6374714394516374, |
| "grad_norm": 1.1101268529891968, |
| "learning_rate": 4.297243451491724e-06, |
| "loss": 1.1678, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.6397562833206396, |
| "grad_norm": 1.0745279788970947, |
| "learning_rate": 4.295077335209027e-06, |
| "loss": 1.1632, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.642041127189642, |
| "grad_norm": 1.1062053442001343, |
| "learning_rate": 4.29290843349351e-06, |
| "loss": 1.183, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.6443259710586444, |
| "grad_norm": 1.1918151378631592, |
| "learning_rate": 4.290736749710672e-06, |
| "loss": 1.2, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.6466108149276466, |
| "grad_norm": 1.1596369743347168, |
| "learning_rate": 4.28856228723033e-06, |
| "loss": 1.2232, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.6488956587966488, |
| "grad_norm": 1.1261903047561646, |
| "learning_rate": 4.28638504942661e-06, |
| "loss": 1.1614, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.6511805026656512, |
| "grad_norm": 1.1628916263580322, |
| "learning_rate": 4.284205039677946e-06, |
| "loss": 1.1866, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.6534653465346536, |
| "grad_norm": 1.114009976387024, |
| "learning_rate": 4.282022261367074e-06, |
| "loss": 1.2027, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.6557501904036558, |
| "grad_norm": 1.2100892066955566, |
| "learning_rate": 4.279836717881022e-06, |
| "loss": 1.1922, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.658035034272658, |
| "grad_norm": 1.122144103050232, |
| "learning_rate": 4.277648412611114e-06, |
| "loss": 1.178, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.6603198781416602, |
| "grad_norm": 1.0997899770736694, |
| "learning_rate": 4.275457348952955e-06, |
| "loss": 1.2276, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.6626047220106626, |
| "grad_norm": 1.1784963607788086, |
| "learning_rate": 4.273263530306435e-06, |
| "loss": 1.1889, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.664889565879665, |
| "grad_norm": 1.1602816581726074, |
| "learning_rate": 4.271066960075715e-06, |
| "loss": 1.1671, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.6671744097486672, |
| "grad_norm": 1.132406234741211, |
| "learning_rate": 4.268867641669225e-06, |
| "loss": 1.2017, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.6694592536176693, |
| "grad_norm": 1.1267459392547607, |
| "learning_rate": 4.266665578499664e-06, |
| "loss": 1.2135, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.6717440974866717, |
| "grad_norm": 1.0951600074768066, |
| "learning_rate": 4.2644607739839875e-06, |
| "loss": 1.2463, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.6740289413556741, |
| "grad_norm": 1.073875069618225, |
| "learning_rate": 4.262253231543401e-06, |
| "loss": 1.1879, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.6763137852246763, |
| "grad_norm": 1.1010491847991943, |
| "learning_rate": 4.260042954603366e-06, |
| "loss": 1.1812, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.6785986290936785, |
| "grad_norm": 1.1129403114318848, |
| "learning_rate": 4.2578299465935805e-06, |
| "loss": 1.2281, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.680883472962681, |
| "grad_norm": 1.3570629358291626, |
| "learning_rate": 4.255614210947985e-06, |
| "loss": 1.2013, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.6831683168316833, |
| "grad_norm": 1.104535460472107, |
| "learning_rate": 4.2533957511047485e-06, |
| "loss": 1.1708, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.6854531607006855, |
| "grad_norm": 1.1040364503860474, |
| "learning_rate": 4.25117457050627e-06, |
| "loss": 1.2154, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.6877380045696877, |
| "grad_norm": 1.0932183265686035, |
| "learning_rate": 4.24895067259917e-06, |
| "loss": 1.2182, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.6900228484386899, |
| "grad_norm": 1.0946629047393799, |
| "learning_rate": 4.246724060834284e-06, |
| "loss": 1.2058, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.6923076923076923, |
| "grad_norm": 1.060051679611206, |
| "learning_rate": 4.24449473866666e-06, |
| "loss": 1.1919, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.6945925361766947, |
| "grad_norm": 1.0483872890472412, |
| "learning_rate": 4.242262709555552e-06, |
| "loss": 1.1638, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.6968773800456969, |
| "grad_norm": 1.0956077575683594, |
| "learning_rate": 4.240027976964412e-06, |
| "loss": 1.1805, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.699162223914699, |
| "grad_norm": 1.1185762882232666, |
| "learning_rate": 4.237790544360889e-06, |
| "loss": 1.1923, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.7014470677837015, |
| "grad_norm": 1.0720428228378296, |
| "learning_rate": 4.2355504152168235e-06, |
| "loss": 1.1895, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.7037319116527039, |
| "grad_norm": 1.1020833253860474, |
| "learning_rate": 4.2333075930082345e-06, |
| "loss": 1.1845, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.706016755521706, |
| "grad_norm": 1.107071876525879, |
| "learning_rate": 4.231062081215326e-06, |
| "loss": 1.1751, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.7083015993907082, |
| "grad_norm": 1.1301578283309937, |
| "learning_rate": 4.228813883322472e-06, |
| "loss": 1.151, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.7105864432597104, |
| "grad_norm": 1.096433401107788, |
| "learning_rate": 4.226563002818215e-06, |
| "loss": 1.1728, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.7128712871287128, |
| "grad_norm": 1.1047178506851196, |
| "learning_rate": 4.224309443195261e-06, |
| "loss": 1.1947, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.7151561309977152, |
| "grad_norm": 1.110417127609253, |
| "learning_rate": 4.222053207950472e-06, |
| "loss": 1.2186, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.7174409748667174, |
| "grad_norm": 1.1027036905288696, |
| "learning_rate": 4.219794300584863e-06, |
| "loss": 1.2164, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.7197258187357196, |
| "grad_norm": 1.109299898147583, |
| "learning_rate": 4.217532724603595e-06, |
| "loss": 1.2202, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.722010662604722, |
| "grad_norm": 1.143134593963623, |
| "learning_rate": 4.2152684835159695e-06, |
| "loss": 1.1837, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.7242955064737244, |
| "grad_norm": 1.0689709186553955, |
| "learning_rate": 4.213001580835423e-06, |
| "loss": 1.1874, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.7265803503427266, |
| "grad_norm": 1.128243327140808, |
| "learning_rate": 4.2107320200795236e-06, |
| "loss": 1.1756, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.7288651942117288, |
| "grad_norm": 1.067436933517456, |
| "learning_rate": 4.208459804769963e-06, |
| "loss": 1.2212, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.7311500380807312, |
| "grad_norm": 1.1413803100585938, |
| "learning_rate": 4.206184938432552e-06, |
| "loss": 1.1491, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.7334348819497334, |
| "grad_norm": 1.141803503036499, |
| "learning_rate": 4.203907424597214e-06, |
| "loss": 1.2636, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.7357197258187358, |
| "grad_norm": 1.1099580526351929, |
| "learning_rate": 4.2016272667979814e-06, |
| "loss": 1.2192, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.738004569687738, |
| "grad_norm": 1.100486397743225, |
| "learning_rate": 4.199344468572992e-06, |
| "loss": 1.2044, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.7402894135567402, |
| "grad_norm": 1.0598186254501343, |
| "learning_rate": 4.197059033464476e-06, |
| "loss": 1.1983, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.7425742574257426, |
| "grad_norm": 1.0922818183898926, |
| "learning_rate": 4.194770965018758e-06, |
| "loss": 1.2194, |
| "step": 763 |
| }, |
| { |
| "epoch": 1.744859101294745, |
| "grad_norm": 1.0975127220153809, |
| "learning_rate": 4.1924802667862485e-06, |
| "loss": 1.1465, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.7471439451637472, |
| "grad_norm": 1.0934858322143555, |
| "learning_rate": 4.190186942321438e-06, |
| "loss": 1.1544, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.7494287890327493, |
| "grad_norm": 1.0712271928787231, |
| "learning_rate": 4.187890995182893e-06, |
| "loss": 1.1893, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.7517136329017517, |
| "grad_norm": 1.1157736778259277, |
| "learning_rate": 4.1855924289332485e-06, |
| "loss": 1.2362, |
| "step": 767 |
| }, |
| { |
| "epoch": 1.7539984767707542, |
| "grad_norm": 1.2225691080093384, |
| "learning_rate": 4.183291247139204e-06, |
| "loss": 1.22, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.7562833206397563, |
| "grad_norm": 1.1402082443237305, |
| "learning_rate": 4.180987453371519e-06, |
| "loss": 1.2024, |
| "step": 769 |
| }, |
| { |
| "epoch": 1.7585681645087585, |
| "grad_norm": 1.1124638319015503, |
| "learning_rate": 4.178681051205004e-06, |
| "loss": 1.17, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.7608530083777607, |
| "grad_norm": 1.1337512731552124, |
| "learning_rate": 4.176372044218519e-06, |
| "loss": 1.1862, |
| "step": 771 |
| }, |
| { |
| "epoch": 1.7631378522467631, |
| "grad_norm": 1.0702142715454102, |
| "learning_rate": 4.174060435994962e-06, |
| "loss": 1.2038, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.7654226961157655, |
| "grad_norm": 1.112242579460144, |
| "learning_rate": 4.171746230121273e-06, |
| "loss": 1.2146, |
| "step": 773 |
| }, |
| { |
| "epoch": 1.7677075399847677, |
| "grad_norm": 1.1164225339889526, |
| "learning_rate": 4.169429430188418e-06, |
| "loss": 1.1768, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.76999238385377, |
| "grad_norm": 1.091208577156067, |
| "learning_rate": 4.16711003979139e-06, |
| "loss": 1.193, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.7722772277227723, |
| "grad_norm": 1.138411283493042, |
| "learning_rate": 4.164788062529203e-06, |
| "loss": 1.203, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.7745620715917747, |
| "grad_norm": 1.168305516242981, |
| "learning_rate": 4.1624635020048835e-06, |
| "loss": 1.2154, |
| "step": 777 |
| }, |
| { |
| "epoch": 1.7768469154607769, |
| "grad_norm": 1.0742619037628174, |
| "learning_rate": 4.160136361825465e-06, |
| "loss": 1.214, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.779131759329779, |
| "grad_norm": 1.076762080192566, |
| "learning_rate": 4.1578066456019885e-06, |
| "loss": 1.1834, |
| "step": 779 |
| }, |
| { |
| "epoch": 1.7814166031987813, |
| "grad_norm": 1.1189744472503662, |
| "learning_rate": 4.155474356949487e-06, |
| "loss": 1.191, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.7837014470677837, |
| "grad_norm": 1.0916801691055298, |
| "learning_rate": 4.153139499486988e-06, |
| "loss": 1.2104, |
| "step": 781 |
| }, |
| { |
| "epoch": 1.785986290936786, |
| "grad_norm": 1.1265934705734253, |
| "learning_rate": 4.150802076837506e-06, |
| "loss": 1.2366, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.7882711348057883, |
| "grad_norm": 1.1008100509643555, |
| "learning_rate": 4.148462092628032e-06, |
| "loss": 1.1919, |
| "step": 783 |
| }, |
| { |
| "epoch": 1.7905559786747904, |
| "grad_norm": 1.5858978033065796, |
| "learning_rate": 4.146119550489536e-06, |
| "loss": 1.1927, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.7928408225437928, |
| "grad_norm": 1.1155521869659424, |
| "learning_rate": 4.143774454056954e-06, |
| "loss": 1.1948, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.7951256664127953, |
| "grad_norm": 1.1289353370666504, |
| "learning_rate": 4.141426806969189e-06, |
| "loss": 1.1719, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.7974105102817974, |
| "grad_norm": 1.1492801904678345, |
| "learning_rate": 4.139076612869098e-06, |
| "loss": 1.169, |
| "step": 787 |
| }, |
| { |
| "epoch": 1.7996953541507996, |
| "grad_norm": 1.0931838750839233, |
| "learning_rate": 4.1367238754034935e-06, |
| "loss": 1.1581, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.801980198019802, |
| "grad_norm": 1.0901176929473877, |
| "learning_rate": 4.134368598223132e-06, |
| "loss": 1.2223, |
| "step": 789 |
| }, |
| { |
| "epoch": 1.8042650418888042, |
| "grad_norm": 1.0907678604125977, |
| "learning_rate": 4.132010784982711e-06, |
| "loss": 1.1839, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.8065498857578066, |
| "grad_norm": 1.1389234066009521, |
| "learning_rate": 4.129650439340866e-06, |
| "loss": 1.1765, |
| "step": 791 |
| }, |
| { |
| "epoch": 1.8088347296268088, |
| "grad_norm": 1.0889054536819458, |
| "learning_rate": 4.12728756496016e-06, |
| "loss": 1.1913, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.811119573495811, |
| "grad_norm": 1.090705156326294, |
| "learning_rate": 4.12492216550708e-06, |
| "loss": 1.1692, |
| "step": 793 |
| }, |
| { |
| "epoch": 1.8134044173648134, |
| "grad_norm": 1.1290946006774902, |
| "learning_rate": 4.12255424465203e-06, |
| "loss": 1.2114, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.8156892612338158, |
| "grad_norm": 1.108325719833374, |
| "learning_rate": 4.120183806069328e-06, |
| "loss": 1.1941, |
| "step": 795 |
| }, |
| { |
| "epoch": 1.817974105102818, |
| "grad_norm": 1.0901302099227905, |
| "learning_rate": 4.1178108534371995e-06, |
| "loss": 1.1709, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.8202589489718202, |
| "grad_norm": 1.1386867761611938, |
| "learning_rate": 4.11543539043777e-06, |
| "loss": 1.2008, |
| "step": 797 |
| }, |
| { |
| "epoch": 1.8225437928408226, |
| "grad_norm": 1.1768696308135986, |
| "learning_rate": 4.11305742075706e-06, |
| "loss": 1.1735, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.824828636709825, |
| "grad_norm": 1.093137264251709, |
| "learning_rate": 4.1106769480849795e-06, |
| "loss": 1.1952, |
| "step": 799 |
| }, |
| { |
| "epoch": 1.8271134805788272, |
| "grad_norm": 1.12264084815979, |
| "learning_rate": 4.108293976115325e-06, |
| "loss": 1.2118, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.8293983244478293, |
| "grad_norm": 1.089824914932251, |
| "learning_rate": 4.105908508545766e-06, |
| "loss": 1.1856, |
| "step": 801 |
| }, |
| { |
| "epoch": 1.8316831683168315, |
| "grad_norm": 1.3425248861312866, |
| "learning_rate": 4.1035205490778505e-06, |
| "loss": 1.1942, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.833968012185834, |
| "grad_norm": 1.097115159034729, |
| "learning_rate": 4.101130101416988e-06, |
| "loss": 1.2083, |
| "step": 803 |
| }, |
| { |
| "epoch": 1.8362528560548363, |
| "grad_norm": 1.0973830223083496, |
| "learning_rate": 4.098737169272452e-06, |
| "loss": 1.2033, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.8385376999238385, |
| "grad_norm": 1.127233624458313, |
| "learning_rate": 4.096341756357371e-06, |
| "loss": 1.1941, |
| "step": 805 |
| }, |
| { |
| "epoch": 1.8408225437928407, |
| "grad_norm": 1.097070574760437, |
| "learning_rate": 4.093943866388723e-06, |
| "loss": 1.1971, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.8431073876618431, |
| "grad_norm": 1.0978144407272339, |
| "learning_rate": 4.091543503087327e-06, |
| "loss": 1.2029, |
| "step": 807 |
| }, |
| { |
| "epoch": 1.8453922315308455, |
| "grad_norm": 1.0643872022628784, |
| "learning_rate": 4.089140670177843e-06, |
| "loss": 1.1532, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.8476770753998477, |
| "grad_norm": 1.1128400564193726, |
| "learning_rate": 4.086735371388762e-06, |
| "loss": 1.1851, |
| "step": 809 |
| }, |
| { |
| "epoch": 1.84996191926885, |
| "grad_norm": 1.1439098119735718, |
| "learning_rate": 4.0843276104524e-06, |
| "loss": 1.1816, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.852246763137852, |
| "grad_norm": 1.1020617485046387, |
| "learning_rate": 4.0819173911048965e-06, |
| "loss": 1.2081, |
| "step": 811 |
| }, |
| { |
| "epoch": 1.8545316070068545, |
| "grad_norm": 1.0913503170013428, |
| "learning_rate": 4.079504717086203e-06, |
| "loss": 1.1892, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.856816450875857, |
| "grad_norm": 1.1332125663757324, |
| "learning_rate": 4.077089592140082e-06, |
| "loss": 1.182, |
| "step": 813 |
| }, |
| { |
| "epoch": 1.859101294744859, |
| "grad_norm": 1.0584102869033813, |
| "learning_rate": 4.074672020014098e-06, |
| "loss": 1.2169, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.8613861386138613, |
| "grad_norm": 1.1291085481643677, |
| "learning_rate": 4.072252004459612e-06, |
| "loss": 1.1796, |
| "step": 815 |
| }, |
| { |
| "epoch": 1.8636709824828637, |
| "grad_norm": 1.0868487358093262, |
| "learning_rate": 4.069829549231778e-06, |
| "loss": 1.1832, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.865955826351866, |
| "grad_norm": 1.1105777025222778, |
| "learning_rate": 4.067404658089535e-06, |
| "loss": 1.2242, |
| "step": 817 |
| }, |
| { |
| "epoch": 1.8682406702208683, |
| "grad_norm": 1.1203657388687134, |
| "learning_rate": 4.0649773347956005e-06, |
| "loss": 1.1755, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.8705255140898704, |
| "grad_norm": 1.1000312566757202, |
| "learning_rate": 4.062547583116469e-06, |
| "loss": 1.1829, |
| "step": 819 |
| }, |
| { |
| "epoch": 1.8728103579588729, |
| "grad_norm": 1.1125813722610474, |
| "learning_rate": 4.060115406822402e-06, |
| "loss": 1.2013, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.8750952018278753, |
| "grad_norm": 1.0868099927902222, |
| "learning_rate": 4.057680809687421e-06, |
| "loss": 1.1749, |
| "step": 821 |
| }, |
| { |
| "epoch": 1.8773800456968774, |
| "grad_norm": 1.1242718696594238, |
| "learning_rate": 4.055243795489307e-06, |
| "loss": 1.1601, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.8796648895658796, |
| "grad_norm": 1.1220780611038208, |
| "learning_rate": 4.052804368009589e-06, |
| "loss": 1.197, |
| "step": 823 |
| }, |
| { |
| "epoch": 1.8819497334348818, |
| "grad_norm": 1.0715032815933228, |
| "learning_rate": 4.050362531033545e-06, |
| "loss": 1.1834, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.8842345773038842, |
| "grad_norm": 1.1281424760818481, |
| "learning_rate": 4.0479182883501855e-06, |
| "loss": 1.1653, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.8865194211728866, |
| "grad_norm": 1.0727750062942505, |
| "learning_rate": 4.045471643752258e-06, |
| "loss": 1.1907, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.8888042650418888, |
| "grad_norm": 1.122889757156372, |
| "learning_rate": 4.043022601036238e-06, |
| "loss": 1.1935, |
| "step": 827 |
| }, |
| { |
| "epoch": 1.891089108910891, |
| "grad_norm": 1.1605268716812134, |
| "learning_rate": 4.040571164002319e-06, |
| "loss": 1.211, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.8933739527798934, |
| "grad_norm": 1.0915296077728271, |
| "learning_rate": 4.038117336454411e-06, |
| "loss": 1.1614, |
| "step": 829 |
| }, |
| { |
| "epoch": 1.8956587966488958, |
| "grad_norm": 1.1206241846084595, |
| "learning_rate": 4.035661122200135e-06, |
| "loss": 1.1592, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.897943640517898, |
| "grad_norm": 1.1132665872573853, |
| "learning_rate": 4.033202525050813e-06, |
| "loss": 1.1865, |
| "step": 831 |
| }, |
| { |
| "epoch": 1.9002284843869002, |
| "grad_norm": 1.0694245100021362, |
| "learning_rate": 4.0307415488214675e-06, |
| "loss": 1.1767, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.9025133282559024, |
| "grad_norm": 1.1312873363494873, |
| "learning_rate": 4.028278197330808e-06, |
| "loss": 1.2344, |
| "step": 833 |
| }, |
| { |
| "epoch": 1.9047981721249048, |
| "grad_norm": 1.0864746570587158, |
| "learning_rate": 4.025812474401236e-06, |
| "loss": 1.2146, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.9070830159939072, |
| "grad_norm": 1.0774086713790894, |
| "learning_rate": 4.023344383858826e-06, |
| "loss": 1.1496, |
| "step": 835 |
| }, |
| { |
| "epoch": 1.9093678598629094, |
| "grad_norm": 1.0805225372314453, |
| "learning_rate": 4.0208739295333314e-06, |
| "loss": 1.2098, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.9116527037319115, |
| "grad_norm": 1.0994813442230225, |
| "learning_rate": 4.018401115258172e-06, |
| "loss": 1.1881, |
| "step": 837 |
| }, |
| { |
| "epoch": 1.913937547600914, |
| "grad_norm": 1.0419007539749146, |
| "learning_rate": 4.015925944870428e-06, |
| "loss": 1.1935, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.9162223914699164, |
| "grad_norm": 1.1358449459075928, |
| "learning_rate": 4.013448422210838e-06, |
| "loss": 1.1989, |
| "step": 839 |
| }, |
| { |
| "epoch": 1.9185072353389185, |
| "grad_norm": 1.121999740600586, |
| "learning_rate": 4.010968551123788e-06, |
| "loss": 1.2108, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.9207920792079207, |
| "grad_norm": 1.1504106521606445, |
| "learning_rate": 4.008486335457312e-06, |
| "loss": 1.1768, |
| "step": 841 |
| }, |
| { |
| "epoch": 1.9230769230769231, |
| "grad_norm": 1.128138780593872, |
| "learning_rate": 4.006001779063078e-06, |
| "loss": 1.1992, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.9253617669459253, |
| "grad_norm": 1.1590732336044312, |
| "learning_rate": 4.003514885796388e-06, |
| "loss": 1.181, |
| "step": 843 |
| }, |
| { |
| "epoch": 1.9276466108149277, |
| "grad_norm": 1.0851722955703735, |
| "learning_rate": 4.001025659516171e-06, |
| "loss": 1.1711, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.92993145468393, |
| "grad_norm": 1.066331148147583, |
| "learning_rate": 3.998534104084974e-06, |
| "loss": 1.1728, |
| "step": 845 |
| }, |
| { |
| "epoch": 1.932216298552932, |
| "grad_norm": 1.110464096069336, |
| "learning_rate": 3.99604022336896e-06, |
| "loss": 1.178, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.9345011424219345, |
| "grad_norm": 1.1028679609298706, |
| "learning_rate": 3.993544021237899e-06, |
| "loss": 1.2122, |
| "step": 847 |
| }, |
| { |
| "epoch": 1.936785986290937, |
| "grad_norm": 1.1760601997375488, |
| "learning_rate": 3.991045501565163e-06, |
| "loss": 1.2103, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.939070830159939, |
| "grad_norm": 1.1260336637496948, |
| "learning_rate": 3.988544668227721e-06, |
| "loss": 1.1443, |
| "step": 849 |
| }, |
| { |
| "epoch": 1.9413556740289413, |
| "grad_norm": 1.1055935621261597, |
| "learning_rate": 3.9860415251061334e-06, |
| "loss": 1.1795, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.9436405178979437, |
| "grad_norm": 1.1292855739593506, |
| "learning_rate": 3.983536076084541e-06, |
| "loss": 1.182, |
| "step": 851 |
| }, |
| { |
| "epoch": 1.945925361766946, |
| "grad_norm": 1.1108464002609253, |
| "learning_rate": 3.981028325050667e-06, |
| "loss": 1.1876, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.9482102056359483, |
| "grad_norm": 1.1306401491165161, |
| "learning_rate": 3.978518275895802e-06, |
| "loss": 1.1645, |
| "step": 853 |
| }, |
| { |
| "epoch": 1.9504950495049505, |
| "grad_norm": 1.1031887531280518, |
| "learning_rate": 3.976005932514807e-06, |
| "loss": 1.2047, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.9527798933739526, |
| "grad_norm": 1.0953725576400757, |
| "learning_rate": 3.973491298806101e-06, |
| "loss": 1.1756, |
| "step": 855 |
| }, |
| { |
| "epoch": 1.955064737242955, |
| "grad_norm": 1.109553575515747, |
| "learning_rate": 3.970974378671656e-06, |
| "loss": 1.2228, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.9573495811119574, |
| "grad_norm": 1.1159707307815552, |
| "learning_rate": 3.968455176016993e-06, |
| "loss": 1.2037, |
| "step": 857 |
| }, |
| { |
| "epoch": 1.9596344249809596, |
| "grad_norm": 1.1045714616775513, |
| "learning_rate": 3.965933694751175e-06, |
| "loss": 1.196, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.9619192688499618, |
| "grad_norm": 1.110876202583313, |
| "learning_rate": 3.963409938786801e-06, |
| "loss": 1.1772, |
| "step": 859 |
| }, |
| { |
| "epoch": 1.9642041127189642, |
| "grad_norm": 1.1226321458816528, |
| "learning_rate": 3.9608839120399975e-06, |
| "loss": 1.1875, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.9664889565879666, |
| "grad_norm": 1.1401004791259766, |
| "learning_rate": 3.958355618430417e-06, |
| "loss": 1.2137, |
| "step": 861 |
| }, |
| { |
| "epoch": 1.9687738004569688, |
| "grad_norm": 1.0866281986236572, |
| "learning_rate": 3.95582506188123e-06, |
| "loss": 1.2001, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.971058644325971, |
| "grad_norm": 1.1426069736480713, |
| "learning_rate": 3.9532922463191145e-06, |
| "loss": 1.1794, |
| "step": 863 |
| }, |
| { |
| "epoch": 1.9733434881949732, |
| "grad_norm": 1.1191396713256836, |
| "learning_rate": 3.950757175674257e-06, |
| "loss": 1.2118, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.9756283320639756, |
| "grad_norm": 1.0993397235870361, |
| "learning_rate": 3.948219853880344e-06, |
| "loss": 1.2209, |
| "step": 865 |
| }, |
| { |
| "epoch": 1.977913175932978, |
| "grad_norm": 1.0973010063171387, |
| "learning_rate": 3.945680284874553e-06, |
| "loss": 1.1738, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.9801980198019802, |
| "grad_norm": 1.2131692171096802, |
| "learning_rate": 3.943138472597549e-06, |
| "loss": 1.1833, |
| "step": 867 |
| }, |
| { |
| "epoch": 1.9824828636709824, |
| "grad_norm": 1.1128953695297241, |
| "learning_rate": 3.940594420993479e-06, |
| "loss": 1.1925, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.9847677075399848, |
| "grad_norm": 1.0862925052642822, |
| "learning_rate": 3.938048134009962e-06, |
| "loss": 1.1965, |
| "step": 869 |
| }, |
| { |
| "epoch": 1.9870525514089872, |
| "grad_norm": 1.1464707851409912, |
| "learning_rate": 3.935499615598088e-06, |
| "loss": 1.1579, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.9893373952779894, |
| "grad_norm": 1.1059821844100952, |
| "learning_rate": 3.932948869712412e-06, |
| "loss": 1.169, |
| "step": 871 |
| }, |
| { |
| "epoch": 1.9916222391469915, |
| "grad_norm": 1.1403911113739014, |
| "learning_rate": 3.930395900310939e-06, |
| "loss": 1.1586, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.993907083015994, |
| "grad_norm": 1.0881669521331787, |
| "learning_rate": 3.9278407113551295e-06, |
| "loss": 1.2262, |
| "step": 873 |
| }, |
| { |
| "epoch": 1.9961919268849961, |
| "grad_norm": 1.1039564609527588, |
| "learning_rate": 3.925283306809885e-06, |
| "loss": 1.1951, |
| "step": 874 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 2622, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 437, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.206703557678203e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|